All Downloads are FREE. Search and download functionalities are using the official Maven repository.

org.bytedeco.javacpp.cudnn Maven / Gradle / Ivy

The newest version!
// Targeted by JavaCPP version 1.4.4: DO NOT EDIT THIS FILE

package org.bytedeco.javacpp;

import java.nio.*;
import org.bytedeco.javacpp.*;
import org.bytedeco.javacpp.annotation.*;

import static org.bytedeco.javacpp.cuda.*;

public class cudnn extends org.bytedeco.javacpp.presets.cudnn {
    // Class initializer: invokes Loader.load() once, when this class is first initialized.
    // NOTE(review): presumably this is what loads the native library backing the
    // `native` methods declared in this class — confirm against the JavaCPP Loader docs.
    static { Loader.load(); }

// Parsed from <cudnn.h>

/*
 * Copyright 1993-2015 NVIDIA Corporation.  All rights reserved.
 *
 * NOTICE TO LICENSEE:
 *
 * This source code and/or documentation ("Licensed Deliverables") are
 * subject to NVIDIA intellectual property rights under U.S. and
 * international Copyright laws.
 *
 * These Licensed Deliverables contained herein is PROPRIETARY and
 * CONFIDENTIAL to NVIDIA and is being provided under the terms and
 * conditions of a form of NVIDIA software license agreement by and
 * between NVIDIA and Licensee ("License Agreement") or electronically
 * accepted by Licensee.  Notwithstanding any terms or conditions to
 * the contrary in the License Agreement, reproduction or disclosure
 * of the Licensed Deliverables to any third party without the express
 * written consent of NVIDIA is prohibited.
 *
 * NOTWITHSTANDING ANY TERMS OR CONDITIONS TO THE CONTRARY IN THE
 * LICENSE AGREEMENT, NVIDIA MAKES NO REPRESENTATION ABOUT THE
 * SUITABILITY OF THESE LICENSED DELIVERABLES FOR ANY PURPOSE.  IT IS
 * PROVIDED "AS IS" WITHOUT EXPRESS OR IMPLIED WARRANTY OF ANY KIND.
 * NVIDIA DISCLAIMS ALL WARRANTIES WITH REGARD TO THESE LICENSED
 * DELIVERABLES, INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY,
 * NONINFRINGEMENT, AND FITNESS FOR A PARTICULAR PURPOSE.
 * NOTWITHSTANDING ANY TERMS OR CONDITIONS TO THE CONTRARY IN THE
 * LICENSE AGREEMENT, IN NO EVENT SHALL NVIDIA BE LIABLE FOR ANY
 * SPECIAL, INDIRECT, INCIDENTAL, OR CONSEQUENTIAL DAMAGES, OR ANY
 * DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS,
 * WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS
 * ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE
 * OF THESE LICENSED DELIVERABLES.
 *
 * U.S. Government End Users.  These Licensed Deliverables are a
 * "commercial item" as that term is defined at 48 C.F.R. 2.101 (OCT
 * 1995), consisting of "commercial computer software" and "commercial
 * computer software documentation" as such terms are used in 48
 * C.F.R. 12.212 (SEPT 1995) and is provided to the U.S. Government
 * only as a commercial end item.  Consistent with 48 C.F.R.12.212 and
 * 48 C.F.R. 227.7202-1 through 227.7202-4 (JUNE 1995), all
 * U.S. Government End Users acquire the Licensed Deliverables with
 * only those rights set forth herein.
 *
 * Any use of the Licensed Deliverables in individual and commercial
 * software must include, in the user documentation and internal
 * comments to the code, the above Disclaimer and U.S. Government End
 * Users Notice.
 */

/*   cudnn : Neural Networks Library

*/

// #if !defined(CUDNN_H_)
// #define CUDNN_H_

/** Major version component declared by this binding (7). */
public static final int CUDNN_MAJOR = 7;
/** Minor version component declared by this binding (4). */
public static final int CUDNN_MINOR = 4;
/** Patch-level version component declared by this binding (1). */
public static final int CUDNN_PATCHLEVEL = 1;

/** Combined version number: major * 1000 + minor * 100 + patch level, which evaluates to 7401 here. */
public static final int CUDNN_VERSION = (CUDNN_MAJOR * 1000 + CUDNN_MINOR * 100 + CUDNN_PATCHLEVEL);

// #include "driver_types.h"
// #include <cuda_runtime.h>

// #ifndef CUDNNWINAPI
// #ifdef _WIN32
// #define CUDNNWINAPI __stdcall
// #else
// #define CUDNNWINAPI
// #endif
// #endif

// #if defined(__cplusplus)
// #endif

/**
 * Opaque pointer type named after the native {@code cudnnContext}. It declares no fields
 * or accessors, only the two constructors below. Instances are passed as the
 * {@code handle} argument of the native functions declared in this class.
 */
@Opaque public static class cudnnContext extends Pointer {
    /** Empty constructor. Calls {@code super((Pointer)null)}. */
    public cudnnContext() { super((Pointer)null); }
    /** Pointer cast constructor. Invokes {@link Pointer#Pointer(Pointer)}. */
    public cudnnContext(Pointer p) { super(p); }
}

/**
 * Native binding for {@code cudnnGetVersion}.
 *
 * @return the native {@code size_t} result as a {@code long}; presumably the version of the
 *         loaded library in the same encoding as {@link #CUDNN_VERSION} — TODO confirm
 */
public static native @Cast("size_t") long cudnnGetVersion();

/**
 * Returns the CUDA Runtime version statically linked against cudnn.
 *
 * @return the native {@code size_t} result as a {@code long}
 */
public static native @Cast("size_t") long cudnnGetCudartVersion();

/*
 * CUDNN return codes
 */
/**
 * enum cudnnStatus_t: return codes. The native functions in this class whose return is
 * annotated {@code @Cast("cudnnStatus_t")} hand back a value of this enum as an int.
 */
public static final int
    CUDNN_STATUS_SUCCESS                      = 0,
    CUDNN_STATUS_NOT_INITIALIZED              = 1,
    CUDNN_STATUS_ALLOC_FAILED                 = 2,
    CUDNN_STATUS_BAD_PARAM                    = 3,
    CUDNN_STATUS_INTERNAL_ERROR               = 4,
    CUDNN_STATUS_INVALID_VALUE                = 5,
    CUDNN_STATUS_ARCH_MISMATCH                = 6,
    CUDNN_STATUS_MAPPING_ERROR                = 7,
    CUDNN_STATUS_EXECUTION_FAILED             = 8,
    CUDNN_STATUS_NOT_SUPPORTED                = 9,
    CUDNN_STATUS_LICENSE_ERROR                = 10,
    CUDNN_STATUS_RUNTIME_PREREQUISITE_MISSING = 11,
    CUDNN_STATUS_RUNTIME_IN_PROGRESS          = 12,
    CUDNN_STATUS_RUNTIME_FP_OVERFLOW          = 13;

/**
 * Human-readable error messages: native binding for {@code cudnnGetErrorString}.
 *
 * @param status a {@code cudnnStatus_t} value (one of the {@code CUDNN_STATUS_*} constants)
 * @return the native {@code const char*} result wrapped as a {@link BytePointer}
 */
public static native @Cast("const char*") BytePointer cudnnGetErrorString(@Cast("cudnnStatus_t") int status);

/**
 * Opaque pointer type for {@code cudnnRuntimeTag_t} (forward definition in this version only).
 * It declares only the two constructors below and appears as the {@code tag} argument
 * of {@code cudnnQueryRuntimeError}.
 */
@Opaque public static class cudnnRuntimeTag_t extends Pointer {
    /** Empty constructor. Calls {@code super((Pointer)null)}. */
    public cudnnRuntimeTag_t() { super((Pointer)null); }
    /** Pointer cast constructor. Invokes {@link Pointer#Pointer(Pointer)}. */
    public cudnnRuntimeTag_t(Pointer p) { super(p); }
}

/** enum cudnnErrQueryMode_t: values for the {@code mode} argument of {@code cudnnQueryRuntimeError}. */
public static final int
    CUDNN_ERRQUERY_RAWCODE     = 0,
    CUDNN_ERRQUERY_NONBLOCKING = 1,
    CUDNN_ERRQUERY_BLOCKING    = 2;

/**
 * Native binding for {@code cudnnQueryRuntimeError}. The three overloads differ only in the
 * Java type used for {@code rstatus}: {@link IntPointer}, {@link IntBuffer} or {@code int[]}.
 *
 * @param handle  cuDNN context handle
 * @param rstatus native {@code cudnnStatus_t*}; presumably receives the queried status — TODO confirm
 * @param mode    a {@code cudnnErrQueryMode_t} value (one of the {@code CUDNN_ERRQUERY_*} constants)
 * @param tag     opaque runtime tag
 * @return a {@code cudnnStatus_t} value as an int (see the {@code CUDNN_STATUS_*} constants)
 */
public static native @Cast("cudnnStatus_t") int cudnnQueryRuntimeError(cudnnContext handle, @Cast("cudnnStatus_t*") IntPointer rstatus, @Cast("cudnnErrQueryMode_t") int mode, cudnnRuntimeTag_t tag);
public static native @Cast("cudnnStatus_t") int cudnnQueryRuntimeError(cudnnContext handle, @Cast("cudnnStatus_t*") IntBuffer rstatus, @Cast("cudnnErrQueryMode_t") int mode, cudnnRuntimeTag_t tag);
public static native @Cast("cudnnStatus_t") int cudnnQueryRuntimeError(cudnnContext handle, @Cast("cudnnStatus_t*") int[] rstatus, @Cast("cudnnErrQueryMode_t") int mode, cudnnRuntimeTag_t tag);

// #ifndef __LIBRARY_TYPES_H__

/**
 * enum libraryPropertyType_t.
 * NOTE(review): presumably the selectors accepted by the {@code type} argument of
 * {@code cudnnGetProperty} (cast there to {@code libraryPropertyType}) — confirm.
 */
public static final int MAJOR_VERSION = 0, MINOR_VERSION = 1, PATCH_LEVEL = 2;

// #endif

/**
 * Native binding for {@code cudnnGetProperty}. The three overloads differ only in the Java
 * type used for {@code value}: {@link IntPointer}, {@link IntBuffer} or {@code int[]}.
 *
 * @param type  property selector, cast to the native {@code libraryPropertyType}
 * @param value int pointer argument; presumably receives the requested property — TODO confirm
 * @return a {@code cudnnStatus_t} value as an int (see the {@code CUDNN_STATUS_*} constants)
 */
public static native @Cast("cudnnStatus_t") int cudnnGetProperty(@Cast("libraryPropertyType") int type, IntPointer value);
public static native @Cast("cudnnStatus_t") int cudnnGetProperty(@Cast("libraryPropertyType") int type, IntBuffer value);
public static native @Cast("cudnnStatus_t") int cudnnGetProperty(@Cast("libraryPropertyType") int type, int[] value);

/**
 * Native binding for {@code cudnnCreate}.
 *
 * @param handle context handle passed by pointer-to-pointer ({@code @ByPtrPtr});
 *               presumably set to the newly created context — TODO confirm
 * @return a {@code cudnnStatus_t} value as an int (see the {@code CUDNN_STATUS_*} constants)
 */
public static native @Cast("cudnnStatus_t") int cudnnCreate(@ByPtrPtr cudnnContext handle);
/**
 * Native binding for {@code cudnnDestroy}.
 *
 * @param handle cuDNN context handle
 * @return a {@code cudnnStatus_t} value as an int (see the {@code CUDNN_STATUS_*} constants)
 */
public static native @Cast("cudnnStatus_t") int cudnnDestroy(cudnnContext handle);
/**
 * Native binding for {@code cudnnSetStream}.
 *
 * @param handle   cuDNN context handle
 * @param streamId stream argument of type {@code CUstream_st}, which is not declared in the
 *                 visible part of this file (presumably supplied by the static import of
 *                 {@code org.bytedeco.javacpp.cuda} — confirm)
 * @return a {@code cudnnStatus_t} value as an int (see the {@code CUDNN_STATUS_*} constants)
 */
public static native @Cast("cudnnStatus_t") int cudnnSetStream(cudnnContext handle, CUstream_st streamId);
/**
 * Native binding for {@code cudnnGetStream}.
 *
 * @param handle   cuDNN context handle
 * @param streamId stream passed by pointer-to-pointer ({@code @ByPtrPtr}); presumably set to
 *                 the stream associated with {@code handle} — TODO confirm
 * @return a {@code cudnnStatus_t} value as an int (see the {@code CUDNN_STATUS_*} constants)
 */
public static native @Cast("cudnnStatus_t") int cudnnGetStream(cudnnContext handle, @ByPtrPtr CUstream_st streamId);

/* Data structures to represent Image/Filter and the Neural Network Layer */
/**
 * Opaque pointer type for {@code cudnnTensorStruct}; declares only the two constructors below.
 * Used in this class as the tensor descriptor arguments ({@code tensorDesc}, {@code xDesc},
 * {@code yDesc}, {@code aDesc}, {@code bDesc}, {@code cDesc}) of the native functions.
 */
@Opaque public static class cudnnTensorStruct extends Pointer {
    /** Empty constructor. Calls {@code super((Pointer)null)}. */
    public cudnnTensorStruct() { super((Pointer)null); }
    /** Pointer cast constructor. Invokes {@link Pointer#Pointer(Pointer)}. */
    public cudnnTensorStruct(Pointer p) { super(p); }
}
/**
 * Opaque pointer type for {@code cudnnConvolutionStruct}; declares only the two constructors
 * below. Used in this class as the {@code convDesc} argument of the convolution-descriptor functions.
 */
@Opaque public static class cudnnConvolutionStruct extends Pointer {
    /** Empty constructor. Calls {@code super((Pointer)null)}. */
    public cudnnConvolutionStruct() { super((Pointer)null); }
    /** Pointer cast constructor. Invokes {@link Pointer#Pointer(Pointer)}. */
    public cudnnConvolutionStruct(Pointer p) { super(p); }
}
/**
 * Opaque pointer type for {@code cudnnPoolingStruct}; declares only the two constructors below.
 * No function using it appears in this part of the file.
 */
@Opaque public static class cudnnPoolingStruct extends Pointer {
    /** Empty constructor. Calls {@code super((Pointer)null)}. */
    public cudnnPoolingStruct() { super((Pointer)null); }
    /** Pointer cast constructor. Invokes {@link Pointer#Pointer(Pointer)}. */
    public cudnnPoolingStruct(Pointer p) { super(p); }
}
/**
 * Opaque pointer type for {@code cudnnFilterStruct}; declares only the two constructors below.
 * Used in this class as the {@code filterDesc} argument of the filter-descriptor functions.
 */
@Opaque public static class cudnnFilterStruct extends Pointer {
    /** Empty constructor. Calls {@code super((Pointer)null)}. */
    public cudnnFilterStruct() { super((Pointer)null); }
    /** Pointer cast constructor. Invokes {@link Pointer#Pointer(Pointer)}. */
    public cudnnFilterStruct(Pointer p) { super(p); }
}
/**
 * Opaque pointer type for {@code cudnnLRNStruct}; declares only the two constructors below.
 * No function using it appears in this part of the file.
 */
@Opaque public static class cudnnLRNStruct extends Pointer {
    /** Empty constructor. Calls {@code super((Pointer)null)}. */
    public cudnnLRNStruct() { super((Pointer)null); }
    /** Pointer cast constructor. Invokes {@link Pointer#Pointer(Pointer)}. */
    public cudnnLRNStruct(Pointer p) { super(p); }
}
/**
 * Opaque pointer type for {@code cudnnActivationStruct}; declares only the two constructors below.
 * No function using it appears in this part of the file.
 */
@Opaque public static class cudnnActivationStruct extends Pointer {
    /** Empty constructor. Calls {@code super((Pointer)null)}. */
    public cudnnActivationStruct() { super((Pointer)null); }
    /** Pointer cast constructor. Invokes {@link Pointer#Pointer(Pointer)}. */
    public cudnnActivationStruct(Pointer p) { super(p); }
}
/**
 * Opaque pointer type for {@code cudnnSpatialTransformerStruct}; declares only the two
 * constructors below. No function using it appears in this part of the file.
 */
@Opaque public static class cudnnSpatialTransformerStruct extends Pointer {
    /** Empty constructor. Calls {@code super((Pointer)null)}. */
    public cudnnSpatialTransformerStruct() { super((Pointer)null); }
    /** Pointer cast constructor. Invokes {@link Pointer#Pointer(Pointer)}. */
    public cudnnSpatialTransformerStruct(Pointer p) { super(p); }
}
/**
 * Opaque pointer type for {@code cudnnOpTensorStruct}; declares only the two constructors below.
 * Used in this class as the {@code opTensorDesc} argument of the OpTensor functions.
 */
@Opaque public static class cudnnOpTensorStruct extends Pointer {
    /** Empty constructor. Calls {@code super((Pointer)null)}. */
    public cudnnOpTensorStruct() { super((Pointer)null); }
    /** Pointer cast constructor. Invokes {@link Pointer#Pointer(Pointer)}. */
    public cudnnOpTensorStruct(Pointer p) { super(p); }
}
/**
 * Opaque pointer type for {@code cudnnReduceTensorStruct}; declares only the two constructors
 * below. Used in this class as the {@code reduceTensorDesc} argument of the reduction functions.
 */
@Opaque public static class cudnnReduceTensorStruct extends Pointer {
    /** Empty constructor. Calls {@code super((Pointer)null)}. */
    public cudnnReduceTensorStruct() { super((Pointer)null); }
    /** Pointer cast constructor. Invokes {@link Pointer#Pointer(Pointer)}. */
    public cudnnReduceTensorStruct(Pointer p) { super(p); }
}
/**
 * Opaque pointer type for {@code cudnnCTCLossStruct}; declares only the two constructors below.
 * No function using it appears in this part of the file.
 */
@Opaque public static class cudnnCTCLossStruct extends Pointer {
    /** Empty constructor. Calls {@code super((Pointer)null)}. */
    public cudnnCTCLossStruct() { super((Pointer)null); }
    /** Pointer cast constructor. Invokes {@link Pointer#Pointer(Pointer)}. */
    public cudnnCTCLossStruct(Pointer p) { super(p); }
}
/*
* CUDNN data type
*/
/**
 * enum cudnnDataType_t: values for the arguments cast to {@code cudnnDataType_t} in this class
 * (for example {@code dataType}, {@code opTensorCompType}, {@code reduceTensorCompType},
 * {@code computeType}).
 */
public static final int
    CUDNN_DATA_FLOAT   = 0,
    CUDNN_DATA_DOUBLE  = 1,
    CUDNN_DATA_HALF    = 2,
    CUDNN_DATA_INT8    = 3,
    CUDNN_DATA_INT32   = 4,
    CUDNN_DATA_INT8x4  = 5,
    CUDNN_DATA_UINT8   = 6,
    CUDNN_DATA_UINT8x4 = 7,
    CUDNN_DATA_INT8x32 = 8;

/*
* CUDNN math type
*/
/**
 * enum cudnnMathType_t: values for the {@code mathType} argument of
 * {@code cudnnSetConvolutionMathType} / {@code cudnnGetConvolutionMathType}.
 */
public static final int
    CUDNN_DEFAULT_MATH                    = 0,
    CUDNN_TENSOR_OP_MATH                  = 1,
    CUDNN_TENSOR_OP_MATH_ALLOW_CONVERSION = 2;

/*
 * CUDNN propagate Nan
 */
/**
 * enum cudnnNanPropagation_t: values for the arguments cast to {@code cudnnNanPropagation_t}
 * in this class ({@code opTensorNanOpt}, {@code reduceTensorNanOpt}).
 */
public static final int
    CUDNN_NOT_PROPAGATE_NAN = 0,
    CUDNN_PROPAGATE_NAN     = 1;

/*
 * CUDNN Determinism
 */
/** enum cudnnDeterminism_t. No function taking this enum appears in this part of the file. */
public static final int
    CUDNN_NON_DETERMINISTIC = 0,
    CUDNN_DETERMINISTIC     = 1;

/** Maximum supported number of tensor dimensions (8). */
public static final int CUDNN_DIM_MAX = 8;

/**
 * Create an instance of a generic Tensor descriptor. The two overloads differ only in how
 * the native {@code cudnnTensorStruct**} argument is passed: a {@code @ByPtrPtr}
 * {@link cudnnTensorStruct} or a {@link PointerPointer}.
 *
 * @param tensorDesc descriptor passed by pointer-to-pointer; presumably set to the new
 *                   descriptor — TODO confirm
 * @return a {@code cudnnStatus_t} value as an int (see the {@code CUDNN_STATUS_*} constants)
 */
public static native @Cast("cudnnStatus_t") int cudnnCreateTensorDescriptor(@Cast("cudnnTensorStruct**") @ByPtrPtr cudnnTensorStruct tensorDesc);
public static native @Cast("cudnnStatus_t") int cudnnCreateTensorDescriptor(@Cast("cudnnTensorStruct**") PointerPointer tensorDesc);

/**
 * enum cudnnTensorFormat_t: values for the {@code format} arguments cast to
 * {@code cudnnTensorFormat_t} in this class (tensor and filter descriptor setters/getters).
 */
public static final int
    CUDNN_TENSOR_NCHW = 0, /* row major (wStride = 1, hStride = w) */
    CUDNN_TENSOR_NHWC = 1, /* feature maps interleaved ( cStride = 1 )*/
    CUDNN_TENSOR_NCHW_VECT_C = 2; /* each image point is vector of element of C : the length of the vector is carried by the data type*/

/**
 * Native binding for {@code cudnnSetTensor4dDescriptor}.
 *
 * @param tensorDesc tensor descriptor
 * @param format     a {@code cudnnTensorFormat_t} value ({@code CUDNN_TENSOR_*})
 * @param dataType   a {@code cudnnDataType_t} value ({@code CUDNN_DATA_*})
 * @param n          first of the four int dimension arguments (presumably number of images — confirm)
 * @param c          second dimension argument (presumably feature maps per image — confirm)
 * @param h          third dimension argument (presumably height — confirm)
 * @param w          width of input section
 * @return a {@code cudnnStatus_t} value as an int (see the {@code CUDNN_STATUS_*} constants)
 */
public static native @Cast("cudnnStatus_t") int cudnnSetTensor4dDescriptor(cudnnTensorStruct tensorDesc,
                           @Cast("cudnnTensorFormat_t") int format,
                           @Cast("cudnnDataType_t") int dataType,
                           int n,
                           int c,
                           int h,
                           int w);                   /* width of input section */

/**
 * Native binding for {@code cudnnSetTensor4dDescriptorEx}. Unlike
 * {@code cudnnSetTensor4dDescriptor} it takes no format argument and instead takes one
 * explicit stride per dimension.
 *
 * @param tensorDesc tensor descriptor
 * @param dataType   a {@code cudnnDataType_t} value ({@code CUDNN_DATA_*})
 * @param n          n dimension
 * @param c          c dimension
 * @param h          h dimension
 * @param w          w dimension
 * @param nStride    stride for the n dimension
 * @param cStride    stride for the c dimension
 * @param hStride    stride for the h dimension
 * @param wStride    stride for the w dimension
 * @return a {@code cudnnStatus_t} value as an int (see the {@code CUDNN_STATUS_*} constants)
 */
public static native @Cast("cudnnStatus_t") int cudnnSetTensor4dDescriptorEx(cudnnTensorStruct tensorDesc,
                             @Cast("cudnnDataType_t") int dataType,
                             int n,
                             int c,
                             int h,
                             int w,
                             int nStride,
                             int cStride,
                             int hStride,
                             int wStride);

/**
 * Native binding for {@code cudnnGetTensor4dDescriptor}. The three overloads differ only in
 * the Java type used for the int pointer arguments: {@link IntPointer}, {@link IntBuffer}
 * or {@code int[]}. The pointer arguments mirror the value arguments of
 * {@code cudnnSetTensor4dDescriptorEx}; presumably each receives the corresponding field of
 * {@code tensorDesc} — TODO confirm.
 *
 * @param tensorDesc tensor descriptor
 * @param dataType   native {@code cudnnDataType_t*}
 * @param n          pointer for the n dimension
 * @param c          pointer for the c dimension
 * @param h          pointer for the h dimension
 * @param w          pointer for the w dimension
 * @param nStride    pointer for the n stride
 * @param cStride    pointer for the c stride
 * @param hStride    pointer for the h stride
 * @param wStride    pointer for the w stride
 * @return a {@code cudnnStatus_t} value as an int (see the {@code CUDNN_STATUS_*} constants)
 */
public static native @Cast("cudnnStatus_t") int cudnnGetTensor4dDescriptor(cudnnTensorStruct tensorDesc,
                           @Cast("cudnnDataType_t*") IntPointer dataType,
                           IntPointer n,
                           IntPointer c,
                           IntPointer h,
                           IntPointer w,
                           IntPointer nStride,
                           IntPointer cStride,
                           IntPointer hStride,
                           IntPointer wStride);
public static native @Cast("cudnnStatus_t") int cudnnGetTensor4dDescriptor(cudnnTensorStruct tensorDesc,
                           @Cast("cudnnDataType_t*") IntBuffer dataType,
                           IntBuffer n,
                           IntBuffer c,
                           IntBuffer h,
                           IntBuffer w,
                           IntBuffer nStride,
                           IntBuffer cStride,
                           IntBuffer hStride,
                           IntBuffer wStride);
public static native @Cast("cudnnStatus_t") int cudnnGetTensor4dDescriptor(cudnnTensorStruct tensorDesc,
                           @Cast("cudnnDataType_t*") int[] dataType,
                           int[] n,
                           int[] c,
                           int[] h,
                           int[] w,
                           int[] nStride,
                           int[] cStride,
                           int[] hStride,
                           int[] wStride);

/**
 * Native binding for {@code cudnnSetTensorNdDescriptor}. The three overloads differ only in
 * the Java type used for the const int arrays: {@link IntPointer}, {@link IntBuffer} or {@code int[]}.
 *
 * @param tensorDesc tensor descriptor
 * @param dataType   a {@code cudnnDataType_t} value ({@code CUDNN_DATA_*})
 * @param nbDims     number of dimensions (presumably the length of {@code dimA} and
 *                   {@code strideA} — TODO confirm)
 * @param dimA       const array of dimensions
 * @param strideA    const array of strides
 * @return a {@code cudnnStatus_t} value as an int (see the {@code CUDNN_STATUS_*} constants)
 */
public static native @Cast("cudnnStatus_t") int cudnnSetTensorNdDescriptor(cudnnTensorStruct tensorDesc,
                           @Cast("cudnnDataType_t") int dataType,
                           int nbDims,
                           @Const IntPointer dimA,
                           @Const IntPointer strideA);
public static native @Cast("cudnnStatus_t") int cudnnSetTensorNdDescriptor(cudnnTensorStruct tensorDesc,
                           @Cast("cudnnDataType_t") int dataType,
                           int nbDims,
                           @Const IntBuffer dimA,
                           @Const IntBuffer strideA);
public static native @Cast("cudnnStatus_t") int cudnnSetTensorNdDescriptor(cudnnTensorStruct tensorDesc,
                           @Cast("cudnnDataType_t") int dataType,
                           int nbDims,
                           @Const int[] dimA,
                           @Const int[] strideA);

/**
 * Native binding for {@code cudnnSetTensorNdDescriptorEx}. Unlike
 * {@code cudnnSetTensorNdDescriptor} it takes a format argument and no stride array. The
 * three overloads differ only in the Java type used for {@code dimA}.
 *
 * @param tensorDesc tensor descriptor
 * @param format     a {@code cudnnTensorFormat_t} value ({@code CUDNN_TENSOR_*})
 * @param dataType   a {@code cudnnDataType_t} value ({@code CUDNN_DATA_*})
 * @param nbDims     number of dimensions (presumably the length of {@code dimA} — TODO confirm)
 * @param dimA       const array of dimensions
 * @return a {@code cudnnStatus_t} value as an int (see the {@code CUDNN_STATUS_*} constants)
 */
public static native @Cast("cudnnStatus_t") int cudnnSetTensorNdDescriptorEx(cudnnTensorStruct tensorDesc,
                             @Cast("cudnnTensorFormat_t") int format,
                             @Cast("cudnnDataType_t") int dataType,
                             int nbDims,
                             @Const IntPointer dimA);
public static native @Cast("cudnnStatus_t") int cudnnSetTensorNdDescriptorEx(cudnnTensorStruct tensorDesc,
                             @Cast("cudnnTensorFormat_t") int format,
                             @Cast("cudnnDataType_t") int dataType,
                             int nbDims,
                             @Const IntBuffer dimA);
public static native @Cast("cudnnStatus_t") int cudnnSetTensorNdDescriptorEx(cudnnTensorStruct tensorDesc,
                             @Cast("cudnnTensorFormat_t") int format,
                             @Cast("cudnnDataType_t") int dataType,
                             int nbDims,
                             @Const int[] dimA);

/**
 * Native binding for {@code cudnnGetTensorNdDescriptor}. The three overloads differ only in
 * the Java type used for the int pointer arguments: {@link IntPointer}, {@link IntBuffer}
 * or {@code int[]}.
 *
 * @param tensorDesc      tensor descriptor
 * @param nbDimsRequested requested number of dimensions (presumably bounds how many entries
 *                        of {@code dimA}/{@code strideA} are written — TODO confirm)
 * @param dataType        native {@code cudnnDataType_t*}
 * @param nbDims          pointer for the number of dimensions
 * @param dimA            array for the dimensions
 * @param strideA         array for the strides
 * @return a {@code cudnnStatus_t} value as an int (see the {@code CUDNN_STATUS_*} constants)
 */
public static native @Cast("cudnnStatus_t") int cudnnGetTensorNdDescriptor(cudnnTensorStruct tensorDesc,
                           int nbDimsRequested,
                           @Cast("cudnnDataType_t*") IntPointer dataType,
                           IntPointer nbDims,
                           IntPointer dimA,
                           IntPointer strideA);
public static native @Cast("cudnnStatus_t") int cudnnGetTensorNdDescriptor(cudnnTensorStruct tensorDesc,
                           int nbDimsRequested,
                           @Cast("cudnnDataType_t*") IntBuffer dataType,
                           IntBuffer nbDims,
                           IntBuffer dimA,
                           IntBuffer strideA);
public static native @Cast("cudnnStatus_t") int cudnnGetTensorNdDescriptor(cudnnTensorStruct tensorDesc,
                           int nbDimsRequested,
                           @Cast("cudnnDataType_t*") int[] dataType,
                           int[] nbDims,
                           int[] dimA,
                           int[] strideA);

/**
 * Native binding for {@code cudnnGetTensorSizeInBytes}.
 *
 * @param tensorDesc tensor descriptor
 * @param size       native {@code size_t*}; presumably receives the size in bytes — TODO confirm
 * @return a {@code cudnnStatus_t} value as an int (see the {@code CUDNN_STATUS_*} constants)
 */
public static native @Cast("cudnnStatus_t") int cudnnGetTensorSizeInBytes(cudnnTensorStruct tensorDesc, @Cast("size_t*") SizeTPointer size);

/* PixelOffset( n, c, h, w ) = n *input_stride + c * feature_stride + h * h_stride + w * w_stride

   1)Example of all images in row major order one batch of features after the other (with an optional padding on row)
   input_stride :  c x h x h_stride
   feature_stride : h x h_stride
   h_stride  :  >= w  ( h_stride = w if no padding)
   w_stride  : 1


   2)Example of all images in row major with features maps interleaved
   input_stride :  c x h x h_stride
   feature_stride : 1
   h_stride  :  w x c
   w_stride  : c

   3)Example of all images in column major order one batch of features after the other (with optional padding on column)
   input_stride :  c x w x w_stride
   feature_stride : w x w_stride
   h_stride  :  1
   w_stride  :  >= h

*/

/**
 * Destroy an instance of Tensor4d descriptor.
 *
 * @param tensorDesc tensor descriptor to destroy
 * @return a {@code cudnnStatus_t} value as an int (see the {@code CUDNN_STATUS_*} constants)
 */
public static native @Cast("cudnnStatus_t") int cudnnDestroyTensorDescriptor(cudnnTensorStruct tensorDesc);

/**
 * Tensor layout conversion helper (y = alpha * x + beta * y).
 *
 * @param handle cuDNN context handle
 * @param alpha  const pointer to the alpha scaling factor
 * @param xDesc  descriptor for {@code x}
 * @param x      const pointer to the input data
 * @param beta   const pointer to the beta scaling factor
 * @param yDesc  descriptor for {@code y}
 * @param y      non-const pointer to the output data
 * @return a {@code cudnnStatus_t} value as an int (see the {@code CUDNN_STATUS_*} constants)
 */
public static native @Cast("cudnnStatus_t") int cudnnTransformTensor(cudnnContext handle,
                     @Const Pointer alpha,
                     cudnnTensorStruct xDesc,
                     @Const Pointer x,
                     @Const Pointer beta,
                     cudnnTensorStruct yDesc,
                     Pointer y);

/**
 * Tensor Bias addition : C = alpha * A + beta * C.
 *
 * @param handle cuDNN context handle
 * @param alpha  const pointer to the alpha scaling factor
 * @param aDesc  descriptor for {@code A}
 * @param A      const pointer to the A data
 * @param beta   const pointer to the beta scaling factor
 * @param cDesc  descriptor for {@code C}
 * @param C      non-const pointer to the C data
 * @return a {@code cudnnStatus_t} value as an int (see the {@code CUDNN_STATUS_*} constants)
 */
public static native @Cast("cudnnStatus_t") int cudnnAddTensor(cudnnContext handle,
               @Const Pointer alpha,
               cudnnTensorStruct aDesc,
               @Const Pointer A,
               @Const Pointer beta,
               cudnnTensorStruct cDesc,
               Pointer C);

/*
* CUDNN OpTensor op type
*/
/**
 * enum cudnnOpTensorOp_t: values for the {@code opTensorOp} argument of
 * {@code cudnnSetOpTensorDescriptor} / {@code cudnnGetOpTensorDescriptor}.
 */
public static final int
    CUDNN_OP_TENSOR_ADD  = 0,
    CUDNN_OP_TENSOR_MUL  = 1,
    CUDNN_OP_TENSOR_MIN  = 2,
    CUDNN_OP_TENSOR_MAX  = 3,
    CUDNN_OP_TENSOR_SQRT = 4,
    CUDNN_OP_TENSOR_NOT  = 5;

/**
 * Native binding for {@code cudnnCreateOpTensorDescriptor}.
 *
 * @param opTensorDesc descriptor passed by pointer-to-pointer ({@code @ByPtrPtr});
 *                     presumably set to the new descriptor — TODO confirm
 * @return a {@code cudnnStatus_t} value as an int (see the {@code CUDNN_STATUS_*} constants)
 */
public static native @Cast("cudnnStatus_t") int cudnnCreateOpTensorDescriptor(@ByPtrPtr cudnnOpTensorStruct opTensorDesc);

/**
 * Native binding for {@code cudnnSetOpTensorDescriptor}.
 *
 * @param opTensorDesc     OpTensor descriptor
 * @param opTensorOp       a {@code cudnnOpTensorOp_t} value ({@code CUDNN_OP_TENSOR_*})
 * @param opTensorCompType a {@code cudnnDataType_t} value ({@code CUDNN_DATA_*})
 * @param opTensorNanOpt   a {@code cudnnNanPropagation_t} value
 * @return a {@code cudnnStatus_t} value as an int (see the {@code CUDNN_STATUS_*} constants)
 */
public static native @Cast("cudnnStatus_t") int cudnnSetOpTensorDescriptor(cudnnOpTensorStruct opTensorDesc,
                           @Cast("cudnnOpTensorOp_t") int opTensorOp,
                           @Cast("cudnnDataType_t") int opTensorCompType,
                           @Cast("cudnnNanPropagation_t") int opTensorNanOpt);

/**
 * Native binding for {@code cudnnGetOpTensorDescriptor}. The three overloads differ only in
 * the Java type used for the enum pointer arguments: {@link IntPointer}, {@link IntBuffer}
 * or {@code int[]}. The pointer arguments mirror the value arguments of
 * {@code cudnnSetOpTensorDescriptor}; presumably each receives the stored setting — TODO confirm.
 *
 * @param opTensorDesc     OpTensor descriptor
 * @param opTensorOp       native {@code cudnnOpTensorOp_t*}
 * @param opTensorCompType native {@code cudnnDataType_t*}
 * @param opTensorNanOpt   native {@code cudnnNanPropagation_t*}
 * @return a {@code cudnnStatus_t} value as an int (see the {@code CUDNN_STATUS_*} constants)
 */
public static native @Cast("cudnnStatus_t") int cudnnGetOpTensorDescriptor(cudnnOpTensorStruct opTensorDesc,
                           @Cast("cudnnOpTensorOp_t*") IntPointer opTensorOp,
                           @Cast("cudnnDataType_t*") IntPointer opTensorCompType,
                           @Cast("cudnnNanPropagation_t*") IntPointer opTensorNanOpt);
public static native @Cast("cudnnStatus_t") int cudnnGetOpTensorDescriptor(cudnnOpTensorStruct opTensorDesc,
                           @Cast("cudnnOpTensorOp_t*") IntBuffer opTensorOp,
                           @Cast("cudnnDataType_t*") IntBuffer opTensorCompType,
                           @Cast("cudnnNanPropagation_t*") IntBuffer opTensorNanOpt);
public static native @Cast("cudnnStatus_t") int cudnnGetOpTensorDescriptor(cudnnOpTensorStruct opTensorDesc,
                           @Cast("cudnnOpTensorOp_t*") int[] opTensorOp,
                           @Cast("cudnnDataType_t*") int[] opTensorCompType,
                           @Cast("cudnnNanPropagation_t*") int[] opTensorNanOpt);

/**
 * Native binding for {@code cudnnDestroyOpTensorDescriptor}.
 *
 * @param opTensorDesc OpTensor descriptor
 * @return a {@code cudnnStatus_t} value as an int (see the {@code CUDNN_STATUS_*} constants)
 */
public static native @Cast("cudnnStatus_t") int cudnnDestroyOpTensorDescriptor(cudnnOpTensorStruct opTensorDesc);

/**
 * Tensor operation : C = op( alpha1 * A, alpha2 * B ) + beta * C.
 * B tensor is ignored for CUDNN_OP_TENSOR_SQRT, CUDNN_OP_TENSOR_NOT.
 *
 * @param handle       cuDNN context handle
 * @param opTensorDesc OpTensor descriptor (presumably selects {@code op} — TODO confirm)
 * @param alpha1       const pointer to the scaling factor applied to A
 * @param aDesc        descriptor for {@code A}
 * @param A            const pointer to the A data
 * @param alpha2       const pointer to the scaling factor applied to B
 * @param bDesc        descriptor for {@code B}
 * @param B            const pointer to the B data
 * @param beta         const pointer to the beta scaling factor
 * @param cDesc        descriptor for {@code C}
 * @param C            non-const pointer to the C data
 * @return a {@code cudnnStatus_t} value as an int (see the {@code CUDNN_STATUS_*} constants)
 */
public static native @Cast("cudnnStatus_t") int cudnnOpTensor(cudnnContext handle,
              cudnnOpTensorStruct opTensorDesc,
              @Const Pointer alpha1,
              cudnnTensorStruct aDesc,
              @Const Pointer A,
              @Const Pointer alpha2,
              cudnnTensorStruct bDesc,
              @Const Pointer B,
              @Const Pointer beta,
              cudnnTensorStruct cDesc,
              Pointer C);

/*
* CUDNN ReduceTensor op type
*/
/**
 * enum cudnnReduceTensorOp_t: values for the {@code reduceTensorOp} argument of
 * {@code cudnnSetReduceTensorDescriptor} / {@code cudnnGetReduceTensorDescriptor}.
 */
public static final int
    CUDNN_REDUCE_TENSOR_ADD          = 0,
    CUDNN_REDUCE_TENSOR_MUL          = 1,
    CUDNN_REDUCE_TENSOR_MIN          = 2,
    CUDNN_REDUCE_TENSOR_MAX          = 3,
    CUDNN_REDUCE_TENSOR_AMAX         = 4,
    CUDNN_REDUCE_TENSOR_AVG          = 5,
    CUDNN_REDUCE_TENSOR_NORM1        = 6,
    CUDNN_REDUCE_TENSOR_NORM2        = 7,
    CUDNN_REDUCE_TENSOR_MUL_NO_ZEROS = 8;

/*
* CUDNN ReduceTensor indices type
*/
/**
 * enum cudnnReduceTensorIndices_t: values for the {@code reduceTensorIndices} argument of
 * {@code cudnnSetReduceTensorDescriptor} / {@code cudnnGetReduceTensorDescriptor}.
 */
public static final int
    CUDNN_REDUCE_TENSOR_NO_INDICES        = 0,
    CUDNN_REDUCE_TENSOR_FLATTENED_INDICES = 1;

/*
* CUDNN tensor indices type size (all unsigned)
* Currently not supported, default is 32 bit unsigned.
*/
/**
 * enum cudnnIndicesType_t: values for the {@code reduceTensorIndicesType} argument of
 * {@code cudnnSetReduceTensorDescriptor} / {@code cudnnGetReduceTensorDescriptor}.
 * Note the numbering is not ordered by bit width: 32-bit is 0 and 64-bit is 1.
 */
public static final int
    CUDNN_32BIT_INDICES = 0,
    CUDNN_64BIT_INDICES = 1,
    CUDNN_16BIT_INDICES = 2,
    CUDNN_8BIT_INDICES  = 3;

/**
 * Native binding for {@code cudnnCreateReduceTensorDescriptor}.
 *
 * @param reduceTensorDesc descriptor passed by pointer-to-pointer ({@code @ByPtrPtr});
 *                         presumably set to the new descriptor — TODO confirm
 * @return a {@code cudnnStatus_t} value as an int (see the {@code CUDNN_STATUS_*} constants)
 */
public static native @Cast("cudnnStatus_t") int cudnnCreateReduceTensorDescriptor(@ByPtrPtr cudnnReduceTensorStruct reduceTensorDesc);

/**
 * Native binding for {@code cudnnSetReduceTensorDescriptor}.
 *
 * @param reduceTensorDesc        ReduceTensor descriptor
 * @param reduceTensorOp          a {@code cudnnReduceTensorOp_t} value
 * @param reduceTensorCompType    a {@code cudnnDataType_t} value ({@code CUDNN_DATA_*})
 * @param reduceTensorNanOpt      a {@code cudnnNanPropagation_t} value
 * @param reduceTensorIndices     a {@code cudnnReduceTensorIndices_t} value
 * @param reduceTensorIndicesType a {@code cudnnIndicesType_t} value
 * @return a {@code cudnnStatus_t} value as an int (see the {@code CUDNN_STATUS_*} constants)
 */
public static native @Cast("cudnnStatus_t") int cudnnSetReduceTensorDescriptor(cudnnReduceTensorStruct reduceTensorDesc,
                               @Cast("cudnnReduceTensorOp_t") int reduceTensorOp,
                               @Cast("cudnnDataType_t") int reduceTensorCompType,
                               @Cast("cudnnNanPropagation_t") int reduceTensorNanOpt,
                               @Cast("cudnnReduceTensorIndices_t") int reduceTensorIndices,
                               @Cast("cudnnIndicesType_t") int reduceTensorIndicesType);

/**
 * Native binding for {@code cudnnGetReduceTensorDescriptor}. The three overloads differ only
 * in the Java type used for the enum pointer arguments: {@link IntPointer}, {@link IntBuffer}
 * or {@code int[]}. The pointer arguments mirror the value arguments of
 * {@code cudnnSetReduceTensorDescriptor}; presumably each receives the stored setting — TODO confirm.
 *
 * @param reduceTensorDesc        ReduceTensor descriptor
 * @param reduceTensorOp          native {@code cudnnReduceTensorOp_t*}
 * @param reduceTensorCompType    native {@code cudnnDataType_t*}
 * @param reduceTensorNanOpt      native {@code cudnnNanPropagation_t*}
 * @param reduceTensorIndices     native {@code cudnnReduceTensorIndices_t*}
 * @param reduceTensorIndicesType native {@code cudnnIndicesType_t*}
 * @return a {@code cudnnStatus_t} value as an int (see the {@code CUDNN_STATUS_*} constants)
 */
public static native @Cast("cudnnStatus_t") int cudnnGetReduceTensorDescriptor(cudnnReduceTensorStruct reduceTensorDesc,
                               @Cast("cudnnReduceTensorOp_t*") IntPointer reduceTensorOp,
                               @Cast("cudnnDataType_t*") IntPointer reduceTensorCompType,
                               @Cast("cudnnNanPropagation_t*") IntPointer reduceTensorNanOpt,
                               @Cast("cudnnReduceTensorIndices_t*") IntPointer reduceTensorIndices,
                               @Cast("cudnnIndicesType_t*") IntPointer reduceTensorIndicesType);
public static native @Cast("cudnnStatus_t") int cudnnGetReduceTensorDescriptor(cudnnReduceTensorStruct reduceTensorDesc,
                               @Cast("cudnnReduceTensorOp_t*") IntBuffer reduceTensorOp,
                               @Cast("cudnnDataType_t*") IntBuffer reduceTensorCompType,
                               @Cast("cudnnNanPropagation_t*") IntBuffer reduceTensorNanOpt,
                               @Cast("cudnnReduceTensorIndices_t*") IntBuffer reduceTensorIndices,
                               @Cast("cudnnIndicesType_t*") IntBuffer reduceTensorIndicesType);
public static native @Cast("cudnnStatus_t") int cudnnGetReduceTensorDescriptor(cudnnReduceTensorStruct reduceTensorDesc,
                               @Cast("cudnnReduceTensorOp_t*") int[] reduceTensorOp,
                               @Cast("cudnnDataType_t*") int[] reduceTensorCompType,
                               @Cast("cudnnNanPropagation_t*") int[] reduceTensorNanOpt,
                               @Cast("cudnnReduceTensorIndices_t*") int[] reduceTensorIndices,
                               @Cast("cudnnIndicesType_t*") int[] reduceTensorIndicesType);

/**
 * Native binding for {@code cudnnDestroyReduceTensorDescriptor}.
 *
 * @param reduceTensorDesc ReduceTensor descriptor
 * @return a {@code cudnnStatus_t} value as an int (see the {@code CUDNN_STATUS_*} constants)
 */
public static native @Cast("cudnnStatus_t") int cudnnDestroyReduceTensorDescriptor(cudnnReduceTensorStruct reduceTensorDesc);

/**
 * Helper function to return the minimum size of the index space to be passed to the
 * reduction given the input and output tensors.
 *
 * @param handle           cuDNN context handle
 * @param reduceTensorDesc ReduceTensor descriptor
 * @param aDesc            descriptor of the A tensor
 * @param cDesc            descriptor of the C tensor
 * @param sizeInBytes      native {@code size_t*} through which the size is returned
 * @return a {@code cudnnStatus_t} value as an int (see the {@code CUDNN_STATUS_*} constants)
 */
public static native @Cast("cudnnStatus_t") int cudnnGetReductionIndicesSize(cudnnContext handle,
                             cudnnReduceTensorStruct reduceTensorDesc,
                             cudnnTensorStruct aDesc,
                             cudnnTensorStruct cDesc,
                             @Cast("size_t*") SizeTPointer sizeInBytes);

/**
 * Helper function to return the minimum size of the workspace to be passed to the
 * reduction given the input and output tensors.
 *
 * @param handle           cuDNN context handle
 * @param reduceTensorDesc ReduceTensor descriptor
 * @param aDesc            descriptor of the A tensor
 * @param cDesc            descriptor of the C tensor
 * @param sizeInBytes      native {@code size_t*} through which the size is returned
 * @return a {@code cudnnStatus_t} value as an int (see the {@code CUDNN_STATUS_*} constants)
 */
public static native @Cast("cudnnStatus_t") int cudnnGetReductionWorkspaceSize(cudnnContext handle,
                               cudnnReduceTensorStruct reduceTensorDesc,
                               cudnnTensorStruct aDesc,
                               cudnnTensorStruct cDesc,
                               @Cast("size_t*") SizeTPointer sizeInBytes);

/**
 * Tensor operation : C = reduce op( alpha * A ) + beta * C.
 * The NaN propagation enum applies to only the min and max reduce ops; the other reduce ops
 * propagate NaN as usual. The indices space is ignored for reduce ops other than min or max.
 *
 * @param handle               cuDNN context handle
 * @param reduceTensorDesc     ReduceTensor descriptor
 * @param indices              non-const pointer to the indices space
 * @param indicesSizeInBytes   size of {@code indices} in bytes (native {@code size_t})
 * @param workspace            non-const pointer to the workspace
 * @param workspaceSizeInBytes size of {@code workspace} in bytes (native {@code size_t})
 * @param alpha                const pointer to the alpha scaling factor
 * @param aDesc                descriptor for {@code A}
 * @param A                    const pointer to the A data
 * @param beta                 const pointer to the beta scaling factor
 * @param cDesc                descriptor for {@code C}
 * @param C                    non-const pointer to the C data
 * @return a {@code cudnnStatus_t} value as an int (see the {@code CUDNN_STATUS_*} constants)
 */
public static native @Cast("cudnnStatus_t") int cudnnReduceTensor(cudnnContext handle,
                  cudnnReduceTensorStruct reduceTensorDesc,
                  Pointer indices,
                  @Cast("size_t") long indicesSizeInBytes,
                  Pointer workspace,
                  @Cast("size_t") long workspaceSizeInBytes,
                  @Const Pointer alpha,
                  cudnnTensorStruct aDesc,
                  @Const Pointer A,
                  @Const Pointer beta,
                  cudnnTensorStruct cDesc,
                  Pointer C);

/**
 * Set all values of a tensor to a given value : y[i] = value[0].
 *
 * @param handle   cuDNN context handle
 * @param yDesc    descriptor for {@code y}
 * @param y        non-const pointer to the tensor data
 * @param valuePtr const pointer to the value
 * @return a {@code cudnnStatus_t} value as an int (see the {@code CUDNN_STATUS_*} constants)
 */
public static native @Cast("cudnnStatus_t") int cudnnSetTensor(cudnnContext handle, cudnnTensorStruct yDesc, Pointer y, @Const Pointer valuePtr);

/**
 * Scale all values of a tensor by a given factor : y[i] = alpha * y[i].
 *
 * @param handle cuDNN context handle
 * @param yDesc  descriptor for {@code y}
 * @param y      non-const pointer to the tensor data
 * @param alpha  const pointer to the scaling factor
 * @return a {@code cudnnStatus_t} value as an int (see the {@code CUDNN_STATUS_*} constants)
 */
public static native @Cast("cudnnStatus_t") int cudnnScaleTensor(cudnnContext handle, cudnnTensorStruct yDesc, Pointer y, @Const Pointer alpha);

/*
 *  convolution mode
 */
/** enum cudnnConvolutionMode_t: values for the {@code mode} argument of {@code cudnnSetConvolution2dDescriptor}. */
public static final int CUDNN_CONVOLUTION = 0, CUDNN_CROSS_CORRELATION = 1;

/**
 * Create an instance of FilterStruct.
 *
 * @param filterDesc descriptor passed by pointer-to-pointer ({@code @ByPtrPtr});
 *                   presumably set to the new descriptor — TODO confirm
 * @return a {@code cudnnStatus_t} value as an int (see the {@code CUDNN_STATUS_*} constants)
 */
public static native @Cast("cudnnStatus_t") int cudnnCreateFilterDescriptor(@ByPtrPtr cudnnFilterStruct filterDesc);

/**
 * Native binding for {@code cudnnSetFilter4dDescriptor}. Note the argument order:
 * {@code dataType} comes before {@code format} here, the reverse of
 * {@code cudnnSetTensor4dDescriptor}.
 *
 * @param filterDesc filter descriptor
 * @param dataType   a {@code cudnnDataType_t} value ({@code CUDNN_DATA_*})
 * @param format     a {@code cudnnTensorFormat_t} value ({@code CUDNN_TENSOR_*})
 * @param k          first dimension argument (presumably number of output feature maps — confirm)
 * @param c          second dimension argument (presumably number of input feature maps — confirm)
 * @param h          third dimension argument (presumably filter height — confirm)
 * @param w          width of each input filter
 * @return a {@code cudnnStatus_t} value as an int (see the {@code CUDNN_STATUS_*} constants)
 */
public static native @Cast("cudnnStatus_t") int cudnnSetFilter4dDescriptor(cudnnFilterStruct filterDesc,
                           @Cast("cudnnDataType_t") int dataType,
                           @Cast("cudnnTensorFormat_t") int format,
                           int k,
                           int c,
                           int h,
                           int w); /* width of  each input filter */

/**
 * Native binding for {@code cudnnGetFilter4dDescriptor}. The three overloads differ only in
 * the Java type used for the int pointer arguments: {@link IntPointer}, {@link IntBuffer}
 * or {@code int[]}. The pointer arguments mirror the value arguments of
 * {@code cudnnSetFilter4dDescriptor}; presumably each receives the stored value — TODO confirm.
 *
 * @param filterDesc filter descriptor
 * @param dataType   native {@code cudnnDataType_t*}
 * @param format     native {@code cudnnTensorFormat_t*}
 * @param k          pointer for the k dimension
 * @param c          pointer for the c dimension
 * @param h          pointer for the h dimension
 * @param w          pointer for the width of each input filter
 * @return a {@code cudnnStatus_t} value as an int (see the {@code CUDNN_STATUS_*} constants)
 */
public static native @Cast("cudnnStatus_t") int cudnnGetFilter4dDescriptor(cudnnFilterStruct filterDesc,
                           @Cast("cudnnDataType_t*") IntPointer dataType,
                           @Cast("cudnnTensorFormat_t*") IntPointer format,
                           IntPointer k,
                           IntPointer c,
                           IntPointer h,
                           IntPointer w);
public static native @Cast("cudnnStatus_t") int cudnnGetFilter4dDescriptor(cudnnFilterStruct filterDesc,
                           @Cast("cudnnDataType_t*") IntBuffer dataType,
                           @Cast("cudnnTensorFormat_t*") IntBuffer format,
                           IntBuffer k,
                           IntBuffer c,
                           IntBuffer h,
                           IntBuffer w);
public static native @Cast("cudnnStatus_t") int cudnnGetFilter4dDescriptor(cudnnFilterStruct filterDesc,
                           @Cast("cudnnDataType_t*") int[] dataType,
                           @Cast("cudnnTensorFormat_t*") int[] format,
                           int[] k,
                           int[] c,
                           int[] h,
                           int[] w); /* width of  each input filter */

/**
 * Native binding for {@code cudnnSetFilterNdDescriptor}. The three overloads differ only in
 * the Java type used for {@code filterDimA}: {@link IntPointer}, {@link IntBuffer} or {@code int[]}.
 *
 * @param filterDesc filter descriptor
 * @param dataType   a {@code cudnnDataType_t} value ({@code CUDNN_DATA_*})
 * @param format     a {@code cudnnTensorFormat_t} value ({@code CUDNN_TENSOR_*})
 * @param nbDims     number of dimensions (presumably the length of {@code filterDimA} — TODO confirm)
 * @param filterDimA const array of filter dimensions
 * @return a {@code cudnnStatus_t} value as an int (see the {@code CUDNN_STATUS_*} constants)
 */
public static native @Cast("cudnnStatus_t") int cudnnSetFilterNdDescriptor(cudnnFilterStruct filterDesc,
                           @Cast("cudnnDataType_t") int dataType,
                           @Cast("cudnnTensorFormat_t") int format,
                           int nbDims,
                           @Const IntPointer filterDimA);
public static native @Cast("cudnnStatus_t") int cudnnSetFilterNdDescriptor(cudnnFilterStruct filterDesc,
                           @Cast("cudnnDataType_t") int dataType,
                           @Cast("cudnnTensorFormat_t") int format,
                           int nbDims,
                           @Const IntBuffer filterDimA);
public static native @Cast("cudnnStatus_t") int cudnnSetFilterNdDescriptor(cudnnFilterStruct filterDesc,
                           @Cast("cudnnDataType_t") int dataType,
                           @Cast("cudnnTensorFormat_t") int format,
                           int nbDims,
                           @Const int[] filterDimA);

/**
 * Native binding for {@code cudnnGetFilterNdDescriptor}. The three overloads differ only in
 * the Java type used for the int pointer arguments: {@link IntPointer}, {@link IntBuffer}
 * or {@code int[]}.
 *
 * @param filterDesc      filter descriptor
 * @param nbDimsRequested requested number of dimensions (presumably bounds how many entries
 *                        of {@code filterDimA} are written — TODO confirm)
 * @param dataType        native {@code cudnnDataType_t*}
 * @param format          native {@code cudnnTensorFormat_t*}
 * @param nbDims          pointer for the number of dimensions
 * @param filterDimA      array for the filter dimensions
 * @return a {@code cudnnStatus_t} value as an int (see the {@code CUDNN_STATUS_*} constants)
 */
public static native @Cast("cudnnStatus_t") int cudnnGetFilterNdDescriptor(cudnnFilterStruct filterDesc,
                           int nbDimsRequested,
                           @Cast("cudnnDataType_t*") IntPointer dataType,
                           @Cast("cudnnTensorFormat_t*") IntPointer format,
                           IntPointer nbDims,
                           IntPointer filterDimA);
public static native @Cast("cudnnStatus_t") int cudnnGetFilterNdDescriptor(cudnnFilterStruct filterDesc,
                           int nbDimsRequested,
                           @Cast("cudnnDataType_t*") IntBuffer dataType,
                           @Cast("cudnnTensorFormat_t*") IntBuffer format,
                           IntBuffer nbDims,
                           IntBuffer filterDimA);
public static native @Cast("cudnnStatus_t") int cudnnGetFilterNdDescriptor(cudnnFilterStruct filterDesc,
                           int nbDimsRequested,
                           @Cast("cudnnDataType_t*") int[] dataType,
                           @Cast("cudnnTensorFormat_t*") int[] format,
                           int[] nbDims,
                           int[] filterDimA);

/**
 * Native binding for {@code cudnnDestroyFilterDescriptor}.
 *
 * @param filterDesc filter descriptor
 * @return a {@code cudnnStatus_t} value as an int (see the {@code CUDNN_STATUS_*} constants)
 */
public static native @Cast("cudnnStatus_t") int cudnnDestroyFilterDescriptor(cudnnFilterStruct filterDesc);

/**
 * Create an instance of convolution descriptor.
 *
 * @param convDesc descriptor passed by pointer-to-pointer ({@code @ByPtrPtr});
 *                 presumably set to the new descriptor — TODO confirm
 * @return a {@code cudnnStatus_t} value as an int (see the {@code CUDNN_STATUS_*} constants)
 */
public static native @Cast("cudnnStatus_t") int cudnnCreateConvolutionDescriptor(@ByPtrPtr cudnnConvolutionStruct convDesc);

/**
 * Native binding for {@code cudnnSetConvolutionMathType}.
 *
 * @param convDesc convolution descriptor handle handed to the native call
 * @param mathType int cast to the native {@code cudnnMathType_t}
 * @return the native {@code cudnnStatus_t} result as an int
 */
public static native @Cast("cudnnStatus_t") int cudnnSetConvolutionMathType(cudnnConvolutionStruct convDesc, @Cast("cudnnMathType_t") int mathType);

/**
 * Native binding for {@code cudnnGetConvolutionMathType}. The three overloads
 * differ only in how {@code mathType} (cast to {@code cudnnMathType_t*}) is
 * supplied: {@link IntPointer}, {@link IntBuffer} or {@code int[]}.
 * Each returns the native {@code cudnnStatus_t} result as an int.
 */
public static native @Cast("cudnnStatus_t") int cudnnGetConvolutionMathType(cudnnConvolutionStruct convDesc, @Cast("cudnnMathType_t*") IntPointer mathType);
public static native @Cast("cudnnStatus_t") int cudnnGetConvolutionMathType(cudnnConvolutionStruct convDesc, @Cast("cudnnMathType_t*") IntBuffer mathType);
public static native @Cast("cudnnStatus_t") int cudnnGetConvolutionMathType(cudnnConvolutionStruct convDesc, @Cast("cudnnMathType_t*") int[] mathType);

/**
 * Native binding for {@code cudnnSetConvolutionGroupCount}.
 *
 * @param convDesc   convolution descriptor handle handed to the native call
 * @param groupCount plain int forwarded to the native call
 * @return the native {@code cudnnStatus_t} result as an int
 */
public static native @Cast("cudnnStatus_t") int cudnnSetConvolutionGroupCount(cudnnConvolutionStruct convDesc, int groupCount);

/**
 * Native binding for {@code cudnnGetConvolutionGroupCount}. The three overloads
 * differ only in how the non-const int pointer {@code groupCount} is supplied:
 * {@link IntPointer}, {@link IntBuffer} or {@code int[]}.
 * Each returns the native {@code cudnnStatus_t} result as an int.
 */
public static native @Cast("cudnnStatus_t") int cudnnGetConvolutionGroupCount(cudnnConvolutionStruct convDesc, IntPointer groupCount);
public static native @Cast("cudnnStatus_t") int cudnnGetConvolutionGroupCount(cudnnConvolutionStruct convDesc, IntBuffer groupCount);
public static native @Cast("cudnnStatus_t") int cudnnGetConvolutionGroupCount(cudnnConvolutionStruct convDesc, int[] groupCount);

/**
 * Native binding for {@code cudnnSetConvolution2dDescriptor}.
 *
 * @param convDesc    convolution descriptor handle handed to the native call
 * @param pad_h       int forwarded as-is (presumably padding along height - confirm)
 * @param pad_w       int forwarded as-is (presumably padding along width - confirm)
 * @param u           int forwarded as-is; NOTE(review): meaning not visible here,
 *                    presumably the vertical stride - confirm against the cuDNN reference
 * @param v           int forwarded as-is; NOTE(review): presumably the horizontal
 *                    stride - confirm against the cuDNN reference
 * @param dilation_h  int forwarded as-is
 * @param dilation_w  int forwarded as-is
 * @param mode        int cast to the native {@code cudnnConvolutionMode_t}
 * @param computeType int cast to the native {@code cudnnDataType_t}
 * @return the native {@code cudnnStatus_t} result as an int
 */
public static native @Cast("cudnnStatus_t") int cudnnSetConvolution2dDescriptor(cudnnConvolutionStruct convDesc,
                                int pad_h,
                                int pad_w,
                                int u,
                                int v,
                                int dilation_h,
                                int dilation_w,
                                @Cast("cudnnConvolutionMode_t") int mode,
                                @Cast("cudnnDataType_t") int computeType);

/**
 * Native binding for {@code cudnnGetConvolution2dDescriptor}; this overload uses
 * {@link IntPointer} for every pointer argument. All arguments after
 * {@code convDesc} are non-const pointers (presumably outputs - confirm);
 * {@code mode} is cast to {@code cudnnConvolutionMode_t*} and
 * {@code computeType} to {@code cudnnDataType_t*}.
 *
 * @return the native {@code cudnnStatus_t} result as an int
 */
public static native @Cast("cudnnStatus_t") int cudnnGetConvolution2dDescriptor(cudnnConvolutionStruct convDesc,
                                IntPointer pad_h,
                                IntPointer pad_w,
                                IntPointer u,
                                IntPointer v,
                                IntPointer dilation_h,
                                IntPointer dilation_w,
                                @Cast("cudnnConvolutionMode_t*") IntPointer mode,
                                @Cast("cudnnDataType_t*") IntPointer computeType);
/** Overload of {@code cudnnGetConvolution2dDescriptor} that uses {@link IntBuffer} for the pointer arguments. */
public static native @Cast("cudnnStatus_t") int cudnnGetConvolution2dDescriptor(cudnnConvolutionStruct convDesc,
                                IntBuffer pad_h,
                                IntBuffer pad_w,
                                IntBuffer u,
                                IntBuffer v,
                                IntBuffer dilation_h,
                                IntBuffer dilation_w,
                                @Cast("cudnnConvolutionMode_t*") IntBuffer mode,
                                @Cast("cudnnDataType_t*") IntBuffer computeType);
/** Overload of {@code cudnnGetConvolution2dDescriptor} that uses {@code int[]} for the pointer arguments. */
public static native @Cast("cudnnStatus_t") int cudnnGetConvolution2dDescriptor(cudnnConvolutionStruct convDesc,
                                int[] pad_h,
                                int[] pad_w,
                                int[] u,
                                int[] v,
                                int[] dilation_h,
                                int[] dilation_w,
                                @Cast("cudnnConvolutionMode_t*") int[] mode,
                                @Cast("cudnnDataType_t*") int[] computeType);

/**
 * Helper function to return the dimensions of the output tensor given a
 * convolution descriptor. This overload uses {@link IntPointer} for
 * {@code n}, {@code c}, {@code h} and {@code w}.
 *
 * @param convDesc        convolution descriptor handle
 * @param inputTensorDesc tensor descriptor handle for the input
 * @param filterDesc      filter descriptor handle
 * @param n               non-const int pointer (presumably receives one output dimension - confirm)
 * @param c               non-const int pointer (presumably receives one output dimension - confirm)
 * @param h               non-const int pointer (presumably receives one output dimension - confirm)
 * @param w               non-const int pointer (presumably receives one output dimension - confirm)
 * @return the native {@code cudnnStatus_t} result as an int
 */
public static native @Cast("cudnnStatus_t") int cudnnGetConvolution2dForwardOutputDim(cudnnConvolutionStruct convDesc,
                                      cudnnTensorStruct inputTensorDesc,
                                      cudnnFilterStruct filterDesc,
                                      IntPointer n,
                                      IntPointer c,
                                      IntPointer h,
                                      IntPointer w);
/** Overload of {@code cudnnGetConvolution2dForwardOutputDim} that uses {@link IntBuffer} for the pointer arguments. */
public static native @Cast("cudnnStatus_t") int cudnnGetConvolution2dForwardOutputDim(cudnnConvolutionStruct convDesc,
                                      cudnnTensorStruct inputTensorDesc,
                                      cudnnFilterStruct filterDesc,
                                      IntBuffer n,
                                      IntBuffer c,
                                      IntBuffer h,
                                      IntBuffer w);
/** Overload of {@code cudnnGetConvolution2dForwardOutputDim} that uses {@code int[]} for the pointer arguments. */
public static native @Cast("cudnnStatus_t") int cudnnGetConvolution2dForwardOutputDim(cudnnConvolutionStruct convDesc,
                                      cudnnTensorStruct inputTensorDesc,
                                      cudnnFilterStruct filterDesc,
                                      int[] n,
                                      int[] c,
                                      int[] h,
                                      int[] w);

/**
 * Native binding for {@code cudnnSetConvolutionNdDescriptor}; this overload
 * passes the three arrays as {@link IntPointer}.
 *
 * @param convDesc      convolution descriptor handle handed to the native call
 * @param arrayLength   NOTE(review): presumably the number of entries in each of
 *                      the three arrays - confirm against the cuDNN reference
 * @param padA          int array, declared const in the native signature
 * @param filterStrideA int array, declared const in the native signature
 * @param dilationA     int array, declared const in the native signature
 * @param mode          int cast to the native {@code cudnnConvolutionMode_t}
 * @param computeType   int cast to the native {@code cudnnDataType_t}
 * @return the native {@code cudnnStatus_t} result as an int
 */
public static native @Cast("cudnnStatus_t") int cudnnSetConvolutionNdDescriptor(cudnnConvolutionStruct convDesc,
                                int arrayLength,
                                @Const IntPointer padA,
                                @Const IntPointer filterStrideA,
                                @Const IntPointer dilationA,
                                @Cast("cudnnConvolutionMode_t") int mode,
                                @Cast("cudnnDataType_t") int computeType);
/** Overload of {@code cudnnSetConvolutionNdDescriptor} that passes the arrays as {@link IntBuffer}. */
public static native @Cast("cudnnStatus_t") int cudnnSetConvolutionNdDescriptor(cudnnConvolutionStruct convDesc,
                                int arrayLength,
                                @Const IntBuffer padA,
                                @Const IntBuffer filterStrideA,
                                @Const IntBuffer dilationA,
                                @Cast("cudnnConvolutionMode_t") int mode,
                                @Cast("cudnnDataType_t") int computeType);
/** Overload of {@code cudnnSetConvolutionNdDescriptor} that passes the arrays as {@code int[]}. */
public static native @Cast("cudnnStatus_t") int cudnnSetConvolutionNdDescriptor(cudnnConvolutionStruct convDesc,
                                int arrayLength,
                                @Const int[] padA,
                                @Const int[] filterStrideA,
                                @Const int[] dilationA,
                                @Cast("cudnnConvolutionMode_t") int mode,
                                @Cast("cudnnDataType_t") int computeType); /* convolution data type */

/**
 * Native binding for {@code cudnnGetConvolutionNdDescriptor}; this overload uses
 * {@link IntPointer} for every pointer argument.
 *
 * @param convDesc             convolution descriptor handle handed to the native call
 * @param arrayLengthRequested NOTE(review): presumably the capacity of the
 *                             array arguments - confirm against the cuDNN reference
 * @param arrayLength          non-const int pointer (presumably an output - confirm)
 * @param padA                 non-const int pointer (presumably an output array - confirm)
 * @param strideA              non-const int pointer (presumably an output array - confirm)
 * @param dilationA            non-const int pointer (presumably an output array - confirm)
 * @param mode                 pointer cast to {@code cudnnConvolutionMode_t*}
 * @param computeType          pointer cast to {@code cudnnDataType_t*}
 * @return the native {@code cudnnStatus_t} result as an int
 */
public static native @Cast("cudnnStatus_t") int cudnnGetConvolutionNdDescriptor(cudnnConvolutionStruct convDesc,
                                int arrayLengthRequested,
                                IntPointer arrayLength,
                                IntPointer padA,
                                IntPointer strideA,
                                IntPointer dilationA,
                                @Cast("cudnnConvolutionMode_t*") IntPointer mode,
                                @Cast("cudnnDataType_t*") IntPointer computeType);
/** Overload of {@code cudnnGetConvolutionNdDescriptor} that uses {@link IntBuffer} for the pointer arguments. */
public static native @Cast("cudnnStatus_t") int cudnnGetConvolutionNdDescriptor(cudnnConvolutionStruct convDesc,
                                int arrayLengthRequested,
                                IntBuffer arrayLength,
                                IntBuffer padA,
                                IntBuffer strideA,
                                IntBuffer dilationA,
                                @Cast("cudnnConvolutionMode_t*") IntBuffer mode,
                                @Cast("cudnnDataType_t*") IntBuffer computeType);
/** Overload of {@code cudnnGetConvolutionNdDescriptor} that uses {@code int[]} for the pointer arguments. */
public static native @Cast("cudnnStatus_t") int cudnnGetConvolutionNdDescriptor(cudnnConvolutionStruct convDesc,
                                int arrayLengthRequested,
                                int[] arrayLength,
                                int[] padA,
                                int[] strideA,
                                int[] dilationA,
                                @Cast("cudnnConvolutionMode_t*") int[] mode,
                                @Cast("cudnnDataType_t*") int[] computeType); /* convolution data type */

/**
 * Helper function to return the dimensions of the output tensor given a
 * convolution descriptor (N-dimensional variant). This overload passes
 * {@code tensorOuputDimA} as an {@link IntPointer}.
 *
 * @param convDesc        convolution descriptor handle
 * @param inputTensorDesc tensor descriptor handle for the input
 * @param filterDesc      filter descriptor handle
 * @param nbDims          NOTE(review): presumably the number of entries in
 *                        {@code tensorOuputDimA} - confirm against the cuDNN reference
 * @param tensorOuputDimA non-const int pointer (presumably receives the output
 *                        dimensions - confirm); the spelling is kept as generated
 * @return the native {@code cudnnStatus_t} result as an int
 */
public static native @Cast("cudnnStatus_t") int cudnnGetConvolutionNdForwardOutputDim(cudnnConvolutionStruct convDesc,
                                      cudnnTensorStruct inputTensorDesc,
                                      cudnnFilterStruct filterDesc,
                                      int nbDims,
                                      IntPointer tensorOuputDimA);
/** Overload of {@code cudnnGetConvolutionNdForwardOutputDim} that passes {@code tensorOuputDimA} as an {@link IntBuffer}. */
public static native @Cast("cudnnStatus_t") int cudnnGetConvolutionNdForwardOutputDim(cudnnConvolutionStruct convDesc,
                                      cudnnTensorStruct inputTensorDesc,
                                      cudnnFilterStruct filterDesc,
                                      int nbDims,
                                      IntBuffer tensorOuputDimA);
/** Overload of {@code cudnnGetConvolutionNdForwardOutputDim} that passes {@code tensorOuputDimA} as an {@code int[]}. */
public static native @Cast("cudnnStatus_t") int cudnnGetConvolutionNdForwardOutputDim(cudnnConvolutionStruct convDesc,
                                      cudnnTensorStruct inputTensorDesc,
                                      cudnnFilterStruct filterDesc,
                                      int nbDims,
                                      int[] tensorOuputDimA);

/**
 * Destroy an instance of a convolution descriptor.
 *
 * @param convDesc convolution descriptor handle handed to the native call
 * @return the native {@code cudnnStatus_t} result as an int
 */
public static native @Cast("cudnnStatus_t") int cudnnDestroyConvolutionDescriptor(cudnnConvolutionStruct convDesc);

/* Helper functions that provide the convolution algorithm that best fits the requirements */
/**
 * enum cudnnConvolutionFwdPreference_t, exposed as int constants. These are the
 * values accepted by parameters cast to {@code cudnnConvolutionFwdPreference_t}.
 */
public static final int
    CUDNN_CONVOLUTION_FWD_NO_WORKSPACE            = 0,
    CUDNN_CONVOLUTION_FWD_PREFER_FASTEST          = 1,
    CUDNN_CONVOLUTION_FWD_SPECIFY_WORKSPACE_LIMIT = 2;

/**
 * enum cudnnConvolutionFwdAlgo_t, exposed as int constants. Values 0..7 name
 * individual algorithms; {@code CUDNN_CONVOLUTION_FWD_ALGO_COUNT} (8) equals the
 * number of algorithm values listed before it.
 */
public static final int
    CUDNN_CONVOLUTION_FWD_ALGO_IMPLICIT_GEMM         = 0,
    CUDNN_CONVOLUTION_FWD_ALGO_IMPLICIT_PRECOMP_GEMM = 1,
    CUDNN_CONVOLUTION_FWD_ALGO_GEMM                  = 2,
    CUDNN_CONVOLUTION_FWD_ALGO_DIRECT                = 3,
    CUDNN_CONVOLUTION_FWD_ALGO_FFT                   = 4,
    CUDNN_CONVOLUTION_FWD_ALGO_FFT_TILING            = 5,
    CUDNN_CONVOLUTION_FWD_ALGO_WINOGRAD              = 6,
    CUDNN_CONVOLUTION_FWD_ALGO_WINOGRAD_NONFUSED     = 7,
    CUDNN_CONVOLUTION_FWD_ALGO_COUNT                 = 8;

/**
 * Java peer of the native {@code cudnnConvolutionFwdAlgoPerf_t} struct. Each
 * field is exposed as a native getter plus a fluent native setter that returns
 * this peer type.
 */
public static class cudnnConvolutionFwdAlgoPerf_t extends Pointer {
    static { Loader.load(); }

    /** Default native constructor: allocates one struct. */
    public cudnnConvolutionFwdAlgoPerf_t() {
        super((Pointer)null);
        allocate();
    }

    /** Native array allocator for {@code size} structs. Access elements with {@link Pointer#position(long)}. */
    public cudnnConvolutionFwdAlgoPerf_t(long size) {
        super((Pointer)null);
        allocateArray(size);
    }

    /** Pointer cast constructor. Delegates to {@link Pointer#Pointer(Pointer)}. */
    public cudnnConvolutionFwdAlgoPerf_t(Pointer p) {
        super(p);
    }

    private native void allocate();
    private native void allocateArray(long size);

    /** Repositions via the superclass and returns the result narrowed to this type. */
    @Override
    public cudnnConvolutionFwdAlgoPerf_t position(long position) {
        Pointer moved = super.position(position);
        return (cudnnConvolutionFwdAlgoPerf_t) moved;
    }

    /* algo field, cast to cudnnConvolutionFwdAlgo_t */
    public native @Cast("cudnnConvolutionFwdAlgo_t") int algo();
    public native cudnnConvolutionFwdAlgoPerf_t algo(int algo);

    /* status field, cast to cudnnStatus_t */
    public native @Cast("cudnnStatus_t") int status();
    public native cudnnConvolutionFwdAlgoPerf_t status(int status);

    /* time field */
    public native float time();
    public native cudnnConvolutionFwdAlgoPerf_t time(float time);

    /* memory field, cast to size_t */
    public native @Cast("size_t") long memory();
    public native cudnnConvolutionFwdAlgoPerf_t memory(long memory);

    /* determinism field, cast to cudnnDeterminism_t */
    public native @Cast("cudnnDeterminism_t") int determinism();
    public native cudnnConvolutionFwdAlgoPerf_t determinism(int determinism);

    /* mathType field, cast to cudnnMathType_t */
    public native @Cast("cudnnMathType_t") int mathType();
    public native cudnnConvolutionFwdAlgoPerf_t mathType(int mathType);

    /* reserved array: indexed element access, plus a getter for the array as an IntPointer */
    public native int reserved(int i);
    public native cudnnConvolutionFwdAlgoPerf_t reserved(int i, int reserved);
    @MemberGetter public native IntPointer reserved();
}

/**
 * Native binding for {@code cudnnGetConvolutionForwardAlgorithmMaxCount}. The
 * three overloads differ only in how the non-const int pointer {@code count}
 * is supplied: {@link IntPointer}, {@link IntBuffer} or {@code int[]}.
 * Each returns the native {@code cudnnStatus_t} result as an int.
 */
public static native @Cast("cudnnStatus_t") int cudnnGetConvolutionForwardAlgorithmMaxCount(cudnnContext handle, IntPointer count);
public static native @Cast("cudnnStatus_t") int cudnnGetConvolutionForwardAlgorithmMaxCount(cudnnContext handle, IntBuffer count);
public static native @Cast("cudnnStatus_t") int cudnnGetConvolutionForwardAlgorithmMaxCount(cudnnContext handle, int[] count);

/**
 * Native binding for {@code cudnnFindConvolutionForwardAlgorithm}; this overload
 * passes {@code returnedAlgoCount} as an {@link IntPointer}.
 *
 * @param handle             cuDNN context handle
 * @param xDesc              tensor descriptor handle
 * @param wDesc              filter descriptor handle
 * @param convDesc           convolution descriptor handle
 * @param yDesc              tensor descriptor handle
 * @param requestedAlgoCount NOTE(review): presumably the capacity of
 *                           {@code perfResults} - confirm against the cuDNN reference
 * @param returnedAlgoCount  non-const int pointer (presumably an output - confirm)
 * @param perfResults        {@code cudnnConvolutionFwdAlgoPerf_t} peer handed to
 *                           the native call (presumably filled in - confirm)
 * @return the native {@code cudnnStatus_t} result as an int
 */
public static native @Cast("cudnnStatus_t") int cudnnFindConvolutionForwardAlgorithm(cudnnContext handle,
                                     cudnnTensorStruct xDesc,
                                     cudnnFilterStruct wDesc,
                                     cudnnConvolutionStruct convDesc,
                                     cudnnTensorStruct yDesc,
                                     int requestedAlgoCount,
                                     IntPointer returnedAlgoCount,
                                     cudnnConvolutionFwdAlgoPerf_t perfResults);
/** Overload of {@code cudnnFindConvolutionForwardAlgorithm} that passes {@code returnedAlgoCount} as an {@link IntBuffer}. */
public static native @Cast("cudnnStatus_t") int cudnnFindConvolutionForwardAlgorithm(cudnnContext handle,
                                     cudnnTensorStruct xDesc,
                                     cudnnFilterStruct wDesc,
                                     cudnnConvolutionStruct convDesc,
                                     cudnnTensorStruct yDesc,
                                     int requestedAlgoCount,
                                     IntBuffer returnedAlgoCount,
                                     cudnnConvolutionFwdAlgoPerf_t perfResults);
/** Overload of {@code cudnnFindConvolutionForwardAlgorithm} that passes {@code returnedAlgoCount} as an {@code int[]}. */
public static native @Cast("cudnnStatus_t") int cudnnFindConvolutionForwardAlgorithm(cudnnContext handle,
                                     cudnnTensorStruct xDesc,
                                     cudnnFilterStruct wDesc,
                                     cudnnConvolutionStruct convDesc,
                                     cudnnTensorStruct yDesc,
                                     int requestedAlgoCount,
                                     int[] returnedAlgoCount,
                                     cudnnConvolutionFwdAlgoPerf_t perfResults);

/**
 * Native binding for {@code cudnnFindConvolutionForwardAlgorithmEx}; this
 * overload passes {@code returnedAlgoCount} as an {@link IntPointer}. Unlike
 * {@code cudnnFindConvolutionForwardAlgorithm}, this signature also takes the
 * data pointers {@code x}, {@code w}, {@code y} and a caller-supplied workspace.
 *
 * @param handle               cuDNN context handle
 * @param xDesc                tensor descriptor handle
 * @param x                    data pointer, declared const in the native signature
 * @param wDesc                filter descriptor handle
 * @param w                    data pointer, declared const in the native signature
 * @param convDesc             convolution descriptor handle
 * @param yDesc                tensor descriptor handle
 * @param y                    non-const data pointer
 * @param requestedAlgoCount   NOTE(review): presumably the capacity of
 *                             {@code perfResults} - confirm against the cuDNN reference
 * @param returnedAlgoCount    non-const int pointer (presumably an output - confirm)
 * @param perfResults          {@code cudnnConvolutionFwdAlgoPerf_t} peer handed to the native call
 * @param workSpace            non-const pointer to the workspace
 * @param workSpaceSizeInBytes value cast to the native {@code size_t}
 * @return the native {@code cudnnStatus_t} result as an int
 */
public static native @Cast("cudnnStatus_t") int cudnnFindConvolutionForwardAlgorithmEx(cudnnContext handle,
                                       cudnnTensorStruct xDesc,
                                       @Const Pointer x,
                                       cudnnFilterStruct wDesc,
                                       @Const Pointer w,
                                       cudnnConvolutionStruct convDesc,
                                       cudnnTensorStruct yDesc,
                                       Pointer y,
                                       int requestedAlgoCount,
                                       IntPointer returnedAlgoCount,
                                       cudnnConvolutionFwdAlgoPerf_t perfResults,
                                       Pointer workSpace,
                                       @Cast("size_t") long workSpaceSizeInBytes);
/** Overload of {@code cudnnFindConvolutionForwardAlgorithmEx} that passes {@code returnedAlgoCount} as an {@link IntBuffer}. */
public static native @Cast("cudnnStatus_t") int cudnnFindConvolutionForwardAlgorithmEx(cudnnContext handle,
                                       cudnnTensorStruct xDesc,
                                       @Const Pointer x,
                                       cudnnFilterStruct wDesc,
                                       @Const Pointer w,
                                       cudnnConvolutionStruct convDesc,
                                       cudnnTensorStruct yDesc,
                                       Pointer y,
                                       int requestedAlgoCount,
                                       IntBuffer returnedAlgoCount,
                                       cudnnConvolutionFwdAlgoPerf_t perfResults,
                                       Pointer workSpace,
                                       @Cast("size_t") long workSpaceSizeInBytes);
/** Overload of {@code cudnnFindConvolutionForwardAlgorithmEx} that passes {@code returnedAlgoCount} as an {@code int[]}. */
public static native @Cast("cudnnStatus_t") int cudnnFindConvolutionForwardAlgorithmEx(cudnnContext handle,
                                       cudnnTensorStruct xDesc,
                                       @Const Pointer x,
                                       cudnnFilterStruct wDesc,
                                       @Const Pointer w,
                                       cudnnConvolutionStruct convDesc,
                                       cudnnTensorStruct yDesc,
                                       Pointer y,
                                       int requestedAlgoCount,
                                       int[] returnedAlgoCount,
                                       cudnnConvolutionFwdAlgoPerf_t perfResults,
                                       Pointer workSpace,
                                       @Cast("size_t") long workSpaceSizeInBytes);

/**
 * Native binding for {@code cudnnGetConvolutionForwardAlgorithm}; this overload
 * passes {@code algo} as an {@link IntPointer}.
 *
 * @param handle             cuDNN context handle
 * @param xDesc              tensor descriptor handle
 * @param wDesc              filter descriptor handle
 * @param convDesc           convolution descriptor handle
 * @param yDesc              tensor descriptor handle
 * @param preference         int cast to the native {@code cudnnConvolutionFwdPreference_t}
 *                           (see the {@code CUDNN_CONVOLUTION_FWD_*} preference constants)
 * @param memoryLimitInBytes value cast to the native {@code size_t}
 * @param algo               pointer cast to {@code cudnnConvolutionFwdAlgo_t*}
 *                           (presumably receives the chosen algorithm - confirm)
 * @return the native {@code cudnnStatus_t} result as an int
 */
public static native @Cast("cudnnStatus_t") int cudnnGetConvolutionForwardAlgorithm(cudnnContext handle,
                                    cudnnTensorStruct xDesc,
                                    cudnnFilterStruct wDesc,
                                    cudnnConvolutionStruct convDesc,
                                    cudnnTensorStruct yDesc,
                                    @Cast("cudnnConvolutionFwdPreference_t") int preference,
                                    @Cast("size_t") long memoryLimitInBytes,
                                    @Cast("cudnnConvolutionFwdAlgo_t*") IntPointer algo);
/** Overload of {@code cudnnGetConvolutionForwardAlgorithm} that passes {@code algo} as an {@link IntBuffer}. */
public static native @Cast("cudnnStatus_t") int cudnnGetConvolutionForwardAlgorithm(cudnnContext handle,
                                    cudnnTensorStruct xDesc,
                                    cudnnFilterStruct wDesc,
                                    cudnnConvolutionStruct convDesc,
                                    cudnnTensorStruct yDesc,
                                    @Cast("cudnnConvolutionFwdPreference_t") int preference,
                                    @Cast("size_t") long memoryLimitInBytes,
                                    @Cast("cudnnConvolutionFwdAlgo_t*") IntBuffer algo);
/** Overload of {@code cudnnGetConvolutionForwardAlgorithm} that passes {@code algo} as an {@code int[]}. */
public static native @Cast("cudnnStatus_t") int cudnnGetConvolutionForwardAlgorithm(cudnnContext handle,
                                    cudnnTensorStruct xDesc,
                                    cudnnFilterStruct wDesc,
                                    cudnnConvolutionStruct convDesc,
                                    cudnnTensorStruct yDesc,
                                    @Cast("cudnnConvolutionFwdPreference_t") int preference,
                                    @Cast("size_t") long memoryLimitInBytes,
                                    @Cast("cudnnConvolutionFwdAlgo_t*") int[] algo);

/**
 * Native binding for {@code cudnnGetConvolutionForwardAlgorithm_v7}; this
 * overload passes {@code returnedAlgoCount} as an {@link IntPointer}.
 *
 * @param handle             cuDNN context handle
 * @param srcDesc            tensor descriptor handle
 * @param filterDesc         filter descriptor handle
 * @param convDesc           convolution descriptor handle
 * @param destDesc           tensor descriptor handle
 * @param requestedAlgoCount NOTE(review): presumably the capacity of
 *                           {@code perfResults} - confirm against the cuDNN reference
 * @param returnedAlgoCount  non-const int pointer (presumably an output - confirm)
 * @param perfResults        {@code cudnnConvolutionFwdAlgoPerf_t} peer handed to the native call
 * @return the native {@code cudnnStatus_t} result as an int
 */
public static native @Cast("cudnnStatus_t") int cudnnGetConvolutionForwardAlgorithm_v7(cudnnContext handle,
                                       cudnnTensorStruct srcDesc,
                                       cudnnFilterStruct filterDesc,
                                       cudnnConvolutionStruct convDesc,
                                       cudnnTensorStruct destDesc,
                                       int requestedAlgoCount,
                                       IntPointer returnedAlgoCount,
                                       cudnnConvolutionFwdAlgoPerf_t perfResults);
/** Overload of {@code cudnnGetConvolutionForwardAlgorithm_v7} that passes {@code returnedAlgoCount} as an {@link IntBuffer}. */
public static native @Cast("cudnnStatus_t") int cudnnGetConvolutionForwardAlgorithm_v7(cudnnContext handle,
                                       cudnnTensorStruct srcDesc,
                                       cudnnFilterStruct filterDesc,
                                       cudnnConvolutionStruct convDesc,
                                       cudnnTensorStruct destDesc,
                                       int requestedAlgoCount,
                                       IntBuffer returnedAlgoCount,
                                       cudnnConvolutionFwdAlgoPerf_t perfResults);
/** Overload of {@code cudnnGetConvolutionForwardAlgorithm_v7} that passes {@code returnedAlgoCount} as an {@code int[]}. */
public static native @Cast("cudnnStatus_t") int cudnnGetConvolutionForwardAlgorithm_v7(cudnnContext handle,
                                       cudnnTensorStruct srcDesc,
                                       cudnnFilterStruct filterDesc,
                                       cudnnConvolutionStruct convDesc,
                                       cudnnTensorStruct destDesc,
                                       int requestedAlgoCount,
                                       int[] returnedAlgoCount,
                                       cudnnConvolutionFwdAlgoPerf_t perfResults);

/*
 *  convolution algorithm (which requires potentially some workspace)
 */

/**
 * Helper function to return the minimum size of the workspace to be passed to
 * the convolution given an algo.
 *
 * @param handle      cuDNN context handle
 * @param xDesc       tensor descriptor handle
 * @param wDesc       filter descriptor handle
 * @param convDesc    convolution descriptor handle
 * @param yDesc       tensor descriptor handle
 * @param algo        int cast to the native {@code cudnnConvolutionFwdAlgo_t}
 * @param sizeInBytes pointer cast to {@code size_t*}; per the description above
 *                    this is where the size is returned
 * @return the native {@code cudnnStatus_t} result as an int
 */
public static native @Cast("cudnnStatus_t") int cudnnGetConvolutionForwardWorkspaceSize(cudnnContext handle,
                                        cudnnTensorStruct xDesc,
                                        cudnnFilterStruct wDesc,
                                        cudnnConvolutionStruct convDesc,
                                        cudnnTensorStruct yDesc,
                                        @Cast("cudnnConvolutionFwdAlgo_t") int algo,
                                        @Cast("size_t*") SizeTPointer sizeInBytes);

/* Convolution functions: All of the form "output = alpha * Op(inputs) + beta * output" */

/**
 * Function to perform the forward pass for batch convolution.
 *
 * @param handle               cuDNN context handle
 * @param alpha                const pointer; NOTE(review): presumably the scaling
 *                             factor applied to the convolution result - confirm
 * @param xDesc                tensor descriptor handle for {@code x}
 * @param x                    data pointer, declared const in the native signature
 * @param wDesc                filter descriptor handle for {@code w}
 * @param w                    data pointer, declared const in the native signature
 * @param convDesc             convolution descriptor handle
 * @param algo                 int cast to the native {@code cudnnConvolutionFwdAlgo_t}
 * @param workSpace            non-const pointer to the workspace
 * @param workSpaceSizeInBytes value cast to the native {@code size_t}
 * @param beta                 const pointer; NOTE(review): presumably the scaling
 *                             factor applied to the prior contents of {@code y} - confirm
 * @param yDesc                tensor descriptor handle for {@code y}
 * @param y                    non-const data pointer
 * @return the native {@code cudnnStatus_t} result as an int
 */
public static native @Cast("cudnnStatus_t") int cudnnConvolutionForward(cudnnContext handle,
                        @Const Pointer alpha,
                        cudnnTensorStruct xDesc,
                        @Const Pointer x,
                        cudnnFilterStruct wDesc,
                        @Const Pointer w,
                        cudnnConvolutionStruct convDesc,
                        @Cast("cudnnConvolutionFwdAlgo_t") int algo,
                        Pointer workSpace,
                        @Cast("size_t") long workSpaceSizeInBytes,
                        @Const Pointer beta,
                        cudnnTensorStruct yDesc,
                        Pointer y);

/**
 * Fused conv/bias/activation operation : y = Act( alpha1 * conv(x) + alpha2 * z + bias )
 *
 * @param handle               cuDNN context handle
 * @param alpha1               const pointer to the {@code alpha1} factor in the formula above
 * @param xDesc                tensor descriptor handle for {@code x}
 * @param x                    data pointer, declared const in the native signature
 * @param wDesc                filter descriptor handle for {@code w}
 * @param w                    data pointer, declared const in the native signature
 * @param convDesc             convolution descriptor handle
 * @param algo                 int cast to the native {@code cudnnConvolutionFwdAlgo_t}
 * @param workSpace            non-const pointer to the workspace
 * @param workSpaceSizeInBytes value cast to the native {@code size_t}
 * @param alpha2               const pointer to the {@code alpha2} factor in the formula above
 * @param zDesc                tensor descriptor handle for {@code z}
 * @param z                    data pointer, declared const in the native signature
 * @param biasDesc             tensor descriptor handle for {@code bias}
 * @param bias                 data pointer, declared const in the native signature
 * @param activationDesc       activation descriptor handle (the {@code Act} in the formula above)
 * @param yDesc                tensor descriptor handle for {@code y}
 * @param y                    non-const data pointer
 * @return the native {@code cudnnStatus_t} result as an int
 */
public static native @Cast("cudnnStatus_t") int cudnnConvolutionBiasActivationForward(cudnnContext handle,
                                      @Const Pointer alpha1,
                                      cudnnTensorStruct xDesc,
                                      @Const Pointer x,
                                      cudnnFilterStruct wDesc,
                                      @Const Pointer w,
                                      cudnnConvolutionStruct convDesc,
                                      @Cast("cudnnConvolutionFwdAlgo_t") int algo,
                                      Pointer workSpace,
                                      @Cast("size_t") long workSpaceSizeInBytes,
                                      @Const Pointer alpha2,
                                      cudnnTensorStruct zDesc,
                                      @Const Pointer z,
                                      cudnnTensorStruct biasDesc,
                                      @Const Pointer bias,
                                      cudnnActivationStruct activationDesc,
                                      cudnnTensorStruct yDesc,
                                      Pointer y);

/**
 * Function to compute the bias gradient for batch convolution.
 *
 * @param handle cuDNN context handle
 * @param alpha  const pointer; NOTE(review): presumably a scaling factor - confirm
 * @param dyDesc tensor descriptor handle for {@code dy}
 * @param dy     data pointer, declared const in the native signature
 * @param beta   const pointer; NOTE(review): presumably a scaling factor - confirm
 * @param dbDesc tensor descriptor handle for {@code db}
 * @param db     non-const data pointer
 * @return the native {@code cudnnStatus_t} result as an int
 */
public static native @Cast("cudnnStatus_t") int cudnnConvolutionBackwardBias(cudnnContext handle,
                             @Const Pointer alpha,
                             cudnnTensorStruct dyDesc,
                             @Const Pointer dy,
                             @Const Pointer beta,
                             cudnnTensorStruct dbDesc,
                             Pointer db);

/* Helper functions that provide the convolution algorithm that best fits the requirements */
/**
 * enum cudnnConvolutionBwdFilterPreference_t, exposed as int constants. These
 * are the values accepted by parameters cast to
 * {@code cudnnConvolutionBwdFilterPreference_t}.
 */
public static final int
    CUDNN_CONVOLUTION_BWD_FILTER_NO_WORKSPACE            = 0,
    CUDNN_CONVOLUTION_BWD_FILTER_PREFER_FASTEST          = 1,
    CUDNN_CONVOLUTION_BWD_FILTER_SPECIFY_WORKSPACE_LIMIT = 2;

/**
 * enum cudnnConvolutionBwdFilterAlgo_t, exposed as int constants. Values 0..6
 * name individual algorithms; {@code CUDNN_CONVOLUTION_BWD_FILTER_ALGO_COUNT}
 * (7) equals the number of algorithm values listed before it.
 */
public static final int
    CUDNN_CONVOLUTION_BWD_FILTER_ALGO_0                 = 0, /* non-deterministic */
    CUDNN_CONVOLUTION_BWD_FILTER_ALGO_1                 = 1,
    CUDNN_CONVOLUTION_BWD_FILTER_ALGO_FFT               = 2,
    CUDNN_CONVOLUTION_BWD_FILTER_ALGO_3                 = 3, /* non-deterministic */
    CUDNN_CONVOLUTION_BWD_FILTER_ALGO_WINOGRAD          = 4, /* not implemented */
    CUDNN_CONVOLUTION_BWD_FILTER_ALGO_WINOGRAD_NONFUSED = 5,
    CUDNN_CONVOLUTION_BWD_FILTER_ALGO_FFT_TILING        = 6,
    CUDNN_CONVOLUTION_BWD_FILTER_ALGO_COUNT             = 7;

/**
 * Java peer of the native {@code cudnnConvolutionBwdFilterAlgoPerf_t} struct.
 * Each field is exposed as a native getter plus a fluent native setter that
 * returns this peer type.
 */
public static class cudnnConvolutionBwdFilterAlgoPerf_t extends Pointer {
    static { Loader.load(); }

    /** Default native constructor: allocates one struct. */
    public cudnnConvolutionBwdFilterAlgoPerf_t() {
        super((Pointer)null);
        allocate();
    }

    /** Native array allocator for {@code size} structs. Access elements with {@link Pointer#position(long)}. */
    public cudnnConvolutionBwdFilterAlgoPerf_t(long size) {
        super((Pointer)null);
        allocateArray(size);
    }

    /** Pointer cast constructor. Delegates to {@link Pointer#Pointer(Pointer)}. */
    public cudnnConvolutionBwdFilterAlgoPerf_t(Pointer p) {
        super(p);
    }

    private native void allocate();
    private native void allocateArray(long size);

    /** Repositions via the superclass and returns the result narrowed to this type. */
    @Override
    public cudnnConvolutionBwdFilterAlgoPerf_t position(long position) {
        Pointer moved = super.position(position);
        return (cudnnConvolutionBwdFilterAlgoPerf_t) moved;
    }

    /* algo field, cast to cudnnConvolutionBwdFilterAlgo_t */
    public native @Cast("cudnnConvolutionBwdFilterAlgo_t") int algo();
    public native cudnnConvolutionBwdFilterAlgoPerf_t algo(int algo);

    /* status field, cast to cudnnStatus_t */
    public native @Cast("cudnnStatus_t") int status();
    public native cudnnConvolutionBwdFilterAlgoPerf_t status(int status);

    /* time field */
    public native float time();
    public native cudnnConvolutionBwdFilterAlgoPerf_t time(float time);

    /* memory field, cast to size_t */
    public native @Cast("size_t") long memory();
    public native cudnnConvolutionBwdFilterAlgoPerf_t memory(long memory);

    /* determinism field, cast to cudnnDeterminism_t */
    public native @Cast("cudnnDeterminism_t") int determinism();
    public native cudnnConvolutionBwdFilterAlgoPerf_t determinism(int determinism);

    /* mathType field, cast to cudnnMathType_t */
    public native @Cast("cudnnMathType_t") int mathType();
    public native cudnnConvolutionBwdFilterAlgoPerf_t mathType(int mathType);

    /* reserved array: indexed element access, plus a getter for the array as an IntPointer */
    public native int reserved(int i);
    public native cudnnConvolutionBwdFilterAlgoPerf_t reserved(int i, int reserved);
    @MemberGetter public native IntPointer reserved();
}

/**
 * Native binding for {@code cudnnGetConvolutionBackwardFilterAlgorithmMaxCount}.
 * The three overloads differ only in how the non-const int pointer
 * {@code count} is supplied: {@link IntPointer}, {@link IntBuffer} or {@code int[]}.
 * Each returns the native {@code cudnnStatus_t} result as an int.
 */
public static native @Cast("cudnnStatus_t") int cudnnGetConvolutionBackwardFilterAlgorithmMaxCount(cudnnContext handle, IntPointer count);
public static native @Cast("cudnnStatus_t") int cudnnGetConvolutionBackwardFilterAlgorithmMaxCount(cudnnContext handle, IntBuffer count);
public static native @Cast("cudnnStatus_t") int cudnnGetConvolutionBackwardFilterAlgorithmMaxCount(cudnnContext handle, int[] count);

/**
 * Native binding for {@code cudnnFindConvolutionBackwardFilterAlgorithm}; this
 * overload passes {@code returnedAlgoCount} as an {@link IntPointer}.
 *
 * @param handle             cuDNN context handle
 * @param xDesc              tensor descriptor handle
 * @param dyDesc             tensor descriptor handle
 * @param convDesc           convolution descriptor handle
 * @param dwDesc             filter descriptor handle
 * @param requestedAlgoCount NOTE(review): presumably the capacity of
 *                           {@code perfResults} - confirm against the cuDNN reference
 * @param returnedAlgoCount  non-const int pointer (presumably an output - confirm)
 * @param perfResults        {@code cudnnConvolutionBwdFilterAlgoPerf_t} peer handed
 *                           to the native call (presumably filled in - confirm)
 * @return the native {@code cudnnStatus_t} result as an int
 */
public static native @Cast("cudnnStatus_t") int cudnnFindConvolutionBackwardFilterAlgorithm(cudnnContext handle,
                                            cudnnTensorStruct xDesc,
                                            cudnnTensorStruct dyDesc,
                                            cudnnConvolutionStruct convDesc,
                                            cudnnFilterStruct dwDesc,
                                            int requestedAlgoCount,
                                            IntPointer returnedAlgoCount,
                                            cudnnConvolutionBwdFilterAlgoPerf_t perfResults);
/** Overload of {@code cudnnFindConvolutionBackwardFilterAlgorithm} that passes {@code returnedAlgoCount} as an {@link IntBuffer}. */
public static native @Cast("cudnnStatus_t") int cudnnFindConvolutionBackwardFilterAlgorithm(cudnnContext handle,
                                            cudnnTensorStruct xDesc,
                                            cudnnTensorStruct dyDesc,
                                            cudnnConvolutionStruct convDesc,
                                            cudnnFilterStruct dwDesc,
                                            int requestedAlgoCount,
                                            IntBuffer returnedAlgoCount,
                                            cudnnConvolutionBwdFilterAlgoPerf_t perfResults);
/** Overload of {@code cudnnFindConvolutionBackwardFilterAlgorithm} that passes {@code returnedAlgoCount} as an {@code int[]}. */
public static native @Cast("cudnnStatus_t") int cudnnFindConvolutionBackwardFilterAlgorithm(cudnnContext handle,
                                            cudnnTensorStruct xDesc,
                                            cudnnTensorStruct dyDesc,
                                            cudnnConvolutionStruct convDesc,
                                            cudnnFilterStruct dwDesc,
                                            int requestedAlgoCount,
                                            int[] returnedAlgoCount,
                                            cudnnConvolutionBwdFilterAlgoPerf_t perfResults);

/**
 * Native binding for {@code cudnnFindConvolutionBackwardFilterAlgorithmEx}; this
 * overload passes {@code returnedAlgoCount} as an {@link IntPointer}. Unlike
 * {@code cudnnFindConvolutionBackwardFilterAlgorithm}, this signature also takes
 * data pointers and a caller-supplied workspace.
 *
 * @param handle               cuDNN context handle
 * @param xDesc                tensor descriptor handle
 * @param x                    data pointer, declared const in the native signature
 * @param dyDesc               tensor descriptor handle
 * @param y                    data pointer, declared const in the native signature;
 *                             NOTE(review): named {@code y} although it follows
 *                             {@code dyDesc} - presumably the dy data, confirm
 * @param convDesc             convolution descriptor handle
 * @param dwDesc               filter descriptor handle
 * @param dw                   non-const data pointer
 * @param requestedAlgoCount   NOTE(review): presumably the capacity of
 *                             {@code perfResults} - confirm against the cuDNN reference
 * @param returnedAlgoCount    non-const int pointer (presumably an output - confirm)
 * @param perfResults          {@code cudnnConvolutionBwdFilterAlgoPerf_t} peer handed to the native call
 * @param workSpace            non-const pointer to the workspace
 * @param workSpaceSizeInBytes value cast to the native {@code size_t}
 * @return the native {@code cudnnStatus_t} result as an int
 */
public static native @Cast("cudnnStatus_t") int cudnnFindConvolutionBackwardFilterAlgorithmEx(cudnnContext handle,
                                              cudnnTensorStruct xDesc,
                                              @Const Pointer x,
                                              cudnnTensorStruct dyDesc,
                                              @Const Pointer y,
                                              cudnnConvolutionStruct convDesc,
                                              cudnnFilterStruct dwDesc,
                                              Pointer dw,
                                              int requestedAlgoCount,
                                              IntPointer returnedAlgoCount,
                                              cudnnConvolutionBwdFilterAlgoPerf_t perfResults,
                                              Pointer workSpace,
                                              @Cast("size_t") long workSpaceSizeInBytes);
/** Overload of {@code cudnnFindConvolutionBackwardFilterAlgorithmEx} that passes {@code returnedAlgoCount} as an {@link IntBuffer}. */
public static native @Cast("cudnnStatus_t") int cudnnFindConvolutionBackwardFilterAlgorithmEx(cudnnContext handle,
                                              cudnnTensorStruct xDesc,
                                              @Const Pointer x,
                                              cudnnTensorStruct dyDesc,
                                              @Const Pointer y,
                                              cudnnConvolutionStruct convDesc,
                                              cudnnFilterStruct dwDesc,
                                              Pointer dw,
                                              int requestedAlgoCount,
                                              IntBuffer returnedAlgoCount,
                                              cudnnConvolutionBwdFilterAlgoPerf_t perfResults,
                                              Pointer workSpace,
                                              @Cast("size_t") long workSpaceSizeInBytes);
/** Overload of {@code cudnnFindConvolutionBackwardFilterAlgorithmEx} that passes {@code returnedAlgoCount} as an {@code int[]}. */
public static native @Cast("cudnnStatus_t") int cudnnFindConvolutionBackwardFilterAlgorithmEx(cudnnContext handle,
                                              cudnnTensorStruct xDesc,
                                              @Const Pointer x,
                                              cudnnTensorStruct dyDesc,
                                              @Const Pointer y,
                                              cudnnConvolutionStruct convDesc,
                                              cudnnFilterStruct dwDesc,
                                              Pointer dw,
                                              int requestedAlgoCount,
                                              int[] returnedAlgoCount,
                                              cudnnConvolutionBwdFilterAlgoPerf_t perfResults,
                                              Pointer workSpace,
                                              @Cast("size_t") long workSpaceSizeInBytes);

/**
 * Binding for the native {@code cudnnGetConvolutionBackwardFilterAlgorithm}; the returned int
 * carries the native {@code cudnnStatus_t} value.
 * {@code preference} is cast to {@code cudnnConvolutionBwdFilterPreference_t},
 * {@code memoryLimitInBytes} to {@code size_t}, and {@code algo} to a pointer to
 * {@code cudnnConvolutionBwdFilterAlgo_t} (presumably where the chosen algorithm is
 * written - TODO confirm against the cuDNN documentation).
 * The three overloads differ only in the Java type of {@code algo}; this one uses {@link IntPointer}.
 */
public static native @Cast("cudnnStatus_t") int cudnnGetConvolutionBackwardFilterAlgorithm(cudnnContext handle,
                                           cudnnTensorStruct xDesc,
                                           cudnnTensorStruct dyDesc,
                                           cudnnConvolutionStruct convDesc,
                                           cudnnFilterStruct dwDesc,
                                           @Cast("cudnnConvolutionBwdFilterPreference_t") int preference,
                                           @Cast("size_t") long memoryLimitInBytes,
                                           @Cast("cudnnConvolutionBwdFilterAlgo_t*") IntPointer algo);
/** Same native function; {@code algo} is an {@link IntBuffer}. */
public static native @Cast("cudnnStatus_t") int cudnnGetConvolutionBackwardFilterAlgorithm(cudnnContext handle,
                                           cudnnTensorStruct xDesc,
                                           cudnnTensorStruct dyDesc,
                                           cudnnConvolutionStruct convDesc,
                                           cudnnFilterStruct dwDesc,
                                           @Cast("cudnnConvolutionBwdFilterPreference_t") int preference,
                                           @Cast("size_t") long memoryLimitInBytes,
                                           @Cast("cudnnConvolutionBwdFilterAlgo_t*") IntBuffer algo);
/** Same native function; {@code algo} is an {@code int[]}. */
public static native @Cast("cudnnStatus_t") int cudnnGetConvolutionBackwardFilterAlgorithm(cudnnContext handle,
                                           cudnnTensorStruct xDesc,
                                           cudnnTensorStruct dyDesc,
                                           cudnnConvolutionStruct convDesc,
                                           cudnnFilterStruct dwDesc,
                                           @Cast("cudnnConvolutionBwdFilterPreference_t") int preference,
                                           @Cast("size_t") long memoryLimitInBytes,
                                           @Cast("cudnnConvolutionBwdFilterAlgo_t*") int[] algo);

/**
 * Binding for the native {@code cudnnGetConvolutionBackwardFilterAlgorithm_v7}; the returned
 * int carries the native {@code cudnnStatus_t} value.
 * Unlike cudnnGetConvolutionBackwardFilterAlgorithm, this variant takes
 * {@code requestedAlgoCount}, {@code returnedAlgoCount} and a
 * {@code cudnnConvolutionBwdFilterAlgoPerf_t perfResults} argument instead of a preference,
 * a memory limit and a single {@code algo} pointer.
 * The descriptors have the same types and order as in that function but are named
 * {@code srcDesc}, {@code diffDesc} and {@code gradDesc} here.
 * The three overloads differ only in the Java type of {@code returnedAlgoCount};
 * this one uses {@link IntPointer}.
 */
public static native @Cast("cudnnStatus_t") int cudnnGetConvolutionBackwardFilterAlgorithm_v7(cudnnContext handle,
                                              cudnnTensorStruct srcDesc,
                                              cudnnTensorStruct diffDesc,
                                              cudnnConvolutionStruct convDesc,
                                              cudnnFilterStruct gradDesc,
                                              int requestedAlgoCount,
                                              IntPointer returnedAlgoCount,
                                              cudnnConvolutionBwdFilterAlgoPerf_t perfResults);
/** Same native function; {@code returnedAlgoCount} is an {@link IntBuffer}. */
public static native @Cast("cudnnStatus_t") int cudnnGetConvolutionBackwardFilterAlgorithm_v7(cudnnContext handle,
                                              cudnnTensorStruct srcDesc,
                                              cudnnTensorStruct diffDesc,
                                              cudnnConvolutionStruct convDesc,
                                              cudnnFilterStruct gradDesc,
                                              int requestedAlgoCount,
                                              IntBuffer returnedAlgoCount,
                                              cudnnConvolutionBwdFilterAlgoPerf_t perfResults);
/** Same native function; {@code returnedAlgoCount} is an {@code int[]}. */
public static native @Cast("cudnnStatus_t") int cudnnGetConvolutionBackwardFilterAlgorithm_v7(cudnnContext handle,
                                              cudnnTensorStruct srcDesc,
                                              cudnnTensorStruct diffDesc,
                                              cudnnConvolutionStruct convDesc,
                                              cudnnFilterStruct gradDesc,
                                              int requestedAlgoCount,
                                              int[] returnedAlgoCount,
                                              cudnnConvolutionBwdFilterAlgoPerf_t perfResults);

/*
 *  convolution algorithm (which potentially requires some workspace)
 */

/* Helper function to return the minimum size of the workspace to be passed to the convolution given an algo */
/**
 * Binding for the native {@code cudnnGetConvolutionBackwardFilterWorkspaceSize}; the returned
 * int carries the native {@code cudnnStatus_t} value.
 * {@code algo} is cast to {@code cudnnConvolutionBwdFilterAlgo_t} and {@code sizeInBytes} is
 * passed as a pointer to {@code size_t} (the location that receives the workspace size).
 * Note that the filter descriptor is named {@code gradDesc} here, not {@code dwDesc} as in
 * cudnnConvolutionBackwardFilter.
 */
public static native @Cast("cudnnStatus_t") int cudnnGetConvolutionBackwardFilterWorkspaceSize(cudnnContext handle,
                                               cudnnTensorStruct xDesc,
                                               cudnnTensorStruct dyDesc,
                                               cudnnConvolutionStruct convDesc,
                                               cudnnFilterStruct gradDesc,
                                               @Cast("cudnnConvolutionBwdFilterAlgo_t") int algo,
                                               @Cast("size_t*") SizeTPointer sizeInBytes);

/**
 * Binding for the native {@code cudnnConvolutionBackwardFilter}; the returned int carries the
 * native {@code cudnnStatus_t} value.
 * {@code alpha}, {@code x}, {@code dy} and {@code beta} are const in the native signature;
 * {@code workSpace} and {@code dw} are mutable pointers. {@code algo} is cast to
 * {@code cudnnConvolutionBwdFilterAlgo_t} and {@code workSpaceSizeInBytes} to {@code size_t}.
 * NOTE(review): presumably {@code dw} is the output, scaled/blended via {@code alpha} and
 * {@code beta} - confirm against the cuDNN documentation.
 */
public static native @Cast("cudnnStatus_t") int cudnnConvolutionBackwardFilter(cudnnContext handle,
                               @Const Pointer alpha,
                               cudnnTensorStruct xDesc,
                               @Const Pointer x,
                               cudnnTensorStruct dyDesc,
                               @Const Pointer dy,
                               cudnnConvolutionStruct convDesc,
                               @Cast("cudnnConvolutionBwdFilterAlgo_t") int algo,
                               Pointer workSpace,
                               @Cast("size_t") long workSpaceSizeInBytes,
                               @Const Pointer beta,
                               cudnnFilterStruct dwDesc,
                               Pointer dw);

/*********************************************************/
/* Helper function to provide the convolution algo that best fits the requirement */
/**
 * enum cudnnConvolutionBwdDataPreference_t
 * <p>Values of the native enum, exposed as plain int constants. The {@code preference}
 * argument of cudnnGetConvolutionBackwardDataAlgorithm is cast to this enum type.
 * NOTE(review): CUDNN_CONVOLUTION_BWD_DATA_SPECIFY_WORKSPACE_LIMIT presumably pairs with that
 * function's {@code memoryLimitInBytes} argument - confirm.
 */
public static final int
    CUDNN_CONVOLUTION_BWD_DATA_NO_WORKSPACE            = 0,
    CUDNN_CONVOLUTION_BWD_DATA_PREFER_FASTEST          = 1,
    CUDNN_CONVOLUTION_BWD_DATA_SPECIFY_WORKSPACE_LIMIT = 2;

/**
 * enum cudnnConvolutionBwdDataAlgo_t
 * <p>Values of the native enum, exposed as plain int constants. This enum type is the cast
 * target of the {@code algo} argument of cudnnGetConvolutionBackwardDataWorkspaceSize and
 * cudnnConvolutionBackwardData, and of the {@code algo} field of
 * cudnnConvolutionBwdDataAlgoPerf_t.
 * CUDNN_CONVOLUTION_BWD_DATA_ALGO_COUNT (6) is one past the highest algorithm value (5),
 * so it presumably denotes the number of algorithms rather than an algorithm.
 */
public static final int
    CUDNN_CONVOLUTION_BWD_DATA_ALGO_0                 = 0, /* non-deterministic */
    CUDNN_CONVOLUTION_BWD_DATA_ALGO_1                 = 1,
    CUDNN_CONVOLUTION_BWD_DATA_ALGO_FFT               = 2,
    CUDNN_CONVOLUTION_BWD_DATA_ALGO_FFT_TILING        = 3,
    CUDNN_CONVOLUTION_BWD_DATA_ALGO_WINOGRAD          = 4,
    CUDNN_CONVOLUTION_BWD_DATA_ALGO_WINOGRAD_NONFUSED = 5,
    CUDNN_CONVOLUTION_BWD_DATA_ALGO_COUNT             = 6;

/**
 * Java peer of the native {@code cudnnConvolutionBwdDataAlgoPerf_t} struct.
 * Instances are used as the {@code perfResults} argument of the
 * cudnnFindConvolutionBackwardDataAlgorithm* and cudnnGetConvolutionBackwardDataAlgorithm_v7
 * bindings. Every field is exposed through a native getter and a native setter; the setters
 * return {@code cudnnConvolutionBwdDataAlgoPerf_t}, so calls can be chained.
 */
public static class cudnnConvolutionBwdDataAlgoPerf_t extends Pointer {
    // Runs JavaCPP's Loader when this class is initialized.
    static { Loader.load(); }
    /** Default native constructor. */
    public cudnnConvolutionBwdDataAlgoPerf_t() { super((Pointer)null); allocate(); }
    /** Native array allocator. Access with {@link Pointer#position(long)}. */
    public cudnnConvolutionBwdDataAlgoPerf_t(long size) { super((Pointer)null); allocateArray(size); }
    /** Pointer cast constructor. Invokes {@link Pointer#Pointer(Pointer)}. */
    public cudnnConvolutionBwdDataAlgoPerf_t(Pointer p) { super(p); }
    // Native allocators backing the two allocating constructors above.
    private native void allocate();
    private native void allocateArray(long size);
    /** Delegates to {@link Pointer#position(long)} and narrows the result to this type. */
    @Override public cudnnConvolutionBwdDataAlgoPerf_t position(long position) {
        return (cudnnConvolutionBwdDataAlgoPerf_t)super.position(position);
    }

    /** Field {@code algo}; the getter's value is cast to {@code cudnnConvolutionBwdDataAlgo_t}. */
    public native @Cast("cudnnConvolutionBwdDataAlgo_t") int algo(); public native cudnnConvolutionBwdDataAlgoPerf_t algo(int algo);
    /** Field {@code status}; the getter's value is cast to {@code cudnnStatus_t}. */
    public native @Cast("cudnnStatus_t") int status(); public native cudnnConvolutionBwdDataAlgoPerf_t status(int status);
    /** Field {@code time} (float; units are not stated in this file). */
    public native float time(); public native cudnnConvolutionBwdDataAlgoPerf_t time(float time);
    /** Field {@code memory}; the getter's value is cast to {@code size_t}. */
    public native @Cast("size_t") long memory(); public native cudnnConvolutionBwdDataAlgoPerf_t memory(long memory);
    /** Field {@code determinism}; the getter's value is cast to {@code cudnnDeterminism_t}. */
    public native @Cast("cudnnDeterminism_t") int determinism(); public native cudnnConvolutionBwdDataAlgoPerf_t determinism(int determinism);
    /** Field {@code mathType}; the getter's value is cast to {@code cudnnMathType_t}. */
    public native @Cast("cudnnMathType_t") int mathType(); public native cudnnConvolutionBwdDataAlgoPerf_t mathType(int mathType);
    /** Indexed access to element {@code i} of the {@code reserved} int array. */
    public native int reserved(int i); public native cudnnConvolutionBwdDataAlgoPerf_t reserved(int i, int reserved);
    /** Getter for the {@code reserved} member as a whole, returned as an {@link IntPointer}. */
    @MemberGetter public native IntPointer reserved();
}

/**
 * Binding for the native {@code cudnnGetConvolutionBackwardDataAlgorithmMaxCount}; the returned
 * int carries the native {@code cudnnStatus_t} value. {@code count} is a pointer to int
 * (presumably receives the maximum count - TODO confirm).
 * The three overloads differ only in the Java type of {@code count}
 * ({@link IntPointer}, {@link IntBuffer}, {@code int[]}).
 */
public static native @Cast("cudnnStatus_t") int cudnnGetConvolutionBackwardDataAlgorithmMaxCount(cudnnContext handle, IntPointer count);
public static native @Cast("cudnnStatus_t") int cudnnGetConvolutionBackwardDataAlgorithmMaxCount(cudnnContext handle, IntBuffer count);
public static native @Cast("cudnnStatus_t") int cudnnGetConvolutionBackwardDataAlgorithmMaxCount(cudnnContext handle, int[] count);

/**
 * Binding for the native {@code cudnnFindConvolutionBackwardDataAlgorithm}; the returned int
 * carries the native {@code cudnnStatus_t} value.
 * Takes only descriptors (no data pointers), a {@code requestedAlgoCount}, a pointer
 * {@code returnedAlgoCount} and a {@code cudnnConvolutionBwdDataAlgoPerf_t perfResults}.
 * The three overloads differ only in the Java type of {@code returnedAlgoCount};
 * this one uses {@link IntPointer}.
 */
public static native @Cast("cudnnStatus_t") int cudnnFindConvolutionBackwardDataAlgorithm(cudnnContext handle,
                                          cudnnFilterStruct wDesc,
                                          cudnnTensorStruct dyDesc,
                                          cudnnConvolutionStruct convDesc,
                                          cudnnTensorStruct dxDesc,
                                          int requestedAlgoCount,
                                          IntPointer returnedAlgoCount,
                                          cudnnConvolutionBwdDataAlgoPerf_t perfResults);
/** Same native function; {@code returnedAlgoCount} is an {@link IntBuffer}. */
public static native @Cast("cudnnStatus_t") int cudnnFindConvolutionBackwardDataAlgorithm(cudnnContext handle,
                                          cudnnFilterStruct wDesc,
                                          cudnnTensorStruct dyDesc,
                                          cudnnConvolutionStruct convDesc,
                                          cudnnTensorStruct dxDesc,
                                          int requestedAlgoCount,
                                          IntBuffer returnedAlgoCount,
                                          cudnnConvolutionBwdDataAlgoPerf_t perfResults);
/** Same native function; {@code returnedAlgoCount} is an {@code int[]}. */
public static native @Cast("cudnnStatus_t") int cudnnFindConvolutionBackwardDataAlgorithm(cudnnContext handle,
                                          cudnnFilterStruct wDesc,
                                          cudnnTensorStruct dyDesc,
                                          cudnnConvolutionStruct convDesc,
                                          cudnnTensorStruct dxDesc,
                                          int requestedAlgoCount,
                                          int[] returnedAlgoCount,
                                          cudnnConvolutionBwdDataAlgoPerf_t perfResults);

/**
 * Binding for the native {@code cudnnFindConvolutionBackwardDataAlgorithmEx}; the returned int
 * carries the native {@code cudnnStatus_t} value.
 * Compared with cudnnFindConvolutionBackwardDataAlgorithm, this variant additionally takes the
 * data pointers {@code w} and {@code dy} (const), {@code dx} (mutable), and a caller-supplied
 * {@code workSpace} whose size {@code workSpaceSizeInBytes} is passed as {@code size_t}.
 * The three overloads differ only in the Java type of {@code returnedAlgoCount};
 * this one uses {@link IntPointer}.
 */
public static native @Cast("cudnnStatus_t") int cudnnFindConvolutionBackwardDataAlgorithmEx(cudnnContext handle,
                                            cudnnFilterStruct wDesc,
                                            @Const Pointer w,
                                            cudnnTensorStruct dyDesc,
                                            @Const Pointer dy,
                                            cudnnConvolutionStruct convDesc,
                                            cudnnTensorStruct dxDesc,
                                            Pointer dx,
                                            int requestedAlgoCount,
                                            IntPointer returnedAlgoCount,
                                            cudnnConvolutionBwdDataAlgoPerf_t perfResults,
                                            Pointer workSpace,
                                            @Cast("size_t") long workSpaceSizeInBytes);
/** Same native function; {@code returnedAlgoCount} is an {@link IntBuffer}. */
public static native @Cast("cudnnStatus_t") int cudnnFindConvolutionBackwardDataAlgorithmEx(cudnnContext handle,
                                            cudnnFilterStruct wDesc,
                                            @Const Pointer w,
                                            cudnnTensorStruct dyDesc,
                                            @Const Pointer dy,
                                            cudnnConvolutionStruct convDesc,
                                            cudnnTensorStruct dxDesc,
                                            Pointer dx,
                                            int requestedAlgoCount,
                                            IntBuffer returnedAlgoCount,
                                            cudnnConvolutionBwdDataAlgoPerf_t perfResults,
                                            Pointer workSpace,
                                            @Cast("size_t") long workSpaceSizeInBytes);
/** Same native function; {@code returnedAlgoCount} is an {@code int[]}. */
public static native @Cast("cudnnStatus_t") int cudnnFindConvolutionBackwardDataAlgorithmEx(cudnnContext handle,
                                            cudnnFilterStruct wDesc,
                                            @Const Pointer w,
                                            cudnnTensorStruct dyDesc,
                                            @Const Pointer dy,
                                            cudnnConvolutionStruct convDesc,
                                            cudnnTensorStruct dxDesc,
                                            Pointer dx,
                                            int requestedAlgoCount,
                                            int[] returnedAlgoCount,
                                            cudnnConvolutionBwdDataAlgoPerf_t perfResults,
                                            Pointer workSpace,
                                            @Cast("size_t") long workSpaceSizeInBytes);

/**
 * Binding for the native {@code cudnnGetConvolutionBackwardDataAlgorithm}; the returned int
 * carries the native {@code cudnnStatus_t} value.
 * {@code preference} is cast to {@code cudnnConvolutionBwdDataPreference_t} (see the
 * CUDNN_CONVOLUTION_BWD_DATA_* preference constants), {@code memoryLimitInBytes} to
 * {@code size_t}, and {@code algo} to a pointer to {@code cudnnConvolutionBwdDataAlgo_t}
 * (presumably where the chosen algorithm is written - TODO confirm).
 * The three overloads differ only in the Java type of {@code algo}; this one uses {@link IntPointer}.
 */
public static native @Cast("cudnnStatus_t") int cudnnGetConvolutionBackwardDataAlgorithm(cudnnContext handle,
                                         cudnnFilterStruct wDesc,
                                         cudnnTensorStruct dyDesc,
                                         cudnnConvolutionStruct convDesc,
                                         cudnnTensorStruct dxDesc,
                                         @Cast("cudnnConvolutionBwdDataPreference_t") int preference,
                                         @Cast("size_t") long memoryLimitInBytes,
                                         @Cast("cudnnConvolutionBwdDataAlgo_t*") IntPointer algo);
/** Same native function; {@code algo} is an {@link IntBuffer}. */
public static native @Cast("cudnnStatus_t") int cudnnGetConvolutionBackwardDataAlgorithm(cudnnContext handle,
                                         cudnnFilterStruct wDesc,
                                         cudnnTensorStruct dyDesc,
                                         cudnnConvolutionStruct convDesc,
                                         cudnnTensorStruct dxDesc,
                                         @Cast("cudnnConvolutionBwdDataPreference_t") int preference,
                                         @Cast("size_t") long memoryLimitInBytes,
                                         @Cast("cudnnConvolutionBwdDataAlgo_t*") IntBuffer algo);
/** Same native function; {@code algo} is an {@code int[]}. */
public static native @Cast("cudnnStatus_t") int cudnnGetConvolutionBackwardDataAlgorithm(cudnnContext handle,
                                         cudnnFilterStruct wDesc,
                                         cudnnTensorStruct dyDesc,
                                         cudnnConvolutionStruct convDesc,
                                         cudnnTensorStruct dxDesc,
                                         @Cast("cudnnConvolutionBwdDataPreference_t") int preference,
                                         @Cast("size_t") long memoryLimitInBytes,
                                         @Cast("cudnnConvolutionBwdDataAlgo_t*") int[] algo);

/**
 * Binding for the native {@code cudnnGetConvolutionBackwardDataAlgorithm_v7}; the returned int
 * carries the native {@code cudnnStatus_t} value.
 * Unlike cudnnGetConvolutionBackwardDataAlgorithm, this variant takes
 * {@code requestedAlgoCount}, {@code returnedAlgoCount} and a
 * {@code cudnnConvolutionBwdDataAlgoPerf_t perfResults} argument instead of a preference,
 * a memory limit and a single {@code algo} pointer.
 * The descriptors have the same types and order as in that function but are named
 * {@code filterDesc}, {@code diffDesc} and {@code gradDesc} here.
 * The three overloads differ only in the Java type of {@code returnedAlgoCount};
 * this one uses {@link IntPointer}.
 */
public static native @Cast("cudnnStatus_t") int cudnnGetConvolutionBackwardDataAlgorithm_v7(cudnnContext handle,
                                            cudnnFilterStruct filterDesc,
                                            cudnnTensorStruct diffDesc,
                                            cudnnConvolutionStruct convDesc,
                                            cudnnTensorStruct gradDesc,
                                            int requestedAlgoCount,
                                            IntPointer returnedAlgoCount,
                                            cudnnConvolutionBwdDataAlgoPerf_t perfResults);
/** Same native function; {@code returnedAlgoCount} is an {@link IntBuffer}. */
public static native @Cast("cudnnStatus_t") int cudnnGetConvolutionBackwardDataAlgorithm_v7(cudnnContext handle,
                                            cudnnFilterStruct filterDesc,
                                            cudnnTensorStruct diffDesc,
                                            cudnnConvolutionStruct convDesc,
                                            cudnnTensorStruct gradDesc,
                                            int requestedAlgoCount,
                                            IntBuffer returnedAlgoCount,
                                            cudnnConvolutionBwdDataAlgoPerf_t perfResults);
/** Same native function; {@code returnedAlgoCount} is an {@code int[]}. */
public static native @Cast("cudnnStatus_t") int cudnnGetConvolutionBackwardDataAlgorithm_v7(cudnnContext handle,
                                            cudnnFilterStruct filterDesc,
                                            cudnnTensorStruct diffDesc,
                                            cudnnConvolutionStruct convDesc,
                                            cudnnTensorStruct gradDesc,
                                            int requestedAlgoCount,
                                            int[] returnedAlgoCount,
                                            cudnnConvolutionBwdDataAlgoPerf_t perfResults);

/* Helper function to return the minimum size of the workspace to be passed to the convolution given an algo */
/**
 * Binding for the native {@code cudnnGetConvolutionBackwardDataWorkspaceSize}; the returned
 * int carries the native {@code cudnnStatus_t} value.
 * {@code algo} is cast to {@code cudnnConvolutionBwdDataAlgo_t} (see the
 * CUDNN_CONVOLUTION_BWD_DATA_ALGO_* constants) and {@code sizeInBytes} is passed as a pointer
 * to {@code size_t} (the location that receives the workspace size).
 */
public static native @Cast("cudnnStatus_t") int cudnnGetConvolutionBackwardDataWorkspaceSize(cudnnContext handle,
                                             cudnnFilterStruct wDesc,
                                             cudnnTensorStruct dyDesc,
                                             cudnnConvolutionStruct convDesc,
                                             cudnnTensorStruct dxDesc,
                                             @Cast("cudnnConvolutionBwdDataAlgo_t") int algo,
                                             @Cast("size_t*") SizeTPointer sizeInBytes);

/**
 * Binding for the native {@code cudnnConvolutionBackwardData}; the returned int carries the
 * native {@code cudnnStatus_t} value.
 * {@code alpha}, {@code w}, {@code dy} and {@code beta} are const in the native signature;
 * {@code workSpace} and {@code dx} are mutable pointers. {@code algo} is cast to
 * {@code cudnnConvolutionBwdDataAlgo_t} and {@code workSpaceSizeInBytes} to {@code size_t}.
 * NOTE(review): presumably {@code dx} is the output, scaled/blended via {@code alpha} and
 * {@code beta} - confirm against the cuDNN documentation.
 */
public static native @Cast("cudnnStatus_t") int cudnnConvolutionBackwardData(cudnnContext handle,
                             @Const Pointer alpha,
                             cudnnFilterStruct wDesc,
                             @Const Pointer w,
                             cudnnTensorStruct dyDesc,
                             @Const Pointer dy,
                             cudnnConvolutionStruct convDesc,
                             @Cast("cudnnConvolutionBwdDataAlgo_t") int algo,
                             Pointer workSpace,
                             @Cast("size_t") long workSpaceSizeInBytes,
                             @Const Pointer beta,
                             cudnnTensorStruct dxDesc,
                             Pointer dx);

/**
 * Binding for the native {@code cudnnIm2Col}; the returned int carries the native
 * {@code cudnnStatus_t} value.
 * {@code x} is const in the native signature and {@code colBuffer} is a mutable pointer
 * (presumably the destination buffer - TODO confirm). Only descriptors are passed for the
 * filter and the convolution; no filter data pointer is taken.
 */
public static native @Cast("cudnnStatus_t") int cudnnIm2Col(cudnnContext handle,
            cudnnTensorStruct xDesc,
            @Const Pointer x,
            cudnnFilterStruct wDesc,
            cudnnConvolutionStruct convDesc,
            Pointer colBuffer);

/*
 *  softmax algorithm
 */
/**
 * enum cudnnSoftmaxAlgorithm_t
 * <p>Values of the native enum, exposed as plain int constants. The {@code algo} argument of
 * cudnnSoftmaxForward and cudnnSoftmaxBackward is cast to this enum type.
 */
public static final int
    CUDNN_SOFTMAX_FAST     = 0, /* straightforward implementation */
    CUDNN_SOFTMAX_ACCURATE = 1, /* subtract max from every point to avoid overflow */
    CUDNN_SOFTMAX_LOG      = 2;

/**
 * enum cudnnSoftmaxMode_t
 * <p>Values of the native enum, exposed as plain int constants. The {@code mode} argument of
 * cudnnSoftmaxForward and cudnnSoftmaxBackward is cast to this enum type.
 */
public static final int
    CUDNN_SOFTMAX_MODE_INSTANCE = 0, /* compute the softmax over all C, H, W for each N */
    CUDNN_SOFTMAX_MODE_CHANNEL  = 1;  /* compute the softmax over all C for each H, W, N */

/* Softmax functions: All of the form "output = alpha * Op(inputs) + beta * output" */

/* Function to perform forward softmax */
/**
 * Binding for the native {@code cudnnSoftmaxForward}; the returned int carries the native
 * {@code cudnnStatus_t} value.
 * {@code algo} is cast to {@code cudnnSoftmaxAlgorithm_t} (CUDNN_SOFTMAX_FAST/ACCURATE/LOG)
 * and {@code mode} to {@code cudnnSoftmaxMode_t} (CUDNN_SOFTMAX_MODE_*).
 * {@code alpha}, {@code x} and {@code beta} are const in the native signature; {@code y} is a
 * mutable pointer, i.e. the "output" of the alpha/beta form described for the softmax functions.
 */
public static native @Cast("cudnnStatus_t") int cudnnSoftmaxForward(cudnnContext handle,
                    @Cast("cudnnSoftmaxAlgorithm_t") int algo,
                    @Cast("cudnnSoftmaxMode_t") int mode,
                    @Const Pointer alpha,
                    cudnnTensorStruct xDesc,
                    @Const Pointer x,
                    @Const Pointer beta,
                    cudnnTensorStruct yDesc,
                    Pointer y);

/* Function to perform backward softmax */
/**
 * Binding for the native {@code cudnnSoftmaxBackward}; the returned int carries the native
 * {@code cudnnStatus_t} value.
 * {@code algo} is cast to {@code cudnnSoftmaxAlgorithm_t} and {@code mode} to
 * {@code cudnnSoftmaxMode_t}. {@code alpha}, {@code y}, {@code dy} and {@code beta} are const
 * in the native signature; {@code dx} is the only mutable data pointer.
 */
public static native @Cast("cudnnStatus_t") int cudnnSoftmaxBackward(cudnnContext handle,
                     @Cast("cudnnSoftmaxAlgorithm_t") int algo,
                     @Cast("cudnnSoftmaxMode_t") int mode,
                     @Const Pointer alpha,
                     cudnnTensorStruct yDesc,
                     @Const Pointer y,
                     cudnnTensorStruct dyDesc,
                     @Const Pointer dy,
                     @Const Pointer beta,
                     cudnnTensorStruct dxDesc,
                     Pointer dx);

/*
 *  pooling mode
 */
/**
 * enum cudnnPoolingMode_t
 * <p>Values of the native enum, exposed as plain int constants. The {@code mode} argument of
 * cudnnSetPooling2dDescriptor and cudnnSetPoolingNdDescriptor is cast to this enum type, and
 * the {@code mode} output of the matching getters is cast to a pointer to it.
 */
public static final int
    CUDNN_POOLING_MAX                           = 0,
    CUDNN_POOLING_AVERAGE_COUNT_INCLUDE_PADDING = 1, /* count for average includes padded values */
    CUDNN_POOLING_AVERAGE_COUNT_EXCLUDE_PADDING = 2, /* count for average does not include padded values */
    CUDNN_POOLING_MAX_DETERMINISTIC             = 3;

/* Create an instance of pooling descriptor */
/**
 * Binding for the native {@code cudnnCreatePoolingDescriptor}; the returned int carries the
 * native {@code cudnnStatus_t} value. {@code poolingDesc} is annotated {@code @ByPtrPtr},
 * i.e. it is handed to native code as a pointer to the descriptor pointer.
 */
public static native @Cast("cudnnStatus_t") int cudnnCreatePoolingDescriptor(@ByPtrPtr cudnnPoolingStruct poolingDesc);

/**
 * Binding for the native {@code cudnnSetPooling2dDescriptor}; the returned int carries the
 * native {@code cudnnStatus_t} value.
 * {@code mode} is cast to {@code cudnnPoolingMode_t} (CUDNN_POOLING_* constants) and
 * {@code maxpoolingNanOpt} to {@code cudnnNanPropagation_t}. The remaining arguments give the
 * window size, padding and stride as separate height/vertical and width/horizontal ints.
 */
public static native @Cast("cudnnStatus_t") int cudnnSetPooling2dDescriptor(cudnnPoolingStruct poolingDesc,
                            @Cast("cudnnPoolingMode_t") int mode,
                            @Cast("cudnnNanPropagation_t") int maxpoolingNanOpt,
                            int windowHeight,
                            int windowWidth,
                            int verticalPadding,
                            int horizontalPadding,
                            int verticalStride,
                            int horizontalStride);

/**
 * Binding for the native {@code cudnnGetPooling2dDescriptor}; the returned int carries the
 * native {@code cudnnStatus_t} value.
 * Mirrors cudnnSetPooling2dDescriptor, with every setting passed as a pointer instead of a
 * value: {@code mode} is cast to a pointer to {@code cudnnPoolingMode_t},
 * {@code maxpoolingNanOpt} to a pointer to {@code cudnnNanPropagation_t}, and the window,
 * padding and stride arguments are int pointers.
 * The three overloads differ only in the Java pointer type; this one uses {@link IntPointer}.
 */
public static native @Cast("cudnnStatus_t") int cudnnGetPooling2dDescriptor(cudnnPoolingStruct poolingDesc,
                            @Cast("cudnnPoolingMode_t*") IntPointer mode,
                            @Cast("cudnnNanPropagation_t*") IntPointer maxpoolingNanOpt,
                            IntPointer windowHeight,
                            IntPointer windowWidth,
                            IntPointer verticalPadding,
                            IntPointer horizontalPadding,
                            IntPointer verticalStride,
                            IntPointer horizontalStride);
/** Same native function; all pointer arguments are {@link IntBuffer}s. */
public static native @Cast("cudnnStatus_t") int cudnnGetPooling2dDescriptor(cudnnPoolingStruct poolingDesc,
                            @Cast("cudnnPoolingMode_t*") IntBuffer mode,
                            @Cast("cudnnNanPropagation_t*") IntBuffer maxpoolingNanOpt,
                            IntBuffer windowHeight,
                            IntBuffer windowWidth,
                            IntBuffer verticalPadding,
                            IntBuffer horizontalPadding,
                            IntBuffer verticalStride,
                            IntBuffer horizontalStride);
/** Same native function; all pointer arguments are {@code int[]} arrays. */
public static native @Cast("cudnnStatus_t") int cudnnGetPooling2dDescriptor(cudnnPoolingStruct poolingDesc,
                            @Cast("cudnnPoolingMode_t*") int[] mode,
                            @Cast("cudnnNanPropagation_t*") int[] maxpoolingNanOpt,
                            int[] windowHeight,
                            int[] windowWidth,
                            int[] verticalPadding,
                            int[] horizontalPadding,
                            int[] verticalStride,
                            int[] horizontalStride);

/**
 * Binding for the native {@code cudnnSetPoolingNdDescriptor}; the returned int carries the
 * native {@code cudnnStatus_t} value.
 * {@code mode} is cast to {@code const cudnnPoolingMode_t} and {@code maxpoolingNanOpt} to
 * {@code const cudnnNanPropagation_t}. {@code windowDimA}, {@code paddingA} and
 * {@code strideA} are const int arrays (presumably {@code nbDims} entries each - TODO confirm).
 * The three overloads differ only in the Java type of the arrays; this one uses {@link IntPointer}.
 */
public static native @Cast("cudnnStatus_t") int cudnnSetPoolingNdDescriptor(cudnnPoolingStruct poolingDesc,
                            @Cast("const cudnnPoolingMode_t") int mode,
                            @Cast("const cudnnNanPropagation_t") int maxpoolingNanOpt,
                            int nbDims,
                            @Const IntPointer windowDimA,
                            @Const IntPointer paddingA,
                            @Const IntPointer strideA);
/** Same native function; the arrays are {@link IntBuffer}s. */
public static native @Cast("cudnnStatus_t") int cudnnSetPoolingNdDescriptor(cudnnPoolingStruct poolingDesc,
                            @Cast("const cudnnPoolingMode_t") int mode,
                            @Cast("const cudnnNanPropagation_t") int maxpoolingNanOpt,
                            int nbDims,
                            @Const IntBuffer windowDimA,
                            @Const IntBuffer paddingA,
                            @Const IntBuffer strideA);
/** Same native function; the arrays are {@code int[]}. */
public static native @Cast("cudnnStatus_t") int cudnnSetPoolingNdDescriptor(cudnnPoolingStruct poolingDesc,
                            @Cast("const cudnnPoolingMode_t") int mode,
                            @Cast("const cudnnNanPropagation_t") int maxpoolingNanOpt,
                            int nbDims,
                            @Const int[] windowDimA,
                            @Const int[] paddingA,
                            @Const int[] strideA);

/**
 * Binding for the native {@code cudnnGetPoolingNdDescriptor}; the returned int carries the
 * native {@code cudnnStatus_t} value.
 * Takes {@code nbDimsRequested} by value; {@code mode} (cast to a pointer to
 * {@code cudnnPoolingMode_t}), {@code maxpoolingNanOpt} (cast to a pointer to
 * {@code cudnnNanPropagation_t}), {@code nbDims} and the three arrays are non-const pointers.
 * NOTE(review): the arrays presumably need room for {@code nbDimsRequested} entries - confirm.
 * The three overloads differ only in the Java pointer type; this one uses {@link IntPointer}.
 */
public static native @Cast("cudnnStatus_t") int cudnnGetPoolingNdDescriptor(cudnnPoolingStruct poolingDesc,
                            int nbDimsRequested,
                            @Cast("cudnnPoolingMode_t*") IntPointer mode,
                            @Cast("cudnnNanPropagation_t*") IntPointer maxpoolingNanOpt,
                            IntPointer nbDims,
                            IntPointer windowDimA,
                            IntPointer paddingA,
                            IntPointer strideA);
/** Same native function; all pointer arguments are {@link IntBuffer}s. */
public static native @Cast("cudnnStatus_t") int cudnnGetPoolingNdDescriptor(cudnnPoolingStruct poolingDesc,
                            int nbDimsRequested,
                            @Cast("cudnnPoolingMode_t*") IntBuffer mode,
                            @Cast("cudnnNanPropagation_t*") IntBuffer maxpoolingNanOpt,
                            IntBuffer nbDims,
                            IntBuffer windowDimA,
                            IntBuffer paddingA,
                            IntBuffer strideA);
/** Same native function; all pointer arguments are {@code int[]} arrays. */
public static native @Cast("cudnnStatus_t") int cudnnGetPoolingNdDescriptor(cudnnPoolingStruct poolingDesc,
                            int nbDimsRequested,
                            @Cast("cudnnPoolingMode_t*") int[] mode,
                            @Cast("cudnnNanPropagation_t*") int[] maxpoolingNanOpt,
                            int[] nbDims,
                            int[] windowDimA,
                            int[] paddingA,
                            int[] strideA);

/**
 * Binding for the native {@code cudnnGetPoolingNdForwardOutputDim}; the returned int carries
 * the native {@code cudnnStatus_t} value.
 * Takes a pooling descriptor, an input tensor descriptor, {@code nbDims} by value and a
 * non-const int array {@code outputTensorDimA} (presumably receives {@code nbDims} output
 * dimensions - TODO confirm).
 * The three overloads differ only in the Java type of {@code outputTensorDimA};
 * this one uses {@link IntPointer}.
 */
public static native @Cast("cudnnStatus_t") int cudnnGetPoolingNdForwardOutputDim(cudnnPoolingStruct poolingDesc,
                                  cudnnTensorStruct inputTensorDesc,
                                  int nbDims,
                                  IntPointer outputTensorDimA);
/** Same native function; {@code outputTensorDimA} is an {@link IntBuffer}. */
public static native @Cast("cudnnStatus_t") int cudnnGetPoolingNdForwardOutputDim(cudnnPoolingStruct poolingDesc,
                                  cudnnTensorStruct inputTensorDesc,
                                  int nbDims,
                                  IntBuffer outputTensorDimA);
/** Same native function; {@code outputTensorDimA} is an {@code int[]}. */
public static native @Cast("cudnnStatus_t") int cudnnGetPoolingNdForwardOutputDim(cudnnPoolingStruct poolingDesc,
                                  cudnnTensorStruct inputTensorDesc,
                                  int nbDims,
                                  int[] outputTensorDimA);

/**
 * Binding for the native {@code cudnnGetPooling2dForwardOutputDim}; the returned int carries
 * the native {@code cudnnStatus_t} value.
 * Takes a pooling descriptor, an input tensor descriptor and four non-const int pointers
 * {@code n}, {@code c}, {@code h}, {@code w} (presumably receive the output dimensions -
 * TODO confirm).
 * The three overloads differ only in the Java pointer type; this one uses {@link IntPointer}.
 */
public static native @Cast("cudnnStatus_t") int cudnnGetPooling2dForwardOutputDim(cudnnPoolingStruct poolingDesc,
                                  cudnnTensorStruct inputTensorDesc,
                                  IntPointer n,
                                  IntPointer c,
                                  IntPointer h,
                                  IntPointer w);
/** Same native function; the four outputs are {@link IntBuffer}s. */
public static native @Cast("cudnnStatus_t") int cudnnGetPooling2dForwardOutputDim(cudnnPoolingStruct poolingDesc,
                                  cudnnTensorStruct inputTensorDesc,
                                  IntBuffer n,
                                  IntBuffer c,
                                  IntBuffer h,
                                  IntBuffer w);
/** Same native function; the four outputs are {@code int[]} arrays. */
public static native @Cast("cudnnStatus_t") int cudnnGetPooling2dForwardOutputDim(cudnnPoolingStruct poolingDesc,
                                  cudnnTensorStruct inputTensorDesc,
                                  int[] n,
                                  int[] c,
                                  int[] h,
                                  int[] w);

/* Destroy an instance of pooling descriptor */
/**
 * Binding for the native {@code cudnnDestroyPoolingDescriptor}; the returned int carries the
 * native {@code cudnnStatus_t} value. Takes the descriptor directly (no {@code @ByPtrPtr}),
 * unlike cudnnCreatePoolingDescriptor.
 */
public static native @Cast("cudnnStatus_t") int cudnnDestroyPoolingDescriptor(cudnnPoolingStruct poolingDesc);

/* Pooling functions: All of the form "output = alpha * Op(inputs) + beta * output" */

/* Function to perform forward pooling */
/**
 * Binding for the native {@code cudnnPoolingForward}; the returned int carries the native
 * {@code cudnnStatus_t} value.
 * {@code alpha}, {@code x} and {@code beta} are const in the native signature; {@code y} is a
 * mutable pointer, i.e. the "output" of the alpha/beta form described for the pooling functions.
 */
public static native @Cast("cudnnStatus_t") int cudnnPoolingForward(cudnnContext handle,
                    cudnnPoolingStruct poolingDesc,
                    @Const Pointer alpha,
                    cudnnTensorStruct xDesc,
                    @Const Pointer x,
                    @Const Pointer beta,
                    cudnnTensorStruct yDesc,
                    Pointer y);

/* Function to perform backward pooling */
/**
 * Binding for the native {@code cudnnPoolingBackward}; the returned int carries the native
 * {@code cudnnStatus_t} value.
 * {@code alpha}, {@code y}, {@code dy}, {@code x} and {@code beta} are const in the native
 * signature; {@code dx} is the only mutable data pointer.
 */
public static native @Cast("cudnnStatus_t") int cudnnPoolingBackward(cudnnContext handle,
                     cudnnPoolingStruct poolingDesc,
                     @Const Pointer alpha,
                     cudnnTensorStruct yDesc,
                     @Const Pointer y,
                     cudnnTensorStruct dyDesc,
                     @Const Pointer dy,
                     cudnnTensorStruct xDesc,
                     @Const Pointer x,
                     @Const Pointer beta,
                     cudnnTensorStruct dxDesc,
                     Pointer dx);

/*
 * activation mode
 */
/**
 * enum cudnnActivationMode_t
 * <p>Values of the native enum, exposed as plain int constants. The {@code mode} argument of
 * cudnnSetActivationDescriptor is cast to this enum type, and the {@code mode} output of
 * cudnnGetActivationDescriptor is cast to a pointer to it.
 */
public static final int
    CUDNN_ACTIVATION_SIGMOID      = 0,
    CUDNN_ACTIVATION_RELU         = 1,
    CUDNN_ACTIVATION_TANH         = 2,
    CUDNN_ACTIVATION_CLIPPED_RELU = 3,
    CUDNN_ACTIVATION_ELU          = 4,
    CUDNN_ACTIVATION_IDENTITY     = 5;

/* Activation functions: All of the form "output = alpha * Op(inputs) + beta * output" */
/**
 * Binding for the native {@code cudnnCreateActivationDescriptor}; the returned int carries the
 * native {@code cudnnStatus_t} value. {@code activationDesc} is annotated {@code @ByPtrPtr},
 * i.e. it is handed to native code as a pointer to the descriptor pointer.
 */
public static native @Cast("cudnnStatus_t") int cudnnCreateActivationDescriptor(@ByPtrPtr cudnnActivationStruct activationDesc);

/**
 * Binding for the native {@code cudnnSetActivationDescriptor}; the returned int carries the
 * native {@code cudnnStatus_t} value.
 * {@code mode} is cast to {@code cudnnActivationMode_t} (CUDNN_ACTIVATION_* constants) and
 * {@code reluNanOpt} to {@code cudnnNanPropagation_t}; {@code coef} is a double whose meaning
 * depends on the mode, as noted inline.
 */
public static native @Cast("cudnnStatus_t") int cudnnSetActivationDescriptor(cudnnActivationStruct activationDesc,
                             @Cast("cudnnActivationMode_t") int mode,
                             @Cast("cudnnNanPropagation_t") int reluNanOpt,
                             double coef); /* ceiling for clipped RELU, alpha for ELU */

/**
 * Binding for the native {@code cudnnGetActivationDescriptor}; the returned int carries the
 * native {@code cudnnStatus_t} value.
 * Mirrors cudnnSetActivationDescriptor with each setting passed as a pointer: {@code mode} is
 * cast to a pointer to {@code cudnnActivationMode_t}, {@code reluNanOpt} to a pointer to
 * {@code cudnnNanPropagation_t}, and {@code coef} is a pointer to double.
 * The three overloads differ only in the Java pointer types; this one uses
 * {@link IntPointer} / {@link DoublePointer}.
 */
public static native @Cast("cudnnStatus_t") int cudnnGetActivationDescriptor(cudnnActivationStruct activationDesc,
                             @Cast("cudnnActivationMode_t*") IntPointer mode,
                             @Cast("cudnnNanPropagation_t*") IntPointer reluNanOpt,
                             DoublePointer coef);
/** Same native function; uses {@link IntBuffer} / {@link DoubleBuffer}. */
public static native @Cast("cudnnStatus_t") int cudnnGetActivationDescriptor(cudnnActivationStruct activationDesc,
                             @Cast("cudnnActivationMode_t*") IntBuffer mode,
                             @Cast("cudnnNanPropagation_t*") IntBuffer reluNanOpt,
                             DoubleBuffer coef);
/** Same native function; uses {@code int[]} / {@code double[]}. */
public static native @Cast("cudnnStatus_t") int cudnnGetActivationDescriptor(cudnnActivationStruct activationDesc,
                             @Cast("cudnnActivationMode_t*") int[] mode,
                             @Cast("cudnnNanPropagation_t*") int[] reluNanOpt,
                             double[] coef); /* ceiling for clipped RELU, alpha for ELU */

/**
 * Binding for the native {@code cudnnDestroyActivationDescriptor}; the returned int carries
 * the native {@code cudnnStatus_t} value. Takes the descriptor directly (no {@code @ByPtrPtr}),
 * unlike cudnnCreateActivationDescriptor.
 */
public static native @Cast("cudnnStatus_t") int cudnnDestroyActivationDescriptor(cudnnActivationStruct activationDesc);

/**
 * Function to perform forward activation. Native binding for {@code cudnnActivationForward}.
 *
 * @param handle         cuDNN context
 * @param activationDesc activation descriptor selecting the operation
 * @param alpha          const pointer to the scaling factor applied to the operation result
 *                       (activation functions have the form output = alpha * Op(inputs) + beta * output)
 * @param xDesc          tensor descriptor for {@code x}
 * @param x              const input data
 * @param beta           const pointer to the scaling factor applied to the prior output
 * @param yDesc          tensor descriptor for {@code y}
 * @param y              output data (non-const pointer)
 * @return the native {@code cudnnStatus_t} status, cast to {@code int}
 */
public static native @Cast("cudnnStatus_t") int cudnnActivationForward(cudnnContext handle,
                       cudnnActivationStruct activationDesc,
                       @Const Pointer alpha,
                       cudnnTensorStruct xDesc,
                       @Const Pointer x,
                       @Const Pointer beta,
                       cudnnTensorStruct yDesc,
                       Pointer y);

/**
 * Function to perform backward activation. Native binding for {@code cudnnActivationBackward}.
 *
 * @param handle         cuDNN context
 * @param activationDesc activation descriptor selecting the operation
 * @param alpha          const pointer to a scaling factor
 * @param yDesc          tensor descriptor for {@code y}
 * @param y              const data described by {@code yDesc}
 * @param dyDesc         tensor descriptor for {@code dy}
 * @param dy             const data described by {@code dyDesc}
 * @param xDesc          tensor descriptor for {@code x}
 * @param x              const data described by {@code xDesc}
 * @param beta           const pointer to a scaling factor
 * @param dxDesc         tensor descriptor for {@code dx}
 * @param dx             the only non-const data pointer, hence the result location
 * @return the native {@code cudnnStatus_t} status, cast to {@code int}
 */
public static native @Cast("cudnnStatus_t") int cudnnActivationBackward(cudnnContext handle,
                        cudnnActivationStruct activationDesc,
                        @Const Pointer alpha,
                        cudnnTensorStruct yDesc,
                        @Const Pointer y,
                        cudnnTensorStruct dyDesc,
                        @Const Pointer dy,
                        cudnnTensorStruct xDesc,
                        @Const Pointer x,
                        @Const Pointer beta,
                        cudnnTensorStruct dxDesc,
                        Pointer dx);

/**
 * Create an instance of LRN (Local Response Normalization) descriptor.
 * Uses lrnN=5, lrnAlpha=1e-4, lrnBeta=0.75, lrnK=2.0 as defaults from Krizhevsky'12 ImageNet paper.
 *
 * @param normDesc LRN descriptor handle, handed to native code as a pointer-to-pointer
 *        ({@code @ByPtrPtr}); presumably receives the newly created descriptor
 * @return the native {@code cudnnStatus_t} status, cast to {@code int}
 */
public static native @Cast("cudnnStatus_t") int cudnnCreateLRNDescriptor(@ByPtrPtr cudnnLRNStruct normDesc);

/* Allowed ranges for the lrnN, lrnK and lrnBeta settings of an LRN descriptor. */
public static final int CUDNN_LRN_MIN_N = 1;       /* minimum allowed lrnN */
public static final int CUDNN_LRN_MAX_N = 16;      /* maximum allowed lrnN */
public static final double CUDNN_LRN_MIN_K = 1e-5;    /* minimum allowed lrnK */
public static final double CUDNN_LRN_MIN_BETA = 0.01; /* minimum allowed lrnBeta */

/* LRN layer mode */
/**
 * enum cudnnLRNMode_t. Only one mode is declared here; it is the value expected by
 * parameters annotated {@code @Cast("cudnnLRNMode_t")}.
 */
public static final int
    CUDNN_LRN_CROSS_CHANNEL_DIM1 = 0; /* Normalize across tensor's dimA[1] dimension */

/**
 * Native binding for {@code cudnnSetLRNDescriptor}.
 * Uses a window [center-lookBehind, center+lookAhead], where
 * lookBehind = floor( (lrnN-1)/2 ), lookAhead = lrnN-lookBehind-1.
 * Values of double parameters cast to tensor data type.
 *
 * @param normDesc LRN descriptor to configure
 * @param lrnN     window size, passed as native {@code unsigned}
 *                 (allowed range given by CUDNN_LRN_MIN_N / CUDNN_LRN_MAX_N)
 * @param lrnAlpha LRN alpha setting
 * @param lrnBeta  LRN beta setting (minimum given by CUDNN_LRN_MIN_BETA)
 * @param lrnK     LRN k setting (minimum given by CUDNN_LRN_MIN_K)
 * @return the native {@code cudnnStatus_t} status, cast to {@code int}
 */
public static native @Cast("cudnnStatus_t") int cudnnSetLRNDescriptor(cudnnLRNStruct normDesc, @Cast("unsigned") int lrnN, double lrnAlpha, double lrnBeta, double lrnK);
/**
 * Retrieve the settings currently stored in an LRN layer descriptor.
 * Any of the provided pointers can be NULL (no corresponding value will be returned).
 * The three overloads differ only in the Java carrier used for the output locations:
 * JavaCPP pointers (this overload), NIO buffers, or primitive arrays.
 *
 * @param normDesc LRN descriptor to query
 * @param lrnN     location for the window size (native {@code unsigned*})
 * @param lrnAlpha location for the alpha setting
 * @param lrnBeta  location for the beta setting
 * @param lrnK     location for the k setting
 * @return the native {@code cudnnStatus_t} status, cast to {@code int}
 */
public static native @Cast("cudnnStatus_t") int cudnnGetLRNDescriptor(cudnnLRNStruct normDesc, @Cast("unsigned*") IntPointer lrnN, DoublePointer lrnAlpha, DoublePointer lrnBeta, DoublePointer lrnK);
/** NIO-buffer overload of {@code cudnnGetLRNDescriptor}; same arguments, passed as buffers. */
public static native @Cast("cudnnStatus_t") int cudnnGetLRNDescriptor(cudnnLRNStruct normDesc, @Cast("unsigned*") IntBuffer lrnN, DoubleBuffer lrnAlpha, DoubleBuffer lrnBeta, DoubleBuffer lrnK);
/** Primitive-array overload of {@code cudnnGetLRNDescriptor}; same arguments, passed as arrays. */
public static native @Cast("cudnnStatus_t") int cudnnGetLRNDescriptor(cudnnLRNStruct normDesc, @Cast("unsigned*") int[] lrnN, double[] lrnAlpha, double[] lrnBeta, double[] lrnK);

/**
 * Destroy an instance of LRN descriptor.
 *
 * @param lrnDesc LRN descriptor handle to destroy (same handle type that the other
 *        LRN bindings name {@code normDesc})
 * @return the native {@code cudnnStatus_t} status, cast to {@code int}
 */
public static native @Cast("cudnnStatus_t") int cudnnDestroyLRNDescriptor(cudnnLRNStruct lrnDesc);

/* LRN functions: output = alpha * normalize(x) + beta * old_y */

/**
 * LRN cross-channel forward computation. Double parameters cast to tensor data type.
 *
 * @param handle   cuDNN context
 * @param normDesc LRN descriptor holding the normalization settings
 * @param lrnMode  LRN mode, cast to {@code cudnnLRNMode_t}
 *                 (CUDNN_LRN_CROSS_CHANNEL_DIM1 is the only value declared in this file)
 * @param alpha    const pointer to a scaling factor
 * @param xDesc    tensor descriptor for {@code x}
 * @param x        const input data
 * @param beta     const pointer to a scaling factor
 * @param yDesc    tensor descriptor for {@code y}
 * @param y        output data (non-const pointer)
 * @return the native {@code cudnnStatus_t} status, cast to {@code int}
 */
public static native @Cast("cudnnStatus_t") int cudnnLRNCrossChannelForward(cudnnContext handle,
                            cudnnLRNStruct normDesc,
                            @Cast("cudnnLRNMode_t") int lrnMode,
                            @Const Pointer alpha,
                            cudnnTensorStruct xDesc,
                            @Const Pointer x,
                            @Const Pointer beta,
                            cudnnTensorStruct yDesc,
                            Pointer y);

/**
 * LRN cross-channel backward computation. Double parameters cast to tensor data type.
 *
 * @param handle   cuDNN context
 * @param normDesc LRN descriptor holding the normalization settings
 * @param lrnMode  LRN mode, cast to {@code cudnnLRNMode_t}
 * @param alpha    const pointer to a scaling factor
 * @param yDesc    tensor descriptor for {@code y}
 * @param y        const data described by {@code yDesc}
 * @param dyDesc   tensor descriptor for {@code dy}
 * @param dy       const data described by {@code dyDesc}
 * @param xDesc    tensor descriptor for {@code x}
 * @param x        const data described by {@code xDesc}
 * @param beta     const pointer to a scaling factor
 * @param dxDesc   tensor descriptor for {@code dx}
 * @param dx       the only non-const data pointer, hence the result location
 * @return the native {@code cudnnStatus_t} status, cast to {@code int}
 */
public static native @Cast("cudnnStatus_t") int cudnnLRNCrossChannelBackward(cudnnContext handle,
                             cudnnLRNStruct normDesc,
                             @Cast("cudnnLRNMode_t") int lrnMode,
                             @Const Pointer alpha,
                             cudnnTensorStruct yDesc,
                             @Const Pointer y,
                             cudnnTensorStruct dyDesc,
                             @Const Pointer dy,
                             cudnnTensorStruct xDesc,
                             @Const Pointer x,
                             @Const Pointer beta,
                             cudnnTensorStruct dxDesc,
                             Pointer dx);

/**
 * enum cudnnDivNormMode_t. Only one mode is declared here; it is the value expected by
 * the {@code mode} argument of the cudnnDivisiveNormalization* bindings.
 */
public static final int
    CUDNN_DIVNORM_PRECOMPUTED_MEANS = 0;

/* LCN/divisive normalization functions: y = alpha * normalize(x) + beta * y */
/**
 * Native binding for {@code cudnnDivisiveNormalizationForward}.
 *
 * @param handle   cuDNN context
 * @param normDesc normalization descriptor (same handle type as the LRN descriptor)
 * @param mode     mode, cast to {@code cudnnDivNormMode_t}
 * @param alpha    const pointer to the alpha factor of the formula above
 * @param xDesc    tensor descriptor for {@code x}
 * @param x        const input data
 * @param means    const means data (the only declared mode is named PRECOMPUTED_MEANS,
 *                 so presumably supplied by the caller -- confirm nullability in the cuDNN docs)
 * @param temp     non-const scratch pointer; NOTE(review): size/layout requirements are not
 *                 visible here
 * @param temp2    second non-const scratch pointer; same caveat as {@code temp}
 * @param beta     const pointer to the beta factor of the formula above
 * @param yDesc    tensor descriptor for {@code y}
 * @param y        output data (non-const pointer)
 * @return the native {@code cudnnStatus_t} status, cast to {@code int}
 */
public static native @Cast("cudnnStatus_t") int cudnnDivisiveNormalizationForward(cudnnContext handle,
                                  cudnnLRNStruct normDesc,
                                  @Cast("cudnnDivNormMode_t") int mode,
                                  @Const Pointer alpha,
                                  cudnnTensorStruct xDesc,
                                  @Const Pointer x,
                                  @Const Pointer means,
                                  Pointer temp,
                                  Pointer temp2,
                                  @Const Pointer beta,
                                  cudnnTensorStruct yDesc,
                                  Pointer y);

/**
 * Native binding for {@code cudnnDivisiveNormalizationBackward}.
 *
 * @param handle       cuDNN context
 * @param normDesc     normalization descriptor (same handle type as the LRN descriptor)
 * @param mode         mode, cast to {@code cudnnDivNormMode_t}
 * @param alpha        const pointer to a scaling factor
 * @param xDesc        tensor descriptor; the name suggests it describes {@code x}, and no separate
 *                     descriptor is passed for {@code means} or {@code dy} -- NOTE(review): confirm
 *                     which buffers it covers
 * @param x            const input data
 * @param means        const means data
 * @param dy           const incoming differential data
 * @param temp         non-const scratch pointer; size/layout requirements are not visible here
 * @param temp2        second non-const scratch pointer
 * @param beta         const pointer to a scaling factor
 * @param dXdMeansDesc tensor descriptor; by its name presumably shared by {@code dx} and {@code dMeans}
 * @param dx           output differential (non-const pointer)
 * @param dMeans       output means differential, can be NULL
 * @return the native {@code cudnnStatus_t} status, cast to {@code int}
 */
public static native @Cast("cudnnStatus_t") int cudnnDivisiveNormalizationBackward(cudnnContext handle,
                                   cudnnLRNStruct normDesc,
                                   @Cast("cudnnDivNormMode_t") int mode,
                                   @Const Pointer alpha,
                                   cudnnTensorStruct xDesc,
                                   @Const Pointer x,
                                   @Const Pointer means,
                                   @Const Pointer dy,
                                   Pointer temp,
                                   Pointer temp2,
                                   @Const Pointer beta,
                                   cudnnTensorStruct dXdMeansDesc,
                                   Pointer dx,
                                   Pointer dMeans); /* output means differential, can be NULL */

/**
 * enum cudnnBatchNormMode_t. Values for parameters annotated
 * {@code @Cast("cudnnBatchNormMode_t")}; each mode fixes the dimensions of the
 * bnScale/bnBias tensors as described next to the constant.
 */
public static final int
    /* bnScale, bnBias tensor dims are 1xCxHxWx.. (one value per CHW...-slice, normalized over N slice) */
    CUDNN_BATCHNORM_PER_ACTIVATION = 0,

    /* bnScale, bnBias tensor dims are 1xCx1x1 (one value per C-dim normalized over Nx1xHxW subtensors) */
    CUDNN_BATCHNORM_SPATIAL = 1,

    /*
     * bnScale, bnBias tensor dims are 1xCx1x1 (one value per C-dim normalized over Nx1xHxW subtensors).
     * May be faster than CUDNN_BATCHNORM_SPATIAL but imposes some limits on the range of values
     */
    CUDNN_BATCHNORM_SPATIAL_PERSISTENT = 2;

/* Lower bound for the epsilon argument taken by the cudnnBatchNormalization* bindings. */
public static final double CUDNN_BN_MIN_EPSILON = 1e-5; /* Minimum epsilon allowed to be used in the Batch Normalization formula */

/**
 * Derives a tensor descriptor from layer data descriptor for BatchNormalization
 * scale, invVariance, bnBias, bnScale tensors. Use this tensor desc for
 * bnScaleBiasMeanVarDesc and bnScaleBiasDiffDesc in Batch Normalization forward and backward functions.
 *
 * @param derivedBnDesc tensor descriptor that receives the derived settings (passed as a plain
 *                      handle, so presumably it must already have been created)
 * @param xDesc         layer data descriptor to derive from
 * @param mode          batch-normalization mode, cast to {@code cudnnBatchNormMode_t}
 * @return the native {@code cudnnStatus_t} status, cast to {@code int}
 */
public static native @Cast("cudnnStatus_t") int cudnnDeriveBNTensorDescriptor(cudnnTensorStruct derivedBnDesc,
                              cudnnTensorStruct xDesc,
                              @Cast("cudnnBatchNormMode_t") int mode);

/**
 * enum cudnnBatchNormOps_t. Values for the {@code bnOps} argument of the "Ex"
 * batch-normalization bindings; selects which operations are fused with the normalization.
 */
public static final int
    CUDNN_BATCHNORM_OPS_BN                = 0, /* do batch normalization only */
    CUDNN_BATCHNORM_OPS_BN_ACTIVATION     = 1, /* do batchNorm, then activation */
    CUDNN_BATCHNORM_OPS_BN_ADD_ACTIVATION = 2; /* do batchNorm, then elemWiseAdd, then activation */

/**
 * Native binding for {@code cudnnGetBatchNormalizationForwardTrainingExWorkspaceSize}.
 * By its name, reports the workspace size for
 * {@code cudnnBatchNormalizationForwardTrainingEx}, which takes a
 * {@code workspace}/{@code workSpaceSizeInBytes} pair.
 *
 * @param handle                 cuDNN context
 * @param mode                   batch-normalization mode, cast to {@code cudnnBatchNormMode_t}
 * @param bnOps                  fused-operation selector, cast to {@code cudnnBatchNormOps_t}
 * @param xDesc                  tensor descriptor for x
 * @param zDesc                  tensor descriptor for z
 * @param yDesc                  tensor descriptor for y
 * @param bnScaleBiasMeanVarDesc tensor descriptor for the scale/bias/mean/variance tensors
 * @param activationDesc         activation descriptor
 * @param sizeInBytes            {@code size_t*} location; presumably receives the size in bytes
 * @return the native {@code cudnnStatus_t} status, cast to {@code int}
 */
public static native @Cast("cudnnStatus_t") int cudnnGetBatchNormalizationForwardTrainingExWorkspaceSize(cudnnContext handle,
                                                         @Cast("cudnnBatchNormMode_t") int mode,
                                                         @Cast("cudnnBatchNormOps_t") int bnOps,
                                                         cudnnTensorStruct xDesc,
                                                         cudnnTensorStruct zDesc,
                                                         cudnnTensorStruct yDesc,
                                                         cudnnTensorStruct bnScaleBiasMeanVarDesc,
                                                         cudnnActivationStruct activationDesc,
                                                         @Cast("size_t*") SizeTPointer sizeInBytes);

/**
 * Native binding for {@code cudnnGetBatchNormalizationBackwardExWorkspaceSize}.
 * By its name, reports the workspace size for {@code cudnnBatchNormalizationBackwardEx},
 * which takes a {@code workSpace}/{@code workSpaceSizeInBytes} pair.
 *
 * @param handle           cuDNN context
 * @param mode             batch-normalization mode, cast to {@code cudnnBatchNormMode_t}
 * @param bnOps            fused-operation selector, cast to {@code cudnnBatchNormOps_t}
 * @param xDesc            tensor descriptor for x
 * @param yDesc            tensor descriptor for y
 * @param dyDesc           tensor descriptor for dy
 * @param dzDesc           tensor descriptor for dz
 * @param dxDesc           tensor descriptor for dx
 * @param dBnScaleBiasDesc tensor descriptor for the scale/bias differentials
 * @param activationDesc   activation descriptor
 * @param sizeInBytes      {@code size_t*} location; presumably receives the size in bytes
 * @return the native {@code cudnnStatus_t} status, cast to {@code int}
 */
public static native @Cast("cudnnStatus_t") int cudnnGetBatchNormalizationBackwardExWorkspaceSize(cudnnContext handle,
                                                  @Cast("cudnnBatchNormMode_t") int mode,
                                                  @Cast("cudnnBatchNormOps_t") int bnOps,
                                                  cudnnTensorStruct xDesc,
                                                  cudnnTensorStruct yDesc,
                                                  cudnnTensorStruct dyDesc,
                                                  cudnnTensorStruct dzDesc,
                                                  cudnnTensorStruct dxDesc,
                                                  cudnnTensorStruct dBnScaleBiasDesc,
                                                  cudnnActivationStruct activationDesc,
                                                  @Cast("size_t*") SizeTPointer sizeInBytes);

/**
 * Native binding for {@code cudnnGetBatchNormalizationTrainingExReserveSpaceSize}.
 * By its name, reports the size of the {@code reserveSpace} buffer taken by the
 * "Ex" batch-normalization training bindings.
 *
 * @param handle         cuDNN context
 * @param mode           batch-normalization mode, cast to {@code cudnnBatchNormMode_t}
 * @param bnOps          fused-operation selector, cast to {@code cudnnBatchNormOps_t}
 * @param activationDesc activation descriptor
 * @param xDesc          tensor descriptor for x
 * @param sizeInBytes    {@code size_t*} location; presumably receives the size in bytes
 * @return the native {@code cudnnStatus_t} status, cast to {@code int}
 */
public static native @Cast("cudnnStatus_t") int cudnnGetBatchNormalizationTrainingExReserveSpaceSize(cudnnContext handle,
                                                     @Cast("cudnnBatchNormMode_t") int mode,
                                                     @Cast("cudnnBatchNormOps_t") int bnOps,
                                                     cudnnActivationStruct activationDesc,
                                                     cudnnTensorStruct xDesc,
                                                     @Cast("size_t*") SizeTPointer sizeInBytes);

/**
 * Computes y = BN(x). Also accumulates moving averages of mean and inverse variances.
 *
 * @param handle                   cuDNN context
 * @param mode                     batch-normalization mode, cast to {@code cudnnBatchNormMode_t}
 * @param alpha                    const pointer to a scaling factor
 * @param beta                     const pointer to a scaling factor
 * @param xDesc                    tensor descriptor for {@code x}
 * @param x                        const input data
 * @param yDesc                    tensor descriptor for {@code y}
 * @param y                        output data (non-const pointer)
 * @param bnScaleBiasMeanVarDesc   tensor descriptor for the scale/bias/mean/variance tensors
 *                                 (see cudnnDeriveBNTensorDescriptor)
 * @param bnScale                  const scale data
 * @param bnBias                   const bias data
 * @param exponentialAverageFactor factor for the moving averages; NOTE(review): exact update
 *                                 formula is not visible here
 * @param resultRunningMean        non-const running-mean buffer
 * @param resultRunningVariance    non-const running-variance buffer
 * @param epsilon                  epsilon of the batch-normalization formula
 *                                 (minimum given by CUDNN_BN_MIN_EPSILON)
 * @param resultSaveMean           non-const buffer; by its name, receives the saved mean
 * @param resultSaveInvVariance    non-const buffer; by its name, receives the saved inverse variance
 * @return the native {@code cudnnStatus_t} status, cast to {@code int}
 */
public static native @Cast("cudnnStatus_t") int cudnnBatchNormalizationForwardTraining(
    cudnnContext handle,
    @Cast("cudnnBatchNormMode_t") int mode,

    @Const Pointer alpha,
    @Const Pointer beta,

    cudnnTensorStruct xDesc,
    @Const Pointer x,
    cudnnTensorStruct yDesc,
    Pointer y,
    cudnnTensorStruct bnScaleBiasMeanVarDesc,
    @Const Pointer bnScale,
    @Const Pointer bnBias,
    double exponentialAverageFactor,
    Pointer resultRunningMean,
    Pointer resultRunningVariance,
    double epsilon,
    Pointer resultSaveMean,
    Pointer resultSaveInvVariance);

/**
 * Computes y = relu(BN(x) + z). Also accumulates moving averages of mean and inverse variances.
 * Extended form of cudnnBatchNormalizationForwardTraining: adds {@code bnOps}, the {@code z}
 * input, an activation descriptor, and caller-provided workspace and reserve-space buffers.
 *
 * @param handle                   cuDNN context
 * @param mode                     batch-normalization mode, cast to {@code cudnnBatchNormMode_t}
 * @param bnOps                    fused-operation selector, cast to {@code cudnnBatchNormOps_t}
 * @param alpha                    const pointer to a scaling factor
 * @param beta                     const pointer to a scaling factor
 * @param xDesc                    tensor descriptor for {@code xData}
 * @param xData                    const input data
 * @param zDesc                    tensor descriptor for {@code zData}
 * @param zData                    const data for the element-wise add
 * @param yDesc                    tensor descriptor for {@code yData}
 * @param yData                    output data (non-const pointer)
 * @param bnScaleBiasMeanVarDesc   tensor descriptor for the scale/bias/mean/variance tensors
 * @param bnScale                  const scale data
 * @param bnBias                   const bias data
 * @param exponentialAverageFactor factor for the moving averages
 * @param resultRunningMean        non-const running-mean buffer
 * @param resultRunningVariance    non-const running-variance buffer
 * @param epsilon                  epsilon of the batch-normalization formula
 * @param resultSaveMean           non-const buffer; by its name, receives the saved mean
 * @param resultSaveInvVariance    non-const buffer; by its name, receives the saved inverse variance
 * @param activationDesc           activation descriptor
 * @param workspace                workspace buffer
 * @param workSpaceSizeInBytes     size of {@code workspace} in bytes (native {@code size_t})
 * @param reserveSpace             reserve-space buffer
 * @param reserveSpaceSizeInBytes  size of {@code reserveSpace} in bytes (native {@code size_t})
 * @return the native {@code cudnnStatus_t} status, cast to {@code int}
 */
public static native @Cast("cudnnStatus_t") int cudnnBatchNormalizationForwardTrainingEx(
    cudnnContext handle,
    @Cast("cudnnBatchNormMode_t") int mode,
    @Cast("cudnnBatchNormOps_t") int bnOps,

    @Const Pointer alpha,
    @Const Pointer beta,

    cudnnTensorStruct xDesc,
    @Const Pointer xData,
    cudnnTensorStruct zDesc,
    @Const Pointer zData,
    cudnnTensorStruct yDesc,
    Pointer yData,

    cudnnTensorStruct bnScaleBiasMeanVarDesc,
    @Const Pointer bnScale,
    @Const Pointer bnBias,

    double exponentialAverageFactor,
    Pointer resultRunningMean,
    Pointer resultRunningVariance,
    double epsilon,
    Pointer resultSaveMean,
    Pointer resultSaveInvVariance,

    cudnnActivationStruct activationDesc,
    Pointer workspace,
    @Cast("size_t") long workSpaceSizeInBytes,
    Pointer reserveSpace,
    @Cast("size_t") long reserveSpaceSizeInBytes);

/**
 * Performs Batch Normalization during Inference:
 * y[i] = bnScale[k]*(x[i]-estimatedMean[k])/sqrt(epsilon+estimatedVariance[k]) + bnBias[k]
 * with bnScale, bnBias, estimatedMean, estimatedVariance tensors indexed
 * according to spatial or per-activation mode. Refer to cudnnBatchNormalizationForwardTraining
 * above for notes on function arguments.
 *
 * @param handle                 cuDNN context
 * @param mode                   batch-normalization mode, cast to {@code cudnnBatchNormMode_t}
 * @param alpha                  const pointer to a scaling factor
 * @param beta                   const pointer to a scaling factor
 * @param xDesc                  tensor descriptor for {@code x}
 * @param x                      const input data
 * @param yDesc                  tensor descriptor for {@code y}
 * @param y                      output data (non-const pointer)
 * @param bnScaleBiasMeanVarDesc tensor descriptor for the scale/bias/mean/variance tensors
 * @param bnScale                const scale data
 * @param bnBias                 const bias data
 * @param estimatedMean          const mean data used in the formula above
 * @param estimatedVariance      const variance data used in the formula above
 * @param epsilon                epsilon of the formula above (minimum given by CUDNN_BN_MIN_EPSILON)
 * @return the native {@code cudnnStatus_t} status, cast to {@code int}
 */
public static native @Cast("cudnnStatus_t") int cudnnBatchNormalizationForwardInference(cudnnContext handle,
                                        @Cast("cudnnBatchNormMode_t") int mode,
                                        @Const Pointer alpha,
                                        @Const Pointer beta,
                                        cudnnTensorStruct xDesc,
                                        @Const Pointer x,
                                        cudnnTensorStruct yDesc,
                                        Pointer y,
                                        cudnnTensorStruct bnScaleBiasMeanVarDesc,
                                        @Const Pointer bnScale,
                                        @Const Pointer bnBias,
                                        @Const Pointer estimatedMean,
                                        @Const Pointer estimatedVariance,
                                        double epsilon);

/**
 * Performs backward pass of Batch Normalization layer. Returns x gradient,
 * bnScale gradient and bnBias gradient.
 *
 * @param handle           cuDNN context
 * @param mode             batch-normalization mode, cast to {@code cudnnBatchNormMode_t}
 * @param alphaDataDiff    const pointer to a scaling factor; by its name, applies to the data
 *                         differential ({@code dx})
 * @param betaDataDiff     const pointer to a scaling factor; by its name, applies to the data
 *                         differential
 * @param alphaParamDiff   const pointer to a scaling factor; by its name, applies to the parameter
 *                         differentials ({@code dBnScaleResult}, {@code dBnBiasResult})
 * @param betaParamDiff    const pointer to a scaling factor; by its name, applies to the parameter
 *                         differentials
 * @param xDesc            tensor descriptor for {@code x}
 * @param x                const input data
 * @param dyDesc           tensor descriptor for {@code dy}
 * @param dy               const incoming differential data
 * @param dxDesc           tensor descriptor for {@code dx}
 * @param dx               x gradient output (non-const pointer)
 * @param dBnScaleBiasDesc tensor descriptor for the scale/bias differentials
 * @param bnScale          const scale data
 * @param dBnScaleResult   bnScale gradient output (non-const pointer)
 * @param dBnBiasResult    bnBias gradient output (non-const pointer)
 * @param epsilon          epsilon of the batch-normalization formula
 * @param savedMean        const saved mean; presumably the {@code resultSaveMean} produced by
 *                         forward training -- confirm in the cuDNN docs
 * @param savedInvVariance const saved inverse variance; presumably the
 *                         {@code resultSaveInvVariance} produced by forward training
 * @return the native {@code cudnnStatus_t} status, cast to {@code int}
 */
public static native @Cast("cudnnStatus_t") int cudnnBatchNormalizationBackward(cudnnContext handle,
                                @Cast("cudnnBatchNormMode_t") int mode,
                                @Const Pointer alphaDataDiff,
                                @Const Pointer betaDataDiff,
                                @Const Pointer alphaParamDiff,
                                @Const Pointer betaParamDiff,
                                cudnnTensorStruct xDesc,
                                @Const Pointer x,
                                cudnnTensorStruct dyDesc,
                                @Const Pointer dy,
                                cudnnTensorStruct dxDesc,
                                Pointer dx,
                                cudnnTensorStruct dBnScaleBiasDesc,
                                @Const Pointer bnScale,
                                Pointer dBnScaleResult,
                                Pointer dBnBiasResult,
                                double epsilon,
                                @Const Pointer savedMean,
                                @Const Pointer savedInvVariance);

/**
 * Native binding for {@code cudnnBatchNormalizationBackwardEx}. Extended form of
 * cudnnBatchNormalizationBackward: adds {@code bnOps}, the {@code y} and {@code dz} tensors,
 * {@code bnBiasData}, an activation descriptor, and caller-provided workspace and
 * reserve-space buffers.
 *
 * @param handle                  cuDNN context
 * @param mode                    batch-normalization mode, cast to {@code cudnnBatchNormMode_t}
 * @param bnOps                   fused-operation selector, cast to {@code cudnnBatchNormOps_t}
 * @param alphaDataDiff           const pointer to a scaling factor (data differential, by its name)
 * @param betaDataDiff            const pointer to a scaling factor (data differential, by its name)
 * @param alphaParamDiff          const pointer to a scaling factor (parameter differentials, by its name)
 * @param betaParamDiff           const pointer to a scaling factor (parameter differentials, by its name)
 * @param xDesc                   tensor descriptor for {@code xData}
 * @param xData                   const input data
 * @param yDesc                   tensor descriptor for {@code yData}
 * @param yData                   const forward-output data
 * @param dyDesc                  tensor descriptor for {@code dyData}
 * @param dyData                  const incoming differential data
 * @param dzDesc                  tensor descriptor for {@code dzData}
 * @param dzData                  non-const differential buffer for z
 * @param dxDesc                  tensor descriptor for {@code dxData}
 * @param dxData                  non-const differential buffer for x
 * @param dBnScaleBiasDesc        tensor descriptor for the scale/bias tensors and their differentials
 * @param bnScaleData             const scale data
 * @param bnBiasData              const bias data
 * @param dBnScaleData            non-const scale differential buffer
 * @param dBnBiasData             non-const bias differential buffer
 * @param epsilon                 epsilon of the batch-normalization formula
 * @param savedMean               const saved mean
 * @param savedInvVariance        const saved inverse variance
 * @param activationDesc          activation descriptor
 * @param workSpace               workspace buffer
 * @param workSpaceSizeInBytes    size of {@code workSpace} in bytes (native {@code size_t})
 * @param reserveSpace            reserve-space buffer
 * @param reserveSpaceSizeInBytes size of {@code reserveSpace} in bytes (native {@code size_t})
 * @return the native {@code cudnnStatus_t} status, cast to {@code int}
 */
public static native @Cast("cudnnStatus_t") int cudnnBatchNormalizationBackwardEx(cudnnContext handle,
                                  @Cast("cudnnBatchNormMode_t") int mode,
                                  @Cast("cudnnBatchNormOps_t") int bnOps,

                                  @Const Pointer alphaDataDiff,
                                  @Const Pointer betaDataDiff,
                                  @Const Pointer alphaParamDiff,
                                  @Const Pointer betaParamDiff,
                                  cudnnTensorStruct xDesc,
                                  @Const Pointer xData,
                                  cudnnTensorStruct yDesc,
                                  @Const Pointer yData,
                                  cudnnTensorStruct dyDesc,
                                  @Const Pointer dyData,
                                  cudnnTensorStruct dzDesc,
                                  Pointer dzData,
                                  cudnnTensorStruct dxDesc,
                                  Pointer dxData,
                                  cudnnTensorStruct dBnScaleBiasDesc,
                                  @Const Pointer bnScaleData,
                                  @Const Pointer bnBiasData,
                                  Pointer dBnScaleData,
                                  Pointer dBnBiasData,
                                  double epsilon,
                                  @Const Pointer savedMean,
                                  @Const Pointer savedInvVariance,
                                  cudnnActivationStruct activationDesc,
                                  Pointer workSpace,
                                  @Cast("size_t") long workSpaceSizeInBytes,
                                  Pointer reserveSpace,
                                  @Cast("size_t") long reserveSpaceSizeInBytes);

/* APIs for spatial transformer network*/
/**
 * enum cudnnSamplerType_t. Only one sampler type is declared here; it is the value expected
 * by the {@code samplerType} argument of cudnnSetSpatialTransformerNdDescriptor.
 */
public static final int
    CUDNN_SAMPLER_BILINEAR = 0;

/**
 * Native binding for {@code cudnnCreateSpatialTransformerDescriptor}.
 *
 * @param stDesc spatial transformer descriptor handle, handed to native code as a
 *        pointer-to-pointer ({@code @ByPtrPtr}); presumably receives the newly created descriptor
 * @return the native {@code cudnnStatus_t} status, cast to {@code int}
 */
public static native @Cast("cudnnStatus_t") int cudnnCreateSpatialTransformerDescriptor(@ByPtrPtr cudnnSpatialTransformerStruct stDesc);

/**
 * Native binding for {@code cudnnSetSpatialTransformerNdDescriptor}. The three overloads
 * differ only in how {@code dimA} is supplied: JavaCPP pointer (this overload), NIO buffer,
 * or primitive array.
 *
 * @param stDesc      spatial transformer descriptor to configure
 * @param samplerType sampler type, cast to {@code cudnnSamplerType_t}
 * @param dataType    data type, cast to {@code cudnnDataType_t}
 * @param nbDims      number of dimensions; presumably the element count of {@code dimA}
 * @param dimA        const array of dimensions
 * @return the native {@code cudnnStatus_t} status, cast to {@code int}
 */
public static native @Cast("cudnnStatus_t") int cudnnSetSpatialTransformerNdDescriptor(cudnnSpatialTransformerStruct stDesc,
                                       @Cast("cudnnSamplerType_t") int samplerType,
                                       @Cast("cudnnDataType_t") int dataType,
                                       int nbDims,
                                       @Const IntPointer dimA);
/** NIO-buffer overload of {@code cudnnSetSpatialTransformerNdDescriptor}; {@code dimA} is an IntBuffer. */
public static native @Cast("cudnnStatus_t") int cudnnSetSpatialTransformerNdDescriptor(cudnnSpatialTransformerStruct stDesc,
                                       @Cast("cudnnSamplerType_t") int samplerType,
                                       @Cast("cudnnDataType_t") int dataType,
                                       int nbDims,
                                       @Const IntBuffer dimA);
/** Primitive-array overload of {@code cudnnSetSpatialTransformerNdDescriptor}; {@code dimA} is an int[]. */
public static native @Cast("cudnnStatus_t") int cudnnSetSpatialTransformerNdDescriptor(cudnnSpatialTransformerStruct stDesc,
                                       @Cast("cudnnSamplerType_t") int samplerType,
                                       @Cast("cudnnDataType_t") int dataType,
                                       int nbDims,
                                       @Const int[] dimA);

/**
 * Native binding for {@code cudnnDestroySpatialTransformerDescriptor}.
 *
 * @param stDesc spatial transformer descriptor handle to destroy
 * @return the native {@code cudnnStatus_t} status, cast to {@code int}
 */
public static native @Cast("cudnnStatus_t") int cudnnDestroySpatialTransformerDescriptor(cudnnSpatialTransformerStruct stDesc);

/**
 * Native binding for {@code cudnnSpatialTfGridGeneratorForward}.
 *
 * @param handle cuDNN context
 * @param stDesc spatial transformer descriptor
 * @param theta  const input data; NOTE(review): expected layout is not visible here
 * @param grid   output grid (non-const pointer)
 * @return the native {@code cudnnStatus_t} status, cast to {@code int}
 */
public static native @Cast("cudnnStatus_t") int cudnnSpatialTfGridGeneratorForward(cudnnContext handle,
                                   cudnnSpatialTransformerStruct stDesc,
                                   @Const Pointer theta,
                                   Pointer grid);

/**
 * Native binding for {@code cudnnSpatialTfGridGeneratorBackward}.
 *
 * @param handle cuDNN context
 * @param stDesc spatial transformer descriptor
 * @param dgrid  const grid differential input
 * @param dtheta theta differential output (non-const pointer)
 * @return the native {@code cudnnStatus_t} status, cast to {@code int}
 */
public static native @Cast("cudnnStatus_t") int cudnnSpatialTfGridGeneratorBackward(cudnnContext handle,
                                    cudnnSpatialTransformerStruct stDesc,
                                    @Const Pointer dgrid,
                                    Pointer dtheta);

/**
 * Native binding for {@code cudnnSpatialTfSamplerForward}.
 *
 * @param handle cuDNN context
 * @param stDesc spatial transformer descriptor
 * @param alpha  const pointer to a scaling factor
 * @param xDesc  tensor descriptor for {@code x}
 * @param x      const input data
 * @param grid   const grid data (presumably as produced by cudnnSpatialTfGridGeneratorForward)
 * @param beta   const pointer to a scaling factor
 * @param yDesc  tensor descriptor for {@code y}
 * @param y      output data (non-const pointer)
 * @return the native {@code cudnnStatus_t} status, cast to {@code int}
 */
public static native @Cast("cudnnStatus_t") int cudnnSpatialTfSamplerForward(cudnnContext handle,
                             cudnnSpatialTransformerStruct stDesc,
                             @Const Pointer alpha,
                             cudnnTensorStruct xDesc,
                             @Const Pointer x,
                             @Const Pointer grid,
                             @Const Pointer beta,
                             cudnnTensorStruct yDesc,
                             Pointer y);

/**
 * Native binding for {@code cudnnSpatialTfSamplerBackward}. Two results are written:
 * {@code dx} (scaled via {@code alpha}/{@code beta}, by parameter grouping) and {@code dgrid}
 * (scaled via {@code alphaDgrid}/{@code betaDgrid}, by name).
 *
 * @param handle     cuDNN context
 * @param stDesc     spatial transformer descriptor
 * @param alpha      const pointer to a scaling factor
 * @param xDesc      tensor descriptor for {@code x}
 * @param x          const input data
 * @param beta       const pointer to a scaling factor
 * @param dxDesc     tensor descriptor for {@code dx}
 * @param dx         input differential output (non-const pointer)
 * @param alphaDgrid const pointer to a scaling factor for the grid differential
 * @param dyDesc     tensor descriptor for {@code dy}
 * @param dy         const incoming differential data
 * @param grid       const grid data
 * @param betaDgrid  const pointer to a scaling factor for the grid differential
 * @param dgrid      grid differential output (non-const pointer)
 * @return the native {@code cudnnStatus_t} status, cast to {@code int}
 */
public static native @Cast("cudnnStatus_t") int cudnnSpatialTfSamplerBackward(cudnnContext handle,
                              cudnnSpatialTransformerStruct stDesc,
                              @Const Pointer alpha,
                              cudnnTensorStruct xDesc,
                              @Const Pointer x,
                              @Const Pointer beta,
                              cudnnTensorStruct dxDesc,
                              Pointer dx,
                              @Const Pointer alphaDgrid,
                              cudnnTensorStruct dyDesc,
                              @Const Pointer dy,
                              @Const Pointer grid,
                              @Const Pointer betaDgrid,
                              Pointer dgrid);

/**
 * Opaque handle type for a dropout descriptor. The class is marked {@code @Opaque}
 * and declares no members besides its two constructors.
 */
@Opaque
public static class cudnnDropoutStruct extends Pointer {
    /**
     * Pointer cast constructor. Invokes {@link Pointer#Pointer(Pointer)}.
     *
     * @param p pointer to reinterpret as a dropout descriptor handle
     */
    public cudnnDropoutStruct(Pointer p) {
        super(p);
    }

    /** Empty constructor. Calls {@code super((Pointer)null)}. */
    public cudnnDropoutStruct() {
        super((Pointer) null);
    }
}

/**
 * Native binding for {@code cudnnCreateDropoutDescriptor}.
 *
 * @param dropoutDesc dropout descriptor handle, handed to native code as a pointer-to-pointer
 *        ({@code @ByPtrPtr}); presumably receives the newly created descriptor
 * @return the native {@code cudnnStatus_t} status, cast to {@code int}
 */
public static native @Cast("cudnnStatus_t") int cudnnCreateDropoutDescriptor(@ByPtrPtr cudnnDropoutStruct dropoutDesc);

/**
 * Native binding for {@code cudnnDestroyDropoutDescriptor}.
 *
 * @param dropoutDesc dropout descriptor handle to destroy
 * @return the native {@code cudnnStatus_t} status, cast to {@code int}
 */
public static native @Cast("cudnnStatus_t") int cudnnDestroyDropoutDescriptor(cudnnDropoutStruct dropoutDesc);

/**
 * Helper function to determine size of the states to be passed to cudnnSetDropoutDescriptor.
 *
 * @param handle      cuDNN context
 * @param sizeInBytes {@code size_t*} location; presumably receives the size in bytes
 * @return the native {@code cudnnStatus_t} status, cast to {@code int}
 */
public static native @Cast("cudnnStatus_t") int cudnnDropoutGetStatesSize(cudnnContext handle, @Cast("size_t*") SizeTPointer sizeInBytes);

/**
 * Helper function to determine size of the reserve space to be passed to dropout
 * forward/backward calls.
 *
 * @param xdesc       tensor descriptor the reserve-space size is computed for
 * @param sizeInBytes {@code size_t*} location; presumably receives the size in bytes
 * @return the native {@code cudnnStatus_t} status, cast to {@code int}
 */
public static native @Cast("cudnnStatus_t") int cudnnDropoutGetReserveSpaceSize(cudnnTensorStruct xdesc, @Cast("size_t*") SizeTPointer sizeInBytes);

/**
 * Native binding for {@code cudnnSetDropoutDescriptor}.
 *
 * @param dropoutDesc      dropout descriptor to configure
 * @param handle           cuDNN context
 * @param dropout          dropout value; NOTE(review): presumably the drop probability --
 *                         confirm in the cuDNN docs
 * @param states           states buffer (non-const pointer)
 * @param stateSizeInBytes size of {@code states} in bytes (native {@code size_t}); the required
 *                         size is what cudnnDropoutGetStatesSize determines
 * @param seed             seed, passed as native {@code unsigned long long}
 * @return the native {@code cudnnStatus_t} status, cast to {@code int}
 */
public static native @Cast("cudnnStatus_t") int cudnnSetDropoutDescriptor(cudnnDropoutStruct dropoutDesc,
                          cudnnContext handle,
                          float dropout,
                          Pointer states,
                          @Cast("size_t") long stateSizeInBytes,
                          @Cast("unsigned long long") long seed);

/**
 * Restores the dropout descriptor to a previously saved-off state.
 * Takes the same argument list as cudnnSetDropoutDescriptor.
 *
 * @param dropoutDesc      dropout descriptor to restore
 * @param handle           cuDNN context
 * @param dropout          dropout value
 * @param states           states buffer (non-const pointer); presumably holds the saved-off state
 * @param stateSizeInBytes size of {@code states} in bytes (native {@code size_t})
 * @param seed             seed, passed as native {@code unsigned long long}
 * @return the native {@code cudnnStatus_t} status, cast to {@code int}
 */
public static native @Cast("cudnnStatus_t") int cudnnRestoreDropoutDescriptor(cudnnDropoutStruct dropoutDesc,
                              cudnnContext handle,
                              float dropout,
                              Pointer states,
                              @Cast("size_t") long stateSizeInBytes,
                              @Cast("unsigned long long") long seed);

/**
 * Native binding for {@code cudnnGetDropoutDescriptor}. Four overloads are generated; they
 * differ only in the Java carriers of the pointer arguments. This one takes {@code states}
 * as a {@code PointerPointer} and the scalars as JavaCPP pointers.
 *
 * @param dropoutDesc dropout descriptor to query
 * @param handle      cuDNN context
 * @param dropout     location for the dropout value (non-const, presumably an output)
 * @param states      {@code void**} location for the states pointer (presumably an output)
 * @param seed        {@code unsigned long long*} location for the seed (presumably an output)
 * @return the native {@code cudnnStatus_t} status, cast to {@code int}
 */
public static native @Cast("cudnnStatus_t") int cudnnGetDropoutDescriptor(cudnnDropoutStruct dropoutDesc,
                          cudnnContext handle,
                          FloatPointer dropout,
                          @Cast("void**") PointerPointer states,
                          @Cast("unsigned long long*") LongPointer seed);
/** Overload taking {@code states} as a single {@code @ByPtrPtr Pointer}; scalars as JavaCPP pointers. */
public static native @Cast("cudnnStatus_t") int cudnnGetDropoutDescriptor(cudnnDropoutStruct dropoutDesc,
                          cudnnContext handle,
                          FloatPointer dropout,
                          @Cast("void**") @ByPtrPtr Pointer states,
                          @Cast("unsigned long long*") LongPointer seed);
/** Overload taking {@code states} as a single {@code @ByPtrPtr Pointer}; scalars as NIO buffers. */
public static native @Cast("cudnnStatus_t") int cudnnGetDropoutDescriptor(cudnnDropoutStruct dropoutDesc,
                          cudnnContext handle,
                          FloatBuffer dropout,
                          @Cast("void**") @ByPtrPtr Pointer states,
                          @Cast("unsigned long long*") LongBuffer seed);
/** Overload taking {@code states} as a single {@code @ByPtrPtr Pointer}; scalars as primitive arrays. */
public static native @Cast("cudnnStatus_t") int cudnnGetDropoutDescriptor(cudnnDropoutStruct dropoutDesc,
                          cudnnContext handle,
                          float[] dropout,
                          @Cast("void**") @ByPtrPtr Pointer states,
                          @Cast("unsigned long long*") long[] seed);

/**
 * Native binding for {@code cudnnDropoutForward}.
 *
 * @param handle                  cuDNN context
 * @param dropoutDesc             dropout descriptor
 * @param xdesc                   tensor descriptor for {@code x}
 * @param x                       const input data
 * @param ydesc                   tensor descriptor for {@code y}
 * @param y                       output data (non-const pointer)
 * @param reserveSpace            reserve-space buffer (non-const pointer)
 * @param reserveSpaceSizeInBytes size of {@code reserveSpace} in bytes (native {@code size_t});
 *                                the required size is what cudnnDropoutGetReserveSpaceSize determines
 * @return the native {@code cudnnStatus_t} status, cast to {@code int}
 */
public static native @Cast("cudnnStatus_t") int cudnnDropoutForward(cudnnContext handle,
                    cudnnDropoutStruct dropoutDesc,
                    cudnnTensorStruct xdesc,
                    @Const Pointer x,
                    cudnnTensorStruct ydesc,
                    Pointer y,
                    Pointer reserveSpace,
                    @Cast("size_t") long reserveSpaceSizeInBytes);

/**
 * Native binding for {@code cudnnDropoutBackward}.
 *
 * @param handle                  cuDNN context
 * @param dropoutDesc             dropout descriptor
 * @param dydesc                  tensor descriptor for {@code dy}
 * @param dy                      const incoming differential data
 * @param dxdesc                  tensor descriptor for {@code dx}
 * @param dx                      differential output (non-const pointer)
 * @param reserveSpace            reserve-space buffer (non-const pointer); presumably the one
 *                                used in the matching cudnnDropoutForward call
 * @param reserveSpaceSizeInBytes size of {@code reserveSpace} in bytes (native {@code size_t});
 *                                the required size is what cudnnDropoutGetReserveSpaceSize determines
 * @return the native {@code cudnnStatus_t} status, cast to {@code int}
 */
public static native @Cast("cudnnStatus_t") int cudnnDropoutBackward(cudnnContext handle,
                     cudnnDropoutStruct dropoutDesc,
                     cudnnTensorStruct dydesc,
                     @Const Pointer dy,
                     cudnnTensorStruct dxdesc,
                     Pointer dx,
                     Pointer reserveSpace,
                     @Cast("size_t") long reserveSpaceSizeInBytes);

/* RNN API */
/** enum cudnnRNNMode_t: the recurrent cell types declared by this binding. */
public static final int
    CUDNN_RNN_RELU = 0, /* Stock RNN with ReLu activation */
    CUDNN_RNN_TANH = 1, /* Stock RNN with tanh activation */
    CUDNN_LSTM     = 2, /* LSTM with no peephole connections */
    CUDNN_GRU      = 3;  /* Using h' = tanh(r * Uh(t-1) + Wx) and h = (1 - z) * h' + z * h(t-1); */

/** enum cudnnDirectionMode_t: unidirectional or bidirectional recurrence. */
public static final int
    CUDNN_UNIDIRECTIONAL = 0,
    CUDNN_BIDIRECTIONAL  = 1; /* Using output concatenation at each step. Do we also want to support output sum? */

/**
 * enum cudnnRNNInputMode_t. NOTE(review): the names suggest "linear" applies a linear
 * transform to the input and "skip" bypasses it -- confirm against the cuDNN documentation.
 */
public static final int CUDNN_LINEAR_INPUT = 0, CUDNN_SKIP_INPUT = 1;

/**
 * enum cudnnRNNAlgo_t. The last entry equals the number of preceding algorithm values
 * (0..2), so it presumably serves as a count sentinel rather than a selectable algorithm.
 */
public static final int
    CUDNN_RNN_ALGO_STANDARD        = 0,
    CUDNN_RNN_ALGO_PERSIST_STATIC  = 1,
    CUDNN_RNN_ALGO_PERSIST_DYNAMIC = 2,
    CUDNN_RNN_ALGO_COUNT           = 3;

/**
 * Opaque handle type for an algorithm descriptor. The class is marked {@code @Opaque}
 * and declares no members besides its two constructors.
 */
@Opaque
public static class cudnnAlgorithmStruct extends Pointer {
    /**
     * Pointer cast constructor. Invokes {@link Pointer#Pointer(Pointer)}.
     *
     * @param p pointer to reinterpret as an algorithm handle
     */
    public cudnnAlgorithmStruct(Pointer p) {
        super(p);
    }

    /** Empty constructor. Calls {@code super((Pointer)null)}. */
    public cudnnAlgorithmStruct() {
        super((Pointer) null);
    }
}

/**
 * Opaque handle type for an algorithm performance record (used for the {@code perfResults}
 * argument of the algorithm-finding bindings). The class is marked {@code @Opaque} and
 * declares no members besides its two constructors.
 */
@Opaque
public static class cudnnAlgorithmPerformanceStruct extends Pointer {
    /**
     * Pointer cast constructor. Invokes {@link Pointer#Pointer(Pointer)}.
     *
     * @param p pointer to reinterpret as an algorithm performance handle
     */
    public cudnnAlgorithmPerformanceStruct(Pointer p) {
        super(p);
    }

    /** Empty constructor. Calls {@code super((Pointer)null)}. */
    public cudnnAlgorithmPerformanceStruct() {
        super((Pointer) null);
    }
}

/**
 * Opaque handle type for an RNN descriptor. The class is marked {@code @Opaque}
 * and declares no members besides its two constructors.
 */
@Opaque
public static class cudnnRNNStruct extends Pointer {
    /**
     * Pointer cast constructor. Invokes {@link Pointer#Pointer(Pointer)}.
     *
     * @param p pointer to reinterpret as an RNN descriptor handle
     */
    public cudnnRNNStruct(Pointer p) {
        super(p);
    }

    /** Empty constructor. Calls {@code super((Pointer)null)}. */
    public cudnnRNNStruct() {
        super((Pointer) null);
    }
}

/**
 * Native binding for {@code cudnnCreateRNNDescriptor}.
 *
 * @param rnnDesc RNN descriptor handle, handed to native code as a pointer-to-pointer
 *        ({@code @ByPtrPtr}); presumably receives the newly created descriptor
 * @return the native {@code cudnnStatus_t} status, cast to {@code int}
 */
public static native @Cast("cudnnStatus_t") int cudnnCreateRNNDescriptor(@ByPtrPtr cudnnRNNStruct rnnDesc);
/**
 * Native binding for {@code cudnnDestroyRNNDescriptor}.
 *
 * @param rnnDesc RNN descriptor handle to destroy
 * @return the native {@code cudnnStatus_t} status, cast to {@code int}
 */
public static native @Cast("cudnnStatus_t") int cudnnDestroyRNNDescriptor(cudnnRNNStruct rnnDesc);

/**
 * Native binding for {@code cudnnGetRNNForwardInferenceAlgorithmMaxCount}. The three
 * overloads differ only in how {@code count} is supplied: JavaCPP pointer (this overload),
 * NIO buffer, or primitive array.
 *
 * @param handle  cuDNN context
 * @param rnnDesc RNN descriptor
 * @param count   non-const int location; presumably receives the maximum number of algorithms,
 *                usable as {@code requestedAlgoCount} for cudnnFindRNNForwardInferenceAlgorithmEx
 *                -- confirm in the cuDNN docs
 * @return the native {@code cudnnStatus_t} status, cast to {@code int}
 */
public static native @Cast("cudnnStatus_t") int cudnnGetRNNForwardInferenceAlgorithmMaxCount(cudnnContext handle, cudnnRNNStruct rnnDesc, IntPointer count);
/** NIO-buffer overload of {@code cudnnGetRNNForwardInferenceAlgorithmMaxCount}. */
public static native @Cast("cudnnStatus_t") int cudnnGetRNNForwardInferenceAlgorithmMaxCount(cudnnContext handle, cudnnRNNStruct rnnDesc, IntBuffer count);
/** Primitive-array overload of {@code cudnnGetRNNForwardInferenceAlgorithmMaxCount}. */
public static native @Cast("cudnnStatus_t") int cudnnGetRNNForwardInferenceAlgorithmMaxCount(cudnnContext handle, cudnnRNNStruct rnnDesc, int[] count);

public static native @Cast("cudnnStatus_t") int cudnnFindRNNForwardInferenceAlgorithmEx(cudnnContext handle,
                                        cudnnRNNStruct rnnDesc,
                                        int seqLength,
                                        @Cast("cudnnTensorStruct**") @ByPtrPtr cudnnTensorStruct xDesc,
                                        @Const Pointer x,
                                        cudnnTensorStruct hxDesc,
                                        @Const Pointer hx,
                                        cudnnTensorStruct cxDesc,
                                        @Const Pointer cx,
                                        cudnnFilterStruct wDesc,
                                        @Const Pointer w,
                                        @Cast("cudnnTensorStruct**") @ByPtrPtr cudnnTensorStruct yDesc,
                                        Pointer y,
                                        cudnnTensorStruct hyDesc,
                                        Pointer hy,
                                        cudnnTensorStruct cyDesc,
                                        Pointer cy,
                                        float findIntensity,
                                        int requestedAlgoCount,
                                        IntPointer returnedAlgoCount,
                                        @ByPtrPtr cudnnAlgorithmPerformanceStruct perfResults,
                                        Pointer workspace,
                                        @Cast("size_t") long workSpaceSizeInBytes);
public static native @Cast("cudnnStatus_t") int cudnnFindRNNForwardInferenceAlgorithmEx(cudnnContext handle,
                                        cudnnRNNStruct rnnDesc,
                                        int seqLength,
                                        @Cast("cudnnTensorStruct**") PointerPointer xDesc,
                                        @Const Pointer x,
                                        cudnnTensorStruct hxDesc,
                                        @Const Pointer hx,
                                        cudnnTensorStruct cxDesc,
                                        @Const Pointer cx,
                                        cudnnFilterStruct wDesc,
                                        @Const Pointer w,
                                        @Cast("cudnnTensorStruct**") PointerPointer yDesc,
                                        Pointer y,
                                        cudnnTensorStruct hyDesc,
                                        Pointer hy,
                                        cudnnTensorStruct cyDesc,
                                        Pointer cy,
                                        float findIntensity,
                                        int requestedAlgoCount,
                                        IntBuffer returnedAlgoCount,
                                        @ByPtrPtr cudnnAlgorithmPerformanceStruct perfResults,
                                        Pointer workspace,
                                        @Cast("size_t") long workSpaceSizeInBytes);
public static native @Cast("cudnnStatus_t") int cudnnFindRNNForwardInferenceAlgorithmEx(cudnnContext handle,
                                        cudnnRNNStruct rnnDesc,
                                        int seqLength,
                                        @Cast("cudnnTensorStruct**") @ByPtrPtr cudnnTensorStruct xDesc,
                                        @Const Pointer x,
                                        cudnnTensorStruct hxDesc,
                                        @Const Pointer hx,
                                        cudnnTensorStruct cxDesc,
                                        @Const Pointer cx,
                                        cudnnFilterStruct wDesc,
                                        @Const Pointer w,
                                        @Cast("cudnnTensorStruct**") @ByPtrPtr cudnnTensorStruct yDesc,
                                        Pointer y,
                                        cudnnTensorStruct hyDesc,
                                        Pointer hy,
                                        cudnnTensorStruct cyDesc,
                                        Pointer cy,
                                        float findIntensity,
                                        int requestedAlgoCount,
                                        int[] returnedAlgoCount,
                                        @ByPtrPtr cudnnAlgorithmPerformanceStruct perfResults,
                                        Pointer workspace,
                                        @Cast("size_t") long workSpaceSizeInBytes);
/**
 * Native declaration (no Java body); the {@code int} result is annotated {@code @Cast("cudnnStatus_t")}.
 * Overload taking {@code xDesc}/{@code yDesc} as {@code PointerPointer} (cast to
 * {@code cudnnTensorStruct**}) and {@code returnedAlgoCount} as {@code IntPointer}.
 * NOTE(review): by its name this presumably searches RNN forward-inference algorithms and reports
 * them through {@code returnedAlgoCount}/{@code perfResults} -- confirm against the cuDNN API reference.
 */
public static native @Cast("cudnnStatus_t") int cudnnFindRNNForwardInferenceAlgorithmEx(cudnnContext handle,
                                        cudnnRNNStruct rnnDesc,
                                        int seqLength,
                                        @Cast("cudnnTensorStruct**") PointerPointer xDesc,
                                        @Const Pointer x,
                                        cudnnTensorStruct hxDesc,
                                        @Const Pointer hx,
                                        cudnnTensorStruct cxDesc,
                                        @Const Pointer cx,
                                        cudnnFilterStruct wDesc,
                                        @Const Pointer w,
                                        @Cast("cudnnTensorStruct**") PointerPointer yDesc,
                                        Pointer y,
                                        cudnnTensorStruct hyDesc,
                                        Pointer hy,
                                        cudnnTensorStruct cyDesc,
                                        Pointer cy,
                                        float findIntensity,
                                        int requestedAlgoCount,
                                        IntPointer returnedAlgoCount,
                                        @ByPtrPtr cudnnAlgorithmPerformanceStruct perfResults,
                                        Pointer workspace,
                                        @Cast("size_t") long workSpaceSizeInBytes);
/**
 * Native declaration (no Java body); the {@code int} result is annotated {@code @Cast("cudnnStatus_t")}.
 * Overload taking {@code xDesc}/{@code yDesc} as {@code @ByPtrPtr cudnnTensorStruct} and
 * {@code returnedAlgoCount} as {@code IntBuffer}.
 * NOTE(review): by its name this presumably searches RNN forward-inference algorithms and reports
 * them through {@code returnedAlgoCount}/{@code perfResults} -- confirm against the cuDNN API reference.
 */
public static native @Cast("cudnnStatus_t") int cudnnFindRNNForwardInferenceAlgorithmEx(cudnnContext handle,
                                        cudnnRNNStruct rnnDesc,
                                        int seqLength,
                                        @Cast("cudnnTensorStruct**") @ByPtrPtr cudnnTensorStruct xDesc,
                                        @Const Pointer x,
                                        cudnnTensorStruct hxDesc,
                                        @Const Pointer hx,
                                        cudnnTensorStruct cxDesc,
                                        @Const Pointer cx,
                                        cudnnFilterStruct wDesc,
                                        @Const Pointer w,
                                        @Cast("cudnnTensorStruct**") @ByPtrPtr cudnnTensorStruct yDesc,
                                        Pointer y,
                                        cudnnTensorStruct hyDesc,
                                        Pointer hy,
                                        cudnnTensorStruct cyDesc,
                                        Pointer cy,
                                        float findIntensity,
                                        int requestedAlgoCount,
                                        IntBuffer returnedAlgoCount,
                                        @ByPtrPtr cudnnAlgorithmPerformanceStruct perfResults,
                                        Pointer workspace,
                                        @Cast("size_t") long workSpaceSizeInBytes);
/**
 * Native declaration (no Java body); the {@code int} result is annotated {@code @Cast("cudnnStatus_t")}.
 * Overload taking {@code xDesc}/{@code yDesc} as {@code PointerPointer} (cast to
 * {@code cudnnTensorStruct**}) and {@code returnedAlgoCount} as {@code int[]}.
 * NOTE(review): by its name this presumably searches RNN forward-inference algorithms and reports
 * them through {@code returnedAlgoCount}/{@code perfResults} -- confirm against the cuDNN API reference.
 */
public static native @Cast("cudnnStatus_t") int cudnnFindRNNForwardInferenceAlgorithmEx(cudnnContext handle,
                                        cudnnRNNStruct rnnDesc,
                                        int seqLength,
                                        @Cast("cudnnTensorStruct**") PointerPointer xDesc,
                                        @Const Pointer x,
                                        cudnnTensorStruct hxDesc,
                                        @Const Pointer hx,
                                        cudnnTensorStruct cxDesc,
                                        @Const Pointer cx,
                                        cudnnFilterStruct wDesc,
                                        @Const Pointer w,
                                        @Cast("cudnnTensorStruct**") PointerPointer yDesc,
                                        Pointer y,
                                        cudnnTensorStruct hyDesc,
                                        Pointer hy,
                                        cudnnTensorStruct cyDesc,
                                        Pointer cy,
                                        float findIntensity,
                                        int requestedAlgoCount,
                                        int[] returnedAlgoCount,
                                        @ByPtrPtr cudnnAlgorithmPerformanceStruct perfResults,
                                        Pointer workspace,
                                        @Cast("size_t") long workSpaceSizeInBytes);

/**
 * Native declaration (no Java body); the {@code int} result is annotated {@code @Cast("cudnnStatus_t")}.
 * The three overloads differ only in how {@code count} is passed: {@code IntPointer},
 * {@code IntBuffer} or {@code int[]}.
 * NOTE(review): by its name, {@code count} presumably receives the maximum number of RNN
 * forward-training algorithms for {@code rnnDesc} -- confirm against the cuDNN API reference.
 */
public static native @Cast("cudnnStatus_t") int cudnnGetRNNForwardTrainingAlgorithmMaxCount(cudnnContext handle, cudnnRNNStruct rnnDesc, IntPointer count);
/** Same native declaration with {@code count} passed as an {@code IntBuffer}. */
public static native @Cast("cudnnStatus_t") int cudnnGetRNNForwardTrainingAlgorithmMaxCount(cudnnContext handle, cudnnRNNStruct rnnDesc, IntBuffer count);
/** Same native declaration with {@code count} passed as an {@code int[]}. */
public static native @Cast("cudnnStatus_t") int cudnnGetRNNForwardTrainingAlgorithmMaxCount(cudnnContext handle, cudnnRNNStruct rnnDesc, int[] count);

/**
 * Native declaration (no Java body); the {@code int} result is annotated {@code @Cast("cudnnStatus_t")}.
 * Overload taking {@code xDesc}/{@code yDesc} as {@code @ByPtrPtr cudnnTensorStruct} and
 * {@code returnedAlgoCount} as {@code IntPointer}. In addition to {@code workspace}, the signature
 * carries a {@code reserveSpace} pointer with its own byte size.
 * NOTE(review): by its name this presumably searches RNN forward-training algorithms and reports
 * them through {@code returnedAlgoCount}/{@code perfResults} -- confirm against the cuDNN API reference.
 */
public static native @Cast("cudnnStatus_t") int cudnnFindRNNForwardTrainingAlgorithmEx(cudnnContext handle,
                                       cudnnRNNStruct rnnDesc,
                                       int seqLength,
                                       @Cast("cudnnTensorStruct**") @ByPtrPtr cudnnTensorStruct xDesc,
                                       @Const Pointer x,
                                       cudnnTensorStruct hxDesc,
                                       @Const Pointer hx,
                                       cudnnTensorStruct cxDesc,
                                       @Const Pointer cx,
                                       cudnnFilterStruct wDesc,
                                       @Const Pointer w,
                                       @Cast("cudnnTensorStruct**") @ByPtrPtr cudnnTensorStruct yDesc,
                                       Pointer y,
                                       cudnnTensorStruct hyDesc,
                                       Pointer hy,
                                       cudnnTensorStruct cyDesc,
                                       Pointer cy,
                                       float findIntensity,
                                       int requestedAlgoCount,
                                       IntPointer returnedAlgoCount,
                                       @ByPtrPtr cudnnAlgorithmPerformanceStruct perfResults,
                                       Pointer workspace,
                                       @Cast("size_t") long workSpaceSizeInBytes,
                                       Pointer reserveSpace,
                                       @Cast("size_t") long reserveSpaceSizeInBytes);
/**
 * Native declaration (no Java body); the {@code int} result is annotated {@code @Cast("cudnnStatus_t")}.
 * Overload taking {@code xDesc}/{@code yDesc} as {@code PointerPointer} (cast to
 * {@code cudnnTensorStruct**}) and {@code returnedAlgoCount} as {@code IntBuffer}.
 * NOTE(review): by its name this presumably searches RNN forward-training algorithms and reports
 * them through {@code returnedAlgoCount}/{@code perfResults} -- confirm against the cuDNN API reference.
 */
public static native @Cast("cudnnStatus_t") int cudnnFindRNNForwardTrainingAlgorithmEx(cudnnContext handle,
                                       cudnnRNNStruct rnnDesc,
                                       int seqLength,
                                       @Cast("cudnnTensorStruct**") PointerPointer xDesc,
                                       @Const Pointer x,
                                       cudnnTensorStruct hxDesc,
                                       @Const Pointer hx,
                                       cudnnTensorStruct cxDesc,
                                       @Const Pointer cx,
                                       cudnnFilterStruct wDesc,
                                       @Const Pointer w,
                                       @Cast("cudnnTensorStruct**") PointerPointer yDesc,
                                       Pointer y,
                                       cudnnTensorStruct hyDesc,
                                       Pointer hy,
                                       cudnnTensorStruct cyDesc,
                                       Pointer cy,
                                       float findIntensity,
                                       int requestedAlgoCount,
                                       IntBuffer returnedAlgoCount,
                                       @ByPtrPtr cudnnAlgorithmPerformanceStruct perfResults,
                                       Pointer workspace,
                                       @Cast("size_t") long workSpaceSizeInBytes,
                                       Pointer reserveSpace,
                                       @Cast("size_t") long reserveSpaceSizeInBytes);
/**
 * Native declaration (no Java body); the {@code int} result is annotated {@code @Cast("cudnnStatus_t")}.
 * Overload taking {@code xDesc}/{@code yDesc} as {@code @ByPtrPtr cudnnTensorStruct} and
 * {@code returnedAlgoCount} as {@code int[]}.
 * NOTE(review): by its name this presumably searches RNN forward-training algorithms and reports
 * them through {@code returnedAlgoCount}/{@code perfResults} -- confirm against the cuDNN API reference.
 */
public static native @Cast("cudnnStatus_t") int cudnnFindRNNForwardTrainingAlgorithmEx(cudnnContext handle,
                                       cudnnRNNStruct rnnDesc,
                                       int seqLength,
                                       @Cast("cudnnTensorStruct**") @ByPtrPtr cudnnTensorStruct xDesc,
                                       @Const Pointer x,
                                       cudnnTensorStruct hxDesc,
                                       @Const Pointer hx,
                                       cudnnTensorStruct cxDesc,
                                       @Const Pointer cx,
                                       cudnnFilterStruct wDesc,
                                       @Const Pointer w,
                                       @Cast("cudnnTensorStruct**") @ByPtrPtr cudnnTensorStruct yDesc,
                                       Pointer y,
                                       cudnnTensorStruct hyDesc,
                                       Pointer hy,
                                       cudnnTensorStruct cyDesc,
                                       Pointer cy,
                                       float findIntensity,
                                       int requestedAlgoCount,
                                       int[] returnedAlgoCount,
                                       @ByPtrPtr cudnnAlgorithmPerformanceStruct perfResults,
                                       Pointer workspace,
                                       @Cast("size_t") long workSpaceSizeInBytes,
                                       Pointer reserveSpace,
                                       @Cast("size_t") long reserveSpaceSizeInBytes);
/**
 * Native declaration (no Java body); the {@code int} result is annotated {@code @Cast("cudnnStatus_t")}.
 * Overload taking {@code xDesc}/{@code yDesc} as {@code PointerPointer} (cast to
 * {@code cudnnTensorStruct**}) and {@code returnedAlgoCount} as {@code IntPointer}.
 * NOTE(review): by its name this presumably searches RNN forward-training algorithms and reports
 * them through {@code returnedAlgoCount}/{@code perfResults} -- confirm against the cuDNN API reference.
 */
public static native @Cast("cudnnStatus_t") int cudnnFindRNNForwardTrainingAlgorithmEx(cudnnContext handle,
                                       cudnnRNNStruct rnnDesc,
                                       int seqLength,
                                       @Cast("cudnnTensorStruct**") PointerPointer xDesc,
                                       @Const Pointer x,
                                       cudnnTensorStruct hxDesc,
                                       @Const Pointer hx,
                                       cudnnTensorStruct cxDesc,
                                       @Const Pointer cx,
                                       cudnnFilterStruct wDesc,
                                       @Const Pointer w,
                                       @Cast("cudnnTensorStruct**") PointerPointer yDesc,
                                       Pointer y,
                                       cudnnTensorStruct hyDesc,
                                       Pointer hy,
                                       cudnnTensorStruct cyDesc,
                                       Pointer cy,
                                       float findIntensity,
                                       int requestedAlgoCount,
                                       IntPointer returnedAlgoCount,
                                       @ByPtrPtr cudnnAlgorithmPerformanceStruct perfResults,
                                       Pointer workspace,
                                       @Cast("size_t") long workSpaceSizeInBytes,
                                       Pointer reserveSpace,
                                       @Cast("size_t") long reserveSpaceSizeInBytes);
/**
 * Native declaration (no Java body); the {@code int} result is annotated {@code @Cast("cudnnStatus_t")}.
 * Overload taking {@code xDesc}/{@code yDesc} as {@code @ByPtrPtr cudnnTensorStruct} and
 * {@code returnedAlgoCount} as {@code IntBuffer}.
 * NOTE(review): by its name this presumably searches RNN forward-training algorithms and reports
 * them through {@code returnedAlgoCount}/{@code perfResults} -- confirm against the cuDNN API reference.
 */
public static native @Cast("cudnnStatus_t") int cudnnFindRNNForwardTrainingAlgorithmEx(cudnnContext handle,
                                       cudnnRNNStruct rnnDesc,
                                       int seqLength,
                                       @Cast("cudnnTensorStruct**") @ByPtrPtr cudnnTensorStruct xDesc,
                                       @Const Pointer x,
                                       cudnnTensorStruct hxDesc,
                                       @Const Pointer hx,
                                       cudnnTensorStruct cxDesc,
                                       @Const Pointer cx,
                                       cudnnFilterStruct wDesc,
                                       @Const Pointer w,
                                       @Cast("cudnnTensorStruct**") @ByPtrPtr cudnnTensorStruct yDesc,
                                       Pointer y,
                                       cudnnTensorStruct hyDesc,
                                       Pointer hy,
                                       cudnnTensorStruct cyDesc,
                                       Pointer cy,
                                       float findIntensity,
                                       int requestedAlgoCount,
                                       IntBuffer returnedAlgoCount,
                                       @ByPtrPtr cudnnAlgorithmPerformanceStruct perfResults,
                                       Pointer workspace,
                                       @Cast("size_t") long workSpaceSizeInBytes,
                                       Pointer reserveSpace,
                                       @Cast("size_t") long reserveSpaceSizeInBytes);
/**
 * Native declaration (no Java body); the {@code int} result is annotated {@code @Cast("cudnnStatus_t")}.
 * Overload taking {@code xDesc}/{@code yDesc} as {@code PointerPointer} (cast to
 * {@code cudnnTensorStruct**}) and {@code returnedAlgoCount} as {@code int[]}.
 * NOTE(review): by its name this presumably searches RNN forward-training algorithms and reports
 * them through {@code returnedAlgoCount}/{@code perfResults} -- confirm against the cuDNN API reference.
 */
public static native @Cast("cudnnStatus_t") int cudnnFindRNNForwardTrainingAlgorithmEx(cudnnContext handle,
                                       cudnnRNNStruct rnnDesc,
                                       int seqLength,
                                       @Cast("cudnnTensorStruct**") PointerPointer xDesc,
                                       @Const Pointer x,
                                       cudnnTensorStruct hxDesc,
                                       @Const Pointer hx,
                                       cudnnTensorStruct cxDesc,
                                       @Const Pointer cx,
                                       cudnnFilterStruct wDesc,
                                       @Const Pointer w,
                                       @Cast("cudnnTensorStruct**") PointerPointer yDesc,
                                       Pointer y,
                                       cudnnTensorStruct hyDesc,
                                       Pointer hy,
                                       cudnnTensorStruct cyDesc,
                                       Pointer cy,
                                       float findIntensity,
                                       int requestedAlgoCount,
                                       int[] returnedAlgoCount,
                                       @ByPtrPtr cudnnAlgorithmPerformanceStruct perfResults,
                                       Pointer workspace,
                                       @Cast("size_t") long workSpaceSizeInBytes,
                                       Pointer reserveSpace,
                                       @Cast("size_t") long reserveSpaceSizeInBytes);

/**
 * Native declaration (no Java body); the {@code int} result is annotated {@code @Cast("cudnnStatus_t")}.
 * The three overloads differ only in how {@code count} is passed: {@code IntPointer},
 * {@code IntBuffer} or {@code int[]}.
 * NOTE(review): by its name, {@code count} presumably receives the maximum number of RNN
 * backward-data algorithms for {@code rnnDesc} -- confirm against the cuDNN API reference.
 */
public static native @Cast("cudnnStatus_t") int cudnnGetRNNBackwardDataAlgorithmMaxCount(cudnnContext handle, cudnnRNNStruct rnnDesc, IntPointer count);
/** Same native declaration with {@code count} passed as an {@code IntBuffer}. */
public static native @Cast("cudnnStatus_t") int cudnnGetRNNBackwardDataAlgorithmMaxCount(cudnnContext handle, cudnnRNNStruct rnnDesc, IntBuffer count);
/** Same native declaration with {@code count} passed as an {@code int[]}. */
public static native @Cast("cudnnStatus_t") int cudnnGetRNNBackwardDataAlgorithmMaxCount(cudnnContext handle, cudnnRNNStruct rnnDesc, int[] count);

/**
 * Native declaration (no Java body); the {@code int} result is annotated {@code @Cast("cudnnStatus_t")}.
 * Overload taking {@code yDesc}/{@code dyDesc}/{@code dxDesc} as {@code @ByPtrPtr cudnnTensorStruct}
 * and {@code returnedAlgoCount} as {@code IntPointer}. {@code y}, {@code dy}, {@code dhy},
 * {@code dcy}, {@code w}, {@code hx} and {@code cx} are declared {@code @Const}; {@code dx},
 * {@code dhx}, {@code dcx}, {@code workspace} and {@code reserveSpace} are not.
 * NOTE(review): by its name this presumably searches RNN backward-data algorithms and reports
 * them through {@code returnedAlgoCount}/{@code perfResults} -- confirm against the cuDNN API reference.
 */
public static native @Cast("cudnnStatus_t") int cudnnFindRNNBackwardDataAlgorithmEx(cudnnContext handle,
                                    cudnnRNNStruct rnnDesc,
                                    int seqLength,
                                    @Cast("cudnnTensorStruct**") @ByPtrPtr cudnnTensorStruct yDesc,
                                    @Const Pointer y,
                                    @Cast("cudnnTensorStruct**") @ByPtrPtr cudnnTensorStruct dyDesc,
                                    @Const Pointer dy,
                                    cudnnTensorStruct dhyDesc,
                                    @Const Pointer dhy,
                                    cudnnTensorStruct dcyDesc,
                                    @Const Pointer dcy,
                                    cudnnFilterStruct wDesc,
                                    @Const Pointer w,
                                    cudnnTensorStruct hxDesc,
                                    @Const Pointer hx,
                                    cudnnTensorStruct cxDesc,
                                    @Const Pointer cx,
                                    @Cast("cudnnTensorStruct**") @ByPtrPtr cudnnTensorStruct dxDesc,
                                    Pointer dx,
                                    cudnnTensorStruct dhxDesc,
                                    Pointer dhx,
                                    cudnnTensorStruct dcxDesc,
                                    Pointer dcx,
                                    float findIntensity,
                                    int requestedAlgoCount,
                                    IntPointer returnedAlgoCount,
                                    @ByPtrPtr cudnnAlgorithmPerformanceStruct perfResults,
                                    Pointer workspace,
                                    @Cast("size_t") long workSpaceSizeInBytes,
                                    Pointer reserveSpace,
                                    @Cast("size_t") long reserveSpaceSizeInBytes);
/**
 * Native declaration (no Java body); the {@code int} result is annotated {@code @Cast("cudnnStatus_t")}.
 * Overload taking {@code yDesc}/{@code dyDesc}/{@code dxDesc} as {@code PointerPointer} (cast to
 * {@code cudnnTensorStruct**}) and {@code returnedAlgoCount} as {@code IntBuffer}.
 * NOTE(review): by its name this presumably searches RNN backward-data algorithms and reports
 * them through {@code returnedAlgoCount}/{@code perfResults} -- confirm against the cuDNN API reference.
 */
public static native @Cast("cudnnStatus_t") int cudnnFindRNNBackwardDataAlgorithmEx(cudnnContext handle,
                                    cudnnRNNStruct rnnDesc,
                                    int seqLength,
                                    @Cast("cudnnTensorStruct**") PointerPointer yDesc,
                                    @Const Pointer y,
                                    @Cast("cudnnTensorStruct**") PointerPointer dyDesc,
                                    @Const Pointer dy,
                                    cudnnTensorStruct dhyDesc,
                                    @Const Pointer dhy,
                                    cudnnTensorStruct dcyDesc,
                                    @Const Pointer dcy,
                                    cudnnFilterStruct wDesc,
                                    @Const Pointer w,
                                    cudnnTensorStruct hxDesc,
                                    @Const Pointer hx,
                                    cudnnTensorStruct cxDesc,
                                    @Const Pointer cx,
                                    @Cast("cudnnTensorStruct**") PointerPointer dxDesc,
                                    Pointer dx,
                                    cudnnTensorStruct dhxDesc,
                                    Pointer dhx,
                                    cudnnTensorStruct dcxDesc,
                                    Pointer dcx,
                                    float findIntensity,
                                    int requestedAlgoCount,
                                    IntBuffer returnedAlgoCount,
                                    @ByPtrPtr cudnnAlgorithmPerformanceStruct perfResults,
                                    Pointer workspace,
                                    @Cast("size_t") long workSpaceSizeInBytes,
                                    Pointer reserveSpace,
                                    @Cast("size_t") long reserveSpaceSizeInBytes);
/**
 * Native declaration (no Java body); the {@code int} result is annotated {@code @Cast("cudnnStatus_t")}.
 * Overload taking {@code yDesc}/{@code dyDesc}/{@code dxDesc} as {@code @ByPtrPtr cudnnTensorStruct}
 * and {@code returnedAlgoCount} as {@code int[]}.
 * NOTE(review): by its name this presumably searches RNN backward-data algorithms and reports
 * them through {@code returnedAlgoCount}/{@code perfResults} -- confirm against the cuDNN API reference.
 */
public static native @Cast("cudnnStatus_t") int cudnnFindRNNBackwardDataAlgorithmEx(cudnnContext handle,
                                    cudnnRNNStruct rnnDesc,
                                    int seqLength,
                                    @Cast("cudnnTensorStruct**") @ByPtrPtr cudnnTensorStruct yDesc,
                                    @Const Pointer y,
                                    @Cast("cudnnTensorStruct**") @ByPtrPtr cudnnTensorStruct dyDesc,
                                    @Const Pointer dy,
                                    cudnnTensorStruct dhyDesc,
                                    @Const Pointer dhy,
                                    cudnnTensorStruct dcyDesc,
                                    @Const Pointer dcy,
                                    cudnnFilterStruct wDesc,
                                    @Const Pointer w,
                                    cudnnTensorStruct hxDesc,
                                    @Const Pointer hx,
                                    cudnnTensorStruct cxDesc,
                                    @Const Pointer cx,
                                    @Cast("cudnnTensorStruct**") @ByPtrPtr cudnnTensorStruct dxDesc,
                                    Pointer dx,
                                    cudnnTensorStruct dhxDesc,
                                    Pointer dhx,
                                    cudnnTensorStruct dcxDesc,
                                    Pointer dcx,
                                    float findIntensity,
                                    int requestedAlgoCount,
                                    int[] returnedAlgoCount,
                                    @ByPtrPtr cudnnAlgorithmPerformanceStruct perfResults,
                                    Pointer workspace,
                                    @Cast("size_t") long workSpaceSizeInBytes,
                                    Pointer reserveSpace,
                                    @Cast("size_t") long reserveSpaceSizeInBytes);
/**
 * Native declaration (no Java body); the {@code int} result is annotated {@code @Cast("cudnnStatus_t")}.
 * Overload taking {@code yDesc}/{@code dyDesc}/{@code dxDesc} as {@code PointerPointer} (cast to
 * {@code cudnnTensorStruct**}) and {@code returnedAlgoCount} as {@code IntPointer}.
 * NOTE(review): by its name this presumably searches RNN backward-data algorithms and reports
 * them through {@code returnedAlgoCount}/{@code perfResults} -- confirm against the cuDNN API reference.
 */
public static native @Cast("cudnnStatus_t") int cudnnFindRNNBackwardDataAlgorithmEx(cudnnContext handle,
                                    cudnnRNNStruct rnnDesc,
                                    int seqLength,
                                    @Cast("cudnnTensorStruct**") PointerPointer yDesc,
                                    @Const Pointer y,
                                    @Cast("cudnnTensorStruct**") PointerPointer dyDesc,
                                    @Const Pointer dy,
                                    cudnnTensorStruct dhyDesc,
                                    @Const Pointer dhy,
                                    cudnnTensorStruct dcyDesc,
                                    @Const Pointer dcy,
                                    cudnnFilterStruct wDesc,
                                    @Const Pointer w,
                                    cudnnTensorStruct hxDesc,
                                    @Const Pointer hx,
                                    cudnnTensorStruct cxDesc,
                                    @Const Pointer cx,
                                    @Cast("cudnnTensorStruct**") PointerPointer dxDesc,
                                    Pointer dx,
                                    cudnnTensorStruct dhxDesc,
                                    Pointer dhx,
                                    cudnnTensorStruct dcxDesc,
                                    Pointer dcx,
                                    float findIntensity,
                                    int requestedAlgoCount,
                                    IntPointer returnedAlgoCount,
                                    @ByPtrPtr cudnnAlgorithmPerformanceStruct perfResults,
                                    Pointer workspace,
                                    @Cast("size_t") long workSpaceSizeInBytes,
                                    Pointer reserveSpace,
                                    @Cast("size_t") long reserveSpaceSizeInBytes);
/**
 * Native declaration (no Java body); the {@code int} result is annotated {@code @Cast("cudnnStatus_t")}.
 * Overload taking {@code yDesc}/{@code dyDesc}/{@code dxDesc} as {@code @ByPtrPtr cudnnTensorStruct}
 * and {@code returnedAlgoCount} as {@code IntBuffer}.
 * NOTE(review): by its name this presumably searches RNN backward-data algorithms and reports
 * them through {@code returnedAlgoCount}/{@code perfResults} -- confirm against the cuDNN API reference.
 */
public static native @Cast("cudnnStatus_t") int cudnnFindRNNBackwardDataAlgorithmEx(cudnnContext handle,
                                    cudnnRNNStruct rnnDesc,
                                    int seqLength,
                                    @Cast("cudnnTensorStruct**") @ByPtrPtr cudnnTensorStruct yDesc,
                                    @Const Pointer y,
                                    @Cast("cudnnTensorStruct**") @ByPtrPtr cudnnTensorStruct dyDesc,
                                    @Const Pointer dy,
                                    cudnnTensorStruct dhyDesc,
                                    @Const Pointer dhy,
                                    cudnnTensorStruct dcyDesc,
                                    @Const Pointer dcy,
                                    cudnnFilterStruct wDesc,
                                    @Const Pointer w,
                                    cudnnTensorStruct hxDesc,
                                    @Const Pointer hx,
                                    cudnnTensorStruct cxDesc,
                                    @Const Pointer cx,
                                    @Cast("cudnnTensorStruct**") @ByPtrPtr cudnnTensorStruct dxDesc,
                                    Pointer dx,
                                    cudnnTensorStruct dhxDesc,
                                    Pointer dhx,
                                    cudnnTensorStruct dcxDesc,
                                    Pointer dcx,
                                    float findIntensity,
                                    int requestedAlgoCount,
                                    IntBuffer returnedAlgoCount,
                                    @ByPtrPtr cudnnAlgorithmPerformanceStruct perfResults,
                                    Pointer workspace,
                                    @Cast("size_t") long workSpaceSizeInBytes,
                                    Pointer reserveSpace,
                                    @Cast("size_t") long reserveSpaceSizeInBytes);
/**
 * Native declaration (no Java body); the {@code int} result is annotated {@code @Cast("cudnnStatus_t")}.
 * Overload taking {@code yDesc}/{@code dyDesc}/{@code dxDesc} as {@code PointerPointer} (cast to
 * {@code cudnnTensorStruct**}) and {@code returnedAlgoCount} as {@code int[]}.
 * NOTE(review): by its name this presumably searches RNN backward-data algorithms and reports
 * them through {@code returnedAlgoCount}/{@code perfResults} -- confirm against the cuDNN API reference.
 */
public static native @Cast("cudnnStatus_t") int cudnnFindRNNBackwardDataAlgorithmEx(cudnnContext handle,
                                    cudnnRNNStruct rnnDesc,
                                    int seqLength,
                                    @Cast("cudnnTensorStruct**") PointerPointer yDesc,
                                    @Const Pointer y,
                                    @Cast("cudnnTensorStruct**") PointerPointer dyDesc,
                                    @Const Pointer dy,
                                    cudnnTensorStruct dhyDesc,
                                    @Const Pointer dhy,
                                    cudnnTensorStruct dcyDesc,
                                    @Const Pointer dcy,
                                    cudnnFilterStruct wDesc,
                                    @Const Pointer w,
                                    cudnnTensorStruct hxDesc,
                                    @Const Pointer hx,
                                    cudnnTensorStruct cxDesc,
                                    @Const Pointer cx,
                                    @Cast("cudnnTensorStruct**") PointerPointer dxDesc,
                                    Pointer dx,
                                    cudnnTensorStruct dhxDesc,
                                    Pointer dhx,
                                    cudnnTensorStruct dcxDesc,
                                    Pointer dcx,
                                    float findIntensity,
                                    int requestedAlgoCount,
                                    int[] returnedAlgoCount,
                                    @ByPtrPtr cudnnAlgorithmPerformanceStruct perfResults,
                                    Pointer workspace,
                                    @Cast("size_t") long workSpaceSizeInBytes,
                                    Pointer reserveSpace,
                                    @Cast("size_t") long reserveSpaceSizeInBytes);

/**
 * Native declaration (no Java body); the {@code int} result is annotated {@code @Cast("cudnnStatus_t")}.
 * The three overloads differ only in how {@code count} is passed: {@code IntPointer},
 * {@code IntBuffer} or {@code int[]}.
 * NOTE(review): by its name, {@code count} presumably receives the maximum number of RNN
 * backward-weights algorithms for {@code rnnDesc} -- confirm against the cuDNN API reference.
 */
public static native @Cast("cudnnStatus_t") int cudnnGetRNNBackwardWeightsAlgorithmMaxCount(cudnnContext handle, cudnnRNNStruct rnnDesc, IntPointer count);
/** Same native declaration with {@code count} passed as an {@code IntBuffer}. */
public static native @Cast("cudnnStatus_t") int cudnnGetRNNBackwardWeightsAlgorithmMaxCount(cudnnContext handle, cudnnRNNStruct rnnDesc, IntBuffer count);
/** Same native declaration with {@code count} passed as an {@code int[]}. */
public static native @Cast("cudnnStatus_t") int cudnnGetRNNBackwardWeightsAlgorithmMaxCount(cudnnContext handle, cudnnRNNStruct rnnDesc, int[] count);

/**
 * Native declaration (no Java body); the {@code int} result is annotated {@code @Cast("cudnnStatus_t")}.
 * Overload taking {@code xDesc}/{@code yDesc} as {@code @ByPtrPtr cudnnTensorStruct} and
 * {@code returnedAlgoCount} as {@code IntPointer}. In this signature {@code workspace} and
 * {@code reserveSpace} are declared {@code @Const}, while {@code dw} is a plain {@code Pointer}.
 * NOTE(review): by its name this presumably searches RNN backward-weights algorithms and reports
 * them through {@code returnedAlgoCount}/{@code perfResults} -- confirm against the cuDNN API reference.
 */
public static native @Cast("cudnnStatus_t") int cudnnFindRNNBackwardWeightsAlgorithmEx(cudnnContext handle,
                                       cudnnRNNStruct rnnDesc,
                                       int seqLength,
                                       @Cast("cudnnTensorStruct**") @ByPtrPtr cudnnTensorStruct xDesc,
                                       @Const Pointer x,
                                       cudnnTensorStruct hxDesc,
                                       @Const Pointer hx,
                                       @Cast("cudnnTensorStruct**") @ByPtrPtr cudnnTensorStruct yDesc,
                                       @Const Pointer y,
                                       float findIntensity,
                                       int requestedAlgoCount,
                                       IntPointer returnedAlgoCount,
                                       @ByPtrPtr cudnnAlgorithmPerformanceStruct perfResults,
                                       @Const Pointer workspace,
                                       @Cast("size_t") long workSpaceSizeInBytes,
                                       cudnnFilterStruct dwDesc,
                                       Pointer dw,
                                       @Const Pointer reserveSpace,
                                       @Cast("size_t") long reserveSpaceSizeInBytes);
/**
 * Native declaration (no Java body); the {@code int} result is annotated {@code @Cast("cudnnStatus_t")}.
 * Overload taking {@code xDesc}/{@code yDesc} as {@code PointerPointer} (cast to
 * {@code cudnnTensorStruct**}) and {@code returnedAlgoCount} as {@code IntBuffer}.
 * NOTE(review): by its name this presumably searches RNN backward-weights algorithms and reports
 * them through {@code returnedAlgoCount}/{@code perfResults} -- confirm against the cuDNN API reference.
 */
public static native @Cast("cudnnStatus_t") int cudnnFindRNNBackwardWeightsAlgorithmEx(cudnnContext handle,
                                       cudnnRNNStruct rnnDesc,
                                       int seqLength,
                                       @Cast("cudnnTensorStruct**") PointerPointer xDesc,
                                       @Const Pointer x,
                                       cudnnTensorStruct hxDesc,
                                       @Const Pointer hx,
                                       @Cast("cudnnTensorStruct**") PointerPointer yDesc,
                                       @Const Pointer y,
                                       float findIntensity,
                                       int requestedAlgoCount,
                                       IntBuffer returnedAlgoCount,
                                       @ByPtrPtr cudnnAlgorithmPerformanceStruct perfResults,
                                       @Const Pointer workspace,
                                       @Cast("size_t") long workSpaceSizeInBytes,
                                       cudnnFilterStruct dwDesc,
                                       Pointer dw,
                                       @Const Pointer reserveSpace,
                                       @Cast("size_t") long reserveSpaceSizeInBytes);
/**
 * Native declaration (no Java body); the {@code int} result is annotated {@code @Cast("cudnnStatus_t")}.
 * Overload taking {@code xDesc}/{@code yDesc} as {@code @ByPtrPtr cudnnTensorStruct} and
 * {@code returnedAlgoCount} as {@code int[]}.
 * NOTE(review): by its name this presumably searches RNN backward-weights algorithms and reports
 * them through {@code returnedAlgoCount}/{@code perfResults} -- confirm against the cuDNN API reference.
 */
public static native @Cast("cudnnStatus_t") int cudnnFindRNNBackwardWeightsAlgorithmEx(cudnnContext handle,
                                       cudnnRNNStruct rnnDesc,
                                       int seqLength,
                                       @Cast("cudnnTensorStruct**") @ByPtrPtr cudnnTensorStruct xDesc,
                                       @Const Pointer x,
                                       cudnnTensorStruct hxDesc,
                                       @Const Pointer hx,
                                       @Cast("cudnnTensorStruct**") @ByPtrPtr cudnnTensorStruct yDesc,
                                       @Const Pointer y,
                                       float findIntensity,
                                       int requestedAlgoCount,
                                       int[] returnedAlgoCount,
                                       @ByPtrPtr cudnnAlgorithmPerformanceStruct perfResults,
                                       @Const Pointer workspace,
                                       @Cast("size_t") long workSpaceSizeInBytes,
                                       cudnnFilterStruct dwDesc,
                                       Pointer dw,
                                       @Const Pointer reserveSpace,
                                       @Cast("size_t") long reserveSpaceSizeInBytes);
public static native @Cast("cudnnStatus_t") int cudnnFindRNNBackwardWeightsAlgorithmEx(cudnnContext handle,
                                       cudnnRNNStruct rnnDesc,
                                       int seqLength,
                                       @Cast("cudnnTensorStruct**") PointerPointer xDesc,
                                       @Const Pointer x,
                                       cudnnTensorStruct hxDesc,
                                       @Const Pointer hx,
                                       @Cast("cudnnTensorStruct**") PointerPointer yDesc,
                                       @Const Pointer y,
                                       float findIntensity,
                                       int requestedAlgoCount,
                                       IntPointer returnedAlgoCount,
                                       @ByPtrPtr cudnnAlgorithmPerformanceStruct perfResults,
                                       @Const Pointer workspace,
                                       @Cast("size_t") long workSpaceSizeInBytes,
                                       cudnnFilterStruct dwDesc,
                                       Pointer dw,
                                       @Const Pointer reserveSpace,
                                       @Cast("size_t") long reserveSpaceSizeInBytes);
public static native @Cast("cudnnStatus_t") int cudnnFindRNNBackwardWeightsAlgorithmEx(cudnnContext handle,
                                       cudnnRNNStruct rnnDesc,
                                       int seqLength,
                                       @Cast("cudnnTensorStruct**") @ByPtrPtr cudnnTensorStruct xDesc,
                                       @Const Pointer x,
                                       cudnnTensorStruct hxDesc,
                                       @Const Pointer hx,
                                       @Cast("cudnnTensorStruct**") @ByPtrPtr cudnnTensorStruct yDesc,
                                       @Const Pointer y,
                                       float findIntensity,
                                       int requestedAlgoCount,
                                       IntBuffer returnedAlgoCount,
                                       @ByPtrPtr cudnnAlgorithmPerformanceStruct perfResults,
                                       @Const Pointer workspace,
                                       @Cast("size_t") long workSpaceSizeInBytes,
                                       cudnnFilterStruct dwDesc,
                                       Pointer dw,
                                       @Const Pointer reserveSpace,
                                       @Cast("size_t") long reserveSpaceSizeInBytes);
public static native @Cast("cudnnStatus_t") int cudnnFindRNNBackwardWeightsAlgorithmEx(cudnnContext handle,
                                       cudnnRNNStruct rnnDesc,
                                       int seqLength,
                                       @Cast("cudnnTensorStruct**") PointerPointer xDesc,
                                       @Const Pointer x,
                                       cudnnTensorStruct hxDesc,
                                       @Const Pointer hx,
                                       @Cast("cudnnTensorStruct**") PointerPointer yDesc,
                                       @Const Pointer y,
                                       float findIntensity,
                                       int requestedAlgoCount,
                                       int[] returnedAlgoCount,
                                       @ByPtrPtr cudnnAlgorithmPerformanceStruct perfResults,
                                       @Const Pointer workspace,
                                       @Cast("size_t") long workSpaceSizeInBytes,
                                       cudnnFilterStruct dwDesc,
                                       Pointer dw,
                                       @Const Pointer reserveSpace,
                                       @Cast("size_t") long reserveSpaceSizeInBytes);

/**
 * Opaque handle type for a persistent RNN plan. Being {@code @Opaque}, it maps no
 * fields and declares no native allocator: an instance only wraps an address, either
 * null (default constructor) or copied from another Pointer (cast constructor).
 * It is the plan type accepted by cudnnCreatePersistentRNNPlan,
 * cudnnSetPersistentRNNPlan and cudnnDestroyPersistentRNNPlan.
 */
@Opaque public static class cudnnPersistentRNNPlan extends Pointer {
    /** Empty constructor. Calls {@code super((Pointer)null)}. */
    public cudnnPersistentRNNPlan() { super((Pointer)null); }
    /** Pointer cast constructor. Invokes {@link Pointer#Pointer(Pointer)}. */
    public cudnnPersistentRNNPlan(Pointer p) { super(p); }
}

/* Expensive. Creates the plan for the specific settings. */
/**
 * Native binding for cudnnCreatePersistentRNNPlan; returns the native cudnnStatus_t as an int.
 * dataType is cast natively to {@code const cudnnDataType_t}.
 * plan is passed by pointer-to-pointer ({@code @ByPtrPtr}), presumably so the native
 * call can store the newly created plan handle in it -- confirm against the cuDNN reference.
 */
public static native @Cast("cudnnStatus_t") int cudnnCreatePersistentRNNPlan(cudnnRNNStruct rnnDesc,
                             int minibatch,
                             @Cast("const cudnnDataType_t") int dataType,
                             @ByPtrPtr cudnnPersistentRNNPlan plan);

/* Attaches the plan to the descriptor. */
/** Native binding for cudnnSetPersistentRNNPlan(rnnDesc, plan); returns the native cudnnStatus_t as an int. */
public static native @Cast("cudnnStatus_t") int cudnnSetPersistentRNNPlan(cudnnRNNStruct rnnDesc, cudnnPersistentRNNPlan plan);

/**
 * Native binding for cudnnDestroyPersistentRNNPlan; returns the native cudnnStatus_t as an int.
 * NOTE(review): presumably the plan handle must not be used after this call -- confirm.
 */
public static native @Cast("cudnnStatus_t") int cudnnDestroyPersistentRNNPlan(cudnnPersistentRNNPlan plan);

/**
 * Native binding for cudnnSetRNNDescriptor; returns the native cudnnStatus_t as an int.
 * The int parameters inputMode, direction, mode, algo and dataType are cast natively to
 * the enum type named in their respective {@code @Cast} annotation, so callers should
 * pass the matching CUDNN_* constants.
 */
public static native @Cast("cudnnStatus_t") int cudnnSetRNNDescriptor(cudnnContext handle,
                      cudnnRNNStruct rnnDesc,
                      int hiddenSize,
                      int numLayers,
                      cudnnDropoutStruct dropoutDesc,
                      @Cast("cudnnRNNInputMode_t") int inputMode,
                      @Cast("cudnnDirectionMode_t") int direction,
                      @Cast("cudnnRNNMode_t") int mode,
                      @Cast("cudnnRNNAlgo_t") int algo,
                      @Cast("cudnnDataType_t") int dataType);

/**
 * Native binding for cudnnSetRNNProjectionLayers; returns the native cudnnStatus_t as an int.
 * recProjSize and outProjSize are passed by value as plain ints.
 */
public static native @Cast("cudnnStatus_t") int cudnnSetRNNProjectionLayers(cudnnContext handle,
                            cudnnRNNStruct rnnDesc,
                            int recProjSize,
                            int outProjSize);

/**
 * Native binding for cudnnGetRNNProjectionLayers; returns the native cudnnStatus_t as an int.
 * recProjSize and outProjSize are int pointers, presumably written by the native call
 * (getter counterpart of cudnnSetRNNProjectionLayers) -- confirm.
 * Three overloads differ only in the Java carrier type: IntPointer (this one),
 * IntBuffer, or int[].
 */
public static native @Cast("cudnnStatus_t") int cudnnGetRNNProjectionLayers(cudnnContext handle,
                            cudnnRNNStruct rnnDesc,
                            IntPointer recProjSize,
                            IntPointer outProjSize);
/** IntBuffer overload of the same native call. */
public static native @Cast("cudnnStatus_t") int cudnnGetRNNProjectionLayers(cudnnContext handle,
                            cudnnRNNStruct rnnDesc,
                            IntBuffer recProjSize,
                            IntBuffer outProjSize);
/** int[] overload of the same native call. */
public static native @Cast("cudnnStatus_t") int cudnnGetRNNProjectionLayers(cudnnContext handle,
                            cudnnRNNStruct rnnDesc,
                            int[] recProjSize,
                            int[] outProjSize);

/** Native binding for cudnnSetRNNAlgorithmDescriptor(handle, rnnDesc, algoDesc); returns the native cudnnStatus_t as an int. */
public static native @Cast("cudnnStatus_t") int cudnnSetRNNAlgorithmDescriptor(cudnnContext handle, cudnnRNNStruct rnnDesc, cudnnAlgorithmStruct algoDesc);

/**
 * Native binding for cudnnGetRNNDescriptor; returns the native cudnnStatus_t as an int.
 * The parameter list parallels cudnnSetRNNDescriptor, but every setting is passed as a
 * pointer: hiddenSize/numLayers as int pointers, dropoutDesc by pointer-to-pointer, and
 * the enum settings as int pointers cast natively to the enum pointer type named in
 * {@code @Cast}. Presumably all of them are outputs -- confirm against the cuDNN reference.
 * Three overloads differ only in the Java carrier type: IntPointer (this one),
 * IntBuffer, or int[].
 */
public static native @Cast("cudnnStatus_t") int cudnnGetRNNDescriptor(cudnnContext handle,
                      cudnnRNNStruct rnnDesc,
                      IntPointer hiddenSize,
                      IntPointer numLayers,
                      @ByPtrPtr cudnnDropoutStruct dropoutDesc,
                      @Cast("cudnnRNNInputMode_t*") IntPointer inputMode,
                      @Cast("cudnnDirectionMode_t*") IntPointer direction,
                      @Cast("cudnnRNNMode_t*") IntPointer mode,
                      @Cast("cudnnRNNAlgo_t*") IntPointer algo,
                      @Cast("cudnnDataType_t*") IntPointer dataType);
/** IntBuffer overload of the same native call. */
public static native @Cast("cudnnStatus_t") int cudnnGetRNNDescriptor(cudnnContext handle,
                      cudnnRNNStruct rnnDesc,
                      IntBuffer hiddenSize,
                      IntBuffer numLayers,
                      @ByPtrPtr cudnnDropoutStruct dropoutDesc,
                      @Cast("cudnnRNNInputMode_t*") IntBuffer inputMode,
                      @Cast("cudnnDirectionMode_t*") IntBuffer direction,
                      @Cast("cudnnRNNMode_t*") IntBuffer mode,
                      @Cast("cudnnRNNAlgo_t*") IntBuffer algo,
                      @Cast("cudnnDataType_t*") IntBuffer dataType);
/** int[] overload of the same native call. */
public static native @Cast("cudnnStatus_t") int cudnnGetRNNDescriptor(cudnnContext handle,
                      cudnnRNNStruct rnnDesc,
                      int[] hiddenSize,
                      int[] numLayers,
                      @ByPtrPtr cudnnDropoutStruct dropoutDesc,
                      @Cast("cudnnRNNInputMode_t*") int[] inputMode,
                      @Cast("cudnnDirectionMode_t*") int[] direction,
                      @Cast("cudnnRNNMode_t*") int[] mode,
                      @Cast("cudnnRNNAlgo_t*") int[] algo,
                      @Cast("cudnnDataType_t*") int[] dataType);

/** Native binding for cudnnSetRNNMatrixMathType; mType is cast natively to cudnnMathType_t. Returns the native cudnnStatus_t as an int. */
public static native @Cast("cudnnStatus_t") int cudnnSetRNNMatrixMathType(cudnnRNNStruct rnnDesc, @Cast("cudnnMathType_t") int mType);

/**
 * Native binding for cudnnGetRNNMatrixMathType; returns the native cudnnStatus_t as an int.
 * mType is cast natively to a cudnnMathType_t pointer, presumably receiving the current
 * math type -- confirm. Overloads differ only in carrier type: IntPointer, IntBuffer, int[].
 */
public static native @Cast("cudnnStatus_t") int cudnnGetRNNMatrixMathType(cudnnRNNStruct rnnDesc, @Cast("cudnnMathType_t*") IntPointer mType);
public static native @Cast("cudnnStatus_t") int cudnnGetRNNMatrixMathType(cudnnRNNStruct rnnDesc, @Cast("cudnnMathType_t*") IntBuffer mType);
public static native @Cast("cudnnStatus_t") int cudnnGetRNNMatrixMathType(cudnnRNNStruct rnnDesc, @Cast("cudnnMathType_t*") int[] mType);

/* dataType in the RNN descriptor is used to determine math precision */
/* dataType in weight descriptors and input descriptors is used to describe storage */
/**
 * Native binding for cudnnGetRNNWorkspaceSize; returns the native cudnnStatus_t as an int.
 * sizeInBytes is a size_t pointer, presumably receiving the required workspace size -- confirm.
 * This overload passes xDesc as a single descriptor by pointer-to-pointer.
 * NOTE(review): xDesc is cast to cudnnTensorStruct** natively and is presumably an array
 * of seqLength descriptors -- confirm.
 */
public static native @Cast("cudnnStatus_t") int cudnnGetRNNWorkspaceSize(cudnnContext handle,
                         cudnnRNNStruct rnnDesc,
                         int seqLength,
                         @Cast("cudnnTensorStruct**") @ByPtrPtr cudnnTensorStruct xDesc,
                         @Cast("size_t*") SizeTPointer sizeInBytes);
/** Same native call with xDesc supplied as a PointerPointer. */
public static native @Cast("cudnnStatus_t") int cudnnGetRNNWorkspaceSize(cudnnContext handle,
                         cudnnRNNStruct rnnDesc,
                         int seqLength,
                         @Cast("cudnnTensorStruct**") PointerPointer xDesc,
                         @Cast("size_t*") SizeTPointer sizeInBytes);

/**
 * Native binding for cudnnGetRNNTrainingReserveSize; returns the native cudnnStatus_t as an int.
 * sizeInBytes is a size_t pointer, presumably receiving the required reserve-space size -- confirm.
 * This overload passes xDesc as a single descriptor by pointer-to-pointer.
 * NOTE(review): xDesc is cast to cudnnTensorStruct** natively and is presumably an array
 * of seqLength descriptors -- confirm.
 */
public static native @Cast("cudnnStatus_t") int cudnnGetRNNTrainingReserveSize(cudnnContext handle,
                               cudnnRNNStruct rnnDesc,
                               int seqLength,
                               @Cast("cudnnTensorStruct**") @ByPtrPtr cudnnTensorStruct xDesc,
                               @Cast("size_t*") SizeTPointer sizeInBytes);
/** Same native call with xDesc supplied as a PointerPointer. */
public static native @Cast("cudnnStatus_t") int cudnnGetRNNTrainingReserveSize(cudnnContext handle,
                               cudnnRNNStruct rnnDesc,
                               int seqLength,
                               @Cast("cudnnTensorStruct**") PointerPointer xDesc,
                               @Cast("size_t*") SizeTPointer sizeInBytes);

/**
 * Native binding for cudnnGetRNNParamsSize; returns the native cudnnStatus_t as an int.
 * Unlike the workspace/reserve size queries, xDesc here is a single descriptor passed
 * directly (no pointer-to-pointer cast). dataType is cast natively to cudnnDataType_t.
 * sizeInBytes is a size_t pointer, presumably receiving the parameter size -- confirm.
 */
public static native @Cast("cudnnStatus_t") int cudnnGetRNNParamsSize(cudnnContext handle,
                      cudnnRNNStruct rnnDesc,
                      cudnnTensorStruct xDesc,
                      @Cast("size_t*") SizeTPointer sizeInBytes,
                      @Cast("cudnnDataType_t") int dataType);

/**
 * Native binding for cudnnGetRNNLinLayerMatrixParams; returns the native cudnnStatus_t as an int.
 * w is a read-only ({@code @Const}) pointer; linLayerMat is cast natively to void**,
 * presumably receiving the address of the selected matrix inside w -- confirm.
 * This overload takes linLayerMat as a PointerPointer.
 */
public static native @Cast("cudnnStatus_t") int cudnnGetRNNLinLayerMatrixParams(cudnnContext handle,
                                cudnnRNNStruct rnnDesc,
                                int pseudoLayer,
                                cudnnTensorStruct xDesc,
                                cudnnFilterStruct wDesc,
                                @Const Pointer w,
                                int linLayerID,
                                cudnnFilterStruct linLayerMatDesc,
                                @Cast("void**") PointerPointer linLayerMat);
/** Same native call with linLayerMat supplied as a single Pointer passed by pointer-to-pointer. */
public static native @Cast("cudnnStatus_t") int cudnnGetRNNLinLayerMatrixParams(cudnnContext handle,
                                cudnnRNNStruct rnnDesc,
                                int pseudoLayer,
                                cudnnTensorStruct xDesc,
                                cudnnFilterStruct wDesc,
                                @Const Pointer w,
                                int linLayerID,
                                cudnnFilterStruct linLayerMatDesc,
                                @Cast("void**") @ByPtrPtr Pointer linLayerMat);

/**
 * Native binding for cudnnGetRNNLinLayerBiasParams; returns the native cudnnStatus_t as an int.
 * Signature parallels cudnnGetRNNLinLayerMatrixParams with bias descriptor/pointer in
 * place of the matrix ones. linLayerBias is cast natively to void**, presumably
 * receiving the address of the selected bias inside w -- confirm.
 * This overload takes linLayerBias as a PointerPointer.
 */
public static native @Cast("cudnnStatus_t") int cudnnGetRNNLinLayerBiasParams(cudnnContext handle,
                              cudnnRNNStruct rnnDesc,
                              int pseudoLayer,
                              cudnnTensorStruct xDesc,
                              cudnnFilterStruct wDesc,
                              @Const Pointer w,
                              int linLayerID,
                              cudnnFilterStruct linLayerBiasDesc,
                              @Cast("void**") PointerPointer linLayerBias);
/** Same native call with linLayerBias supplied as a single Pointer passed by pointer-to-pointer. */
public static native @Cast("cudnnStatus_t") int cudnnGetRNNLinLayerBiasParams(cudnnContext handle,
                              cudnnRNNStruct rnnDesc,
                              int pseudoLayer,
                              cudnnTensorStruct xDesc,
                              cudnnFilterStruct wDesc,
                              @Const Pointer w,
                              int linLayerID,
                              cudnnFilterStruct linLayerBiasDesc,
                              @Cast("void**") @ByPtrPtr Pointer linLayerBias);

/**
 * Native binding for cudnnRNNForwardInference; returns the native cudnnStatus_t as an int.
 * Read-only ({@code @Const}) data pointers: x, hx, cx, w. Non-const data pointers:
 * y, hy, cy and workspace. workSpaceSizeInBytes is passed natively as size_t.
 * This overload passes xDesc/yDesc as single descriptors by pointer-to-pointer.
 * NOTE(review): xDesc/yDesc are cast to cudnnTensorStruct** natively and are presumably
 * arrays of seqLength descriptors -- confirm.
 */
public static native @Cast("cudnnStatus_t") int cudnnRNNForwardInference(cudnnContext handle,
                         cudnnRNNStruct rnnDesc,
                         int seqLength,
                         @Cast("cudnnTensorStruct**") @ByPtrPtr cudnnTensorStruct xDesc,
                         @Const Pointer x,
                         cudnnTensorStruct hxDesc,
                         @Const Pointer hx,
                         cudnnTensorStruct cxDesc,
                         @Const Pointer cx,
                         cudnnFilterStruct wDesc,
                         @Const Pointer w,
                         @Cast("cudnnTensorStruct**") @ByPtrPtr cudnnTensorStruct yDesc,
                         Pointer y,
                         cudnnTensorStruct hyDesc,
                         Pointer hy,
                         cudnnTensorStruct cyDesc,
                         Pointer cy,
                         Pointer workspace,
                         @Cast("size_t") long workSpaceSizeInBytes);
/** Same native call with xDesc/yDesc supplied as PointerPointer. */
public static native @Cast("cudnnStatus_t") int cudnnRNNForwardInference(cudnnContext handle,
                         cudnnRNNStruct rnnDesc,
                         int seqLength,
                         @Cast("cudnnTensorStruct**") PointerPointer xDesc,
                         @Const Pointer x,
                         cudnnTensorStruct hxDesc,
                         @Const Pointer hx,
                         cudnnTensorStruct cxDesc,
                         @Const Pointer cx,
                         cudnnFilterStruct wDesc,
                         @Const Pointer w,
                         @Cast("cudnnTensorStruct**") PointerPointer yDesc,
                         Pointer y,
                         cudnnTensorStruct hyDesc,
                         Pointer hy,
                         cudnnTensorStruct cyDesc,
                         Pointer cy,
                         Pointer workspace,
                         @Cast("size_t") long workSpaceSizeInBytes);

/**
 * Native binding for cudnnRNNForwardTraining; returns the native cudnnStatus_t as an int.
 * Same parameter list as cudnnRNNForwardInference plus a non-const reserveSpace pointer
 * and its size. Read-only ({@code @Const}) data pointers: x, hx, cx, w; non-const:
 * y, hy, cy, workspace, reserveSpace. Both sizes are passed natively as size_t.
 * This overload passes xDesc/yDesc as single descriptors by pointer-to-pointer.
 * NOTE(review): xDesc/yDesc are cast to cudnnTensorStruct** natively and are presumably
 * arrays of seqLength descriptors -- confirm.
 */
public static native @Cast("cudnnStatus_t") int cudnnRNNForwardTraining(cudnnContext handle,
                        cudnnRNNStruct rnnDesc,
                        int seqLength,
                        @Cast("cudnnTensorStruct**") @ByPtrPtr cudnnTensorStruct xDesc,
                        @Const Pointer x,
                        cudnnTensorStruct hxDesc,
                        @Const Pointer hx,
                        cudnnTensorStruct cxDesc,
                        @Const Pointer cx,
                        cudnnFilterStruct wDesc,
                        @Const Pointer w,
                        @Cast("cudnnTensorStruct**") @ByPtrPtr cudnnTensorStruct yDesc,
                        Pointer y,
                        cudnnTensorStruct hyDesc,
                        Pointer hy,
                        cudnnTensorStruct cyDesc,
                        Pointer cy,
                        Pointer workspace,
                        @Cast("size_t") long workSpaceSizeInBytes,
                        Pointer reserveSpace,
                        @Cast("size_t") long reserveSpaceSizeInBytes);
/** Same native call with xDesc/yDesc supplied as PointerPointer. */
public static native @Cast("cudnnStatus_t") int cudnnRNNForwardTraining(cudnnContext handle,
                        cudnnRNNStruct rnnDesc,
                        int seqLength,
                        @Cast("cudnnTensorStruct**") PointerPointer xDesc,
                        @Const Pointer x,
                        cudnnTensorStruct hxDesc,
                        @Const Pointer hx,
                        cudnnTensorStruct cxDesc,
                        @Const Pointer cx,
                        cudnnFilterStruct wDesc,
                        @Const Pointer w,
                        @Cast("cudnnTensorStruct**") PointerPointer yDesc,
                        Pointer y,
                        cudnnTensorStruct hyDesc,
                        Pointer hy,
                        cudnnTensorStruct cyDesc,
                        Pointer cy,
                        Pointer workspace,
                        @Cast("size_t") long workSpaceSizeInBytes,
                        Pointer reserveSpace,
                        @Cast("size_t") long reserveSpaceSizeInBytes);

/**
 * Native binding for cudnnRNNBackwardData; returns the native cudnnStatus_t as an int.
 * Read-only ({@code @Const}) data pointers: y, dy, dhy, dcy, w, hx, cx. Non-const data
 * pointers: dx, dhx, dcx, workspace and reserveSpace. Both sizes are passed natively as size_t.
 * This overload passes yDesc/dyDesc/dxDesc as single descriptors by pointer-to-pointer.
 * NOTE(review): those three descriptors are cast to cudnnTensorStruct** natively and are
 * presumably arrays of seqLength descriptors -- confirm.
 */
public static native @Cast("cudnnStatus_t") int cudnnRNNBackwardData(cudnnContext handle,
                     cudnnRNNStruct rnnDesc,
                     int seqLength,
                     @Cast("cudnnTensorStruct**") @ByPtrPtr cudnnTensorStruct yDesc,
                     @Const Pointer y,
                     @Cast("cudnnTensorStruct**") @ByPtrPtr cudnnTensorStruct dyDesc,
                     @Const Pointer dy,
                     cudnnTensorStruct dhyDesc,
                     @Const Pointer dhy,
                     cudnnTensorStruct dcyDesc,
                     @Const Pointer dcy,
                     cudnnFilterStruct wDesc,
                     @Const Pointer w,
                     cudnnTensorStruct hxDesc,
                     @Const Pointer hx,
                     cudnnTensorStruct cxDesc,
                     @Const Pointer cx,
                     @Cast("cudnnTensorStruct**") @ByPtrPtr cudnnTensorStruct dxDesc,
                     Pointer dx,
                     cudnnTensorStruct dhxDesc,
                     Pointer dhx,
                     cudnnTensorStruct dcxDesc,
                     Pointer dcx,
                     Pointer workspace,
                     @Cast("size_t") long workSpaceSizeInBytes,
                     Pointer reserveSpace,
                     @Cast("size_t") long reserveSpaceSizeInBytes);
/** Same native call with yDesc/dyDesc/dxDesc supplied as PointerPointer. */
public static native @Cast("cudnnStatus_t") int cudnnRNNBackwardData(cudnnContext handle,
                     cudnnRNNStruct rnnDesc,
                     int seqLength,
                     @Cast("cudnnTensorStruct**") PointerPointer yDesc,
                     @Const Pointer y,
                     @Cast("cudnnTensorStruct**") PointerPointer dyDesc,
                     @Const Pointer dy,
                     cudnnTensorStruct dhyDesc,
                     @Const Pointer dhy,
                     cudnnTensorStruct dcyDesc,
                     @Const Pointer dcy,
                     cudnnFilterStruct wDesc,
                     @Const Pointer w,
                     cudnnTensorStruct hxDesc,
                     @Const Pointer hx,
                     cudnnTensorStruct cxDesc,
                     @Const Pointer cx,
                     @Cast("cudnnTensorStruct**") PointerPointer dxDesc,
                     Pointer dx,
                     cudnnTensorStruct dhxDesc,
                     Pointer dhx,
                     cudnnTensorStruct dcxDesc,
                     Pointer dcx,
                     Pointer workspace,
                     @Cast("size_t") long workSpaceSizeInBytes,
                     Pointer reserveSpace,
                     @Cast("size_t") long reserveSpaceSizeInBytes);

/**
 * Native binding for cudnnRNNBackwardWeights; returns the native cudnnStatus_t as an int.
 * Every data pointer except dw is declared read-only ({@code @Const}), including
 * workspace and reserveSpace; dw is the only non-const data pointer, so it is
 * presumably the output -- confirm. Both sizes are passed natively as size_t.
 * This overload passes xDesc/yDesc as single descriptors by pointer-to-pointer.
 * NOTE(review): xDesc/yDesc are cast to cudnnTensorStruct** natively and are presumably
 * arrays of seqLength descriptors -- confirm.
 */
public static native @Cast("cudnnStatus_t") int cudnnRNNBackwardWeights(cudnnContext handle,
                        cudnnRNNStruct rnnDesc,
                        int seqLength,
                        @Cast("cudnnTensorStruct**") @ByPtrPtr cudnnTensorStruct xDesc,
                        @Const Pointer x,
                        cudnnTensorStruct hxDesc,
                        @Const Pointer hx,
                        @Cast("cudnnTensorStruct**") @ByPtrPtr cudnnTensorStruct yDesc,
                        @Const Pointer y,
                        @Const Pointer workspace,
                        @Cast("size_t") long workSpaceSizeInBytes,
                        cudnnFilterStruct dwDesc,
                        Pointer dw,
                        @Const Pointer reserveSpace,
                        @Cast("size_t") long reserveSpaceSizeInBytes);
/** Same native call with xDesc/yDesc supplied as PointerPointer. */
public static native @Cast("cudnnStatus_t") int cudnnRNNBackwardWeights(cudnnContext handle,
                        cudnnRNNStruct rnnDesc,
                        int seqLength,
                        @Cast("cudnnTensorStruct**") PointerPointer xDesc,
                        @Const Pointer x,
                        cudnnTensorStruct hxDesc,
                        @Const Pointer hx,
                        @Cast("cudnnTensorStruct**") PointerPointer yDesc,
                        @Const Pointer y,
                        @Const Pointer workspace,
                        @Cast("size_t") long workSpaceSizeInBytes,
                        cudnnFilterStruct dwDesc,
                        Pointer dw,
                        @Const Pointer reserveSpace,
                        @Cast("size_t") long reserveSpaceSizeInBytes);

/** enum cudnnCTCLossAlgo_t: int values for parameters cast natively to cudnnCTCLossAlgo_t
 *  (e.g. the {@code algo} argument of cudnnCTCLoss and cudnnGetCTCLossWorkspaceSize). */
public static final int CUDNN_CTC_LOSS_ALGO_DETERMINISTIC = 0, CUDNN_CTC_LOSS_ALGO_NON_DETERMINISTIC = 1;

/*
* Create an instance of a CTC (Connectionist Temporal Classification) loss descriptor
*/
/**
 * Native binding for cudnnCreateCTCLossDescriptor; returns the native cudnnStatus_t as an int.
 * ctcLossDesc is passed by pointer-to-pointer ({@code @ByPtrPtr}), presumably so the
 * native call can store the new descriptor handle in it -- confirm.
 */
public static native @Cast("cudnnStatus_t") int cudnnCreateCTCLossDescriptor(@ByPtrPtr cudnnCTCLossStruct ctcLossDesc);

/** Native binding for cudnnSetCTCLossDescriptor; compType is cast natively to cudnnDataType_t. Returns the native cudnnStatus_t as an int. */
public static native @Cast("cudnnStatus_t") int cudnnSetCTCLossDescriptor(cudnnCTCLossStruct ctcLossDesc, @Cast("cudnnDataType_t") int compType);

/**
 * Native binding for cudnnGetCTCLossDescriptor; returns the native cudnnStatus_t as an int.
 * compType is cast natively to a cudnnDataType_t pointer, presumably receiving the
 * descriptor's compute type -- confirm. Overloads differ only in carrier type:
 * IntPointer, IntBuffer, int[].
 */
public static native @Cast("cudnnStatus_t") int cudnnGetCTCLossDescriptor(cudnnCTCLossStruct ctcLossDesc, @Cast("cudnnDataType_t*") IntPointer compType);
public static native @Cast("cudnnStatus_t") int cudnnGetCTCLossDescriptor(cudnnCTCLossStruct ctcLossDesc, @Cast("cudnnDataType_t*") IntBuffer compType);
public static native @Cast("cudnnStatus_t") int cudnnGetCTCLossDescriptor(cudnnCTCLossStruct ctcLossDesc, @Cast("cudnnDataType_t*") int[] compType);

/**
 * Native binding for cudnnDestroyCTCLossDescriptor; returns the native cudnnStatus_t as an int.
 * NOTE(review): presumably the descriptor must not be used after this call -- confirm.
 */
public static native @Cast("cudnnStatus_t") int cudnnDestroyCTCLossDescriptor(cudnnCTCLossStruct ctcLossDesc);

/* return the ctc costs and gradients, given the probabilities and labels */
/**
 * Native binding for cudnnCTCLoss; returns the native cudnnStatus_t as an int.
 * algo is cast natively to cudnnCTCLossAlgo_t (see the CUDNN_CTC_LOSS_ALGO_* constants);
 * workSpaceSizeInBytes is passed natively as size_t.
 * labels, labelLengths and inputLengths are read-only int arrays; the three overloads
 * differ only in their carrier type: IntPointer (this one), IntBuffer, or int[].
 * NOTE(review): gradients is declared {@code @Const} in this binding even though it
 * presumably receives the computed gradients -- confirm against the native header.
 * NOTE(review): which pointers must reference host vs. device memory is not visible
 * here -- confirm against the cuDNN reference.
 */
public static native @Cast("cudnnStatus_t") int cudnnCTCLoss(
    cudnnContext handle,
    cudnnTensorStruct probsDesc,
    @Const Pointer probs,
    @Const IntPointer labels,
    @Const IntPointer labelLengths,
    @Const IntPointer inputLengths,
    Pointer costs,
    cudnnTensorStruct gradientsDesc,
    @Const Pointer gradients,
    @Cast("cudnnCTCLossAlgo_t") int algo,
    cudnnCTCLossStruct ctcLossDesc,
    Pointer workspace,
    @Cast("size_t") long workSpaceSizeInBytes);
/** IntBuffer overload of the same native call. */
public static native @Cast("cudnnStatus_t") int cudnnCTCLoss(
    cudnnContext handle,
    cudnnTensorStruct probsDesc,
    @Const Pointer probs,
    @Const IntBuffer labels,
    @Const IntBuffer labelLengths,
    @Const IntBuffer inputLengths,
    Pointer costs,
    cudnnTensorStruct gradientsDesc,
    @Const Pointer gradients,
    @Cast("cudnnCTCLossAlgo_t") int algo,
    cudnnCTCLossStruct ctcLossDesc,
    Pointer workspace,
    @Cast("size_t") long workSpaceSizeInBytes);
/** int[] overload of the same native call. */
public static native @Cast("cudnnStatus_t") int cudnnCTCLoss(
    cudnnContext handle,
    cudnnTensorStruct probsDesc,
    @Const Pointer probs,
    @Const int[] labels,
    @Const int[] labelLengths,
    @Const int[] inputLengths,
    Pointer costs,
    cudnnTensorStruct gradientsDesc,
    @Const Pointer gradients,
    @Cast("cudnnCTCLossAlgo_t") int algo,
    cudnnCTCLossStruct ctcLossDesc,
    Pointer workspace,
    @Cast("size_t") long workSpaceSizeInBytes); /* the workspace size needed */

/* return the workspace size needed for ctc */
/**
 * Native binding for cudnnGetCTCLossWorkspaceSize; returns the native cudnnStatus_t as an int.
 * algo is cast natively to cudnnCTCLossAlgo_t; sizeInBytes is a size_t pointer that
 * receives the workspace size (per the trailing comment on the last overload).
 * labels, labelLengths and inputLengths are read-only int arrays; the three overloads
 * differ only in their carrier type: IntPointer (this one), IntBuffer, or int[].
 */
public static native @Cast("cudnnStatus_t") int cudnnGetCTCLossWorkspaceSize(
    cudnnContext handle,
    cudnnTensorStruct probsDesc,
    cudnnTensorStruct gradientsDesc,
    @Const IntPointer labels,
    @Const IntPointer labelLengths,
    @Const IntPointer inputLengths,
    @Cast("cudnnCTCLossAlgo_t") int algo,
    cudnnCTCLossStruct ctcLossDesc,
    @Cast("size_t*") SizeTPointer sizeInBytes);
/** IntBuffer overload of the same native call. */
public static native @Cast("cudnnStatus_t") int cudnnGetCTCLossWorkspaceSize(
    cudnnContext handle,
    cudnnTensorStruct probsDesc,
    cudnnTensorStruct gradientsDesc,
    @Const IntBuffer labels,
    @Const IntBuffer labelLengths,
    @Const IntBuffer inputLengths,
    @Cast("cudnnCTCLossAlgo_t") int algo,
    cudnnCTCLossStruct ctcLossDesc,
    @Cast("size_t*") SizeTPointer sizeInBytes);
/** int[] overload of the same native call. */
public static native @Cast("cudnnStatus_t") int cudnnGetCTCLossWorkspaceSize(
    cudnnContext handle,
    cudnnTensorStruct probsDesc,
    cudnnTensorStruct gradientsDesc,
    @Const int[] labels,
    @Const int[] labelLengths,
    @Const int[] inputLengths,
    @Cast("cudnnCTCLossAlgo_t") int algo,
    cudnnCTCLossStruct ctcLossDesc,
    @Cast("size_t*") SizeTPointer sizeInBytes); /* pointer to the returned workspace size */

/**
 * Java peer of the native cudnnAlgorithm_t struct. Unlike the opaque handle types, it
 * can be allocated from Java (single instance or array) and exposes accessors for the
 * members of its native {@code algo} field.
 * NOTE(review): all five accessors map to members of the same {@code algo} field, which
 * is presumably a union, so only the member matching the algorithm kind in use would be
 * meaningful -- confirm against the native header.
 */
public static class cudnnAlgorithm_t extends Pointer {
    static { Loader.load(); }
    /** Default native constructor. */
    public cudnnAlgorithm_t() { super((Pointer)null); allocate(); }
    /** Native array allocator. Access with {@link Pointer#position(long)}. */
    public cudnnAlgorithm_t(long size) { super((Pointer)null); allocateArray(size); }
    /** Pointer cast constructor. Invokes {@link Pointer#Pointer(Pointer)}. */
    public cudnnAlgorithm_t(Pointer p) { super(p); }
    private native void allocate();
    private native void allocateArray(long size);
    /** Narrows the return type of {@link Pointer#position(long)} to this class. */
    @Override public cudnnAlgorithm_t position(long position) {
        return (cudnnAlgorithm_t)super.position(position);
    }

        // Getter/setter pairs: @Name gives the native member path; each getter's int is
        // cast natively to the enum type named in @Cast. Setters return cudnnAlgorithm_t.
        @Name("algo.convFwdAlgo") public native @Cast("cudnnConvolutionFwdAlgo_t") int algo_convFwdAlgo(); public native cudnnAlgorithm_t algo_convFwdAlgo(int algo_convFwdAlgo);
        @Name("algo.convBwdFilterAlgo") public native @Cast("cudnnConvolutionBwdFilterAlgo_t") int algo_convBwdFilterAlgo(); public native cudnnAlgorithm_t algo_convBwdFilterAlgo(int algo_convBwdFilterAlgo);
        @Name("algo.convBwdDataAlgo") public native @Cast("cudnnConvolutionBwdDataAlgo_t") int algo_convBwdDataAlgo(); public native cudnnAlgorithm_t algo_convBwdDataAlgo(int algo_convBwdDataAlgo);
        @Name("algo.RNNAlgo") public native @Cast("cudnnRNNAlgo_t") int algo_RNNAlgo(); public native cudnnAlgorithm_t algo_RNNAlgo(int algo_RNNAlgo);
        @Name("algo.CTCLossAlgo") public native @Cast("cudnnCTCLossAlgo_t") int algo_CTCLossAlgo(); public native cudnnAlgorithm_t algo_CTCLossAlgo(int algo_CTCLossAlgo);
}

/**
 * Native binding for cudnnCreateAlgorithmDescriptor; returns the native cudnnStatus_t as an int.
 * algoDesc is passed by pointer-to-pointer ({@code @ByPtrPtr}), presumably so the native
 * call can store the new descriptor handle in it -- confirm.
 */
public static native @Cast("cudnnStatus_t") int cudnnCreateAlgorithmDescriptor(@ByPtrPtr cudnnAlgorithmStruct algoDesc);

/** Native binding for cudnnSetAlgorithmDescriptor; algorithm is passed by value ({@code @ByVal}). Returns the native cudnnStatus_t as an int. */
public static native @Cast("cudnnStatus_t") int cudnnSetAlgorithmDescriptor(cudnnAlgorithmStruct algoDesc, @ByVal cudnnAlgorithm_t algorithm);

/**
 * Native binding for cudnnGetAlgorithmDescriptor; returns the native cudnnStatus_t as an int.
 * Here algorithm is passed by pointer (no {@code @ByVal}, unlike the setter), presumably
 * so the native call can fill it in -- confirm.
 */
public static native @Cast("cudnnStatus_t") int cudnnGetAlgorithmDescriptor(cudnnAlgorithmStruct algoDesc, cudnnAlgorithm_t algorithm);

/** Native binding for cudnnCopyAlgorithmDescriptor(src, dest); returns the native cudnnStatus_t as an int. */
public static native @Cast("cudnnStatus_t") int cudnnCopyAlgorithmDescriptor(cudnnAlgorithmStruct src, cudnnAlgorithmStruct dest);

/**
 * Native binding for cudnnDestroyAlgorithmDescriptor; returns the native cudnnStatus_t as an int.
 * NOTE(review): presumably the descriptor must not be used after this call -- confirm.
 */
public static native @Cast("cudnnStatus_t") int cudnnDestroyAlgorithmDescriptor(cudnnAlgorithmStruct algoDesc);

/**
 * Native binding for cudnnCreateAlgorithmPerformance; returns the native cudnnStatus_t as an int.
 * algoPerf is passed by pointer-to-pointer ({@code @ByPtrPtr}).
 * NOTE(review): with numberToCreate greater than 1 the native side presumably expects an
 * array of handles, which this single-object signature may not express -- confirm before use.
 */
public static native @Cast("cudnnStatus_t") int cudnnCreateAlgorithmPerformance(@ByPtrPtr cudnnAlgorithmPerformanceStruct algoPerf, int numberToCreate);

/**
 * Native binding for cudnnSetAlgorithmPerformance; returns the native cudnnStatus_t as an int.
 * status is cast natively to cudnnStatus_t and memory to size_t; time is a plain float
 * (units not visible here).
 */
public static native @Cast("cudnnStatus_t") int cudnnSetAlgorithmPerformance(cudnnAlgorithmPerformanceStruct algoPerf,
                             cudnnAlgorithmStruct algoDesc,
                             @Cast("cudnnStatus_t") int status,
                             float time,
                             @Cast("size_t") long memory);

/**
 * Native binding for cudnnGetAlgorithmPerformance; returns the native cudnnStatus_t as an int.
 * Parallels cudnnSetAlgorithmPerformance with every field passed as a pointer:
 * algoDesc by pointer-to-pointer, status as a cudnnStatus_t pointer, time as a float
 * pointer, memory as a size_t pointer. Presumably all are outputs -- confirm.
 * Three overloads differ only in the carrier type of status/time:
 * IntPointer/FloatPointer (this one), IntBuffer/FloatBuffer, or int[]/float[].
 */
public static native @Cast("cudnnStatus_t") int cudnnGetAlgorithmPerformance(cudnnAlgorithmPerformanceStruct algoPerf,
                             @ByPtrPtr cudnnAlgorithmStruct algoDesc,
                             @Cast("cudnnStatus_t*") IntPointer status,
                             FloatPointer time,
                             @Cast("size_t*") SizeTPointer memory);
/** IntBuffer/FloatBuffer overload of the same native call. */
public static native @Cast("cudnnStatus_t") int cudnnGetAlgorithmPerformance(cudnnAlgorithmPerformanceStruct algoPerf,
                             @ByPtrPtr cudnnAlgorithmStruct algoDesc,
                             @Cast("cudnnStatus_t*") IntBuffer status,
                             FloatBuffer time,
                             @Cast("size_t*") SizeTPointer memory);
/** int[]/float[] overload of the same native call. */
public static native @Cast("cudnnStatus_t") int cudnnGetAlgorithmPerformance(cudnnAlgorithmPerformanceStruct algoPerf,
                             @ByPtrPtr cudnnAlgorithmStruct algoDesc,
                             @Cast("cudnnStatus_t*") int[] status,
                             float[] time,
                             @Cast("size_t*") SizeTPointer memory);

/**
 * Native binding for cudnnDestroyAlgorithmPerformance; returns the native cudnnStatus_t as an int.
 * algoPerf is passed by pointer-to-pointer ({@code @ByPtrPtr}).
 * NOTE(review): numberToDestroy presumably has to match the count used at creation -- confirm.
 */
public static native @Cast("cudnnStatus_t") int cudnnDestroyAlgorithmPerformance(@ByPtrPtr cudnnAlgorithmPerformanceStruct algoPerf, int numberToDestroy);

/**
 * Native binding for cudnnGetAlgorithmSpaceSize; returns the native cudnnStatus_t as an int.
 * algoSpaceSizeInBytes is a size_t pointer, presumably receiving the buffer size needed
 * by cudnnSaveAlgorithm / cudnnRestoreAlgorithm -- confirm.
 */
public static native @Cast("cudnnStatus_t") int cudnnGetAlgorithmSpaceSize(cudnnContext handle, cudnnAlgorithmStruct algoDesc, @Cast("size_t*") SizeTPointer algoSpaceSizeInBytes);

/**
 * Native binding for cudnnSaveAlgorithm; returns the native cudnnStatus_t as an int.
 * algoSpace is a non-const buffer of algoSpaceSizeInBytes bytes (size passed natively as
 * size_t). Note that the parameter order differs from cudnnRestoreAlgorithm, where
 * algoDesc comes last.
 */
public static native @Cast("cudnnStatus_t") int cudnnSaveAlgorithm(cudnnContext handle,
                   cudnnAlgorithmStruct algoDesc,
                   Pointer algoSpace,
                   @Cast("size_t") long algoSpaceSizeInBytes);

/**
 * Native binding for cudnnRestoreAlgorithm; returns the native cudnnStatus_t as an int.
 * algoSpace is a buffer of algoSpaceSizeInBytes bytes (size passed natively as size_t).
 * Note that the parameter order differs from cudnnSaveAlgorithm: algoDesc comes last here.
 */
public static native @Cast("cudnnStatus_t") int cudnnRestoreAlgorithm(cudnnContext handle,
                      Pointer algoSpace,
                      @Cast("size_t") long algoSpaceSizeInBytes,
                      cudnnAlgorithmStruct algoDesc);

/*
* CUDNN clipping mode type
*/
/** enum cudnnRNNClipMode_t: int values for the {@code clipMode} argument of
 *  cudnnRNNSetClip / cudnnRNNGetClip, which is cast natively to cudnnRNNClipMode_t. */
public static final int CUDNN_RNN_CLIP_NONE = 0, CUDNN_RNN_CLIP_MINMAX = 1;

/**
 * Native binding for cudnnRNNSetClip; returns the native cudnnStatus_t as an int.
 * clipMode is cast natively to cudnnRNNClipMode_t (see CUDNN_RNN_CLIP_*) and clipNanOpt
 * to cudnnNanPropagation_t. lclip and rclip are plain doubles, presumably the lower and
 * upper clipping bounds -- confirm against the cuDNN reference.
 */
public static native @Cast("cudnnStatus_t") int cudnnRNNSetClip(cudnnContext handle,
                cudnnRNNStruct rnnDesc,
                @Cast("cudnnRNNClipMode_t") int clipMode,
                @Cast("cudnnNanPropagation_t") int clipNanOpt,
                double lclip,
                double rclip);

/**
 * Native binding for cudnnRNNGetClip; returns the native cudnnStatus_t as an int.
 * Parallels cudnnRNNSetClip with every setting passed as a pointer (enum pointers for
 * clipMode/clipNanOpt, double pointers for lclip/rclip); presumably all are outputs -- confirm.
 * Three overloads differ only in carrier type: IntPointer/DoublePointer (this one),
 * IntBuffer/DoubleBuffer, or int[]/double[].
 */
public static native @Cast("cudnnStatus_t") int cudnnRNNGetClip(cudnnContext handle,
                cudnnRNNStruct rnnDesc,
                @Cast("cudnnRNNClipMode_t*") IntPointer clipMode,
                @Cast("cudnnNanPropagation_t*") IntPointer clipNanOpt,
                DoublePointer lclip,
                DoublePointer rclip);
/** IntBuffer/DoubleBuffer overload of the same native call. */
public static native @Cast("cudnnStatus_t") int cudnnRNNGetClip(cudnnContext handle,
                cudnnRNNStruct rnnDesc,
                @Cast("cudnnRNNClipMode_t*") IntBuffer clipMode,
                @Cast("cudnnNanPropagation_t*") IntBuffer clipNanOpt,
                DoubleBuffer lclip,
                DoubleBuffer rclip);
/** int[]/double[] overload of the same native call. */
public static native @Cast("cudnnStatus_t") int cudnnRNNGetClip(cudnnContext handle,
                cudnnRNNStruct rnnDesc,
                @Cast("cudnnRNNClipMode_t*") int[] clipMode,
                @Cast("cudnnNanPropagation_t*") int[] clipNanOpt,
                double[] lclip,
                double[] rclip);

/** enum cudnnSeverity_t: message severity levels, numbered 0 (fatal) through 3 (info).
 *  These values are also the bit positions used to build the CUDNN_SEV_*_EN masks. */
public static final int
    CUDNN_SEV_FATAL   = 0,
    CUDNN_SEV_ERROR   = 1,
    CUDNN_SEV_WARNING = 2,
    CUDNN_SEV_INFO    = 3;

/* Message masks to be used with cudnnSetCallback() */
/*
 * Each mask is a single bit at the position given by the matching
 * cudnnSeverity_t constant. No mask is defined here for CUDNN_SEV_FATAL.
 * NOTE(review): these constants are typed long, while the mask parameter of
 * cudnnSetCallback() is declared as int, so callers need an explicit (int)
 * cast when passing them (or an OR-combination of them).
 */
/** Bit 1 ({@code 1L << CUDNN_SEV_ERROR}, value 2). */
public static final long CUDNN_SEV_ERROR_EN = (1L << CUDNN_SEV_ERROR);
/** Bit 2 ({@code 1L << CUDNN_SEV_WARNING}, value 4). */
public static final long CUDNN_SEV_WARNING_EN = (1L << CUDNN_SEV_WARNING);
/** Bit 3 ({@code 1L << CUDNN_SEV_INFO}, value 8). */
public static final long CUDNN_SEV_INFO_EN = (1L << CUDNN_SEV_INFO);

/**
 * Struct containing useful information for each API call.
 * Instances of this type are handed (as {@code const}) to
 * {@code cudnnCallback_t.call(...)}. Every field is exposed as a native
 * getter plus a fluent native setter that returns this object.
 */
public static class cudnnDebug_t extends Pointer {
    static {
        Loader.load();
    }

    /** Default native constructor. */
    public cudnnDebug_t() {
        super((Pointer)null);
        allocate();
    }

    /** Native array allocator. Access with {@link Pointer#position(long)}. */
    public cudnnDebug_t(long size) {
        super((Pointer)null);
        allocateArray(size);
    }

    /** Pointer cast constructor. Invokes {@link Pointer#Pointer(Pointer)}. */
    public cudnnDebug_t(Pointer p) {
        super(p);
    }

    private native void allocate();

    private native void allocateArray(long size);

    /** Moves to the given element of a native array and returns this object, typed. */
    @Override
    public cudnnDebug_t position(long position) {
        Pointer moved = super.position(position);
        return (cudnnDebug_t)moved;
    }

    /** Field {@code cudnn_version} (native {@code unsigned}). */
    public native @Cast("unsigned") int cudnn_version();
    public native cudnnDebug_t cudnn_version(int cudnn_version);

    /** Field {@code cudnnStatus} (native {@code cudnnStatus_t}). */
    public native @Cast("cudnnStatus_t") int cudnnStatus();
    public native cudnnDebug_t cudnnStatus(int cudnnStatus);

    /** Epoch time in seconds. */
    public native @Cast("unsigned") int time_sec();
    public native cudnnDebug_t time_sec(int time_sec);

    /** Microseconds part of epoch time. */
    public native @Cast("unsigned") int time_usec();
    public native cudnnDebug_t time_usec(int time_usec);

    /** Time since start in seconds. */
    public native @Cast("unsigned") int time_delta();
    public native cudnnDebug_t time_delta(int time_delta);

    /** cudnn handle. */
    public native cudnnContext handle();
    public native cudnnDebug_t handle(cudnnContext handle);

    /** cuda stream ID. */
    public native CUstream_st stream();
    public native cudnnDebug_t stream(CUstream_st stream);

    /** Process ID. */
    public native @Cast("unsigned long long") long pid();
    public native cudnnDebug_t pid(long pid);

    /** Thread ID. */
    public native @Cast("unsigned long long") long tid();
    public native cudnnDebug_t tid(long tid);

    /** CUDA device ID. */
    public native int cudaDeviceId();
    public native cudnnDebug_t cudaDeviceId(int cudaDeviceId);

    /** Element {@code i} of the {@code reserved} array (reserved for future use). */
    public native int reserved(int i);
    public native cudnnDebug_t reserved(int i, int reserved);

    /** The {@code reserved} array as a whole (reserved for future use). */
    @MemberGetter
    public native IntPointer reserved();
}

/**
 * Function-pointer type for the message callback accepted by
 * {@code cudnnSetCallback} and reported back by {@code cudnnGetCallback}.
 * The no-argument constructor is {@code protected}, so Java-side callbacks
 * are presumably written as subclasses that override {@link #call}.
 */
public static class cudnnCallback_t extends FunctionPointer {
    static { Loader.load(); }
    /** Pointer cast constructor. Invokes {@link Pointer#Pointer(Pointer)}. */
    public    cudnnCallback_t(Pointer p) { super(p); }
    /** Allocates the native side of the callback; reachable from subclasses only. */
    protected cudnnCallback_t() { allocate(); }
    private native void allocate();
    /**
     * Callback entry point.
     *
     * @param sev   a {@code cudnnSeverity_t} value ({@code CUDNN_SEV_*})
     * @param udata untyped user data pointer (presumably the one registered with
     *              {@code cudnnSetCallback} - confirm)
     * @param dbg   read-only debug record for the call being reported
     * @param msg   message text as a native {@code const char*}
     */
    public native void call(@Cast("cudnnSeverity_t") int sev, Pointer udata, @Const cudnnDebug_t dbg, @Cast("const char*") BytePointer msg);
}

/**
 * JNI binding for the native {@code cudnnSetCallback} function.
 *
 * @param mask  native {@code unsigned} bit mask; the {@code CUDNN_SEV_*_EN}
 *              constants are meant for it but are typed {@code long}, so they
 *              must be narrowed with an explicit {@code (int)} cast
 * @param udata untyped user data pointer
 * @param fptr  callback function pointer
 * @return the native {@code cudnnStatus_t} value as an {@code int}
 */
public static native @Cast("cudnnStatus_t") int cudnnSetCallback(@Cast("unsigned") int mask, Pointer udata, cudnnCallback_t fptr);

/**
 * JNI binding for the native {@code cudnnGetCallback} function.
 * All three arguments are pointers on the native side ({@code unsigned*},
 * {@code void**} and a pointer to the callback pointer), i.e. output slots.
 * This overload takes {@code udata} as a {@code PointerPointer}; the other
 * overloads take it as a {@code Pointer} passed by pointer-to-pointer and
 * vary the container used for {@code mask}.
 *
 * @param mask  receives the native {@code unsigned} mask
 * @param udata receives the user data pointer
 * @param fptr  receives the callback function pointer
 * @return the native {@code cudnnStatus_t} value as an {@code int}
 */
public static native @Cast("cudnnStatus_t") int cudnnGetCallback(@Cast("unsigned*") IntPointer mask, @Cast("void**") PointerPointer udata, @ByPtrPtr cudnnCallback_t fptr);
/** Overload of {@code cudnnGetCallback}: {@code mask} as {@code IntPointer}, {@code udata} as a by-pointer-pointer {@code Pointer}. */
public static native @Cast("cudnnStatus_t") int cudnnGetCallback(@Cast("unsigned*") IntPointer mask, @Cast("void**") @ByPtrPtr Pointer udata, @ByPtrPtr cudnnCallback_t fptr);
/** Overload of {@code cudnnGetCallback}: {@code mask} as {@code IntBuffer}. */
public static native @Cast("cudnnStatus_t") int cudnnGetCallback(@Cast("unsigned*") IntBuffer mask, @Cast("void**") @ByPtrPtr Pointer udata, @ByPtrPtr cudnnCallback_t fptr);
/** Overload of {@code cudnnGetCallback}: {@code mask} as {@code int[]}. */
public static native @Cast("cudnnStatus_t") int cudnnGetCallback(@Cast("unsigned*") int[] mask, @Cast("void**") @ByPtrPtr Pointer udata, @ByPtrPtr cudnnCallback_t fptr);

/**
 * Opaque handle type for the native {@code cudnnRNNDataStruct}. No fields are
 * exposed; instances are only passed to the {@code cudnn*RNNData*} and
 * {@code cudnnRNN*Ex} bindings. Neither constructor allocates native memory.
 */
@Opaque public static class cudnnRNNDataStruct extends Pointer {
    /** Empty constructor. Calls {@code super((Pointer)null)}. */
    public cudnnRNNDataStruct() { super((Pointer)null); }
    /** Pointer cast constructor. Invokes {@link Pointer#Pointer(Pointer)}. */
    public cudnnRNNDataStruct(Pointer p) { super(p); }
}

/**
 * enum cudnnRNNDataLayout_t, exposed as plain {@code int} constants.
 * These are the values for parameters cast to {@code cudnnRNNDataLayout_t},
 * such as the {@code layout} argument of {@code cudnnSetRNNDataDescriptor}.
 */
public static final int
    CUDNN_RNN_DATA_LAYOUT_SEQ_MAJOR_UNPACKED   = 0,
    CUDNN_RNN_DATA_LAYOUT_SEQ_MAJOR_PACKED     = 1, /* implies sequences are sorted */
    CUDNN_RNN_DATA_LAYOUT_BATCH_MAJOR_UNPACKED = 2;

/**
 * enum cudnnRNNPaddingMode_t, exposed as plain {@code int} constants.
 * These are the values for the {@code paddingMode} argument of
 * {@code cudnnSetRNNPaddingMode} and the value reported by
 * {@code cudnnGetRNNPaddingMode}.
 */
public static final int
    CUDNN_RNN_PADDED_IO_DISABLED = 0,
    CUDNN_RNN_PADDED_IO_ENABLED  = 1;

/**
 * JNI binding for the native {@code cudnnSetRNNPaddingMode} function.
 *
 * @param rnnDesc     RNN descriptor to modify
 * @param paddingMode a {@code cudnnRNNPaddingMode_t} value
 *                    ({@code CUDNN_RNN_PADDED_IO_DISABLED} or {@code CUDNN_RNN_PADDED_IO_ENABLED})
 * @return the native {@code cudnnStatus_t} value as an {@code int}
 */
public static native @Cast("cudnnStatus_t") int cudnnSetRNNPaddingMode(cudnnRNNStruct rnnDesc, @Cast("cudnnRNNPaddingMode_t") int paddingMode);

/**
 * JNI binding for the native {@code cudnnGetRNNPaddingMode} function.
 * {@code paddingMode} is a {@code cudnnRNNPaddingMode_t*} on the native side,
 * i.e. an output slot needing room for at least one element. The overloads
 * differ only in the container used for that slot.
 *
 * @param rnnDesc     RNN descriptor to query
 * @param paddingMode receives a {@code cudnnRNNPaddingMode_t} value
 * @return the native {@code cudnnStatus_t} value as an {@code int}
 */
public static native @Cast("cudnnStatus_t") int cudnnGetRNNPaddingMode(cudnnRNNStruct rnnDesc, @Cast("cudnnRNNPaddingMode_t*") IntPointer paddingMode);
/** NIO buffer overload of {@code cudnnGetRNNPaddingMode}; same native call. */
public static native @Cast("cudnnStatus_t") int cudnnGetRNNPaddingMode(cudnnRNNStruct rnnDesc, @Cast("cudnnRNNPaddingMode_t*") IntBuffer paddingMode);
/** Java array overload of {@code cudnnGetRNNPaddingMode}; same native call. */
public static native @Cast("cudnnStatus_t") int cudnnGetRNNPaddingMode(cudnnRNNStruct rnnDesc, @Cast("cudnnRNNPaddingMode_t*") int[] paddingMode);

/**
 * JNI binding for the native {@code cudnnCreateRNNDataDescriptor} function.
 *
 * @param RNNDataDesc passed by pointer-to-pointer, so the native call can store
 *                    the address of the new descriptor into this object
 * @return the native {@code cudnnStatus_t} value as an {@code int}
 */
public static native @Cast("cudnnStatus_t") int cudnnCreateRNNDataDescriptor(@ByPtrPtr cudnnRNNDataStruct RNNDataDesc);

/**
 * JNI binding for the native {@code cudnnDestroyRNNDataDescriptor} function.
 *
 * @param RNNDataDesc descriptor to destroy (presumably one obtained from
 *                    {@code cudnnCreateRNNDataDescriptor})
 * @return the native {@code cudnnStatus_t} value as an {@code int}
 */
public static native @Cast("cudnnStatus_t") int cudnnDestroyRNNDataDescriptor(cudnnRNNDataStruct RNNDataDesc);

/**
 * JNI binding for the native {@code cudnnSetRNNDataDescriptor} function.
 * The three overloads differ only in the container used for
 * {@code seqLengthArray} ({@code IntPointer}, {@code IntBuffer} or {@code int[]}).
 *
 * @param RNNDataDesc    RNN data descriptor to configure
 * @param dataType       a {@code cudnnDataType_t} value
 * @param layout         a {@code cudnnRNNDataLayout_t} value ({@code CUDNN_RNN_DATA_LAYOUT_*})
 * @param maxSeqLength   maximum sequence length
 * @param batchSize      batch size
 * @param vectorSize     vector size
 * @param seqLengthArray read-only ({@code const}) array of sequence lengths;
 *                       presumably one entry per batch element - confirm
 * @param paddingFill    symbol for filling padding position in output
 * @return the native {@code cudnnStatus_t} value as an {@code int}
 */
public static native @Cast("cudnnStatus_t") int cudnnSetRNNDataDescriptor(cudnnRNNDataStruct RNNDataDesc,
                          @Cast("cudnnDataType_t") int dataType,
                          @Cast("cudnnRNNDataLayout_t") int layout,
                          int maxSeqLength,
                          int batchSize,
                          int vectorSize,
                          @Const IntPointer seqLengthArray,
                          Pointer paddingFill);
/** NIO buffer overload of {@code cudnnSetRNNDataDescriptor}; same native call. */
public static native @Cast("cudnnStatus_t") int cudnnSetRNNDataDescriptor(cudnnRNNDataStruct RNNDataDesc,
                          @Cast("cudnnDataType_t") int dataType,
                          @Cast("cudnnRNNDataLayout_t") int layout,
                          int maxSeqLength,
                          int batchSize,
                          int vectorSize,
                          @Const IntBuffer seqLengthArray,
                          Pointer paddingFill);
/** Java array overload of {@code cudnnSetRNNDataDescriptor}; same native call. */
public static native @Cast("cudnnStatus_t") int cudnnSetRNNDataDescriptor(cudnnRNNDataStruct RNNDataDesc,
                          @Cast("cudnnDataType_t") int dataType,
                          @Cast("cudnnRNNDataLayout_t") int layout,
                          int maxSeqLength,
                          int batchSize,
                          int vectorSize,
                          @Const int[] seqLengthArray,
                          Pointer paddingFill);         /* symbol for filling padding position in output */

/**
 * JNI binding for the native {@code cudnnGetRNNDataDescriptor} function.
 * Apart from {@code RNNDataDesc} and {@code arrayLengthRequested}, every
 * argument is a pointer on the native side and therefore an output slot. The
 * three overloads differ only in the container used for the {@code int}
 * outputs ({@code IntPointer}, {@code IntBuffer} or {@code int[]}).
 *
 * @param RNNDataDesc          RNN data descriptor to query
 * @param dataType             receives a {@code cudnnDataType_t} value
 * @param layout               receives a {@code cudnnRNNDataLayout_t} value
 * @param maxSeqLength         receives the maximum sequence length
 * @param batchSize            receives the batch size
 * @param vectorSize           receives the vector size
 * @param arrayLengthRequested presumably the number of entries the caller wants
 *                             written to {@code seqLengthArray} - confirm; the
 *                             container should hold at least that many elements
 * @param seqLengthArray       receives sequence lengths
 * @param paddingFill          untyped pointer; presumably receives the padding
 *                             fill symbol - confirm
 * @return the native {@code cudnnStatus_t} value as an {@code int}
 */
public static native @Cast("cudnnStatus_t") int cudnnGetRNNDataDescriptor(cudnnRNNDataStruct RNNDataDesc,
                          @Cast("cudnnDataType_t*") IntPointer dataType,
                          @Cast("cudnnRNNDataLayout_t*") IntPointer layout,
                          IntPointer maxSeqLength,
                          IntPointer batchSize,
                          IntPointer vectorSize,
                          int arrayLengthRequested,
                          IntPointer seqLengthArray,
                          Pointer paddingFill);
/** NIO buffer overload of {@code cudnnGetRNNDataDescriptor}; same native call. */
public static native @Cast("cudnnStatus_t") int cudnnGetRNNDataDescriptor(cudnnRNNDataStruct RNNDataDesc,
                          @Cast("cudnnDataType_t*") IntBuffer dataType,
                          @Cast("cudnnRNNDataLayout_t*") IntBuffer layout,
                          IntBuffer maxSeqLength,
                          IntBuffer batchSize,
                          IntBuffer vectorSize,
                          int arrayLengthRequested,
                          IntBuffer seqLengthArray,
                          Pointer paddingFill);
/** Java array overload of {@code cudnnGetRNNDataDescriptor}; same native call. */
public static native @Cast("cudnnStatus_t") int cudnnGetRNNDataDescriptor(cudnnRNNDataStruct RNNDataDesc,
                          @Cast("cudnnDataType_t*") int[] dataType,
                          @Cast("cudnnRNNDataLayout_t*") int[] layout,
                          int[] maxSeqLength,
                          int[] batchSize,
                          int[] vectorSize,
                          int arrayLengthRequested,
                          int[] seqLengthArray,
                          Pointer paddingFill);

/**
 * JNI binding for the native {@code cudnnRNNForwardTrainingEx} function.
 * Arguments come in (descriptor, data pointer) pairs. Data pointers marked
 * {@code @Const} are {@code const} on the native side (read-only inputs); the
 * unmarked ones may be written by the call. Sequence data ({@code x},
 * {@code y}) is described by {@code cudnnRNNDataStruct} descriptors, while
 * the {@code h}/{@code c} state arguments use {@code cudnnTensorStruct}.
 *
 * @param handle                  the {@code cudnnContext} handle
 * @param rnnDesc                 RNN descriptor
 * @param xDesc                   descriptor for {@code x}
 * @param x                       read-only input data
 * @param hxDesc                  descriptor for {@code hx}
 * @param hx                      read-only; presumably the initial hidden state - confirm
 * @param cxDesc                  descriptor for {@code cx}
 * @param cx                      read-only; presumably the initial cell state - confirm
 * @param wDesc                   filter descriptor for {@code w}
 * @param w                       read-only weights
 * @param yDesc                   descriptor for {@code y}
 * @param y                       output data
 * @param hyDesc                  descriptor for {@code hy}
 * @param hy                      presumably the final hidden state - confirm
 * @param cyDesc                  descriptor for {@code cy}
 * @param cy                      presumably the final cell state - confirm
 * @param kDesc                   descriptor for {@code keys}
 * @param keys                    NOTE(review): the k/c/i/q pairs look attention-related and
 *                                are believed to be reserved (NULL) in this cuDNN release -
 *                                confirm against the cuDNN documentation
 * @param cDesc                   descriptor for {@code cAttn}
 * @param cAttn                   see the note on {@code keys}
 * @param iDesc                   descriptor for {@code iAttn}
 * @param iAttn                   see the note on {@code keys}
 * @param qDesc                   descriptor for {@code queries}
 * @param queries                 see the note on {@code keys}
 * @param workSpace               workspace buffer
 * @param workSpaceSizeInBytes    size of {@code workSpace}; cast to native {@code size_t}
 * @param reserveSpace            reserve-space buffer
 * @param reserveSpaceSizeInBytes size of {@code reserveSpace}; cast to native {@code size_t}
 * @return the native {@code cudnnStatus_t} value as an {@code int}
 */
public static native @Cast("cudnnStatus_t") int cudnnRNNForwardTrainingEx(cudnnContext handle,
                          cudnnRNNStruct rnnDesc,
                          cudnnRNNDataStruct xDesc,
                          @Const Pointer x,
                          cudnnTensorStruct hxDesc,
                          @Const Pointer hx,
                          cudnnTensorStruct cxDesc,
                          @Const Pointer cx,
                          cudnnFilterStruct wDesc,
                          @Const Pointer w,
                          cudnnRNNDataStruct yDesc,
                          Pointer y,
                          cudnnTensorStruct hyDesc,
                          Pointer hy,
                          cudnnTensorStruct cyDesc,
                          Pointer cy,
                          cudnnRNNDataStruct kDesc,
                          @Const Pointer keys,
                          cudnnRNNDataStruct cDesc,
                          Pointer cAttn,
                          cudnnRNNDataStruct iDesc,
                          Pointer iAttn,
                          cudnnRNNDataStruct qDesc,
                          Pointer queries,
                          Pointer workSpace,
                          @Cast("size_t") long workSpaceSizeInBytes,
                          Pointer reserveSpace,
                          @Cast("size_t") long reserveSpaceSizeInBytes);

/**
 * JNI binding for the native {@code cudnnRNNForwardInferenceEx} function.
 * The parameter list matches {@code cudnnRNNForwardTrainingEx} except that
 * there is no reserve-space buffer. Data pointers marked {@code @Const} are
 * {@code const} on the native side (read-only inputs); the unmarked ones may
 * be written by the call.
 *
 * @param handle               the {@code cudnnContext} handle
 * @param rnnDesc              RNN descriptor
 * @param xDesc                descriptor for {@code x}
 * @param x                    read-only input data
 * @param hxDesc               descriptor for {@code hx}
 * @param hx                   read-only; presumably the initial hidden state - confirm
 * @param cxDesc               descriptor for {@code cx}
 * @param cx                   read-only; presumably the initial cell state - confirm
 * @param wDesc                filter descriptor for {@code w}
 * @param w                    read-only weights
 * @param yDesc                descriptor for {@code y}
 * @param y                    output data
 * @param hyDesc               descriptor for {@code hy}
 * @param hy                   presumably the final hidden state - confirm
 * @param cyDesc               descriptor for {@code cy}
 * @param cy                   presumably the final cell state - confirm
 * @param kDesc                descriptor for {@code keys}
 * @param keys                 NOTE(review): the k/c/i/q pairs look attention-related and are
 *                             believed to be reserved (NULL) in this cuDNN release - confirm
 *                             against the cuDNN documentation
 * @param cDesc                descriptor for {@code cAttn}
 * @param cAttn                see the note on {@code keys}
 * @param iDesc                descriptor for {@code iAttn}
 * @param iAttn                see the note on {@code keys}
 * @param qDesc                descriptor for {@code queries}
 * @param queries              see the note on {@code keys}
 * @param workSpace            workspace buffer
 * @param workSpaceSizeInBytes size of {@code workSpace}; cast to native {@code size_t}
 * @return the native {@code cudnnStatus_t} value as an {@code int}
 */
public static native @Cast("cudnnStatus_t") int cudnnRNNForwardInferenceEx(cudnnContext handle,
                           cudnnRNNStruct rnnDesc,
                           cudnnRNNDataStruct xDesc,
                           @Const Pointer x,
                           cudnnTensorStruct hxDesc,
                           @Const Pointer hx,
                           cudnnTensorStruct cxDesc,
                           @Const Pointer cx,
                           cudnnFilterStruct wDesc,
                           @Const Pointer w,
                           cudnnRNNDataStruct yDesc,
                           Pointer y,
                           cudnnTensorStruct hyDesc,
                           Pointer hy,
                           cudnnTensorStruct cyDesc,
                           Pointer cy,
                           cudnnRNNDataStruct kDesc,
                           @Const Pointer keys,
                           cudnnRNNDataStruct cDesc,
                           Pointer cAttn,
                           cudnnRNNDataStruct iDesc,
                           Pointer iAttn,
                           cudnnRNNDataStruct qDesc,
                           Pointer queries,
                           Pointer workSpace,
                           @Cast("size_t") long workSpaceSizeInBytes);

/**
 * JNI binding for the native {@code cudnnRNNBackwardDataEx} function.
 * Arguments come in (descriptor, data pointer) pairs. Data pointers marked
 * {@code @Const} are {@code const} on the native side (read-only inputs); the
 * unmarked ones ({@code dx}, {@code dhx}, {@code dcx}, {@code dkeys} and the
 * two scratch buffers) may be written by the call. Names with a {@code d}
 * prefix presumably denote gradients of the matching forward-pass
 * quantity - confirm against the cuDNN documentation.
 *
 * @param handle                  the {@code cudnnContext} handle
 * @param rnnDesc                 RNN descriptor
 * @param yDesc                   descriptor for {@code y}
 * @param y                       read-only
 * @param dyDesc                  descriptor for {@code dy}
 * @param dy                      read-only
 * @param dcDesc                  descriptor for {@code dcAttn}
 * @param dcAttn                  read-only; NOTE(review): looks attention-related and is
 *                                believed to be reserved (NULL) in this cuDNN release - confirm
 * @param dhyDesc                 descriptor for {@code dhy}
 * @param dhy                     read-only
 * @param dcyDesc                 descriptor for {@code dcy}
 * @param dcy                     read-only
 * @param wDesc                   filter descriptor for {@code w}
 * @param w                       read-only weights
 * @param hxDesc                  descriptor for {@code hx}
 * @param hx                      read-only
 * @param cxDesc                  descriptor for {@code cx}
 * @param cx                      read-only
 * @param dxDesc                  descriptor for {@code dx}
 * @param dx                      output
 * @param dhxDesc                 descriptor for {@code dhx}
 * @param dhx                     output
 * @param dcxDesc                 descriptor for {@code dcx}
 * @param dcx                     output
 * @param dkDesc                  descriptor for {@code dkeys}
 * @param dkeys                   see the note on {@code dcAttn}
 * @param workSpace               workspace buffer
 * @param workSpaceSizeInBytes    size of {@code workSpace}; cast to native {@code size_t}
 * @param reserveSpace            reserve-space buffer
 * @param reserveSpaceSizeInBytes size of {@code reserveSpace}; cast to native {@code size_t}
 * @return the native {@code cudnnStatus_t} value as an {@code int}
 */
public static native @Cast("cudnnStatus_t") int cudnnRNNBackwardDataEx(cudnnContext handle,
                       cudnnRNNStruct rnnDesc,
                       cudnnRNNDataStruct yDesc,
                       @Const Pointer y,
                       cudnnRNNDataStruct dyDesc,
                       @Const Pointer dy,
                       cudnnRNNDataStruct dcDesc,
                       @Const Pointer dcAttn,
                       cudnnTensorStruct dhyDesc,
                       @Const Pointer dhy,
                       cudnnTensorStruct dcyDesc,
                       @Const Pointer dcy,
                       cudnnFilterStruct wDesc,
                       @Const Pointer w,
                       cudnnTensorStruct hxDesc,
                       @Const Pointer hx,
                       cudnnTensorStruct cxDesc,
                       @Const Pointer cx,
                       cudnnRNNDataStruct dxDesc,
                       Pointer dx,
                       cudnnTensorStruct dhxDesc,
                       Pointer dhx,
                       cudnnTensorStruct dcxDesc,
                       Pointer dcx,
                       cudnnRNNDataStruct dkDesc,
                       Pointer dkeys,
                       Pointer workSpace,
                       @Cast("size_t") long workSpaceSizeInBytes,
                       Pointer reserveSpace,
                       @Cast("size_t") long reserveSpaceSizeInBytes);

/**
 * JNI binding for the native {@code cudnnRNNBackwardWeightsEx} function.
 * Data pointers marked {@code @Const} are {@code const} on the native side
 * (read-only inputs); {@code dw} and the two scratch buffers may be written
 * by the call. Note that the workspace pair precedes {@code dwDesc}/{@code dw}
 * in the argument order, unlike the other {@code cudnnRNN*Ex} bindings where
 * it comes last before the reserve space.
 *
 * @param handle                  the {@code cudnnContext} handle
 * @param rnnDesc                 RNN descriptor
 * @param xDesc                   descriptor for {@code x}
 * @param x                       read-only
 * @param hxDesc                  descriptor for {@code hx}
 * @param hx                      read-only
 * @param yDesc                   descriptor for {@code y}
 * @param y                       read-only
 * @param workSpace               workspace buffer
 * @param workSpaceSizeInBytes    size of {@code workSpace}; cast to native {@code size_t}
 * @param dwDesc                  filter descriptor for {@code dw}
 * @param dw                      presumably the weight gradients - confirm against the
 *                                cuDNN documentation (including whether they are
 *                                overwritten or accumulated)
 * @param reserveSpace            reserve-space buffer
 * @param reserveSpaceSizeInBytes size of {@code reserveSpace}; cast to native {@code size_t}
 * @return the native {@code cudnnStatus_t} value as an {@code int}
 */
public static native @Cast("cudnnStatus_t") int cudnnRNNBackwardWeightsEx(cudnnContext handle,
                          cudnnRNNStruct rnnDesc,
                          cudnnRNNDataStruct xDesc,
                          @Const Pointer x,
                          cudnnTensorStruct hxDesc,
                          @Const Pointer hx,
                          cudnnRNNDataStruct yDesc,
                          @Const Pointer y,
                          Pointer workSpace,
                          @Cast("size_t") long workSpaceSizeInBytes,
                          cudnnFilterStruct dwDesc,
                          Pointer dw,
                          Pointer reserveSpace,
                          @Cast("size_t") long reserveSpaceSizeInBytes);

/* DEPRECATED routines, to be removed in the next release:
   Users should use the non-suffixed version (which has the API and functionality of the _v6 version).
   Routines with the _v5 suffix have the functionality of the non-suffixed routines in cuDNN V6.
 */

/**
 * JNI binding for the native {@code cudnnSetRNNDescriptor_v6} function.
 * NOTE: the cuDNN header lists the suffixed {@code cudnnSetRNNDescriptor}
 * routines as DEPRECATED and slated for removal; it directs users to the
 * non-suffixed routine, which has the API and functionality of this version.
 * Compared with the {@code _v5} variant, this one additionally takes
 * {@code handle} and {@code algo}.
 *
 * @param handle      the {@code cudnnContext} handle
 * @param rnnDesc     RNN descriptor to configure
 * @param hiddenSize  hidden size
 * @param numLayers   number of layers
 * @param dropoutDesc dropout descriptor
 * @param inputMode   a {@code cudnnRNNInputMode_t} value
 * @param direction   a {@code cudnnDirectionMode_t} value
 * @param mode        a {@code cudnnRNNMode_t} value
 * @param algo        a {@code cudnnRNNAlgo_t} value
 * @param dataType    a {@code cudnnDataType_t} value
 * @return the native {@code cudnnStatus_t} value as an {@code int}
 */
public static native @Cast("cudnnStatus_t") int cudnnSetRNNDescriptor_v6(cudnnContext handle,
                         cudnnRNNStruct rnnDesc,
                         int hiddenSize,
                         int numLayers,
                         cudnnDropoutStruct dropoutDesc,
                         @Cast("cudnnRNNInputMode_t") int inputMode,
                         @Cast("cudnnDirectionMode_t") int direction,
                         @Cast("cudnnRNNMode_t") int mode,
                         @Cast("cudnnRNNAlgo_t") int algo,
                         @Cast("cudnnDataType_t") int dataType);

/**
 * JNI binding for the native {@code cudnnSetRNNDescriptor_v5} function.
 * NOTE: the cuDNN header lists the suffixed {@code cudnnSetRNNDescriptor}
 * routines as DEPRECATED and slated for removal; it states that {@code _v5}
 * routines have the functionality the non-suffixed routines had in cuDNN V6.
 * Unlike the {@code _v6} variant, this one takes neither a
 * {@code cudnnContext} handle nor a {@code cudnnRNNAlgo_t} argument.
 *
 * @param rnnDesc     RNN descriptor to configure
 * @param hiddenSize  hidden size
 * @param numLayers   number of layers
 * @param dropoutDesc dropout descriptor
 * @param inputMode   a {@code cudnnRNNInputMode_t} value
 * @param direction   a {@code cudnnDirectionMode_t} value
 * @param mode        a {@code cudnnRNNMode_t} value
 * @param dataType    a {@code cudnnDataType_t} value
 * @return the native {@code cudnnStatus_t} value as an {@code int}
 */
public static native @Cast("cudnnStatus_t") int cudnnSetRNNDescriptor_v5(cudnnRNNStruct rnnDesc,
                         int hiddenSize,
                         int numLayers,
                         cudnnDropoutStruct dropoutDesc,
                         @Cast("cudnnRNNInputMode_t") int inputMode,
                         @Cast("cudnnDirectionMode_t") int direction,
                         @Cast("cudnnRNNMode_t") int mode,
                         @Cast("cudnnDataType_t") int dataType);

// #if defined(__cplusplus)
// #endif

// #endif /* CUDNN_H_ */


}




© 2015 - 2024 Weber Informatics LLC | Privacy Policy