jcuda.driver.JCudaDriver Maven / Gradle / Ivy

Go to download
Show more of this group Show more artifacts with this name
Show all versions of nd4j-jcublas-common Show documentation
There is a newer version: 0.4-rc3.7
/*
 *
 *  * Copyright 2015 Skymind,Inc.
 *  *
 *  *    Licensed under the Apache License, Version 2.0 (the "License");
 *  *    you may not use this file except in compliance with the License.
 *  *    You may obtain a copy of the License at
 *  *
 *  *        http://www.apache.org/licenses/LICENSE-2.0
 *  *
 *  *    Unless required by applicable law or agreed to in writing, software
 *  *    distributed under the License is distributed on an "AS IS" BASIS,
 *  *    WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 *  *    See the License for the specific language governing permissions and
 *  *    limitations under the License.
 *
 *
 */

package jcuda.driver;

import jcuda.CudaException;
import jcuda.LibUtils;
import jcuda.LogLevel;
import jcuda.Pointer;

/**
 * Java bindings for the NVIDIA CUDA driver API.
 * <p>
 * Most comments are extracted from the CUDA online documentation.
 */
public class JCudaDriver
{
    /** The CUDA version */
    public static final int CUDA_VERSION = 7000;

    /**
     * If set, host memory is portable between CUDA contexts.
     * Flag for {@link JCudaDriver#cuMemHostAlloc}
     */
    public static final int CU_MEMHOSTALLOC_PORTABLE = 0x01;

    /**
     * If set, host memory is mapped into CUDA address space and
     * JCudaDriver#cuMemHostGetDevicePointer may be called on the host pointer.
     * Flag for {@link JCudaDriver#cuMemHostAlloc}
     */
    public static final int CU_MEMHOSTALLOC_DEVICEMAP = 0x02;

    /**
     * If set, host memory is allocated as write-combined - fast to write,
     * faster to DMA, slow to read except via SSE4 streaming load instruction
     * (MOVNTDQA).
     * Flag for {@link JCudaDriver#cuMemHostAlloc}
     */
    public static final int CU_MEMHOSTALLOC_WRITECOMBINED = 0x04;

    /**
     * If set, host memory is portable between CUDA contexts.
     * Flag for ::cuMemHostRegister()
     */
    public static final int CU_MEMHOSTREGISTER_PORTABLE   = 0x01;

    /**
     * If set, host memory is mapped into CUDA address space and
     * ::cuMemHostGetDevicePointer() may be called on the host pointer.
     * Flag for ::cuMemHostRegister()
     */
    public static final int CU_MEMHOSTREGISTER_DEVICEMAP  = 0x02;

    /**
     * If set, peer memory is mapped into CUDA address space and
     * ::cuMemPeerGetDevicePointer() may be called on the host pointer.
     * Flag for ::cuMemPeerRegister()
     * @deprecated This value has been added in CUDA 4.0 RC,
     * and removed in CUDA 4.0 RC2
     */
    public static final int CU_MEMPEERREGISTER_DEVICEMAP  = 0x02;

    /**
     * If set, the CUDA array is a collection of layers, where each layer is either a 1D
     * or a 2D array and the Depth member of CUDA_ARRAY3D_DESCRIPTOR specifies the number
     * of layers, not the depth of a 3D array.
     */
    public static final int CUDA_ARRAY3D_LAYERED = 0x01;

    /**
     * If set, the CUDA array contains an array of 2D slices
     * and the Depth member of CUDA_ARRAY3D_DESCRIPTOR specifies
     * the number of slices, not the depth of a 3D array.
     * @deprecated use CUDA_ARRAY3D_LAYERED
     */
    public static final int CUDA_ARRAY3D_2DARRAY = 0x01;


    /**
     * This flag must be set in order to bind a surface reference
     * to the CUDA array
     */
    public static final int CUDA_ARRAY3D_SURFACE_LDST = 0x02;

    /**
     * If set, the CUDA array is a collection of six 2D arrays, representing faces of a cube. The
     * width of such a CUDA array must be equal to its height, and Depth must be six.
     * If ::CUDA_ARRAY3D_LAYERED flag is also set, then the CUDA array is a collection of cubemaps
     * and Depth must be a multiple of six.
     */
    public static final int CUDA_ARRAY3D_CUBEMAP = 0x04;

    /**
     * This flag must be set in order to perform texture gather operations
     * on a CUDA array.
     */
    public static final int CUDA_ARRAY3D_TEXTURE_GATHER = 0x08;

    /**
     * This flag if set indicates that the CUDA
     * array is a DEPTH_TEXTURE.
    */
    public static final int CUDA_ARRAY3D_DEPTH_TEXTURE = 0x10;
    
    /**
     * For texture references loaded into the module, use default
     * texunit from texture reference
     */
    public static final int CU_PARAM_TR_DEFAULT = -1;

    /**
     * Override the texref format with a format inferred from the array
     */
    public static final int CU_TRSA_OVERRIDE_FORMAT = 0x01;

    /**
     * Read the texture as integers rather than promoting the values
     * to floats in the range [0,1]
     */
    public static final int CU_TRSF_READ_AS_INTEGER = 0x01;

    /**
     * Use normalized texture coordinates in the range [0,1) instead of [0,dim)
     */
    public static final int CU_TRSF_NORMALIZED_COORDINATES = 0x02;

    /**
     * Perform sRGB->linear conversion during texture read.
     * Flag for JCudaDriver#cuTexRefSetFlags()
     */
    public static final int CU_TRSF_SRGB  = 0x10;

    /**
     * Specifies a stream callback does not block the stream while
     * executing.  This is the default behavior.
     * Flag for {@link JCudaDriver#cuStreamAddCallback(CUstream, CUstreamCallback, Object, int)}
     * 
     * @deprecated This flag was only present in CUDA 5.0.25 (release candidate) 
     * and may be removed (or added again) in future releases
     */
    public static final int CU_STREAM_CALLBACK_NONBLOCKING  = 0x00;

    /**
     * If set, the stream callback blocks the stream until it is
     * done executing.
     * Flag for {@link JCudaDriver#cuStreamAddCallback(CUstream, CUstreamCallback, Object, int)}
     * 
     * @deprecated This flag was only present in CUDA 5.0.25 (release candidate) 
     * and may be removed (or added again) in future releases
     */
    public static final int CU_STREAM_CALLBACK_BLOCKING     = 0x01;
    
    /**
     * Private inner class for the constant pointer values
     * CU_LAUNCH_PARAM_END, CU_LAUNCH_PARAM_BUFFER_POINTER,
     * and CU_LAUNCH_PARAM_BUFFER_SIZE.
     *
     * TODO: These constants could be misused: there is no
     * mechanism for preventing these Pointers from being used
     * for memory allocation. However, at the moment there
     * is no other way of emulating these pointer constants.
     */
    private static class ConstantPointer extends Pointer
    {
        // Instances are only created inside JCudaDriver; the raw value is
        // handed unchanged to the Pointer(long) constructor.
        private ConstantPointer(long rawValue)
        {
            super(rawValue);
        }
    }

    /**
     * End of array terminator for the \p extra parameter to
     * ::cuLaunchKernel
     */
    public static final Pointer CU_LAUNCH_PARAM_END = new ConstantPointer(0); // ((void*)0x00)


    /**
     * Indicator that the next value in the \p extra parameter to
     * ::cuLaunchKernel will be a pointer to a buffer containing all kernel
     * parameters used for launching kernel \p f.  This buffer needs to
     * honor all alignment/padding requirements of the individual parameters.
     * If ::CU_LAUNCH_PARAM_BUFFER_SIZE is not also specified in the
     * \p extra array, then ::CU_LAUNCH_PARAM_BUFFER_POINTER will have no
     * effect.
     */
    public static final Pointer CU_LAUNCH_PARAM_BUFFER_POINTER = new ConstantPointer(1); //((void*)0x01)

    /**
     * Indicator that the next value in the \p extra parameter to
     * ::cuLaunchKernel will be a pointer to a size_t which contains the
     * size of the buffer specified with ::CU_LAUNCH_PARAM_BUFFER_POINTER.
     * It is required that ::CU_LAUNCH_PARAM_BUFFER_POINTER also be specified
     * in the \p extra array if the value associated with
     * ::CU_LAUNCH_PARAM_BUFFER_SIZE is not zero.
     */
    public static final Pointer CU_LAUNCH_PARAM_BUFFER_SIZE = new ConstantPointer(2); //   ((void*)0x02)


    /**
     * Private inner class for the constant stream values
     */
    private static class ConstantCUstream extends CUstream
    {
        // Instances are only created inside JCudaDriver; the raw handle
        // value is handed unchanged to the CUstream(long) constructor.
        private ConstantCUstream(long rawHandle)
        {
            super(rawHandle);
        }
    }
    
    /**
     * Stream handle that can be passed as a CUstream to use an implicit stream
     * with legacy synchronization behavior.
     */
    public static final CUstream CU_STREAM_LEGACY = new ConstantCUstream(0x1);
    
    /**
     * Stream handle that can be passed as a CUstream to use an implicit stream
     * with per-thread synchronization behavior.
     */
    public static final CUstream CU_STREAM_PER_THREAD = new ConstantCUstream(0x2);
    
    
    /**
     * Whether a CudaException should be thrown if a method is about
     * to return a result code that is not CUresult.CUDA_SUCCESS
     */
    private static boolean exceptionsEnabled = false;


    static
    {
        // Runs once when this class is initialized. Presumably this resolves
        // the native library backing the 'native' methods declared in this
        // class - the lookup logic itself lives in LibUtils.
        LibUtils.loadLibrary("JCudaDriver");
    }

    /* Private constructor to prevent instantiation */
    private JCudaDriver()
    {
        // Intentionally empty.
    }

    /**
     * Set the specified log level for the JCuda driver library.
     * <p>
     * Currently supported log levels:
     * <ul>
     *   <li>LOG_QUIET: Never print anything</li>
     *   <li>LOG_ERROR: Print error messages</li>
     *   <li>LOG_TRACE: Print a trace of all native function calls</li>
     * </ul>
     *
     * @param logLevel The log level to use.
     */
    public static void setLogLevel(LogLevel logLevel)
    {
        // The native overload takes the level as the enum's ordinal value.
        final int nativeLevel = logLevel.ordinal();
        setLogLevel(nativeLevel);
    }

    private static native void setLogLevel(int logLevel);


    /**
     * Enables or disables exceptions. By default, the methods of this class
     * only return the CUresult error code from the underlying CUDA function.
     * If exceptions are enabled, a CudaException with a detailed error
     * message will be thrown if a method is about to return a result code
     * that is not CUresult.CUDA_SUCCESS
     *
     * @param enabled Whether exceptions are enabled
     */
    public static void setExceptionsEnabled(boolean enabled)
    {
        // Read by checkResult(int) to decide whether a failing status
        // code is thrown as a CudaException or simply returned.
        JCudaDriver.exceptionsEnabled = enabled;
    }

    /**
     * If the given result is different to CUresult.CUDA_SUCCESS and
     * exceptions have been enabled, this method will throw a
     * CudaException with an error message that corresponds to the
     * given result code. Otherwise, the given result is simply
     * returned.
     *
     * @param result The result to check
     * @return The result that was given as the parameter
     * @throws CudaException If exceptions have been enabled and
     * the given result code is not CUresult.CUDA_SUCCESS
     */
    private static int checkResult(int result)
    {
        // Successful codes, and every code while exceptions are disabled,
        // are handed straight back to the caller.
        if (!exceptionsEnabled || result == CUresult.CUDA_SUCCESS)
        {
            return result;
        }

        // Exceptions are enabled and the call failed: report it with the
        // textual form of the status code.
        throw new CudaException(CUresult.stringFor(result));
    }

    /**
     * Returns the given (address) value, adjusted to have
     * the given alignment. This function may be used to
     * align the parameters for a kernel call according
     * to their alignment requirements.
     *
     * @param value The address value
     * @param alignment The desired alignment
     * @return The aligned address value
     * @deprecated This method was intended for a simpler
     * kernel parameter setup in earlier CUDA versions,
     * and should not be required any more. It may be
     * removed in future releases. 
     */
    @Deprecated
    public static int align(int value, int alignment)
    {
        // Round up with a bit mask: add (alignment - 1), then clear the low
        // bits. Clearing bits only yields a multiple of 'alignment' when
        // 'alignment' is a power of two (1, 2, 4, 8, ...); other values
        // produce results that are not multiples of 'alignment'.
        final int mask = alignment - 1;
        return (value + mask) & ~mask;
    }


    /**
     * A wrapper function for
     * {@link JCudaDriver#cuModuleLoadDataEx(CUmodule, Pointer, int, int[], Pointer)}
     * which allows passing in the options for the JIT compiler, and obtaining
     * the output of the JIT compiler via a {@link JITOptions} object.
     * <p>
     * Note: This method should be considered as preliminary,
     * and might change in future releases.
     */
    public static int cuModuleLoadDataJIT(CUmodule module, Pointer pointer, JITOptions jitOptions)
    {
        // Route the status code through checkResult like the other wrappers
        // in this class, so that a failing call raises a CudaException when
        // exceptions have been enabled via setExceptionsEnabled(true).
        // With exceptions disabled (the default) the code is returned as-is.
        final int status = cuModuleLoadDataJITNative(module, pointer, jitOptions);
        return checkResult(status);
    }
    private static native int cuModuleLoadDataJITNative(CUmodule module, Pointer pointer, JITOptions jitOptions);


    
    /**
     * Gets the string description of an error code.
     *
     * Sets *pStr to the address of a NULL-terminated string description
     * of the error code error.
     * If the error code is not recognized, ::CUDA_ERROR_INVALID_VALUE
     * will be returned and *pStr will be set to the NULL address.
     * 
     *
     * @param error - Error code to convert to string
     * @param pStr - Address of the string pointer.
     *
     * @return
     * ::CUDA_SUCCESS,
     * ::CUDA_ERROR_INVALID_VALUE
     *
     * @see CUresult
     */
    public static int cuGetErrorString(int error, String pStr[])
    {
        // Call the native binding first, then let checkResult decide whether
        // the status code is returned or raised as a CudaException.
        final int status = cuGetErrorStringNative(error, pStr);
        return checkResult(status);
    }
    private static native int cuGetErrorStringNative(int error, String pStr[]);

    /**
     * Gets the string representation of an error code enum name.
     *
     * Sets *pStr to the address of a NULL-terminated string representation
     * of the name of the enum error code error.
     * If the error code is not recognized, ::CUDA_ERROR_INVALID_VALUE
     * will be returned and *pStr will be set to the NULL address.
     * 
     * @param error - Error code to convert to string
     * @param pStr - Address of the string pointer.
     *
     * @return
     * ::CUDA_SUCCESS,
     * ::CUDA_ERROR_INVALID_VALUE
     *
     * @see CUresult
     */
    public static int cuGetErrorName(int error, String pStr[])
    {
        // Native lookup, followed by the shared status-code check.
        final int status = cuGetErrorNameNative(error, pStr);
        return checkResult(status);
    }
    private static native int cuGetErrorNameNative(int error, String pStr[]);
    
    

    /**
     * Initialize the CUDA driver API.
     * 
     *      * CUresult cuInit (
     *      unsigned int  Flags )
     * 
     * 
     *   Initialize the CUDA driver API. 
     *     Initializes the driver API and must be called before any other function
     *     from the driver API.
     *     Currently, the Flags parameter
     *     must be 0. If cuInit() has not been called, any function from the
     *     driver API will return CUDA_ERROR_NOT_INITIALIZED.
     *   
     *   
     *     Note:
     *     Note that this
     *       function may also return error codes from previous, asynchronous
     *       launches. 
     *     
     *   
     *   
     * 
     * 
     * @param Flags Initialization flag for CUDA.
     * 
     * @return CUDA_SUCCESS, CUDA_ERROR_INVALID_VALUE,
     * CUDA_ERROR_INVALID_DEVICE
     * 
     */    
    public static int cuInit(int Flags)
    {
        // Forward the flags to the native binding and pass its status code
        // through checkResult (which throws only if exceptions are enabled).
        final int status = cuInitNative(Flags);
        return checkResult(status);
    }

    private static native int cuInitNative(int Flags);


    /**
     * Returns a handle to a compute device.
     * 
     *      * CUresult cuDeviceGet (
     *      CUdevice* device,
     *      int  ordinal )
     * 
     * 
     *   Returns a handle to a compute device. 
     *     Returns in *device a device handle given an ordinal in the
     *     range [0, cuDeviceGetCount()-1].
     *   
     *   
     *     Note:
     *     Note that this
     *       function may also return error codes from previous, asynchronous
     *       launches.
     *     
     *   
     *   
     * 
     * 
     * @param device Returned device handle
     * @param ordinal Device number to get handle for
     * 
     * @return CUDA_SUCCESS, CUDA_ERROR_DEINITIALIZED, CUDA_ERROR_NOT_INITIALIZED,
     * CUDA_ERROR_INVALID_CONTEXT, CUDA_ERROR_INVALID_VALUE,
     * CUDA_ERROR_INVALID_DEVICE
     * 
     * @see JCudaDriver#cuDeviceGetAttribute
     * @see JCudaDriver#cuDeviceGetCount
     * @see JCudaDriver#cuDeviceGetName
     * @see JCudaDriver#cuDeviceTotalMem
     */    
    public static int cuDeviceGet(CUdevice device, int ordinal)
    {
        // Native call first; the status code then goes through checkResult.
        final int status = cuDeviceGetNative(device, ordinal);
        return checkResult(status);
    }

    private static native int cuDeviceGetNative(CUdevice device, int ordinal);


    /**
     * Returns the number of compute-capable devices.
     * 
     *      * CUresult cuDeviceGetCount (
     *      int* count )
     * 
     * 
     *   Returns the number of compute-capable
     *     devices.  Returns in *count the number of devices with
     *     compute capability greater than or equal to 1.0 that are available for
     *     execution. If there is
     *     no such device, cuDeviceGetCount()
     *     returns 0.
     *   
     *   
     *     Note:
     *     Note that this
     *       function may also return error codes from previous, asynchronous
     *       launches.
     *     
     *   
     *   
     * 
     * 
     * @param count Returned number of compute-capable devices
     * 
     * @return CUDA_SUCCESS, CUDA_ERROR_DEINITIALIZED, CUDA_ERROR_NOT_INITIALIZED,
     * CUDA_ERROR_INVALID_CONTEXT, CUDA_ERROR_INVALID_VALUE
     * 
     * @see JCudaDriver#cuDeviceGetAttribute
     * @see JCudaDriver#cuDeviceGetName
     * @see JCudaDriver#cuDeviceGet
     * @see JCudaDriver#cuDeviceTotalMem
     */    
    public static int cuDeviceGetCount(int count[])
    {
        // The array is handed to the native binding unchanged; only the
        // returned status code is inspected here.
        final int status = cuDeviceGetCountNative(count);
        return checkResult(status);
    }

    private static native int cuDeviceGetCountNative(int count[]);


    /**
     * Returns an identifier string for the device.
     * 
     *      * CUresult cuDeviceGetName (
     *      char* name,
     *      int  len,
     *      CUdevice dev )
     * 
     * 
     *   Returns an identifier string for the
     *     device.  Returns an ASCII string identifying the device dev
     *     in the NULL-terminated string pointed to by name. len specifies
     *     the maximum length of the string that may be returned.
     *   
     *   
     *     Note:
     *     Note that this
     *       function may also return error codes from previous, asynchronous
     *       launches.
     *     
     *   
     *   
     * 
     * 
     * @param name Returned identifier string for the device
     * @param len Maximum length of string to store in name
     * @param dev Device to get identifier string for
     * 
     * @return CUDA_SUCCESS, CUDA_ERROR_DEINITIALIZED, CUDA_ERROR_NOT_INITIALIZED,
     * CUDA_ERROR_INVALID_CONTEXT, CUDA_ERROR_INVALID_VALUE,
     * CUDA_ERROR_INVALID_DEVICE
     * 
     * @see JCudaDriver#cuDeviceGetAttribute
     * @see JCudaDriver#cuDeviceGetCount
     * @see JCudaDriver#cuDeviceGet
     * @see JCudaDriver#cuDeviceTotalMem
     */    
    public static int cuDeviceGetName(byte name[], int len, CUdevice dev)
    {
        // All three arguments are forwarded as-is to the native binding.
        final int status = cuDeviceGetNameNative(name, len, dev);
        return checkResult(status);
    }

    private static native int cuDeviceGetNameNative(byte name[], int len, CUdevice dev);


    /**
     * Returns the compute capability of the device.
     * 
     *      * CUresult cuDeviceComputeCapability (
     *      int* major,
     *      int* minor,
     *      CUdevice dev )
     * 
     * 
     *   Returns the compute capability of the
     *     device.  
     *     Deprecated: This function was deprecated
     *     as of CUDA 5.0 and its functionality superseded by
     *     cuDeviceGetAttribute().
     *   
     *   Returns in *major and *minor the major and minor revision numbers that define the
     *     compute capability of the device dev.
     *   
     *   
     *     Note:
     *     Note that this
     *       function may also return error codes from previous, asynchronous
     *       launches.
     *     
     *   
     *   
     * 
     * 
     * @param major Major revision number
     * @param minor Minor revision number
     * @param dev Device handle
     * 
     * @return CUDA_SUCCESS, CUDA_ERROR_DEINITIALIZED, CUDA_ERROR_NOT_INITIALIZED,
     * CUDA_ERROR_INVALID_CONTEXT, CUDA_ERROR_INVALID_VALUE,
     * CUDA_ERROR_INVALID_DEVICE
     * 
     * @see JCudaDriver#cuDeviceGetAttribute
     * @see JCudaDriver#cuDeviceGetCount
     * @see JCudaDriver#cuDeviceGetName
     * @see JCudaDriver#cuDeviceGet
     * @see JCudaDriver#cuDeviceTotalMem
     */    
    public static int cuDeviceComputeCapability(int major[], int minor[], CUdevice dev)
    {
        // Delegate to the native binding, then apply the shared status check.
        final int status = cuDeviceComputeCapabilityNative(major, minor, dev);
        return checkResult(status);
    }

    private static native int cuDeviceComputeCapabilityNative(int major[], int minor[], CUdevice dev);

    
    /**
     * Binding for the CUDA driver API function of the same name.
     * Forwards both arguments to the native implementation and passes
     * the resulting status code through {@code checkResult}.
     *
     * @param pctx The context handle handed to the native call (presumably
     * receives the retained primary context - confirm against the CUDA
     * driver API documentation)
     * @param dev The device handle handed to the native call
     * @return The status code reported by the native call
     * @throws CudaException If exceptions have been enabled and the
     * status code is not CUresult.CUDA_SUCCESS
     */
    public static int cuDevicePrimaryCtxRetain(CUcontext pctx, CUdevice dev)
    {
        final int status = cuDevicePrimaryCtxRetainNative(pctx, dev);
        return checkResult(status);
    }
    private static native int cuDevicePrimaryCtxRetainNative(CUcontext pctx, CUdevice dev);
    
    
    
    /**
     * Binding for the CUDA driver API function of the same name.
     * Forwards the device handle to the native implementation and passes
     * the resulting status code through {@code checkResult}.
     *
     * @param dev The device handle handed to the native call
     * @return The status code reported by the native call
     * @throws CudaException If exceptions have been enabled and the
     * status code is not CUresult.CUDA_SUCCESS
     */
    public static int cuDevicePrimaryCtxRelease(CUdevice dev)
    {
        final int status = cuDevicePrimaryCtxReleaseNative(dev);
        return checkResult(status);
    }
    private static native int cuDevicePrimaryCtxReleaseNative(CUdevice dev);

    
    /**
     * Binding for the CUDA driver API function of the same name.
     * Forwards both arguments to the native implementation and passes
     * the resulting status code through {@code checkResult}.
     *
     * @param dev The device handle handed to the native call
     * @param flags The flags value handed to the native call (valid values
     * are defined by the CUDA driver API - not visible in this class)
     * @return The status code reported by the native call
     * @throws CudaException If exceptions have been enabled and the
     * status code is not CUresult.CUDA_SUCCESS
     */
    public static int cuDevicePrimaryCtxSetFlags(CUdevice dev, int flags)
    {
        final int status = cuDevicePrimaryCtxSetFlagsNative(dev, flags);
        return checkResult(status);
    }
    private static native int cuDevicePrimaryCtxSetFlagsNative(CUdevice dev, int flags);

    
    
    
    /**
     * Returns the total amount of memory on the device.
     * 
     *      * CUresult cuDeviceTotalMem (
     *      size_t* bytes,
     *      CUdevice dev )
     * 
     * 
     *   Returns the total amount of memory on
     *     the device.  Returns in *bytes the total amount of memory
     *     available on the device dev in bytes.
     *   
     *   
     *     Note:
     *     Note that this
     *       function may also return error codes from previous, asynchronous
     *       launches.
     *     
     *   
     *   
     * 
     * 
     * @param bytes Returned memory available on device in bytes
     * @param dev Device handle
     * 
     * @return CUDA_SUCCESS, CUDA_ERROR_DEINITIALIZED, CUDA_ERROR_NOT_INITIALIZED,
     * CUDA_ERROR_INVALID_CONTEXT, CUDA_ERROR_INVALID_VALUE,
     * CUDA_ERROR_INVALID_DEVICE
     * 
     * @see JCudaDriver#cuDeviceGetAttribute
     * @see JCudaDriver#cuDeviceGetCount
     * @see JCudaDriver#cuDeviceGetName
     * @see JCudaDriver#cuDeviceGet
     */    
    public static int cuDeviceTotalMem(long bytes[], CUdevice dev)
    {
        // The output array and device handle go straight to the native
        // binding; its status code is then checked centrally.
        final int status = cuDeviceTotalMemNative(bytes, dev);
        return checkResult(status);
    }

    private static native int cuDeviceTotalMemNative(long bytes[], CUdevice dev);


    /**
     * Returns properties for a selected device.
     * 
     *      * CUresult cuDeviceGetProperties (
     *      CUdevprop* prop,
     *      CUdevice dev )
     * 
     * 
     *   Returns properties for a selected device.
     *     Deprecated: This function was deprecated
     *     as of CUDA 5.0 and replaced by cuDeviceGetAttribute().
     *   
     *   Returns in *prop the properties
     *     of device dev. The CUdevprop structure is defined as:
     *   
     *        typedef struct CUdevprop_st {
     *      int maxThreadsPerBlock;
     *      int maxThreadsDim[3];
     *      int maxGridSize[3];
     *      int sharedMemPerBlock;
     *      int totalConstantMemory;
     *      int SIMDWidth;
     *      int memPitch;
     *      int regsPerBlock;
     *      int clockRate;
     *      int textureAlign
     *   } CUdevprop;
     *   where:
     *   
     *     
     *       maxThreadsPerBlock is the
     *         maximum number of threads per block;
     *       
     *     
     *     
     *       maxThreadsDim[3] is the maximum
     *         sizes of each dimension of a block;
     *       
     *     
     *     
     *       maxGridSize[3] is the maximum
     *         sizes of each dimension of a grid;
     *       
     *     
     *     
     *       sharedMemPerBlock is the total
     *         amount of shared memory available per block in bytes;
     *       
     *     
     *     
     *       totalConstantMemory is the
     *         total amount of constant memory available on the device in bytes;
     *       
     *     
     *     
     *       SIMDWidth is the warp
     *         size;
     *       
     *     
     *     
     *       memPitch is the maximum pitch
     *         allowed by the memory copy functions that involve memory regions
     *         allocated through cuMemAllocPitch();
     *       
     *     
     *     
     *       regsPerBlock is the total
     *         number of registers available per block;
     *       
     *     
     *     
     *       clockRate is the clock frequency
     *         in kilohertz;
     *       
     *     
     *     
     *       textureAlign is the alignment
     *         requirement; texture base addresses that are aligned to textureAlign
     *         bytes do not need an offset
     *         applied to texture fetches.
     *       
     *     
     *   
     *   
     *   
     *     Note:
     *     Note that this
     *       function may also return error codes from previous, asynchronous
     *       launches.
     *     
     *   
     *   
     * 
     * 
     * @param prop Returned properties of device
     * @param dev Device to get properties for
     * 
     * @return CUDA_SUCCESS, CUDA_ERROR_DEINITIALIZED, CUDA_ERROR_NOT_INITIALIZED,
     * CUDA_ERROR_INVALID_CONTEXT, CUDA_ERROR_INVALID_VALUE,
     * CUDA_ERROR_INVALID_DEVICE
     * 
     * @see JCudaDriver#cuDeviceGetAttribute
     * @see JCudaDriver#cuDeviceGetCount
     * @see JCudaDriver#cuDeviceGetName
     * @see JCudaDriver#cuDeviceGet
     * @see JCudaDriver#cuDeviceTotalMem
     */    
    public static int cuDeviceGetProperties(CUdevprop prop, CUdevice dev)
    {
        // Delegate to the native binding, then apply the shared status check.
        final int status = cuDeviceGetPropertiesNative(prop, dev);
        return checkResult(status);
    }

    private static native int cuDeviceGetPropertiesNative(CUdevprop prop, CUdevice dev);


    /**
     * Returns information about the device.
     * 
     *      * CUresult cuDeviceGetAttribute (
     *      int* pi,
     *      CUdevice_attribute attrib,
     *      CUdevice dev )
     * 
     * 
     *   Returns information about the device. 
     *     Returns in *pi the integer value of the attribute attrib on device dev. The supported attributes are:
     *   

     *     
     *       CU_DEVICE_ATTRIBUTE_MAX_THREADS_PER_BLOCK: Maximum number of threads
     *         per block;
     *       
     *     
     *     
     *       CU_DEVICE_ATTRIBUTE_MAX_BLOCK_DIM_X:
     *         Maximum x-dimension of a block;
     *       
     *     
     *     
     *       CU_DEVICE_ATTRIBUTE_MAX_BLOCK_DIM_Y:
     *         Maximum y-dimension of a block;
     *       
     *     
     *     
     *       CU_DEVICE_ATTRIBUTE_MAX_BLOCK_DIM_Z:
     *         Maximum z-dimension of a block;
     *       
     *     
     *     
     *       CU_DEVICE_ATTRIBUTE_MAX_GRID_DIM_X:
     *         Maximum x-dimension of a grid;
     *       
     *     
     *     
     *       CU_DEVICE_ATTRIBUTE_MAX_GRID_DIM_Y:
     *         Maximum y-dimension of a grid;
     *       
     *     
     *     
     *       CU_DEVICE_ATTRIBUTE_MAX_GRID_DIM_Z:
     *         Maximum z-dimension of a grid;
     *       
     *     
     *     
     *       CU_DEVICE_ATTRIBUTE_MAX_SHARED_MEMORY_PER_BLOCK: Maximum amount of
     *         shared memory available to a thread block in bytes; this amount is
     *         shared by all thread blocks simultaneously
     *         resident on a multiprocessor;
     *       
     *     
     *     
     *       CU_DEVICE_ATTRIBUTE_TOTAL_CONSTANT_MEMORY: Memory available on device
     *         for __constant__ variables in a CUDA C kernel in bytes;
     *       
     *     
     *     
     *       CU_DEVICE_ATTRIBUTE_WARP_SIZE:
     *         Warp size in threads;
     *       
     *     
     *     
     *       CU_DEVICE_ATTRIBUTE_MAX_PITCH:
     *         Maximum pitch in bytes allowed by the memory copy functions that
     *         involve memory regions allocated through cuMemAllocPitch();
     *       
     *     
     *     
     *       CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE1D_WIDTH: Maximum 1D texture
     *         width;
     *       
     *     
     *     
     *       CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE1D_LINEAR_WIDTH: Maximum width for
     *         a 1D texture bound to linear memory;
     *       
     *     
     *     
     *       CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE1D_MIPMAPPED_WIDTH: Maximum
     *         mipmapped 1D texture width;
     *       
     *     
     *     
     *       CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE2D_WIDTH: Maximum 2D texture
     *         width;
     *       
     *     
     *     
     *       CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE2D_HEIGHT: Maximum 2D texture
     *         height;
     *       
     *     
     *     
     *       CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE2D_LINEAR_WIDTH: Maximum width for
     *         a 2D texture bound to linear memory;
     *       
     *     
     *     
     *       CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE2D_LINEAR_HEIGHT: Maximum height
     *         for a 2D texture bound to linear memory;
     *       
     *     
     *     
     *       CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE2D_LINEAR_PITCH: Maximum pitch in
     *         bytes for a 2D texture bound to linear memory;
     *       
     *     
     *     
     *       CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE2D_MIPMAPPED_WIDTH: Maximum
     *         mipmapped 2D texture width;
     *       
     *     
     *     
     *       CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE2D_MIPMAPPED_HEIGHT: Maximum
     *         mipmapped 2D texture height;
     *       
     *     
     *     
     *       CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE3D_WIDTH: Maximum 3D texture
     *         width;
     *       
     *     
     *     
     *       CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE3D_HEIGHT: Maximum 3D texture
     *         height;
     *       
     *     
     *     
     *       CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE3D_DEPTH: Maximum 3D texture
     *         depth;
     *       
     *     
     *     
     *       CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE3D_WIDTH_ALTERNATE: Alternate
     *         maximum 3D texture width, 0 if no alternate maximum 3D texture size is
     *         supported;
     *       
     *     
     *     
     *       CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE3D_HEIGHT_ALTERNATE: Alternate
     *         maximum 3D texture height, 0 if no alternate maximum 3D texture size
     *         is supported;
     *       
     *     
     *     
     *       CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE3D_DEPTH_ALTERNATE: Alternate
     *         maximum 3D texture depth, 0 if no alternate maximum 3D texture size is
     *         supported;
     *       
     *     
     *     
     *       CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURECUBEMAP_WIDTH: Maximum cubemap
     *         texture width or height;
     *       
     *     
     *     
     *       CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE1D_LAYERED_WIDTH: Maximum 1D
     *         layered texture width;
     *       
     *     
     *     
     *       CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE1D_LAYERED_LAYERS: Maximum layers
     *         in a 1D layered texture;
     *       
     *     
     *     
     *       CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE2D_LAYERED_WIDTH: Maximum 2D
     *         layered texture width;
     *       
     *     
     *     
     *       CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE2D_LAYERED_HEIGHT: Maximum 2D
     *         layered texture height;
     *       
     *     
     *     
     *       CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE2D_LAYERED_LAYERS: Maximum layers
     *         in a 2D layered texture;
     *       
     *     
     *     
     *       CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURECUBEMAP_LAYERED_WIDTH: Maximum
     *         cubemap layered texture width or height;
     *       
     *     
     *     
     *       CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURECUBEMAP_LAYERED_LAYERS: Maximum
     *         layers in a cubemap layered texture;
     *       
     *     
     *     
     *       CU_DEVICE_ATTRIBUTE_MAXIMUM_SURFACE1D_WIDTH: Maximum 1D surface
     *         width;
     *       
     *     
     *     
     *       CU_DEVICE_ATTRIBUTE_MAXIMUM_SURFACE2D_WIDTH: Maximum 2D surface
     *         width;
     *       
     *     
     *     
     *       CU_DEVICE_ATTRIBUTE_MAXIMUM_SURFACE2D_HEIGHT: Maximum 2D surface
     *         height;
     *       
     *     
     *     
     *       CU_DEVICE_ATTRIBUTE_MAXIMUM_SURFACE3D_WIDTH: Maximum 3D surface
     *         width;
     *       
     *     
     *     
     *       CU_DEVICE_ATTRIBUTE_MAXIMUM_SURFACE3D_HEIGHT: Maximum 3D surface
     *         height;
     *       
     *     
     *     
     *       CU_DEVICE_ATTRIBUTE_MAXIMUM_SURFACE3D_DEPTH: Maximum 3D surface
     *         depth;
     *       
     *     
     *     
     *       CU_DEVICE_ATTRIBUTE_MAXIMUM_SURFACE1D_LAYERED_WIDTH: Maximum 1D
     *         layered surface width;
     *       
     *     
     *     
     *       CU_DEVICE_ATTRIBUTE_MAXIMUM_SURFACE1D_LAYERED_LAYERS: Maximum layers
     *         in a 1D layered surface;
     *       
     *     
     *     
     *       CU_DEVICE_ATTRIBUTE_MAXIMUM_SURFACE2D_LAYERED_WIDTH: Maximum 2D
     *         layered surface width;
     *       
     *     
     *     
     *       CU_DEVICE_ATTRIBUTE_MAXIMUM_SURFACE2D_LAYERED_HEIGHT: Maximum 2D
     *         layered surface height;
     *       
     *     
     *     
     *       CU_DEVICE_ATTRIBUTE_MAXIMUM_SURFACE2D_LAYERED_LAYERS: Maximum layers
     *         in a 2D layered surface;
     *       
     *     
     *     
     *       CU_DEVICE_ATTRIBUTE_MAXIMUM_SURFACECUBEMAP_WIDTH: Maximum cubemap
     *         surface width;
     *       
     *     
     *     
     *       CU_DEVICE_ATTRIBUTE_MAXIMUM_SURFACECUBEMAP_LAYERED_WIDTH: Maximum
     *         cubemap layered surface width;
     *       
     *     
     *     
     *       CU_DEVICE_ATTRIBUTE_MAXIMUM_SURFACECUBEMAP_LAYERED_LAYERS: Maximum
     *         layers in a cubemap layered surface;
     *       
     *     
     *     
     *       CU_DEVICE_ATTRIBUTE_MAX_REGISTERS_PER_BLOCK: Maximum number of 32-bit
     *         registers available to a thread block; this number is shared by all
     *         thread blocks simultaneously
     *         resident on a multiprocessor;
     *       
     *     
     *     
     *       CU_DEVICE_ATTRIBUTE_CLOCK_RATE:
     *         Typical clock frequency in kilohertz;
     *       
     *     
     *     
     *       CU_DEVICE_ATTRIBUTE_TEXTURE_ALIGNMENT:
     *         Alignment requirement; texture base addresses aligned to textureAlign
     *         bytes do not need an offset applied to texture fetches;
     *       
     *     
     *     
     *       CU_DEVICE_ATTRIBUTE_TEXTURE_PITCH_ALIGNMENT: Pitch alignment
     *         requirement for 2D texture references bound to pitched memory;
     *       
     *     
     *     
     *       CU_DEVICE_ATTRIBUTE_GPU_OVERLAP:
     *         1 if the device can concurrently copy memory between host and device
     *         while executing a kernel, or 0 if not;
     *       
     *     
     *     
     *       CU_DEVICE_ATTRIBUTE_MULTIPROCESSOR_COUNT: Number of multiprocessors
     *         on the device;
     *       
     *     
     *     
     *       CU_DEVICE_ATTRIBUTE_KERNEL_EXEC_TIMEOUT:
     *         1 if there is a run time limit for kernels executed on the device, or
     *         0 if not;
     *       
     *     
     *     
     *       CU_DEVICE_ATTRIBUTE_INTEGRATED:
     *         1 if the device is integrated with the memory subsystem, or 0 if not;
     *       
     *     
     *     
     *       CU_DEVICE_ATTRIBUTE_CAN_MAP_HOST_MEMORY:
     *         1 if the device can map host memory into the CUDA address space, or 0
     *         if not;
     *       
     *     
     *     
     *       
     *         CU_DEVICE_ATTRIBUTE_COMPUTE_MODE:
     *         Compute mode that device is currently in. Available modes are as
     *         follows:
     *         
     *           
     *             CU_COMPUTEMODE_DEFAULT:
     *               Default mode - Device is not restricted and can have multiple CUDA
     *               contexts present at a single time.
     *             
     *           
     *           
     *             CU_COMPUTEMODE_EXCLUSIVE:
     *               Compute-exclusive mode - Device can have only one CUDA context present
     *               on it at a time.
     *             
     *           
     *           
     *             CU_COMPUTEMODE_PROHIBITED:
     *               Compute-prohibited mode - Device is prohibited from creating new CUDA
     *               contexts.
     *             
     *           
     *           
     *             CU_COMPUTEMODE_EXCLUSIVE_PROCESS: Compute-exclusive-process mode -
     *               Device can have only one context used by a single process at a time.
     *             
     *           
     *         
     *       
     *     
     *     
     *       CU_DEVICE_ATTRIBUTE_CONCURRENT_KERNELS:
     *         1 if the device supports executing multiple kernels within the same
     *         context simultaneously, or 0 if not. It is not guaranteed
     *         that multiple kernels will be
     *         resident on the device concurrently so this feature should not be
     *         relied upon for correctness;
     *       
     *     
     *     
     *       CU_DEVICE_ATTRIBUTE_ECC_ENABLED:
     *         1 if error correction is enabled on the device, 0 if error correction
     *         is disabled or not supported by the device;
     *       
     *     
     *     
     *       CU_DEVICE_ATTRIBUTE_PCI_BUS_ID:
     *         PCI bus identifier of the device;
     *       
     *     
     *     
     *       CU_DEVICE_ATTRIBUTE_PCI_DEVICE_ID:
     *         PCI device (also known as slot) identifier of the device;
     *       
     *     
     *     
     *       CU_DEVICE_ATTRIBUTE_TCC_DRIVER:
     *         1 if the device is using a TCC driver. TCC is only available on Tesla
     *         hardware running Windows Vista or later;
     *       
     *     
     *     
     *       CU_DEVICE_ATTRIBUTE_MEMORY_CLOCK_RATE:
     *         Peak memory clock frequency in kilohertz;
     *       
     *     
     *     
     *       CU_DEVICE_ATTRIBUTE_GLOBAL_MEMORY_BUS_WIDTH: Global memory bus width
     *         in bits;
     *       
     *     
     *     
     *       CU_DEVICE_ATTRIBUTE_L2_CACHE_SIZE:
     *         Size of L2 cache in bytes. 0 if the device doesn't have L2 cache;
     *       
     *     
     *     
     *       CU_DEVICE_ATTRIBUTE_MAX_THREADS_PER_MULTIPROCESSOR: Maximum resident
     *         threads per multiprocessor;
     *       
     *     
     *     
     *       CU_DEVICE_ATTRIBUTE_UNIFIED_ADDRESSING:
     *         1 if the device shares a unified address space with the host, or 0 if
     *         not;
     *       
     *     
     *     
     *       CU_DEVICE_ATTRIBUTE_COMPUTE_CAPABILITY_MAJOR: Major compute capability
     *         version number;
     *       
     *     
     *     
     *       CU_DEVICE_ATTRIBUTE_COMPUTE_CAPABILITY_MINOR: Minor compute capability
     *         version number;
     *       
     *     
     *   
     *   
     *   
     *     Note:
     *     Note that this
     *       function may also return error codes from previous, asynchronous
     *       launches.
     *     
     *   
     *   
     * 
     * 
     * @param pi Returned device attribute value
     * @param attrib Device attribute to query
     * @param dev Device handle
     * 
     * @return CUDA_SUCCESS, CUDA_ERROR_DEINITIALIZED, CUDA_ERROR_NOT_INITIALIZED,
     * CUDA_ERROR_INVALID_CONTEXT, CUDA_ERROR_INVALID_VALUE,
     * CUDA_ERROR_INVALID_DEVICE
     * 
     * @see JCudaDriver#cuDeviceGetCount
     * @see JCudaDriver#cuDeviceGetName
     * @see JCudaDriver#cuDeviceGet
     * @see JCudaDriver#cuDeviceTotalMem
     */    
    public static int cuDeviceGetAttribute(int pi[], int attrib, CUdevice dev)
    {
        // Query the attribute through the native binding, then route the
        // raw status code through checkResult before handing it back.
        final int status = cuDeviceGetAttributeNative(pi, attrib, dev);
        return checkResult(status);
    }

    // Native counterpart of cuDeviceGetAttribute.
    private static native int cuDeviceGetAttributeNative(int[] pi, int attrib, CUdevice dev);


    /**
     * Returns the CUDA driver version.
     * 
     *      * CUresult cuDriverGetVersion (
     *      int* driverVersion )
     * 
     * 
     *   Returns the CUDA driver version.  Returns
     *     in *driverVersion the version number of the installed CUDA
     *     driver. This function automatically returns CUDA_ERROR_INVALID_VALUE
     *     if the driverVersion argument is NULL.
     *   
     *   
     *     Note:
     *     Note that this
     *       function may also return error codes from previous, asynchronous
     *       launches. 
     *     
     *   
     *   
     * 
     * 
     * @param driverVersion Returns the CUDA driver version
     * 
     * @return CUDA_SUCCESS, CUDA_ERROR_INVALID_VALUE
     * 
     */    
    public static int cuDriverGetVersion (int driverVersion[])
    {
        // Delegate to the native binding; the status code goes through
        // checkResult before it is returned to the caller.
        final int status = cuDriverGetVersionNative(driverVersion);
        return checkResult(status);
    }

    // Native counterpart of cuDriverGetVersion.
    private static native int cuDriverGetVersionNative(int[] driverVersion);



    /**
     * Create a CUDA context.
     * 
     *      * CUresult cuCtxCreate (
     *      CUcontext* pctx,
     *      unsigned int  flags,
     *      CUdevice dev )
     * 
     * 
     *   Create a CUDA context.  Creates a new
     *     CUDA context and associates it with the calling thread. The flags parameter is described below. The context is created with
     *     a usage count of 1 and the caller of cuCtxCreate() must call
     *     cuCtxDestroy() when done using the context. If a context is already
     *     current to the thread, it is supplanted by the newly created context
     *     and may be restored by a subsequent call
     *     to cuCtxPopCurrent().
     *   
     *   The three LSBs of the flags
     *     parameter can be used to control how the OS thread, which owns the CUDA
     *     context at the time of an API call, interacts with
     *     the OS scheduler when waiting for results
     *     from the GPU. Only one of the scheduling flags can be set when creating
     *     a context.
     *   
     *   
     *     
     *       CU_CTX_SCHED_AUTO: The default
     *         value if the flags parameter is zero, uses a heuristic based
     *         on the number of active CUDA contexts in the process C and the number
     *         of logical
     *         processors in the system P. If
     *         C > P, then CUDA will yield to other OS threads when waiting for
     *         the GPU, otherwise CUDA will
     *         not yield while waiting for
     *         results and actively spin on the processor.
     *       
     *     
     *   
     *   
     *   
     *     
     *       CU_CTX_SCHED_SPIN: Instruct
     *         CUDA to actively spin when waiting for results from the GPU. This can
     *         decrease latency when waiting for the GPU,
     *         but may lower the performance
     *         of CPU threads if they are performing work in parallel with the CUDA
     *         thread.
     *       
     *     
     *   
     *   
     *   
     *     
     *       CU_CTX_SCHED_YIELD: Instruct
     *         CUDA to yield its thread when waiting for results from the GPU. This
     *         can increase latency when waiting for the
     *         GPU, but can increase the
     *         performance of CPU threads performing work in parallel with the GPU.
     *       
     *     
     *   
     *   
     *   
     *     
     *       CU_CTX_SCHED_BLOCKING_SYNC:
     *         Instruct CUDA to block the CPU thread on a synchronization primitive
     *         when waiting for the GPU to finish work.
     *       
     *     
     *   
     *   
     *   
     *     
     *       CU_CTX_BLOCKING_SYNC: Instruct
     *         CUDA to block the CPU thread on a synchronization primitive when
     *         waiting for the GPU to finish work. 
     *       
     *       Deprecated:
     *         This flag was deprecated as of CUDA 4.0 and was replaced with
     *         CU_CTX_SCHED_BLOCKING_SYNC.
     *       
     *     
     *   
     *   
     *   
     *     
     *       CU_CTX_MAP_HOST: Instruct CUDA
     *         to support mapped pinned allocations. This flag must be set in order
     *         to allocate pinned host memory that is
     *         accessible to the GPU.
     *       
     *     
     *   
     *   
     *   
     *     
     *       CU_CTX_LMEM_RESIZE_TO_MAX:
     *         Instruct CUDA to not reduce local memory after resizing local memory
     *         for a kernel. This can prevent thrashing by local memory
     *         allocations when launching many
     *         kernels with high local memory usage at the cost of potentially
     *         increased memory usage.
     *       
     *     
     *   
     *   
     *   Context creation will fail with
     *     CUDA_ERROR_UNKNOWN if the compute mode of the device is
     *     CU_COMPUTEMODE_PROHIBITED. Similarly, context creation will also fail
     *     with CUDA_ERROR_UNKNOWN if the compute mode for the device is set to
     *     CU_COMPUTEMODE_EXCLUSIVE and there is already an active context on the
     *     device. The function cuDeviceGetAttribute() can be used with
     *     CU_DEVICE_ATTRIBUTE_COMPUTE_MODE to determine the compute mode of the
     *     device. The nvidia-smi tool can be used to set the compute mode for
     *     devices. Documentation
     *     for nvidia-smi can be obtained by passing
     *     a -h option to it.
     *   
     *   
     *     Note:
     *     Note that this
     *       function may also return error codes from previous, asynchronous
     *       launches.
     *     
     *   
     *   
     * 
     * 
     * @param pctx Returned context handle of the new context
     * @param flags Context creation flags
     * @param dev Device to create context on
     * 
     * @return CUDA_SUCCESS, CUDA_ERROR_DEINITIALIZED, CUDA_ERROR_NOT_INITIALIZED,
     * CUDA_ERROR_INVALID_CONTEXT, CUDA_ERROR_INVALID_DEVICE,
     * CUDA_ERROR_INVALID_VALUE, CUDA_ERROR_OUT_OF_MEMORY, CUDA_ERROR_UNKNOWN
     * 
     * @see JCudaDriver#cuCtxDestroy
     * @see JCudaDriver#cuCtxGetApiVersion
     * @see JCudaDriver#cuCtxGetCacheConfig
     * @see JCudaDriver#cuCtxGetDevice
     * @see JCudaDriver#cuCtxGetLimit
     * @see JCudaDriver#cuCtxPopCurrent
     * @see JCudaDriver#cuCtxPushCurrent
     * @see JCudaDriver#cuCtxSetCacheConfig
     * @see JCudaDriver#cuCtxSetLimit
     * @see JCudaDriver#cuCtxSynchronize
     */    
    public static int cuCtxCreate(CUcontext pctx, int flags, CUdevice dev)
    {
        // Create the context natively, then pass the status through checkResult.
        final int status = cuCtxCreateNative(pctx, flags, dev);
        return checkResult(status);
    }

    // Native counterpart of cuCtxCreate.
    private static native int cuCtxCreateNative(CUcontext pctx, int flags, CUdevice dev);


    /**
     * Destroy a CUDA context.
     * 
     *      * CUresult cuCtxDestroy (
     *      CUcontext ctx )
     * 
     * 
     *   Destroy a CUDA context.  Destroys the
     *     CUDA context specified by ctx. The context ctx will
     *     be destroyed regardless of how many threads it is current to. It is
     *     the responsibility of the calling function to ensure
     *     that no API call issues using ctx while cuCtxDestroy() is executing.
     *   
     *   If ctx is current to the
     *     calling thread then ctx will also be popped from the current
     *     thread's context stack (as though cuCtxPopCurrent() were called). If
     *     ctx is current to other threads, then ctx will
     *     remain current to those threads, and attempting to access ctx
     *     from those threads will result in the error
     *     CUDA_ERROR_CONTEXT_IS_DESTROYED.
     *   
     *   
     *     Note:
     *     Note that this
     *       function may also return error codes from previous, asynchronous
     *       launches.
     *     
     *   
     *   
     * 
     * 
     * @param ctx Context to destroy
     * 
     * @return CUDA_SUCCESS, CUDA_ERROR_DEINITIALIZED, CUDA_ERROR_NOT_INITIALIZED,
     * CUDA_ERROR_INVALID_CONTEXT, CUDA_ERROR_INVALID_VALUE
     * 
     * @see JCudaDriver#cuCtxCreate
     * @see JCudaDriver#cuCtxGetApiVersion
     * @see JCudaDriver#cuCtxGetCacheConfig
     * @see JCudaDriver#cuCtxGetDevice
     * @see JCudaDriver#cuCtxGetLimit
     * @see JCudaDriver#cuCtxPopCurrent
     * @see JCudaDriver#cuCtxPushCurrent
     * @see JCudaDriver#cuCtxSetCacheConfig
     * @see JCudaDriver#cuCtxSetLimit
     * @see JCudaDriver#cuCtxSynchronize
     */    
    public static int cuCtxDestroy(CUcontext ctx)
    {
        // Destroy the context natively, then pass the status through checkResult.
        final int status = cuCtxDestroyNative(ctx);
        return checkResult(status);
    }

    // Native counterpart of cuCtxDestroy.
    private static native int cuCtxDestroyNative(CUcontext ctx);


    /**
     * Increment a context's usage-count.
     * 
     *      * CUresult cuCtxAttach (
     *      CUcontext* pctx,
     *      unsigned int  flags )
     * 
     * 
     *   Increment a context's usage-count.  
     *     Deprecated: Note that this function is
     *     deprecated and should not be used.
     *   
     *   Increments the usage count of the
     *     context and passes back a context handle in *pctx that must
     *     be passed to cuCtxDetach() when the application is done with the
     *     context. cuCtxAttach() fails if there is no context current to the
     *     thread.
     *   
     *   Currently, the flags parameter
     *     must be 0.
     *   
     *   
     *     Note:
     *     Note that this
     *       function may also return error codes from previous, asynchronous
     *       launches.
     *     
     *   
     *   
     * 
     * 
     * @param pctx Returned context handle of the current context
     * @param flags Context attach flags (must be 0)
     * 
     * @return CUDA_SUCCESS, CUDA_ERROR_DEINITIALIZED, CUDA_ERROR_NOT_INITIALIZED,
     * CUDA_ERROR_INVALID_CONTEXT, CUDA_ERROR_INVALID_VALUE
     * 
     * @see JCudaDriver#cuCtxCreate
     * @see JCudaDriver#cuCtxDestroy
     * @see JCudaDriver#cuCtxDetach
     * @see JCudaDriver#cuCtxGetApiVersion
     * @see JCudaDriver#cuCtxGetCacheConfig
     * @see JCudaDriver#cuCtxGetDevice
     * @see JCudaDriver#cuCtxGetLimit
     * @see JCudaDriver#cuCtxPopCurrent
     * @see JCudaDriver#cuCtxPushCurrent
     * @see JCudaDriver#cuCtxSetCacheConfig
     * @see JCudaDriver#cuCtxSetLimit
     * @see JCudaDriver#cuCtxSynchronize
     */    
    public static int cuCtxAttach(CUcontext pctx, int flags)
    {
        // Forward to the native binding and pass its status through checkResult.
        final int status = cuCtxAttachNative(pctx, flags);
        return checkResult(status);
    }

    // Native counterpart of cuCtxAttach.
    private static native int cuCtxAttachNative(CUcontext pctx, int flags);


    /**
     * Decrement a context's usage-count.
     * 
     *      * CUresult cuCtxDetach (
     *      CUcontext ctx )
     * 
     * 
     *   Decrement a context's usage-count.  
     *     Deprecated: Note that this function is
     *     deprecated and should not be used.
     *   
     *   Decrements the usage count of the
     *     context ctx, and destroys the context if the usage count goes
     *     to 0. The context must be a handle that was passed back by cuCtxCreate()
     *     or cuCtxAttach(), and must be current to the calling thread.
     *   
     *   
     *     Note:
     *     Note that this
     *       function may also return error codes from previous, asynchronous
     *       launches.
     *     
     *   
     *   
     * 
     * 
     * @param ctx Context to destroy
     * 
     * @return CUDA_SUCCESS, CUDA_ERROR_DEINITIALIZED, CUDA_ERROR_NOT_INITIALIZED,
     * CUDA_ERROR_INVALID_CONTEXT
     * 
     * @see JCudaDriver#cuCtxCreate
     * @see JCudaDriver#cuCtxDestroy
     * @see JCudaDriver#cuCtxGetApiVersion
     * @see JCudaDriver#cuCtxGetCacheConfig
     * @see JCudaDriver#cuCtxGetDevice
     * @see JCudaDriver#cuCtxGetLimit
     * @see JCudaDriver#cuCtxPopCurrent
     * @see JCudaDriver#cuCtxPushCurrent
     * @see JCudaDriver#cuCtxSetCacheConfig
     * @see JCudaDriver#cuCtxSetLimit
     * @see JCudaDriver#cuCtxSynchronize
     */    
    public static int cuCtxDetach(CUcontext ctx)
    {
        // Forward to the native binding and pass its status through checkResult.
        final int status = cuCtxDetachNative(ctx);
        return checkResult(status);
    }

    // Native counterpart of cuCtxDetach.
    private static native int cuCtxDetachNative(CUcontext ctx);


    /**
     * Pushes a context on the current CPU thread.
     * 
     *      * CUresult cuCtxPushCurrent (
     *      CUcontext ctx )
     * 
     * 
     *   Pushes a context on the current CPU
     *     thread.  Pushes the given context ctx onto the CPU thread's
     *     stack of current contexts. The specified context becomes the CPU
     *     thread's current context, so all CUDA
     *     functions that operate on the current
     *     context are affected.
     *   
     *   The previous current context may be made
     *     current again by calling cuCtxDestroy() or cuCtxPopCurrent().
     *   
     *   
     *     Note:
     *     Note that this
     *       function may also return error codes from previous, asynchronous
     *       launches.
     *     
     *   
     *   
     * 
     * 
     * @param ctx Context to push
     * 
     * @return CUDA_SUCCESS, CUDA_ERROR_DEINITIALIZED, CUDA_ERROR_NOT_INITIALIZED,
     * CUDA_ERROR_INVALID_CONTEXT, CUDA_ERROR_INVALID_VALUE
     * 
     * @see JCudaDriver#cuCtxCreate
     * @see JCudaDriver#cuCtxDestroy
     * @see JCudaDriver#cuCtxGetApiVersion
     * @see JCudaDriver#cuCtxGetCacheConfig
     * @see JCudaDriver#cuCtxGetDevice
     * @see JCudaDriver#cuCtxGetLimit
     * @see JCudaDriver#cuCtxPopCurrent
     * @see JCudaDriver#cuCtxSetCacheConfig
     * @see JCudaDriver#cuCtxSetLimit
     * @see JCudaDriver#cuCtxSynchronize
     */    
    public static int cuCtxPushCurrent(CUcontext ctx)
    {
        // Push via the native binding, then pass the status through checkResult.
        final int status = cuCtxPushCurrentNative(ctx);
        return checkResult(status);
    }

    // Native counterpart of cuCtxPushCurrent.
    private static native int cuCtxPushCurrentNative(CUcontext ctx);


    /**
     * Pops the current CUDA context from the current CPU thread.
     * 
     *      * CUresult cuCtxPopCurrent (
     *      CUcontext* pctx )
     * 
     * 
     *   Pops the current CUDA context from the
     *     current CPU thread.  Pops the current CUDA context from the CPU thread
     *     and passes back
     *     the old context handle in *pctx.
     *     That context may then be made current to a different CPU thread by
     *     calling cuCtxPushCurrent().
     *   
     *   If a context was current to the CPU
     *     thread before cuCtxCreate() or cuCtxPushCurrent() was called, this
     *     function makes that context current to the CPU thread again.
     *   
     *   
     *     Note:
     *     Note that this
     *       function may also return error codes from previous, asynchronous
     *       launches.
     *     
     *   
     *   
     * 
     * 
     * @param pctx Returned handle of the context that was popped
     * 
     * @return CUDA_SUCCESS, CUDA_ERROR_DEINITIALIZED, CUDA_ERROR_NOT_INITIALIZED,
     * CUDA_ERROR_INVALID_CONTEXT
     * 
     * @see JCudaDriver#cuCtxCreate
     * @see JCudaDriver#cuCtxDestroy
     * @see JCudaDriver#cuCtxGetApiVersion
     * @see JCudaDriver#cuCtxGetCacheConfig
     * @see JCudaDriver#cuCtxGetDevice
     * @see JCudaDriver#cuCtxGetLimit
     * @see JCudaDriver#cuCtxPushCurrent
     * @see JCudaDriver#cuCtxSetCacheConfig
     * @see JCudaDriver#cuCtxSetLimit
     * @see JCudaDriver#cuCtxSynchronize
     */    
    public static int cuCtxPopCurrent(CUcontext pctx)
    {
        // Pop via the native binding, then pass the status through checkResult.
        final int status = cuCtxPopCurrentNative(pctx);
        return checkResult(status);
    }

    // Native counterpart of cuCtxPopCurrent.
    private static native int cuCtxPopCurrentNative(CUcontext pctx);


    /**
     * Binds the specified CUDA context to the calling CPU thread.
     * 
     *      * CUresult cuCtxSetCurrent (
     *      CUcontext ctx )
     * 
     * 
     *   Binds the specified CUDA context to the
     *     calling CPU thread.  Binds the specified CUDA context to the calling
     *     CPU thread. If
     *     ctx is NULL then the CUDA
     *     context previously bound to the calling CPU thread is unbound and
     *     CUDA_SUCCESS is returned.
     *   
     *   If there exists a CUDA context stack on
     *     the calling CPU thread, this will replace the top of that stack with
     *     ctx. If ctx is NULL then this will be equivalent
     *     to popping the top of the calling CPU thread's CUDA context stack (or
     *     a no-op if the
     *     calling CPU thread's CUDA context stack
     *     is empty).
     *   
     *   
     *     Note:
     *     Note that this
     *       function may also return error codes from previous, asynchronous
     *       launches.
     *     
     *   
     *   
     * 
     * 
     * @param ctx Context to bind to the calling CPU thread
     * 
     * @return CUDA_SUCCESS, CUDA_ERROR_DEINITIALIZED, CUDA_ERROR_NOT_INITIALIZED,
     * CUDA_ERROR_INVALID_CONTEXT
     * 
     * @see JCudaDriver#cuCtxGetCurrent
     * @see JCudaDriver#cuCtxCreate
     * @see JCudaDriver#cuCtxDestroy
     */    
    public static int cuCtxSetCurrent(CUcontext ctx)
    {
        // Bind via the native binding, then pass the status through checkResult.
        final int status = cuCtxSetCurrentNative(ctx);
        return checkResult(status);
    }

    // Native counterpart of cuCtxSetCurrent.
    private static native int cuCtxSetCurrentNative(CUcontext ctx);


    /**
     * Returns the CUDA context bound to the calling CPU thread.
     * 
     *      * CUresult cuCtxGetCurrent (
     *      CUcontext* pctx )
     * 
     * 
     *   Returns the CUDA context bound to the
     *     calling CPU thread.  Returns in *pctx the CUDA context bound
     *     to the calling CPU thread. If no context is bound to the calling CPU
     *     thread then *pctx is set to NULL and CUDA_SUCCESS is
     *     returned.
     *   
     *   
     *     Note:
     *     Note that this
     *       function may also return error codes from previous, asynchronous
     *       launches.
     *     
     *   
     *   
     * 
     * 
     * @param pctx Returned context handle
     * 
     * @return CUDA_SUCCESS, CUDA_ERROR_DEINITIALIZED,
     * CUDA_ERROR_NOT_INITIALIZED
     * 
     * @see JCudaDriver#cuCtxSetCurrent
     * @see JCudaDriver#cuCtxCreate
     * @see JCudaDriver#cuCtxDestroy
     */    
    public static int cuCtxGetCurrent(CUcontext pctx)
    {
        // Query via the native binding, then pass the status through checkResult.
        final int status = cuCtxGetCurrentNative(pctx);
        return checkResult(status);
    }

    // Native counterpart of cuCtxGetCurrent.
    private static native int cuCtxGetCurrentNative(CUcontext pctx);


    /**
     * Returns the device ID for the current context.
     * 
     *      * CUresult cuCtxGetDevice (
     *      CUdevice* device )
     * 
     * 
     *   Returns the device ID for the current
     *     context.  Returns in *device the ordinal of the current
     *     context's device.
     *   
     *   
     *     Note:
     *     Note that this
     *       function may also return error codes from previous, asynchronous
     *       launches.
     *     
     *   
     *   
     * 
     * 
     * @param device Returned device ID for the current context
     * 
     * @return CUDA_SUCCESS, CUDA_ERROR_DEINITIALIZED, CUDA_ERROR_NOT_INITIALIZED,
     * CUDA_ERROR_INVALID_CONTEXT, CUDA_ERROR_INVALID_VALUE
     * 
     * @see JCudaDriver#cuCtxCreate
     * @see JCudaDriver#cuCtxDestroy
     * @see JCudaDriver#cuCtxGetApiVersion
     * @see JCudaDriver#cuCtxGetCacheConfig
     * @see JCudaDriver#cuCtxGetLimit
     * @see JCudaDriver#cuCtxPopCurrent
     * @see JCudaDriver#cuCtxPushCurrent
     * @see JCudaDriver#cuCtxSetCacheConfig
     * @see JCudaDriver#cuCtxSetLimit
     * @see JCudaDriver#cuCtxSynchronize
     */    
    public static int cuCtxGetDevice(CUdevice device)
    {
        // Query via the native binding, then pass the status through checkResult.
        final int status = cuCtxGetDeviceNative(device);
        return checkResult(status);
    }

    // Native counterpart of cuCtxGetDevice.
    private static native int cuCtxGetDeviceNative(CUdevice device);


    /**
     * Returns the flags for the current context.
     * 
     * CUresult cuCtxGetFlags (
     *      unsigned int* flags )
     * 
     * Java binding for the CUDA driver API function of the same name. The
     * call is forwarded to the native implementation and the resulting
     * status code is passed through checkResult before being returned.
     * 
     * @param flags Array receiving the flags of the current context
     * (presumably written to element 0, like the other int[] output
     * parameters of this class - TODO confirm against the native code)
     * 
     * @return The status code of the native call, as passed through
     * checkResult. See the CUDA driver API documentation of cuCtxGetFlags
     * for the possible values.
     * 
     * @see JCudaDriver#cuCtxCreate
     * @see JCudaDriver#cuCtxGetDevice
     * @see JCudaDriver#cuCtxGetCurrent
     */
    public static int cuCtxGetFlags(int flags[])
    {
        final int status = cuCtxGetFlagsNative(flags);
        return checkResult(status);
    }

    // Native counterpart of cuCtxGetFlags.
    private static native int cuCtxGetFlagsNative(int[] flags);
    
    /**
     * Block for a context's tasks to complete.
     * 
     *      * CUresult cuCtxSynchronize (
     *      void )
     * 
     * 
     *   Block for a context's tasks to complete.
     *     Blocks until the device has completed all preceding requested tasks.
     *     cuCtxSynchronize() returns an error if one of the preceding tasks
     *     failed. If the context was created with the CU_CTX_SCHED_BLOCKING_SYNC
     *     flag, the CPU thread will block until the GPU context has finished its
     *     work.
     *   
     *   
     *     Note:
     *     Note that this
     *       function may also return error codes from previous, asynchronous
     *       launches.
     *     
     *   
     *   
     * 
     * 
     * 
     * @return CUDA_SUCCESS, CUDA_ERROR_DEINITIALIZED, CUDA_ERROR_NOT_INITIALIZED,
     * CUDA_ERROR_INVALID_CONTEXT
     * 
     * @see JCudaDriver#cuCtxCreate
     * @see JCudaDriver#cuCtxDestroy
     * @see JCudaDriver#cuCtxGetApiVersion
     * @see JCudaDriver#cuCtxGetCacheConfig
     * @see JCudaDriver#cuCtxGetDevice
     * @see JCudaDriver#cuCtxGetLimit
     * @see JCudaDriver#cuCtxPopCurrent
     * @see JCudaDriver#cuCtxPushCurrent
     * @see JCudaDriver#cuCtxSetCacheConfig
     * @see JCudaDriver#cuCtxSetLimit
     */    
    public static int cuCtxSynchronize()
    {
        // Synchronize via the native binding, then pass the status
        // through checkResult.
        final int status = cuCtxSynchronizeNative();
        return checkResult(status);
    }

    // Native counterpart of cuCtxSynchronize.
    private static native int cuCtxSynchronizeNative();


    /**
     * Loads a compute module.
     * 
     *      * CUresult cuModuleLoad (
     *      CUmodule* module,
     *      const char* fname )
     * 
     * 
     *   Loads a compute module.  Takes a filename
     *     fname and loads the corresponding module module
     *     into the current context. The CUDA driver API does not attempt to
     *     lazily allocate the resources needed by a module; if the
     *     memory for functions and data (constant
     *     and global) needed by the module cannot be allocated, cuModuleLoad()
     *     fails. The file should be a cubin file as output by nvcc, or a PTX file either as output by nvcc
     *     or handwritten, or a fatbin file as output by nvcc
     *     from toolchain 4.0 or later.
     *   
     *   
     *     Note:
     *     Note that this
     *       function may also return error codes from previous, asynchronous
     *       launches.
     *     
     *   
     *   
     * 
     * 
     * @param module Returned module
     * @param fname Filename of module to load
     * 
     * @return CUDA_SUCCESS, CUDA_ERROR_DEINITIALIZED, CUDA_ERROR_NOT_INITIALIZED,
     * CUDA_ERROR_INVALID_CONTEXT, CUDA_ERROR_INVALID_VALUE, CUDA_ERROR_NOT_FOUND,
     * CUDA_ERROR_OUT_OF_MEMORY, CUDA_ERROR_FILE_NOT_FOUND,
     * CUDA_ERROR_SHARED_OBJECT_SYMBOL_NOT_FOUND,
     * CUDA_ERROR_SHARED_OBJECT_INIT_FAILED
     * 
     * @see JCudaDriver#cuModuleGetFunction
     * @see JCudaDriver#cuModuleGetGlobal
     * @see JCudaDriver#cuModuleGetTexRef
     * @see JCudaDriver#cuModuleLoadData
     * @see JCudaDriver#cuModuleLoadDataEx
     * @see JCudaDriver#cuModuleLoadFatBinary
     * @see JCudaDriver#cuModuleUnload
     */    
    public static int cuModuleLoad(CUmodule module, String fname)
    {
        // Load the module file via the native binding, then pass the
        // status through checkResult.
        final int status = cuModuleLoadNative(module, fname);
        return checkResult(status);
    }

    // Native counterpart of cuModuleLoad.
    private static native int cuModuleLoadNative(CUmodule module, String fname);


    /**
     * Load a module's data.
     * 
     *      * CUresult cuModuleLoadData (
     *      CUmodule* module,
     *      const void* image )
     * 
     * 
     *   Load a module's data.  Takes a pointer
     *     image and loads the corresponding module module
     *     into the current context. The pointer may be obtained by mapping a
     *     cubin or PTX or fatbin file, passing a cubin or PTX or
     *     fatbin file as a NULL-terminated text
     *     string, or incorporating a cubin or fatbin object into the executable
     *     resources and
     *     using operating system calls such as
     *     Windows FindResource() to obtain the pointer.
     *   
     *   
     *     Note:
     *     Note that this
     *       function may also return error codes from previous, asynchronous
     *       launches.
     *     
     *   
     *   
     * 
     * 
     * @param module Returned module
     * @param image Module data to load
     * 
     * @return CUDA_SUCCESS, CUDA_ERROR_DEINITIALIZED, CUDA_ERROR_NOT_INITIALIZED,
     * CUDA_ERROR_INVALID_CONTEXT, CUDA_ERROR_INVALID_VALUE,
     * CUDA_ERROR_OUT_OF_MEMORY, CUDA_ERROR_SHARED_OBJECT_SYMBOL_NOT_FOUND,
     * CUDA_ERROR_SHARED_OBJECT_INIT_FAILED
     * 
     * @see JCudaDriver#cuModuleGetFunction
     * @see JCudaDriver#cuModuleGetGlobal
     * @see JCudaDriver#cuModuleGetTexRef
     * @see JCudaDriver#cuModuleLoad
     * @see JCudaDriver#cuModuleLoadDataEx
     * @see JCudaDriver#cuModuleLoadFatBinary
     * @see JCudaDriver#cuModuleUnload
     */    
    public static int cuModuleLoadData(CUmodule module, byte image[])
    {
        // The image bytes are forwarded untouched; only the returned status
        // is post-processed by checkResult.
        final int status = cuModuleLoadDataNative(module, image);
        return checkResult(status);
    }

    /** Native counterpart of {@link #cuModuleLoadData(CUmodule, byte[])}. */
    private static native int cuModuleLoadDataNative(CUmodule module, byte[] image);




    /**
     * Load a module's data with options.
     * <br>
     * <br>
     * <b>Note</b>: It is hardly possible to properly pass in the required
     * option values for this method. Thus, the arguments here must be <br>
     * {@code numOptions=0} <br>
     * {@code options=new int[0]} <br>
     * {@code optionValues=Pointer.to(new int[0])} <br>
     * For passing in real options, use
     * {@link #cuModuleLoadDataJIT(CUmodule, Pointer, JITOptions)} instead
     * 
     *      * CUresult cuModuleLoadDataEx (
     *      CUmodule* module,
     *      const void* image,
     *      unsigned int  numOptions,
     *      CUjit_option* options,
     *      void** optionValues )
     * 
     * 
     *   Load a module's data with options.  Takes
     *     a pointer image and loads the corresponding module module into the current context. The pointer may be obtained by
     *     mapping a cubin or PTX or fatbin file, passing a cubin or PTX or
     *     fatbin file as a NULL-terminated text
     *     string, or incorporating a cubin or fatbin object into the executable
     *     resources and
     *     using operating system calls such as
     *     Windows FindResource() to obtain the pointer. Options are
     *     passed as an array via options and any corresponding
     *     parameters are passed in optionValues. The number of total
     *     options is supplied via numOptions. Any outputs will be
     *     returned via optionValues. Supported options are (types for
     *     the option values are specified in parentheses after the option name):
     *   
     *   
     *     
     *       CU_JIT_MAX_REGISTERS: (unsigned
     *         int) input specifies the maximum number of registers per thread;
     *       
     *     
     *     
     *       CU_JIT_THREADS_PER_BLOCK:
     *         (unsigned int) input specifies number of threads per block to target
     *         compilation for; output returns the number of threads
     *         the compiler actually targeted;
     *       
     *     
     *     
     *       CU_JIT_WALL_TIME: (float)
     *         output returns the float value of wall clock time, in milliseconds,
     *         spent compiling the PTX code;
     *       
     *     
     *     
     *       CU_JIT_INFO_LOG_BUFFER: (char*)
     *         input is a pointer to a buffer in which to print any informational log
     *         messages from PTX assembly (the buffer size
     *         is specified via option
     *         CU_JIT_INFO_LOG_BUFFER_SIZE_BYTES);
     *       
     *     
     *     
     *       CU_JIT_INFO_LOG_BUFFER_SIZE_BYTES:
     *         (unsigned int) input is the size in bytes of the buffer; output is the
     *         number of bytes filled with messages;
     *       
     *     
     *     
     *       CU_JIT_ERROR_LOG_BUFFER:
     *         (char*) input is a pointer to a buffer in which to print any error log
     *         messages from PTX assembly (the buffer size is specified
     *         via option
     *         CU_JIT_ERROR_LOG_BUFFER_SIZE_BYTES);
     *       
     *     
     *     
     *       CU_JIT_ERROR_LOG_BUFFER_SIZE_BYTES:
     *         (unsigned int) input is the size in bytes of the buffer; output is the
     *         number of bytes filled with messages;
     *       
     *     
     *     
     *       CU_JIT_OPTIMIZATION_LEVEL:
     *         (unsigned int) input is the level of optimization to apply to generated
     *         code (0 - 4), with 4 being the default and highest
     *         level;
     *       
     *     
     *     
     *       CU_JIT_TARGET_FROM_CUCONTEXT:
     *         (No option value) causes compilation target to be determined based on
     *         current attached context (default);
     *       
     *     
     *     
     *       
     *         CU_JIT_TARGET: (unsigned int
     *         for enumerated type CUjit_target_enum) input is the compilation target
     *         based on supplied CUjit_target_enum;
     *         possible values are:
     *         
     *           
     *             CU_TARGET_COMPUTE_10
     *           
     *           
     *             CU_TARGET_COMPUTE_11
     *           
     *           
     *             CU_TARGET_COMPUTE_12
     *           
     *           
     *             CU_TARGET_COMPUTE_13
     *           
     *           
     *             CU_TARGET_COMPUTE_20
     *           
     *         
     *       
     *     
     *     
     *       
     *         CU_JIT_FALLBACK_STRATEGY:
     *         (unsigned int for enumerated type CUjit_fallback_enum) chooses fallback
     *         strategy if matching cubin is not found; possible
     *         values are:
     *         
     *           
     *             CU_PREFER_PTX
     *           
     *           
     *             CU_PREFER_BINARY
     *           
     *         
     *       
     *     
     *   
     *   
     *   
     *     Note:
     *     Note that this
     *       function may also return error codes from previous, asynchronous
     *       launches.
     *     
     *   
     *   
     * 
     * 
     * @param phMod Returned module
     * @param p Module data to load
     * @param numOptions Number of options
     * @param options Options for JIT
     * @param optionValues Option values for JIT
     * 
     * @return CUDA_SUCCESS, CUDA_ERROR_DEINITIALIZED, CUDA_ERROR_NOT_INITIALIZED,
     * CUDA_ERROR_INVALID_CONTEXT, CUDA_ERROR_INVALID_VALUE,
     * CUDA_ERROR_OUT_OF_MEMORY, CUDA_ERROR_NO_BINARY_FOR_GPU,
     * CUDA_ERROR_SHARED_OBJECT_SYMBOL_NOT_FOUND,
     * CUDA_ERROR_SHARED_OBJECT_INIT_FAILED
     * 
     * @see JCudaDriver#cuModuleGetFunction
     * @see JCudaDriver#cuModuleGetGlobal
     * @see JCudaDriver#cuModuleGetTexRef
     * @see JCudaDriver#cuModuleLoad
     * @see JCudaDriver#cuModuleLoadData
     * @see JCudaDriver#cuModuleLoadFatBinary
     * @see JCudaDriver#cuModuleUnload
     */    
    public static int cuModuleLoadDataEx (CUmodule phMod, Pointer p, int numOptions, int options[], Pointer optionValues)
    {
        // All five arguments go to the native binding in their original
        // order; checkResult then processes the reported status.
        final int status =
            cuModuleLoadDataExNative(phMod, p, numOptions, options, optionValues);
        return checkResult(status);
    }
    /** Native counterpart of {@code cuModuleLoadDataEx}. */
    private static native int cuModuleLoadDataExNative(
        CUmodule phMod, Pointer p, int numOptions, int[] options, Pointer optionValues);



    /**
     * Load a module's data.
     * 
     *      * CUresult cuModuleLoadFatBinary (
     *      CUmodule* module,
     *      const void* fatCubin )
     * 
     * 
     *   Load a module's data.  Takes a pointer
     *     fatCubin and loads the corresponding module module
     *     into the current context. The pointer represents a fat binary object,
     *     which is a collection of different cubin and/or PTX
     *     files, all representing the same device
     *     code, but compiled and optimized for different architectures.
     *   
     *   Prior to CUDA 4.0, there was no
     *     documented API for constructing and using fat binary objects by
     *     programmers. Starting with
     *     CUDA 4.0, fat binary objects can be
     *     constructed by providing the -fatbin option to nvcc.
     *     More information can be found in the nvcc document.
     *   
     *   
     *     Note:
     *     Note that this
     *       function may also return error codes from previous, asynchronous
     *       launches.
     *     
     *   
     *   
     * 
     * 
     * @param module Returned module
     * @param fatCubin Fat binary to load
     * 
     * @return CUDA_SUCCESS, CUDA_ERROR_DEINITIALIZED, CUDA_ERROR_NOT_INITIALIZED,
     * CUDA_ERROR_INVALID_CONTEXT, CUDA_ERROR_INVALID_VALUE, CUDA_ERROR_NOT_FOUND,
     * CUDA_ERROR_OUT_OF_MEMORY, CUDA_ERROR_NO_BINARY_FOR_GPU,
     * CUDA_ERROR_SHARED_OBJECT_SYMBOL_NOT_FOUND,
     * CUDA_ERROR_SHARED_OBJECT_INIT_FAILED
     * 
     * @see JCudaDriver#cuModuleGetFunction
     * @see JCudaDriver#cuModuleGetGlobal
     * @see JCudaDriver#cuModuleGetTexRef
     * @see JCudaDriver#cuModuleLoad
     * @see JCudaDriver#cuModuleLoadData
     * @see JCudaDriver#cuModuleLoadDataEx
     * @see JCudaDriver#cuModuleUnload
     */    
    public static int cuModuleLoadFatBinary(CUmodule module, byte fatCubin[])
    {
        // Delegate to the native binding and route its status through
        // checkResult before returning.
        final int status = cuModuleLoadFatBinaryNative(module, fatCubin);
        return checkResult(status);
    }

    /** Native counterpart of {@link #cuModuleLoadFatBinary(CUmodule, byte[])}. */
    private static native int cuModuleLoadFatBinaryNative(CUmodule module, byte[] fatCubin);


    /**
     * Unloads a module.
     * 
     *      * CUresult cuModuleUnload (
     *      CUmodule hmod )
     * 
     * 
     *   Unloads a module.  Unloads a module hmod from the current context.
     *   
     *   
     *     Note:
     *     Note that this
     *       function may also return error codes from previous, asynchronous
     *       launches.
     *     
     *   
     *   
     * 
     * 
     * @param hmod Module to unload
     * 
     * @return CUDA_SUCCESS, CUDA_ERROR_DEINITIALIZED, CUDA_ERROR_NOT_INITIALIZED,
     * CUDA_ERROR_INVALID_CONTEXT, CUDA_ERROR_INVALID_VALUE
     * 
     * @see JCudaDriver#cuModuleGetFunction
     * @see JCudaDriver#cuModuleGetGlobal
     * @see JCudaDriver#cuModuleGetTexRef
     * @see JCudaDriver#cuModuleLoad
     * @see JCudaDriver#cuModuleLoadData
     * @see JCudaDriver#cuModuleLoadDataEx
     * @see JCudaDriver#cuModuleLoadFatBinary
     */    
    public static int cuModuleUnload(CUmodule hmod)
    {
        // Native call first; its status is then processed by checkResult.
        final int status = cuModuleUnloadNative(hmod);
        return checkResult(status);
    }

    /** Native counterpart of {@link #cuModuleUnload(CUmodule)}. */
    private static native int cuModuleUnloadNative(CUmodule hmod);


    /**
     * Returns a function handle.
     * 
     *      * CUresult cuModuleGetFunction (
     *      CUfunction* hfunc,
     *      CUmodule hmod,
     *      const char* name )
     * 
     * 
     *   Returns a function handle.  Returns in
     *     *hfunc the handle of the function of name name
     *     located in module hmod. If no function of that name exists,
     *     cuModuleGetFunction() returns CUDA_ERROR_NOT_FOUND.
     *   
     *   
     *     Note:
     *     Note that this
     *       function may also return error codes from previous, asynchronous
     *       launches.
     *     
     *   
     *   
     * 
     * 
     * @param hfunc Returned function handle
     * @param hmod Module to retrieve function from
     * @param name Name of function to retrieve
     * 
     * @return CUDA_SUCCESS, CUDA_ERROR_DEINITIALIZED, CUDA_ERROR_NOT_INITIALIZED,
     * CUDA_ERROR_INVALID_CONTEXT, CUDA_ERROR_INVALID_VALUE,
     * CUDA_ERROR_NOT_FOUND
     * 
     * @see JCudaDriver#cuModuleGetGlobal
     * @see JCudaDriver#cuModuleGetTexRef
     * @see JCudaDriver#cuModuleLoad
     * @see JCudaDriver#cuModuleLoadData
     * @see JCudaDriver#cuModuleLoadDataEx
     * @see JCudaDriver#cuModuleLoadFatBinary
     * @see JCudaDriver#cuModuleUnload
     */    
    public static int cuModuleGetFunction(CUfunction hfunc, CUmodule hmod, String name)
    {
        // Forward the handle, module and name to the native binding, then
        // let checkResult process the status it reports.
        final int status = cuModuleGetFunctionNative(hfunc, hmod, name);
        return checkResult(status);
    }

    /** Native counterpart of {@link #cuModuleGetFunction(CUfunction, CUmodule, String)}. */
    private static native int cuModuleGetFunctionNative(
        CUfunction hfunc, CUmodule hmod, String name);


    /**
     * Returns a global pointer from a module.
     * 
     *      * CUresult cuModuleGetGlobal (
     *      CUdeviceptr* dptr,
     *      size_t* bytes,
     *      CUmodule hmod,
     *      const char* name )
     * 
     * 
     *   Returns a global pointer from a module. 
     *     Returns in *dptr and *bytes the base pointer and
     *     size of the global of name name located in module hmod. If no variable of that name exists, cuModuleGetGlobal()
     *     returns CUDA_ERROR_NOT_FOUND. Both parameters dptr and bytes are optional. If one of them is NULL, it is ignored.
     *   
     *   
     *     Note:
     *     Note that this
     *       function may also return error codes from previous, asynchronous
     *       launches.
     *     
     *   
     *   
     * 
     * 
     * @param dptr Returned global device pointer
     * @param bytes Returned global size in bytes
     * @param hmod Module to retrieve global from
     * @param name Name of global to retrieve
     * 
     * @return CUDA_SUCCESS, CUDA_ERROR_DEINITIALIZED, CUDA_ERROR_NOT_INITIALIZED,
     * CUDA_ERROR_INVALID_CONTEXT, CUDA_ERROR_INVALID_VALUE,
     * CUDA_ERROR_NOT_FOUND
     * 
     * @see JCudaDriver#cuModuleGetFunction
     * @see JCudaDriver#cuModuleGetTexRef
     * @see JCudaDriver#cuModuleLoad
     * @see JCudaDriver#cuModuleLoadData
     * @see JCudaDriver#cuModuleLoadDataEx
     * @see JCudaDriver#cuModuleLoadFatBinary
     * @see JCudaDriver#cuModuleUnload
     */    
    public static int cuModuleGetGlobal(CUdeviceptr dptr, long bytes[], CUmodule hmod, String name)
    {
        // Arguments are passed to the native binding as given; the returned
        // status is processed by checkResult.
        final int status = cuModuleGetGlobalNative(dptr, bytes, hmod, name);
        return checkResult(status);
    }

    /** Native counterpart of {@code cuModuleGetGlobal}. */
    private static native int cuModuleGetGlobalNative(
        CUdeviceptr dptr, long[] bytes, CUmodule hmod, String name);


    /**
     * Returns a handle to a texture reference.
     * 
     *      * CUresult cuModuleGetTexRef (
     *      CUtexref* pTexRef,
     *      CUmodule hmod,
     *      const char* name )
     * 
     * 
     *   Returns a handle to a texture reference.
     *     Returns in *pTexRef the handle of the texture reference of
     *     name name in the module hmod. If no texture
     *     reference of that name exists, cuModuleGetTexRef() returns
     *     CUDA_ERROR_NOT_FOUND. This texture reference handle should not be
     *     destroyed, since it will be destroyed when the module is unloaded.
     *   
     *   
     *     Note:
     *     Note that this
     *       function may also return error codes from previous, asynchronous
     *       launches.
     *     
     *   
     *   
     * 
     * 
     * @param pTexRef Returned texture reference
     * @param hmod Module to retrieve texture reference from
     * @param name Name of texture reference to retrieve
     * 
     * @return CUDA_SUCCESS, CUDA_ERROR_DEINITIALIZED, CUDA_ERROR_NOT_INITIALIZED,
     * CUDA_ERROR_INVALID_CONTEXT, CUDA_ERROR_INVALID_VALUE,
     * CUDA_ERROR_NOT_FOUND
     * 
     * @see JCudaDriver#cuModuleGetFunction
     * @see JCudaDriver#cuModuleGetGlobal
     * @see JCudaDriver#cuModuleGetSurfRef
     * @see JCudaDriver#cuModuleLoad
     * @see JCudaDriver#cuModuleLoadData
     * @see JCudaDriver#cuModuleLoadDataEx
     * @see JCudaDriver#cuModuleLoadFatBinary
     * @see JCudaDriver#cuModuleUnload
     */    
    public static int cuModuleGetTexRef(CUtexref pTexRef, CUmodule hmod, String name)
    {
        // Delegate to the native binding and hand its status to checkResult.
        final int status = cuModuleGetTexRefNative(pTexRef, hmod, name);
        return checkResult(status);
    }

    /** Native counterpart of {@link #cuModuleGetTexRef(CUtexref, CUmodule, String)}. */
    private static native int cuModuleGetTexRefNative(
        CUtexref pTexRef, CUmodule hmod, String name);


    /**
     * Returns a handle to a surface reference.
     * 
     *      * CUresult cuModuleGetSurfRef (
     *      CUsurfref* pSurfRef,
     *      CUmodule hmod,
     *      const char* name )
     * 
     * 
     *   Returns a handle to a surface reference.
     *     Returns in *pSurfRef the handle of the surface reference of
     *     name name in the module hmod. If no surface
     *     reference of that name exists, cuModuleGetSurfRef() returns
     *     CUDA_ERROR_NOT_FOUND.
     *   
     *   
     *     Note:
     *     Note that this
     *       function may also return error codes from previous, asynchronous
     *       launches.
     *     
     *   
     *   
     * 
     * 
     * @param pSurfRef Returned surface reference
     * @param hmod Module to retrieve surface reference from
     * @param name Name of surface reference to retrieve
     * 
     * @return CUDA_SUCCESS, CUDA_ERROR_DEINITIALIZED, CUDA_ERROR_NOT_INITIALIZED,
     * CUDA_ERROR_INVALID_CONTEXT, CUDA_ERROR_INVALID_VALUE,
     * CUDA_ERROR_NOT_FOUND
     * 
     * @see JCudaDriver#cuModuleGetFunction
     * @see JCudaDriver#cuModuleGetGlobal
     * @see JCudaDriver#cuModuleGetTexRef
     * @see JCudaDriver#cuModuleLoad
     * @see JCudaDriver#cuModuleLoadData
     * @see JCudaDriver#cuModuleLoadDataEx
     * @see JCudaDriver#cuModuleLoadFatBinary
     * @see JCudaDriver#cuModuleUnload
     */    
    public static int cuModuleGetSurfRef(CUsurfref pSurfRef, CUmodule hmod, String name)
    {
        // Delegate to the native binding and hand its status to checkResult.
        final int status = cuModuleGetSurfRefNative(pSurfRef, hmod, name);
        return checkResult(status);
    }
    /** Native counterpart of {@link #cuModuleGetSurfRef(CUsurfref, CUmodule, String)}. */
    private static native int cuModuleGetSurfRefNative(
        CUsurfref pSurfRef, CUmodule hmod, String name);


    /**
     * Java-side wrapper for the native {@code cuLinkCreate} binding.
     * <p>
     * Both arguments are handed to {@code cuLinkCreateNative} unchanged and
     * the status it reports is run through {@code checkResult}.
     * NOTE(review): presumably {@code stateOut} receives the newly created
     * link state - confirm against the native implementation.
     *
     * @param jitOptions JIT options forwarded to the native call
     * @param stateOut Link state object forwarded to the native call
     *
     * @return The value produced by {@code checkResult} for the native status
     */
    public static int cuLinkCreate(JITOptions jitOptions, CUlinkState stateOut)
    {
        final int status = cuLinkCreateNative(jitOptions, stateOut);
        return checkResult(status);
    }
    /** Native counterpart of {@link #cuLinkCreate(JITOptions, CUlinkState)}. */
    private static native int cuLinkCreateNative(
        JITOptions jitOptions, CUlinkState stateOut);
    
    
    /**
     * Java-side wrapper for the native {@code cuLinkAddData} binding.
     * <p>
     * All arguments are handed to {@code cuLinkAddDataNative} unchanged and
     * the status it reports is run through {@code checkResult}.
     * NOTE(review): {@code type} is presumably one of the CUDA JIT input
     * type constants - confirm against the CUDA driver API documentation.
     *
     * @param state Link state forwarded to the native call
     * @param type Input type value forwarded to the native call
     * @param data Pointer to the input data
     * @param size Size value forwarded to the native call
     * @param name Name forwarded to the native call
     * @param jitOptions JIT options forwarded to the native call
     *
     * @return The value produced by {@code checkResult} for the native status
     */
    public static int cuLinkAddData(CUlinkState state, int type, Pointer data, long size, String name, JITOptions jitOptions)
    {
        final int status =
            cuLinkAddDataNative(state, type, data, size, name, jitOptions);
        return checkResult(status);
    }
    /** Native counterpart of {@code cuLinkAddData}. */
    private static native int cuLinkAddDataNative(
        CUlinkState state, int type, Pointer data, long size, String name,
        JITOptions jitOptions);

    /**
     * Java-side wrapper for the native {@code cuLinkAddFile} binding.
     * <p>
     * All arguments are handed to {@code cuLinkAddFileNative} unchanged and
     * the status it reports is run through {@code checkResult}.
     * NOTE(review): {@code type} is presumably one of the CUDA JIT input
     * type constants - confirm against the CUDA driver API documentation.
     *
     * @param state Link state forwarded to the native call
     * @param type Input type value forwarded to the native call
     * @param path Path string forwarded to the native call
     * @param jitOptions JIT options forwarded to the native call
     *
     * @return The value produced by {@code checkResult} for the native status
     */
    public static int cuLinkAddFile(CUlinkState state, int type, String path, JITOptions jitOptions)
    {
        final int status = cuLinkAddFileNative(state, type, path, jitOptions);
        return checkResult(status);
    }
    /** Native counterpart of {@code cuLinkAddFile}. */
    private static native int cuLinkAddFileNative(
        CUlinkState state, int type, String path, JITOptions jitOptions);

    
    /**
     * Java-side wrapper for the native {@code cuLinkComplete} binding.
     * <p>
     * All arguments are handed to {@code cuLinkCompleteNative} unchanged and
     * the status it reports is run through {@code checkResult}.
     * NOTE(review): presumably {@code cubinOut} and {@code sizeOut} are
     * filled in by the native side - confirm against the native
     * implementation.
     *
     * @param state Link state forwarded to the native call
     * @param cubinOut Pointer forwarded to the native call
     * @param sizeOut Array forwarded to the native call
     *
     * @return The value produced by {@code checkResult} for the native status
     */
    public static int cuLinkComplete(CUlinkState state, Pointer cubinOut, long sizeOut[])
    {
        final int status = cuLinkCompleteNative(state, cubinOut, sizeOut);
        return checkResult(status);
    }
    /** Native counterpart of {@link #cuLinkComplete(CUlinkState, Pointer, long[])}. */
    private static native int cuLinkCompleteNative(
        CUlinkState state, Pointer cubinOut, long[] sizeOut);
    
    
    /**
     * Java-side wrapper for the native {@code cuLinkDestroy} binding.
     * <p>
     * The given state is handed to {@code cuLinkDestroyNative} unchanged and
     * the status it reports is run through {@code checkResult}.
     *
     * @param state Link state forwarded to the native call
     *
     * @return The value produced by {@code checkResult} for the native status
     */
    public static int cuLinkDestroy(CUlinkState state)
    {
        final int status = cuLinkDestroyNative(state);
        return checkResult(status);
    }
    /** Native counterpart of {@link #cuLinkDestroy(CUlinkState)}. */
    private static native int cuLinkDestroyNative(CUlinkState state);
    
    

    /**
     * Gets free and total memory.
     * 
     *      * CUresult cuMemGetInfo (
     *      size_t* free,
     *      size_t* total )
     * 
     * 
     *   Gets free and total memory.  Returns in
     *     *free and *total respectively, the free and total
     *     amount of memory available for allocation by the CUDA context, in
     *     bytes.
     *   
     *   
     *     Note:
     *     Note that this
     *       function may also return error codes from previous, asynchronous
     *       launches.
     *     
     *   
     *   
     * 
     * 
     * @param free Returned free memory in bytes
     * @param total Returned total memory in bytes
     * 
     * @return CUDA_SUCCESS, CUDA_ERROR_DEINITIALIZED, CUDA_ERROR_NOT_INITIALIZED,
     * CUDA_ERROR_INVALID_CONTEXT, CUDA_ERROR_INVALID_VALUE
     * 
     * @see JCudaDriver#cuArray3DCreate
     * @see JCudaDriver#cuArray3DGetDescriptor
     * @see JCudaDriver#cuArrayCreate
     * @see JCudaDriver#cuArrayDestroy
     * @see JCudaDriver#cuArrayGetDescriptor
     * @see JCudaDriver#cuMemAlloc
     * @see JCudaDriver#cuMemAllocHost
     * @see JCudaDriver#cuMemAllocPitch
     * @see JCudaDriver#cuMemcpy2D
     * @see JCudaDriver#cuMemcpy2DAsync
     * @see JCudaDriver#cuMemcpy2DUnaligned
     * @see JCudaDriver#cuMemcpy3D
     * @see JCudaDriver#cuMemcpy3DAsync
     * @see JCudaDriver#cuMemcpyAtoA
     * @see JCudaDriver#cuMemcpyAtoD
     * @see JCudaDriver#cuMemcpyAtoH
     * @see JCudaDriver#cuMemcpyAtoHAsync
     * @see JCudaDriver#cuMemcpyDtoA
     * @see JCudaDriver#cuMemcpyDtoD
     * @see JCudaDriver#cuMemcpyDtoDAsync
     * @see JCudaDriver#cuMemcpyDtoH
     * @see JCudaDriver#cuMemcpyDtoHAsync
     * @see JCudaDriver#cuMemcpyHtoA
     * @see JCudaDriver#cuMemcpyHtoAAsync
     * @see JCudaDriver#cuMemcpyHtoD
     * @see JCudaDriver#cuMemcpyHtoDAsync
     * @see JCudaDriver#cuMemFree
     * @see JCudaDriver#cuMemFreeHost
     * @see JCudaDriver#cuMemGetAddressRange
     * @see JCudaDriver#cuMemHostAlloc
     * @see JCudaDriver#cuMemHostGetDevicePointer
     * @see JCudaDriver#cuMemsetD2D8
     * @see JCudaDriver#cuMemsetD2D16
     * @see JCudaDriver#cuMemsetD2D32
     * @see JCudaDriver#cuMemsetD8
     * @see JCudaDriver#cuMemsetD16
     * @see JCudaDriver#cuMemsetD32
     */    
    public static int cuMemGetInfo(long free[], long total[])
    {
        // Both arrays go straight to the native binding; only the returned
        // status is post-processed by checkResult.
        final int status = cuMemGetInfoNative(free, total);
        return checkResult(status);
    }

    /** Native counterpart of {@link #cuMemGetInfo(long[], long[])}. */
    private static native int cuMemGetInfoNative(long[] free, long[] total);


    /**
     * Allocates page-locked host memory.
     * 
     *      * CUresult cuMemHostAlloc (
     *      void** pp,
     *      size_t bytesize,
     *      unsigned int  Flags )
     * 
     * 
     *   Allocates page-locked host memory. 
     *     Allocates bytesize bytes of host memory that is page-locked
     *     and accessible to the device. The driver tracks the virtual memory
     *     ranges allocated
     *     with this function and automatically
     *     accelerates calls to functions such as cuMemcpyHtoD(). Since the memory
     *     can be accessed directly by the device, it can be read or written with
     *     much higher bandwidth than pageable
     *     memory obtained with functions such as
     *     malloc(). Allocating excessive amounts of pinned memory may degrade
     *     system performance,
     *     since it reduces the amount of memory
     *     available to the system for paging. As a result, this function is best
     *     used sparingly
     *     to allocate staging areas for data
     *     exchange between host and device.
     *   
     *   The Flags parameter enables
     *     different options to be specified that affect the allocation, as
     *     follows.
     *   
     *   
     *     
     *       CU_MEMHOSTALLOC_PORTABLE: The
     *         memory returned by this call will be considered as pinned memory by
     *         all CUDA contexts, not just the one that performed
     *         the allocation.
     *       
     *     
     *   
     *   
     *   
     *     
     *       CU_MEMHOSTALLOC_DEVICEMAP: Maps
     *         the allocation into the CUDA address space. The device pointer to the
     *         memory may be obtained by calling cuMemHostGetDevicePointer(). This
     *         feature is available only on GPUs with compute capability greater than
     *         or equal to 1.1.
     *       
     *     
     *   
     *   
     *   
     *     
     *       CU_MEMHOSTALLOC_WRITECOMBINED:
     *         Allocates the memory as write-combined (WC). WC memory can be
     *         transferred across the PCI Express bus more quickly on some
     *         system configurations, but
     *         cannot be read efficiently by most CPUs. WC memory is a good option
     *         for buffers that will be written
     *         by the CPU and read by the GPU
     *         via mapped pinned memory or host->device transfers.
     *       
     *     
     *   
     *   
     *   All of these flags are orthogonal to
     *     one another: a developer may allocate memory that is portable, mapped
     *     and/or write-combined
     *     with no restrictions.
     *   
     *   The CUDA context must have been created
     *     with the CU_CTX_MAP_HOST flag in order for the CU_MEMHOSTALLOC_DEVICEMAP
     *     flag to have any effect.
     *   
     *   The CU_MEMHOSTALLOC_DEVICEMAP flag may
     *     be specified on CUDA contexts for devices that do not support mapped
     *     pinned memory. The failure is deferred to cuMemHostGetDevicePointer()
     *     because the memory may be mapped into other CUDA contexts via the
     *     CU_MEMHOSTALLOC_PORTABLE flag.
     *   
     *   The memory allocated by this function
     *     must be freed with cuMemFreeHost().
     *   
     *   Note all host memory allocated using
     *     cuMemHostAlloc() will automatically be immediately accessible to all
     *     contexts on all devices which support unified addressing (as may be
     *     queried
     *     using CU_DEVICE_ATTRIBUTE_UNIFIED_ADDRESSING).
     *     Unless the flag CU_MEMHOSTALLOC_WRITECOMBINED is specified, the device
     *     pointer that may be used to access this host memory from those contexts
     *     is always equal to the returned
     *     host pointer *pp. If the flag
     *     CU_MEMHOSTALLOC_WRITECOMBINED is specified, then the function
     *     cuMemHostGetDevicePointer() must be used to query the device pointer,
     *     even if the context supports unified addressing. See Unified Addressing
     *     for additional details.
     *   
     *   
     *     Note:
     *     Note that this
     *       function may also return error codes from previous, asynchronous
     *       launches.
     *     
     *   
     *   
     * 
     * 
     * @param pp Returned host pointer to page-locked memory
     * @param bytes Requested allocation size in bytes
     * @param Flags Flags for allocation request
     * 
     * @return CUDA_SUCCESS, CUDA_ERROR_DEINITIALIZED, CUDA_ERROR_NOT_INITIALIZED,
     * CUDA_ERROR_INVALID_CONTEXT, CUDA_ERROR_INVALID_VALUE,
     * CUDA_ERROR_OUT_OF_MEMORY
     * 
     * @see JCudaDriver#cuArray3DCreate
     * @see JCudaDriver#cuArray3DGetDescriptor
     * @see JCudaDriver#cuArrayCreate
     * @see JCudaDriver#cuArrayDestroy
     * @see JCudaDriver#cuArrayGetDescriptor
     * @see JCudaDriver#cuMemAlloc
     * @see JCudaDriver#cuMemAllocHost
     * @see JCudaDriver#cuMemAllocPitch
     * @see JCudaDriver#cuMemcpy2D
     * @see JCudaDriver#cuMemcpy2DAsync
     * @see JCudaDriver#cuMemcpy2DUnaligned
     * @see JCudaDriver#cuMemcpy3D
     * @see JCudaDriver#cuMemcpy3DAsync
     * @see JCudaDriver#cuMemcpyAtoA
     * @see JCudaDriver#cuMemcpyAtoD
     * @see JCudaDriver#cuMemcpyAtoH
     * @see JCudaDriver#cuMemcpyAtoHAsync
     * @see JCudaDriver#cuMemcpyDtoA
     * @see JCudaDriver#cuMemcpyDtoD
     * @see JCudaDriver#cuMemcpyDtoDAsync
     * @see JCudaDriver#cuMemcpyDtoH
     * @see JCudaDriver#cuMemcpyDtoHAsync
     * @see JCudaDriver#cuMemcpyHtoA
     * @see JCudaDriver#cuMemcpyHtoAAsync
     * @see JCudaDriver#cuMemcpyHtoD
     * @see JCudaDriver#cuMemcpyHtoDAsync
     * @see JCudaDriver#cuMemFree
     * @see JCudaDriver#cuMemFreeHost
     * @see JCudaDriver#cuMemGetAddressRange
     * @see JCudaDriver#cuMemGetInfo
     * @see JCudaDriver#cuMemHostGetDevicePointer
     * @see JCudaDriver#cuMemsetD2D8
     * @see JCudaDriver#cuMemsetD2D16
     * @see JCudaDriver#cuMemsetD2D32
     * @see JCudaDriver#cuMemsetD8
     * @see JCudaDriver#cuMemsetD16
     * @see JCudaDriver#cuMemsetD32
     */    
    public static int cuMemHostAlloc(Pointer pp, long bytes, int Flags)
    {
        // Delegate to the native binding and hand its status to checkResult.
        final int status = cuMemHostAllocNative(pp, bytes, Flags);
        return checkResult(status);
    }
    /** Native counterpart of {@link #cuMemHostAlloc(Pointer, long, int)}. */
    private static native int cuMemHostAllocNative(
        Pointer pp, long bytes, int Flags);


    /**
     * Passes back device pointer of mapped pinned memory.
     * 
     *      * CUresult cuMemHostGetDevicePointer (
     *      CUdeviceptr* pdptr,
     *      void* p,
     *      unsigned int  Flags )
     * 
     * 
     *   Passes back device pointer of mapped
     *     pinned memory.  Passes back the device pointer pdptr
     *     corresponding to the mapped, pinned host buffer p allocated
     *     by cuMemHostAlloc.
     *   
     *   cuMemHostGetDevicePointer() will fail
     *     if the CU_MEMHOSTALLOC_DEVICEMAP flag was not specified at the time
     *     the memory was allocated, or if the function is called on a GPU that
     *     does not support
     *     mapped pinned memory.
     *   
     *   Flags provides for future
     *     releases. For now, it must be set to 0.
     *   
     *   
     *     Note:
     *     Note that this
     *       function may also return error codes from previous, asynchronous
     *       launches.
     *     
     *   
     *   
     * 
     * 
     * @param ret Returned device pointer
     * @param p Host pointer
     * @param Flags Options (must be 0)
     * 
     * @return CUDA_SUCCESS, CUDA_ERROR_DEINITIALIZED, CUDA_ERROR_NOT_INITIALIZED,
     * CUDA_ERROR_INVALID_CONTEXT, CUDA_ERROR_INVALID_VALUE
     * 
     * @see JCudaDriver#cuArray3DCreate
     * @see JCudaDriver#cuArray3DGetDescriptor
     * @see JCudaDriver#cuArrayCreate
     * @see JCudaDriver#cuArrayDestroy
     * @see JCudaDriver#cuArrayGetDescriptor
     * @see JCudaDriver#cuMemAlloc
     * @see JCudaDriver#cuMemAllocHost
     * @see JCudaDriver#cuMemAllocPitch
     * @see JCudaDriver#cuMemcpy2D
     * @see JCudaDriver#cuMemcpy2DAsync
     * @see JCudaDriver#cuMemcpy2DUnaligned
     * @see JCudaDriver#cuMemcpy3D
     * @see JCudaDriver#cuMemcpy3DAsync
     * @see JCudaDriver#cuMemcpyAtoA
     * @see JCudaDriver#cuMemcpyAtoD
     * @see JCudaDriver#cuMemcpyAtoH
     * @see JCudaDriver#cuMemcpyAtoHAsync
     * @see JCudaDriver#cuMemcpyDtoA
     * @see JCudaDriver#cuMemcpyDtoD
     * @see JCudaDriver#cuMemcpyDtoDAsync
     * @see JCudaDriver#cuMemcpyDtoH
     * @see JCudaDriver#cuMemcpyDtoHAsync
     * @see JCudaDriver#cuMemcpyHtoA
     * @see JCudaDriver#cuMemcpyHtoAAsync
     * @see JCudaDriver#cuMemcpyHtoD
     * @see JCudaDriver#cuMemcpyHtoDAsync
     * @see JCudaDriver#cuMemFree
     * @see JCudaDriver#cuMemFreeHost
     * @see JCudaDriver#cuMemGetAddressRange
     * @see JCudaDriver#cuMemGetInfo
     * @see JCudaDriver#cuMemHostAlloc
     * @see JCudaDriver#cuMemsetD2D8
     * @see JCudaDriver#cuMemsetD2D16
     * @see JCudaDriver#cuMemsetD2D32
     * @see JCudaDriver#cuMemsetD8
     * @see JCudaDriver#cuMemsetD16
     * @see JCudaDriver#cuMemsetD32
     */    
    public static int cuMemHostGetDevicePointer(CUdeviceptr ret, Pointer p, int Flags)
    {
        // Delegate to the native binding and hand its status to checkResult.
        final int status = cuMemHostGetDevicePointerNative(ret, p, Flags);
        return checkResult(status);
    }
    /** Native counterpart of {@link #cuMemHostGetDevicePointer(CUdeviceptr, Pointer, int)}. */
    private static native int cuMemHostGetDevicePointerNative(
        CUdeviceptr ret, Pointer p, int Flags);


    /**
     * Passes back flags that were used for a pinned allocation.
     * 
     *      * CUresult cuMemHostGetFlags (
     *      unsigned int* pFlags,
     *      void* p )
     * 
     * 
     *   Passes back flags that were used for a
     *     pinned allocation.  Passes back the flags pFlags that were
     *     specified when allocating the pinned host buffer p allocated
     *     by cuMemHostAlloc.
     *   
     *   cuMemHostGetFlags() will fail if the
     *     pointer does not reside in an allocation performed by cuMemAllocHost()
     *     or cuMemHostAlloc().
     *   
     *   
     *     Note:
     *     Note that this
     *       function may also return error codes from previous, asynchronous
     *       launches.
     *     
     *   
     *   
     * 
     * 
     * @param pFlags Returned flags word
     * @param p Host pointer
     * 
     * @return CUDA_SUCCESS, CUDA_ERROR_DEINITIALIZED, CUDA_ERROR_NOT_INITIALIZED,
     * CUDA_ERROR_INVALID_CONTEXT, CUDA_ERROR_INVALID_VALUE
     * 
     * @see JCudaDriver#cuMemAllocHost
     * @see JCudaDriver#cuMemHostAlloc
     */    
    public static int cuMemHostGetFlags (int pFlags[], Pointer p)
    {
        // The flags array and host pointer are forwarded as given; the
        // returned status is processed by checkResult.
        final int status = cuMemHostGetFlagsNative(pFlags, p);
        return checkResult(status);
    }

    /** Native counterpart of {@link #cuMemHostGetFlags(int[], Pointer)}. */
    private static native int cuMemHostGetFlagsNative(int[] pFlags, Pointer p);




    /**
     * Returns a handle to a compute device.
     * 
     *      * CUresult cuDeviceGetByPCIBusId (
     *      CUdevice* dev,
     *      char* pciBusId )
     * 
     * 
     *   Returns a handle to a compute device. 
     *     Returns in *dev a device handle given a PCI bus ID
     *     string.
     *   
     *   
     *     Note:
     *     Note that this
     *       function may also return error codes from previous, asynchronous
     *       launches.
     *     
     *   
     *   
     * 
     * 
     * @param dev Returned device handle
     * @param pciBusId String in one of the following forms: [domain]:[bus]:[device].[function] [domain]:[bus]:[device] [bus]:[device].[function] where domain, bus, device, and function are all hexadecimal values
     * 
     * @return CUDA_SUCCESS, CUDA_ERROR_DEINITIALIZED, CUDA_ERROR_NOT_INITIALIZED,
     * CUDA_ERROR_INVALID_VALUE, CUDA_ERROR_INVALID_DEVICE
     * 
     * @see JCudaDriver#cuDeviceGet
     * @see JCudaDriver#cuDeviceGetAttribute
     * @see JCudaDriver#cuDeviceGetPCIBusId
     */    
    public static int cuDeviceGetByPCIBusId(CUdevice dev, String pciBusId)
    {
        // Invoke the native binding, then pass its status code through checkResult.
        final int status = cuDeviceGetByPCIBusIdNative(dev, pciBusId);
        return checkResult(status);
    }

    /** Native (JNI) counterpart of {@link #cuDeviceGetByPCIBusId}. */
    private static native int cuDeviceGetByPCIBusIdNative(CUdevice dev, String pciBusId);


    /**
     * Java binding for the CUDA driver function cuMemAllocManaged.
     *
     *      CUresult cuMemAllocManaged (
     *      CUdeviceptr* dptr,
     *      size_t bytesize,
     *      unsigned int  flags )
     *
     * The arguments are forwarded unchanged to the native implementation
     * and the resulting status code is passed through checkResult.
     *
     * NOTE(review): unlike the neighbouring methods, no description had been
     * extracted from the CUDA documentation for this function. According to
     * the CUDA driver API reference it allocates memory that is managed by
     * the Unified Memory system; the parameter notes below follow that
     * reference and should be confirmed against the targeted CUDA version.
     *
     * @param dptr Returned device pointer
     * @param bytesize Requested allocation size in bytes
     * @param flags Attach flags for the allocation (CU_MEM_ATTACH_GLOBAL or
     * CU_MEM_ATTACH_HOST according to the CUDA reference)
     *
     * @return The status code of the native call, as passed through checkResult
     *
     * @see JCudaDriver#cuMemAlloc
     * @see JCudaDriver#cuMemFree
     */
    public static int cuMemAllocManaged(CUdeviceptr dptr, long bytesize, int flags)
    {
        final int status = cuMemAllocManagedNative(dptr, bytesize, flags);
        return checkResult(status);
    }

    /** Native (JNI) counterpart of {@link #cuMemAllocManaged}. */
    private static native int cuMemAllocManagedNative(CUdeviceptr dptr, long bytesize, int flags);
    
    
    /**
     * Returns a PCI Bus Id string for the device.
     * 
     *      * CUresult cuDeviceGetPCIBusId (
     *      char* pciBusId,
     *      int  len,
     *      CUdevice dev )
     * 
     * 
     *   Returns a PCI Bus Id string for the
     *     device.  Returns an ASCII string identifying the device dev
     *     in the NULL-terminated string pointed to by pciBusId. len specifies the maximum length of the string that may be
     *     returned.
     *   
     *   
     *     Note:
     *     Note that this
     *       function may also return error codes from previous, asynchronous
     *       launches.
     *     
     *   
     *   
     * 
     * 
     * @param pciBusId Returned identifier string for the device in the following format [domain]:[bus]:[device].[function] where domain, bus, device, and function are all hexadecimal values. pciBusId should be large enough to store 13 characters including the NULL-terminator.
     * @param len Maximum length of string to store in pciBusId
     * @param dev Device to get identifier string for
     * 
     * @return CUDA_SUCCESS, CUDA_ERROR_DEINITIALIZED, CUDA_ERROR_NOT_INITIALIZED,
     * CUDA_ERROR_INVALID_VALUE, CUDA_ERROR_INVALID_DEVICE
     * 
     * @see JCudaDriver#cuDeviceGet
     * @see JCudaDriver#cuDeviceGetAttribute
     * @see JCudaDriver#cuDeviceGetByPCIBusId
     */    
    public static int cuDeviceGetPCIBusId(String pciBusId[], int len, CUdevice dev)
    {
        // Invoke the native binding, then pass its status code through checkResult.
        final int status = cuDeviceGetPCIBusIdNative(pciBusId, len, dev);
        return checkResult(status);
    }

    /** Native (JNI) counterpart of {@link #cuDeviceGetPCIBusId}. */
    private static native int cuDeviceGetPCIBusIdNative(String pciBusId[], int len, CUdevice dev);


    /**
     * Gets an interprocess handle for a previously allocated event.
     * 
     *      * CUresult cuIpcGetEventHandle (
     *      CUipcEventHandle* pHandle,
     *      CUevent event )
     * 
     * 
     *   Gets an interprocess handle for a
     *     previously allocated event.  Takes as input a previously allocated
     *     event. This event must
     *     have been created with the
     *     CU_EVENT_INTERPROCESS and CU_EVENT_DISABLE_TIMING flags set. This
     *     opaque handle may be copied into other processes and opened with
     *     cuIpcOpenEventHandle to allow efficient hardware synchronization
     *     between GPU work in different processes.
     *   
     *   After the event has been opened in
     *     the importing process, cuEventRecord, cuEventSynchronize,
     *     cuStreamWaitEvent and cuEventQuery may be used in either process.
     *     Performing operations on the imported event after the exported event
     *     has been freed with cuEventDestroy will result in undefined behavior.
     *   
     *   IPC functionality is restricted to
     *     devices with support for unified addressing on Linux operating
     *     systems.
     *   
     * 
     * 
     * @param pHandle Pointer to a user allocated CUipcEventHandle in which to return the opaque event handle
     * @param event Event allocated with CU_EVENT_INTERPROCESS and CU_EVENT_DISABLE_TIMING flags.
     * 
     * @return CUDA_SUCCESS, CUDA_ERROR_INVALID_HANDLE, CUDA_ERROR_OUT_OF_MEMORY,
     * CUDA_ERROR_MAP_FAILED
     * 
     * @see JCudaDriver#cuEventCreate
     * @see JCudaDriver#cuEventDestroy
     * @see JCudaDriver#cuEventSynchronize
     * @see JCudaDriver#cuEventQuery
     * @see JCudaDriver#cuStreamWaitEvent
     * @see JCudaDriver#cuIpcOpenEventHandle
     * @see JCudaDriver#cuIpcGetMemHandle
     * @see JCudaDriver#cuIpcOpenMemHandle
     * @see JCudaDriver#cuIpcCloseMemHandle
     */    
    public static int cuIpcGetEventHandle(CUipcEventHandle pHandle, CUevent event)
    {
        // Invoke the native binding, then pass its status code through checkResult.
        final int status = cuIpcGetEventHandleNative(pHandle, event);
        return checkResult(status);
    }

    /** Native (JNI) counterpart of {@link #cuIpcGetEventHandle}. */
    private static native int cuIpcGetEventHandleNative(CUipcEventHandle pHandle, CUevent event);


    /**
     * Opens an interprocess event handle for use in the current process.
     * 
     *      * CUresult cuIpcOpenEventHandle (
     *      CUevent* phEvent,
     *      CUipcEventHandle handle )
     * 
     * 
     *   Opens an interprocess event handle for
     *     use in the current process.  Opens an interprocess event handle exported
     *     from another
     *     process with cuIpcGetEventHandle. This
     *     function returns a CUevent that behaves like a locally created event
     *     with the CU_EVENT_DISABLE_TIMING flag specified. This event must be
     *     freed with cuEventDestroy.
     *   
     *   Performing operations on the imported
     *     event after the exported event has been freed with cuEventDestroy will
     *     result in undefined behavior.
     *   
     *   IPC functionality is restricted to
     *     devices with support for unified addressing on Linux operating
     *     systems.
     *   
     * 
     * 
     * @param phEvent Returns the imported event
     * @param handle Interprocess handle to open
     * 
     * @return CUDA_SUCCESS, CUDA_ERROR_INVALID_CONTEXT, CUDA_ERROR_MAP_FAILED,
     * CUDA_ERROR_PEER_ACCESS_UNSUPPORTED, CUDA_ERROR_INVALID_HANDLE
     * 
     * @see JCudaDriver#cuEventCreate
     * @see JCudaDriver#cuEventDestroy
     * @see JCudaDriver#cuEventSynchronize
     * @see JCudaDriver#cuEventQuery
     * @see JCudaDriver#cuStreamWaitEvent
     * @see JCudaDriver#cuIpcGetEventHandle
     * @see JCudaDriver#cuIpcGetMemHandle
     * @see JCudaDriver#cuIpcOpenMemHandle
     * @see JCudaDriver#cuIpcCloseMemHandle
     */    
    public static int cuIpcOpenEventHandle(CUevent phEvent, CUipcEventHandle handle)
    {
        // Invoke the native binding, then pass its status code through checkResult.
        final int status = cuIpcOpenEventHandleNative(phEvent, handle);
        return checkResult(status);
    }

    /** Native (JNI) counterpart of {@link #cuIpcOpenEventHandle}. */
    private static native int cuIpcOpenEventHandleNative(CUevent phEvent, CUipcEventHandle handle);


    /**
     * Gets an interprocess memory handle for an existing device memory
     * allocation.
     *
     *      * CUresult cuIpcGetMemHandle (
     *      CUipcMemHandle* pHandle,
     *      CUdeviceptr dptr )
     * 
     * 
     *   Gets an interprocess memory
     *     handle for an existing device memory allocation.
     *   
     *   Takes a pointer to the base of an
     *     existing device memory allocation created with cuMemAlloc and exports
     *     it for use in another process. This is a lightweight operation and may
     *     be called multiple times on an allocation
     *     without adverse effects.
     *   
     *   If a region of memory is freed with
     *     cuMemFree and a subsequent call to cuMemAlloc returns memory with the
     *     same device address, cuIpcGetMemHandle will return a unique handle for
     *     the new memory.
     *   
     *   IPC functionality is restricted to
     *     devices with support for unified addressing on Linux operating
     *     systems.
     *   
     * 
     * 
     * @param pHandle Pointer to user allocated CUipcMemHandle to return the handle in.
     * @param dptr Base pointer to previously allocated device memory
     * 
     * @return CUDA_SUCCESS, CUDA_ERROR_INVALID_HANDLE, CUDA_ERROR_OUT_OF_MEMORY,
     * CUDA_ERROR_MAP_FAILED
     * 
     * @see JCudaDriver#cuMemAlloc
     * @see JCudaDriver#cuMemFree
     * @see JCudaDriver#cuIpcGetEventHandle
     * @see JCudaDriver#cuIpcOpenEventHandle
     * @see JCudaDriver#cuIpcOpenMemHandle
     * @see JCudaDriver#cuIpcCloseMemHandle
     */    
    public static int cuIpcGetMemHandle(CUipcMemHandle pHandle, CUdeviceptr dptr)
    {
        // Invoke the native binding, then pass its status code through checkResult.
        final int status = cuIpcGetMemHandleNative(pHandle, dptr);
        return checkResult(status);
    }

    /** Native (JNI) counterpart of {@link #cuIpcGetMemHandle}. */
    private static native int cuIpcGetMemHandleNative(CUipcMemHandle pHandle, CUdeviceptr dptr);


    /**
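     * Opens an interprocess memory handle exported from another process
     * and returns a device pointer usable in the local process.
     * 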
     *      * CUresult cuIpcOpenMemHandle (
     *      CUdeviceptr* pdptr,
     *      CUipcMemHandle handle,
     *      unsigned int  Flags )
     * 
     * 
     *   Opens an interprocess memory
     *     handle exported from another process and returns a device pointer
     *     usable in the local
     *     process.
     *   
     *   Maps memory exported from another
     *     process with cuIpcGetMemHandle into the current device address space.
     *     For contexts on different devices cuIpcOpenMemHandle can attempt to
     *     enable peer access between the devices as if the user called
     *     cuCtxEnablePeerAccess. This behavior is controlled by the
     *     CU_IPC_MEM_LAZY_ENABLE_PEER_ACCESS flag. cuDeviceCanAccessPeer can
     *     determine if a mapping is possible.
     *   
     *   Contexts that may open CUipcMemHandles
     *     are restricted in the following way. CUipcMemHandles from each CUdevice
     *     in a given process may only be opened by one CUcontext per CUdevice
     *     per other process.
     *   
     *   Memory returned from cuIpcOpenMemHandle
     *     must be freed with cuIpcCloseMemHandle.
     *   
     *   Calling cuMemFree on an exported memory
     *     region before calling cuIpcCloseMemHandle in the importing context will
     *     result in undefined behavior.
     *   
     *   IPC functionality is restricted to
     *     devices with support for unified addressing on Linux operating
     *     systems.
     *   
     * 
     * 
     * @param pdptr Returned device pointer
     * @param handle CUipcMemHandle to open
     * @param Flags Flags for this operation. Must be specified as CU_IPC_MEM_LAZY_ENABLE_PEER_ACCESS
     * 
     * @return CUDA_SUCCESS, CUDA_ERROR_INVALID_CONTEXT, CUDA_ERROR_MAP_FAILED,
     * CUDA_ERROR_INVALID_HANDLE, CUDA_ERROR_TOO_MANY_PEERS
     * 
     * @see JCudaDriver#cuMemAlloc
     * @see JCudaDriver#cuMemFree
     * @see JCudaDriver#cuIpcGetEventHandle
     * @see JCudaDriver#cuIpcOpenEventHandle
     * @see JCudaDriver#cuIpcGetMemHandle
     * @see JCudaDriver#cuIpcCloseMemHandle
     * @see JCudaDriver#cuCtxEnablePeerAccess
     * @see JCudaDriver#cuDeviceCanAccessPeer
     */    
    public static int cuIpcOpenMemHandle(CUdeviceptr pdptr, CUipcMemHandle handle, int Flags)
    {
        // Invoke the native binding, then pass its status code through checkResult.
        final int status = cuIpcOpenMemHandleNative(pdptr, handle, Flags);
        return checkResult(status);
    }

    /** Native (JNI) counterpart of {@link #cuIpcOpenMemHandle}. */
    private static native int cuIpcOpenMemHandleNative(CUdeviceptr pdptr, CUipcMemHandle handle, int Flags);


    /**
     * Close memory mapped with cuIpcOpenMemHandle.
     * 
     *      * CUresult cuIpcCloseMemHandle (
     *      CUdeviceptr dptr )
     * 
     * 
     *   Close memory mapped with cuIpcOpenMemHandle.
     *     Unmaps memory returned by cuIpcOpenMemHandle. The original allocation
     *     in the exporting process as well as imported mappings in other processes
     *     will be unaffected.
     *   
     *   Any resources used to enable peer access
     *     will be freed if this is the last mapping using them.
     *   
     *   IPC functionality is restricted to
     *     devices with support for unified addressing on Linux operating
     *     systems.
     *   
     * 
     * 
     * @param dptr Device pointer returned by cuIpcOpenMemHandle
     * 
     * @return CUDA_SUCCESS, CUDA_ERROR_INVALID_CONTEXT, CUDA_ERROR_MAP_FAILED,
     * CUDA_ERROR_INVALID_HANDLE
     * 
     * @see JCudaDriver#cuMemAlloc
     * @see JCudaDriver#cuMemFree
     * @see JCudaDriver#cuIpcGetEventHandle
     * @see JCudaDriver#cuIpcOpenEventHandle
     * @see JCudaDriver#cuIpcGetMemHandle
     * @see JCudaDriver#cuIpcOpenMemHandle
     */    
    public static int cuIpcCloseMemHandle(CUdeviceptr dptr)
    {
        // Invoke the native binding, then pass its status code through checkResult.
        final int status = cuIpcCloseMemHandleNative(dptr);
        return checkResult(status);
    }

    /** Native (JNI) counterpart of {@link #cuIpcCloseMemHandle}. */
    private static native int cuIpcCloseMemHandleNative(CUdeviceptr dptr);




    /**
     * Registers an existing host memory range for use by CUDA.
     * 
     *      * CUresult cuMemHostRegister (
     *      void* p,
     *      size_t bytesize,
     *      unsigned int  Flags )
     * 
     * 
     *   Registers an existing host memory range
     *     for use by CUDA.  Page-locks the memory range specified by p
     *     and bytesize and maps it for the device(s) as specified by
     *     Flags. This memory range also is added to the same tracking
     *     mechanism as cuMemHostAlloc to automatically accelerate calls to
     *     functions such as cuMemcpyHtoD(). Since the memory can be accessed
     *     directly by the device, it can be read or written with much higher
     *     bandwidth than pageable
     *     memory that has not been registered.
     *     Page-locking excessive amounts of memory may degrade system performance,
     *     since it reduces
     *     the amount of memory available to the
     *     system for paging. As a result, this function is best used sparingly
     *     to register staging
     *     areas for data exchange between host and
     *     device.
     *   
     *   This function has limited support on
     *     Mac OS X. OS 10.7 or higher is required.
     *   
     *   The Flags parameter enables
     *     different options to be specified that affect the allocation, as
     *     follows.
     *   
     *   
     *     
     *       CU_MEMHOSTREGISTER_PORTABLE:
     *         The memory returned by this call will be considered as pinned memory
     *         by all CUDA contexts, not just the one that performed
     *         the allocation.
     *       
     *     
     *   
     *   
     *   
     *     
     *       CU_MEMHOSTREGISTER_DEVICEMAP:
     *         Maps the allocation into the CUDA address space. The device pointer to
     *         the memory may be obtained by calling cuMemHostGetDevicePointer(). This
     *         feature is available only on GPUs with compute capability greater than
     *         or equal to 1.1.
     *       
     *     
     *   
     *   
     *   All of these flags are orthogonal to
     *     one another: a developer may page-lock memory that is portable or
     *     mapped with no restrictions.
     *   
     *   The CUDA context must have been created
     *     with the CU_CTX_MAP_HOST flag in order for the CU_MEMHOSTREGISTER_DEVICEMAP
     *     flag to have any effect.
     *   
     *   The CU_MEMHOSTREGISTER_DEVICEMAP flag
     *     may be specified on CUDA contexts for devices that do not support
     *     mapped pinned memory. The failure is deferred to cuMemHostGetDevicePointer()
     *     because the memory may be mapped into other CUDA contexts via the
     *     CU_MEMHOSTREGISTER_PORTABLE flag.
     *   
     *   The memory page-locked by this function
     *     must be unregistered with cuMemHostUnregister().
     *   
     *   
     *     Note:
     *     Note that this
     *       function may also return error codes from previous, asynchronous
     *       launches.
     *     
     *   
     *   
     * 
     * 
     * @param p Host pointer to memory to page-lock
     * @param bytesize Size in bytes of the address range to page-lock
     * @param Flags Flags for allocation request
     * 
     * @return CUDA_SUCCESS, CUDA_ERROR_DEINITIALIZED, CUDA_ERROR_NOT_INITIALIZED,
     * CUDA_ERROR_INVALID_CONTEXT, CUDA_ERROR_INVALID_VALUE,
     * CUDA_ERROR_OUT_OF_MEMORY, CUDA_ERROR_HOST_MEMORY_ALREADY_REGISTERED
     * 
     * @see JCudaDriver#cuMemHostUnregister
     * @see JCudaDriver#cuMemHostGetFlags
     * @see JCudaDriver#cuMemHostGetDevicePointer
     */    
    public static int cuMemHostRegister(Pointer p, long bytesize, int Flags)
    {
        // Invoke the native binding, then pass its status code through checkResult.
        final int status = cuMemHostRegisterNative(p, bytesize, Flags);
        return checkResult(status);
    }

    /** Native (JNI) counterpart of {@link #cuMemHostRegister}. */
    private static native int cuMemHostRegisterNative(Pointer p, long bytesize, int Flags);


    /**
     * Unregisters a memory range that was registered with cuMemHostRegister.
     * 
     *      * CUresult cuMemHostUnregister (
     *      void* p )
     * 
     * 
     *   Unregisters a memory range that was
     *     registered with cuMemHostRegister.  Unmaps the memory range whose base
     *     address is specified
     *     by p, and makes it pageable
     *     again.
     *   
     *   The base address must be the same one
     *     specified to cuMemHostRegister().
     *   
     *   
     *     Note:
     *     Note that this
     *       function may also return error codes from previous, asynchronous
     *       launches.
     *     
     *   
     *   
     * 
     * 
     * @param p Host pointer to memory to unregister
     * 
     * @return CUDA_SUCCESS, CUDA_ERROR_DEINITIALIZED, CUDA_ERROR_NOT_INITIALIZED,
     * CUDA_ERROR_INVALID_CONTEXT, CUDA_ERROR_INVALID_VALUE,
     * CUDA_ERROR_OUT_OF_MEMORY, CUDA_ERROR_HOST_MEMORY_NOT_REGISTERED
     * 
     * @see JCudaDriver#cuMemHostRegister
     */    
    public static int cuMemHostUnregister(Pointer p)
    {
        // Invoke the native binding, then pass its status code through checkResult.
        final int status = cuMemHostUnregisterNative(p);
        return checkResult(status);
    }

    /** Native (JNI) counterpart of {@link #cuMemHostUnregister}. */
    private static native int cuMemHostUnregisterNative(Pointer p);


    /**
     * Copies memory.
     * 
     *      * CUresult cuMemcpy (
     *      CUdeviceptr dst,
     *      CUdeviceptr src,
     *      size_t ByteCount )
     * 
     * 
     *   Copies memory.  Copies data between two
     *     pointers. dst and src are base pointers of the
     *     destination and source, respectively. ByteCount specifies
     *     the number of bytes to copy. Note that this function infers the type
     *     of the transfer (host to host, host to device,
     *     device to device, or device to host) from
     *     the pointer values. This function is only allowed in contexts which
     *     support unified
     *     addressing. Note that this function is
     *     synchronous.
     *   
     *   
     *     Note:
     *     Note that this
     *       function may also return error codes from previous, asynchronous
     *       launches.
     *     
     *   
     *   
     * 
     * 
     * @param dst Destination unified virtual address space pointer
     * @param src Source unified virtual address space pointer
     * @param ByteCount Size of memory copy in bytes
     * 
     * @return CUDA_SUCCESS, CUDA_ERROR_DEINITIALIZED, CUDA_ERROR_NOT_INITIALIZED,
     * CUDA_ERROR_INVALID_CONTEXT, CUDA_ERROR_INVALID_VALUE
     * 
     * @see JCudaDriver#cuArray3DCreate
     * @see JCudaDriver#cuArray3DGetDescriptor
     * @see JCudaDriver#cuArrayCreate
     * @see JCudaDriver#cuArrayDestroy
     * @see JCudaDriver#cuArrayGetDescriptor
     * @see JCudaDriver#cuMemAlloc
     * @see JCudaDriver#cuMemAllocHost
     * @see JCudaDriver#cuMemAllocPitch
     * @see JCudaDriver#cuMemcpy2D
     * @see JCudaDriver#cuMemcpy2DAsync
     * @see JCudaDriver#cuMemcpy2DUnaligned
     * @see JCudaDriver#cuMemcpy3D
     * @see JCudaDriver#cuMemcpy3DAsync
     * @see JCudaDriver#cuMemcpyAtoA
     * @see JCudaDriver#cuMemcpyAtoD
     * @see JCudaDriver#cuMemcpyAtoH
     * @see JCudaDriver#cuMemcpyAtoHAsync
     * @see JCudaDriver#cuMemcpyDtoA
     * @see JCudaDriver#cuMemcpyDtoH
     * @see JCudaDriver#cuMemcpyDtoHAsync
     * @see JCudaDriver#cuMemcpyHtoA
     * @see JCudaDriver#cuMemcpyHtoAAsync
     * @see JCudaDriver#cuMemcpyHtoD
     * @see JCudaDriver#cuMemcpyHtoDAsync
     * @see JCudaDriver#cuMemFree
     * @see JCudaDriver#cuMemFreeHost
     * @see JCudaDriver#cuMemGetAddressRange
     * @see JCudaDriver#cuMemGetInfo
     * @see JCudaDriver#cuMemHostAlloc
     * @see JCudaDriver#cuMemHostGetDevicePointer
     * @see JCudaDriver#cuMemsetD2D8
     * @see JCudaDriver#cuMemsetD2D16
     * @see JCudaDriver#cuMemsetD2D32
     * @see JCudaDriver#cuMemsetD8
     * @see JCudaDriver#cuMemsetD16
     * @see JCudaDriver#cuMemsetD32
     */    
    public static int cuMemcpy(CUdeviceptr dst, CUdeviceptr src, long ByteCount)
    {
        // Invoke the native binding, then pass its status code through checkResult.
        final int status = cuMemcpyNative(dst, src, ByteCount);
        return checkResult(status);
    }

    /** Native (JNI) counterpart of {@link #cuMemcpy}. */
    private static native int cuMemcpyNative(CUdeviceptr dst, CUdeviceptr src, long ByteCount);


    /**
     * Copies device memory between two contexts.
     * 
     *      * CUresult cuMemcpyPeer (
     *      CUdeviceptr dstDevice,
     *      CUcontext dstContext,
     *      CUdeviceptr srcDevice,
     *      CUcontext srcContext,
     *      size_t ByteCount )
     * 
     * 
     *   Copies device memory between two contexts.
     *     Copies from device memory in one context to device memory in another
     *     context.
     *     dstDevice is the base device
     *     pointer of the destination memory and dstContext is the
     *     destination context. srcDevice is the base device pointer of
     *     the source memory and srcContext is the source context. ByteCount specifies the number of bytes to copy.
     *   
     *   Note that this function is asynchronous
     *     with respect to the host, but serialized with respect to all pending and
     *     future asynchronous
     *     work in the current context, srcContext, and dstContext (use cuMemcpyPeerAsync to
     *     avoid this synchronization).
     *   
     *   
     *     Note:
     *     Note that this
     *       function may also return error codes from previous, asynchronous
     *       launches.
     *     
     *   
     *   
     * 
     * 
     * @param dstDevice Destination device pointer
     * @param dstContext Destination context
     * @param srcDevice Source device pointer
     * @param srcContext Source context
     * @param ByteCount Size of memory copy in bytes
     * 
     * @return CUDA_SUCCESS, CUDA_ERROR_DEINITIALIZED, CUDA_ERROR_NOT_INITIALIZED,
     * CUDA_ERROR_INVALID_CONTEXT, CUDA_ERROR_INVALID_VALUE
     * 
     * @see JCudaDriver#cuMemcpyDtoD
     * @see JCudaDriver#cuMemcpy3DPeer
     * @see JCudaDriver#cuMemcpyDtoDAsync
     * @see JCudaDriver#cuMemcpyPeerAsync
     * @see JCudaDriver#cuMemcpy3DPeerAsync
     */    
    public static int cuMemcpyPeer(CUdeviceptr dstDevice, CUcontext dstContext, CUdeviceptr srcDevice, CUcontext srcContext, long ByteCount)
    {
        // Pass the status through checkResult, as the other driver wrappers in
        // this class do, so that error codes from a peer copy receive the same
        // handling as those of every other call instead of bypassing it.
        final int status = cuMemcpyPeerNative(dstDevice, dstContext, srcDevice, srcContext, ByteCount);
        return checkResult(status);
    }

    /** Native (JNI) counterpart of {@link #cuMemcpyPeer}. */
    private static native int cuMemcpyPeerNative(CUdeviceptr dstDevice, CUcontext dstContext, CUdeviceptr srcDevice, CUcontext srcContext, long ByteCount);

    /**
     * Allocates device memory.
     * 
     *      * CUresult cuMemAlloc (
     *      CUdeviceptr* dptr,
     *      size_t bytesize )
     * 
     * 
     *   Allocates device memory.  Allocates bytesize bytes of linear memory on the device and returns in *dptr a pointer to the allocated memory. The allocated memory is
     *     suitably aligned for any kind of variable. The memory is not cleared.
     *     If bytesize is 0, cuMemAlloc()
     *     returns CUDA_ERROR_INVALID_VALUE.
     *   
     *   
     *     Note:
     *     Note that this
     *       function may also return error codes from previous, asynchronous
     *       launches.
     *     
     *   
     *   
     * 
     * 
     * @param dptr Returned device pointer
     * @param bytesize Requested allocation size in bytes
     * 
     * @return CUDA_SUCCESS, CUDA_ERROR_DEINITIALIZED, CUDA_ERROR_NOT_INITIALIZED,
     * CUDA_ERROR_INVALID_CONTEXT, CUDA_ERROR_INVALID_VALUE,
     * CUDA_ERROR_OUT_OF_MEMORY
     * 
     * @see JCudaDriver#cuArray3DCreate
     * @see JCudaDriver#cuArray3DGetDescriptor
     * @see JCudaDriver#cuArrayCreate
     * @see JCudaDriver#cuArrayDestroy
     * @see JCudaDriver#cuArrayGetDescriptor
     * @see JCudaDriver#cuMemAllocHost
     * @see JCudaDriver#cuMemAllocPitch
     * @see JCudaDriver#cuMemcpy2D
     * @see JCudaDriver#cuMemcpy2DAsync
     * @see JCudaDriver#cuMemcpy2DUnaligned
     * @see JCudaDriver#cuMemcpy3D
     * @see JCudaDriver#cuMemcpy3DAsync
     * @see JCudaDriver#cuMemcpyAtoA
     * @see JCudaDriver#cuMemcpyAtoD
     * @see JCudaDriver#cuMemcpyAtoH
     * @see JCudaDriver#cuMemcpyAtoHAsync
     * @see JCudaDriver#cuMemcpyDtoA
     * @see JCudaDriver#cuMemcpyDtoD
     * @see JCudaDriver#cuMemcpyDtoDAsync
     * @see JCudaDriver#cuMemcpyDtoH
     * @see JCudaDriver#cuMemcpyDtoHAsync
     * @see JCudaDriver#cuMemcpyHtoA
     * @see JCudaDriver#cuMemcpyHtoAAsync
     * @see JCudaDriver#cuMemcpyHtoD
     * @see JCudaDriver#cuMemcpyHtoDAsync
     * @see JCudaDriver#cuMemFree
     * @see JCudaDriver#cuMemFreeHost
     * @see JCudaDriver#cuMemGetAddressRange
     * @see JCudaDriver#cuMemGetInfo
     * @see JCudaDriver#cuMemHostAlloc
     * @see JCudaDriver#cuMemHostGetDevicePointer
     * @see JCudaDriver#cuMemsetD2D8
     * @see JCudaDriver#cuMemsetD2D16
     * @see JCudaDriver#cuMemsetD2D32
     * @see JCudaDriver#cuMemsetD8
     * @see JCudaDriver#cuMemsetD16
     * @see JCudaDriver#cuMemsetD32
     */    
    public static int cuMemAlloc(CUdeviceptr dptr, long bytesize)
    {
        // Invoke the native binding, then pass its status code through checkResult.
        final int status = cuMemAllocNative(dptr, bytesize);
        return checkResult(status);
    }

    /** Native (JNI) counterpart of {@link #cuMemAlloc}. */
    private static native int cuMemAllocNative(CUdeviceptr dptr, long bytesize);


    /**
     * Allocates pitched device memory.
     * 
     *      * CUresult cuMemAllocPitch (
     *      CUdeviceptr* dptr,
     *      size_t* pPitch,
     *      size_t WidthInBytes,
     *      size_t Height,
     *      unsigned int  ElementSizeBytes )
     * 
     * 
     *   Allocates pitched device memory. 
     *     Allocates at least WidthInBytes * Height bytes of
     *     linear memory on the device and returns in *dptr a pointer
     *     to the allocated memory. The function may pad the allocation to ensure
     *     that corresponding pointers in any given
     *     row will continue to meet the alignment
     *     requirements for coalescing as the address is updated from row to row.
     *     ElementSizeBytes specifies the size of the largest reads and
     *     writes that will be performed on the memory range. ElementSizeBytes may be 4, 8 or 16 (since coalesced memory
     *     transactions are not possible on other data sizes). If ElementSizeBytes is smaller than the actual read/write size of a
     *     kernel, the kernel will run correctly, but possibly at reduced speed.
     *     The
     *     pitch returned in *pPitch by
     *     cuMemAllocPitch() is the width in bytes of the allocation. The intended
     *     usage of pitch is as a separate parameter of the allocation, used to
     *     compute addresses within the 2D array.
     *     Given the row and column of an array element of type T,
     *     the address is computed as: 
     *   
     *   T* pElement = (T*)((char*)BaseAddress + Row * Pitch) + Column;
     *   
     *   The pitch returned by cuMemAllocPitch()
     *     is guaranteed to work with cuMemcpy2D() under all circumstances. For
     *     allocations of 2D arrays, it is recommended that programmers consider
     *     performing pitch allocations
     *     using cuMemAllocPitch(). Due to alignment
     *     restrictions in the hardware, this is especially true if the application
     *     will be performing 2D memory copies
     *     between different regions of device
     *     memory (whether linear memory or CUDA arrays).
     *   
     *   The byte alignment of the pitch returned
     *     by cuMemAllocPitch() is guaranteed to match or exceed the alignment
     *     requirement for texture binding with cuTexRefSetAddress2D().
     *   
     *   
     *     Note:
     *     Note that this
     *       function may also return error codes from previous, asynchronous
     *       launches.
     *     
     *   
     *   
     * 
     * 
     * @param dptr Returned device pointer
     * @param pPitch Returned pitch of allocation in bytes
     * @param WidthInBytes Requested allocation width in bytes
     * @param Height Requested allocation height in rows
     * @param ElementSizeBytes Size of largest reads/writes for range
     * 
     * @return CUDA_SUCCESS, CUDA_ERROR_DEINITIALIZED, CUDA_ERROR_NOT_INITIALIZED,
     * CUDA_ERROR_INVALID_CONTEXT, CUDA_ERROR_INVALID_VALUE,
     * CUDA_ERROR_OUT_OF_MEMORY
     * 
     * @see JCudaDriver#cuArray3DCreate
     * @see JCudaDriver#cuArray3DGetDescriptor
     * @see JCudaDriver#cuArrayCreate
     * @see JCudaDriver#cuArrayDestroy
     * @see JCudaDriver#cuArrayGetDescriptor
     * @see JCudaDriver#cuMemAlloc
     * @see JCudaDriver#cuMemAllocHost
     * @see JCudaDriver#cuMemcpy2D
     * @see JCudaDriver#cuMemcpy2DAsync
     * @see JCudaDriver#cuMemcpy2DUnaligned
     * @see JCudaDriver#cuMemcpy3D
     * @see JCudaDriver#cuMemcpy3DAsync
     * @see JCudaDriver#cuMemcpyAtoA
     * @see JCudaDriver#cuMemcpyAtoD
     * @see JCudaDriver#cuMemcpyAtoH
     * @see JCudaDriver#cuMemcpyAtoHAsync
     * @see JCudaDriver#cuMemcpyDtoA
     * @see JCudaDriver#cuMemcpyDtoD
     * @see JCudaDriver#cuMemcpyDtoDAsync
     * @see JCudaDriver#cuMemcpyDtoH
     * @see JCudaDriver#cuMemcpyDtoHAsync
     * @see JCudaDriver#cuMemcpyHtoA
     * @see JCudaDriver#cuMemcpyHtoAAsync
     * @see JCudaDriver#cuMemcpyHtoD
     * @see JCudaDriver#cuMemcpyHtoDAsync
     * @see JCudaDriver#cuMemFree
     * @see JCudaDriver#cuMemFreeHost
     * @see JCudaDriver#cuMemGetAddressRange
     * @see JCudaDriver#cuMemGetInfo
     * @see JCudaDriver#cuMemHostAlloc
     * @see JCudaDriver#cuMemHostGetDevicePointer
     * @see JCudaDriver#cuMemsetD2D8
     * @see JCudaDriver#cuMemsetD2D16
     * @see JCudaDriver#cuMemsetD2D32
     * @see JCudaDriver#cuMemsetD8
     * @see JCudaDriver#cuMemsetD16
     * @see JCudaDriver#cuMemsetD32
     */    
    public static int cuMemAllocPitch(CUdeviceptr dptr, long pPitch[], long WidthInBytes, long Height, int ElementSizeBytes)
    {
        // Invoke the native binding, then pass its status code through checkResult.
        final int status = cuMemAllocPitchNative(dptr, pPitch, WidthInBytes, Height, ElementSizeBytes);
        return checkResult(status);
    }

    /** Native (JNI) counterpart of {@link #cuMemAllocPitch}. */
    private static native int cuMemAllocPitchNative(CUdeviceptr dptr, long pPitch[], long WidthInBytes, long Height, int ElementSizeBytes);


    /**
     * Frees device memory.
     * 
     *      * CUresult cuMemFree (
     *      CUdeviceptr dptr )
     * 
     * 
     *   Frees device memory.  Frees the memory
     *     space pointed to by dptr, which must have been returned by a
     *     previous call to cuMemAlloc() or cuMemAllocPitch().
     *   
     *   
     *     Note:
     *     Note that this
     *       function may also return error codes from previous, asynchronous
     *       launches.
     *     
     *   
     *   
     * 
     * 
     * @param dptr Pointer to memory to free
     * 
     * @return CUDA_SUCCESS, CUDA_ERROR_DEINITIALIZED, CUDA_ERROR_NOT_INITIALIZED,
     * CUDA_ERROR_INVALID_CONTEXT, CUDA_ERROR_INVALID_VALUE
     * 
     * @see JCudaDriver#cuArray3DCreate
     * @see JCudaDriver#cuArray3DGetDescriptor
     * @see JCudaDriver#cuArrayCreate
     * @see JCudaDriver#cuArrayDestroy
     * @see JCudaDriver#cuArrayGetDescriptor
     * @see JCudaDriver#cuMemAlloc
     * @see JCudaDriver#cuMemAllocHost
     * @see JCudaDriver#cuMemAllocPitch
     * @see JCudaDriver#cuMemcpy2D
     * @see JCudaDriver#cuMemcpy2DAsync
     * @see JCudaDriver#cuMemcpy2DUnaligned
     * @see JCudaDriver#cuMemcpy3D
     * @see JCudaDriver#cuMemcpy3DAsync
     * @see JCudaDriver#cuMemcpyAtoA
     * @see JCudaDriver#cuMemcpyAtoD
     * @see JCudaDriver#cuMemcpyAtoH
     * @see JCudaDriver#cuMemcpyAtoHAsync
     * @see JCudaDriver#cuMemcpyDtoA
     * @see JCudaDriver#cuMemcpyDtoD
     * @see JCudaDriver#cuMemcpyDtoDAsync
     * @see JCudaDriver#cuMemcpyDtoH
     * @see JCudaDriver#cuMemcpyDtoHAsync
     * @see JCudaDriver#cuMemcpyHtoA
     * @see JCudaDriver#cuMemcpyHtoAAsync
     * @see JCudaDriver#cuMemcpyHtoD
     * @see JCudaDriver#cuMemcpyHtoDAsync
     * @see JCudaDriver#cuMemFreeHost
     * @see JCudaDriver#cuMemGetAddressRange
     * @see JCudaDriver#cuMemGetInfo
     * @see JCudaDriver#cuMemHostAlloc
     * @see JCudaDriver#cuMemHostGetDevicePointer
     * @see JCudaDriver#cuMemsetD2D8
     * @see JCudaDriver#cuMemsetD2D16
     * @see JCudaDriver#cuMemsetD2D32
     * @see JCudaDriver#cuMemsetD8
     * @see JCudaDriver#cuMemsetD16
     * @see JCudaDriver#cuMemsetD32
     */    
    public static int cuMemFree(CUdeviceptr dptr)
    {
        // The raw status from the native call is routed through checkResult
        // before being returned.
        final int status = cuMemFreeNative(dptr);
        return checkResult(status);
    }

    // Native counterpart of cuMemFree; implemented in native code.
    private static native int cuMemFreeNative(CUdeviceptr dptr);


    /**
     * Get information on memory allocations.
     * 
     * <pre>
     * CUresult cuMemGetAddressRange (
     *      CUdeviceptr* pbase,
     *      size_t* psize,
     *      CUdeviceptr dptr )
     * </pre>
     * 
     * 
     *   Get information on memory allocations. 
     *     Returns the base address in *pbase and size in *psize
     *     of the allocation by cuMemAlloc() or cuMemAllocPitch() that contains
     *     the input pointer dptr. Both parameters pbase and
     *     psize are optional. If one of them is NULL, it is ignored.
     *   
     *   
     *     Note:
     *     Note that this
     *       function may also return error codes from previous, asynchronous
     *       launches.
     *     
     *   
     *   
     * 
     * 
     * @param pbase Returned base address
     * @param psize Returned size of device memory allocation
     * @param dptr Device pointer to query
     * 
     * @return CUDA_SUCCESS, CUDA_ERROR_DEINITIALIZED, CUDA_ERROR_NOT_INITIALIZED,
     * CUDA_ERROR_INVALID_CONTEXT, CUDA_ERROR_INVALID_VALUE
     * 
     * @see JCudaDriver#cuArray3DCreate
     * @see JCudaDriver#cuArray3DGetDescriptor
     * @see JCudaDriver#cuArrayCreate
     * @see JCudaDriver#cuArrayDestroy
     * @see JCudaDriver#cuArrayGetDescriptor
     * @see JCudaDriver#cuMemAlloc
     * @see JCudaDriver#cuMemAllocHost
     * @see JCudaDriver#cuMemAllocPitch
     * @see JCudaDriver#cuMemcpy2D
     * @see JCudaDriver#cuMemcpy2DAsync
     * @see JCudaDriver#cuMemcpy2DUnaligned
     * @see JCudaDriver#cuMemcpy3D
     * @see JCudaDriver#cuMemcpy3DAsync
     * @see JCudaDriver#cuMemcpyAtoA
     * @see JCudaDriver#cuMemcpyAtoD
     * @see JCudaDriver#cuMemcpyAtoH
     * @see JCudaDriver#cuMemcpyAtoHAsync
     * @see JCudaDriver#cuMemcpyDtoA
     * @see JCudaDriver#cuMemcpyDtoD
     * @see JCudaDriver#cuMemcpyDtoDAsync
     * @see JCudaDriver#cuMemcpyDtoH
     * @see JCudaDriver#cuMemcpyDtoHAsync
     * @see JCudaDriver#cuMemcpyHtoA
     * @see JCudaDriver#cuMemcpyHtoAAsync
     * @see JCudaDriver#cuMemcpyHtoD
     * @see JCudaDriver#cuMemcpyHtoDAsync
     * @see JCudaDriver#cuMemFree
     * @see JCudaDriver#cuMemFreeHost
     * @see JCudaDriver#cuMemGetInfo
     * @see JCudaDriver#cuMemHostAlloc
     * @see JCudaDriver#cuMemHostGetDevicePointer
     * @see JCudaDriver#cuMemsetD2D8
     * @see JCudaDriver#cuMemsetD2D16
     * @see JCudaDriver#cuMemsetD2D32
     * @see JCudaDriver#cuMemsetD8
     * @see JCudaDriver#cuMemsetD16
     * @see JCudaDriver#cuMemsetD32
     */    
    public static int cuMemGetAddressRange(CUdeviceptr pbase, long psize[], CUdeviceptr dptr)
    {
        // Query the native binding, then pass its status code through
        // checkResult on the way out.
        final int status = cuMemGetAddressRangeNative(pbase, psize, dptr);
        return checkResult(status);
    }

    // Native counterpart of cuMemGetAddressRange; implemented in native code.
    private static native int cuMemGetAddressRangeNative(
        CUdeviceptr pbase, long[] psize, CUdeviceptr dptr);


    /**
     * Allocates page-locked host memory.
     * 
     * <pre>
     * CUresult cuMemAllocHost (
     *      void** pp,
     *      size_t bytesize )
     * </pre>
     * 
     * 
     *   Allocates page-locked host memory. 
     *     Allocates bytesize bytes of host memory that is page-locked
     *     and accessible to the device. The driver tracks the virtual memory
     *     ranges allocated
     *     with this function and automatically
     *     accelerates calls to functions such as cuMemcpy(). Since the memory
     *     can be accessed directly by the device, it can be read or written with
     *     much higher bandwidth than pageable
     *     memory obtained with functions such as
     *     malloc(). Allocating excessive amounts of memory with cuMemAllocHost()
     *     may degrade system performance, since it reduces the amount of memory
     *     available to the system for paging. As a result, this
     *     function is best used sparingly to
     *     allocate staging areas for data exchange between host and device.
     *   
     *   Note all host memory allocated using
     *     cuMemHostAlloc() will automatically be immediately accessible to all
     *     contexts on all devices which support unified addressing (as may be
     *     queried
     *     using CU_DEVICE_ATTRIBUTE_UNIFIED_ADDRESSING).
     *     The device pointer that may be used to access this host memory from
     *     those contexts is always equal to the returned host
     *     pointer *pp. See Unified
     *     Addressing for additional details.
     *   
     *   
     *     Note:
     *     Note that this
     *       function may also return error codes from previous, asynchronous
     *       launches.
     *     
     *   
     *   
     * 
     * 
     * @param pointer Returned host pointer to page-locked memory (pp in the C signature)
     * @param bytesize Requested allocation size in bytes
     * 
     * @return CUDA_SUCCESS, CUDA_ERROR_DEINITIALIZED, CUDA_ERROR_NOT_INITIALIZED,
     * CUDA_ERROR_INVALID_CONTEXT, CUDA_ERROR_INVALID_VALUE,
     * CUDA_ERROR_OUT_OF_MEMORY
     * 
     * @see JCudaDriver#cuArray3DCreate
     * @see JCudaDriver#cuArray3DGetDescriptor
     * @see JCudaDriver#cuArrayCreate
     * @see JCudaDriver#cuArrayDestroy
     * @see JCudaDriver#cuArrayGetDescriptor
     * @see JCudaDriver#cuMemAlloc
     * @see JCudaDriver#cuMemAllocPitch
     * @see JCudaDriver#cuMemcpy2D
     * @see JCudaDriver#cuMemcpy2DAsync
     * @see JCudaDriver#cuMemcpy2DUnaligned
     * @see JCudaDriver#cuMemcpy3D
     * @see JCudaDriver#cuMemcpy3DAsync
     * @see JCudaDriver#cuMemcpyAtoA
     * @see JCudaDriver#cuMemcpyAtoD
     * @see JCudaDriver#cuMemcpyAtoH
     * @see JCudaDriver#cuMemcpyAtoHAsync
     * @see JCudaDriver#cuMemcpyDtoA
     * @see JCudaDriver#cuMemcpyDtoD
     * @see JCudaDriver#cuMemcpyDtoDAsync
     * @see JCudaDriver#cuMemcpyDtoH
     * @see JCudaDriver#cuMemcpyDtoHAsync
     * @see JCudaDriver#cuMemcpyHtoA
     * @see JCudaDriver#cuMemcpyHtoAAsync
     * @see JCudaDriver#cuMemcpyHtoD
     * @see JCudaDriver#cuMemcpyHtoDAsync
     * @see JCudaDriver#cuMemFree
     * @see JCudaDriver#cuMemFreeHost
     * @see JCudaDriver#cuMemGetAddressRange
     * @see JCudaDriver#cuMemGetInfo
     * @see JCudaDriver#cuMemHostAlloc
     * @see JCudaDriver#cuMemHostGetDevicePointer
     * @see JCudaDriver#cuMemsetD2D8
     * @see JCudaDriver#cuMemsetD2D16
     * @see JCudaDriver#cuMemsetD2D32
     * @see JCudaDriver#cuMemsetD8
     * @see JCudaDriver#cuMemsetD16
     * @see JCudaDriver#cuMemsetD32
     */    
    public static int cuMemAllocHost(Pointer pointer, long bytesize)
    {
        // Delegate to the native binding and hand its status code to
        // checkResult before returning it.
        final int status = cuMemAllocHostNative(pointer, bytesize);
        return checkResult(status);
    }

    // Native counterpart of cuMemAllocHost; implemented in native code.
    private static native int cuMemAllocHostNative(Pointer pp, long bytesize);


    /**
     * Frees page-locked host memory.
     * 
     * <pre>
     * CUresult cuMemFreeHost (
     *      void* p )
     * </pre>
     * 
     * 
     *   Frees page-locked host memory.  Frees
     *     the memory space pointed to by p, which must have been
     *     returned by a previous call to cuMemAllocHost().
     *   
     *   
     *     Note:
     *     Note that this
     *       function may also return error codes from previous, asynchronous
     *       launches.
     *     
     *   
     *   
     * 
     * 
     * @param p Pointer to memory to free
     * 
     * @return CUDA_SUCCESS, CUDA_ERROR_DEINITIALIZED, CUDA_ERROR_NOT_INITIALIZED,
     * CUDA_ERROR_INVALID_CONTEXT, CUDA_ERROR_INVALID_VALUE
     * 
     * @see JCudaDriver#cuArray3DCreate
     * @see JCudaDriver#cuArray3DGetDescriptor
     * @see JCudaDriver#cuArrayCreate
     * @see JCudaDriver#cuArrayDestroy
     * @see JCudaDriver#cuArrayGetDescriptor
     * @see JCudaDriver#cuMemAlloc
     * @see JCudaDriver#cuMemAllocHost
     * @see JCudaDriver#cuMemAllocPitch
     * @see JCudaDriver#cuMemcpy2D
     * @see JCudaDriver#cuMemcpy2DAsync
     * @see JCudaDriver#cuMemcpy2DUnaligned
     * @see JCudaDriver#cuMemcpy3D
     * @see JCudaDriver#cuMemcpy3DAsync
     * @see JCudaDriver#cuMemcpyAtoA
     * @see JCudaDriver#cuMemcpyAtoD
     * @see JCudaDriver#cuMemcpyAtoH
     * @see JCudaDriver#cuMemcpyAtoHAsync
     * @see JCudaDriver#cuMemcpyDtoA
     * @see JCudaDriver#cuMemcpyDtoD
     * @see JCudaDriver#cuMemcpyDtoDAsync
     * @see JCudaDriver#cuMemcpyDtoH
     * @see JCudaDriver#cuMemcpyDtoHAsync
     * @see JCudaDriver#cuMemcpyHtoA
     * @see JCudaDriver#cuMemcpyHtoAAsync
     * @see JCudaDriver#cuMemcpyHtoD
     * @see JCudaDriver#cuMemcpyHtoDAsync
     * @see JCudaDriver#cuMemFree
     * @see JCudaDriver#cuMemGetAddressRange
     * @see JCudaDriver#cuMemGetInfo
     * @see JCudaDriver#cuMemHostAlloc
     * @see JCudaDriver#cuMemHostGetDevicePointer
     * @see JCudaDriver#cuMemsetD2D8
     * @see JCudaDriver#cuMemsetD2D16
     * @see JCudaDriver#cuMemsetD2D32
     * @see JCudaDriver#cuMemsetD8
     * @see JCudaDriver#cuMemsetD16
     * @see JCudaDriver#cuMemsetD32
     */    
    public static int cuMemFreeHost(Pointer p)
    {
        // The native status code goes through checkResult before it reaches
        // the caller.
        final int status = cuMemFreeHostNative(p);
        return checkResult(status);
    }

    // Native counterpart of cuMemFreeHost; implemented in native code.
    private static native int cuMemFreeHostNative(Pointer p);


    /**
     * Copies memory from Host to Device.
     * 
     * <pre>
     * CUresult cuMemcpyHtoD (
     *      CUdeviceptr dstDevice,
     *      const void* srcHost,
     *      size_t ByteCount )
     * </pre>
     * 
     * 
     *   Copies memory from Host to Device. 
     *     Copies from host memory to device memory. dstDevice and srcHost are the base addresses of the destination and source,
     *     respectively. ByteCount specifies the number of bytes to
     *     copy. Note that this function is synchronous.
     *   
     *   
     *     Note:
     *     Note that this
     *       function may also return error codes from previous, asynchronous
     *       launches.
     *     
     *   
     *   
     * 
     * 
     * @param dstDevice Destination device pointer
     * @param srcHost Source host pointer
     * @param ByteCount Size of memory copy in bytes
     * 
     * @return CUDA_SUCCESS, CUDA_ERROR_DEINITIALIZED, CUDA_ERROR_NOT_INITIALIZED,
     * CUDA_ERROR_INVALID_CONTEXT, CUDA_ERROR_INVALID_VALUE
     * 
     * @see JCudaDriver#cuArray3DCreate
     * @see JCudaDriver#cuArray3DGetDescriptor
     * @see JCudaDriver#cuArrayCreate
     * @see JCudaDriver#cuArrayDestroy
     * @see JCudaDriver#cuArrayGetDescriptor
     * @see JCudaDriver#cuMemAlloc
     * @see JCudaDriver#cuMemAllocHost
     * @see JCudaDriver#cuMemAllocPitch
     * @see JCudaDriver#cuMemcpy2D
     * @see JCudaDriver#cuMemcpy2DAsync
     * @see JCudaDriver#cuMemcpy2DUnaligned
     * @see JCudaDriver#cuMemcpy3D
     * @see JCudaDriver#cuMemcpy3DAsync
     * @see JCudaDriver#cuMemcpyAtoA
     * @see JCudaDriver#cuMemcpyAtoD
     * @see JCudaDriver#cuMemcpyAtoH
     * @see JCudaDriver#cuMemcpyAtoHAsync
     * @see JCudaDriver#cuMemcpyDtoA
     * @see JCudaDriver#cuMemcpyDtoD
     * @see JCudaDriver#cuMemcpyDtoDAsync
     * @see JCudaDriver#cuMemcpyDtoH
     * @see JCudaDriver#cuMemcpyDtoHAsync
     * @see JCudaDriver#cuMemcpyHtoA
     * @see JCudaDriver#cuMemcpyHtoAAsync
     * @see JCudaDriver#cuMemcpyHtoDAsync
     * @see JCudaDriver#cuMemFree
     * @see JCudaDriver#cuMemFreeHost
     * @see JCudaDriver#cuMemGetAddressRange
     * @see JCudaDriver#cuMemGetInfo
     * @see JCudaDriver#cuMemHostAlloc
     * @see JCudaDriver#cuMemHostGetDevicePointer
     * @see JCudaDriver#cuMemsetD2D8
     * @see JCudaDriver#cuMemsetD2D16
     * @see JCudaDriver#cuMemsetD2D32
     * @see JCudaDriver#cuMemsetD8
     * @see JCudaDriver#cuMemsetD16
     * @see JCudaDriver#cuMemsetD32
     */    
    public static int cuMemcpyHtoD(CUdeviceptr dstDevice, Pointer srcHost, long ByteCount)
    {
        // Perform the copy through the native binding, then let checkResult
        // examine the returned status code.
        final int status = cuMemcpyHtoDNative(dstDevice, srcHost, ByteCount);
        return checkResult(status);
    }

    // Native counterpart of cuMemcpyHtoD; implemented in native code.
    private static native int cuMemcpyHtoDNative(
        CUdeviceptr dstDevice, Pointer srcHost, long ByteCount);


    /**
     * Copies memory from Device to Host.
     * 
     * <pre>
     * CUresult cuMemcpyDtoH (
     *      void* dstHost,
     *      CUdeviceptr srcDevice,
     *      size_t ByteCount )
     * </pre>
     * 
     * 
     *   Copies memory from Device to Host. 
     *     Copies from device to host memory. dstHost and srcDevice specify the base pointers of the destination and
     *     source, respectively. ByteCount specifies the number of bytes
     *     to copy. Note that this function is synchronous.
     *   
     *   
     *     Note:
     *     Note that this
     *       function may also return error codes from previous, asynchronous
     *       launches.
     *     
     *   
     *   
     * 
     * 
     * @param dstHost Destination host pointer
     * @param srcDevice Source device pointer
     * @param ByteCount Size of memory copy in bytes
     * 
     * @return CUDA_SUCCESS, CUDA_ERROR_DEINITIALIZED, CUDA_ERROR_NOT_INITIALIZED,
     * CUDA_ERROR_INVALID_CONTEXT, CUDA_ERROR_INVALID_VALUE
     * 
     * @see JCudaDriver#cuArray3DCreate
     * @see JCudaDriver#cuArray3DGetDescriptor
     * @see JCudaDriver#cuArrayCreate
     * @see JCudaDriver#cuArrayDestroy
     * @see JCudaDriver#cuArrayGetDescriptor
     * @see JCudaDriver#cuMemAlloc
     * @see JCudaDriver#cuMemAllocHost
     * @see JCudaDriver#cuMemAllocPitch
     * @see JCudaDriver#cuMemcpy2D
     * @see JCudaDriver#cuMemcpy2DAsync
     * @see JCudaDriver#cuMemcpy2DUnaligned
     * @see JCudaDriver#cuMemcpy3D
     * @see JCudaDriver#cuMemcpy3DAsync
     * @see JCudaDriver#cuMemcpyAtoA
     * @see JCudaDriver#cuMemcpyAtoD
     * @see JCudaDriver#cuMemcpyAtoH
     * @see JCudaDriver#cuMemcpyAtoHAsync
     * @see JCudaDriver#cuMemcpyDtoA
     * @see JCudaDriver#cuMemcpyDtoD
     * @see JCudaDriver#cuMemcpyDtoDAsync
     * @see JCudaDriver#cuMemcpyDtoHAsync
     * @see JCudaDriver#cuMemcpyHtoA
     * @see JCudaDriver#cuMemcpyHtoAAsync
     * @see JCudaDriver#cuMemcpyHtoD
     * @see JCudaDriver#cuMemcpyHtoDAsync
     * @see JCudaDriver#cuMemFree
     * @see JCudaDriver#cuMemFreeHost
     * @see JCudaDriver#cuMemGetAddressRange
     * @see JCudaDriver#cuMemGetInfo
     * @see JCudaDriver#cuMemHostAlloc
     * @see JCudaDriver#cuMemHostGetDevicePointer
     * @see JCudaDriver#cuMemsetD2D8
     * @see JCudaDriver#cuMemsetD2D16
     * @see JCudaDriver#cuMemsetD2D32
     * @see JCudaDriver#cuMemsetD8
     * @see JCudaDriver#cuMemsetD16
     * @see JCudaDriver#cuMemsetD32
     */    
    public static int cuMemcpyDtoH(Pointer dstHost, CUdeviceptr srcDevice, long ByteCount)
    {
        // Perform the copy through the native binding, then let checkResult
        // examine the returned status code.
        final int status = cuMemcpyDtoHNative(dstHost, srcDevice, ByteCount);
        return checkResult(status);
    }

    // Native counterpart of cuMemcpyDtoH; implemented in native code.
    private static native int cuMemcpyDtoHNative(
        Pointer dstHost, CUdeviceptr srcDevice, long ByteCount);


    /**
     * Copies memory from Device to Device.
     * 
     * <pre>
     * CUresult cuMemcpyDtoD (
     *      CUdeviceptr dstDevice,
     *      CUdeviceptr srcDevice,
     *      size_t ByteCount )
     * </pre>
     * 
     * 
     *   Copies memory from Device to Device. 
     *     Copies from device memory to device memory. dstDevice and
     *     srcDevice are the base pointers of the destination and
     *     source, respectively. ByteCount specifies the number of bytes
     *     to copy. Note that this function is asynchronous.
     *   
     *   
     *     Note:
     *     Note that this
     *       function may also return error codes from previous, asynchronous
     *       launches.
     *     
     *   
     *   
     * 
     * 
     * @param dstDevice Destination device pointer
     * @param srcDevice Source device pointer
     * @param ByteCount Size of memory copy in bytes
     * 
     * @return CUDA_SUCCESS, CUDA_ERROR_DEINITIALIZED, CUDA_ERROR_NOT_INITIALIZED,
     * CUDA_ERROR_INVALID_CONTEXT, CUDA_ERROR_INVALID_VALUE
     * 
     * @see JCudaDriver#cuArray3DCreate
     * @see JCudaDriver#cuArray3DGetDescriptor
     * @see JCudaDriver#cuArrayCreate
     * @see JCudaDriver#cuArrayDestroy
     * @see JCudaDriver#cuArrayGetDescriptor
     * @see JCudaDriver#cuMemAlloc
     * @see JCudaDriver#cuMemAllocHost
     * @see JCudaDriver#cuMemAllocPitch
     * @see JCudaDriver#cuMemcpy2D
     * @see JCudaDriver#cuMemcpy2DAsync
     * @see JCudaDriver#cuMemcpy2DUnaligned
     * @see JCudaDriver#cuMemcpy3D
     * @see JCudaDriver#cuMemcpy3DAsync
     * @see JCudaDriver#cuMemcpyAtoA
     * @see JCudaDriver#cuMemcpyAtoD
     * @see JCudaDriver#cuMemcpyAtoH
     * @see JCudaDriver#cuMemcpyAtoHAsync
     * @see JCudaDriver#cuMemcpyDtoA
     * @see JCudaDriver#cuMemcpyDtoH
     * @see JCudaDriver#cuMemcpyDtoHAsync
     * @see JCudaDriver#cuMemcpyHtoA
     * @see JCudaDriver#cuMemcpyHtoAAsync
     * @see JCudaDriver#cuMemcpyHtoD
     * @see JCudaDriver#cuMemcpyHtoDAsync
     * @see JCudaDriver#cuMemFree
     * @see JCudaDriver#cuMemFreeHost
     * @see JCudaDriver#cuMemGetAddressRange
     * @see JCudaDriver#cuMemGetInfo
     * @see JCudaDriver#cuMemHostAlloc
     * @see JCudaDriver#cuMemHostGetDevicePointer
     * @see JCudaDriver#cuMemsetD2D8
     * @see JCudaDriver#cuMemsetD2D16
     * @see JCudaDriver#cuMemsetD2D32
     * @see JCudaDriver#cuMemsetD8
     * @see JCudaDriver#cuMemsetD16
     * @see JCudaDriver#cuMemsetD32
     */    
    public static int cuMemcpyDtoD(CUdeviceptr dstDevice, CUdeviceptr srcDevice, long ByteCount)
    {
        // Perform the copy through the native binding, then let checkResult
        // examine the returned status code.
        final int status = cuMemcpyDtoDNative(dstDevice, srcDevice, ByteCount);
        return checkResult(status);
    }

    // Native counterpart of cuMemcpyDtoD; implemented in native code.
    private static native int cuMemcpyDtoDNative(
        CUdeviceptr dstDevice, CUdeviceptr srcDevice, long ByteCount);


    /**
     * Copies memory from Device to Array.
     * 
     * <pre>
     * CUresult cuMemcpyDtoA (
     *      CUarray dstArray,
     *      size_t dstOffset,
     *      CUdeviceptr srcDevice,
     *      size_t ByteCount )
     * </pre>
     * 
     * 
     *   Copies memory from Device to Array. 
     *     Copies from device memory to a 1D CUDA array. dstArray and
     *     dstOffset specify the CUDA array handle and starting index
     *     of the destination data. srcDevice specifies the base pointer
     *     of the source. ByteCount specifies the number of bytes to
     *     copy.
     *   
     *   
     *     Note:
     *     Note that this
     *       function may also return error codes from previous, asynchronous
     *       launches.
     *     
     *   
     *   
     * 
     * 
     * @param dstArray Destination array
     * @param dstIndex Offset in bytes of destination array (dstOffset in the C signature)
     * @param srcDevice Source device pointer
     * @param ByteCount Size of memory copy in bytes
     * 
     * @return CUDA_SUCCESS, CUDA_ERROR_DEINITIALIZED, CUDA_ERROR_NOT_INITIALIZED,
     * CUDA_ERROR_INVALID_CONTEXT, CUDA_ERROR_INVALID_VALUE
     * 
     * @see JCudaDriver#cuArray3DCreate
     * @see JCudaDriver#cuArray3DGetDescriptor
     * @see JCudaDriver#cuArrayCreate
     * @see JCudaDriver#cuArrayDestroy
     * @see JCudaDriver#cuArrayGetDescriptor
     * @see JCudaDriver#cuMemAlloc
     * @see JCudaDriver#cuMemAllocHost
     * @see JCudaDriver#cuMemAllocPitch
     * @see JCudaDriver#cuMemcpy2D
     * @see JCudaDriver#cuMemcpy2DAsync
     * @see JCudaDriver#cuMemcpy2DUnaligned
     * @see JCudaDriver#cuMemcpy3D
     * @see JCudaDriver#cuMemcpy3DAsync
     * @see JCudaDriver#cuMemcpyAtoA
     * @see JCudaDriver#cuMemcpyAtoD
     * @see JCudaDriver#cuMemcpyAtoH
     * @see JCudaDriver#cuMemcpyAtoHAsync
     * @see JCudaDriver#cuMemcpyDtoD
     * @see JCudaDriver#cuMemcpyDtoDAsync
     * @see JCudaDriver#cuMemcpyDtoH
     * @see JCudaDriver#cuMemcpyDtoHAsync
     * @see JCudaDriver#cuMemcpyHtoA
     * @see JCudaDriver#cuMemcpyHtoAAsync
     * @see JCudaDriver#cuMemcpyHtoD
     * @see JCudaDriver#cuMemcpyHtoDAsync
     * @see JCudaDriver#cuMemFree
     * @see JCudaDriver#cuMemFreeHost
     * @see JCudaDriver#cuMemGetAddressRange
     * @see JCudaDriver#cuMemGetInfo
     * @see JCudaDriver#cuMemHostAlloc
     * @see JCudaDriver#cuMemHostGetDevicePointer
     * @see JCudaDriver#cuMemsetD2D8
     * @see JCudaDriver#cuMemsetD2D16
     * @see JCudaDriver#cuMemsetD2D32
     * @see JCudaDriver#cuMemsetD8
     * @see JCudaDriver#cuMemsetD16
     * @see JCudaDriver#cuMemsetD32
     */    
    public static int cuMemcpyDtoA(CUarray dstArray, long dstIndex, CUdeviceptr srcDevice, long ByteCount)
    {
        // Perform the copy through the native binding, then let checkResult
        // examine the returned status code.
        final int status = cuMemcpyDtoANative(
            dstArray, dstIndex, srcDevice, ByteCount);
        return checkResult(status);
    }

    // Native counterpart of cuMemcpyDtoA; implemented in native code.
    private static native int cuMemcpyDtoANative(
        CUarray dstArray, long dstIndex, CUdeviceptr srcDevice, long ByteCount);


    /**
     * Copies memory from Array to Device.
     * 
     * <pre>
     * CUresult cuMemcpyAtoD (
     *      CUdeviceptr dstDevice,
     *      CUarray srcArray,
     *      size_t srcOffset,
     *      size_t ByteCount )
     * </pre>
     * 
     * 
     *   Copies memory from Array to Device. 
     *     Copies from one 1D CUDA array to device memory. dstDevice
     *     specifies the base pointer of the destination and must be naturally
     *     aligned with the CUDA array elements. srcArray and srcOffset specify the CUDA array handle and the offset in bytes
     *     into the array where the copy is to begin. ByteCount specifies
     *     the number of bytes to copy and must be evenly divisible by the array
     *     element size.
     *   
     *   
     *     Note:
     *     Note that this
     *       function may also return error codes from previous, asynchronous
     *       launches.
     *     
     *   
     *   
     * 
     * 
     * @param dstDevice Destination device pointer
     * @param hSrc Source array (srcArray in the C signature)
     * @param SrcIndex Offset in bytes of source array (srcOffset in the C signature)
     * @param ByteCount Size of memory copy in bytes
     * 
     * @return CUDA_SUCCESS, CUDA_ERROR_DEINITIALIZED, CUDA_ERROR_NOT_INITIALIZED,
     * CUDA_ERROR_INVALID_CONTEXT, CUDA_ERROR_INVALID_VALUE
     * 
     * @see JCudaDriver#cuArray3DCreate
     * @see JCudaDriver#cuArray3DGetDescriptor
     * @see JCudaDriver#cuArrayCreate
     * @see JCudaDriver#cuArrayDestroy
     * @see JCudaDriver#cuArrayGetDescriptor
     * @see JCudaDriver#cuMemAlloc
     * @see JCudaDriver#cuMemAllocHost
     * @see JCudaDriver#cuMemAllocPitch
     * @see JCudaDriver#cuMemcpy2D
     * @see JCudaDriver#cuMemcpy2DAsync
     * @see JCudaDriver#cuMemcpy2DUnaligned
     * @see JCudaDriver#cuMemcpy3D
     * @see JCudaDriver#cuMemcpy3DAsync
     * @see JCudaDriver#cuMemcpyAtoA
     * @see JCudaDriver#cuMemcpyAtoH
     * @see JCudaDriver#cuMemcpyAtoHAsync
     * @see JCudaDriver#cuMemcpyDtoA
     * @see JCudaDriver#cuMemcpyDtoD
     * @see JCudaDriver#cuMemcpyDtoDAsync
     * @see JCudaDriver#cuMemcpyDtoH
     * @see JCudaDriver#cuMemcpyDtoHAsync
     * @see JCudaDriver#cuMemcpyHtoA
     * @see JCudaDriver#cuMemcpyHtoAAsync
     * @see JCudaDriver#cuMemcpyHtoD
     * @see JCudaDriver#cuMemcpyHtoDAsync
     * @see JCudaDriver#cuMemFree
     * @see JCudaDriver#cuMemFreeHost
     * @see JCudaDriver#cuMemGetAddressRange
     * @see JCudaDriver#cuMemGetInfo
     * @see JCudaDriver#cuMemHostAlloc
     * @see JCudaDriver#cuMemHostGetDevicePointer
     * @see JCudaDriver#cuMemsetD2D8
     * @see JCudaDriver#cuMemsetD2D16
     * @see JCudaDriver#cuMemsetD2D32
     * @see JCudaDriver#cuMemsetD8
     * @see JCudaDriver#cuMemsetD16
     * @see JCudaDriver#cuMemsetD32
     */    
    public static int cuMemcpyAtoD(CUdeviceptr dstDevice, CUarray hSrc, long SrcIndex, long ByteCount)
    {
        // Perform the copy through the native binding, then let checkResult
        // examine the returned status code.
        final int status = cuMemcpyAtoDNative(
            dstDevice, hSrc, SrcIndex, ByteCount);
        return checkResult(status);
    }

    // Native counterpart of cuMemcpyAtoD; implemented in native code.
    private static native int cuMemcpyAtoDNative(
        CUdeviceptr dstDevice, CUarray hSrc, long SrcIndex, long ByteCount);


    /**
     * Copies memory from Host to Array.
     * 
     * <pre>
     * CUresult cuMemcpyHtoA (
     *      CUarray dstArray,
     *      size_t dstOffset,
     *      const void* srcHost,
     *      size_t ByteCount )
     * </pre>
     * 
     * 
     *   Copies memory from Host to Array.  Copies
     *     from host memory to a 1D CUDA array. dstArray and dstOffset specify the CUDA array handle and starting offset in
     *     bytes of the destination data. pSrc specifies the base
     *     address of the source. ByteCount specifies the number of
     *     bytes to copy.
     *   
     *   
     *     Note:
     *     Note that this
     *       function may also return error codes from previous, asynchronous
     *       launches.
     *     
     *   
     *   
     * 
     * 
     * @param dstArray Destination array
     * @param dstIndex Offset in bytes of destination array (dstOffset in the C signature)
     * @param pSrc Source host pointer (srcHost in the C signature)
     * @param ByteCount Size of memory copy in bytes
     * 
     * @return CUDA_SUCCESS, CUDA_ERROR_DEINITIALIZED, CUDA_ERROR_NOT_INITIALIZED,
     * CUDA_ERROR_INVALID_CONTEXT, CUDA_ERROR_INVALID_VALUE
     * 
     * @see JCudaDriver#cuArray3DCreate
     * @see JCudaDriver#cuArray3DGetDescriptor
     * @see JCudaDriver#cuArrayCreate
     * @see JCudaDriver#cuArrayDestroy
     * @see JCudaDriver#cuArrayGetDescriptor
     * @see JCudaDriver#cuMemAlloc
     * @see JCudaDriver#cuMemAllocHost
     * @see JCudaDriver#cuMemAllocPitch
     * @see JCudaDriver#cuMemcpy2D
     * @see JCudaDriver#cuMemcpy2DAsync
     * @see JCudaDriver#cuMemcpy2DUnaligned
     * @see JCudaDriver#cuMemcpy3D
     * @see JCudaDriver#cuMemcpy3DAsync
     * @see JCudaDriver#cuMemcpyAtoA
     * @see JCudaDriver#cuMemcpyAtoD
     * @see JCudaDriver#cuMemcpyAtoH
     * @see JCudaDriver#cuMemcpyAtoHAsync
     * @see JCudaDriver#cuMemcpyDtoA
     * @see JCudaDriver#cuMemcpyDtoD
     * @see JCudaDriver#cuMemcpyDtoDAsync
     * @see JCudaDriver#cuMemcpyDtoH
     * @see JCudaDriver#cuMemcpyDtoHAsync
     * @see JCudaDriver#cuMemcpyHtoAAsync
     * @see JCudaDriver#cuMemcpyHtoD
     * @see JCudaDriver#cuMemcpyHtoDAsync
     * @see JCudaDriver#cuMemFree
     * @see JCudaDriver#cuMemFreeHost
     * @see JCudaDriver#cuMemGetAddressRange
     * @see JCudaDriver#cuMemGetInfo
     * @see JCudaDriver#cuMemHostAlloc
     * @see JCudaDriver#cuMemHostGetDevicePointer
     * @see JCudaDriver#cuMemsetD2D8
     * @see JCudaDriver#cuMemsetD2D16
     * @see JCudaDriver#cuMemsetD2D32
     * @see JCudaDriver#cuMemsetD8
     * @see JCudaDriver#cuMemsetD16
     * @see JCudaDriver#cuMemsetD32
     */    
    public static int cuMemcpyHtoA(CUarray dstArray, long dstIndex, Pointer pSrc, long ByteCount)
    {
        // Perform the copy through the native binding, then let checkResult
        // examine the returned status code.
        final int status = cuMemcpyHtoANative(
            dstArray, dstIndex, pSrc, ByteCount);
        return checkResult(status);
    }

    // Native counterpart of cuMemcpyHtoA; implemented in native code.
    private static native int cuMemcpyHtoANative(
        CUarray dstArray, long dstIndex, Pointer pSrc, long ByteCount);




    /**
     * Copies memory from Array to Host.
     * 
     * <pre>
     * CUresult cuMemcpyAtoH (
     *      void* dstHost,
     *      CUarray srcArray,
     *      size_t srcOffset,
     *      size_t ByteCount )
     * </pre>
     * 
     * 
     *   Copies memory from Array to Host.  Copies
     *     from one 1D CUDA array to host memory. dstHost specifies the
     *     base pointer of the destination. srcArray and srcOffset specify the CUDA array handle and starting offset in
     *     bytes of the source data. ByteCount specifies the number of
     *     bytes to copy.
     *   
     *   
     *     Note:
     *     Note that this
     *       function may also return error codes from previous, asynchronous
     *       launches.
     *     
     *   
     *   
     * 
     * 
     * @param dstHost Destination host pointer
     * @param srcArray Source array
     * @param srcIndex Offset in bytes of source array (srcOffset in the C signature)
     * @param ByteCount Size of memory copy in bytes
     * 
     * @return CUDA_SUCCESS, CUDA_ERROR_DEINITIALIZED, CUDA_ERROR_NOT_INITIALIZED,
     * CUDA_ERROR_INVALID_CONTEXT, CUDA_ERROR_INVALID_VALUE
     * 
     * @see JCudaDriver#cuArray3DCreate
     * @see JCudaDriver#cuArray3DGetDescriptor
     * @see JCudaDriver#cuArrayCreate
     * @see JCudaDriver#cuArrayDestroy
     * @see JCudaDriver#cuArrayGetDescriptor
     * @see JCudaDriver#cuMemAlloc
     * @see JCudaDriver#cuMemAllocHost
     * @see JCudaDriver#cuMemAllocPitch
     * @see JCudaDriver#cuMemcpy2D
     * @see JCudaDriver#cuMemcpy2DAsync
     * @see JCudaDriver#cuMemcpy2DUnaligned
     * @see JCudaDriver#cuMemcpy3D
     * @see JCudaDriver#cuMemcpy3DAsync
     * @see JCudaDriver#cuMemcpyAtoA
     * @see JCudaDriver#cuMemcpyAtoD
     * @see JCudaDriver#cuMemcpyAtoHAsync
     * @see JCudaDriver#cuMemcpyDtoA
     * @see JCudaDriver#cuMemcpyDtoD
     * @see JCudaDriver#cuMemcpyDtoDAsync
     * @see JCudaDriver#cuMemcpyDtoH
     * @see JCudaDriver#cuMemcpyDtoHAsync
     * @see JCudaDriver#cuMemcpyHtoA
     * @see JCudaDriver#cuMemcpyHtoAAsync
     * @see JCudaDriver#cuMemcpyHtoD
     * @see JCudaDriver#cuMemcpyHtoDAsync
     * @see JCudaDriver#cuMemFree
     * @see JCudaDriver#cuMemFreeHost
     * @see JCudaDriver#cuMemGetAddressRange
     * @see JCudaDriver#cuMemGetInfo
     * @see JCudaDriver#cuMemHostAlloc
     * @see JCudaDriver#cuMemHostGetDevicePointer
     * @see JCudaDriver#cuMemsetD2D8
     * @see JCudaDriver#cuMemsetD2D16
     * @see JCudaDriver#cuMemsetD2D32
     * @see JCudaDriver#cuMemsetD8
     * @see JCudaDriver#cuMemsetD16
     * @see JCudaDriver#cuMemsetD32
     */    
    public static int cuMemcpyAtoH(Pointer dstHost, CUarray srcArray, long srcIndex, long ByteCount)
    {
        // Perform the copy through the native binding, then let checkResult
        // examine the returned status code.
        final int status = cuMemcpyAtoHNative(
            dstHost, srcArray, srcIndex, ByteCount);
        return checkResult(status);
    }

    // Native counterpart of cuMemcpyAtoH; implemented in native code.
    private static native int cuMemcpyAtoHNative(
        Pointer dstHost, CUarray srcArray, long srcIndex, long ByteCount);


    /**
     * Copies memory from Array to Array.
     * 
     * <pre>
     * CUresult cuMemcpyAtoA (
     *      CUarray dstArray,
     *      size_t dstOffset,
     *      CUarray srcArray,
     *      size_t srcOffset,
     *      size_t ByteCount )
     * </pre>
     * 
     * 
     *   Copies memory from Array to Array. 
     *     Copies from one 1D CUDA array to another. dstArray and srcArray specify the handles of the destination and source CUDA
     *     arrays for the copy, respectively. dstOffset and srcOffset specify the destination and source offsets in bytes
     *     into the CUDA arrays. ByteCount is the number of bytes to be
     *     copied. The elements in the CUDA arrays need not have the same
     *     format, but they must be the same size, and ByteCount must be
     *     evenly divisible by that size.
     *   
     *   
     *     Note:
     *     Note that this
     *       function may also return error codes from previous, asynchronous
     *       launches.
     *     
     *   
     *   
     * 
     * 
     * @param dstArray Destination array
     * @param dstIndex Offset in bytes of destination array (dstOffset in the C signature)
     * @param srcArray Source array
     * @param srcIndex Offset in bytes of source array (srcOffset in the C signature)
     * @param ByteCount Size of memory copy in bytes
     * 
     * @return CUDA_SUCCESS, CUDA_ERROR_DEINITIALIZED, CUDA_ERROR_NOT_INITIALIZED,
     * CUDA_ERROR_INVALID_CONTEXT, CUDA_ERROR_INVALID_VALUE
     * 
     * @see JCudaDriver#cuArray3DCreate
     * @see JCudaDriver#cuArray3DGetDescriptor
     * @see JCudaDriver#cuArrayCreate
     * @see JCudaDriver#cuArrayDestroy
     * @see JCudaDriver#cuArrayGetDescriptor
     * @see JCudaDriver#cuMemAlloc
     * @see JCudaDriver#cuMemAllocHost
     * @see JCudaDriver#cuMemAllocPitch
     * @see JCudaDriver#cuMemcpy2D
     * @see JCudaDriver#cuMemcpy2DAsync
     * @see JCudaDriver#cuMemcpy2DUnaligned
     * @see JCudaDriver#cuMemcpy3D
     * @see JCudaDriver#cuMemcpy3DAsync
     * @see JCudaDriver#cuMemcpyAtoD
     * @see JCudaDriver#cuMemcpyAtoH
     * @see JCudaDriver#cuMemcpyAtoHAsync
     * @see JCudaDriver#cuMemcpyDtoA
     * @see JCudaDriver#cuMemcpyDtoD
     * @see JCudaDriver#cuMemcpyDtoDAsync
     * @see JCudaDriver#cuMemcpyDtoH
     * @see JCudaDriver#cuMemcpyDtoHAsync
     * @see JCudaDriver#cuMemcpyHtoA
     * @see JCudaDriver#cuMemcpyHtoAAsync
     * @see JCudaDriver#cuMemcpyHtoD
     * @see JCudaDriver#cuMemcpyHtoDAsync
     * @see JCudaDriver#cuMemFree
     * @see JCudaDriver#cuMemFreeHost
     * @see JCudaDriver#cuMemGetAddressRange
     * @see JCudaDriver#cuMemGetInfo
     * @see JCudaDriver#cuMemHostAlloc
     * @see JCudaDriver#cuMemHostGetDevicePointer
     * @see JCudaDriver#cuMemsetD2D8
     * @see JCudaDriver#cuMemsetD2D16
     * @see JCudaDriver#cuMemsetD2D32
     * @see JCudaDriver#cuMemsetD8
     * @see JCudaDriver#cuMemsetD16
     * @see JCudaDriver#cuMemsetD32
     */    
    public static int cuMemcpyAtoA(CUarray dstArray, long dstIndex, CUarray srcArray, long srcIndex, long ByteCount)
    {
        // Perform the array-to-array copy natively, then hand the returned
        // status code to checkResult (defined elsewhere in this class).
        final int resultCode = cuMemcpyAtoANative(dstArray, dstIndex, srcArray, srcIndex, ByteCount);
        return checkResult(resultCode);
    }

    // JNI binding behind cuMemcpyAtoA; returns the raw driver status code.
    private static native int cuMemcpyAtoANative(CUarray dstArray, long dstIndex, CUarray srcArray, long srcIndex, long ByteCount);


    /**
     * Copies memory for 2D arrays.
     * 
     *      * CUresult cuMemcpy2D (
     *      const CUDA_MEMCPY2D* pCopy )
     * 
     * 
     *   Copies memory for 2D arrays.  Perform a
     *     2D memory copy according to the parameters specified in pCopy.
     *     The CUDA_MEMCPY2D structure is defined as:
     *   
     *      typedef struct CUDA_MEMCPY2D_st {
     *       unsigned int srcXInBytes, srcY;
     *       CUmemorytype srcMemoryType;
     *           const void *srcHost;
     *           CUdeviceptr srcDevice;
     *           CUarray srcArray;
     *           unsigned int srcPitch;
     * 
     *       unsigned int dstXInBytes, dstY;
     *       CUmemorytype dstMemoryType;
     *           void *dstHost;
     *           CUdeviceptr dstDevice;
     *           CUarray dstArray;
     *           unsigned int dstPitch;
     * 
     *       unsigned int WidthInBytes;
     *       unsigned int Height;
     *    } CUDA_MEMCPY2D;
     *   where:
     *   
     *     
     *       srcMemoryType and dstMemoryType
     *         specify the type of memory of the source and destination, respectively;
     *         CUmemorytype_enum
     *         is defined as:
     *       
     *     
     *   
     *   
     *      typedef enum CUmemorytype_enum {
     *       CU_MEMORYTYPE_HOST = 0x01,
     *       CU_MEMORYTYPE_DEVICE = 0x02,
     *       CU_MEMORYTYPE_ARRAY = 0x03,
     *       CU_MEMORYTYPE_UNIFIED = 0x04
     *    } CUmemorytype;
     *   
     *   If srcMemoryType is CU_MEMORYTYPE_UNIFIED,
     *     srcDevice and srcPitch specify the (unified virtual address space) base
     *     address of the source data and the bytes per row
     *     to apply. srcArray is ignored. This value
     *     may be used only if unified addressing is supported in the calling
     *     context.
     *   
     *   If srcMemoryType is CU_MEMORYTYPE_HOST,
     *     srcHost and srcPitch specify the (host) base address of the source data
     *     and the bytes per row to apply. srcArray is ignored.
     *   
     *   If srcMemoryType is CU_MEMORYTYPE_DEVICE,
     *     srcDevice and srcPitch specify the (device) base address of the source
     *     data and the bytes per row to apply. srcArray is
     *     ignored.
     *   
     *   If srcMemoryType is CU_MEMORYTYPE_ARRAY,
     *     srcArray specifies the handle of the source data. srcHost, srcDevice
     *     and srcPitch are ignored.
     *   
     *   If dstMemoryType is CU_MEMORYTYPE_HOST,
     *     dstHost and dstPitch specify the (host) base address of the destination
     *     data and the bytes per row to apply. dstArray is
     *     ignored.
     *   
     *   If dstMemoryType is CU_MEMORYTYPE_UNIFIED,
     *     dstDevice and dstPitch specify the (unified virtual address space) base
     *     address of the destination data and the bytes per row
     *     to apply. dstArray is ignored. This value
     *     may be used only if unified addressing is supported in the calling
     *     context.
     *   
     *   If dstMemoryType is CU_MEMORYTYPE_DEVICE,
     *     dstDevice and dstPitch specify the (device) base address of the
     *     destination data and the bytes per row to apply. dstArray
     *     is ignored.
     *   
     *   If dstMemoryType is CU_MEMORYTYPE_ARRAY,
     *     dstArray specifies the handle of the destination data. dstHost,
     *     dstDevice and dstPitch are ignored.
     *   
     *   
     *     
     *       srcXInBytes and srcY specify
     *         the base address of the source data for the copy.
     *       
     *     
     *   
     *   
     *   For host pointers, the starting address
     *     is 
     *   
     *   void* Start = (void*)((char*)srcHost+srcY*srcPitch + srcXInBytes);
     *   
     *   For device pointers, the starting
     *     address is 
     *   
     *   CUdeviceptr Start = srcDevice+srcY*srcPitch+srcXInBytes;
     *   
     *   For CUDA arrays, srcXInBytes must be
     *     evenly divisible by the array element size.
     *   
     *   
     *     
     *       dstXInBytes and dstY specify
     *         the base address of the destination data for the copy.
     *       
     *     
     *   
     *   
     *   For host pointers, the base address is
     *   
     *   void* dstStart = (void*)((char*)dstHost+dstY*dstPitch + dstXInBytes);
     *   
     *   For device pointers, the starting
     *     address is 
     *   
     *   CUdeviceptr dstStart = dstDevice+dstY*dstPitch+dstXInBytes;
     *   
     *   For CUDA arrays, dstXInBytes must be
     *     evenly divisible by the array element size.
     *   
     *   
     *     
     *       WidthInBytes and Height specify
     *         the width (in bytes) and height of the 2D copy being performed.
     *       
     *     
     *     
     *       If specified, srcPitch must be
     *         greater than or equal to WidthInBytes + srcXInBytes, and dstPitch must
     *         be greater than or equal
     *         to WidthInBytes + dstXInBytes.
     *       
     *     
     *   
     *   
     *   cuMemcpy2D() returns an error if any
     *     pitch is greater than the maximum allowed (CU_DEVICE_ATTRIBUTE_MAX_PITCH).
     *     cuMemAllocPitch() passes back pitches that always work with cuMemcpy2D().
     *     On intra-device memory copies (device to device, CUDA array to device,
     *     CUDA array to CUDA array), cuMemcpy2D() may fail for pitches not
     *     computed by cuMemAllocPitch(). cuMemcpy2DUnaligned() does not have this
     *     restriction, but may run significantly slower in the cases where
     *     cuMemcpy2D() would have returned an error code.
     *   
     *   
     *     Note:
     *     Note that this
     *       function may also return error codes from previous, asynchronous
     *       launches.
     *     
     *   
     *   
     * 
     * 
     * @param pCopy Parameters for the memory copy
     * 
     * @return CUDA_SUCCESS, CUDA_ERROR_DEINITIALIZED, CUDA_ERROR_NOT_INITIALIZED,
     * CUDA_ERROR_INVALID_CONTEXT, CUDA_ERROR_INVALID_VALUE
     * 
     * @see JCudaDriver#cuArray3DCreate
     * @see JCudaDriver#cuArray3DGetDescriptor
     * @see JCudaDriver#cuArrayCreate
     * @see JCudaDriver#cuArrayDestroy
     * @see JCudaDriver#cuArrayGetDescriptor
     * @see JCudaDriver#cuMemAlloc
     * @see JCudaDriver#cuMemAllocHost
     * @see JCudaDriver#cuMemAllocPitch
     * @see JCudaDriver#cuMemcpy2DAsync
     * @see JCudaDriver#cuMemcpy2DUnaligned
     * @see JCudaDriver#cuMemcpy3D
     * @see JCudaDriver#cuMemcpy3DAsync
     * @see JCudaDriver#cuMemcpyAtoA
     * @see JCudaDriver#cuMemcpyAtoD
     * @see JCudaDriver#cuMemcpyAtoH
     * @see JCudaDriver#cuMemcpyAtoHAsync
     * @see JCudaDriver#cuMemcpyDtoA
     * @see JCudaDriver#cuMemcpyDtoD
     * @see JCudaDriver#cuMemcpyDtoDAsync
     * @see JCudaDriver#cuMemcpyDtoH
     * @see JCudaDriver#cuMemcpyDtoHAsync
     * @see JCudaDriver#cuMemcpyHtoA
     * @see JCudaDriver#cuMemcpyHtoAAsync
     * @see JCudaDriver#cuMemcpyHtoD
     * @see JCudaDriver#cuMemcpyHtoDAsync
     * @see JCudaDriver#cuMemFree
     * @see JCudaDriver#cuMemFreeHost
     * @see JCudaDriver#cuMemGetAddressRange
     * @see JCudaDriver#cuMemGetInfo
     * @see JCudaDriver#cuMemHostAlloc
     * @see JCudaDriver#cuMemHostGetDevicePointer
     * @see JCudaDriver#cuMemsetD2D8
     * @see JCudaDriver#cuMemsetD2D16
     * @see JCudaDriver#cuMemsetD2D32
     * @see JCudaDriver#cuMemsetD8
     * @see JCudaDriver#cuMemsetD16
     * @see JCudaDriver#cuMemsetD32
     */    
    public static int cuMemcpy2D(CUDA_MEMCPY2D pCopy)
    {
        // Run the 2D copy through the JNI binding and pass the resulting
        // status code to checkResult (defined elsewhere in this class).
        final int status = cuMemcpy2DNative(pCopy);
        return checkResult(status);
    }

    // JNI binding behind cuMemcpy2D; returns the raw driver status code.
    private static native int cuMemcpy2DNative(CUDA_MEMCPY2D pCopy);


    /**
     * Copies memory for 2D arrays.
     * 
     *      * CUresult cuMemcpy2DUnaligned (
     *      const CUDA_MEMCPY2D* pCopy )
     * 
     * 
     *   Copies memory for 2D arrays.  Perform a
     *     2D memory copy according to the parameters specified in pCopy.
     *     The CUDA_MEMCPY2D structure is defined as:
     *   
     *      typedef struct CUDA_MEMCPY2D_st {
     *       unsigned int srcXInBytes, srcY;
     *       CUmemorytype srcMemoryType;
     *       const void *srcHost;
     *       CUdeviceptr srcDevice;
     *       CUarray srcArray;
     *       unsigned int srcPitch;
     *       unsigned int dstXInBytes, dstY;
     *       CUmemorytype dstMemoryType;
     *       void *dstHost;
     *       CUdeviceptr dstDevice;
     *       CUarray dstArray;
     *       unsigned int dstPitch;
     *       unsigned int WidthInBytes;
     *       unsigned int Height;
     *    } CUDA_MEMCPY2D;
     *   where:
     *   
     *     
     *       srcMemoryType and dstMemoryType
     *         specify the type of memory of the source and destination, respectively;
     *         CUmemorytype_enum
     *         is defined as:
     *       
     *     
     *   
     *   
     *      typedef enum CUmemorytype_enum {
     *       CU_MEMORYTYPE_HOST = 0x01,
     *       CU_MEMORYTYPE_DEVICE = 0x02,
     *       CU_MEMORYTYPE_ARRAY = 0x03,
     *       CU_MEMORYTYPE_UNIFIED = 0x04
     *    } CUmemorytype;
     *   
     *   If srcMemoryType is CU_MEMORYTYPE_UNIFIED,
     *     srcDevice and srcPitch specify the (unified virtual address space) base
     *     address of the source data and the bytes per row
     *     to apply. srcArray is ignored. This value
     *     may be used only if unified addressing is supported in the calling
     *     context.
     *   
     *   If srcMemoryType is CU_MEMORYTYPE_HOST,
     *     srcHost and srcPitch specify the (host) base address of the source data
     *     and the bytes per row to apply. srcArray is ignored.
     *   
     *   If srcMemoryType is CU_MEMORYTYPE_DEVICE,
     *     srcDevice and srcPitch specify the (device) base address of the source
     *     data and the bytes per row to apply. srcArray is
     *     ignored.
     *   
     *   If srcMemoryType is CU_MEMORYTYPE_ARRAY,
     *     srcArray specifies the handle of the source data. srcHost, srcDevice
     *     and srcPitch are ignored.
     *   
     *   If dstMemoryType is CU_MEMORYTYPE_UNIFIED,
     *     dstDevice and dstPitch specify the (unified virtual address space) base
     *     address of the destination data and the bytes per row
     *     to apply. dstArray is ignored. This value
     *     may be used only if unified addressing is supported in the calling
     *     context.
     *   
     *   If dstMemoryType is CU_MEMORYTYPE_HOST,
     *     dstHost and dstPitch specify the (host) base address of the destination
     *     data and the bytes per row to apply. dstArray is
     *     ignored.
     *   
     *   If dstMemoryType is CU_MEMORYTYPE_DEVICE,
     *     dstDevice and dstPitch specify the (device) base address of the
     *     destination data and the bytes per row to apply. dstArray
     *     is ignored.
     *   
     *   If dstMemoryType is CU_MEMORYTYPE_ARRAY,
     *     dstArray specifies the handle of the destination data. dstHost,
     *     dstDevice and dstPitch are ignored.
     *   
     *   
     *     
     *       srcXInBytes and srcY specify
     *         the base address of the source data for the copy.
     *       
     *     
     *   
     *   
     *   For host pointers, the starting address
     *     is 
     *   
     *   void* Start = (void*)((char*)srcHost+srcY*srcPitch + srcXInBytes);
     *   
     *   For device pointers, the starting
     *     address is 
     *   
     *   CUdeviceptr Start = srcDevice+srcY*srcPitch+srcXInBytes;
     *   
     *   For CUDA arrays, srcXInBytes must be
     *     evenly divisible by the array element size.
     *   
     *   
     *     
     *       dstXInBytes and dstY specify
     *         the base address of the destination data for the copy.
     *       
     *     
     *   
     *   
     *   For host pointers, the base address is
     *   
     *   void* dstStart = (void*)((char*)dstHost+dstY*dstPitch + dstXInBytes);
     *   
     *   For device pointers, the starting
     *     address is 
     *   
     *   CUdeviceptr dstStart = dstDevice+dstY*dstPitch+dstXInBytes;
     *   
     *   For CUDA arrays, dstXInBytes must be
     *     evenly divisible by the array element size.
     *   
     *   
     *     
     *       WidthInBytes and Height specify
     *         the width (in bytes) and height of the 2D copy being performed.
     *       
     *     
     *     
     *       If specified, srcPitch must be
     *         greater than or equal to WidthInBytes + srcXInBytes, and dstPitch must
     *         be greater than or equal
     *         to WidthInBytes + dstXInBytes.
     *       
     *     
     *   
     *   
     *   cuMemcpy2D() returns an error if any
     *     pitch is greater than the maximum allowed (CU_DEVICE_ATTRIBUTE_MAX_PITCH).
     *     cuMemAllocPitch() passes back pitches that always work with cuMemcpy2D().
     *     On intra-device memory copies (device to device, CUDA array to device,
     *     CUDA array to CUDA array), cuMemcpy2D() may fail for pitches not
     *     computed by cuMemAllocPitch(). cuMemcpy2DUnaligned() does not have this
     *     restriction, but may run significantly slower in the cases where
     *     cuMemcpy2D() would have returned an error code.
     *   
     *   
     *     Note:
     *     Note that this
     *       function may also return error codes from previous, asynchronous
     *       launches.
     *     
     *   
     *   
     * 
     * 
     * @param pCopy Parameters for the memory copy
     * 
     * @return CUDA_SUCCESS, CUDA_ERROR_DEINITIALIZED, CUDA_ERROR_NOT_INITIALIZED,
     * CUDA_ERROR_INVALID_CONTEXT, CUDA_ERROR_INVALID_VALUE
     * 
     * @see JCudaDriver#cuArray3DCreate
     * @see JCudaDriver#cuArray3DGetDescriptor
     * @see JCudaDriver#cuArrayCreate
     * @see JCudaDriver#cuArrayDestroy
     * @see JCudaDriver#cuArrayGetDescriptor
     * @see JCudaDriver#cuMemAlloc
     * @see JCudaDriver#cuMemAllocHost
     * @see JCudaDriver#cuMemAllocPitch
     * @see JCudaDriver#cuMemcpy2D
     * @see JCudaDriver#cuMemcpy2DAsync
     * @see JCudaDriver#cuMemcpy3D
     * @see JCudaDriver#cuMemcpy3DAsync
     * @see JCudaDriver#cuMemcpyAtoA
     * @see JCudaDriver#cuMemcpyAtoD
     * @see JCudaDriver#cuMemcpyAtoH
     * @see JCudaDriver#cuMemcpyAtoHAsync
     * @see JCudaDriver#cuMemcpyDtoA
     * @see JCudaDriver#cuMemcpyDtoD
     * @see JCudaDriver#cuMemcpyDtoDAsync
     * @see JCudaDriver#cuMemcpyDtoH
     * @see JCudaDriver#cuMemcpyDtoHAsync
     * @see JCudaDriver#cuMemcpyHtoA
     * @see JCudaDriver#cuMemcpyHtoAAsync
     * @see JCudaDriver#cuMemcpyHtoD
     * @see JCudaDriver#cuMemcpyHtoDAsync
     * @see JCudaDriver#cuMemFree
     * @see JCudaDriver#cuMemFreeHost
     * @see JCudaDriver#cuMemGetAddressRange
     * @see JCudaDriver#cuMemGetInfo
     * @see JCudaDriver#cuMemHostAlloc
     * @see JCudaDriver#cuMemHostGetDevicePointer
     * @see JCudaDriver#cuMemsetD2D8
     * @see JCudaDriver#cuMemsetD2D16
     * @see JCudaDriver#cuMemsetD2D32
     * @see JCudaDriver#cuMemsetD8
     * @see JCudaDriver#cuMemsetD16
     * @see JCudaDriver#cuMemsetD32
     */    
    public static int cuMemcpy2DUnaligned(CUDA_MEMCPY2D pCopy)
    {
        // Delegate to the JNI binding, then let checkResult (defined
        // elsewhere in this class) process the returned status code.
        final int resultCode = cuMemcpy2DUnalignedNative(pCopy);
        return checkResult(resultCode);
    }

    // JNI binding behind cuMemcpy2DUnaligned; returns the raw driver status code.
    private static native int cuMemcpy2DUnalignedNative(CUDA_MEMCPY2D pCopy);


    /**
     * Copies memory for 3D arrays.
     * 
     *      * CUresult cuMemcpy3D (
     *      const CUDA_MEMCPY3D* pCopy )
     * 
     * 
     *   Copies memory for 3D arrays.  Perform a
     *     3D memory copy according to the parameters specified in pCopy.
     *     The CUDA_MEMCPY3D structure is defined as:
     *   
     *           typedef struct CUDA_MEMCPY3D_st
     * {
     * 
     *             unsigned int srcXInBytes, srcY, srcZ;
     *             unsigned int srcLOD;
     *             CUmemorytype srcMemoryType;
     *                 const void *srcHost;
     *                 CUdeviceptr srcDevice;
     *                 CUarray srcArray;
     *                 unsigned int srcPitch;  // ignored when src is array
     *                 unsigned int srcHeight; // ignored when src is array; may be 0 if Depth==1
     * 
     *             unsigned int dstXInBytes, dstY, dstZ;
     *             unsigned int dstLOD;
     *             CUmemorytype dstMemoryType;
     *                 void *dstHost;
     *                 CUdeviceptr dstDevice;
     *                 CUarray dstArray;
     *                 unsigned int dstPitch;  // ignored when dst is array
     *                 unsigned int dstHeight; // ignored when dst is array; may be 0 if Depth==1
     * 
     *             unsigned int WidthInBytes;
     *             unsigned int Height;
     *             unsigned int Depth;
     *         } CUDA_MEMCPY3D;
     *   where:
     *   
     *     
     *       srcMemoryType and dstMemoryType
     *         specify the type of memory of the source and destination, respectively;
     *         CUmemorytype_enum
     *         is defined as:
     *       
     *     
     *   
     *   
     *      typedef enum CUmemorytype_enum {
     *       CU_MEMORYTYPE_HOST = 0x01,
     *       CU_MEMORYTYPE_DEVICE = 0x02,
     *       CU_MEMORYTYPE_ARRAY = 0x03,
     *       CU_MEMORYTYPE_UNIFIED = 0x04
     *    } CUmemorytype;
     *   
     *   If srcMemoryType is CU_MEMORYTYPE_UNIFIED,
     *     srcDevice and srcPitch specify the (unified virtual address space) base
     *     address of the source data and the bytes per row
     *     to apply. srcArray is ignored. This value
     *     may be used only if unified addressing is supported in the calling
     *     context.
     *   
     *   If srcMemoryType is CU_MEMORYTYPE_HOST,
     *     srcHost, srcPitch and srcHeight specify the (host) base address of the
     *     source data, the bytes per row, and the height of
     *     each 2D slice of the 3D array. srcArray
     *     is ignored.
     *   
     *   If srcMemoryType is CU_MEMORYTYPE_DEVICE,
     *     srcDevice, srcPitch and srcHeight specify the (device) base address of
     *     the source data, the bytes per row, and the height
     *     of each 2D slice of the 3D array. srcArray
     *     is ignored.
     *   
     *   If srcMemoryType is CU_MEMORYTYPE_ARRAY,
     *     srcArray specifies the handle of the source data. srcHost, srcDevice,
     *     srcPitch and srcHeight are ignored.
     *   
     *   If dstMemoryType is CU_MEMORYTYPE_UNIFIED,
     *     dstDevice and dstPitch specify the (unified virtual address space) base
     *     address of the destination data and the bytes per row
     *     to apply. dstArray is ignored. This value
     *     may be used only if unified addressing is supported in the calling
     *     context.
     *   
     *   If dstMemoryType is CU_MEMORYTYPE_HOST,
     *     dstHost, dstPitch and dstHeight specify the (host) base address of the destination
     *     data, the bytes per row, and the height of each
     *     2D slice of the 3D array. dstArray is
     *     ignored.
     *   
     *   If dstMemoryType is CU_MEMORYTYPE_DEVICE,
     *     dstDevice, dstPitch and dstHeight specify the (device) base address of the
     *     destination data, the bytes per row, and the height of each
     *     2D slice of the 3D array. dstArray is
     *     ignored.
     *   
     *   If dstMemoryType is CU_MEMORYTYPE_ARRAY,
     *     dstArray specifies the handle of the destination data. dstHost,
     *     dstDevice, dstPitch and dstHeight are ignored.
     *   
     *   
     *     
     *       srcXInBytes, srcY and srcZ
     *         specify the base address of the source data for the copy.
     *       
     *     
     *   
     *   
     *   For host pointers, the starting address
     *     is 
     *   
     *   void* Start = (void*)((char*)srcHost+(srcZ*srcHeight+srcY)*srcPitch + srcXInBytes);
     *   
     *   For device pointers, the starting
     *     address is 
     *   
     *   CUdeviceptr Start = srcDevice+(srcZ*srcHeight+srcY)*srcPitch+srcXInBytes;
     *   
     *   For CUDA arrays, srcXInBytes must be
     *     evenly divisible by the array element size.
     *   
     *   
     *     
     *       dstXInBytes, dstY and dstZ
     *         specify the base address of the destination data for the copy.
     *       
     *     
     *   
     *   
     *   For host pointers, the base address is
     *   
     *   void* dstStart = (void*)((char*)dstHost+(dstZ*dstHeight+dstY)*dstPitch + dstXInBytes);
     *   
     *   For device pointers, the starting
     *     address is 
     *   
     *   CUdeviceptr dstStart = dstDevice+(dstZ*dstHeight+dstY)*dstPitch+dstXInBytes;
     *   
     *   For CUDA arrays, dstXInBytes must be
     *     evenly divisible by the array element size.
     *   
     *   
     *     
     *       WidthInBytes, Height and Depth
     *         specify the width (in bytes), height and depth of the 3D copy being
     *         performed.
     *       
     *     
     *     
     *       If specified, srcPitch must be
     *         greater than or equal to WidthInBytes + srcXInBytes, and dstPitch must
     *         be greater than or equal
     *         to WidthInBytes + dstXInBytes.
     *       
     *     
     *     
     *       If specified, srcHeight must
     *         be greater than or equal to Height + srcY, and dstHeight must be
     *         greater than or equal to Height
     *         + dstY.
     *       
     *     
     *   
     *   
     *   cuMemcpy3D() returns an error if any
     *     pitch is greater than the maximum allowed
     *     (CU_DEVICE_ATTRIBUTE_MAX_PITCH).
     *   
     *   
     *     The srcLOD and dstLOD members of the
     *     CUDA_MEMCPY3D structure must be set to 0.
     *   
     *   
     *     Note:
     *     Note that this
     *       function may also return error codes from previous, asynchronous
     *       launches.
     *     
     *   
     *   
     * 
     * 
     * @param pCopy Parameters for the memory copy
     * 
     * @return CUDA_SUCCESS, CUDA_ERROR_DEINITIALIZED, CUDA_ERROR_NOT_INITIALIZED,
     * CUDA_ERROR_INVALID_CONTEXT, CUDA_ERROR_INVALID_VALUE
     * 
     * @see JCudaDriver#cuArray3DCreate
     * @see JCudaDriver#cuArray3DGetDescriptor
     * @see JCudaDriver#cuArrayCreate
     * @see JCudaDriver#cuArrayDestroy
     * @see JCudaDriver#cuArrayGetDescriptor
     * @see JCudaDriver#cuMemAlloc
     * @see JCudaDriver#cuMemAllocHost
     * @see JCudaDriver#cuMemAllocPitch
     * @see JCudaDriver#cuMemcpy2D
     * @see JCudaDriver#cuMemcpy2DAsync
     * @see JCudaDriver#cuMemcpy2DUnaligned
     * @see JCudaDriver#cuMemcpy3DAsync
     * @see JCudaDriver#cuMemcpyAtoA
     * @see JCudaDriver#cuMemcpyAtoD
     * @see JCudaDriver#cuMemcpyAtoH
     * @see JCudaDriver#cuMemcpyAtoHAsync
     * @see JCudaDriver#cuMemcpyDtoA
     * @see JCudaDriver#cuMemcpyDtoD
     * @see JCudaDriver#cuMemcpyDtoDAsync
     * @see JCudaDriver#cuMemcpyDtoH
     * @see JCudaDriver#cuMemcpyDtoHAsync
     * @see JCudaDriver#cuMemcpyHtoA
     * @see JCudaDriver#cuMemcpyHtoAAsync
     * @see JCudaDriver#cuMemcpyHtoD
     * @see JCudaDriver#cuMemcpyHtoDAsync
     * @see JCudaDriver#cuMemFree
     * @see JCudaDriver#cuMemFreeHost
     * @see JCudaDriver#cuMemGetAddressRange
     * @see JCudaDriver#cuMemGetInfo
     * @see JCudaDriver#cuMemHostAlloc
     * @see JCudaDriver#cuMemHostGetDevicePointer
     * @see JCudaDriver#cuMemsetD2D8
     * @see JCudaDriver#cuMemsetD2D16
     * @see JCudaDriver#cuMemsetD2D32
     * @see JCudaDriver#cuMemsetD8
     * @see JCudaDriver#cuMemsetD16
     * @see JCudaDriver#cuMemsetD32
     */    
    public static int cuMemcpy3D(CUDA_MEMCPY3D pCopy)
    {
        // Run the 3D copy through the JNI binding and pass the resulting
        // status code to checkResult (defined elsewhere in this class).
        final int status = cuMemcpy3DNative(pCopy);
        return checkResult(status);
    }

    // JNI binding behind cuMemcpy3D; returns the raw driver status code.
    private static native int cuMemcpy3DNative(CUDA_MEMCPY3D pCopy);


    /**
     * Copies memory between contexts.
     * 
     *      * CUresult cuMemcpy3DPeer (
     *      const CUDA_MEMCPY3D_PEER* pCopy )
     * 
     * 
     *   Copies memory between contexts.  Perform
     *     a 3D memory copy according to the parameters specified in pCopy. See the definition of the CUDA_MEMCPY3D_PEER structure
     *     for documentation of its parameters.
     *   
     *   Note that this function is synchronous
     *     with respect to the host only if the source or destination memory is
     *     of type CU_MEMORYTYPE_HOST. Note also that this copy is serialized with
     *     respect to all pending and future asynchronous work in the current
     *     context,
     *     the copy's source context, and the copy's
     *     destination context (use cuMemcpy3DPeerAsync to avoid this
     *     synchronization).
     *   
     *   
     *     Note:
     *     Note that this
     *       function may also return error codes from previous, asynchronous
     *       launches.
     *     
     *   
     *   
     * 
     * 
     * @param pCopy Parameters for the memory copy
     * 
     * @return CUDA_SUCCESS, CUDA_ERROR_DEINITIALIZED, CUDA_ERROR_NOT_INITIALIZED,
     * CUDA_ERROR_INVALID_CONTEXT, CUDA_ERROR_INVALID_VALUE
     * 
     * @see JCudaDriver#cuMemcpyDtoD
     * @see JCudaDriver#cuMemcpyPeer
     * @see JCudaDriver#cuMemcpyDtoDAsync
     * @see JCudaDriver#cuMemcpyPeerAsync
     * @see JCudaDriver#cuMemcpy3DPeerAsync
     */    
    public static int cuMemcpy3DPeer(CUDA_MEMCPY3D_PEER pCopy)
    {
        // Delegate to the JNI binding, then let checkResult (defined
        // elsewhere in this class) process the returned status code.
        final int resultCode = cuMemcpy3DPeerNative(pCopy);
        return checkResult(resultCode);
    }
    // JNI binding behind cuMemcpy3DPeer; returns the raw driver status code.
    private static native int cuMemcpy3DPeerNative(CUDA_MEMCPY3D_PEER pCopy);


    /**
     * Copies memory asynchronously.
     * 
     *      * CUresult cuMemcpyAsync (
     *      CUdeviceptr dst,
     *      CUdeviceptr src,
     *      size_t ByteCount,
     *      CUstream hStream )
     * 
     * 
     *   Copies memory asynchronously.  Copies
     *     data between two pointers. dst and src are base
     *     pointers of the destination and source, respectively. ByteCount
     *     specifies the number of bytes to copy. Note that this function infers
     *     the type of the transfer (host to host, host to device,
     *     device to device, or device to host) from
     *     the pointer values. This function is only allowed in contexts which
     *     support unified
     *     addressing. Note that this function is
     *     asynchronous and can optionally be associated to a stream by passing a
     *     non-zero hStream argument.
     *   
     *   
     *     Note:
     *     Note that this
     *       function may also return error codes from previous, asynchronous
     *       launches.
     *     
     *   
     *   
     * 
     * 
     * @param dst Destination unified virtual address space pointer
     * @param src Source unified virtual address space pointer
     * @param ByteCount Size of memory copy in bytes
     * @param hStream Stream identifier
     * 
     * @return CUDA_SUCCESS, CUDA_ERROR_DEINITIALIZED, CUDA_ERROR_NOT_INITIALIZED,
     * CUDA_ERROR_INVALID_CONTEXT, CUDA_ERROR_INVALID_VALUE
     * 
     * @see JCudaDriver#cuArray3DCreate
     * @see JCudaDriver#cuArray3DGetDescriptor
     * @see JCudaDriver#cuArrayCreate
     * @see JCudaDriver#cuArrayDestroy
     * @see JCudaDriver#cuArrayGetDescriptor
     * @see JCudaDriver#cuMemAlloc
     * @see JCudaDriver#cuMemAllocHost
     * @see JCudaDriver#cuMemAllocPitch
     * @see JCudaDriver#cuMemcpy2D
     * @see JCudaDriver#cuMemcpy2DAsync
     * @see JCudaDriver#cuMemcpy2DUnaligned
     * @see JCudaDriver#cuMemcpy3D
     * @see JCudaDriver#cuMemcpy3DAsync
     * @see JCudaDriver#cuMemcpyAtoA
     * @see JCudaDriver#cuMemcpyAtoD
     * @see JCudaDriver#cuMemcpyAtoH
     * @see JCudaDriver#cuMemcpyAtoHAsync
     * @see JCudaDriver#cuMemcpyDtoA
     * @see JCudaDriver#cuMemcpyDtoD
     * @see JCudaDriver#cuMemcpyDtoH
     * @see JCudaDriver#cuMemcpyDtoHAsync
     * @see JCudaDriver#cuMemcpyHtoA
     * @see JCudaDriver#cuMemcpyHtoAAsync
     * @see JCudaDriver#cuMemcpyHtoD
     * @see JCudaDriver#cuMemcpyHtoDAsync
     * @see JCudaDriver#cuMemFree
     * @see JCudaDriver#cuMemFreeHost
     * @see JCudaDriver#cuMemGetAddressRange
     * @see JCudaDriver#cuMemGetInfo
     * @see JCudaDriver#cuMemHostAlloc
     * @see JCudaDriver#cuMemHostGetDevicePointer
     * @see JCudaDriver#cuMemsetD2D8
     * @see JCudaDriver#cuMemsetD2D8Async
     * @see JCudaDriver#cuMemsetD2D16
     * @see JCudaDriver#cuMemsetD2D16Async
     * @see JCudaDriver#cuMemsetD2D32
     * @see JCudaDriver#cuMemsetD2D32Async
     * @see JCudaDriver#cuMemsetD8
     * @see JCudaDriver#cuMemsetD8Async
     * @see JCudaDriver#cuMemsetD16
     * @see JCudaDriver#cuMemsetD16Async
     * @see JCudaDriver#cuMemsetD32
     * @see JCudaDriver#cuMemsetD32Async
     */    
    public static int cuMemcpyAsync(CUdeviceptr dst, CUdeviceptr src, long ByteCount, CUstream hStream)
    {
        // Issue the copy through the JNI binding, then pass the returned
        // status code to checkResult (defined elsewhere in this class).
        final int status = cuMemcpyAsyncNative(dst, src, ByteCount, hStream);
        return checkResult(status);
    }
    // JNI binding behind cuMemcpyAsync; returns the raw driver status code.
    private static native int cuMemcpyAsyncNative(CUdeviceptr dst, CUdeviceptr src, long ByteCount, CUstream hStream);


    /**
     * Copies device memory between two contexts asynchronously.
     * 
     *      * CUresult cuMemcpyPeerAsync (
     *      CUdeviceptr dstDevice,
     *      CUcontext dstContext,
     *      CUdeviceptr srcDevice,
     *      CUcontext srcContext,
     *      size_t ByteCount,
     *      CUstream hStream )
     * 
     * 
     *   Copies device memory between two contexts
     *     asynchronously.  Copies from device memory in one context to device
     *     memory in another
     *     context. dstDevice is the base
     *     device pointer of the destination memory and dstContext is
     *     the destination context. srcDevice is the base device pointer
     *     of the source memory and srcContext is the source context.
     *     ByteCount specifies the number of bytes to copy. Note that
     *     this function is asynchronous with respect to the host and all work in
     *     other
     *     streams in other devices.
     *   
     *   
     *     Note:
     *     Note that this
     *       function may also return error codes from previous, asynchronous
     *       launches.
     *     
     *   
     *   
     * 
     * 
     * @param dstDevice Destination device pointer
     * @param dstContext Destination context
     * @param srcDevice Source device pointer
     * @param srcContext Source context
     * @param ByteCount Size of memory copy in bytes
     * @param hStream Stream identifier
     * 
     * @return CUDA_SUCCESS, CUDA_ERROR_DEINITIALIZED, CUDA_ERROR_NOT_INITIALIZED,
     * CUDA_ERROR_INVALID_CONTEXT, CUDA_ERROR_INVALID_VALUE
     * 
     * @see JCudaDriver#cuMemcpyDtoD
     * @see JCudaDriver#cuMemcpyPeer
     * @see JCudaDriver#cuMemcpy3DPeer
     * @see JCudaDriver#cuMemcpyDtoDAsync
     * @see JCudaDriver#cuMemcpy3DPeerAsync
     */    
    public static int cuMemcpyPeerAsync(CUdeviceptr dstDevice, CUcontext dstContext, CUdeviceptr srcDevice, CUcontext srcContext, long ByteCount, CUstream hStream)
    {
        // All six arguments are forwarded unchanged, in the same order, to the native binding.
        final int nativeStatus = cuMemcpyPeerAsyncNative(
            dstDevice, dstContext, srcDevice, srcContext, ByteCount, hStream);

        // The value returned to the caller is whatever checkResult yields for that status.
        return checkResult(nativeStatus);
    }

    // Native counterpart of cuMemcpyPeerAsync; same parameter list, implemented outside this Java source.
    private static native int cuMemcpyPeerAsyncNative(CUdeviceptr dstDevice, CUcontext dstContext, CUdeviceptr srcDevice, CUcontext srcContext, long ByteCount, CUstream hStream);


    /**
     * Copies memory from Host to Device.
     * 
     * CUresult cuMemcpyHtoDAsync (
     *      CUdeviceptr dstDevice,
     *      const void* srcHost,
     *      size_t ByteCount,
     *      CUstream hStream )
     * 
     * 
     *   Copies memory from Host to Device. 
     *     Copies from host memory to device memory. dstDevice and srcHost are the base addresses of the destination and source,
     *     respectively. ByteCount specifies the number of bytes to
     *     copy.
     *   
     *   cuMemcpyHtoDAsync() is asynchronous and
     *     can optionally be associated to a stream by passing a non-zero hStream argument. It only works on page-locked memory and returns
     *     an error if a pointer to pageable memory is passed as input.
     *   
     *   
     *     Note:
     *     Note that this
     *       function may also return error codes from previous, asynchronous
     *       launches.
     *     
     *   
     *   
     * 
     * 
     * @param dstDevice Destination device pointer
     * @param srcHost Source host pointer
     * @param ByteCount Size of memory copy in bytes
     * @param hStream Stream identifier
     * 
     * @return CUDA_SUCCESS, CUDA_ERROR_DEINITIALIZED, CUDA_ERROR_NOT_INITIALIZED,
     * CUDA_ERROR_INVALID_CONTEXT, CUDA_ERROR_INVALID_VALUE
     * 
     * @see JCudaDriver#cuArray3DCreate
     * @see JCudaDriver#cuArray3DGetDescriptor
     * @see JCudaDriver#cuArrayCreate
     * @see JCudaDriver#cuArrayDestroy
     * @see JCudaDriver#cuArrayGetDescriptor
     * @see JCudaDriver#cuMemAlloc
     * @see JCudaDriver#cuMemAllocHost
     * @see JCudaDriver#cuMemAllocPitch
     * @see JCudaDriver#cuMemcpy2D
     * @see JCudaDriver#cuMemcpy2DAsync
     * @see JCudaDriver#cuMemcpy2DUnaligned
     * @see JCudaDriver#cuMemcpy3D
     * @see JCudaDriver#cuMemcpy3DAsync
     * @see JCudaDriver#cuMemcpyAtoA
     * @see JCudaDriver#cuMemcpyAtoD
     * @see JCudaDriver#cuMemcpyAtoH
     * @see JCudaDriver#cuMemcpyAtoHAsync
     * @see JCudaDriver#cuMemcpyDtoA
     * @see JCudaDriver#cuMemcpyDtoD
     * @see JCudaDriver#cuMemcpyDtoDAsync
     * @see JCudaDriver#cuMemcpyDtoH
     * @see JCudaDriver#cuMemcpyDtoHAsync
     * @see JCudaDriver#cuMemcpyHtoA
     * @see JCudaDriver#cuMemcpyHtoAAsync
     * @see JCudaDriver#cuMemcpyHtoD
     * @see JCudaDriver#cuMemFree
     * @see JCudaDriver#cuMemFreeHost
     * @see JCudaDriver#cuMemGetAddressRange
     * @see JCudaDriver#cuMemGetInfo
     * @see JCudaDriver#cuMemHostAlloc
     * @see JCudaDriver#cuMemHostGetDevicePointer
     * @see JCudaDriver#cuMemsetD2D8
     * @see JCudaDriver#cuMemsetD2D8Async
     * @see JCudaDriver#cuMemsetD2D16
     * @see JCudaDriver#cuMemsetD2D16Async
     * @see JCudaDriver#cuMemsetD2D32
     * @see JCudaDriver#cuMemsetD2D32Async
     * @see JCudaDriver#cuMemsetD8
     * @see JCudaDriver#cuMemsetD8Async
     * @see JCudaDriver#cuMemsetD16
     * @see JCudaDriver#cuMemsetD16Async
     * @see JCudaDriver#cuMemsetD32
     * @see JCudaDriver#cuMemsetD32Async
     */    
    public static int cuMemcpyHtoDAsync(CUdeviceptr dstDevice, Pointer srcHost, long ByteCount, CUstream hStream)
    {
        // Step 1: issue the copy through the native binding and capture its status code.
        final int status = cuMemcpyHtoDAsyncNative(dstDevice, srcHost, ByteCount, hStream);
        // Step 2: let checkResult process the status and produce the return value.
        return checkResult(status);
    }

    // Declared native: no Java body; the host-to-device copy is carried out in native code.
    private static native int cuMemcpyHtoDAsyncNative(CUdeviceptr dstDevice, Pointer srcHost, long ByteCount, CUstream hStream);


    /**
     * Copies memory from Device to Host.
     * 
     * CUresult cuMemcpyDtoHAsync (
     *      void* dstHost,
     *      CUdeviceptr srcDevice,
     *      size_t ByteCount,
     *      CUstream hStream )
     * 
     * 
     *   Copies memory from Device to Host. 
     *     Copies from device to host memory. dstHost and srcDevice specify the base pointers of the destination and
     *     source, respectively. ByteCount specifies the number of bytes
     *     to copy.
     *   
     *   cuMemcpyDtoHAsync() is asynchronous and
     *     can optionally be associated to a stream by passing a non-zero hStream argument. It only works on page-locked memory and returns
     *     an error if a pointer to pageable memory is passed as input.
     *   
     *   
     *     Note:
     *     Note that this
     *       function may also return error codes from previous, asynchronous
     *       launches.
     *     
     *   
     *   
     * 
     * 
     * @param dstHost Destination host pointer
     * @param srcDevice Source device pointer
     * @param ByteCount Size of memory copy in bytes
     * @param hStream Stream identifier
     * 
     * @return CUDA_SUCCESS, CUDA_ERROR_DEINITIALIZED, CUDA_ERROR_NOT_INITIALIZED,
     * CUDA_ERROR_INVALID_CONTEXT, CUDA_ERROR_INVALID_VALUE
     * 
     * @see JCudaDriver#cuArray3DCreate
     * @see JCudaDriver#cuArray3DGetDescriptor
     * @see JCudaDriver#cuArrayCreate
     * @see JCudaDriver#cuArrayDestroy
     * @see JCudaDriver#cuArrayGetDescriptor
     * @see JCudaDriver#cuMemAlloc
     * @see JCudaDriver#cuMemAllocHost
     * @see JCudaDriver#cuMemAllocPitch
     * @see JCudaDriver#cuMemcpy2D
     * @see JCudaDriver#cuMemcpy2DAsync
     * @see JCudaDriver#cuMemcpy2DUnaligned
     * @see JCudaDriver#cuMemcpy3D
     * @see JCudaDriver#cuMemcpy3DAsync
     * @see JCudaDriver#cuMemcpyAtoA
     * @see JCudaDriver#cuMemcpyAtoD
     * @see JCudaDriver#cuMemcpyAtoH
     * @see JCudaDriver#cuMemcpyAtoHAsync
     * @see JCudaDriver#cuMemcpyDtoA
     * @see JCudaDriver#cuMemcpyDtoD
     * @see JCudaDriver#cuMemcpyDtoDAsync
     * @see JCudaDriver#cuMemcpyDtoH
     * @see JCudaDriver#cuMemcpyHtoA
     * @see JCudaDriver#cuMemcpyHtoAAsync
     * @see JCudaDriver#cuMemcpyHtoD
     * @see JCudaDriver#cuMemcpyHtoDAsync
     * @see JCudaDriver#cuMemFree
     * @see JCudaDriver#cuMemFreeHost
     * @see JCudaDriver#cuMemGetAddressRange
     * @see JCudaDriver#cuMemGetInfo
     * @see JCudaDriver#cuMemHostAlloc
     * @see JCudaDriver#cuMemHostGetDevicePointer
     * @see JCudaDriver#cuMemsetD2D8
     * @see JCudaDriver#cuMemsetD2D8Async
     * @see JCudaDriver#cuMemsetD2D16
     * @see JCudaDriver#cuMemsetD2D16Async
     * @see JCudaDriver#cuMemsetD2D32
     * @see JCudaDriver#cuMemsetD2D32Async
     * @see JCudaDriver#cuMemsetD8
     * @see JCudaDriver#cuMemsetD8Async
     * @see JCudaDriver#cuMemsetD16
     * @see JCudaDriver#cuMemsetD16Async
     * @see JCudaDriver#cuMemsetD32
     * @see JCudaDriver#cuMemsetD32Async
     */    
    public static int cuMemcpyDtoHAsync(Pointer dstHost,CUdeviceptr srcDevice, long ByteCount, CUstream hStream)
    {
        // Forward to the native implementation; its int result is the raw status code.
        final int rawStatus = cuMemcpyDtoHAsyncNative(dstHost, srcDevice, ByteCount, hStream);
        return checkResult(rawStatus);
    }

    // Native counterpart of cuMemcpyDtoHAsync, taking the same arguments in the same order.
    private static native int cuMemcpyDtoHAsyncNative(Pointer dstHost,CUdeviceptr srcDevice, long ByteCount, CUstream hStream);

    /**
     * Copies memory from Device to Device.
     * 
     * CUresult cuMemcpyDtoDAsync (
     *      CUdeviceptr dstDevice,
     *      CUdeviceptr srcDevice,
     *      size_t ByteCount,
     *      CUstream hStream )
     * 
     * 
     *   Copies memory from Device to Device. 
     *     Copies from device memory to device memory. dstDevice and
     *     srcDevice are the base pointers of the destination and
     *     source, respectively. ByteCount specifies the number of bytes
     *     to copy. Note that this function is asynchronous and can optionally be
     *     associated to a stream
     *     by passing a non-zero hStream
     *     argument
     *   
     *   
     *     Note:
     *     Note that this
     *       function may also return error codes from previous, asynchronous
     *       launches.
     *     
     *   
     *   
     * 
     * 
     * @param dstDevice Destination device pointer
     * @param srcDevice Source device pointer
     * @param ByteCount Size of memory copy in bytes
     * @param hStream Stream identifier
     * 
     * @return CUDA_SUCCESS, CUDA_ERROR_DEINITIALIZED, CUDA_ERROR_NOT_INITIALIZED,
     * CUDA_ERROR_INVALID_CONTEXT, CUDA_ERROR_INVALID_VALUE
     * 
     * @see JCudaDriver#cuArray3DCreate
     * @see JCudaDriver#cuArray3DGetDescriptor
     * @see JCudaDriver#cuArrayCreate
     * @see JCudaDriver#cuArrayDestroy
     * @see JCudaDriver#cuArrayGetDescriptor
     * @see JCudaDriver#cuMemAlloc
     * @see JCudaDriver#cuMemAllocHost
     * @see JCudaDriver#cuMemAllocPitch
     * @see JCudaDriver#cuMemcpy2D
     * @see JCudaDriver#cuMemcpy2DAsync
     * @see JCudaDriver#cuMemcpy2DUnaligned
     * @see JCudaDriver#cuMemcpy3D
     * @see JCudaDriver#cuMemcpy3DAsync
     * @see JCudaDriver#cuMemcpyAtoA
     * @see JCudaDriver#cuMemcpyAtoD
     * @see JCudaDriver#cuMemcpyAtoH
     * @see JCudaDriver#cuMemcpyAtoHAsync
     * @see JCudaDriver#cuMemcpyDtoA
     * @see JCudaDriver#cuMemcpyDtoD
     * @see JCudaDriver#cuMemcpyDtoH
     * @see JCudaDriver#cuMemcpyDtoHAsync
     * @see JCudaDriver#cuMemcpyHtoA
     * @see JCudaDriver#cuMemcpyHtoAAsync
     * @see JCudaDriver#cuMemcpyHtoD
     * @see JCudaDriver#cuMemcpyHtoDAsync
     * @see JCudaDriver#cuMemFree
     * @see JCudaDriver#cuMemFreeHost
     * @see JCudaDriver#cuMemGetAddressRange
     * @see JCudaDriver#cuMemGetInfo
     * @see JCudaDriver#cuMemHostAlloc
     * @see JCudaDriver#cuMemHostGetDevicePointer
     * @see JCudaDriver#cuMemsetD2D8
     * @see JCudaDriver#cuMemsetD2D8Async
     * @see JCudaDriver#cuMemsetD2D16
     * @see JCudaDriver#cuMemsetD2D16Async
     * @see JCudaDriver#cuMemsetD2D32
     * @see JCudaDriver#cuMemsetD2D32Async
     * @see JCudaDriver#cuMemsetD8
     * @see JCudaDriver#cuMemsetD8Async
     * @see JCudaDriver#cuMemsetD16
     * @see JCudaDriver#cuMemsetD16Async
     * @see JCudaDriver#cuMemsetD32
     * @see JCudaDriver#cuMemsetD32Async
     */    
    public static int cuMemcpyDtoDAsync(CUdeviceptr dstDevice,CUdeviceptr srcDevice, long ByteCount, CUstream hStream)
    {
        // Delegate the copy request to native code and keep the returned status.
        final int status = cuMemcpyDtoDAsyncNative(dstDevice, srcDevice, ByteCount, hStream);

        // checkResult decides what is finally returned for this status.
        return checkResult(status);
    }

    // Declared native: the implementation is not part of this Java source.
    private static native int cuMemcpyDtoDAsyncNative(CUdeviceptr dstDevice,CUdeviceptr srcDevice, long ByteCount, CUstream hStream);


    /**
     * Copies memory from Host to Array.
     * 
     * CUresult cuMemcpyHtoAAsync (
     *      CUarray dstArray,
     *      size_t dstOffset,
     *      const void* srcHost,
     *      size_t ByteCount,
     *      CUstream hStream )
     * 
     * 
     *   Copies memory from Host to Array.  Copies
     *     from host memory to a 1D CUDA array. dstArray and dstOffset specify the CUDA array handle and starting offset in
     *     bytes of the destination data. srcHost specifies the base
     *     address of the source. ByteCount specifies the number of
     *     bytes to copy.
     *   
     *   cuMemcpyHtoAAsync() is asynchronous and
     *     can optionally be associated to a stream by passing a non-zero hStream argument. It only works on page-locked memory and returns
     *     an error if a pointer to pageable memory is passed as input.
     *   
     *   
     *     Note:
     *     Note that this
     *       function may also return error codes from previous, asynchronous
     *       launches.
     *     
     *   
     *   
     * 
     * 
     * @param dstArray Destination array
     * @param dstIndex Offset in bytes of destination array (dstOffset in the CUDA signature above)
     * @param pSrc Source host pointer (srcHost in the CUDA signature above)
     * @param ByteCount Size of memory copy in bytes
     * @param hStream Stream identifier
     * 
     * @return CUDA_SUCCESS, CUDA_ERROR_DEINITIALIZED, CUDA_ERROR_NOT_INITIALIZED,
     * CUDA_ERROR_INVALID_CONTEXT, CUDA_ERROR_INVALID_VALUE
     * 
     * @see JCudaDriver#cuArray3DCreate
     * @see JCudaDriver#cuArray3DGetDescriptor
     * @see JCudaDriver#cuArrayCreate
     * @see JCudaDriver#cuArrayDestroy
     * @see JCudaDriver#cuArrayGetDescriptor
     * @see JCudaDriver#cuMemAlloc
     * @see JCudaDriver#cuMemAllocHost
     * @see JCudaDriver#cuMemAllocPitch
     * @see JCudaDriver#cuMemcpy2D
     * @see JCudaDriver#cuMemcpy2DAsync
     * @see JCudaDriver#cuMemcpy2DUnaligned
     * @see JCudaDriver#cuMemcpy3D
     * @see JCudaDriver#cuMemcpy3DAsync
     * @see JCudaDriver#cuMemcpyAtoA
     * @see JCudaDriver#cuMemcpyAtoD
     * @see JCudaDriver#cuMemcpyAtoH
     * @see JCudaDriver#cuMemcpyAtoHAsync
     * @see JCudaDriver#cuMemcpyDtoA
     * @see JCudaDriver#cuMemcpyDtoD
     * @see JCudaDriver#cuMemcpyDtoDAsync
     * @see JCudaDriver#cuMemcpyDtoH
     * @see JCudaDriver#cuMemcpyDtoHAsync
     * @see JCudaDriver#cuMemcpyHtoA
     * @see JCudaDriver#cuMemcpyHtoD
     * @see JCudaDriver#cuMemcpyHtoDAsync
     * @see JCudaDriver#cuMemFree
     * @see JCudaDriver#cuMemFreeHost
     * @see JCudaDriver#cuMemGetAddressRange
     * @see JCudaDriver#cuMemGetInfo
     * @see JCudaDriver#cuMemHostAlloc
     * @see JCudaDriver#cuMemHostGetDevicePointer
     * @see JCudaDriver#cuMemsetD2D8
     * @see JCudaDriver#cuMemsetD2D8Async
     * @see JCudaDriver#cuMemsetD2D16
     * @see JCudaDriver#cuMemsetD2D16Async
     * @see JCudaDriver#cuMemsetD2D32
     * @see JCudaDriver#cuMemsetD2D32Async
     * @see JCudaDriver#cuMemsetD8
     * @see JCudaDriver#cuMemsetD8Async
     * @see JCudaDriver#cuMemsetD16
     * @see JCudaDriver#cuMemsetD16Async
     * @see JCudaDriver#cuMemsetD32
     * @see JCudaDriver#cuMemsetD32Async
     */    
    public static int cuMemcpyHtoAAsync(CUarray dstArray, long dstIndex, Pointer pSrc, long ByteCount, CUstream hStream)
    {
        // dstIndex and pSrc are passed straight through to the native binding.
        final int nativeStatus = cuMemcpyHtoAAsyncNative(dstArray, dstIndex, pSrc, ByteCount, hStream);
        return checkResult(nativeStatus);
    }

    // Native counterpart of cuMemcpyHtoAAsync; implemented outside this Java source.
    private static native int cuMemcpyHtoAAsyncNative(CUarray dstArray, long dstIndex, Pointer pSrc, long ByteCount, CUstream hStream);


    /**
     * Copies memory from Array to Host.
     * 
     * CUresult cuMemcpyAtoHAsync (
     *      void* dstHost,
     *      CUarray srcArray,
     *      size_t srcOffset,
     *      size_t ByteCount,
     *      CUstream hStream )
     * 
     * 
     *   Copies memory from Array to Host.  Copies
     *     from one 1D CUDA array to host memory. dstHost specifies the
     *     base pointer of the destination. srcArray and srcOffset specify the CUDA array handle and starting offset in
     *     bytes of the source data. ByteCount specifies the number of
     *     bytes to copy.
     *   
     *   cuMemcpyAtoHAsync() is asynchronous and
     *     can optionally be associated to a stream by passing a non-zero stream argument. It only works on page-locked host memory and
     *     returns an error if a pointer to pageable memory is passed as input.
     *   
     *   
     *     Note:
     *     Note that this
     *       function may also return error codes from previous, asynchronous
     *       launches.
     *     
     *   
     *   
     * 
     * 
     * @param dstHost Destination pointer
     * @param srcArray Source array
     * @param srcIndex Offset in bytes of source array (srcOffset in the CUDA signature above)
     * @param ByteCount Size of memory copy in bytes
     * @param hStream Stream identifier
     * 
     * @return CUDA_SUCCESS, CUDA_ERROR_DEINITIALIZED, CUDA_ERROR_NOT_INITIALIZED,
     * CUDA_ERROR_INVALID_CONTEXT, CUDA_ERROR_INVALID_VALUE
     * 
     * @see JCudaDriver#cuArray3DCreate
     * @see JCudaDriver#cuArray3DGetDescriptor
     * @see JCudaDriver#cuArrayCreate
     * @see JCudaDriver#cuArrayDestroy
     * @see JCudaDriver#cuArrayGetDescriptor
     * @see JCudaDriver#cuMemAlloc
     * @see JCudaDriver#cuMemAllocHost
     * @see JCudaDriver#cuMemAllocPitch
     * @see JCudaDriver#cuMemcpy2D
     * @see JCudaDriver#cuMemcpy2DAsync
     * @see JCudaDriver#cuMemcpy2DUnaligned
     * @see JCudaDriver#cuMemcpy3D
     * @see JCudaDriver#cuMemcpy3DAsync
     * @see JCudaDriver#cuMemcpyAtoA
     * @see JCudaDriver#cuMemcpyAtoD
     * @see JCudaDriver#cuMemcpyAtoH
     * @see JCudaDriver#cuMemcpyDtoA
     * @see JCudaDriver#cuMemcpyDtoD
     * @see JCudaDriver#cuMemcpyDtoDAsync
     * @see JCudaDriver#cuMemcpyDtoH
     * @see JCudaDriver#cuMemcpyDtoHAsync
     * @see JCudaDriver#cuMemcpyHtoA
     * @see JCudaDriver#cuMemcpyHtoAAsync
     * @see JCudaDriver#cuMemcpyHtoD
     * @see JCudaDriver#cuMemcpyHtoDAsync
     * @see JCudaDriver#cuMemFree
     * @see JCudaDriver#cuMemFreeHost
     * @see JCudaDriver#cuMemGetAddressRange
     * @see JCudaDriver#cuMemGetInfo
     * @see JCudaDriver#cuMemHostAlloc
     * @see JCudaDriver#cuMemHostGetDevicePointer
     * @see JCudaDriver#cuMemsetD2D8
     * @see JCudaDriver#cuMemsetD2D8Async
     * @see JCudaDriver#cuMemsetD2D16
     * @see JCudaDriver#cuMemsetD2D16Async
     * @see JCudaDriver#cuMemsetD2D32
     * @see JCudaDriver#cuMemsetD2D32Async
     * @see JCudaDriver#cuMemsetD8
     * @see JCudaDriver#cuMemsetD8Async
     * @see JCudaDriver#cuMemsetD16
     * @see JCudaDriver#cuMemsetD16Async
     * @see JCudaDriver#cuMemsetD32
     * @see JCudaDriver#cuMemsetD32Async
     */    
    public static int cuMemcpyAtoHAsync(Pointer dstHost, CUarray srcArray, long srcIndex, long ByteCount, CUstream hStream)
    {
        // Call into native code with the arguments exactly as received.
        final int status = cuMemcpyAtoHAsyncNative(dstHost, srcArray, srcIndex, ByteCount, hStream);

        // Route the status code through checkResult before returning.
        return checkResult(status);
    }

    // Declared native: the array-to-host copy itself is implemented in native code.
    private static native int cuMemcpyAtoHAsyncNative(Pointer dstHost, CUarray srcArray, long srcIndex, long ByteCount, CUstream hStream);


    /**
     * Copies memory for 2D arrays.
     * 
     * CUresult cuMemcpy2DAsync (
     *      const CUDA_MEMCPY2D* pCopy,
     *      CUstream hStream )
     * 
     * 
     *   Copies memory for 2D arrays.  Perform a
     *     2D memory copy according to the parameters specified in pCopy.
     *     The CUDA_MEMCPY2D structure is defined as:
     *   
     *      typedef struct CUDA_MEMCPY2D_st {
     *       unsigned int srcXInBytes, srcY;
     *       CUmemorytype srcMemoryType;
     *       const void *srcHost;
     *       CUdeviceptr srcDevice;
     *       CUarray srcArray;
     *       unsigned int srcPitch;
     *       unsigned int dstXInBytes, dstY;
     *       CUmemorytype dstMemoryType;
     *       void *dstHost;
     *       CUdeviceptr dstDevice;
     *       CUarray dstArray;
     *       unsigned int dstPitch;
     *       unsigned int WidthInBytes;
     *       unsigned int Height;
     *    } CUDA_MEMCPY2D;
     *   where:
     *   
     *     
     *       srcMemoryType and dstMemoryType
     *         specify the type of memory of the source and destination, respectively;
     *         CUmemorytype_enum
     *         is defined as:
     *       
     *     
     *   
     *   
     *      typedef enum CUmemorytype_enum {
     *       CU_MEMORYTYPE_HOST = 0x01,
     *       CU_MEMORYTYPE_DEVICE = 0x02,
     *       CU_MEMORYTYPE_ARRAY = 0x03,
     *       CU_MEMORYTYPE_UNIFIED = 0x04
     *    } CUmemorytype;
     *   
     *   If srcMemoryType is CU_MEMORYTYPE_HOST,
     *     srcHost and srcPitch specify the (host) base address of the source data
     *     and the bytes per row to apply. srcArray is ignored.
     *   
     *   If srcMemoryType is CU_MEMORYTYPE_UNIFIED,
     *     srcDevice and srcPitch specify the (unified virtual address space) base
     *     address of the source data and the bytes per row
     *     to apply. srcArray is ignored. This value
     *     may be used only if unified addressing is supported in the calling
     *     context.
     *   
     *   If srcMemoryType is CU_MEMORYTYPE_DEVICE,
     *     srcDevice and srcPitch specify the (device) base address of the source
     *     data and the bytes per row to apply. srcArray is
     *     ignored.
     *   
     *   If srcMemoryType is CU_MEMORYTYPE_ARRAY,
     *     srcArray specifies the handle of the source data. srcHost, srcDevice
     *     and srcPitch are ignored.
     *   
     *   If dstMemoryType is CU_MEMORYTYPE_UNIFIED,
     *     dstDevice and dstPitch specify the (unified virtual address space) base
     *     address of the destination data and the bytes per row
     *     to apply. dstArray is ignored. This value
     *     may be used only if unified addressing is supported in the calling
     *     context.
     *   
     *   If dstMemoryType is CU_MEMORYTYPE_HOST,
     *     dstHost and dstPitch specify the (host) base address of the destination
     *     data and the bytes per row to apply. dstArray is
     *     ignored.
     *   
     *   If dstMemoryType is CU_MEMORYTYPE_DEVICE,
     *     dstDevice and dstPitch specify the (device) base address of the
     *     destination data and the bytes per row to apply. dstArray
     *     is ignored.
     *   
     *   If dstMemoryType is CU_MEMORYTYPE_ARRAY,
     *     dstArray specifies the handle of the destination data. dstHost,
     *     dstDevice and dstPitch are ignored.
     *   
     *   
     *     
     *       srcXInBytes and srcY specify
     *         the base address of the source data for the copy.
     *       
     *     
     *   
     *   
     *   For host pointers, the starting address
     *     is
     *
     *      void* Start = (void*)((char*)srcHost+srcY*srcPitch + srcXInBytes);
     *
     *   For device pointers, the starting
     *     address is
     *
     *      CUdeviceptr Start = srcDevice+srcY*srcPitch+srcXInBytes;
     *   
     *   For CUDA arrays, srcXInBytes must be
     *     evenly divisible by the array element size.
     *   
     *   
     *     
     *       dstXInBytes and dstY specify
     *         the base address of the destination data for the copy.
     *       
     *     
     *   
     *   
     *   For host pointers, the base address is
     *
     *      void* dstStart = (void*)((char*)dstHost+dstY*dstPitch + dstXInBytes);
     *
     *   For device pointers, the starting
     *     address is
     *
     *      CUdeviceptr dstStart = dstDevice+dstY*dstPitch+dstXInBytes;
     *   
     *   For CUDA arrays, dstXInBytes must be
     *     evenly divisible by the array element size.
     *   
     *   
     *     
     *       WidthInBytes and Height specify
     *         the width (in bytes) and height of the 2D copy being performed.
     *       
     *     
     *     
     *       If specified, srcPitch must be
     *         greater than or equal to WidthInBytes + srcXInBytes, and dstPitch must
     *         be greater than or equal
     *         to WidthInBytes + dstXInBytes.
     *       
     *     
     *     
     *       If specified, srcHeight must
     *         be greater than or equal to Height + srcY, and dstHeight must be
     *         greater than or equal to Height
     *         + dstY.
     *       
     *     
     *   
     *   
     *   cuMemcpy2D() returns an error if any
     *     pitch is greater than the maximum allowed (CU_DEVICE_ATTRIBUTE_MAX_PITCH).
     *     cuMemAllocPitch() passes back pitches that always work with cuMemcpy2D().
     *     On intra-device memory copies (device to device, CUDA array to device,
     *     CUDA array to CUDA array), cuMemcpy2D() may fail for pitches not
     *     computed by cuMemAllocPitch(). cuMemcpy2DUnaligned() does not have this
     *     restriction, but may run significantly slower in the cases where
     *     cuMemcpy2D() would have returned an error code.
     *   
     *   cuMemcpy2DAsync() is asynchronous and
     *     can optionally be associated to a stream by passing a non-zero hStream argument. It only works on page-locked host memory and
     *     returns an error if a pointer to pageable memory is passed as input.
     *   
     *   
     *     Note:
     *     Note that this
     *       function may also return error codes from previous, asynchronous
     *       launches.
     *     
     *   
     *   
     * 
     * 
     * @param pCopy Parameters for the memory copy
     * @param hStream Stream identifier
     * 
     * @return CUDA_SUCCESS, CUDA_ERROR_DEINITIALIZED, CUDA_ERROR_NOT_INITIALIZED,
     * CUDA_ERROR_INVALID_CONTEXT, CUDA_ERROR_INVALID_VALUE
     * 
     * @see JCudaDriver#cuArray3DCreate
     * @see JCudaDriver#cuArray3DGetDescriptor
     * @see JCudaDriver#cuArrayCreate
     * @see JCudaDriver#cuArrayDestroy
     * @see JCudaDriver#cuArrayGetDescriptor
     * @see JCudaDriver#cuMemAlloc
     * @see JCudaDriver#cuMemAllocHost
     * @see JCudaDriver#cuMemAllocPitch
     * @see JCudaDriver#cuMemcpy2D
     * @see JCudaDriver#cuMemcpy2DUnaligned
     * @see JCudaDriver#cuMemcpy3D
     * @see JCudaDriver#cuMemcpy3DAsync
     * @see JCudaDriver#cuMemcpyAtoA
     * @see JCudaDriver#cuMemcpyAtoD
     * @see JCudaDriver#cuMemcpyAtoH
     * @see JCudaDriver#cuMemcpyAtoHAsync
     * @see JCudaDriver#cuMemcpyDtoA
     * @see JCudaDriver#cuMemcpyDtoD
     * @see JCudaDriver#cuMemcpyDtoDAsync
     * @see JCudaDriver#cuMemcpyDtoH
     * @see JCudaDriver#cuMemcpyDtoHAsync
     * @see JCudaDriver#cuMemcpyHtoA
     * @see JCudaDriver#cuMemcpyHtoAAsync
     * @see JCudaDriver#cuMemcpyHtoD
     * @see JCudaDriver#cuMemcpyHtoDAsync
     * @see JCudaDriver#cuMemFree
     * @see JCudaDriver#cuMemFreeHost
     * @see JCudaDriver#cuMemGetAddressRange
     * @see JCudaDriver#cuMemGetInfo
     * @see JCudaDriver#cuMemHostAlloc
     * @see JCudaDriver#cuMemHostGetDevicePointer
     * @see JCudaDriver#cuMemsetD2D8
     * @see JCudaDriver#cuMemsetD2D8Async
     * @see JCudaDriver#cuMemsetD2D16
     * @see JCudaDriver#cuMemsetD2D16Async
     * @see JCudaDriver#cuMemsetD2D32
     * @see JCudaDriver#cuMemsetD2D32Async
     * @see JCudaDriver#cuMemsetD8
     * @see JCudaDriver#cuMemsetD8Async
     * @see JCudaDriver#cuMemsetD16
     * @see JCudaDriver#cuMemsetD16Async
     * @see JCudaDriver#cuMemsetD32
     * @see JCudaDriver#cuMemsetD32Async
     */    
    public static int cuMemcpy2DAsync(CUDA_MEMCPY2D pCopy, CUstream hStream)
    {
        // The copy descriptor and stream are handed to the native binding as-is.
        final int status = cuMemcpy2DAsyncNative(pCopy, hStream);
        return checkResult(status);
    }

    // Native counterpart of cuMemcpy2DAsync; implemented outside this Java source.
    private static native int cuMemcpy2DAsyncNative(CUDA_MEMCPY2D pCopy, CUstream hStream);


    /**
     * Copies memory for 3D arrays.
     * 
     * CUresult cuMemcpy3DAsync (
     *      const CUDA_MEMCPY3D* pCopy,
     *      CUstream hStream )
     * 
     * 
     *   Copies memory for 3D arrays.  Perform a
     *     3D memory copy according to the parameters specified in pCopy.
     *     The CUDA_MEMCPY3D structure is defined as:
     *   
     *           typedef struct CUDA_MEMCPY3D_st
     * {
     * 
     *             unsigned int srcXInBytes, srcY, srcZ;
     *             unsigned int srcLOD;
     *             CUmemorytype srcMemoryType;
     *                 const void *srcHost;
     *                 CUdeviceptr srcDevice;
     *                 CUarray srcArray;
     *                 unsigned int srcPitch;  // ignored when src is array
     *                 unsigned int srcHeight; // ignored when src is array;
     * may be 0 if Depth==1
     * 
     *             unsigned int dstXInBytes, dstY, dstZ;
     *             unsigned int dstLOD;
     *             CUmemorytype dstMemoryType;
     *                 void *dstHost;
     *                 CUdeviceptr dstDevice;
     *                 CUarray dstArray;
     *                 unsigned int dstPitch;  // ignored when dst is array
     *                 unsigned int dstHeight; // ignored when dst is array;
     * may be 0 if Depth==1
     * 
     *             unsigned int WidthInBytes;
     *             unsigned int Height;
     *             unsigned int Depth;
     *         } CUDA_MEMCPY3D;
     *   where:
     *   
     *     
     *       srcMemoryType and dstMemoryType
     *         specify the type of memory of the source and destination, respectively;
     *         CUmemorytype_enum
     *         is defined as:
     *       
     *     
     *   
     *   
     *      typedef enum CUmemorytype_enum {
     *       CU_MEMORYTYPE_HOST = 0x01,
     *       CU_MEMORYTYPE_DEVICE = 0x02,
     *       CU_MEMORYTYPE_ARRAY = 0x03,
     *       CU_MEMORYTYPE_UNIFIED = 0x04
     *    } CUmemorytype;
     *   
     *   If srcMemoryType is CU_MEMORYTYPE_UNIFIED,
     *     srcDevice and srcPitch specify the (unified virtual address space) base
     *     address of the source data and the bytes per row
     *     to apply. srcArray is ignored. This value
     *     may be used only if unified addressing is supported in the calling
     *     context.
     *   
     *   If srcMemoryType is CU_MEMORYTYPE_HOST,
     *     srcHost, srcPitch and srcHeight specify the (host) base address of the
     *     source data, the bytes per row, and the height of
     *     each 2D slice of the 3D array. srcArray
     *     is ignored.
     *   
     *   If srcMemoryType is CU_MEMORYTYPE_DEVICE,
     *     srcDevice, srcPitch and srcHeight specify the (device) base address of
     *     the source data, the bytes per row, and the height
     *     of each 2D slice of the 3D array. srcArray
     *     is ignored.
     *   
     *   If srcMemoryType is CU_MEMORYTYPE_ARRAY,
     *     srcArray specifies the handle of the source data. srcHost, srcDevice,
     *     srcPitch and srcHeight are ignored.
     *   
     *   If dstMemoryType is CU_MEMORYTYPE_UNIFIED,
     *     dstDevice and dstPitch specify the (unified virtual address space) base
     *     address of the destination data and the bytes per row
     *     to apply. dstArray is ignored. This value
     *     may be used only if unified addressing is supported in the calling
     *     context.
     *   
     *   If dstMemoryType is CU_MEMORYTYPE_HOST,
     *     dstHost and dstPitch specify the (host) base address of the destination
     *     data, the bytes per row, and the height of each
     *     2D slice of the 3D array. dstArray is
     *     ignored.
     *   
     *   If dstMemoryType is CU_MEMORYTYPE_DEVICE,
     *     dstDevice and dstPitch specify the (device) base address of the
     *     destination data, the bytes per row, and the height of each
     *     2D slice of the 3D array. dstArray is
     *     ignored.
     *   
     *   If dstMemoryType is CU_MEMORYTYPE_ARRAY,
     *     dstArray specifies the handle of the destination data. dstHost,
     *     dstDevice, dstPitch and dstHeight are ignored.
     *   
     *   
     *     
     *       srcXInBytes, srcY and srcZ
     *         specify the base address of the source data for the copy.
     *       
     *     
     *   
     *   
     *   For host pointers, the starting address
     *     is
     *
     *      void* Start = (void*)((char*)srcHost+(srcZ*srcHeight+srcY)*srcPitch + srcXInBytes);
     *
     *   For device pointers, the starting
     *     address is
     *
     *      CUdeviceptr Start = srcDevice+(srcZ*srcHeight+srcY)*srcPitch+srcXInBytes;
     *   
     *   For CUDA arrays, srcXInBytes must be
     *     evenly divisible by the array element size.
     *   
     *   
     *     
     *       dstXInBytes, dstY and dstZ
     *         specify the base address of the destination data for the copy.
     *       
     *     
     *   
     *   
     *   For host pointers, the base address is
     *   
     *   <pre>
     *   void* dstStart = (void*)((char*)dstHost+(dstZ*dstHeight+dstY)*dstPitch + dstXInBytes);
     *   </pre>
     *   
     *   For device pointers, the starting
     *     address is 
     *   
     *   <pre>
     *   CUdeviceptr dstStart = dstDevice+(dstZ*dstHeight+dstY)*dstPitch+dstXInBytes;
     *   </pre>
     *   
     *   For CUDA arrays, dstXInBytes must be
     *     evenly divisible by the array element size.
     *   
     *   
     *     
     *       WidthInBytes, Height and Depth
     *         specify the width (in bytes), height and depth of the 3D copy being
     *         performed.
     *       
     *     
     *     
     *       If specified, srcPitch must be
     *         greater than or equal to WidthInBytes + srcXInBytes, and dstPitch must
     *         be greater than or equal
     *         to WidthInBytes + dstXInBytes.
     *       
     *     
     *     
     *       If specified, srcHeight must
     *         be greater than or equal to Height + srcY, and dstHeight must be
     *         greater than or equal to Height
     *         + dstY.
     *       
     *     
     *   
     *   
     *   cuMemcpy3D() returns an error if any
     *     pitch is greater than the maximum allowed
     *     (CU_DEVICE_ATTRIBUTE_MAX_PITCH).
     *   
     *   cuMemcpy3DAsync() is asynchronous and
     *     can optionally be associated to a stream by passing a non-zero hStream argument. It only works on page-locked host memory and
     *     returns an error if a pointer to pageable memory is passed as input.
     *   
     *   The srcLOD and dstLOD members of the
     *     CUDA_MEMCPY3D structure must be set to 0.
     *   
     *   
     *     Note:
     *     Note that this
     *       function may also return error codes from previous, asynchronous
     *       launches.
     *     
     *   
     *   
     * 
     * 
     * @param pCopy Parameters for the memory copy
     * @param hStream Stream identifier
     * 
     * @return CUDA_SUCCESS, CUDA_ERROR_DEINITIALIZED, CUDA_ERROR_NOT_INITIALIZED,
     * CUDA_ERROR_INVALID_CONTEXT, CUDA_ERROR_INVALID_VALUE
     * 
     * @see JCudaDriver#cuArray3DCreate
     * @see JCudaDriver#cuArray3DGetDescriptor
     * @see JCudaDriver#cuArrayCreate
     * @see JCudaDriver#cuArrayDestroy
     * @see JCudaDriver#cuArrayGetDescriptor
     * @see JCudaDriver#cuMemAlloc
     * @see JCudaDriver#cuMemAllocHost
     * @see JCudaDriver#cuMemAllocPitch
     * @see JCudaDriver#cuMemcpy2D
     * @see JCudaDriver#cuMemcpy2DAsync
     * @see JCudaDriver#cuMemcpy2DUnaligned
     * @see JCudaDriver#cuMemcpy3D
     * @see JCudaDriver#cuMemcpyAtoA
     * @see JCudaDriver#cuMemcpyAtoD
     * @see JCudaDriver#cuMemcpyAtoH
     * @see JCudaDriver#cuMemcpyAtoHAsync
     * @see JCudaDriver#cuMemcpyDtoA
     * @see JCudaDriver#cuMemcpyDtoD
     * @see JCudaDriver#cuMemcpyDtoDAsync
     * @see JCudaDriver#cuMemcpyDtoH
     * @see JCudaDriver#cuMemcpyDtoHAsync
     * @see JCudaDriver#cuMemcpyHtoA
     * @see JCudaDriver#cuMemcpyHtoAAsync
     * @see JCudaDriver#cuMemcpyHtoD
     * @see JCudaDriver#cuMemcpyHtoDAsync
     * @see JCudaDriver#cuMemFree
     * @see JCudaDriver#cuMemFreeHost
     * @see JCudaDriver#cuMemGetAddressRange
     * @see JCudaDriver#cuMemGetInfo
     * @see JCudaDriver#cuMemHostAlloc
     * @see JCudaDriver#cuMemHostGetDevicePointer
     * @see JCudaDriver#cuMemsetD2D8
     * @see JCudaDriver#cuMemsetD2D8Async
     * @see JCudaDriver#cuMemsetD2D16
     * @see JCudaDriver#cuMemsetD2D16Async
     * @see JCudaDriver#cuMemsetD2D32
     * @see JCudaDriver#cuMemsetD2D32Async
     * @see JCudaDriver#cuMemsetD8
     * @see JCudaDriver#cuMemsetD8Async
     * @see JCudaDriver#cuMemsetD16
     * @see JCudaDriver#cuMemsetD16Async
     * @see JCudaDriver#cuMemsetD32
     * @see JCudaDriver#cuMemsetD32Async
     */    
    public static int cuMemcpy3DAsync(CUDA_MEMCPY3D pCopy, CUstream hStream)
    {
        // Invoke the native binding first, then let checkResult process the
        // status code it produced; checkResult's return value goes to the caller.
        final int status = cuMemcpy3DAsyncNative(pCopy, hStream);
        return checkResult(status);
    }

    // Native counterpart of cuMemcpy3DAsync; takes the same arguments and
    // yields the status code that the public wrapper hands to checkResult.
    private static native int cuMemcpy3DAsyncNative(CUDA_MEMCPY3D pCopy, CUstream hStream);


    /**
     * Copies memory between contexts asynchronously.
     * 
     * <pre>
     * CUresult cuMemcpy3DPeerAsync (
     *      const CUDA_MEMCPY3D_PEER* pCopy,
     *      CUstream hStream )
     * </pre>
     * 
     * 
     *   Copies memory between contexts
     *     asynchronously.  Perform a 3D memory copy according to the parameters
     *     specified in pCopy. See the definition of the CUDA_MEMCPY3D_PEER
     *     structure for documentation of its parameters.
     *   
     *   
     *     Note:
     *     Note that this
     *       function may also return error codes from previous, asynchronous
     *       launches.
     *     
     *   
     *   
     * 
     * 
     * @param pCopy Parameters for the memory copy
     * @param hStream Stream identifier
     * 
     * @return CUDA_SUCCESS, CUDA_ERROR_DEINITIALIZED, CUDA_ERROR_NOT_INITIALIZED,
     * CUDA_ERROR_INVALID_CONTEXT, CUDA_ERROR_INVALID_VALUE
     * 
     * @see JCudaDriver#cuMemcpyDtoD
     * @see JCudaDriver#cuMemcpyPeer
     * @see JCudaDriver#cuMemcpyDtoDAsync
     * @see JCudaDriver#cuMemcpyPeerAsync
     * @see JCudaDriver#cuMemcpy3DPeer
     */    
    public static int cuMemcpy3DPeerAsync(CUDA_MEMCPY3D_PEER pCopy, CUstream hStream)
    {
        // Run the native call, then route its status code through checkResult
        // and return whatever checkResult gives back.
        final int status = cuMemcpy3DPeerAsyncNative(pCopy, hStream);
        return checkResult(status);
    }

    // Native counterpart of cuMemcpy3DPeerAsync; same arguments, returns the
    // status code consumed by the public wrapper.
    private static native int cuMemcpy3DPeerAsyncNative(CUDA_MEMCPY3D_PEER pCopy, CUstream hStream);


    /**
     * Initializes device memory.
     * 
     * <pre>
     * CUresult cuMemsetD8 (
     *      CUdeviceptr dstDevice,
     *      unsigned char  uc,
     *      size_t N )
     * </pre>
     * 
     * 
     *   Initializes device memory.  Sets the
     *     memory range of N 8-bit values to the specified value uc.
     *   
     *   Note that this function is asynchronous
     *     with respect to the host unless dstDevice refers to pinned
     *     host memory.
     *   
     *   
     *     Note:
     *     Note that this
     *       function may also return error codes from previous, asynchronous
     *       launches.
     *     
     *   
     *   
     * 
     * 
     * @param dstDevice Destination device pointer
     * @param uc Value to set
     * @param N Number of elements
     * 
     * @return CUDA_SUCCESS, CUDA_ERROR_DEINITIALIZED, CUDA_ERROR_NOT_INITIALIZED,
     * CUDA_ERROR_INVALID_CONTEXT, CUDA_ERROR_INVALID_VALUE
     * 
     * @see JCudaDriver#cuArray3DCreate
     * @see JCudaDriver#cuArray3DGetDescriptor
     * @see JCudaDriver#cuArrayCreate
     * @see JCudaDriver#cuArrayDestroy
     * @see JCudaDriver#cuArrayGetDescriptor
     * @see JCudaDriver#cuMemAlloc
     * @see JCudaDriver#cuMemAllocHost
     * @see JCudaDriver#cuMemAllocPitch
     * @see JCudaDriver#cuMemcpy2D
     * @see JCudaDriver#cuMemcpy2DAsync
     * @see JCudaDriver#cuMemcpy2DUnaligned
     * @see JCudaDriver#cuMemcpy3D
     * @see JCudaDriver#cuMemcpy3DAsync
     * @see JCudaDriver#cuMemcpyAtoA
     * @see JCudaDriver#cuMemcpyAtoD
     * @see JCudaDriver#cuMemcpyAtoH
     * @see JCudaDriver#cuMemcpyAtoHAsync
     * @see JCudaDriver#cuMemcpyDtoA
     * @see JCudaDriver#cuMemcpyDtoD
     * @see JCudaDriver#cuMemcpyDtoDAsync
     * @see JCudaDriver#cuMemcpyDtoH
     * @see JCudaDriver#cuMemcpyDtoHAsync
     * @see JCudaDriver#cuMemcpyHtoA
     * @see JCudaDriver#cuMemcpyHtoAAsync
     * @see JCudaDriver#cuMemcpyHtoD
     * @see JCudaDriver#cuMemcpyHtoDAsync
     * @see JCudaDriver#cuMemFree
     * @see JCudaDriver#cuMemFreeHost
     * @see JCudaDriver#cuMemGetAddressRange
     * @see JCudaDriver#cuMemGetInfo
     * @see JCudaDriver#cuMemHostAlloc
     * @see JCudaDriver#cuMemHostGetDevicePointer
     * @see JCudaDriver#cuMemsetD2D8
     * @see JCudaDriver#cuMemsetD2D8Async
     * @see JCudaDriver#cuMemsetD2D16
     * @see JCudaDriver#cuMemsetD2D16Async
     * @see JCudaDriver#cuMemsetD2D32
     * @see JCudaDriver#cuMemsetD2D32Async
     * @see JCudaDriver#cuMemsetD8Async
     * @see JCudaDriver#cuMemsetD16
     * @see JCudaDriver#cuMemsetD16Async
     * @see JCudaDriver#cuMemsetD32
     * @see JCudaDriver#cuMemsetD32Async
     */    
    public static int cuMemsetD8(CUdeviceptr dstDevice, byte uc, long N)
    {
        // Call the native binding, then pass its status code to checkResult;
        // the value checkResult returns is the result of this method.
        final int status = cuMemsetD8Native(dstDevice, uc, N);
        return checkResult(status);
    }

    // Native counterpart of cuMemsetD8; same arguments, returns the status
    // code consumed by the public wrapper.
    private static native int cuMemsetD8Native(CUdeviceptr dstDevice, byte uc, long N);


    /**
     * Initializes device memory.
     * 
     * <pre>
     * CUresult cuMemsetD16 (
     *      CUdeviceptr dstDevice,
     *      unsigned short us,
     *      size_t N )
     * </pre>
     * 
     * 
     *   Initializes device memory.  Sets the
     *     memory range of N 16-bit values to the specified value us. The dstDevice pointer must be two byte aligned.
     *   
     *   Note that this function is asynchronous
     *     with respect to the host unless dstDevice refers to pinned
     *     host memory.
     *   
     *   
     *     Note:
     *     Note that this
     *       function may also return error codes from previous, asynchronous
     *       launches.
     *     
     *   
     *   
     * 
     * 
     * @param dstDevice Destination device pointer
     * @param us Value to set
     * @param N Number of elements
     * 
     * @return CUDA_SUCCESS, CUDA_ERROR_DEINITIALIZED, CUDA_ERROR_NOT_INITIALIZED,
     * CUDA_ERROR_INVALID_CONTEXT, CUDA_ERROR_INVALID_VALUE
     * 
     * @see JCudaDriver#cuArray3DCreate
     * @see JCudaDriver#cuArray3DGetDescriptor
     * @see JCudaDriver#cuArrayCreate
     * @see JCudaDriver#cuArrayDestroy
     * @see JCudaDriver#cuArrayGetDescriptor
     * @see JCudaDriver#cuMemAlloc
     * @see JCudaDriver#cuMemAllocHost
     * @see JCudaDriver#cuMemAllocPitch
     * @see JCudaDriver#cuMemcpy2D
     * @see JCudaDriver#cuMemcpy2DAsync
     * @see JCudaDriver#cuMemcpy2DUnaligned
     * @see JCudaDriver#cuMemcpy3D
     * @see JCudaDriver#cuMemcpy3DAsync
     * @see JCudaDriver#cuMemcpyAtoA
     * @see JCudaDriver#cuMemcpyAtoD
     * @see JCudaDriver#cuMemcpyAtoH
     * @see JCudaDriver#cuMemcpyAtoHAsync
     * @see JCudaDriver#cuMemcpyDtoA
     * @see JCudaDriver#cuMemcpyDtoD
     * @see JCudaDriver#cuMemcpyDtoDAsync
     * @see JCudaDriver#cuMemcpyDtoH
     * @see JCudaDriver#cuMemcpyDtoHAsync
     * @see JCudaDriver#cuMemcpyHtoA
     * @see JCudaDriver#cuMemcpyHtoAAsync
     * @see JCudaDriver#cuMemcpyHtoD
     * @see JCudaDriver#cuMemcpyHtoDAsync
     * @see JCudaDriver#cuMemFree
     * @see JCudaDriver#cuMemFreeHost
     * @see JCudaDriver#cuMemGetAddressRange
     * @see JCudaDriver#cuMemGetInfo
     * @see JCudaDriver#cuMemHostAlloc
     * @see JCudaDriver#cuMemHostGetDevicePointer
     * @see JCudaDriver#cuMemsetD2D8
     * @see JCudaDriver#cuMemsetD2D8Async
     * @see JCudaDriver#cuMemsetD2D16
     * @see JCudaDriver#cuMemsetD2D16Async
     * @see JCudaDriver#cuMemsetD2D32
     * @see JCudaDriver#cuMemsetD2D32Async
     * @see JCudaDriver#cuMemsetD8
     * @see JCudaDriver#cuMemsetD8Async
     * @see JCudaDriver#cuMemsetD16Async
     * @see JCudaDriver#cuMemsetD32
     * @see JCudaDriver#cuMemsetD32Async
     */    
    public static int cuMemsetD16(CUdeviceptr dstDevice, short us, long N)
    {
        // Call the native binding, then pass its status code to checkResult;
        // the value checkResult returns is the result of this method.
        final int status = cuMemsetD16Native(dstDevice, us, N);
        return checkResult(status);
    }

    // Native counterpart of cuMemsetD16; same arguments, returns the status
    // code consumed by the public wrapper.
    private static native int cuMemsetD16Native(CUdeviceptr dstDevice, short us, long N);


    /**
     * Initializes device memory.
     * 
     * <pre>
     * CUresult cuMemsetD32 (
     *      CUdeviceptr dstDevice,
     *      unsigned int  ui,
     *      size_t N )
     * </pre>
     * 
     * 
     *   Initializes device memory.  Sets the
     *     memory range of N 32-bit values to the specified value ui. The dstDevice pointer must be four byte aligned.
     *   
     *   Note that this function is asynchronous
     *     with respect to the host unless dstDevice refers to pinned
     *     host memory.
     *   
     *   
     *     Note:
     *     Note that this
     *       function may also return error codes from previous, asynchronous
     *       launches.
     *     
     *   
     *   
     * 
     * 
     * @param dstDevice Destination device pointer
     * @param ui Value to set
     * @param N Number of elements
     * 
     * @return CUDA_SUCCESS, CUDA_ERROR_DEINITIALIZED, CUDA_ERROR_NOT_INITIALIZED,
     * CUDA_ERROR_INVALID_CONTEXT, CUDA_ERROR_INVALID_VALUE
     * 
     * @see JCudaDriver#cuArray3DCreate
     * @see JCudaDriver#cuArray3DGetDescriptor
     * @see JCudaDriver#cuArrayCreate
     * @see JCudaDriver#cuArrayDestroy
     * @see JCudaDriver#cuArrayGetDescriptor
     * @see JCudaDriver#cuMemAlloc
     * @see JCudaDriver#cuMemAllocHost
     * @see JCudaDriver#cuMemAllocPitch
     * @see JCudaDriver#cuMemcpy2D
     * @see JCudaDriver#cuMemcpy2DAsync
     * @see JCudaDriver#cuMemcpy2DUnaligned
     * @see JCudaDriver#cuMemcpy3D
     * @see JCudaDriver#cuMemcpy3DAsync
     * @see JCudaDriver#cuMemcpyAtoA
     * @see JCudaDriver#cuMemcpyAtoD
     * @see JCudaDriver#cuMemcpyAtoH
     * @see JCudaDriver#cuMemcpyAtoHAsync
     * @see JCudaDriver#cuMemcpyDtoA
     * @see JCudaDriver#cuMemcpyDtoD
     * @see JCudaDriver#cuMemcpyDtoDAsync
     * @see JCudaDriver#cuMemcpyDtoH
     * @see JCudaDriver#cuMemcpyDtoHAsync
     * @see JCudaDriver#cuMemcpyHtoA
     * @see JCudaDriver#cuMemcpyHtoAAsync
     * @see JCudaDriver#cuMemcpyHtoD
     * @see JCudaDriver#cuMemcpyHtoDAsync
     * @see JCudaDriver#cuMemFree
     * @see JCudaDriver#cuMemFreeHost
     * @see JCudaDriver#cuMemGetAddressRange
     * @see JCudaDriver#cuMemGetInfo
     * @see JCudaDriver#cuMemHostAlloc
     * @see JCudaDriver#cuMemHostGetDevicePointer
     * @see JCudaDriver#cuMemsetD2D8
     * @see JCudaDriver#cuMemsetD2D8Async
     * @see JCudaDriver#cuMemsetD2D16
     * @see JCudaDriver#cuMemsetD2D16Async
     * @see JCudaDriver#cuMemsetD2D32
     * @see JCudaDriver#cuMemsetD2D32Async
     * @see JCudaDriver#cuMemsetD8
     * @see JCudaDriver#cuMemsetD8Async
     * @see JCudaDriver#cuMemsetD16
     * @see JCudaDriver#cuMemsetD16Async
     * @see JCudaDriver#cuMemsetD32Async
     */    
    public static int cuMemsetD32(CUdeviceptr dstDevice, int ui, long N)
    {
        // Call the native binding, then pass its status code to checkResult;
        // the value checkResult returns is the result of this method.
        final int status = cuMemsetD32Native(dstDevice, ui, N);
        return checkResult(status);
    }

    // Native counterpart of cuMemsetD32; same arguments, returns the status
    // code consumed by the public wrapper.
    private static native int cuMemsetD32Native(CUdeviceptr dstDevice, int ui, long N);



    /**
     * Initializes device memory.
     * 
     * <pre>
     * CUresult cuMemsetD2D8 (
     *      CUdeviceptr dstDevice,
     *      size_t dstPitch,
     *      unsigned char  uc,
     *      size_t Width,
     *      size_t Height )
     * </pre>
     * 
     * 
     *   Initializes device memory.  Sets the 2D
     *     memory range of Width 8-bit values to the specified value
     *     uc. Height specifies the number of rows to set,
     *     and dstPitch specifies the number of bytes between each row.
     *     This function performs fastest when the pitch is one that has been
     *     passed
     *     back by cuMemAllocPitch().
     *   
     *   Note that this function is asynchronous
     *     with respect to the host unless dstDevice refers to pinned
     *     host memory.
     *   
     *   
     *     Note:
     *     Note that this
     *       function may also return error codes from previous, asynchronous
     *       launches.
     *     
     *   
     *   
     * 
     * 
     * @param dstDevice Destination device pointer
     * @param dstPitch Pitch of destination device pointer
     * @param uc Value to set
     * @param Width Width of row
     * @param Height Number of rows
     * 
     * @return CUDA_SUCCESS, CUDA_ERROR_DEINITIALIZED, CUDA_ERROR_NOT_INITIALIZED,
     * CUDA_ERROR_INVALID_CONTEXT, CUDA_ERROR_INVALID_VALUE
     * 
     * @see JCudaDriver#cuArray3DCreate
     * @see JCudaDriver#cuArray3DGetDescriptor
     * @see JCudaDriver#cuArrayCreate
     * @see JCudaDriver#cuArrayDestroy
     * @see JCudaDriver#cuArrayGetDescriptor
     * @see JCudaDriver#cuMemAlloc
     * @see JCudaDriver#cuMemAllocHost
     * @see JCudaDriver#cuMemAllocPitch
     * @see JCudaDriver#cuMemcpy2D
     * @see JCudaDriver#cuMemcpy2DAsync
     * @see JCudaDriver#cuMemcpy2DUnaligned
     * @see JCudaDriver#cuMemcpy3D
     * @see JCudaDriver#cuMemcpy3DAsync
     * @see JCudaDriver#cuMemcpyAtoA
     * @see JCudaDriver#cuMemcpyAtoD
     * @see JCudaDriver#cuMemcpyAtoH
     * @see JCudaDriver#cuMemcpyAtoHAsync
     * @see JCudaDriver#cuMemcpyDtoA
     * @see JCudaDriver#cuMemcpyDtoD
     * @see JCudaDriver#cuMemcpyDtoDAsync
     * @see JCudaDriver#cuMemcpyDtoH
     * @see JCudaDriver#cuMemcpyDtoHAsync
     * @see JCudaDriver#cuMemcpyHtoA
     * @see JCudaDriver#cuMemcpyHtoAAsync
     * @see JCudaDriver#cuMemcpyHtoD
     * @see JCudaDriver#cuMemcpyHtoDAsync
     * @see JCudaDriver#cuMemFree
     * @see JCudaDriver#cuMemFreeHost
     * @see JCudaDriver#cuMemGetAddressRange
     * @see JCudaDriver#cuMemGetInfo
     * @see JCudaDriver#cuMemHostAlloc
     * @see JCudaDriver#cuMemHostGetDevicePointer
     * @see JCudaDriver#cuMemsetD2D8Async
     * @see JCudaDriver#cuMemsetD2D16
     * @see JCudaDriver#cuMemsetD2D16Async
     * @see JCudaDriver#cuMemsetD2D32
     * @see JCudaDriver#cuMemsetD2D32Async
     * @see JCudaDriver#cuMemsetD8
     * @see JCudaDriver#cuMemsetD8Async
     * @see JCudaDriver#cuMemsetD16
     * @see JCudaDriver#cuMemsetD16Async
     * @see JCudaDriver#cuMemsetD32
     * @see JCudaDriver#cuMemsetD32Async
     */    
    public static int cuMemsetD2D8(CUdeviceptr dstDevice, long dstPitch, byte uc, long Width, long Height)
    {
        // Call the native binding, then pass its status code to checkResult;
        // the value checkResult returns is the result of this method.
        final int status = cuMemsetD2D8Native(dstDevice, dstPitch, uc, Width, Height);
        return checkResult(status);
    }

    // Native counterpart of cuMemsetD2D8; same arguments, returns the status
    // code consumed by the public wrapper.
    private static native int cuMemsetD2D8Native(CUdeviceptr dstDevice, long dstPitch, byte uc, long Width, long Height);


    /**
     * Initializes device memory.
     * 
     * <pre>
     * CUresult cuMemsetD2D16 (
     *      CUdeviceptr dstDevice,
     *      size_t dstPitch,
     *      unsigned short us,
     *      size_t Width,
     *      size_t Height )
     * </pre>
     * 
     * 
     *   Initializes device memory.  Sets the 2D
     *     memory range of Width 16-bit values to the specified value
     *     us. Height specifies the number of rows to set,
     *     and dstPitch specifies the number of bytes between each row.
     *     The dstDevice pointer and dstPitch offset must be
     *     two byte aligned. This function performs fastest when the pitch is one
     *     that has been passed back by cuMemAllocPitch().
     *   
     *   Note that this function is asynchronous
     *     with respect to the host unless dstDevice refers to pinned
     *     host memory.
     *   
     *   
     *     Note:
     *     Note that this
     *       function may also return error codes from previous, asynchronous
     *       launches.
     *     
     *   
     *   
     * 
     * 
     * @param dstDevice Destination device pointer
     * @param dstPitch Pitch of destination device pointer
     * @param us Value to set
     * @param Width Width of row
     * @param Height Number of rows
     * 
     * @return CUDA_SUCCESS, CUDA_ERROR_DEINITIALIZED, CUDA_ERROR_NOT_INITIALIZED,
     * CUDA_ERROR_INVALID_CONTEXT, CUDA_ERROR_INVALID_VALUE
     * 
     * @see JCudaDriver#cuArray3DCreate
     * @see JCudaDriver#cuArray3DGetDescriptor
     * @see JCudaDriver#cuArrayCreate
     * @see JCudaDriver#cuArrayDestroy
     * @see JCudaDriver#cuArrayGetDescriptor
     * @see JCudaDriver#cuMemAlloc
     * @see JCudaDriver#cuMemAllocHost
     * @see JCudaDriver#cuMemAllocPitch
     * @see JCudaDriver#cuMemcpy2D
     * @see JCudaDriver#cuMemcpy2DAsync
     * @see JCudaDriver#cuMemcpy2DUnaligned
     * @see JCudaDriver#cuMemcpy3D
     * @see JCudaDriver#cuMemcpy3DAsync
     * @see JCudaDriver#cuMemcpyAtoA
     * @see JCudaDriver#cuMemcpyAtoD
     * @see JCudaDriver#cuMemcpyAtoH
     * @see JCudaDriver#cuMemcpyAtoHAsync
     * @see JCudaDriver#cuMemcpyDtoA
     * @see JCudaDriver#cuMemcpyDtoD
     * @see JCudaDriver#cuMemcpyDtoDAsync
     * @see JCudaDriver#cuMemcpyDtoH
     * @see JCudaDriver#cuMemcpyDtoHAsync
     * @see JCudaDriver#cuMemcpyHtoA
     * @see JCudaDriver#cuMemcpyHtoAAsync
     * @see JCudaDriver#cuMemcpyHtoD
     * @see JCudaDriver#cuMemcpyHtoDAsync
     * @see JCudaDriver#cuMemFree
     * @see JCudaDriver#cuMemFreeHost
     * @see JCudaDriver#cuMemGetAddressRange
     * @see JCudaDriver#cuMemGetInfo
     * @see JCudaDriver#cuMemHostAlloc
     * @see JCudaDriver#cuMemHostGetDevicePointer
     * @see JCudaDriver#cuMemsetD2D8
     * @see JCudaDriver#cuMemsetD2D8Async
     * @see JCudaDriver#cuMemsetD2D16Async
     * @see JCudaDriver#cuMemsetD2D32
     * @see JCudaDriver#cuMemsetD2D32Async
     * @see JCudaDriver#cuMemsetD8
     * @see JCudaDriver#cuMemsetD8Async
     * @see JCudaDriver#cuMemsetD16
     * @see JCudaDriver#cuMemsetD16Async
     * @see JCudaDriver#cuMemsetD32
     * @see JCudaDriver#cuMemsetD32Async
     */    
    public static int cuMemsetD2D16(CUdeviceptr dstDevice, long dstPitch, short us, long Width, long Height)
    {
        // Call the native binding, then pass its status code to checkResult;
        // the value checkResult returns is the result of this method.
        final int status = cuMemsetD2D16Native(dstDevice, dstPitch, us, Width, Height);
        return checkResult(status);
    }

    // Native counterpart of cuMemsetD2D16; same arguments, returns the status
    // code consumed by the public wrapper.
    private static native int cuMemsetD2D16Native(CUdeviceptr dstDevice, long dstPitch, short us, long Width, long Height);


    /**
     * Initializes device memory.
     * 
     * <pre>
     * CUresult cuMemsetD2D32 (
     *      CUdeviceptr dstDevice,
     *      size_t dstPitch,
     *      unsigned int  ui,
     *      size_t Width,
     *      size_t Height )
     * </pre>
     * 
     * 
     *   Initializes device memory.  Sets the 2D
     *     memory range of Width 32-bit values to the specified value
     *     ui. Height specifies the number of rows to set,
     *     and dstPitch specifies the number of bytes between each row.
     *     The dstDevice pointer and dstPitch offset must be
     *     four byte aligned. This function performs fastest when the pitch is
     *     one that has been passed back by cuMemAllocPitch().
     *   
     *   Note that this function is asynchronous
     *     with respect to the host unless dstDevice refers to pinned
     *     host memory.
     *   
     *   
     *     Note:
     *     Note that this
     *       function may also return error codes from previous, asynchronous
     *       launches.
     *     
     *   
     *   
     * 
     * 
     * @param dstDevice Destination device pointer
     * @param dstPitch Pitch of destination device pointer
     * @param ui Value to set
     * @param Width Width of row
     * @param Height Number of rows
     * 
     * @return CUDA_SUCCESS, CUDA_ERROR_DEINITIALIZED, CUDA_ERROR_NOT_INITIALIZED,
     * CUDA_ERROR_INVALID_CONTEXT, CUDA_ERROR_INVALID_VALUE
     * 
     * @see JCudaDriver#cuArray3DCreate
     * @see JCudaDriver#cuArray3DGetDescriptor
     * @see JCudaDriver#cuArrayCreate
     * @see JCudaDriver#cuArrayDestroy
     * @see JCudaDriver#cuArrayGetDescriptor
     * @see JCudaDriver#cuMemAlloc
     * @see JCudaDriver#cuMemAllocHost
     * @see JCudaDriver#cuMemAllocPitch
     * @see JCudaDriver#cuMemcpy2D
     * @see JCudaDriver#cuMemcpy2DAsync
     * @see JCudaDriver#cuMemcpy2DUnaligned
     * @see JCudaDriver#cuMemcpy3D
     * @see JCudaDriver#cuMemcpy3DAsync
     * @see JCudaDriver#cuMemcpyAtoA
     * @see JCudaDriver#cuMemcpyAtoD
     * @see JCudaDriver#cuMemcpyAtoH
     * @see JCudaDriver#cuMemcpyAtoHAsync
     * @see JCudaDriver#cuMemcpyDtoA
     * @see JCudaDriver#cuMemcpyDtoD
     * @see JCudaDriver#cuMemcpyDtoDAsync
     * @see JCudaDriver#cuMemcpyDtoH
     * @see JCudaDriver#cuMemcpyDtoHAsync
     * @see JCudaDriver#cuMemcpyHtoA
     * @see JCudaDriver#cuMemcpyHtoAAsync
     * @see JCudaDriver#cuMemcpyHtoD
     * @see JCudaDriver#cuMemcpyHtoDAsync
     * @see JCudaDriver#cuMemFree
     * @see JCudaDriver#cuMemFreeHost
     * @see JCudaDriver#cuMemGetAddressRange
     * @see JCudaDriver#cuMemGetInfo
     * @see JCudaDriver#cuMemHostAlloc
     * @see JCudaDriver#cuMemHostGetDevicePointer
     * @see JCudaDriver#cuMemsetD2D8
     * @see JCudaDriver#cuMemsetD2D8Async
     * @see JCudaDriver#cuMemsetD2D16
     * @see JCudaDriver#cuMemsetD2D16Async
     * @see JCudaDriver#cuMemsetD2D32Async
     * @see JCudaDriver#cuMemsetD8
     * @see JCudaDriver#cuMemsetD8Async
     * @see JCudaDriver#cuMemsetD16
     * @see JCudaDriver#cuMemsetD16Async
     * @see JCudaDriver#cuMemsetD32
     * @see JCudaDriver#cuMemsetD32Async
     */    
    public static int cuMemsetD2D32(CUdeviceptr dstDevice, long dstPitch, int ui, long Width, long Height)
    {
        // Call the native binding, then pass its status code to checkResult;
        // the value checkResult returns is the result of this method.
        final int status = cuMemsetD2D32Native(dstDevice, dstPitch, ui, Width, Height);
        return checkResult(status);
    }

    // Native counterpart of cuMemsetD2D32; same arguments, returns the status
    // code consumed by the public wrapper.
    private static native int cuMemsetD2D32Native(CUdeviceptr dstDevice, long dstPitch, int ui, long Width, long Height);


    /**
     * Sets device memory.
     * 
     * <pre>
     * CUresult cuMemsetD8Async (
     *      CUdeviceptr dstDevice,
     *      unsigned char  uc,
     *      size_t N,
     *      CUstream hStream )
     * </pre>
     * 
     * 
     *   Sets device memory.  Sets the memory
     *     range of N 8-bit values to the specified value uc.
     *   
     *   cuMemsetD8Async() is asynchronous and
     *     can optionally be associated to a stream by passing a non-zero stream argument.
     *   
     *   
     *     Note:
     *     Note that this
     *       function may also return error codes from previous, asynchronous
     *       launches.
     *     
     *   
     *   
     * 
     * 
     * @param dstDevice Destination device pointer
     * @param uc Value to set
     * @param N Number of elements
     * @param hStream Stream identifier
     * 
     * @return CUDA_SUCCESS, CUDA_ERROR_DEINITIALIZED, CUDA_ERROR_NOT_INITIALIZED,
     * CUDA_ERROR_INVALID_CONTEXT, CUDA_ERROR_INVALID_VALUE
     * 
     * @see JCudaDriver#cuArray3DCreate
     * @see JCudaDriver#cuArray3DGetDescriptor
     * @see JCudaDriver#cuArrayCreate
     * @see JCudaDriver#cuArrayDestroy
     * @see JCudaDriver#cuArrayGetDescriptor
     * @see JCudaDriver#cuMemAlloc
     * @see JCudaDriver#cuMemAllocHost
     * @see JCudaDriver#cuMemAllocPitch
     * @see JCudaDriver#cuMemcpy2D
     * @see JCudaDriver#cuMemcpy2DAsync
     * @see JCudaDriver#cuMemcpy2DUnaligned
     * @see JCudaDriver#cuMemcpy3D
     * @see JCudaDriver#cuMemcpy3DAsync
     * @see JCudaDriver#cuMemcpyAtoA
     * @see JCudaDriver#cuMemcpyAtoD
     * @see JCudaDriver#cuMemcpyAtoH
     * @see JCudaDriver#cuMemcpyAtoHAsync
     * @see JCudaDriver#cuMemcpyDtoA
     * @see JCudaDriver#cuMemcpyDtoD
     * @see JCudaDriver#cuMemcpyDtoDAsync
     * @see JCudaDriver#cuMemcpyDtoH
     * @see JCudaDriver#cuMemcpyDtoHAsync
     * @see JCudaDriver#cuMemcpyHtoA
     * @see JCudaDriver#cuMemcpyHtoAAsync
     * @see JCudaDriver#cuMemcpyHtoD
     * @see JCudaDriver#cuMemcpyHtoDAsync
     * @see JCudaDriver#cuMemFree
     * @see JCudaDriver#cuMemFreeHost
     * @see JCudaDriver#cuMemGetAddressRange
     * @see JCudaDriver#cuMemGetInfo
     * @see JCudaDriver#cuMemHostAlloc
     * @see JCudaDriver#cuMemHostGetDevicePointer
     * @see JCudaDriver#cuMemsetD2D8
     * @see JCudaDriver#cuMemsetD2D8Async
     * @see JCudaDriver#cuMemsetD2D16
     * @see JCudaDriver#cuMemsetD2D16Async
     * @see JCudaDriver#cuMemsetD2D32
     * @see JCudaDriver#cuMemsetD2D32Async
     * @see JCudaDriver#cuMemsetD8
     * @see JCudaDriver#cuMemsetD16
     * @see JCudaDriver#cuMemsetD16Async
     * @see JCudaDriver#cuMemsetD32
     * @see JCudaDriver#cuMemsetD32Async
     */    
    public static int cuMemsetD8Async(CUdeviceptr dstDevice, byte uc, long N, CUstream hStream)
    {
        // Call the native binding, then pass its status code to checkResult;
        // the value checkResult returns is the result of this method.
        final int status = cuMemsetD8AsyncNative(dstDevice, uc, N, hStream);
        return checkResult(status);
    }

    // Native counterpart of cuMemsetD8Async; same arguments, returns the
    // status code consumed by the public wrapper.
    private static native int cuMemsetD8AsyncNative(CUdeviceptr dstDevice, byte uc, long N, CUstream hStream);


    /**
     * Sets device memory.
     * 
     * <pre>
     * CUresult cuMemsetD16Async (
     *      CUdeviceptr dstDevice,
     *      unsigned short us,
     *      size_t N,
     *      CUstream hStream )
     * </pre>
     * 
     * 
     *   Sets device memory.  Sets the memory
     *     range of N 16-bit values to the specified value us.
     *     The dstDevice pointer must be two byte aligned.
     *   
     *   cuMemsetD16Async() is asynchronous and
     *     can optionally be associated to a stream by passing a non-zero stream argument.
     *   
     *   
     *     Note:
     *     Note that this
     *       function may also return error codes from previous, asynchronous
     *       launches.
     *     
     *   
     *   
     * 
     * 
     * @param dstDevice Destination device pointer
     * @param us Value to set
     * @param N Number of elements
     * @param hStream Stream identifier
     * 
     * @return CUDA_SUCCESS, CUDA_ERROR_DEINITIALIZED, CUDA_ERROR_NOT_INITIALIZED,
     * CUDA_ERROR_INVALID_CONTEXT, CUDA_ERROR_INVALID_VALUE
     * 
     * @see JCudaDriver#cuArray3DCreate
     * @see JCudaDriver#cuArray3DGetDescriptor
     * @see JCudaDriver#cuArrayCreate
     * @see JCudaDriver#cuArrayDestroy
     * @see JCudaDriver#cuArrayGetDescriptor
     * @see JCudaDriver#cuMemAlloc
     * @see JCudaDriver#cuMemAllocHost
     * @see JCudaDriver#cuMemAllocPitch
     * @see JCudaDriver#cuMemcpy2D
     * @see JCudaDriver#cuMemcpy2DAsync
     * @see JCudaDriver#cuMemcpy2DUnaligned
     * @see JCudaDriver#cuMemcpy3D
     * @see JCudaDriver#cuMemcpy3DAsync
     * @see JCudaDriver#cuMemcpyAtoA
     * @see JCudaDriver#cuMemcpyAtoD
     * @see JCudaDriver#cuMemcpyAtoH
     * @see JCudaDriver#cuMemcpyAtoHAsync
     * @see JCudaDriver#cuMemcpyDtoA
     * @see JCudaDriver#cuMemcpyDtoD
     * @see JCudaDriver#cuMemcpyDtoDAsync
     * @see JCudaDriver#cuMemcpyDtoH
     * @see JCudaDriver#cuMemcpyDtoHAsync
     * @see JCudaDriver#cuMemcpyHtoA
     * @see JCudaDriver#cuMemcpyHtoAAsync
     * @see JCudaDriver#cuMemcpyHtoD
     * @see JCudaDriver#cuMemcpyHtoDAsync
     * @see JCudaDriver#cuMemFree
     * @see JCudaDriver#cuMemFreeHost
     * @see JCudaDriver#cuMemGetAddressRange
     * @see JCudaDriver#cuMemGetInfo
     * @see JCudaDriver#cuMemHostAlloc
     * @see JCudaDriver#cuMemHostGetDevicePointer
     * @see JCudaDriver#cuMemsetD2D8
     * @see JCudaDriver#cuMemsetD2D8Async
     * @see JCudaDriver#cuMemsetD2D16
     * @see JCudaDriver#cuMemsetD2D16Async
     * @see JCudaDriver#cuMemsetD2D32
     * @see JCudaDriver#cuMemsetD2D32Async
     * @see JCudaDriver#cuMemsetD8
     * @see JCudaDriver#cuMemsetD8Async
     * @see JCudaDriver#cuMemsetD16
     * @see JCudaDriver#cuMemsetD32
     * @see JCudaDriver#cuMemsetD32Async
     */    
    public static int cuMemsetD16Async(CUdeviceptr dstDevice, short us, long N, CUstream hStream)
    {
        // Call the native binding, then pass its status code to checkResult;
        // the value checkResult returns is the result of this method.
        final int status = cuMemsetD16AsyncNative(dstDevice, us, N, hStream);
        return checkResult(status);
    }

    // Native counterpart of cuMemsetD16Async; same arguments, returns the
    // status code consumed by the public wrapper.
    private static native int cuMemsetD16AsyncNative(CUdeviceptr dstDevice, short us, long N, CUstream hStream);


    /**
     * Sets device memory.
     * 
     * <pre>
     * CUresult cuMemsetD32Async (
     *      CUdeviceptr dstDevice,
     *      unsigned int  ui,
     *      size_t N,
     *      CUstream hStream )
     * </pre>
     * 
     * 
     *   Sets device memory.  Sets the memory
     *     range of N 32-bit values to the specified value ui.
     *     The dstDevice pointer must be four byte aligned.
     *   
     *   cuMemsetD32Async() is asynchronous and
     *     can optionally be associated to a stream by passing a non-zero stream argument.
     *   
     *   
     *     Note:
     *     Note that this
     *       function may also return error codes from previous, asynchronous
     *       launches.
     *     
     *   
     *   
     * 
     * 
     * @param dstDevice Destination device pointer
     * @param ui Value to set
     * @param N Number of elements
     * @param hStream Stream identifier
     * 
     * @return CUDA_SUCCESS, CUDA_ERROR_DEINITIALIZED, CUDA_ERROR_NOT_INITIALIZED,
     * CUDA_ERROR_INVALID_CONTEXT, CUDA_ERROR_INVALID_VALUE
     * 
     * @see JCudaDriver#cuArray3DCreate
     * @see JCudaDriver#cuArray3DGetDescriptor
     * @see JCudaDriver#cuArrayCreate
     * @see JCudaDriver#cuArrayDestroy
     * @see JCudaDriver#cuArrayGetDescriptor
     * @see JCudaDriver#cuMemAlloc
     * @see JCudaDriver#cuMemAllocHost
     * @see JCudaDriver#cuMemAllocPitch
     * @see JCudaDriver#cuMemcpy2D
     * @see JCudaDriver#cuMemcpy2DAsync
     * @see JCudaDriver#cuMemcpy2DUnaligned
     * @see JCudaDriver#cuMemcpy3D
     * @see JCudaDriver#cuMemcpy3DAsync
     * @see JCudaDriver#cuMemcpyAtoA
     * @see JCudaDriver#cuMemcpyAtoD
     * @see JCudaDriver#cuMemcpyAtoH
     * @see JCudaDriver#cuMemcpyAtoHAsync
     * @see JCudaDriver#cuMemcpyDtoA
     * @see JCudaDriver#cuMemcpyDtoD
     * @see JCudaDriver#cuMemcpyDtoDAsync
     * @see JCudaDriver#cuMemcpyDtoH
     * @see JCudaDriver#cuMemcpyDtoHAsync
     * @see JCudaDriver#cuMemcpyHtoA
     * @see JCudaDriver#cuMemcpyHtoAAsync
     * @see JCudaDriver#cuMemcpyHtoD
     * @see JCudaDriver#cuMemcpyHtoDAsync
     * @see JCudaDriver#cuMemFree
     * @see JCudaDriver#cuMemFreeHost
     * @see JCudaDriver#cuMemGetAddressRange
     * @see JCudaDriver#cuMemGetInfo
     * @see JCudaDriver#cuMemHostAlloc
     * @see JCudaDriver#cuMemHostGetDevicePointer
     * @see JCudaDriver#cuMemsetD2D8
     * @see JCudaDriver#cuMemsetD2D8Async
     * @see JCudaDriver#cuMemsetD2D16
     * @see JCudaDriver#cuMemsetD2D16Async
     * @see JCudaDriver#cuMemsetD2D32
     * @see JCudaDriver#cuMemsetD2D32Async
     * @see JCudaDriver#cuMemsetD8
     * @see JCudaDriver#cuMemsetD8Async
     * @see JCudaDriver#cuMemsetD16
     * @see JCudaDriver#cuMemsetD16Async
     * @see JCudaDriver#cuMemsetD32
     */    
    public static int cuMemsetD32Async(CUdeviceptr dstDevice, int ui, long N, CUstream hStream)
    {
        // Invoke the native binding first, then hand its status code to checkResult.
        final int status = cuMemsetD32AsyncNative(dstDevice, ui, N, hStream);
        return checkResult(status);
    }

    // Native counterpart of cuMemsetD32Async; returns the raw CUresult status code.
    private static native int cuMemsetD32AsyncNative(
        CUdeviceptr dstDevice, int ui, long N, CUstream hStream);



    /**
     * Sets device memory.
     * 
     *      * CUresult cuMemsetD2D8Async (
     *      CUdeviceptr dstDevice,
     *      size_t dstPitch,
     *      unsigned char  uc,
     *      size_t Width,
     *      size_t Height,
     *      CUstream hStream )
     * 
     * 
     *   Sets device memory.  Sets the 2D memory
     *     range of Width 8-bit values to the specified value uc. Height specifies the number of rows to set, and
     *     dstPitch specifies the number of bytes between each row. This
     *     function performs fastest when the pitch is one that has been passed
     *     back by cuMemAllocPitch().
     *   
     *   cuMemsetD2D8Async() is asynchronous and
     *     can optionally be associated to a stream by passing a non-zero stream argument.
     *   
     *   
     *     Note:
     *     Note that this
     *       function may also return error codes from previous, asynchronous
     *       launches.
     *     
     *   
     *   
     * 
     * 
     * @param dstDevice Destination device pointer
     * @param dstPitch Pitch of destination device pointer
     * @param uc Value to set
     * @param Width Width of row
     * @param Height Number of rows
     * @param hStream Stream identifier
     * 
     * @return CUDA_SUCCESS, CUDA_ERROR_DEINITIALIZED, CUDA_ERROR_NOT_INITIALIZED,
     * CUDA_ERROR_INVALID_CONTEXT, CUDA_ERROR_INVALID_VALUE
     * 
     * @see JCudaDriver#cuArray3DCreate
     * @see JCudaDriver#cuArray3DGetDescriptor
     * @see JCudaDriver#cuArrayCreate
     * @see JCudaDriver#cuArrayDestroy
     * @see JCudaDriver#cuArrayGetDescriptor
     * @see JCudaDriver#cuMemAlloc
     * @see JCudaDriver#cuMemAllocHost
     * @see JCudaDriver#cuMemAllocPitch
     * @see JCudaDriver#cuMemcpy2D
     * @see JCudaDriver#cuMemcpy2DAsync
     * @see JCudaDriver#cuMemcpy2DUnaligned
     * @see JCudaDriver#cuMemcpy3D
     * @see JCudaDriver#cuMemcpy3DAsync
     * @see JCudaDriver#cuMemcpyAtoA
     * @see JCudaDriver#cuMemcpyAtoD
     * @see JCudaDriver#cuMemcpyAtoH
     * @see JCudaDriver#cuMemcpyAtoHAsync
     * @see JCudaDriver#cuMemcpyDtoA
     * @see JCudaDriver#cuMemcpyDtoD
     * @see JCudaDriver#cuMemcpyDtoDAsync
     * @see JCudaDriver#cuMemcpyDtoH
     * @see JCudaDriver#cuMemcpyDtoHAsync
     * @see JCudaDriver#cuMemcpyHtoA
     * @see JCudaDriver#cuMemcpyHtoAAsync
     * @see JCudaDriver#cuMemcpyHtoD
     * @see JCudaDriver#cuMemcpyHtoDAsync
     * @see JCudaDriver#cuMemFree
     * @see JCudaDriver#cuMemFreeHost
     * @see JCudaDriver#cuMemGetAddressRange
     * @see JCudaDriver#cuMemGetInfo
     * @see JCudaDriver#cuMemHostAlloc
     * @see JCudaDriver#cuMemHostGetDevicePointer
     * @see JCudaDriver#cuMemsetD2D8
     * @see JCudaDriver#cuMemsetD2D16
     * @see JCudaDriver#cuMemsetD2D16Async
     * @see JCudaDriver#cuMemsetD2D32
     * @see JCudaDriver#cuMemsetD2D32Async
     * @see JCudaDriver#cuMemsetD8
     * @see JCudaDriver#cuMemsetD8Async
     * @see JCudaDriver#cuMemsetD16
     * @see JCudaDriver#cuMemsetD16Async
     * @see JCudaDriver#cuMemsetD32
     * @see JCudaDriver#cuMemsetD32Async
     */    
    public static int cuMemsetD2D8Async(CUdeviceptr dstDevice, long dstPitch, byte uc, long Width, long Height, CUstream hStream)
    {
        // Invoke the native binding first, then hand its status code to checkResult.
        final int status = cuMemsetD2D8AsyncNative(
            dstDevice, dstPitch, uc, Width, Height, hStream);
        return checkResult(status);
    }

    // Native counterpart of cuMemsetD2D8Async; returns the raw CUresult status code.
    private static native int cuMemsetD2D8AsyncNative(
        CUdeviceptr dstDevice, long dstPitch, byte uc, long Width, long Height, CUstream hStream);


    /**
     * Sets device memory.
     * 
     *      * CUresult cuMemsetD2D16Async (
     *      CUdeviceptr dstDevice,
     *      size_t dstPitch,
     *      unsigned short us,
     *      size_t Width,
     *      size_t Height,
     *      CUstream hStream )
     * 
     * 
     *   Sets device memory.  Sets the 2D memory
     *     range of Width 16-bit values to the specified value us. Height specifies the number of rows to set, and
     *     dstPitch specifies the number of bytes between each row. The
     *     dstDevice pointer and dstPitch offset must be two
     *     byte aligned. This function performs fastest when the pitch is one that
     *     has been passed back by cuMemAllocPitch().
     *   
     *   cuMemsetD2D16Async() is asynchronous
     *     and can optionally be associated to a stream by passing a non-zero stream argument.
     *   
     *   
     *     Note:
     *     Note that this
     *       function may also return error codes from previous, asynchronous
     *       launches.
     *     
     *   
     *   
     * 
     * 
     * @param dstDevice Destination device pointer
     * @param dstPitch Pitch of destination device pointer
     * @param us Value to set
     * @param Width Width of row
     * @param Height Number of rows
     * @param hStream Stream identifier
     * 
     * @return CUDA_SUCCESS, CUDA_ERROR_DEINITIALIZED, CUDA_ERROR_NOT_INITIALIZED,
     * CUDA_ERROR_INVALID_CONTEXT, CUDA_ERROR_INVALID_VALUE
     * 
     * @see JCudaDriver#cuArray3DCreate
     * @see JCudaDriver#cuArray3DGetDescriptor
     * @see JCudaDriver#cuArrayCreate
     * @see JCudaDriver#cuArrayDestroy
     * @see JCudaDriver#cuArrayGetDescriptor
     * @see JCudaDriver#cuMemAlloc
     * @see JCudaDriver#cuMemAllocHost
     * @see JCudaDriver#cuMemAllocPitch
     * @see JCudaDriver#cuMemcpy2D
     * @see JCudaDriver#cuMemcpy2DAsync
     * @see JCudaDriver#cuMemcpy2DUnaligned
     * @see JCudaDriver#cuMemcpy3D
     * @see JCudaDriver#cuMemcpy3DAsync
     * @see JCudaDriver#cuMemcpyAtoA
     * @see JCudaDriver#cuMemcpyAtoD
     * @see JCudaDriver#cuMemcpyAtoH
     * @see JCudaDriver#cuMemcpyAtoHAsync
     * @see JCudaDriver#cuMemcpyDtoA
     * @see JCudaDriver#cuMemcpyDtoD
     * @see JCudaDriver#cuMemcpyDtoDAsync
     * @see JCudaDriver#cuMemcpyDtoH
     * @see JCudaDriver#cuMemcpyDtoHAsync
     * @see JCudaDriver#cuMemcpyHtoA
     * @see JCudaDriver#cuMemcpyHtoAAsync
     * @see JCudaDriver#cuMemcpyHtoD
     * @see JCudaDriver#cuMemcpyHtoDAsync
     * @see JCudaDriver#cuMemFree
     * @see JCudaDriver#cuMemFreeHost
     * @see JCudaDriver#cuMemGetAddressRange
     * @see JCudaDriver#cuMemGetInfo
     * @see JCudaDriver#cuMemHostAlloc
     * @see JCudaDriver#cuMemHostGetDevicePointer
     * @see JCudaDriver#cuMemsetD2D8
     * @see JCudaDriver#cuMemsetD2D8Async
     * @see JCudaDriver#cuMemsetD2D16
     * @see JCudaDriver#cuMemsetD2D32
     * @see JCudaDriver#cuMemsetD2D32Async
     * @see JCudaDriver#cuMemsetD8
     * @see JCudaDriver#cuMemsetD8Async
     * @see JCudaDriver#cuMemsetD16
     * @see JCudaDriver#cuMemsetD16Async
     * @see JCudaDriver#cuMemsetD32
     * @see JCudaDriver#cuMemsetD32Async
     */    
    public static int cuMemsetD2D16Async(CUdeviceptr dstDevice, long dstPitch, short us, long Width, long Height, CUstream hStream)
    {
        // Invoke the native binding first, then hand its status code to checkResult.
        final int status = cuMemsetD2D16AsyncNative(
            dstDevice, dstPitch, us, Width, Height, hStream);
        return checkResult(status);
    }

    // Native counterpart of cuMemsetD2D16Async; returns the raw CUresult status code.
    private static native int cuMemsetD2D16AsyncNative(
        CUdeviceptr dstDevice, long dstPitch, short us, long Width, long Height, CUstream hStream);


    /**
     * Sets device memory.
     * 
     *      * CUresult cuMemsetD2D32Async (
     *      CUdeviceptr dstDevice,
     *      size_t dstPitch,
     *      unsigned int  ui,
     *      size_t Width,
     *      size_t Height,
     *      CUstream hStream )
     * 
     * 
     *   Sets device memory.  Sets the 2D memory
     *     range of Width 32-bit values to the specified value ui. Height specifies the number of rows to set, and
     *     dstPitch specifies the number of bytes between each row. The
     *     dstDevice pointer and dstPitch offset must be four
     *     byte aligned. This function performs fastest when the pitch is one that
     *     has been passed back by cuMemAllocPitch().
     *   
     *   cuMemsetD2D32Async() is asynchronous
     *     and can optionally be associated to a stream by passing a non-zero stream argument.
     *   
     *   
     *     Note:
     *     Note that this
     *       function may also return error codes from previous, asynchronous
     *       launches.
     *     
     *   
     *   
     * 
     * 
     * @param dstDevice Destination device pointer
     * @param dstPitch Pitch of destination device pointer
     * @param ui Value to set
     * @param Width Width of row
     * @param Height Number of rows
     * @param hStream Stream identifier
     * 
     * @return CUDA_SUCCESS, CUDA_ERROR_DEINITIALIZED, CUDA_ERROR_NOT_INITIALIZED,
     * CUDA_ERROR_INVALID_CONTEXT, CUDA_ERROR_INVALID_VALUE
     * 
     * @see JCudaDriver#cuArray3DCreate
     * @see JCudaDriver#cuArray3DGetDescriptor
     * @see JCudaDriver#cuArrayCreate
     * @see JCudaDriver#cuArrayDestroy
     * @see JCudaDriver#cuArrayGetDescriptor
     * @see JCudaDriver#cuMemAlloc
     * @see JCudaDriver#cuMemAllocHost
     * @see JCudaDriver#cuMemAllocPitch
     * @see JCudaDriver#cuMemcpy2D
     * @see JCudaDriver#cuMemcpy2DAsync
     * @see JCudaDriver#cuMemcpy2DUnaligned
     * @see JCudaDriver#cuMemcpy3D
     * @see JCudaDriver#cuMemcpy3DAsync
     * @see JCudaDriver#cuMemcpyAtoA
     * @see JCudaDriver#cuMemcpyAtoD
     * @see JCudaDriver#cuMemcpyAtoH
     * @see JCudaDriver#cuMemcpyAtoHAsync
     * @see JCudaDriver#cuMemcpyDtoA
     * @see JCudaDriver#cuMemcpyDtoD
     * @see JCudaDriver#cuMemcpyDtoDAsync
     * @see JCudaDriver#cuMemcpyDtoH
     * @see JCudaDriver#cuMemcpyDtoHAsync
     * @see JCudaDriver#cuMemcpyHtoA
     * @see JCudaDriver#cuMemcpyHtoAAsync
     * @see JCudaDriver#cuMemcpyHtoD
     * @see JCudaDriver#cuMemcpyHtoDAsync
     * @see JCudaDriver#cuMemFree
     * @see JCudaDriver#cuMemFreeHost
     * @see JCudaDriver#cuMemGetAddressRange
     * @see JCudaDriver#cuMemGetInfo
     * @see JCudaDriver#cuMemHostAlloc
     * @see JCudaDriver#cuMemHostGetDevicePointer
     * @see JCudaDriver#cuMemsetD2D8
     * @see JCudaDriver#cuMemsetD2D8Async
     * @see JCudaDriver#cuMemsetD2D16
     * @see JCudaDriver#cuMemsetD2D16Async
     * @see JCudaDriver#cuMemsetD2D32
     * @see JCudaDriver#cuMemsetD8
     * @see JCudaDriver#cuMemsetD8Async
     * @see JCudaDriver#cuMemsetD16
     * @see JCudaDriver#cuMemsetD16Async
     * @see JCudaDriver#cuMemsetD32
     * @see JCudaDriver#cuMemsetD32Async
     */    
    public static int cuMemsetD2D32Async(CUdeviceptr dstDevice, long dstPitch, int ui, long Width, long Height, CUstream hStream)
    {
        // Invoke the native binding first, then hand its status code to checkResult.
        final int status = cuMemsetD2D32AsyncNative(
            dstDevice, dstPitch, ui, Width, Height, hStream);
        return checkResult(status);
    }

    // Native counterpart of cuMemsetD2D32Async; returns the raw CUresult status code.
    private static native int cuMemsetD2D32AsyncNative(
        CUdeviceptr dstDevice, long dstPitch, int ui, long Width, long Height, CUstream hStream);


    /**
     * Returns information about a function.
     * 
     *      * CUresult cuFuncGetAttribute (
     *      int* pi,
     *      CUfunction_attribute attrib,
     *      CUfunction hfunc )
     * 
     * 
     *   Returns information about a function. 
     *     Returns in *pi the integer value of the attribute attrib on the kernel given by hfunc. The supported
     *     attributes are:
     *   

     *     
     *       CU_FUNC_ATTRIBUTE_MAX_THREADS_PER_BLOCK:
     *         The maximum number of threads per block, beyond which a launch of the
     *         function would fail. This number depends on both the
     *         function and the device on which
     *         the function is currently loaded.
     *       
     *     
     *     
     *       CU_FUNC_ATTRIBUTE_SHARED_SIZE_BYTES:
     *         The size in bytes of statically-allocated shared memory per block
     *         required by this function. This does not include dynamically-allocated
     *         shared memory requested by the
     *         user at runtime.
     *       
     *     
     *     
     *       CU_FUNC_ATTRIBUTE_CONST_SIZE_BYTES:
     *         The size in bytes of user-allocated constant memory required by this
     *         function.
     *       
     *     
     *     
     *       CU_FUNC_ATTRIBUTE_LOCAL_SIZE_BYTES:
     *         The size in bytes of local memory used by each thread of this
     *         function.
     *       
     *     
     *     
     *       CU_FUNC_ATTRIBUTE_NUM_REGS:
     *         The number of registers used by each thread of this function.
     *       
     *     
     *     
     *       CU_FUNC_ATTRIBUTE_PTX_VERSION:
     *         The PTX virtual architecture version for which the function was
     *         compiled. This value is the major PTX version * 10 + the
     *         minor PTX version, so a PTX
     *         version 1.3 function would return the value 13. Note that this may
     *         return the undefined value
     *         of 0 for cubins compiled prior
     *         to CUDA 3.0.
     *       
     *     
     *     
     *       CU_FUNC_ATTRIBUTE_BINARY_VERSION:
     *         The binary architecture version for which the function was compiled.
     *         This value is the major binary version * 10 + the minor
     *         binary version, so a binary
     *         version 1.3 function would return the value 13. Note that this will
     *         return a value of 10 for legacy
     *         cubins that do not have a
     *         properly-encoded binary architecture version.
     *       
     *     
     *   
     *   
     *   
     *     Note:
     *     Note that this
     *       function may also return error codes from previous, asynchronous
     *       launches.
     *     
     *   
     *   
     * 
     * 
     * @param pi Returned attribute value
     * @param attrib Attribute requested
     * @param func Function to query attribute of
     * 
     * @return CUDA_SUCCESS, CUDA_ERROR_DEINITIALIZED, CUDA_ERROR_NOT_INITIALIZED,
     * CUDA_ERROR_INVALID_CONTEXT, CUDA_ERROR_INVALID_HANDLE,
     * CUDA_ERROR_INVALID_VALUE
     * 
     * @see JCudaDriver#cuCtxGetCacheConfig
     * @see JCudaDriver#cuCtxSetCacheConfig
     * @see JCudaDriver#cuFuncSetCacheConfig
     * @see JCudaDriver#cuLaunchKernel
     */    
    public static int cuFuncGetAttribute(int[] pi, int attrib, CUfunction func)
    {
        // Invoke the native binding first, then hand its status code to checkResult.
        final int status = cuFuncGetAttributeNative(pi, attrib, func);
        return checkResult(status);
    }

    // Native counterpart of cuFuncGetAttribute; returns the raw CUresult status code.
    private static native int cuFuncGetAttributeNative(int[] pi, int attrib, CUfunction func);


    /**
     * Sets the block-dimensions for the function.
     * 
     *      * CUresult cuFuncSetBlockShape (
     *      CUfunction hfunc,
     *      int  x,
     *      int  y,
     *      int  z )
     * 
     * 
     *   Sets the block-dimensions for the
     *     function.  
     *     Deprecated. Specifies the x, y, and z dimensions of the thread blocks that are
     *     created when the kernel given by hfunc is launched.
     *   
     *   
     *     Note:
     *     Note that this
     *       function may also return error codes from previous, asynchronous
     *       launches.
     *     
     *   
     *   
     * 
     * 
     * @param hfunc Kernel to specify dimensions of
     * @param x X dimension
     * @param y Y dimension
     * @param z Z dimension
     * 
     * @return CUDA_SUCCESS, CUDA_ERROR_DEINITIALIZED, CUDA_ERROR_NOT_INITIALIZED,
     * CUDA_ERROR_INVALID_CONTEXT, CUDA_ERROR_INVALID_HANDLE,
     * CUDA_ERROR_INVALID_VALUE
     * 
     * @see JCudaDriver#cuFuncSetSharedSize
     * @see JCudaDriver#cuFuncSetCacheConfig
     * @see JCudaDriver#cuFuncGetAttribute
     * @see JCudaDriver#cuParamSetSize
     * @see JCudaDriver#cuParamSeti
     * @see JCudaDriver#cuParamSetf
     * @see JCudaDriver#cuParamSetv
     * @see JCudaDriver#cuLaunch
     * @see JCudaDriver#cuLaunchGrid
     * @see JCudaDriver#cuLaunchGridAsync
     * @see JCudaDriver#cuLaunchKernel
     */    
    public static int cuFuncSetBlockShape(CUfunction hfunc, int x, int y, int z)
    {
        // Invoke the native binding first, then hand its status code to checkResult.
        final int status = cuFuncSetBlockShapeNative(hfunc, x, y, z);
        return checkResult(status);
    }

    // Native counterpart of cuFuncSetBlockShape; returns the raw CUresult status code.
    private static native int cuFuncSetBlockShapeNative(
        CUfunction hfunc, int x, int y, int z);


    /**
     * Sets the dynamic shared-memory size for the function.
     * 
     *      * CUresult cuFuncSetSharedSize (
     *      CUfunction hfunc,
     *      unsigned int  bytes )
     * 
     * 
     *   Sets the dynamic shared-memory size for
     *     the function.  
     *     Deprecated. Sets through bytes
     *     the amount of dynamic shared memory that will be available to each
     *     thread block when the kernel given by hfunc is launched.
     *   
     *   
     *     Note:
     *     Note that this
     *       function may also return error codes from previous, asynchronous
     *       launches.
     *     
     *   
     *   
     * 
     * 
     * @param hfunc Kernel to specify dynamic shared-memory size for
     * @param bytes Dynamic shared-memory size per thread block in bytes
     * 
     * @return CUDA_SUCCESS, CUDA_ERROR_DEINITIALIZED, CUDA_ERROR_NOT_INITIALIZED,
     * CUDA_ERROR_INVALID_CONTEXT, CUDA_ERROR_INVALID_HANDLE,
     * CUDA_ERROR_INVALID_VALUE
     * 
     * @see JCudaDriver#cuFuncSetBlockShape
     * @see JCudaDriver#cuFuncSetCacheConfig
     * @see JCudaDriver#cuFuncGetAttribute
     * @see JCudaDriver#cuParamSetSize
     * @see JCudaDriver#cuParamSeti
     * @see JCudaDriver#cuParamSetf
     * @see JCudaDriver#cuParamSetv
     * @see JCudaDriver#cuLaunch
     * @see JCudaDriver#cuLaunchGrid
     * @see JCudaDriver#cuLaunchGridAsync
     * @see JCudaDriver#cuLaunchKernel
     */    
    public static int cuFuncSetSharedSize(CUfunction hfunc, int bytes)
    {
        // Invoke the native binding first, then hand its status code to checkResult.
        final int status = cuFuncSetSharedSizeNative(hfunc, bytes);
        return checkResult(status);
    }

    // Native counterpart of cuFuncSetSharedSize; returns the raw CUresult status code.
    private static native int cuFuncSetSharedSizeNative(
        CUfunction hfunc, int bytes);


    /**
     * Sets the preferred cache configuration for a device function.
     * 
     *      * CUresult cuFuncSetCacheConfig (
     *      CUfunction hfunc,
     *      CUfunc_cache config )
     * 
     * 
     *   Sets the preferred cache configuration
     *     for a device function.  On devices where the L1 cache and shared memory
     *     use the same
     *     hardware resources, this sets through
     *     config the preferred cache configuration for the device
     *     function hfunc. This is only a preference. The driver will
     *     use the requested configuration if possible, but it is free to choose
     *     a different
     *     configuration if required to execute hfunc. Any context-wide preference set via cuCtxSetCacheConfig()
     *     will be overridden by this per-function setting unless the per-function
     *     setting is CU_FUNC_CACHE_PREFER_NONE. In that case, the current
     *     context-wide setting will be used.
     *   
     *   This setting does nothing on devices
     *     where the size of the L1 cache and shared memory are fixed.
     *   
     *   Launching a kernel with a different
     *     preference than the most recent preference setting may insert a
     *     device-side synchronization
     *     point.
     *   
     *   The supported cache configurations are:
     *   

     *     
     *       CU_FUNC_CACHE_PREFER_NONE: no
     *         preference for shared memory or L1 (default)
     *       
     *     
     *     
     *       CU_FUNC_CACHE_PREFER_SHARED:
     *         prefer larger shared memory and smaller L1 cache
     *       
     *     
     *     
     *       CU_FUNC_CACHE_PREFER_L1: prefer
     *         larger L1 cache and smaller shared memory
     *       
     *     
     *     
     *       CU_FUNC_CACHE_PREFER_EQUAL:
     *         prefer equal sized L1 cache and shared memory
     *       
     *     
     *   
     *   
     *   
     *     Note:
     *     Note that this
     *       function may also return error codes from previous, asynchronous
     *       launches.
     *     
     *   
     *   
     * 
     * 
     * @param hfunc Kernel to configure cache for
     * @param config Requested cache configuration
     * 
     * @return CUDA_SUCCESS, CUDA_ERROR_INVALID_VALUE, CUDA_ERROR_DEINITIALIZED,
     * CUDA_ERROR_NOT_INITIALIZED, CUDA_ERROR_INVALID_CONTEXT
     * 
     * @see JCudaDriver#cuCtxGetCacheConfig
     * @see JCudaDriver#cuCtxSetCacheConfig
     * @see JCudaDriver#cuFuncGetAttribute
     * @see JCudaDriver#cuLaunchKernel
     */    
    public static int cuFuncSetCacheConfig(CUfunction hfunc, int config)
    {
        // Invoke the native binding first, then hand its status code to checkResult.
        final int status = cuFuncSetCacheConfigNative(hfunc, config);
        return checkResult(status);
    }

    // Native counterpart of cuFuncSetCacheConfig; returns the raw CUresult status code.
    private static native int cuFuncSetCacheConfigNative(
        CUfunction hfunc, int config);


    /**
     * Sets the shared memory configuration for a device function.
     * 
     *      * CUresult cuFuncSetSharedMemConfig (
     *      CUfunction hfunc,
     *      CUsharedconfig config )
     * 
     * 
     *   Sets the shared memory configuration for
     *     a device function.  On devices with configurable shared memory banks,
     *     this function
     *     will force all subsequent launches of
     *     the specified device function to have the given shared memory bank size
     *     configuration.
     *     On any given launch of the function, the
     *     shared memory configuration of the device will be temporarily changed
     *     if needed to
     *     suit the function's preferred
     *     configuration. Changes in shared memory configuration between subsequent
     *     launches of functions,
     *     may introduce a device side synchronization
     *     point.
     *   
     *   Any per-function setting of shared
     *     memory bank size set via cuFuncSetSharedMemConfig will override the
     *     context wide setting set with cuCtxSetSharedMemConfig.
     *   
     *   Changing the shared memory bank size
     *     will not increase shared memory usage or affect occupancy of kernels,
     *     but may have major
     *     effects on performance. Larger bank sizes
     *     will allow for greater potential bandwidth to shared memory, but will
     *     change what
     *     kinds of accesses to shared memory will
     *     result in bank conflicts.
     *   
     *   This function will do nothing on devices
     *     with fixed shared memory bank size.
     *   
     *   The supported bank configurations are:
     *   

     *     
     *       CU_SHARED_MEM_CONFIG_DEFAULT_BANK_SIZE:
     *         use the context's shared memory configuration when launching this
     *         function.
     *       
     *     
     *     
     *       CU_SHARED_MEM_CONFIG_FOUR_BYTE_BANK_SIZE: set shared memory bank width
     *         to be natively four bytes when launching this function.
     *       
     *     
     *     
     *       CU_SHARED_MEM_CONFIG_EIGHT_BYTE_BANK_SIZE: set shared memory bank
     *         width to be natively eight bytes when launching this function.
     *       
     *     
     *   
     *   
     *   
     *     Note:
     *     Note that this
     *       function may also return error codes from previous, asynchronous
     *       launches.
     *     
     *   
     *   
     * 
     * 
     * @param hfunc kernel to be given a shared memory config
     * @param config requested shared memory configuration
     * 
     * @return CUDA_SUCCESS, CUDA_ERROR_INVALID_VALUE, CUDA_ERROR_DEINITIALIZED,
     * CUDA_ERROR_NOT_INITIALIZED, CUDA_ERROR_INVALID_CONTEXT
     * 
     * @see JCudaDriver#cuCtxGetCacheConfig
     * @see JCudaDriver#cuCtxSetCacheConfig
     * @see JCudaDriver#cuCtxGetSharedMemConfig
     * @see JCudaDriver#cuCtxSetSharedMemConfig
     * @see JCudaDriver#cuFuncGetAttribute
     * @see JCudaDriver#cuLaunchKernel
     */    
    public static int cuFuncSetSharedMemConfig(CUfunction hfunc, int config)
    {
        // Invoke the native binding first, then hand its status code to checkResult.
        final int status = cuFuncSetSharedMemConfigNative(hfunc, config);
        return checkResult(status);
    }

    // Native counterpart of cuFuncSetSharedMemConfig; returns the raw CUresult status code.
    private static native int cuFuncSetSharedMemConfigNative(
        CUfunction hfunc, int config);

    /**
     * Creates a 1D or 2D CUDA array.
     * 
     *      * CUresult cuArrayCreate (
     *      CUarray* pHandle,
     *      const CUDA_ARRAY_DESCRIPTOR* pAllocateArray )
     * 
     * 
     *   Creates a 1D or 2D CUDA array.  Creates
     *     a CUDA array according to the CUDA_ARRAY_DESCRIPTOR structure pAllocateArray and returns a handle to the new CUDA array in *pHandle. The CUDA_ARRAY_DESCRIPTOR is defined as:
     *   
     *       typedef struct {
     *         unsigned int Width;
     *         unsigned int Height;
     *         CUarray_format Format;
     *         unsigned int NumChannels;
     *     } CUDA_ARRAY_DESCRIPTOR;
     *   where:
     *   
     *     
     *       Width, and Height are the width, and height of the CUDA array (in elements);
     *         the CUDA array is one-dimensional if height is 0, two-dimensional
     *         otherwise;
     *       
     *     
     *     
     *       
     *         Format specifies the format
     *         of the elements; CUarray_format is defined as: 
     *             typedef enum
     * CUarray_format_enum {
     *         CU_AD_FORMAT_UNSIGNED_INT8 = 0x01,
     *         CU_AD_FORMAT_UNSIGNED_INT16 = 0x02,
     *         CU_AD_FORMAT_UNSIGNED_INT32 = 0x03,
     *         CU_AD_FORMAT_SIGNED_INT8 = 0x08,
     *         CU_AD_FORMAT_SIGNED_INT16 = 0x09,
     *         CU_AD_FORMAT_SIGNED_INT32 = 0x0a,
     *         CU_AD_FORMAT_HALF = 0x10,
     *         CU_AD_FORMAT_FLOAT = 0x20
     *     } CUarray_format;
     *       
     *     
     *     
     *       NumChannels specifies
     *         the number of packed components per CUDA array element; it may be 1,
     *         2, or 4;
     *       
     *     
     *   
     *   
     *   Here are examples of CUDA array
     *     descriptions:
     *   
     *   Description for a CUDA array of 2048
     *     floats: 
     *   
     *     CUDA_ARRAY_DESCRIPTOR desc;
     *     desc.Format = CU_AD_FORMAT_FLOAT;
     *     desc.NumChannels = 1;
     *     desc.Width = 2048;
     *     desc.Height = 1;
     *   
     *   Description for a 64 x 64 CUDA array of
     *     floats: 
     *   
     *     CUDA_ARRAY_DESCRIPTOR desc;
     *     desc.Format = CU_AD_FORMAT_FLOAT;
     *     desc.NumChannels = 1;
     *     desc.Width = 64;
     *     desc.Height = 64;
     *   
     *   Description for a width x height CUDA array of 64-bit, 4x16-bit float16's: 
     *   
   
     *     CUDA_ARRAY_DESCRIPTOR desc;
     *     desc.Format = CU_AD_FORMAT_HALF;
     *     desc.NumChannels = 4;
     *     desc.Width = width;
     *     desc.Height = height;
     *   
     *   Description for a width x height CUDA array of 16-bit elements, each of which is two 8-bit
     *     unsigned chars: 
     *   
     *     CUDA_ARRAY_DESCRIPTOR desc;
     *     desc.Format = CU_AD_FORMAT_UNSIGNED_INT8;
     *     desc.NumChannels = 2;
     *     desc.Width = width;
     *     desc.Height = height;
     *   
     *   
     *     Note:
     *     Note that this
     *       function may also return error codes from previous, asynchronous
     *       launches.
     *     
     *   
     *   
     * 
     * 
     * @param pHandle Returned array
     * @param pAllocateArray Array descriptor
     * 
     * @return CUDA_SUCCESS, CUDA_ERROR_DEINITIALIZED, CUDA_ERROR_NOT_INITIALIZED,
     * CUDA_ERROR_INVALID_CONTEXT, CUDA_ERROR_INVALID_VALUE,
     * CUDA_ERROR_OUT_OF_MEMORY, CUDA_ERROR_UNKNOWN
     * 
     * @see JCudaDriver#cuArray3DCreate
     * @see JCudaDriver#cuArray3DGetDescriptor
     * @see JCudaDriver#cuArrayDestroy
     * @see JCudaDriver#cuArrayGetDescriptor
     * @see JCudaDriver#cuMemAlloc
     * @see JCudaDriver#cuMemAllocHost
     * @see JCudaDriver#cuMemAllocPitch
     * @see JCudaDriver#cuMemcpy2D
     * @see JCudaDriver#cuMemcpy2DAsync
     * @see JCudaDriver#cuMemcpy2DUnaligned
     * @see JCudaDriver#cuMemcpy3D
     * @see JCudaDriver#cuMemcpy3DAsync
     * @see JCudaDriver#cuMemcpyAtoA
     * @see JCudaDriver#cuMemcpyAtoD
     * @see JCudaDriver#cuMemcpyAtoH
     * @see JCudaDriver#cuMemcpyAtoHAsync
     * @see JCudaDriver#cuMemcpyDtoA
     * @see JCudaDriver#cuMemcpyDtoD
     * @see JCudaDriver#cuMemcpyDtoDAsync
     * @see JCudaDriver#cuMemcpyDtoH
     * @see JCudaDriver#cuMemcpyDtoHAsync
     * @see JCudaDriver#cuMemcpyHtoA
     * @see JCudaDriver#cuMemcpyHtoAAsync
     * @see JCudaDriver#cuMemcpyHtoD
     * @see JCudaDriver#cuMemcpyHtoDAsync
     * @see JCudaDriver#cuMemFree
     * @see JCudaDriver#cuMemFreeHost
     * @see JCudaDriver#cuMemGetAddressRange
     * @see JCudaDriver#cuMemGetInfo
     * @see JCudaDriver#cuMemHostAlloc
     * @see JCudaDriver#cuMemHostGetDevicePointer
     * @see JCudaDriver#cuMemsetD2D8
     * @see JCudaDriver#cuMemsetD2D16
     * @see JCudaDriver#cuMemsetD2D32
     * @see JCudaDriver#cuMemsetD8
     * @see JCudaDriver#cuMemsetD16
     * @see JCudaDriver#cuMemsetD32
     */    
    public static int cuArrayCreate(CUarray pHandle, CUDA_ARRAY_DESCRIPTOR pAllocateArray)
    {
        // Invoke the native binding first, then hand its status code to checkResult.
        final int status = cuArrayCreateNative(pHandle, pAllocateArray);
        return checkResult(status);
    }

    // Native counterpart of cuArrayCreate; returns the raw CUresult status code.
    private static native int cuArrayCreateNative(
        CUarray pHandle, CUDA_ARRAY_DESCRIPTOR pAllocateArray);


    /**
     * Get a 1D or 2D CUDA array descriptor.
     * 
     *      * CUresult cuArrayGetDescriptor (
     *      CUDA_ARRAY_DESCRIPTOR* pArrayDescriptor,
     *      CUarray hArray )
     * 
     * 
     *   Get a 1D or 2D CUDA array descriptor. 
     *     Returns in *pArrayDescriptor a descriptor containing
     *     information on the format and dimensions of the CUDA array hArray. It is useful for subroutines that have been passed a CUDA
     *     array, but need to know the CUDA array parameters for validation
     *     or other purposes.
     *   
     *   
     *     Note:
     *     Note that this
     *       function may also return error codes from previous, asynchronous
     *       launches.
     *     
     *   
     *   
     * 
     * 
     * @param pArrayDescriptor Returned array descriptor
     * @param hArray Array to get descriptor of
     * 
     * @return CUDA_SUCCESS, CUDA_ERROR_DEINITIALIZED, CUDA_ERROR_NOT_INITIALIZED,
     * CUDA_ERROR_INVALID_CONTEXT, CUDA_ERROR_INVALID_VALUE,
     * CUDA_ERROR_INVALID_HANDLE
     * 
     * @see JCudaDriver#cuArray3DCreate
     * @see JCudaDriver#cuArray3DGetDescriptor
     * @see JCudaDriver#cuArrayCreate
     * @see JCudaDriver#cuArrayDestroy
     * @see JCudaDriver#cuMemAlloc
     * @see JCudaDriver#cuMemAllocHost
     * @see JCudaDriver#cuMemAllocPitch
     * @see JCudaDriver#cuMemcpy2D
     * @see JCudaDriver#cuMemcpy2DAsync
     * @see JCudaDriver#cuMemcpy2DUnaligned
     * @see JCudaDriver#cuMemcpy3D
     * @see JCudaDriver#cuMemcpy3DAsync
     * @see JCudaDriver#cuMemcpyAtoA
     * @see JCudaDriver#cuMemcpyAtoD
     * @see JCudaDriver#cuMemcpyAtoH
     * @see JCudaDriver#cuMemcpyAtoHAsync
     * @see JCudaDriver#cuMemcpyDtoA
     * @see JCudaDriver#cuMemcpyDtoD
     * @see JCudaDriver#cuMemcpyDtoDAsync
     * @see JCudaDriver#cuMemcpyDtoH
     * @see JCudaDriver#cuMemcpyDtoHAsync
     * @see JCudaDriver#cuMemcpyHtoA
     * @see JCudaDriver#cuMemcpyHtoAAsync
     * @see JCudaDriver#cuMemcpyHtoD
     * @see JCudaDriver#cuMemcpyHtoDAsync
     * @see JCudaDriver#cuMemFree
     * @see JCudaDriver#cuMemFreeHost
     * @see JCudaDriver#cuMemGetAddressRange
     * @see JCudaDriver#cuMemGetInfo
     * @see JCudaDriver#cuMemHostAlloc
     * @see JCudaDriver#cuMemHostGetDevicePointer
     * @see JCudaDriver#cuMemsetD2D8
     * @see JCudaDriver#cuMemsetD2D16
     * @see JCudaDriver#cuMemsetD2D32
     * @see JCudaDriver#cuMemsetD8
     * @see JCudaDriver#cuMemsetD16
     * @see JCudaDriver#cuMemsetD32
     */    
    public static int cuArrayGetDescriptor(CUDA_ARRAY_DESCRIPTOR pArrayDescriptor, CUarray hArray)
    {
        // Run the JNI binding first, then hand its status code to checkResult.
        final int status = cuArrayGetDescriptorNative(pArrayDescriptor, hArray);
        return checkResult(status);
    }

    // JNI counterpart of cuArrayGetDescriptor; declared native, so it has no Java body.
    private static native int cuArrayGetDescriptorNative(CUDA_ARRAY_DESCRIPTOR pArrayDescriptor, CUarray hArray);


    /**
     * Destroys a CUDA array.
     * 
     *      * CUresult cuArrayDestroy (
     *      CUarray hArray )
     * 
     * 
     *   Destroys a CUDA array.  Destroys the CUDA
     *     array hArray.
     *   
     *   
     *     Note:
     *     Note that this
     *       function may also return error codes from previous, asynchronous
     *       launches.
     *     
     *   
     *   
     * 
     * 
     * @param hArray Array to destroy
     * 
     * @return CUDA_SUCCESS, CUDA_ERROR_DEINITIALIZED, CUDA_ERROR_NOT_INITIALIZED,
     * CUDA_ERROR_INVALID_CONTEXT, CUDA_ERROR_INVALID_HANDLE,
     * CUDA_ERROR_ARRAY_IS_MAPPED
     * 
     * @see JCudaDriver#cuArray3DCreate
     * @see JCudaDriver#cuArray3DGetDescriptor
     * @see JCudaDriver#cuArrayCreate
     * @see JCudaDriver#cuArrayGetDescriptor
     * @see JCudaDriver#cuMemAlloc
     * @see JCudaDriver#cuMemAllocHost
     * @see JCudaDriver#cuMemAllocPitch
     * @see JCudaDriver#cuMemcpy2D
     * @see JCudaDriver#cuMemcpy2DAsync
     * @see JCudaDriver#cuMemcpy2DUnaligned
     * @see JCudaDriver#cuMemcpy3D
     * @see JCudaDriver#cuMemcpy3DAsync
     * @see JCudaDriver#cuMemcpyAtoA
     * @see JCudaDriver#cuMemcpyAtoD
     * @see JCudaDriver#cuMemcpyAtoH
     * @see JCudaDriver#cuMemcpyAtoHAsync
     * @see JCudaDriver#cuMemcpyDtoA
     * @see JCudaDriver#cuMemcpyDtoD
     * @see JCudaDriver#cuMemcpyDtoDAsync
     * @see JCudaDriver#cuMemcpyDtoH
     * @see JCudaDriver#cuMemcpyDtoHAsync
     * @see JCudaDriver#cuMemcpyHtoA
     * @see JCudaDriver#cuMemcpyHtoAAsync
     * @see JCudaDriver#cuMemcpyHtoD
     * @see JCudaDriver#cuMemcpyHtoDAsync
     * @see JCudaDriver#cuMemFree
     * @see JCudaDriver#cuMemFreeHost
     * @see JCudaDriver#cuMemGetAddressRange
     * @see JCudaDriver#cuMemGetInfo
     * @see JCudaDriver#cuMemHostAlloc
     * @see JCudaDriver#cuMemHostGetDevicePointer
     * @see JCudaDriver#cuMemsetD2D8
     * @see JCudaDriver#cuMemsetD2D16
     * @see JCudaDriver#cuMemsetD2D32
     * @see JCudaDriver#cuMemsetD8
     * @see JCudaDriver#cuMemsetD16
     * @see JCudaDriver#cuMemsetD32
     */    
    public static int cuArrayDestroy(CUarray hArray)
    {
        // Run the JNI binding first, then hand its status code to checkResult.
        final int status = cuArrayDestroyNative(hArray);
        return checkResult(status);
    }

    // JNI counterpart of cuArrayDestroy; declared native, so it has no Java body.
    private static native int cuArrayDestroyNative(CUarray hArray);


    /**
     * Creates a 3D CUDA array.
     * 
     *      * CUresult cuArray3DCreate (
     *      CUarray* pHandle,
     *      const CUDA_ARRAY3D_DESCRIPTOR* pAllocateArray )
     * 
     * 
     *   Creates a 3D CUDA array.  Creates a CUDA
     *     array according to the CUDA_ARRAY3D_DESCRIPTOR structure pAllocateArray and returns a handle to the new CUDA array in *pHandle. The CUDA_ARRAY3D_DESCRIPTOR is defined as:
     *   
     *       typedef struct {
     *         unsigned int Width;
     *         unsigned int Height;
     *         unsigned int Depth;
     *         CUarray_format Format;
     *         unsigned int NumChannels;
     *         unsigned int Flags;
     *     } CUDA_ARRAY3D_DESCRIPTOR;
     *   where:
     *   
     *     
     *       
     *         Width, Height, and Depth are the width, height, and depth of
     *         the CUDA array (in elements); the following types of CUDA arrays can
     *         be allocated:
     *         
     *           
     *             A 1D array is allocated
     *               if Height and Depth extents are both zero.
     *             
     *           
     *           
     *             A 2D array is allocated
     *               if only Depth extent is zero.
     *             
     *           
     *           
     *             A 3D array is allocated
     *               if all three extents are non-zero.
     *             
     *           
     *           
     *             A 1D layered CUDA
     *               array is allocated if only Height is zero and the
     *               CUDA_ARRAY3D_LAYERED flag is set. Each layer is a 1D array. The number
     *               of layers is determined by the depth extent.
     *             
     *           
     *           
     *             A 2D layered CUDA
     *               array is allocated if all three extents are non-zero and the
     *               CUDA_ARRAY3D_LAYERED flag is set. Each layer is a 2D array. The number
     *               of layers is determined by the depth extent.
     *             
     *           
     *           
     *             A cubemap CUDA array
     *               is allocated if all three extents are non-zero and the CUDA_ARRAY3D_CUBEMAP
     *               flag is set. Width must be equal to Height, and
     *               Depth must be six. A cubemap is a special type of 2D layered
     *               CUDA array, where the six layers represent the six faces of a cube.
     *               The order of the six
     *               layers in memory is the same as that listed in CUarray_cubemap_face.
     *             
     *           
     *           
     *             A cubemap layered CUDA
     *               array is allocated if all three extents are non-zero, and both,
     *               CUDA_ARRAY3D_CUBEMAP and CUDA_ARRAY3D_LAYERED flags are set. Width must be equal to Height, and Depth must
     *               be a multiple of six. A cubemap layered CUDA array is a special type
     *               of 2D layered CUDA array that consists of a collection
     *               of cubemaps. The first
     *               six layers represent the first cubemap, the next six layers form the
     *               second cubemap, and so on.
     *             
     *           
     *         
     *       
     *     
     *   
     *   
     *   
     *     
     *       
     *         Format specifies the format
     *         of the elements; CUarray_format is defined as: 
     *             typedef enum
     * CUarray_format_enum {
     *         CU_AD_FORMAT_UNSIGNED_INT8 = 0x01,
     *         CU_AD_FORMAT_UNSIGNED_INT16 = 0x02,
     *         CU_AD_FORMAT_UNSIGNED_INT32 = 0x03,
     *         CU_AD_FORMAT_SIGNED_INT8 = 0x08,
     *         CU_AD_FORMAT_SIGNED_INT16 = 0x09,
     *         CU_AD_FORMAT_SIGNED_INT32 = 0x0a,
     *         CU_AD_FORMAT_HALF = 0x10,
     *         CU_AD_FORMAT_FLOAT = 0x20
     *     } CUarray_format;
     *       
     *     
     *   
     *   
     *   
     *     
     *       NumChannels specifies
     *         the number of packed components per CUDA array element; it may be 1,
     *         2, or 4;
     *       
     *     
     *   
     *   
     *   
     *     
     *       
     *         Flags may be set to
     *         
     *           
     *             CUDA_ARRAY3D_LAYERED
     *               to enable creation of layered CUDA arrays. If this flag is set, Depth specifies the number of layers, not the depth of a 3D
     *               array.
     *             
     *           
     *           
     *             CUDA_ARRAY3D_SURFACE_LDST
     *               to enable surface references to be bound to the CUDA array. If this
     *               flag is not set, cuSurfRefSetArray will fail when attempting to bind
     *               the CUDA array to a surface reference.
     *             
     *           
     *           
     *             CUDA_ARRAY3D_CUBEMAP
     *               to enable creation of cubemaps. If this flag is set, Width
     *               must be equal to Height, and Depth must be six. If
     *               the CUDA_ARRAY3D_LAYERED flag is also set, then Depth must
     *               be a multiple of six.
     *             
     *           
     *           
     *             CUDA_ARRAY3D_TEXTURE_GATHER
     *               to indicate that the CUDA array will be used for texture gather.
     *               Texture gather can only be performed on 2D CUDA arrays.
     *             
     *           
     *         
     *       
     *     
     *   
     *   
     *   Width, Height and
     *     Depth must meet certain size requirements as listed in the
     *     following table. All values are specified in elements. Note that for
     *     brevity's sake, the full name of the
     *     device attribute is not specified. For ex., TEXTURE1D_WIDTH refers to
     *     the device attribute
     *     CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE1D_WIDTH.
     *   
     *   Note that 2D CUDA arrays have different
     *     size requirements if the CUDA_ARRAY3D_TEXTURE_GATHER flag is set. Width and Height must not be greater than
     *     CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE2D_GATHER_WIDTH and
     *     CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE2D_GATHER_HEIGHT respectively, in
     *     that case.
     *   
     *   
     *   Extents are written as {(width range in elements), (height range),
     *   (depth range)}. For every CUDA array type, the first entry lists the
     *   valid extents that must always be met, and the second entry lists the
     *   valid extents with CUDA_ARRAY3D_SURFACE_LDST set:
     *
     *     1D
     *       Always:
     *         { (1,TEXTURE1D_WIDTH), 0, 0 }
     *       With CUDA_ARRAY3D_SURFACE_LDST:
     *         { (1,SURFACE1D_WIDTH), 0, 0 }
     *
     *     2D
     *       Always:
     *         { (1,TEXTURE2D_WIDTH), (1,TEXTURE2D_HEIGHT), 0 }
     *       With CUDA_ARRAY3D_SURFACE_LDST:
     *         { (1,SURFACE2D_WIDTH), (1,SURFACE2D_HEIGHT), 0 }
     *
     *     3D
     *       Always:
     *         { (1,TEXTURE3D_WIDTH), (1,TEXTURE3D_HEIGHT), (1,TEXTURE3D_DEPTH) }
     *         OR
     *         { (1,TEXTURE3D_WIDTH_ALTERNATE), (1,TEXTURE3D_HEIGHT_ALTERNATE),
     *           (1,TEXTURE3D_DEPTH_ALTERNATE) }
     *       With CUDA_ARRAY3D_SURFACE_LDST:
     *         { (1,SURFACE3D_WIDTH), (1,SURFACE3D_HEIGHT), (1,SURFACE3D_DEPTH) }
     *
     *     1D Layered
     *       Always:
     *         { (1,TEXTURE1D_LAYERED_WIDTH), 0, (1,TEXTURE1D_LAYERED_LAYERS) }
     *       With CUDA_ARRAY3D_SURFACE_LDST:
     *         { (1,SURFACE1D_LAYERED_WIDTH), 0, (1,SURFACE1D_LAYERED_LAYERS) }
     *
     *     2D Layered
     *       Always:
     *         { (1,TEXTURE2D_LAYERED_WIDTH), (1,TEXTURE2D_LAYERED_HEIGHT),
     *           (1,TEXTURE2D_LAYERED_LAYERS) }
     *       With CUDA_ARRAY3D_SURFACE_LDST:
     *         { (1,SURFACE2D_LAYERED_WIDTH), (1,SURFACE2D_LAYERED_HEIGHT),
     *           (1,SURFACE2D_LAYERED_LAYERS) }
     *
     *     Cubemap
     *       Always:
     *         { (1,TEXTURECUBEMAP_WIDTH), (1,TEXTURECUBEMAP_WIDTH), 6 }
     *       With CUDA_ARRAY3D_SURFACE_LDST:
     *         { (1,SURFACECUBEMAP_WIDTH), (1,SURFACECUBEMAP_WIDTH), 6 }
     *
     *     Cubemap Layered
     *       Always:
     *         { (1,TEXTURECUBEMAP_LAYERED_WIDTH), (1,TEXTURECUBEMAP_LAYERED_WIDTH),
     *           (1,TEXTURECUBEMAP_LAYERED_LAYERS) }
     *       With CUDA_ARRAY3D_SURFACE_LDST:
     *         { (1,SURFACECUBEMAP_LAYERED_WIDTH), (1,SURFACECUBEMAP_LAYERED_WIDTH),
     *           (1,SURFACECUBEMAP_LAYERED_LAYERS) }
     *   
     *   
     *   Here are examples of CUDA array
     *     descriptions:
     *   
     *   Description for a CUDA array of 2048
     *     floats: 
     *   
     *     CUDA_ARRAY3D_DESCRIPTOR desc;
     *     desc.Format = CU_AD_FORMAT_FLOAT;
     *     desc.NumChannels = 1;
     *     desc.Width = 2048;
     *     desc.Height = 0;
     *     desc.Depth = 0;
     *   
     *   Description for a 64 x 64 CUDA array of
     *     floats: 
     *   
     *     CUDA_ARRAY3D_DESCRIPTOR desc;
     *     desc.Format = CU_AD_FORMAT_FLOAT;
     *     desc.NumChannels = 1;
     *     desc.Width = 64;
     *     desc.Height = 64;
     *     desc.Depth = 0;
     *   
     *   Description for a width x height x depth CUDA array of 64-bit, 4x16-bit float16's:
     *   
     *     CUDA_ARRAY3D_DESCRIPTOR desc;
     *     desc.Format = CU_AD_FORMAT_HALF;
     *     desc.NumChannels = 4;
     *     desc.Width = width;
     *     desc.Height = height;
     *     desc.Depth = depth;
     *   
     *   
     *     Note:
     *     Note that this
     *       function may also return error codes from previous, asynchronous
     *       launches.
     *     
     *   
     *   
     * 
     * 
     * @param pHandle Returned array
     * @param pAllocateArray 3D array descriptor
     * 
     * @return CUDA_SUCCESS, CUDA_ERROR_DEINITIALIZED, CUDA_ERROR_NOT_INITIALIZED,
     * CUDA_ERROR_INVALID_CONTEXT, CUDA_ERROR_INVALID_VALUE,
     * CUDA_ERROR_OUT_OF_MEMORY, CUDA_ERROR_UNKNOWN
     * 
     * @see JCudaDriver#cuArray3DGetDescriptor
     * @see JCudaDriver#cuArrayCreate
     * @see JCudaDriver#cuArrayDestroy
     * @see JCudaDriver#cuArrayGetDescriptor
     * @see JCudaDriver#cuMemAlloc
     * @see JCudaDriver#cuMemAllocHost
     * @see JCudaDriver#cuMemAllocPitch
     * @see JCudaDriver#cuMemcpy2D
     * @see JCudaDriver#cuMemcpy2DAsync
     * @see JCudaDriver#cuMemcpy2DUnaligned
     * @see JCudaDriver#cuMemcpy3D
     * @see JCudaDriver#cuMemcpy3DAsync
     * @see JCudaDriver#cuMemcpyAtoA
     * @see JCudaDriver#cuMemcpyAtoD
     * @see JCudaDriver#cuMemcpyAtoH
     * @see JCudaDriver#cuMemcpyAtoHAsync
     * @see JCudaDriver#cuMemcpyDtoA
     * @see JCudaDriver#cuMemcpyDtoD
     * @see JCudaDriver#cuMemcpyDtoDAsync
     * @see JCudaDriver#cuMemcpyDtoH
     * @see JCudaDriver#cuMemcpyDtoHAsync
     * @see JCudaDriver#cuMemcpyHtoA
     * @see JCudaDriver#cuMemcpyHtoAAsync
     * @see JCudaDriver#cuMemcpyHtoD
     * @see JCudaDriver#cuMemcpyHtoDAsync
     * @see JCudaDriver#cuMemFree
     * @see JCudaDriver#cuMemFreeHost
     * @see JCudaDriver#cuMemGetAddressRange
     * @see JCudaDriver#cuMemGetInfo
     * @see JCudaDriver#cuMemHostAlloc
     * @see JCudaDriver#cuMemHostGetDevicePointer
     * @see JCudaDriver#cuMemsetD2D8
     * @see JCudaDriver#cuMemsetD2D16
     * @see JCudaDriver#cuMemsetD2D32
     * @see JCudaDriver#cuMemsetD8
     * @see JCudaDriver#cuMemsetD16
     * @see JCudaDriver#cuMemsetD32
     */    
    public static int cuArray3DCreate(CUarray pHandle, CUDA_ARRAY3D_DESCRIPTOR pAllocateArray)
    {
        // Run the JNI binding first, then hand its status code to checkResult.
        final int status = cuArray3DCreateNative(pHandle, pAllocateArray);
        return checkResult(status);
    }

    // JNI counterpart of cuArray3DCreate; declared native, so it has no Java body.
    private static native int cuArray3DCreateNative(CUarray pHandle, CUDA_ARRAY3D_DESCRIPTOR pAllocateArray);


    /**
     * Get a 3D CUDA array descriptor.
     * 
     *      * CUresult cuArray3DGetDescriptor (
     *      CUDA_ARRAY3D_DESCRIPTOR* pArrayDescriptor,
     *      CUarray hArray )
     * 
     * 
     *   Get a 3D CUDA array descriptor.  Returns
     *     in *pArrayDescriptor a descriptor containing information on
     *     the format and dimensions of the CUDA array hArray. It is
     *     useful for subroutines that have been passed a CUDA array, but need to
     *     know the CUDA array parameters for validation
     *     or other purposes.
     *   
     *   This function may be called on 1D and
     *     2D arrays, in which case the Height and/or Depth
     *     members of the descriptor struct will be set to 0.
     *   
     *   
     *     Note:
     *     Note that this
     *       function may also return error codes from previous, asynchronous
     *       launches.
     *     
     *   
     *   
     * 
     * 
     * @param pArrayDescriptor Returned 3D array descriptor
     * @param hArray 3D array to get descriptor of
     * 
     * @return CUDA_SUCCESS, CUDA_ERROR_DEINITIALIZED, CUDA_ERROR_NOT_INITIALIZED,
     * CUDA_ERROR_INVALID_CONTEXT, CUDA_ERROR_INVALID_VALUE,
     * CUDA_ERROR_INVALID_HANDLE
     * 
     * @see JCudaDriver#cuArray3DCreate
     * @see JCudaDriver#cuArrayCreate
     * @see JCudaDriver#cuArrayDestroy
     * @see JCudaDriver#cuArrayGetDescriptor
     * @see JCudaDriver#cuMemAlloc
     * @see JCudaDriver#cuMemAllocHost
     * @see JCudaDriver#cuMemAllocPitch
     * @see JCudaDriver#cuMemcpy2D
     * @see JCudaDriver#cuMemcpy2DAsync
     * @see JCudaDriver#cuMemcpy2DUnaligned
     * @see JCudaDriver#cuMemcpy3D
     * @see JCudaDriver#cuMemcpy3DAsync
     * @see JCudaDriver#cuMemcpyAtoA
     * @see JCudaDriver#cuMemcpyAtoD
     * @see JCudaDriver#cuMemcpyAtoH
     * @see JCudaDriver#cuMemcpyAtoHAsync
     * @see JCudaDriver#cuMemcpyDtoA
     * @see JCudaDriver#cuMemcpyDtoD
     * @see JCudaDriver#cuMemcpyDtoDAsync
     * @see JCudaDriver#cuMemcpyDtoH
     * @see JCudaDriver#cuMemcpyDtoHAsync
     * @see JCudaDriver#cuMemcpyHtoA
     * @see JCudaDriver#cuMemcpyHtoAAsync
     * @see JCudaDriver#cuMemcpyHtoD
     * @see JCudaDriver#cuMemcpyHtoDAsync
     * @see JCudaDriver#cuMemFree
     * @see JCudaDriver#cuMemFreeHost
     * @see JCudaDriver#cuMemGetAddressRange
     * @see JCudaDriver#cuMemGetInfo
     * @see JCudaDriver#cuMemHostAlloc
     * @see JCudaDriver#cuMemHostGetDevicePointer
     * @see JCudaDriver#cuMemsetD2D8
     * @see JCudaDriver#cuMemsetD2D16
     * @see JCudaDriver#cuMemsetD2D32
     * @see JCudaDriver#cuMemsetD8
     * @see JCudaDriver#cuMemsetD16
     * @see JCudaDriver#cuMemsetD32
     */    
    public static int cuArray3DGetDescriptor(CUDA_ARRAY3D_DESCRIPTOR pArrayDescriptor, CUarray hArray)
    {
        // Run the JNI binding first, then hand its status code to checkResult.
        final int status = cuArray3DGetDescriptorNative(pArrayDescriptor, hArray);
        return checkResult(status);
    }

    // JNI counterpart of cuArray3DGetDescriptor; declared native, so it has no Java body.
    private static native int cuArray3DGetDescriptorNative(CUDA_ARRAY3D_DESCRIPTOR pArrayDescriptor, CUarray hArray);

    
    /**
     * Creates a CUDA mipmapped array.
     * 
     *      * CUresult cuMipmappedArrayCreate (
     *      CUmipmappedArray* pHandle,
     *      const CUDA_ARRAY3D_DESCRIPTOR* pMipmappedArrayDesc,
     *      unsigned int  numMipmapLevels )
     * 
     * 
     *   Creates a CUDA mipmapped array.  Creates
     *     a CUDA mipmapped array according to the CUDA_ARRAY3D_DESCRIPTOR
     *     structure pMipmappedArrayDesc and returns a handle to the
     *     new CUDA mipmapped array in *pHandle. numMipmapLevels
     *     specifies the number of mipmap levels to be allocated. This value is
     *     clamped to the range [1, 1 + floor(log2(max(width, height,
     *     depth)))].
     *   
     *   The CUDA_ARRAY3D_DESCRIPTOR is defined
     *     as:
     *   
     *       typedef struct {
     *         unsigned int Width;
     *         unsigned int Height;
     *         unsigned int Depth;
     *         CUarray_format Format;
     *         unsigned int NumChannels;
     *         unsigned int Flags;
     *     } CUDA_ARRAY3D_DESCRIPTOR;
     *   where:
     *   
     *     
     *       
     *         Width, Height, and Depth are the width, height, and depth of
     *         the CUDA array (in elements); the following types of CUDA arrays can
     *         be allocated:
     *         
     *           
     *             A 1D mipmapped array
     *               is allocated if Height and Depth extents are both
     *               zero.
     *             
     *           
     *           
     *             A 2D mipmapped array
     *               is allocated if only Depth extent is zero.
     *             
     *           
     *           
     *             A 3D mipmapped array
     *               is allocated if all three extents are non-zero.
     *             
     *           
     *           
     *             A 1D layered CUDA
     *               mipmapped array is allocated if only Height is zero and the
     *               CUDA_ARRAY3D_LAYERED flag is set. Each layer is a 1D array. The number
     *               of layers is determined by the depth extent.
     *             
     *           
     *           
     *             A 2D layered CUDA
     *               mipmapped array is allocated if all three extents are non-zero and the
     *               CUDA_ARRAY3D_LAYERED flag is set. Each layer is a 2D array. The number
     *               of layers is determined by the depth extent.
     *             
     *           
     *           
     *             A cubemap CUDA
     *               mipmapped array is allocated if all three extents are non-zero and the
     *               CUDA_ARRAY3D_CUBEMAP flag is set. Width must be equal to Height, and Depth must be six. A cubemap is a special
     *               type of 2D layered CUDA array, where the six layers represent the six
     *               faces of a cube.
     *               The order of the six
     *               layers in memory is the same as that listed in CUarray_cubemap_face.
     *             
     *           
     *           
     *             A cubemap layered CUDA
     *               mipmapped array is allocated if all three extents are non-zero, and
     *               both, CUDA_ARRAY3D_CUBEMAP and CUDA_ARRAY3D_LAYERED flags are set. Width must be equal to Height, and Depth must
     *               be a multiple of six. A cubemap layered CUDA array is a special type
     *               of 2D layered CUDA array that consists of a collection
     *               of cubemaps. The first
     *               six layers represent the first cubemap, the next six layers form the
     *               second cubemap, and so on.
     *             
     *           
     *         
     *       
     *     
     *   
     *   
     *   
     *     
     *       
     *         Format specifies the format
     *         of the elements; CUarray_format is defined as: 
     *             typedef enum
     * CUarray_format_enum {
     *         CU_AD_FORMAT_UNSIGNED_INT8 = 0x01,
     *         CU_AD_FORMAT_UNSIGNED_INT16 = 0x02,
     *         CU_AD_FORMAT_UNSIGNED_INT32 = 0x03,
     *         CU_AD_FORMAT_SIGNED_INT8 = 0x08,
     *         CU_AD_FORMAT_SIGNED_INT16 = 0x09,
     *         CU_AD_FORMAT_SIGNED_INT32 = 0x0a,
     *         CU_AD_FORMAT_HALF = 0x10,
     *         CU_AD_FORMAT_FLOAT = 0x20
     *     } CUarray_format;
     *       
     *     
     *   
     *   
     *   
     *     
     *       NumChannels specifies
     *         the number of packed components per CUDA array element; it may be 1,
     *         2, or 4;
     *       
     *     
     *   
     *   
     *   
     *     
     *       
     *         Flags may be set to
     *         
     *           
     *             CUDA_ARRAY3D_LAYERED
     *               to enable creation of layered CUDA mipmapped arrays. If this flag is
     *               set, Depth specifies the number of layers, not the depth of
     *               a 3D array.
     *             
     *           
     *           
     *             CUDA_ARRAY3D_SURFACE_LDST
     *               to enable surface references to be bound to individual mipmap levels
     *               of the CUDA mipmapped array. If this flag is not set,
     *               cuSurfRefSetArray will
     *               fail when attempting to bind a mipmap level of the CUDA mipmapped array
     *               to a surface reference.
     *             
     *           
     *           
     *             CUDA_ARRAY3D_CUBEMAP
     *               to enable creation of mipmapped cubemaps. If this flag is set, Width must be equal to Height, and Depth must
     *               be six. If the CUDA_ARRAY3D_LAYERED flag is also set, then Depth must be a multiple of six.
     *             
     *           
     *           
     *             CUDA_ARRAY3D_TEXTURE_GATHER
     *               to indicate that the CUDA mipmapped array will be used for texture
     *               gather. Texture gather can only be performed on 2D CUDA
     *               mipmapped arrays.
     *             
     *           
     *         
     *       
     *     
     *   
     *   
     *   Width, Height and
     *     Depth must meet certain size requirements as listed in the
     *     following table. All values are specified in elements. Note that for
     *     brevity's sake, the full name of the
     *     device attribute is not specified. For ex., TEXTURE1D_MIPMAPPED_WIDTH
     *     refers to the device
     *     attribute
     *     CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE1D_MIPMAPPED_WIDTH.
     *   
     *   
     *   Extents are written as {(width range in elements), (height range),
     *   (depth range)}. For every CUDA array type, the entry lists the valid
     *   extents that must always be met:
     *
     *     1D
     *         { (1,TEXTURE1D_MIPMAPPED_WIDTH), 0, 0 }
     *
     *     2D
     *         { (1,TEXTURE2D_MIPMAPPED_WIDTH), (1,TEXTURE2D_MIPMAPPED_HEIGHT), 0 }
     *
     *     3D
     *         { (1,TEXTURE3D_WIDTH), (1,TEXTURE3D_HEIGHT), (1,TEXTURE3D_DEPTH) }
     *         OR
     *         { (1,TEXTURE3D_WIDTH_ALTERNATE), (1,TEXTURE3D_HEIGHT_ALTERNATE),
     *           (1,TEXTURE3D_DEPTH_ALTERNATE) }
     *
     *     1D Layered
     *         { (1,TEXTURE1D_LAYERED_WIDTH), 0, (1,TEXTURE1D_LAYERED_LAYERS) }
     *
     *     2D Layered
     *         { (1,TEXTURE2D_LAYERED_WIDTH), (1,TEXTURE2D_LAYERED_HEIGHT),
     *           (1,TEXTURE2D_LAYERED_LAYERS) }
     *
     *     Cubemap
     *         { (1,TEXTURECUBEMAP_WIDTH), (1,TEXTURECUBEMAP_WIDTH), 6 }
     *
     *     Cubemap Layered
     *         { (1,TEXTURECUBEMAP_LAYERED_WIDTH), (1,TEXTURECUBEMAP_LAYERED_WIDTH),
     *           (1,TEXTURECUBEMAP_LAYERED_LAYERS) }
     *   
     *   
     *   
     *     Note:
     *     Note that this
     *       function may also return error codes from previous, asynchronous
     *       launches.
     *     
     *   
     *   
     * 
     * 
     * @param pHandle Returned mipmapped array
     * @param pMipmappedArrayDesc mipmapped array descriptor
     * @param numMipmapLevels Number of mipmap levels
     * 
     * @return CUDA_SUCCESS, CUDA_ERROR_DEINITIALIZED, CUDA_ERROR_NOT_INITIALIZED,
     * CUDA_ERROR_INVALID_CONTEXT, CUDA_ERROR_INVALID_VALUE,
     * CUDA_ERROR_OUT_OF_MEMORY, CUDA_ERROR_UNKNOWN
     * 
     * @see JCudaDriver#cuMipmappedArrayDestroy
     * @see JCudaDriver#cuMipmappedArrayGetLevel
     * @see JCudaDriver#cuArrayCreate
     */
    public static int cuMipmappedArrayCreate(CUmipmappedArray pHandle, CUDA_ARRAY3D_DESCRIPTOR pMipmappedArrayDesc, int numMipmapLevels)
    {
        // Run the JNI binding first, then hand its status code to checkResult.
        final int status = cuMipmappedArrayCreateNative(pHandle, pMipmappedArrayDesc, numMipmapLevels);
        return checkResult(status);
    }
    // JNI counterpart of cuMipmappedArrayCreate; declared native, so it has no Java body.
    private static native int cuMipmappedArrayCreateNative(CUmipmappedArray pHandle, CUDA_ARRAY3D_DESCRIPTOR pMipmappedArrayDesc, int numMipmapLevels);
    
    /**
     * Gets a mipmap level of a CUDA mipmapped array.
     * 
     *      * CUresult cuMipmappedArrayGetLevel (
     *      CUarray* pLevelArray,
     *      CUmipmappedArray hMipmappedArray,
     *      unsigned int  level )
     * 
     * 
     *   Gets a mipmap level of a CUDA mipmapped
     *     array.  Returns in *pLevelArray a CUDA array that represents
     *     a single mipmap level of the CUDA mipmapped array hMipmappedArray.
     *   
     *   If level is greater than the
     *     maximum number of levels in this mipmapped array, CUDA_ERROR_INVALID_VALUE
     *     is returned.
     *   
     *   
     *     Note:
     *     Note that this
     *       function may also return error codes from previous, asynchronous
     *       launches.
     *     
     *   
     *   
     * 
     * 
     * @param pLevelArray Returned mipmap level CUDA array
     * @param hMipmappedArray CUDA mipmapped array
     * @param level Mipmap level
     * 
     * @return CUDA_SUCCESS, CUDA_ERROR_DEINITIALIZED, CUDA_ERROR_NOT_INITIALIZED,
     * CUDA_ERROR_INVALID_CONTEXT, CUDA_ERROR_INVALID_VALUE,
     * CUDA_ERROR_INVALID_HANDLE
     * 
     * @see JCudaDriver#cuMipmappedArrayCreate
     * @see JCudaDriver#cuMipmappedArrayDestroy
     * @see JCudaDriver#cuArrayCreate
     */
    public static int cuMipmappedArrayGetLevel(CUarray pLevelArray, CUmipmappedArray hMipmappedArray, int level)
    {
        // Call the native binding first, then hand its status code to checkResult.
        final int status = cuMipmappedArrayGetLevelNative(pLevelArray, hMipmappedArray, level);
        return checkResult(status);
    }

    // Native counterpart of cuMipmappedArrayGetLevel; the body lives in native code.
    private static native int cuMipmappedArrayGetLevelNative(CUarray pLevelArray, CUmipmappedArray hMipmappedArray, int level);
    
    
    /**
     * Destroys a CUDA mipmapped array.
     * 
     *      * CUresult cuMipmappedArrayDestroy (
     *      CUmipmappedArray hMipmappedArray )
     * 
     * 
     *   Destroys a CUDA mipmapped array.  Destroys
     *     the CUDA mipmapped array hMipmappedArray.
     *   
     *   
     *     Note:
     *     Note that this
     *       function may also return error codes from previous, asynchronous
     *       launches.
     *     
     *   
     *   
     * 
     * 
     * @param hMipmappedArray Mipmapped array to destroy
     * 
     * @return CUDA_SUCCESS, CUDA_ERROR_DEINITIALIZED, CUDA_ERROR_NOT_INITIALIZED,
     * CUDA_ERROR_INVALID_CONTEXT, CUDA_ERROR_INVALID_HANDLE,
     * CUDA_ERROR_ARRAY_IS_MAPPED
     * 
     * @see JCudaDriver#cuMipmappedArrayCreate
     * @see JCudaDriver#cuMipmappedArrayGetLevel
     * @see JCudaDriver#cuArrayCreate
     */
    public static int cuMipmappedArrayDestroy(CUmipmappedArray hMipmappedArray)
    {
        // Call the native binding first, then hand its status code to checkResult.
        final int status = cuMipmappedArrayDestroyNative(hMipmappedArray);
        return checkResult(status);
    }

    // Native counterpart of cuMipmappedArrayDestroy; the body lives in native code.
    private static native int cuMipmappedArrayDestroyNative(CUmipmappedArray hMipmappedArray);
    
    /**
     * Creates a texture reference.
     * 
     *      * CUresult cuTexRefCreate (
     *      CUtexref* pTexRef )
     * 
     * 
     *   Creates a texture reference.  
     *     Deprecated. Creates a texture reference
     *     and returns its handle in *pTexRef. Once created, the
     *     application must call cuTexRefSetArray() or cuTexRefSetAddress() to
     *     associate the reference with allocated memory. Other texture reference
     *     functions are used to specify the format and interpretation
     *     (addressing, filtering, etc.) to be used
     *     when the memory is read through this texture reference.
     *   
     * 
     * 
     * @param pTexRef Returned texture reference
     * 
     * @return CUDA_SUCCESS, CUDA_ERROR_DEINITIALIZED, CUDA_ERROR_NOT_INITIALIZED,
     * CUDA_ERROR_INVALID_CONTEXT, CUDA_ERROR_INVALID_VALUE
     * 
     * @see JCudaDriver#cuTexRefDestroy
     */    
    public static int cuTexRefCreate(CUtexref pTexRef)
    {
        // Call the native binding first, then hand its status code to checkResult.
        final int status = cuTexRefCreateNative(pTexRef);
        return checkResult(status);
    }

    // Native counterpart of cuTexRefCreate; the body lives in native code.
    private static native int cuTexRefCreateNative(CUtexref pTexRef);


    /**
     * Destroys a texture reference.
     * 
     *      * CUresult cuTexRefDestroy (
     *      CUtexref hTexRef )
     * 
     * 
     *   Destroys a texture reference.  
     *     Deprecated. Destroys the texture reference
     *     specified by hTexRef.
     *   
     * 
     * 
     * @param hTexRef Texture reference to destroy
     * 
     * @return CUDA_SUCCESS, CUDA_ERROR_DEINITIALIZED, CUDA_ERROR_NOT_INITIALIZED,
     * CUDA_ERROR_INVALID_CONTEXT, CUDA_ERROR_INVALID_VALUE
     * 
     * @see JCudaDriver#cuTexRefCreate
     */    
    public static int cuTexRefDestroy(CUtexref hTexRef)
    {
        // Call the native binding first, then hand its status code to checkResult.
        final int status = cuTexRefDestroyNative(hTexRef);
        return checkResult(status);
    }

    // Native counterpart of cuTexRefDestroy; the body lives in native code.
    private static native int cuTexRefDestroyNative(CUtexref hTexRef);


    /**
     * Binds an array as a texture reference.
     * 
     *      * CUresult cuTexRefSetArray (
     *      CUtexref hTexRef,
     *      CUarray hArray,
     *      unsigned int  Flags )
     * 
     * 
     *   Binds an array as a texture reference. 
     *     Binds the CUDA array hArray to the texture reference hTexRef. Any previous address or CUDA array state associated with
     *     the texture reference is superseded by this function. Flags
     *     must be set to CU_TRSA_OVERRIDE_FORMAT. Any CUDA array previously bound
     *     to hTexRef is unbound.
     *   
     * 
     * 
     * @param hTexRef Texture reference to bind
     * @param hArray Array to bind
     * @param Flags Options (must be CU_TRSA_OVERRIDE_FORMAT)
     * 
     * @return CUDA_SUCCESS, CUDA_ERROR_DEINITIALIZED, CUDA_ERROR_NOT_INITIALIZED,
     * CUDA_ERROR_INVALID_CONTEXT, CUDA_ERROR_INVALID_VALUE
     * 
     * @see JCudaDriver#cuTexRefSetAddress
     * @see JCudaDriver#cuTexRefSetAddress2D
     * @see JCudaDriver#cuTexRefSetAddressMode
     * @see JCudaDriver#cuTexRefSetFilterMode
     * @see JCudaDriver#cuTexRefSetFlags
     * @see JCudaDriver#cuTexRefSetFormat
     * @see JCudaDriver#cuTexRefGetAddress
     * @see JCudaDriver#cuTexRefGetAddressMode
     * @see JCudaDriver#cuTexRefGetArray
     * @see JCudaDriver#cuTexRefGetFilterMode
     * @see JCudaDriver#cuTexRefGetFlags
     * @see JCudaDriver#cuTexRefGetFormat
     */    
    public static int cuTexRefSetArray(CUtexref hTexRef, CUarray hArray, int Flags)
    {
        // Call the native binding first, then hand its status code to checkResult.
        final int status = cuTexRefSetArrayNative(hTexRef, hArray, Flags);
        return checkResult(status);
    }

    // Native counterpart of cuTexRefSetArray; the body lives in native code.
    private static native int cuTexRefSetArrayNative(CUtexref hTexRef, CUarray hArray, int Flags);
    
    
    /**
     * Binds a mipmapped array to a texture reference.
     * 
     *      * CUresult cuTexRefSetMipmappedArray (
     *      CUtexref hTexRef,
     *      CUmipmappedArray hMipmappedArray,
     *      unsigned int  Flags )
     * 
     * 
     *   Binds a mipmapped array to a texture
     *     reference.  Binds the CUDA mipmapped array hMipmappedArray
     *     to the texture reference hTexRef. Any previous address or
     *     CUDA array state associated with the texture reference is superseded
     *     by this function. Flags must be set to CU_TRSA_OVERRIDE_FORMAT.
     *     Any CUDA array previously bound to hTexRef is unbound.
     *   
     * 
     * 
     * @param hTexRef Texture reference to bind
     * @param hMipmappedArray Mipmapped array to bind
     * @param Flags Options (must be CU_TRSA_OVERRIDE_FORMAT)
     * 
     * @return CUDA_SUCCESS, CUDA_ERROR_DEINITIALIZED, CUDA_ERROR_NOT_INITIALIZED,
     * CUDA_ERROR_INVALID_CONTEXT, CUDA_ERROR_INVALID_VALUE
     * 
     * @see JCudaDriver#cuTexRefSetAddress
     * @see JCudaDriver#cuTexRefSetAddress2D
     * @see JCudaDriver#cuTexRefSetAddressMode
     * @see JCudaDriver#cuTexRefSetFilterMode
     * @see JCudaDriver#cuTexRefSetFlags
     * @see JCudaDriver#cuTexRefSetFormat
     * @see JCudaDriver#cuTexRefGetAddress
     * @see JCudaDriver#cuTexRefGetAddressMode
     * @see JCudaDriver#cuTexRefGetArray
     * @see JCudaDriver#cuTexRefGetFilterMode
     * @see JCudaDriver#cuTexRefGetFlags
     * @see JCudaDriver#cuTexRefGetFormat
     */
    public static int cuTexRefSetMipmappedArray(CUtexref hTexRef, CUmipmappedArray hMipmappedArray, int Flags)
    {
        // Call the native binding first, then hand its status code to checkResult.
        final int status = cuTexRefSetMipmappedArrayNative(hTexRef, hMipmappedArray, Flags);
        return checkResult(status);
    }

    // Native counterpart of cuTexRefSetMipmappedArray; the body lives in native code.
    private static native int cuTexRefSetMipmappedArrayNative(CUtexref hTexRef, CUmipmappedArray hMipmappedArray, int Flags);


    /**
     * Binds an address as a texture reference.
     * 
     *      * CUresult cuTexRefSetAddress (
     *      size_t* ByteOffset,
     *      CUtexref hTexRef,
     *      CUdeviceptr dptr,
     *      size_t bytes )
     * 
     * 
     *   Binds an address as a texture reference.
     *     Binds a linear address range to the texture reference hTexRef.
     *     Any previous address or CUDA array state associated with the texture
     *     reference is superseded by this function. Any memory
     *     previously bound to hTexRef is
     *     unbound.
     *   
     *   Since the hardware enforces an alignment
     *     requirement on texture base addresses, cuTexRefSetAddress() passes back
     *     a byte offset in *ByteOffset that must be applied to texture
     *     fetches in order to read from the desired memory. This offset must be
     *     divided by the texel
     *     size and passed to kernels that read from
     *     the texture so they can be applied to the tex1Dfetch() function.
     *   
     *   If the device memory pointer was returned
     *     from cuMemAlloc(), the offset is guaranteed to be 0 and NULL may be
     *     passed as the ByteOffset parameter.
     *   
     *   The total number of elements (or texels)
     *     in the linear address range cannot exceed
     *     CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE1D_LINEAR_WIDTH. The number of
     *     elements is computed as (bytes / bytesPerElement), where
     *     bytesPerElement is determined from the data format and number of
     *     components set using cuTexRefSetFormat().
     *   
     * 
     * 
     * @param ByteOffset Returned byte offset
     * @param hTexRef Texture reference to bind
     * @param dptr Device pointer to bind
     * @param bytes Size of memory to bind in bytes
     * 
     * @return CUDA_SUCCESS, CUDA_ERROR_DEINITIALIZED, CUDA_ERROR_NOT_INITIALIZED,
     * CUDA_ERROR_INVALID_CONTEXT, CUDA_ERROR_INVALID_VALUE
     * 
     * @see JCudaDriver#cuTexRefSetAddress2D
     * @see JCudaDriver#cuTexRefSetAddressMode
     * @see JCudaDriver#cuTexRefSetArray
     * @see JCudaDriver#cuTexRefSetFilterMode
     * @see JCudaDriver#cuTexRefSetFlags
     * @see JCudaDriver#cuTexRefSetFormat
     * @see JCudaDriver#cuTexRefGetAddress
     * @see JCudaDriver#cuTexRefGetAddressMode
     * @see JCudaDriver#cuTexRefGetArray
     * @see JCudaDriver#cuTexRefGetFilterMode
     * @see JCudaDriver#cuTexRefGetFlags
     * @see JCudaDriver#cuTexRefGetFormat
     */    
    public static int cuTexRefSetAddress(long ByteOffset[], CUtexref hTexRef, CUdeviceptr dptr, long bytes)
    {
        // Call the native binding first, then hand its status code to checkResult.
        final int status = cuTexRefSetAddressNative(ByteOffset, hTexRef, dptr, bytes);
        return checkResult(status);
    }

    // Native counterpart of cuTexRefSetAddress; the body lives in native code.
    private static native int cuTexRefSetAddressNative(long ByteOffset[], CUtexref hTexRef, CUdeviceptr dptr, long bytes);


    /**
     * Sets the format for a texture reference.
     * 
     *      * CUresult cuTexRefSetFormat (
     *      CUtexref hTexRef,
     *      CUarray_format fmt,
     *      int  NumPackedComponents )
     * 
     * 
     *   Sets the format for a texture reference.
     *     Specifies the format of the data to be read by the texture reference
     *     hTexRef. fmt and NumPackedComponents are
     *     exactly analogous to the Format and NumChannels members of the
     *     CUDA_ARRAY_DESCRIPTOR structure: They specify the format of each
     *     component and the number of components per array element.
     *   
     * 
     * 
     * @param hTexRef Texture reference
     * @param fmt Format to set
     * @param NumPackedComponents Number of components per array element
     * 
     * @return CUDA_SUCCESS, CUDA_ERROR_DEINITIALIZED, CUDA_ERROR_NOT_INITIALIZED,
     * CUDA_ERROR_INVALID_CONTEXT, CUDA_ERROR_INVALID_VALUE
     * 
     * @see JCudaDriver#cuTexRefSetAddress
     * @see JCudaDriver#cuTexRefSetAddress2D
     * @see JCudaDriver#cuTexRefSetAddressMode
     * @see JCudaDriver#cuTexRefSetArray
     * @see JCudaDriver#cuTexRefSetFilterMode
     * @see JCudaDriver#cuTexRefSetFlags
     * @see JCudaDriver#cuTexRefGetAddress
     * @see JCudaDriver#cuTexRefGetAddressMode
     * @see JCudaDriver#cuTexRefGetArray
     * @see JCudaDriver#cuTexRefGetFilterMode
     * @see JCudaDriver#cuTexRefGetFlags
     * @see JCudaDriver#cuTexRefGetFormat
     */    
    public static int cuTexRefSetFormat(CUtexref hTexRef, int fmt, int NumPackedComponents)
    {
        // Call the native binding first, then hand its status code to checkResult.
        final int status = cuTexRefSetFormatNative(hTexRef, fmt, NumPackedComponents);
        return checkResult(status);
    }

    // Native counterpart of cuTexRefSetFormat; the body lives in native code.
    private static native int cuTexRefSetFormatNative(CUtexref hTexRef, int fmt, int NumPackedComponents);



    /**
     * Binds an address as a 2D texture reference.
     * 
     *      * CUresult cuTexRefSetAddress2D (
     *      CUtexref hTexRef,
     *      const CUDA_ARRAY_DESCRIPTOR* desc,
     *      CUdeviceptr dptr,
     *      size_t Pitch )
     * 
     * 
     *   Binds an address as a 2D texture
     *     reference.  Binds a linear address range to the texture reference hTexRef. Any previous address or CUDA array state associated with
     *     the texture reference is superseded by this function. Any memory
     *     previously bound to hTexRef is
     *     unbound.
     *   
     *   Using a tex2D() function inside a kernel
     *     requires a call to either cuTexRefSetArray() to bind the corresponding
     *     texture reference to an array, or cuTexRefSetAddress2D() to bind the
     *     texture reference to linear memory.
     *   
     *   Function calls to cuTexRefSetFormat()
     *     cannot follow calls to cuTexRefSetAddress2D() for the same texture
     *     reference.
     *   
     *   It is required that dptr be
     *     aligned to the appropriate hardware-specific texture alignment. You
     *     can query this value using the device attribute
     *     CU_DEVICE_ATTRIBUTE_TEXTURE_ALIGNMENT. If an unaligned dptr
     *     is supplied, CUDA_ERROR_INVALID_VALUE is returned.
     *   
     *   Pitch has to be aligned to
     *     the hardware-specific texture pitch alignment. This value can be
     *     queried using the device attribute
     *     CU_DEVICE_ATTRIBUTE_TEXTURE_PITCH_ALIGNMENT.
     *     If an unaligned Pitch is supplied, CUDA_ERROR_INVALID_VALUE
     *     is returned.
     *   
     *   Width and Height, which are specified
     *     in elements (or texels), cannot exceed
     *     CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE2D_LINEAR_WIDTH and
     *     CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE2D_LINEAR_HEIGHT respectively. Pitch, which is specified in bytes, cannot exceed
     *     CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE2D_LINEAR_PITCH.
     *   
     * 
     * 
     * @param hTexRef Texture reference to bind
     * @param desc Descriptor of CUDA array
     * @param dptr Device pointer to bind
     * @param PitchInBytes Line pitch in bytes
     * 
     * @return CUDA_SUCCESS, CUDA_ERROR_DEINITIALIZED, CUDA_ERROR_NOT_INITIALIZED,
     * CUDA_ERROR_INVALID_CONTEXT, CUDA_ERROR_INVALID_VALUE
     * 
     * @see JCudaDriver#cuTexRefSetAddress
     * @see JCudaDriver#cuTexRefSetAddressMode
     * @see JCudaDriver#cuTexRefSetArray
     * @see JCudaDriver#cuTexRefSetFilterMode
     * @see JCudaDriver#cuTexRefSetFlags
     * @see JCudaDriver#cuTexRefSetFormat
     * @see JCudaDriver#cuTexRefGetAddress
     * @see JCudaDriver#cuTexRefGetAddressMode
     * @see JCudaDriver#cuTexRefGetArray
     * @see JCudaDriver#cuTexRefGetFilterMode
     * @see JCudaDriver#cuTexRefGetFlags
     * @see JCudaDriver#cuTexRefGetFormat
     */    
    public static int cuTexRefSetAddress2D(CUtexref hTexRef, CUDA_ARRAY_DESCRIPTOR desc, CUdeviceptr dptr, long PitchInBytes)
    {
        // Call the native binding first, then hand its status code to checkResult.
        final int status = cuTexRefSetAddress2DNative(hTexRef, desc, dptr, PitchInBytes);
        return checkResult(status);
    }

    // Native counterpart of cuTexRefSetAddress2D; the body lives in native code.
    private static native int cuTexRefSetAddress2DNative(CUtexref hTexRef, CUDA_ARRAY_DESCRIPTOR desc, CUdeviceptr dptr, long PitchInBytes);



    /**
     * Sets the addressing mode for a texture reference.
     * 
     *      * CUresult cuTexRefSetAddressMode (
     *      CUtexref hTexRef,
     *      int  dim,
     *      CUaddress_mode am )
     * 
     * 
     *   Sets the addressing mode for a texture
     *     reference.  Specifies the addressing mode am for the given
     *     dimension dim of the texture reference hTexRef. If
     *     dim is zero, the addressing mode is applied to the first
     *     parameter of the functions used to fetch from the texture; if dim is 1, the second, and so on. CUaddress_mode is defined as:
     *   
     *      typedef enum CUaddress_mode_enum {
     *       CU_TR_ADDRESS_MODE_WRAP = 0,
     *       CU_TR_ADDRESS_MODE_CLAMP = 1,
     *       CU_TR_ADDRESS_MODE_MIRROR = 2,
     *       CU_TR_ADDRESS_MODE_BORDER = 3
     *    } CUaddress_mode;
     *   
     *   Note that this call has no effect if
     *     hTexRef is bound to linear memory. Also, if the flag,
     *     CU_TRSF_NORMALIZED_COORDINATES, is not set, the only supported address
     *     mode is CU_TR_ADDRESS_MODE_CLAMP.
     *   
     * 
     * 
     * @param hTexRef Texture reference
     * @param dim Dimension
     * @param am Addressing mode to set
     * 
     * @return CUDA_SUCCESS, CUDA_ERROR_DEINITIALIZED, CUDA_ERROR_NOT_INITIALIZED,
     * CUDA_ERROR_INVALID_CONTEXT, CUDA_ERROR_INVALID_VALUE
     * 
     * @see JCudaDriver#cuTexRefSetAddress
     * @see JCudaDriver#cuTexRefSetAddress2D
     * @see JCudaDriver#cuTexRefSetArray
     * @see JCudaDriver#cuTexRefSetFilterMode
     * @see JCudaDriver#cuTexRefSetFlags
     * @see JCudaDriver#cuTexRefSetFormat
     * @see JCudaDriver#cuTexRefGetAddress
     * @see JCudaDriver#cuTexRefGetAddressMode
     * @see JCudaDriver#cuTexRefGetArray
     * @see JCudaDriver#cuTexRefGetFilterMode
     * @see JCudaDriver#cuTexRefGetFlags
     * @see JCudaDriver#cuTexRefGetFormat
     */    
    public static int cuTexRefSetAddressMode(CUtexref hTexRef, int dim, int am)
    {
        // Call the native binding first, then hand its status code to checkResult.
        final int status = cuTexRefSetAddressModeNative(hTexRef, dim, am);
        return checkResult(status);
    }

    // Native counterpart of cuTexRefSetAddressMode; the body lives in native code.
    private static native int cuTexRefSetAddressModeNative(CUtexref hTexRef, int dim, int am);


    /**
     * Sets the filtering mode for a texture reference.
     * 
     *      * CUresult cuTexRefSetFilterMode (
     *      CUtexref hTexRef,
     *      CUfilter_mode fm )
     * 
     * 
     *   Sets the filtering mode for a texture
     *     reference.  Specifies the filtering mode fm to be used when
     *     reading memory through the texture reference hTexRef.
     *     CUfilter_mode_enum is defined as:
     *   
     *      typedef enum CUfilter_mode_enum {
     *       CU_TR_FILTER_MODE_POINT = 0,
     *       CU_TR_FILTER_MODE_LINEAR = 1
     *    } CUfilter_mode;
     *   
     *   Note that this call has no effect if
     *     hTexRef is bound to linear memory.
     *   
     * 
     * 
     * @param hTexRef Texture reference
     * @param fm Filtering mode to set
     * 
     * @return CUDA_SUCCESS, CUDA_ERROR_DEINITIALIZED, CUDA_ERROR_NOT_INITIALIZED,
     * CUDA_ERROR_INVALID_CONTEXT, CUDA_ERROR_INVALID_VALUE
     * 
     * @see JCudaDriver#cuTexRefSetAddress
     * @see JCudaDriver#cuTexRefSetAddress2D
     * @see JCudaDriver#cuTexRefSetAddressMode
     * @see JCudaDriver#cuTexRefSetArray
     * @see JCudaDriver#cuTexRefSetFlags
     * @see JCudaDriver#cuTexRefSetFormat
     * @see JCudaDriver#cuTexRefGetAddress
     * @see JCudaDriver#cuTexRefGetAddressMode
     * @see JCudaDriver#cuTexRefGetArray
     * @see JCudaDriver#cuTexRefGetFilterMode
     * @see JCudaDriver#cuTexRefGetFlags
     * @see JCudaDriver#cuTexRefGetFormat
     */    
    public static int cuTexRefSetFilterMode(CUtexref hTexRef, int fm)
    {
        // Call the native binding first, then hand its status code to checkResult.
        final int status = cuTexRefSetFilterModeNative(hTexRef, fm);
        return checkResult(status);
    }

    // Native counterpart of cuTexRefSetFilterMode; the body lives in native code.
    private static native int cuTexRefSetFilterModeNative(CUtexref hTexRef, int fm);

    
    /**
     * Sets the mipmap filtering mode for a texture reference.
     * 
     *      * CUresult cuTexRefSetMipmapFilterMode (
     *      CUtexref hTexRef,
     *      CUfilter_mode fm )
     * 
     * 
     *   Sets the mipmap filtering mode for a
     *     texture reference.  Specifies the mipmap filtering mode fm
     *     to be used when reading memory through the texture reference hTexRef. CUfilter_mode_enum is defined as:
     *   
     *      typedef enum CUfilter_mode_enum {
     *       CU_TR_FILTER_MODE_POINT = 0,
     *       CU_TR_FILTER_MODE_LINEAR = 1
     *    } CUfilter_mode;
     *   
     *   Note that this call has no effect if
     *     hTexRef is not bound to a mipmapped array.
     *   
     * 
     * 
     * @param hTexRef Texture reference
     * @param fm Filtering mode to set
     * 
     * @return CUDA_SUCCESS, CUDA_ERROR_DEINITIALIZED, CUDA_ERROR_NOT_INITIALIZED,
     * CUDA_ERROR_INVALID_CONTEXT, CUDA_ERROR_INVALID_VALUE
     * 
     * @see JCudaDriver#cuTexRefSetAddress
     * @see JCudaDriver#cuTexRefSetAddress2D
     * @see JCudaDriver#cuTexRefSetAddressMode
     * @see JCudaDriver#cuTexRefSetArray
     * @see JCudaDriver#cuTexRefSetFlags
     * @see JCudaDriver#cuTexRefSetFormat
     * @see JCudaDriver#cuTexRefGetAddress
     * @see JCudaDriver#cuTexRefGetAddressMode
     * @see JCudaDriver#cuTexRefGetArray
     * @see JCudaDriver#cuTexRefGetFilterMode
     * @see JCudaDriver#cuTexRefGetFlags
     * @see JCudaDriver#cuTexRefGetFormat
     */
    public static int cuTexRefSetMipmapFilterMode(CUtexref hTexRef, int fm)
    {
        // Call the native binding first, then hand its status code to checkResult.
        final int status = cuTexRefSetMipmapFilterModeNative(hTexRef, fm);
        return checkResult(status);
    }

    // Native counterpart of cuTexRefSetMipmapFilterMode; the body lives in native code.
    private static native int cuTexRefSetMipmapFilterModeNative(CUtexref hTexRef, int fm);

    
    /**
     * Sets the mipmap level bias for a texture reference.
     * 
     *      * CUresult cuTexRefSetMipmapLevelBias (
     *      CUtexref hTexRef,
     *      float  bias )
     * 
     * 
     *   Sets the mipmap level bias for a texture
     *     reference.  Specifies the mipmap level bias bias to be added
     *     to the specified mipmap level when reading memory through the texture
     *     reference hTexRef.
     *   
     *   Note that this call has no effect if
     *     hTexRef is not bound to a mipmapped array.
     *   
     * 
     * 
     * @param hTexRef Texture reference
     * @param bias Mipmap level bias
     * 
     * @return CUDA_SUCCESS, CUDA_ERROR_DEINITIALIZED, CUDA_ERROR_NOT_INITIALIZED,
     * CUDA_ERROR_INVALID_CONTEXT, CUDA_ERROR_INVALID_VALUE
     * 
     * @see JCudaDriver#cuTexRefSetAddress
     * @see JCudaDriver#cuTexRefSetAddress2D
     * @see JCudaDriver#cuTexRefSetAddressMode
     * @see JCudaDriver#cuTexRefSetArray
     * @see JCudaDriver#cuTexRefSetFlags
     * @see JCudaDriver#cuTexRefSetFormat
     * @see JCudaDriver#cuTexRefGetAddress
     * @see JCudaDriver#cuTexRefGetAddressMode
     * @see JCudaDriver#cuTexRefGetArray
     * @see JCudaDriver#cuTexRefGetFilterMode
     * @see JCudaDriver#cuTexRefGetFlags
     * @see JCudaDriver#cuTexRefGetFormat
     */
    public static int cuTexRefSetMipmapLevelBias(CUtexref hTexRef, float bias)
    {
        // Call the native binding first, then hand its status code to checkResult.
        final int status = cuTexRefSetMipmapLevelBiasNative(hTexRef, bias);
        return checkResult(status);
    }

    // Native counterpart of cuTexRefSetMipmapLevelBias; the body lives in native code.
    private static native int cuTexRefSetMipmapLevelBiasNative(CUtexref hTexRef, float bias);
    
    
    /**
     * Sets the mipmap min/max mipmap level clamps for a texture reference.
     * 
     *      * CUresult cuTexRefSetMipmapLevelClamp (
     *      CUtexref hTexRef,
     *      float  minMipmapLevelClamp,
     *      float  maxMipmapLevelClamp )
     * 
     * 
     *   Sets the mipmap min/max mipmap level
     *     clamps for a texture reference.  Specifies the min/max mipmap level
     *     clamps, minMipmapLevelClamp and maxMipmapLevelClamp
     *     respectively, to be used when reading memory through the texture
     *     reference hTexRef.
     *   
     *   Note that this call has no effect if
     *     hTexRef is not bound to a mipmapped array.
     *   
     * 
     * 
     * @param hTexRef Texture reference
     * @param minMipmapLevelClamp Mipmap min level clamp
     * @param maxMipmapLevelClamp Mipmap max level clamp
     * 
     * @return CUDA_SUCCESS, CUDA_ERROR_DEINITIALIZED, CUDA_ERROR_NOT_INITIALIZED,
     * CUDA_ERROR_INVALID_CONTEXT, CUDA_ERROR_INVALID_VALUE
     * 
     * @see JCudaDriver#cuTexRefSetAddress
     * @see JCudaDriver#cuTexRefSetAddress2D
     * @see JCudaDriver#cuTexRefSetAddressMode
     * @see JCudaDriver#cuTexRefSetArray
     * @see JCudaDriver#cuTexRefSetFlags
     * @see JCudaDriver#cuTexRefSetFormat
     * @see JCudaDriver#cuTexRefGetAddress
     * @see JCudaDriver#cuTexRefGetAddressMode
     * @see JCudaDriver#cuTexRefGetArray
     * @see JCudaDriver#cuTexRefGetFilterMode
     * @see JCudaDriver#cuTexRefGetFlags
     * @see JCudaDriver#cuTexRefGetFormat
     */
    public static int cuTexRefSetMipmapLevelClamp(CUtexref hTexRef, float minMipmapLevelClamp, float maxMipmapLevelClamp)
    {
        // Call the native binding first, then hand its status code to checkResult.
        final int status = cuTexRefSetMipmapLevelClampNative(hTexRef, minMipmapLevelClamp, maxMipmapLevelClamp);
        return checkResult(status);
    }

    // Native counterpart of cuTexRefSetMipmapLevelClamp; the body lives in native code.
    private static native int cuTexRefSetMipmapLevelClampNative(CUtexref hTexRef, float minMipmapLevelClamp, float maxMipmapLevelClamp);
    
    
    /**
     * Sets the maximum anisotropy for a texture reference.
     * 
     *      * CUresult cuTexRefSetMaxAnisotropy (
     *      CUtexref hTexRef,
     *      unsigned int  maxAniso )
     * 
     * 
     *   Sets the maximum anisotropy for a texture
     *     reference.  Specifies the maximum anisotropy maxAniso to be
     *     used when reading memory through the texture reference hTexRef.
     *   
     *   Note that this call has no effect if
     *     hTexRef is bound to linear memory.
     *   
     * 
     * 
     * @param hTexRef Texture reference
     * @param maxAniso Maximum anisotropy
     * 
     * @return CUDA_SUCCESS, CUDA_ERROR_DEINITIALIZED, CUDA_ERROR_NOT_INITIALIZED,
     * CUDA_ERROR_INVALID_CONTEXT, CUDA_ERROR_INVALID_VALUE
     * 
     * @see JCudaDriver#cuTexRefSetAddress
     * @see JCudaDriver#cuTexRefSetAddress2D
     * @see JCudaDriver#cuTexRefSetAddressMode
     * @see JCudaDriver#cuTexRefSetArray
     * @see JCudaDriver#cuTexRefSetFlags
     * @see JCudaDriver#cuTexRefSetFormat
     * @see JCudaDriver#cuTexRefGetAddress
     * @see JCudaDriver#cuTexRefGetAddressMode
     * @see JCudaDriver#cuTexRefGetArray
     * @see JCudaDriver#cuTexRefGetFilterMode
     * @see JCudaDriver#cuTexRefGetFlags
     * @see JCudaDriver#cuTexRefGetFormat
     */
    public static int cuTexRefSetMaxAnisotropy(CUtexref hTexRef, int maxAniso)
    {
        // Call the native binding first, then hand its status code to checkResult.
        final int status = cuTexRefSetMaxAnisotropyNative(hTexRef, maxAniso);
        return checkResult(status);
    }

    // Native counterpart of cuTexRefSetMaxAnisotropy; the body lives in native code.
    private static native int cuTexRefSetMaxAnisotropyNative(CUtexref hTexRef, int maxAniso);
    
    /**
     * Sets the flags for a texture reference.
     * 
     *      * CUresult cuTexRefSetFlags (
     *      CUtexref hTexRef,
     *      unsigned int  Flags )
     * 
     * 
     *   Sets the flags for a texture reference. 
     *     Specifies optional flags via Flags to specify the behavior
     *     of data returned through the texture reference hTexRef. The
     *     valid flags are:
     *   
     *   
     *     
     *       CU_TRSF_READ_AS_INTEGER, which
     *         suppresses the default behavior of having the texture promote integer
     *         data to floating point data in the range [0,
     *         1]. Note that texture with
     *         32-bit integer format would not be promoted, regardless of whether or
     *         not this flag is specified;
     *       
     *     
     *     
     *       CU_TRSF_NORMALIZED_COORDINATES,
     *         which suppresses the default behavior of having the texture coordinates
     *         range from [0, Dim) where Dim is the width or height
     *         of the CUDA array. Instead, the
     *         texture coordinates [0, 1.0) reference the entire breadth of the array
     *         dimension;
     *       
     *     
     *   
     *   
     * 
     * 
     * @param hTexRef Texture reference
     * @param Flags Optional flags to set
     * 
     * @return CUDA_SUCCESS, CUDA_ERROR_DEINITIALIZED, CUDA_ERROR_NOT_INITIALIZED,
     * CUDA_ERROR_INVALID_CONTEXT, CUDA_ERROR_INVALID_VALUE
     * 
     * @see JCudaDriver#cuTexRefSetAddress
     * @see JCudaDriver#cuTexRefSetAddress2D
     * @see JCudaDriver#cuTexRefSetAddressMode
     * @see JCudaDriver#cuTexRefSetArray
     * @see JCudaDriver#cuTexRefSetFilterMode
     * @see JCudaDriver#cuTexRefSetFormat
     * @see JCudaDriver#cuTexRefGetAddress
     * @see JCudaDriver#cuTexRefGetAddressMode
     * @see JCudaDriver#cuTexRefGetArray
     * @see JCudaDriver#cuTexRefGetFilterMode
     * @see JCudaDriver#cuTexRefGetFlags
     * @see JCudaDriver#cuTexRefGetFormat
     */    
    public static int cuTexRefSetFlags(CUtexref hTexRef, int Flags)
    {
        // Call the native binding first, then hand its status code to checkResult.
        final int status = cuTexRefSetFlagsNative(hTexRef, Flags);
        return checkResult(status);
    }

    // Native counterpart of cuTexRefSetFlags; the body lives in native code.
    private static native int cuTexRefSetFlagsNative(CUtexref hTexRef, int Flags);


    /**
     * Gets the address associated with a texture reference.
     * 
     *      * CUresult cuTexRefGetAddress (
     *      CUdeviceptr* pdptr,
     *      CUtexref hTexRef )
     * 
     * 
     *   Gets the address associated with a
     *     texture reference.  Returns in *pdptr the base address bound
     *     to the texture reference hTexRef, or returns
     *     CUDA_ERROR_INVALID_VALUE if the texture reference is not bound to any
     *     device memory range.
     *   
     * 
     * 
     * @param pdptr Returned device address
     * @param hTexRef Texture reference
     * 
     * @return CUDA_SUCCESS, CUDA_ERROR_DEINITIALIZED, CUDA_ERROR_NOT_INITIALIZED,
     * CUDA_ERROR_INVALID_CONTEXT, CUDA_ERROR_INVALID_VALUE
     * 
     * @see JCudaDriver#cuTexRefSetAddress
     * @see JCudaDriver#cuTexRefSetAddress2D
     * @see JCudaDriver#cuTexRefSetAddressMode
     * @see JCudaDriver#cuTexRefSetArray
     * @see JCudaDriver#cuTexRefSetFilterMode
     * @see JCudaDriver#cuTexRefSetFlags
     * @see JCudaDriver#cuTexRefSetFormat
     * @see JCudaDriver#cuTexRefGetAddressMode
     * @see JCudaDriver#cuTexRefGetArray
     * @see JCudaDriver#cuTexRefGetFilterMode
     * @see JCudaDriver#cuTexRefGetFlags
     * @see JCudaDriver#cuTexRefGetFormat
     */    
    public static int cuTexRefGetAddress(CUdeviceptr pdptr, CUtexref hTexRef)
    {
        // Call the native binding first, then hand its status code to checkResult.
        final int status = cuTexRefGetAddressNative(pdptr, hTexRef);
        return checkResult(status);
    }

    // Native counterpart of cuTexRefGetAddress; the body lives in native code.
    private static native int cuTexRefGetAddressNative(CUdeviceptr pdptr, CUtexref hTexRef);


    /**
     * Gets the array bound to a texture reference.
     * 
     *      * CUresult cuTexRefGetArray (
     *      CUarray* phArray,
     *      CUtexref hTexRef )
     * 
     * 
     *   Gets the array bound to a texture
     *     reference.  Returns in *phArray the CUDA array bound to the
     *     texture reference hTexRef, or returns CUDA_ERROR_INVALID_VALUE
     *     if the texture reference is not bound to any CUDA array.
     *   
     * 
     * 
     * @param phArray Returned array
     * @param hTexRef Texture reference
     * 
     * @return CUDA_SUCCESS, CUDA_ERROR_DEINITIALIZED, CUDA_ERROR_NOT_INITIALIZED,
     * CUDA_ERROR_INVALID_CONTEXT, CUDA_ERROR_INVALID_VALUE
     * 
     * @see JCudaDriver#cuTexRefSetAddress
     * @see JCudaDriver#cuTexRefSetAddress2D
     * @see JCudaDriver#cuTexRefSetAddressMode
     * @see JCudaDriver#cuTexRefSetArray
     * @see JCudaDriver#cuTexRefSetFilterMode
     * @see JCudaDriver#cuTexRefSetFlags
     * @see JCudaDriver#cuTexRefSetFormat
     * @see JCudaDriver#cuTexRefGetAddress
     * @see JCudaDriver#cuTexRefGetAddressMode
     * @see JCudaDriver#cuTexRefGetFilterMode
     * @see JCudaDriver#cuTexRefGetFlags
     * @see JCudaDriver#cuTexRefGetFormat
     */    
    public static int cuTexRefGetArray(CUarray phArray, CUtexref hTexRef)
    {
        // Call the native binding first, then hand its status code to checkResult.
        final int status = cuTexRefGetArrayNative(phArray, hTexRef);
        return checkResult(status);
    }

    // Native counterpart of cuTexRefGetArray; the body lives in native code.
    private static native int cuTexRefGetArrayNative(CUarray phArray, CUtexref hTexRef);

    
    /**
     * Gets the mipmapped array bound to a texture reference.
     * 
     *      * CUresult cuTexRefGetMipmappedArray (
     *      CUmipmappedArray* phMipmappedArray,
     *      CUtexref hTexRef )
     * 
     * 
     *   Gets the mipmapped array bound to a
     *     texture reference.  Returns in *phMipmappedArray the CUDA
     *     mipmapped array bound to the texture reference hTexRef, or
     *     returns CUDA_ERROR_INVALID_VALUE if the texture reference is not bound
     *     to any CUDA mipmapped array.
     *   
     * 
     * 
     * @param phMipmappedArray Returned mipmapped array
     * @param hTexRef Texture reference
     * 
     * @return CUDA_SUCCESS, CUDA_ERROR_DEINITIALIZED, CUDA_ERROR_NOT_INITIALIZED,
     * CUDA_ERROR_INVALID_CONTEXT, CUDA_ERROR_INVALID_VALUE
     * 
     * @see JCudaDriver#cuTexRefSetAddress
     * @see JCudaDriver#cuTexRefSetAddress2D
     * @see JCudaDriver#cuTexRefSetAddressMode
     * @see JCudaDriver#cuTexRefSetArray
     * @see JCudaDriver#cuTexRefSetFilterMode
     * @see JCudaDriver#cuTexRefSetFlags
     * @see JCudaDriver#cuTexRefSetFormat
     * @see JCudaDriver#cuTexRefGetAddress
     * @see JCudaDriver#cuTexRefGetAddressMode
     * @see JCudaDriver#cuTexRefGetFilterMode
     * @see JCudaDriver#cuTexRefGetFlags
     * @see JCudaDriver#cuTexRefGetFormat
     */
    public static int cuTexRefGetMipmappedArray(CUmipmappedArray phMipmappedArray, CUtexref hTexRef)
    {
        // Run the native call first, then hand its status code to checkResult.
        final int status = cuTexRefGetMipmappedArrayNative(phMipmappedArray, hTexRef);
        return checkResult(status);
    }

    // Native counterpart invoked by cuTexRefGetMipmappedArray.
    private static native int cuTexRefGetMipmappedArrayNative(CUmipmappedArray phMipmappedArray, CUtexref hTexRef);

    /**
     * Gets the addressing mode used by a texture reference.
     * 
     *      * CUresult cuTexRefGetAddressMode (
     *      CUaddress_mode* pam,
     *      CUtexref hTexRef,
     *      int  dim )
     * 
     * 
     *   Gets the addressing mode used by a
     *     texture reference.  Returns in *pam the addressing mode
     *     corresponding to the dimension dim of the texture reference
     *     hTexRef. Currently, the only valid values for dim
     *     are 0 and 1.
     *   
     * 
     * 
     * @param pam Returned addressing mode
     * @param hTexRef Texture reference
     * @param dim Dimension
     * 
     * @return CUDA_SUCCESS, CUDA_ERROR_DEINITIALIZED, CUDA_ERROR_NOT_INITIALIZED,
     * CUDA_ERROR_INVALID_CONTEXT, CUDA_ERROR_INVALID_VALUE
     * 
     * @see JCudaDriver#cuTexRefSetAddress
     * @see JCudaDriver#cuTexRefSetAddress2D
     * @see JCudaDriver#cuTexRefSetAddressMode
     * @see JCudaDriver#cuTexRefSetArray
     * @see JCudaDriver#cuTexRefSetFilterMode
     * @see JCudaDriver#cuTexRefSetFlags
     * @see JCudaDriver#cuTexRefSetFormat
     * @see JCudaDriver#cuTexRefGetAddress
     * @see JCudaDriver#cuTexRefGetArray
     * @see JCudaDriver#cuTexRefGetFilterMode
     * @see JCudaDriver#cuTexRefGetFlags
     * @see JCudaDriver#cuTexRefGetFormat
     */    
    public static int cuTexRefGetAddressMode(int pam[], CUtexref hTexRef, int dim)
    {
        // Run the native call first, then hand its status code to checkResult.
        final int status = cuTexRefGetAddressModeNative(pam, hTexRef, dim);
        return checkResult(status);
    }

    // Native counterpart invoked by cuTexRefGetAddressMode.
    private static native int cuTexRefGetAddressModeNative(int pam[], CUtexref hTexRef, int dim);


    /**
     * Gets the filter-mode used by a texture reference.
     * 
     *      * CUresult cuTexRefGetFilterMode (
     *      CUfilter_mode* pfm,
     *      CUtexref hTexRef )
     * 
     * 
     *   Gets the filter-mode used by a texture
     *     reference.  Returns in *pfm the filtering mode of the texture
     *     reference hTexRef.
     *   
     * 
     * 
     * @param pfm Returned filtering mode
     * @param hTexRef Texture reference
     * 
     * @return CUDA_SUCCESS, CUDA_ERROR_DEINITIALIZED, CUDA_ERROR_NOT_INITIALIZED,
     * CUDA_ERROR_INVALID_CONTEXT, CUDA_ERROR_INVALID_VALUE
     * 
     * @see JCudaDriver#cuTexRefSetAddress
     * @see JCudaDriver#cuTexRefSetAddress2D
     * @see JCudaDriver#cuTexRefSetAddressMode
     * @see JCudaDriver#cuTexRefSetArray
     * @see JCudaDriver#cuTexRefSetFilterMode
     * @see JCudaDriver#cuTexRefSetFlags
     * @see JCudaDriver#cuTexRefSetFormat
     * @see JCudaDriver#cuTexRefGetAddress
     * @see JCudaDriver#cuTexRefGetAddressMode
     * @see JCudaDriver#cuTexRefGetArray
     * @see JCudaDriver#cuTexRefGetFlags
     * @see JCudaDriver#cuTexRefGetFormat
     */    
    public static int cuTexRefGetFilterMode(int pfm[], CUtexref hTexRef)
    {
        // Run the native call first, then hand its status code to checkResult.
        final int status = cuTexRefGetFilterModeNative(pfm, hTexRef);
        return checkResult(status);
    }

    // Native counterpart invoked by cuTexRefGetFilterMode.
    private static native int cuTexRefGetFilterModeNative(int pfm[], CUtexref hTexRef);


    /**
     * Gets the format used by a texture reference.
     * 
     *      * CUresult cuTexRefGetFormat (
     *      CUarray_format* pFormat,
     *      int* pNumChannels,
     *      CUtexref hTexRef )
     * 
     * 
     *   Gets the format used by a texture
     *     reference.  Returns in *pFormat and *pNumChannels
     *     the format and number of components of the CUDA array bound to the
     *     texture reference hTexRef. If pFormat or pNumChannels is NULL, it will be ignored.
     *   
     * 
     * 
     * @param pFormat Returned format
     * @param pNumChannels Returned number of components
     * @param hTexRef Texture reference
     * 
     * @return CUDA_SUCCESS, CUDA_ERROR_DEINITIALIZED, CUDA_ERROR_NOT_INITIALIZED,
     * CUDA_ERROR_INVALID_CONTEXT, CUDA_ERROR_INVALID_VALUE
     * 
     * @see JCudaDriver#cuTexRefSetAddress
     * @see JCudaDriver#cuTexRefSetAddress2D
     * @see JCudaDriver#cuTexRefSetAddressMode
     * @see JCudaDriver#cuTexRefSetArray
     * @see JCudaDriver#cuTexRefSetFilterMode
     * @see JCudaDriver#cuTexRefSetFlags
     * @see JCudaDriver#cuTexRefSetFormat
     * @see JCudaDriver#cuTexRefGetAddress
     * @see JCudaDriver#cuTexRefGetAddressMode
     * @see JCudaDriver#cuTexRefGetArray
     * @see JCudaDriver#cuTexRefGetFilterMode
     * @see JCudaDriver#cuTexRefGetFlags
     */    
    public static int cuTexRefGetFormat(int pFormat[], int pNumChannels[], CUtexref hTexRef)
    {
        // Run the native call first, then hand its status code to checkResult.
        final int status = cuTexRefGetFormatNative(pFormat, pNumChannels, hTexRef);
        return checkResult(status);
    }

    // Native counterpart invoked by cuTexRefGetFormat.
    private static native int cuTexRefGetFormatNative(int pFormat[], int pNumChannels[], CUtexref hTexRef);

    
    /**
     * Gets the mipmap filtering mode for a texture reference.
     * 
     *      * CUresult cuTexRefGetMipmapFilterMode (
     *      CUfilter_mode* pfm,
     *      CUtexref hTexRef )
     * 
     * 
     *   Gets the mipmap filtering mode for a
     *     texture reference.  Returns the mipmap filtering mode in pfm
     *     that's used when reading memory through the texture reference hTexRef.
     *   
     * 
     * 
     * @param pfm Returned mipmap filtering mode
     * @param hTexRef Texture reference
     * 
     * @return CUDA_SUCCESS, CUDA_ERROR_DEINITIALIZED, CUDA_ERROR_NOT_INITIALIZED,
     * CUDA_ERROR_INVALID_CONTEXT, CUDA_ERROR_INVALID_VALUE
     * 
     * @see JCudaDriver#cuTexRefSetAddress
     * @see JCudaDriver#cuTexRefSetAddress2D
     * @see JCudaDriver#cuTexRefSetAddressMode
     * @see JCudaDriver#cuTexRefSetArray
     * @see JCudaDriver#cuTexRefSetFlags
     * @see JCudaDriver#cuTexRefSetFormat
     * @see JCudaDriver#cuTexRefGetAddress
     * @see JCudaDriver#cuTexRefGetAddressMode
     * @see JCudaDriver#cuTexRefGetArray
     * @see JCudaDriver#cuTexRefGetFilterMode
     * @see JCudaDriver#cuTexRefGetFlags
     * @see JCudaDriver#cuTexRefGetFormat
     */
    public static int cuTexRefGetMipmapFilterMode(int pfm[], CUtexref hTexRef)
    {
        // Run the native call first, then hand its status code to checkResult.
        final int status = cuTexRefGetMipmapFilterModeNative(pfm, hTexRef);
        return checkResult(status);
    }

    // Native counterpart invoked by cuTexRefGetMipmapFilterMode.
    private static native int cuTexRefGetMipmapFilterModeNative(int pfm[], CUtexref hTexRef);

    /**
     * Gets the mipmap level bias for a texture reference.
     * 
     *      * CUresult cuTexRefGetMipmapLevelBias (
     *      float* pbias,
     *      CUtexref hTexRef )
     * 
     * 
     *   Gets the mipmap level bias for a texture
     *     reference.  Returns the mipmap level bias in pbias that's
     *     added to the specified mipmap level when reading memory through the
     *     texture reference hTexRef.
     *   
     * 
     * 
     * @param pbias Returned mipmap level bias
     * @param hTexRef Texture reference
     * 
     * @return CUDA_SUCCESS, CUDA_ERROR_DEINITIALIZED, CUDA_ERROR_NOT_INITIALIZED,
     * CUDA_ERROR_INVALID_CONTEXT, CUDA_ERROR_INVALID_VALUE
     * 
     * @see JCudaDriver#cuTexRefSetAddress
     * @see JCudaDriver#cuTexRefSetAddress2D
     * @see JCudaDriver#cuTexRefSetAddressMode
     * @see JCudaDriver#cuTexRefSetArray
     * @see JCudaDriver#cuTexRefSetFlags
     * @see JCudaDriver#cuTexRefSetFormat
     * @see JCudaDriver#cuTexRefGetAddress
     * @see JCudaDriver#cuTexRefGetAddressMode
     * @see JCudaDriver#cuTexRefGetArray
     * @see JCudaDriver#cuTexRefGetFilterMode
     * @see JCudaDriver#cuTexRefGetFlags
     * @see JCudaDriver#cuTexRefGetFormat
     */
    public static int cuTexRefGetMipmapLevelBias(float pbias[], CUtexref hTexRef)
    {
        // Run the native call first, then hand its status code to checkResult.
        final int status = cuTexRefGetMipmapLevelBiasNative(pbias, hTexRef);
        return checkResult(status);
    }

    // Native counterpart invoked by cuTexRefGetMipmapLevelBias.
    private static native int cuTexRefGetMipmapLevelBiasNative(float pbias[], CUtexref hTexRef);
    
    /**
     * Gets the min/max mipmap level clamps for a texture reference.
     * 
     *      * CUresult cuTexRefGetMipmapLevelClamp (
     *      float* pminMipmapLevelClamp,
     *      float* pmaxMipmapLevelClamp,
     *      CUtexref hTexRef )
     * 
     * 
     *   Gets the min/max mipmap level clamps for
     *     a texture reference.  Returns the min/max mipmap level clamps in
     *     pminMipmapLevelClamp and pmaxMipmapLevelClamp that are
     *     used when reading memory through the texture reference hTexRef.
     *   
     * 
     * 
     * @param pminMipmapLevelClamp Returned mipmap min level clamp
     * @param pmaxMipmapLevelClamp Returned mipmap max level clamp
     * @param hTexRef Texture reference
     * 
     * @return CUDA_SUCCESS, CUDA_ERROR_DEINITIALIZED, CUDA_ERROR_NOT_INITIALIZED,
     * CUDA_ERROR_INVALID_CONTEXT, CUDA_ERROR_INVALID_VALUE
     * 
     * @see JCudaDriver#cuTexRefSetAddress
     * @see JCudaDriver#cuTexRefSetAddress2D
     * @see JCudaDriver#cuTexRefSetAddressMode
     * @see JCudaDriver#cuTexRefSetArray
     * @see JCudaDriver#cuTexRefSetFlags
     * @see JCudaDriver#cuTexRefSetFormat
     * @see JCudaDriver#cuTexRefGetAddress
     * @see JCudaDriver#cuTexRefGetAddressMode
     * @see JCudaDriver#cuTexRefGetArray
     * @see JCudaDriver#cuTexRefGetFilterMode
     * @see JCudaDriver#cuTexRefGetFlags
     * @see JCudaDriver#cuTexRefGetFormat
     */
    public static int cuTexRefGetMipmapLevelClamp(float pminMipmapLevelClamp[], float pmaxMipmapLevelClamp[], CUtexref hTexRef)
    {
        // Run the native call first, then hand its status code to checkResult.
        final int status = cuTexRefGetMipmapLevelClampNative(
            pminMipmapLevelClamp, pmaxMipmapLevelClamp, hTexRef);
        return checkResult(status);
    }

    // Native counterpart invoked by cuTexRefGetMipmapLevelClamp.
    private static native int cuTexRefGetMipmapLevelClampNative(float pminMipmapLevelClamp[], float pmaxMipmapLevelClamp[], CUtexref hTexRef);

    /**
     * Gets the maximum anisotropy for a texture reference.
     * 
     *      * CUresult cuTexRefGetMaxAnisotropy (
     *      int* pmaxAniso,
     *      CUtexref hTexRef )
     * 
     * 
     *   Gets the maximum anisotropy for a texture
     *     reference.  Returns the maximum anisotropy in pmaxAniso
     *     that's used when reading memory through the texture reference hTexRef.
     *   
     * 
     * 
     * @param pmaxAniso Returned maximum anisotropy
     * @param hTexRef Texture reference
     * 
     * @return CUDA_SUCCESS, CUDA_ERROR_DEINITIALIZED, CUDA_ERROR_NOT_INITIALIZED,
     * CUDA_ERROR_INVALID_CONTEXT, CUDA_ERROR_INVALID_VALUE
     * 
     * @see JCudaDriver#cuTexRefSetAddress
     * @see JCudaDriver#cuTexRefSetAddress2D
     * @see JCudaDriver#cuTexRefSetAddressMode
     * @see JCudaDriver#cuTexRefSetArray
     * @see JCudaDriver#cuTexRefSetFlags
     * @see JCudaDriver#cuTexRefSetFormat
     * @see JCudaDriver#cuTexRefGetAddress
     * @see JCudaDriver#cuTexRefGetAddressMode
     * @see JCudaDriver#cuTexRefGetArray
     * @see JCudaDriver#cuTexRefGetFilterMode
     * @see JCudaDriver#cuTexRefGetFlags
     * @see JCudaDriver#cuTexRefGetFormat
     */
    public static int cuTexRefGetMaxAnisotropy(int pmaxAniso[], CUtexref hTexRef)
    {
        // Run the native call first, then hand its status code to checkResult.
        final int status = cuTexRefGetMaxAnisotropyNative(pmaxAniso, hTexRef);
        return checkResult(status);
    }

    // Native counterpart invoked by cuTexRefGetMaxAnisotropy.
    private static native int cuTexRefGetMaxAnisotropyNative(int pmaxAniso[], CUtexref hTexRef);
    
    
    /**
     * Gets the flags used by a texture reference.
     * 
     *      * CUresult cuTexRefGetFlags (
     *      unsigned int* pFlags,
     *      CUtexref hTexRef )
     * 
     * 
     *   Gets the flags used by a texture
     *     reference.  Returns in *pFlags the flags of the texture
     *     reference hTexRef.
     *   
     * 
     * 
     * @param pFlags Returned flags
     * @param hTexRef Texture reference
     * 
     * @return CUDA_SUCCESS, CUDA_ERROR_DEINITIALIZED, CUDA_ERROR_NOT_INITIALIZED,
     * CUDA_ERROR_INVALID_CONTEXT, CUDA_ERROR_INVALID_VALUE
     * 
     * @see JCudaDriver#cuTexRefSetAddress
     * @see JCudaDriver#cuTexRefSetAddress2D
     * @see JCudaDriver#cuTexRefSetAddressMode
     * @see JCudaDriver#cuTexRefSetArray
     * @see JCudaDriver#cuTexRefSetFilterMode
     * @see JCudaDriver#cuTexRefSetFlags
     * @see JCudaDriver#cuTexRefSetFormat
     * @see JCudaDriver#cuTexRefGetAddress
     * @see JCudaDriver#cuTexRefGetAddressMode
     * @see JCudaDriver#cuTexRefGetArray
     * @see JCudaDriver#cuTexRefGetFilterMode
     * @see JCudaDriver#cuTexRefGetFormat
     */    
    public static int cuTexRefGetFlags(int pFlags[], CUtexref hTexRef)
    {
        // Run the native call first, then hand its status code to checkResult.
        final int status = cuTexRefGetFlagsNative(pFlags, hTexRef);
        return checkResult(status);
    }

    // Native counterpart invoked by cuTexRefGetFlags.
    private static native int cuTexRefGetFlagsNative(int pFlags[], CUtexref hTexRef);


    /**
     * Sets the CUDA array for a surface reference.
     * 
     *      * CUresult cuSurfRefSetArray (
     *      CUsurfref hSurfRef,
     *      CUarray hArray,
     *      unsigned int  Flags )
     * 
     * 
     *   Sets the CUDA array for a surface
     *     reference.  Sets the CUDA array hArray to be read and written
     *     by the surface reference hSurfRef. Any previous CUDA array
     *     state associated with the surface reference is superseded by this
     *     function. Flags must be set to 0. The CUDA_ARRAY3D_SURFACE_LDST
     *     flag must have been set for the CUDA array. Any CUDA array previously
     *     bound to hSurfRef is unbound.
     *   
     * 
     * 
     * @param hSurfRef Surface reference handle
     * @param hArray CUDA array handle
     * @param Flags set to 0
     * 
     * @return CUDA_SUCCESS, CUDA_ERROR_DEINITIALIZED, CUDA_ERROR_NOT_INITIALIZED,
     * CUDA_ERROR_INVALID_CONTEXT, CUDA_ERROR_INVALID_VALUE
     * 
     * @see JCudaDriver#cuModuleGetSurfRef
     * @see JCudaDriver#cuSurfRefGetArray
     */    
    public static int cuSurfRefSetArray(CUsurfref hSurfRef, CUarray hArray, int Flags )
    {
        // Run the native call first, then hand its status code to checkResult.
        final int status = cuSurfRefSetArrayNative(hSurfRef, hArray, Flags);
        return checkResult(status);
    }

    // Native counterpart invoked by cuSurfRefSetArray.
    private static native int cuSurfRefSetArrayNative(CUsurfref hSurfRef, CUarray hArray, int Flags );

    /**
     * Passes back the CUDA array bound to a surface reference.
     * 
     *      * CUresult cuSurfRefGetArray (
     *      CUarray* phArray,
     *      CUsurfref hSurfRef )
     * 
     * 
     *   Passes back the CUDA array bound to a
     *     surface reference.  Returns in *phArray the CUDA array bound
     *     to the surface reference hSurfRef, or returns
     *     CUDA_ERROR_INVALID_VALUE if the surface reference is not bound to any
     *     CUDA array.
     *   
     * 
     * 
     * @param phArray Returned CUDA array bound to the surface reference
     * @param hSurfRef Surface reference handle
     * 
     * @return CUDA_SUCCESS, CUDA_ERROR_DEINITIALIZED, CUDA_ERROR_NOT_INITIALIZED,
     * CUDA_ERROR_INVALID_CONTEXT, CUDA_ERROR_INVALID_VALUE
     * 
     * @see JCudaDriver#cuModuleGetSurfRef
     * @see JCudaDriver#cuSurfRefSetArray
     */    
    public static int cuSurfRefGetArray( CUarray phArray, CUsurfref hSurfRef )
    {
        // Run the native call first, then hand its status code to checkResult.
        final int status = cuSurfRefGetArrayNative(phArray, hSurfRef);
        return checkResult(status);
    }

    // Native counterpart invoked by cuSurfRefGetArray.
    private static native int cuSurfRefGetArrayNative( CUarray phArray, CUsurfref hSurfRef );


    
    /**
     * Creates a texture object.
     * 
     *      * CUresult cuTexObjectCreate (
     *      CUtexObject* pTexObject,
     *      const CUDA_RESOURCE_DESC* pResDesc,
     *      const CUDA_TEXTURE_DESC* pTexDesc,
     *      const CUDA_RESOURCE_VIEW_DESC* pResViewDesc )
     * 
     * 
     *   Creates a texture object.  Creates a
     *     texture object and returns it in pTexObject. pResDesc
     *     describes the data to texture from. pTexDesc describes how
     *     the data should be sampled. pResViewDesc is an optional
     *     argument that specifies an alternate format for the data described by
     *     pResDesc, and also describes the subresource region to
     *     restrict access to when texturing. pResViewDesc can only be
     *     specified if the type of resource is a CUDA array or a CUDA mipmapped
     *     array.
     *   
     *   Texture objects are only supported on
     *     devices of compute capability 3.0 or higher.
     *   
     *   The CUDA_RESOURCE_DESC structure is
     *     defined as: 
     *   
     *         typedef struct CUDA_RESOURCE_DESC_st
     *         {
     *             CUresourcetype resType;
     * 
     *             union {
     *                 struct {
     *                     CUarray hArray;
     *                 } array;
     *                 struct {
     *                     CUmipmappedArray hMipmappedArray;
     *                 } mipmap;
     *                 struct {
     *                     CUdeviceptr devPtr;
     *                     CUarray_format format;
     *                     unsigned int numChannels;
     *                     size_t sizeInBytes;
     *                 } linear;
     *                 struct {
     *                     CUdeviceptr devPtr;
     *                     CUarray_format format;
     *                     unsigned int numChannels;
     *                     size_t width;
     *                     size_t height;
     *                     size_t pitchInBytes;
     *                 } pitch2D;
     *             } res;
     * 
     *             unsigned int flags;
     *         } CUDA_RESOURCE_DESC;
     *   where:
     *   
     *     
     *       
     *         CUDA_RESOURCE_DESC::resType
     *         specifies the type of resource to texture from. CUresourcetype is
     *         defined as: 
     *                 typedef enum CUresourcetype_enum {
     *             CU_RESOURCE_TYPE_ARRAY           = 0x00,
     *             CU_RESOURCE_TYPE_MIPMAPPED_ARRAY = 0x01,
     *             CU_RESOURCE_TYPE_LINEAR          = 0x02,
     *             CU_RESOURCE_TYPE_PITCH2D         = 0x03
     *         } CUresourcetype;
     *       
     *     
     *   
     *   
     *   If CUDA_RESOURCE_DESC::resType is set
     *     to CU_RESOURCE_TYPE_ARRAY, CUDA_RESOURCE_DESC::res::array::hArray must
     *     be set to a valid CUDA array handle.
     *   
     *   If CUDA_RESOURCE_DESC::resType is set
     *     to CU_RESOURCE_TYPE_MIPMAPPED_ARRAY,
     *     CUDA_RESOURCE_DESC::res::mipmap::hMipmappedArray must be set to a valid
     *     CUDA mipmapped array handle.
     *   
     *   If CUDA_RESOURCE_DESC::resType is set
     *     to CU_RESOURCE_TYPE_LINEAR, CUDA_RESOURCE_DESC::res::linear::devPtr
     *     must be set to a valid device pointer, that is aligned to
     *     CU_DEVICE_ATTRIBUTE_TEXTURE_ALIGNMENT. CUDA_RESOURCE_DESC::res::linear::format
     *     and CUDA_RESOURCE_DESC::res::linear::numChannels describe the format
     *     of each component
     *     and the number of components per array
     *     element. CUDA_RESOURCE_DESC::res::linear::sizeInBytes specifies the
     *     size of the array
     *     in bytes. The total number of elements
     *     in the linear address range cannot exceed
     *     CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE1D_LINEAR_WIDTH. The number of
     *     elements is computed as (sizeInBytes / (sizeof(format) *
     *     numChannels)).
     *   
     *   If CUDA_RESOURCE_DESC::resType is set
     *     to CU_RESOURCE_TYPE_PITCH2D, CUDA_RESOURCE_DESC::res::pitch2D::devPtr
     *     must be set to a valid device pointer, that is aligned to
     *     CU_DEVICE_ATTRIBUTE_TEXTURE_ALIGNMENT. CUDA_RESOURCE_DESC::res::pitch2D::format
     *     and CUDA_RESOURCE_DESC::res::pitch2D::numChannels describe the format
     *     of each component
     *     and the number of components per array
     *     element. CUDA_RESOURCE_DESC::res::pitch2D::width and
     *     CUDA_RESOURCE_DESC::res::pitch2D::height
     *     specify the width and height of the array
     *     in elements, and cannot exceed CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE2D_LINEAR_WIDTH
     *     and CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE2D_LINEAR_HEIGHT respectively.
     *     CUDA_RESOURCE_DESC::res::pitch2D::pitchInBytes specifies the pitch
     *     between two rows in bytes and has to be
     *     aligned to
     *     CU_DEVICE_ATTRIBUTE_TEXTURE_PITCH_ALIGNMENT. Pitch cannot exceed
     *     CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE2D_LINEAR_PITCH.
     *   
     *   
     *     
     *       flags must be set to zero.
     *     
     *   
     *   
     *   The CUDA_TEXTURE_DESC struct is defined
     *     as 
     *   
     *         typedef struct CUDA_TEXTURE_DESC_st {
     *             CUaddress_mode addressMode[3];
     *             CUfilter_mode filterMode;
     *             unsigned int flags;
     *             unsigned int maxAnisotropy;
     *             CUfilter_mode mipmapFilterMode;
     *             float mipmapLevelBias;
     *             float minMipmapLevelClamp;
     *             float maxMipmapLevelClamp;
     *         } CUDA_TEXTURE_DESC;
     *   where
     *   
     *     
     *       
     *         CUDA_TEXTURE_DESC::addressMode
     *         specifies the addressing mode for each dimension of the texture data.
     *         CUaddress_mode is defined as: 
     *                 typedef enum
     * CUaddress_mode_enum {
     *             CU_TR_ADDRESS_MODE_WRAP = 0,
     *             CU_TR_ADDRESS_MODE_CLAMP = 1,
     *             CU_TR_ADDRESS_MODE_MIRROR = 2,
     *             CU_TR_ADDRESS_MODE_BORDER = 3
     *         } CUaddress_mode;
     *         This is ignored if
     *         CUDA_RESOURCE_DESC::resType is CU_RESOURCE_TYPE_LINEAR. Also, if the
     *         flag, CU_TRSF_NORMALIZED_COORDINATES is not set, the only supported
     *         address mode is CU_TR_ADDRESS_MODE_CLAMP.
     *       
     *     
     *   
     *   
     *   
     *     
     *       
     *         CUDA_TEXTURE_DESC::filterMode
     *         specifies the filtering mode to be used when fetching from the texture.
     *         CUfilter_mode is defined as: 
     *                 typedef enum CUfilter_mode_enum
     * {
     *             CU_TR_FILTER_MODE_POINT = 0,
     *             CU_TR_FILTER_MODE_LINEAR = 1
     *         } CUfilter_mode;
     *         This is ignored if
     *         CUDA_RESOURCE_DESC::resType is CU_RESOURCE_TYPE_LINEAR.
     *       
     *     
     *   
     *   
     *   
     *     
     *       
     *         CUDA_TEXTURE_DESC::flags can
     *         be any combination of the following:
     *         
     *           
     *             CU_TRSF_READ_AS_INTEGER,
     *               which suppresses the default behavior of having the texture promote
     *               integer data to floating point data in the range [0,
     *               1]. Note that texture
     *               with 32-bit integer format would not be promoted, regardless of whether
     *               or not this flag is specified.
     *             
     *           
     *           
     *             CU_TRSF_NORMALIZED_COORDINATES, which suppresses the default behavior
     *               of having the texture coordinates range from [0, Dim) where Dim is the
     *               width or height
     *               of the CUDA array.
     *               Instead, the texture coordinates [0, 1.0) reference the entire breadth
     *               of the array dimension; Note that
     *               for CUDA mipmapped
     *               arrays, this flag has to be set.
     *             
     *           
     *         
     *       
     *     
     *   
     *   
     *   
     *     
     *       CUDA_TEXTURE_DESC::maxAnisotropy
     *         specifies the maximum anisotropy ratio to be used when doing anisotropic
     *         filtering. This value will be clamped to the range
     *         [1,16].
     *       
     *     
     *   
     *   
     *   
     *     
     *       CUDA_TEXTURE_DESC::mipmapFilterMode
     *         specifies the filter mode when the calculated mipmap level lies between
     *         two defined mipmap levels.
     *       
     *     
     *   
     *   
     *   
     *     
     *       CUDA_TEXTURE_DESC::mipmapLevelBias
     *         specifies the offset to be applied to the calculated mipmap level.
     *       
     *     
     *   
     *   
     *   
     *     
     *       CUDA_TEXTURE_DESC::minMipmapLevelClamp
     *         specifies the lower end of the mipmap level range to clamp access to.
     *       
     *     
     *   
     *   
     *   
     *     
     *       CUDA_TEXTURE_DESC::maxMipmapLevelClamp
     *         specifies the upper end of the mipmap level range to clamp access to.
     *       
     *     
     *   
     *   
     *   The CUDA_RESOURCE_VIEW_DESC struct is
     *     defined as 
     *   
     *         typedef struct CUDA_RESOURCE_VIEW_DESC_st
     *         {
     *             CUresourceViewFormat format;
     *             size_t width;
     *             size_t height;
     *             size_t depth;
     *             unsigned int firstMipmapLevel;
     *             unsigned int lastMipmapLevel;
     *             unsigned int firstLayer;
     *             unsigned int lastLayer;
     *         } CUDA_RESOURCE_VIEW_DESC;
     *   where:
     *   
     *     
     *       CUDA_RESOURCE_VIEW_DESC::format
     *         specifies how the data contained in the CUDA array or CUDA mipmapped
     *         array should be interpreted. Note that this can incur
     *         a change in size of the texture
     *         data. If the resource view format is a block compressed format, then
     *         the underlying CUDA array
     *         or CUDA mipmapped array has to
     *         have a base format of CU_AD_FORMAT_UNSIGNED_INT32 with 2 or 4 channels,
     *         depending on the block compressed format. For ex., BC1 and BC4 require
     *         the underlying CUDA array to
     *         have a format of
     *         CU_AD_FORMAT_UNSIGNED_INT32 with 2 channels. The other BC formats
     *         require the underlying resource to have the same base format but with
     *         4 channels.
     *       
     *     
     *   
     *   
     *   
     *     
     *       CUDA_RESOURCE_VIEW_DESC::width
     *         specifies the new width of the texture data. If the resource view
     *         format is a block compressed format, this value has to
     *         be 4 times the original width
     *         of the resource. For non block compressed formats, this value has to
     *         be equal to that of the
     *         original resource.
     *       
     *     
     *   
     *   
     *   
     *     
     *       CUDA_RESOURCE_VIEW_DESC::height
     *         specifies the new height of the texture data. If the resource view
     *         format is a block compressed format, this value has to
     *         be 4 times the original height
     *         of the resource. For non block compressed formats, this value has to
     *         be equal to that of the
     *         original resource.
     *       
     *     
     *   
     *   
     *   
     *     
     *       CUDA_RESOURCE_VIEW_DESC::depth
     *         specifies the new depth of the texture data. This value has to be equal
     *         to that of the original resource.
     *       
     *     
     *   
     *   
     *   
     *     
     *       CUDA_RESOURCE_VIEW_DESC::firstMipmapLevel specifies the most detailed
     *         mipmap level. This will be the new mipmap level zero. For non-mipmapped
     *         resources, this value
     *         has to be
     *         zero. CUDA_TEXTURE_DESC::minMipmapLevelClamp and
     *         CUDA_TEXTURE_DESC::maxMipmapLevelClamp will be relative to this value.
     *         For ex., if the firstMipmapLevel is set to 2, and a minMipmapLevelClamp
     *         of 1.2 is specified,
     *         then the actual minimum mipmap
     *         level clamp will be 3.2.
     *       
     *     
     *   
     *   
     *   
     *     
     *       CUDA_RESOURCE_VIEW_DESC::lastMipmapLevel
     *         specifies the least detailed mipmap level. For non-mipmapped resources,
     *         this value has to be zero.
     *       
     *     
     *   
     *   
     *   
     *     
     *       CUDA_RESOURCE_VIEW_DESC::firstLayer
     *         specifies the first layer index for layered textures. This will be the
     *         new layer zero. For non-layered resources, this value
     *         has to be zero.
     *       
     *     
     *   
     *   
     *   
     *     
     *       CUDA_RESOURCE_VIEW_DESC::lastLayer
     *         specifies the last layer index for layered textures. For non-layered
     *         resources, this value has to be zero.
     *       
     *     
     *   
     *   
     * 
     * 
     * @param pTexObject Texture object to create
     * @param pResDesc Resource descriptor
     * @param pTexDesc Texture descriptor
     * @param pResViewDesc Resource view descriptor
     * 
     * @return CUDA_SUCCESS, CUDA_ERROR_DEINITIALIZED, CUDA_ERROR_NOT_INITIALIZED,
     * CUDA_ERROR_INVALID_CONTEXT, CUDA_ERROR_INVALID_VALUE
     * 
     * @see JCudaDriver#cuTexObjectDestroy
     */
    public static int cuTexObjectCreate(CUtexObject pTexObject, CUDA_RESOURCE_DESC pResDesc, CUDA_TEXTURE_DESC pTexDesc, CUDA_RESOURCE_VIEW_DESC pResViewDesc)
    {
        // Run the native call first, then hand its status code to checkResult.
        final int status = cuTexObjectCreateNative(
            pTexObject, pResDesc, pTexDesc, pResViewDesc);
        return checkResult(status);
    }

    // Native counterpart invoked by cuTexObjectCreate.
    private static native int cuTexObjectCreateNative(CUtexObject pTexObject, CUDA_RESOURCE_DESC pResDesc, CUDA_TEXTURE_DESC pTexDesc, CUDA_RESOURCE_VIEW_DESC pResViewDesc);

    /**
     * Destroys a texture object.
     * 
     *      * CUresult cuTexObjectDestroy (
     *      CUtexObject texObject )
     * 
     * 
     *   Destroys a texture object.  Destroys the
     *     texture object specified by texObject.
     *   
     * 
     * 
     * @param texObject Texture object to destroy
     * 
     * @return CUDA_SUCCESS, CUDA_ERROR_DEINITIALIZED, CUDA_ERROR_NOT_INITIALIZED,
     * CUDA_ERROR_INVALID_CONTEXT, CUDA_ERROR_INVALID_VALUE
     * 
     * @see JCudaDriver#cuTexObjectCreate
     */
    public static int cuTexObjectDestroy(CUtexObject texObject)
    {
        // Run the native call first, then hand its status code to checkResult.
        final int status = cuTexObjectDestroyNative(texObject);
        return checkResult(status);
    }

    // Native counterpart invoked by cuTexObjectDestroy.
    private static native int cuTexObjectDestroyNative(CUtexObject texObject);
    
    
    /**
     * Returns a texture object's resource descriptor.
     * 
     *      * CUresult cuTexObjectGetResourceDesc (
     *      CUDA_RESOURCE_DESC* pResDesc,
     *      CUtexObject texObject )
     * 
     * 
     *   Returns a texture object's resource
     *     descriptor.  Returns the resource descriptor for the texture object
     *     specified by texObject.
     *   
     * 
     * 
     * @param pResDesc Resource descriptor
     * @param texObject Texture object
     * 
     * @return CUDA_SUCCESS, CUDA_ERROR_DEINITIALIZED, CUDA_ERROR_NOT_INITIALIZED,
     * CUDA_ERROR_INVALID_CONTEXT, CUDA_ERROR_INVALID_VALUE
     * 
     * @see JCudaDriver#cuTexObjectCreate
     */
    public static int cuTexObjectGetResourceDesc(CUDA_RESOURCE_DESC pResDesc, CUtexObject texObject)
    {
        // Run the native call first, then hand its status code to checkResult.
        final int status = cuTexObjectGetResourceDescNative(pResDesc, texObject);
        return checkResult(status);
    }

    // Native counterpart invoked by cuTexObjectGetResourceDesc.
    private static native int cuTexObjectGetResourceDescNative(CUDA_RESOURCE_DESC pResDesc, CUtexObject texObject);

    /**
     * Returns a texture object's texture descriptor.
     *
     * <pre>
     * CUresult cuTexObjectGetTextureDesc (
     *      CUDA_TEXTURE_DESC* pTexDesc,
     *      CUtexObject texObject )
     * </pre>
     *
     * <p>
     * Returns the texture descriptor for the texture object specified
     * by texObject.
     *
     * @param pTexDesc Texture descriptor
     * @param texObject Texture object
     *
     * @return CUDA_SUCCESS, CUDA_ERROR_DEINITIALIZED, CUDA_ERROR_NOT_INITIALIZED,
     * CUDA_ERROR_INVALID_CONTEXT, CUDA_ERROR_INVALID_VALUE
     *
     * @see JCudaDriver#cuTexObjectCreate
     */
    public static int cuTexObjectGetTextureDesc(CUDA_TEXTURE_DESC pTexDesc, CUtexObject texObject)
    {
        // Run the native call first, then hand its status code to checkResult
        final int status = cuTexObjectGetTextureDescNative(pTexDesc, texObject);
        return checkResult(status);
    }
    private static native int cuTexObjectGetTextureDescNative(CUDA_TEXTURE_DESC pTexDesc, CUtexObject texObject);

    /**
     * Returns a texture object's resource view descriptor.
     *
     * <pre>
     * CUresult cuTexObjectGetResourceViewDesc (
     *      CUDA_RESOURCE_VIEW_DESC* pResViewDesc,
     *      CUtexObject texObject )
     * </pre>
     *
     * <p>
     * Returns the resource view descriptor for the texture object
     * specified by texObject. If no resource view was set for texObject,
     * CUDA_ERROR_INVALID_VALUE is returned.
     *
     * @param pResViewDesc Resource view descriptor
     * @param texObject Texture object
     *
     * @return CUDA_SUCCESS, CUDA_ERROR_DEINITIALIZED, CUDA_ERROR_NOT_INITIALIZED,
     * CUDA_ERROR_INVALID_CONTEXT, CUDA_ERROR_INVALID_VALUE
     *
     * @see JCudaDriver#cuTexObjectCreate
     */
    public static int cuTexObjectGetResourceViewDesc(CUDA_RESOURCE_VIEW_DESC pResViewDesc, CUtexObject texObject)
    {
        // Run the native call first, then hand its status code to checkResult
        final int status = cuTexObjectGetResourceViewDescNative(pResViewDesc, texObject);
        return checkResult(status);
    }
    private static native int cuTexObjectGetResourceViewDescNative(CUDA_RESOURCE_VIEW_DESC pResViewDesc, CUtexObject texObject);

    /**
     * Creates a surface object.
     *
     * <pre>
     * CUresult cuSurfObjectCreate (
     *      CUsurfObject* pSurfObject,
     *      const CUDA_RESOURCE_DESC* pResDesc )
     * </pre>
     *
     * <p>
     * Creates a surface object and returns it in pSurfObject. pResDesc
     * describes the data to perform surface load/stores on.
     * CUDA_RESOURCE_DESC::resType must be CU_RESOURCE_TYPE_ARRAY and
     * CUDA_RESOURCE_DESC::res::array::hArray must be set to a valid CUDA
     * array handle. CUDA_RESOURCE_DESC::flags must be set to zero.
     *
     * <p>
     * Surface objects are only supported on devices of compute
     * capability 3.0 or higher.
     *
     * @param pSurfObject Surface object to create
     * @param pResDesc Resource descriptor
     *
     * @return CUDA_SUCCESS, CUDA_ERROR_DEINITIALIZED, CUDA_ERROR_NOT_INITIALIZED,
     * CUDA_ERROR_INVALID_CONTEXT, CUDA_ERROR_INVALID_VALUE
     *
     * @see JCudaDriver#cuSurfObjectDestroy
     */
    public static int cuSurfObjectCreate(CUsurfObject pSurfObject, CUDA_RESOURCE_DESC pResDesc)
    {
        // Run the native call first, then hand its status code to checkResult
        final int status = cuSurfObjectCreateNative(pSurfObject, pResDesc);
        return checkResult(status);
    }
    private static native int cuSurfObjectCreateNative(CUsurfObject pSurfObject, CUDA_RESOURCE_DESC pResDesc);

    /**
     * Destroys a surface object.
     *
     * <pre>
     * CUresult cuSurfObjectDestroy (
     *      CUsurfObject surfObject )
     * </pre>
     *
     * <p>
     * Destroys the surface object specified by surfObject.
     *
     * @param surfObject Surface object to destroy
     *
     * @return CUDA_SUCCESS, CUDA_ERROR_DEINITIALIZED, CUDA_ERROR_NOT_INITIALIZED,
     * CUDA_ERROR_INVALID_CONTEXT, CUDA_ERROR_INVALID_VALUE
     *
     * @see JCudaDriver#cuSurfObjectCreate
     */
    public static int cuSurfObjectDestroy(CUsurfObject surfObject)
    {
        // Run the native call first, then hand its status code to checkResult
        final int status = cuSurfObjectDestroyNative(surfObject);
        return checkResult(status);
    }
    private static native int cuSurfObjectDestroyNative(CUsurfObject surfObject);

    /**
     * Returns a surface object's resource descriptor.
     *
     * <pre>
     * CUresult cuSurfObjectGetResourceDesc (
     *      CUDA_RESOURCE_DESC* pResDesc,
     *      CUsurfObject surfObject )
     * </pre>
     *
     * <p>
     * Returns the resource descriptor for the surface object specified
     * by surfObject.
     *
     * @param pResDesc Resource descriptor
     * @param surfObject Surface object
     *
     * @return CUDA_SUCCESS, CUDA_ERROR_DEINITIALIZED, CUDA_ERROR_NOT_INITIALIZED,
     * CUDA_ERROR_INVALID_CONTEXT, CUDA_ERROR_INVALID_VALUE
     *
     * @see JCudaDriver#cuSurfObjectCreate
     */
    public static int cuSurfObjectGetResourceDesc(CUDA_RESOURCE_DESC pResDesc, CUsurfObject surfObject)
    {
        // Run the native call first, then hand its status code to checkResult
        final int status = cuSurfObjectGetResourceDescNative(pResDesc, surfObject);
        return checkResult(status);
    }
    private static native int cuSurfObjectGetResourceDescNative(CUDA_RESOURCE_DESC pResDesc, CUsurfObject surfObject);

    
    /**
     * Queries if a device may directly access a peer device's memory.
     *
     * <pre>
     * CUresult cuDeviceCanAccessPeer (
     *      int* canAccessPeer,
     *      CUdevice dev,
     *      CUdevice peerDev )
     * </pre>
     *
     * <p>
     * Returns in *canAccessPeer a value of 1 if contexts on dev are
     * capable of directly accessing memory from contexts on peerDev and
     * 0 otherwise. If direct access of peerDev from dev is possible, then
     * access may be enabled on two specific contexts by calling
     * cuCtxEnablePeerAccess().
     *
     * <p>
     * Note: this function may also return error codes from previous,
     * asynchronous launches.
     *
     * @param canAccessPeer Returned access capability
     * @param dev Device from which allocations on peerDev are to be directly accessed.
     * @param peerDev Device on which the allocations to be directly accessed by dev reside.
     *
     * @return CUDA_SUCCESS, CUDA_ERROR_DEINITIALIZED, CUDA_ERROR_NOT_INITIALIZED,
     * CUDA_ERROR_INVALID_DEVICE
     *
     * @see JCudaDriver#cuCtxEnablePeerAccess
     * @see JCudaDriver#cuCtxDisablePeerAccess
     */
    public static int cuDeviceCanAccessPeer(int canAccessPeer[], CUdevice dev, CUdevice peerDev)
    {
        // Run the native call first, then hand its status code to checkResult
        final int status = cuDeviceCanAccessPeerNative(canAccessPeer, dev, peerDev);
        return checkResult(status);
    }
    private static native int cuDeviceCanAccessPeerNative(int canAccessPeer[], CUdevice dev, CUdevice peerDev);


    /**
     * Enables direct access to memory allocations in a peer context.
     *
     * <pre>
     * CUresult cuCtxEnablePeerAccess (
     *      CUcontext peerContext,
     *      unsigned int  Flags )
     * </pre>
     *
     * <p>
     * If both the current context and peerContext are on devices which
     * support unified addressing (as may be queried using
     * CU_DEVICE_ATTRIBUTE_UNIFIED_ADDRESSING) and same major compute
     * capability, then on success all allocations from peerContext will
     * immediately be accessible by the current context. See Unified
     * Addressing for additional details.
     *
     * <p>
     * Note that access granted by this call is unidirectional and that
     * in order to access memory from the current context in peerContext,
     * a separate symmetric call to cuCtxEnablePeerAccess() is required.
     *
     * <p>
     * Error conditions:
     * <ul>
     *   <li>CUDA_ERROR_PEER_ACCESS_UNSUPPORTED if cuDeviceCanAccessPeer()
     *       indicates that the CUdevice of the current context cannot
     *       directly access memory from the CUdevice of peerContext.</li>
     *   <li>CUDA_ERROR_PEER_ACCESS_ALREADY_ENABLED if direct access of
     *       peerContext from the current context has already been
     *       enabled.</li>
     *   <li>CUDA_ERROR_TOO_MANY_PEERS if direct peer access is not
     *       possible because hardware resources required for peer access
     *       have been exhausted.</li>
     *   <li>CUDA_ERROR_INVALID_CONTEXT if there is no current context,
     *       peerContext is not a valid context, or if the current context
     *       is peerContext.</li>
     *   <li>CUDA_ERROR_INVALID_VALUE if Flags is not 0.</li>
     * </ul>
     *
     * <p>
     * Note: this function may also return error codes from previous,
     * asynchronous launches.
     *
     * @param peerContext Peer context to enable direct access to from the current context
     * @param Flags Reserved for future use and must be set to 0
     *
     * @return CUDA_SUCCESS, CUDA_ERROR_DEINITIALIZED, CUDA_ERROR_NOT_INITIALIZED,
     * CUDA_ERROR_PEER_ACCESS_ALREADY_ENABLED, CUDA_ERROR_TOO_MANY_PEERS,
     * CUDA_ERROR_INVALID_CONTEXT, CUDA_ERROR_PEER_ACCESS_UNSUPPORTED,
     * CUDA_ERROR_INVALID_VALUE
     *
     * @see JCudaDriver#cuDeviceCanAccessPeer
     * @see JCudaDriver#cuCtxDisablePeerAccess
     */
    public static int cuCtxEnablePeerAccess(CUcontext peerContext, int Flags)
    {
        // Run the native call first, then hand its status code to checkResult
        final int status = cuCtxEnablePeerAccessNative(peerContext, Flags);
        return checkResult(status);
    }
    private static native int cuCtxEnablePeerAccessNative(CUcontext peerContext, int Flags);


    /**
     * Disables direct access to memory allocations in a peer context and
     * unregisters any registered allocations.
     *
     * <pre>
     * CUresult cuCtxDisablePeerAccess (
     *      CUcontext peerContext )
     * </pre>
     *
     * <p>
     * Returns CUDA_ERROR_PEER_ACCESS_NOT_ENABLED if direct peer access
     * has not yet been enabled from peerContext to the current context.
     *
     * <p>
     * Returns CUDA_ERROR_INVALID_CONTEXT if there is no current context,
     * or if peerContext is not a valid context.
     *
     * <p>
     * Note: this function may also return error codes from previous,
     * asynchronous launches.
     *
     * @param peerContext Peer context to disable direct access to
     *
     * @return CUDA_SUCCESS, CUDA_ERROR_DEINITIALIZED, CUDA_ERROR_NOT_INITIALIZED,
     * CUDA_ERROR_PEER_ACCESS_NOT_ENABLED, CUDA_ERROR_INVALID_CONTEXT
     *
     * @see JCudaDriver#cuDeviceCanAccessPeer
     * @see JCudaDriver#cuCtxEnablePeerAccess
     */
    public static int cuCtxDisablePeerAccess(CUcontext peerContext)
    {
        // Run the native call first, then hand its status code to checkResult
        final int status = cuCtxDisablePeerAccessNative(peerContext);
        return checkResult(status);
    }
    private static native int cuCtxDisablePeerAccessNative(CUcontext peerContext);


    /**
     * Sets the parameter size for the function.
     *
     * <pre>
     * CUresult cuParamSetSize (
     *      CUfunction hfunc,
     *      unsigned int  numbytes )
     * </pre>
     *
     * <p>
     * Sets through numbytes the total size in bytes needed by the
     * function parameters of the kernel corresponding to hfunc.
     *
     * <p>
     * Note: this function may also return error codes from previous,
     * asynchronous launches.
     *
     * @param hfunc Kernel to set parameter size for
     * @param numbytes Size of parameter list in bytes
     *
     * @return CUDA_SUCCESS, CUDA_ERROR_DEINITIALIZED, CUDA_ERROR_NOT_INITIALIZED,
     * CUDA_ERROR_INVALID_CONTEXT, CUDA_ERROR_INVALID_VALUE
     *
     * @see JCudaDriver#cuFuncSetBlockShape
     * @see JCudaDriver#cuFuncSetSharedSize
     * @see JCudaDriver#cuFuncGetAttribute
     * @see JCudaDriver#cuParamSetf
     * @see JCudaDriver#cuParamSeti
     * @see JCudaDriver#cuParamSetv
     * @see JCudaDriver#cuLaunch
     * @see JCudaDriver#cuLaunchGrid
     * @see JCudaDriver#cuLaunchGridAsync
     * @see JCudaDriver#cuLaunchKernel
     *
     * @deprecated Marked as deprecated in the CUDA driver API
     * documentation; see cuLaunchKernel for the current launch mechanism.
     */
    @Deprecated
    public static int cuParamSetSize(CUfunction hfunc, int numbytes)
    {
        return checkResult(cuParamSetSizeNative(hfunc, numbytes));
    }

    private static native int cuParamSetSizeNative(CUfunction hfunc, int numbytes);


    /**
     * Adds an integer parameter to the function's argument list.
     *
     * <pre>
     * CUresult cuParamSeti (
     *      CUfunction hfunc,
     *      int  offset,
     *      unsigned int  value )
     * </pre>
     *
     * <p>
     * Sets an integer parameter that will be specified the next time the
     * kernel corresponding to hfunc will be invoked. offset is a byte
     * offset.
     *
     * <p>
     * Note: this function may also return error codes from previous,
     * asynchronous launches.
     *
     * @param hfunc Kernel to add parameter to
     * @param offset Offset to add parameter to argument list
     * @param value Value of parameter
     *
     * @return CUDA_SUCCESS, CUDA_ERROR_DEINITIALIZED, CUDA_ERROR_NOT_INITIALIZED,
     * CUDA_ERROR_INVALID_CONTEXT, CUDA_ERROR_INVALID_VALUE
     *
     * @see JCudaDriver#cuFuncSetBlockShape
     * @see JCudaDriver#cuFuncSetSharedSize
     * @see JCudaDriver#cuFuncGetAttribute
     * @see JCudaDriver#cuParamSetSize
     * @see JCudaDriver#cuParamSetf
     * @see JCudaDriver#cuParamSetv
     * @see JCudaDriver#cuLaunch
     * @see JCudaDriver#cuLaunchGrid
     * @see JCudaDriver#cuLaunchGridAsync
     * @see JCudaDriver#cuLaunchKernel
     *
     * @deprecated Marked as deprecated in the CUDA driver API
     * documentation; see cuLaunchKernel for the current launch mechanism.
     */
    @Deprecated
    public static int cuParamSeti(CUfunction hfunc, int offset, int value)
    {
        return checkResult(cuParamSetiNative(hfunc, offset, value));
    }

    private static native int cuParamSetiNative(CUfunction hfunc, int offset, int value);


    /**
     * Adds a floating-point parameter to the function's argument list.
     *
     * <pre>
     * CUresult cuParamSetf (
     *      CUfunction hfunc,
     *      int  offset,
     *      float  value )
     * </pre>
     *
     * <p>
     * Sets a floating-point parameter that will be specified the next
     * time the kernel corresponding to hfunc will be invoked. offset is
     * a byte offset.
     *
     * <p>
     * Note: this function may also return error codes from previous,
     * asynchronous launches.
     *
     * @param hfunc Kernel to add parameter to
     * @param offset Offset to add parameter to argument list
     * @param value Value of parameter
     *
     * @return CUDA_SUCCESS, CUDA_ERROR_DEINITIALIZED, CUDA_ERROR_NOT_INITIALIZED,
     * CUDA_ERROR_INVALID_CONTEXT, CUDA_ERROR_INVALID_VALUE
     *
     * @see JCudaDriver#cuFuncSetBlockShape
     * @see JCudaDriver#cuFuncSetSharedSize
     * @see JCudaDriver#cuFuncGetAttribute
     * @see JCudaDriver#cuParamSetSize
     * @see JCudaDriver#cuParamSeti
     * @see JCudaDriver#cuParamSetv
     * @see JCudaDriver#cuLaunch
     * @see JCudaDriver#cuLaunchGrid
     * @see JCudaDriver#cuLaunchGridAsync
     * @see JCudaDriver#cuLaunchKernel
     *
     * @deprecated Marked as deprecated in the CUDA driver API
     * documentation; see cuLaunchKernel for the current launch mechanism.
     */
    @Deprecated
    public static int cuParamSetf(CUfunction hfunc, int offset, float value)
    {
        return checkResult(cuParamSetfNative(hfunc, offset, value));
    }
    private static native int cuParamSetfNative(CUfunction hfunc, int offset, float value);


    /**
     * Adds arbitrary data to the function's argument list.
     *
     * <pre>
     * CUresult cuParamSetv (
     *      CUfunction hfunc,
     *      int  offset,
     *      void* ptr,
     *      unsigned int  numbytes )
     * </pre>
     *
     * <p>
     * Copies an arbitrary amount of data (specified in numbytes) from
     * ptr into the parameter space of the kernel corresponding to hfunc.
     * offset is a byte offset.
     *
     * <p>
     * Note: this function may also return error codes from previous,
     * asynchronous launches.
     *
     * @param hfunc Kernel to add data to
     * @param offset Offset to add data to argument list
     * @param ptr Pointer to arbitrary data
     * @param numbytes Size of data to copy in bytes
     *
     * @return CUDA_SUCCESS, CUDA_ERROR_DEINITIALIZED, CUDA_ERROR_NOT_INITIALIZED,
     * CUDA_ERROR_INVALID_CONTEXT, CUDA_ERROR_INVALID_VALUE
     *
     * @see JCudaDriver#cuFuncSetBlockShape
     * @see JCudaDriver#cuFuncSetSharedSize
     * @see JCudaDriver#cuFuncGetAttribute
     * @see JCudaDriver#cuParamSetSize
     * @see JCudaDriver#cuParamSetf
     * @see JCudaDriver#cuParamSeti
     * @see JCudaDriver#cuLaunch
     * @see JCudaDriver#cuLaunchGrid
     * @see JCudaDriver#cuLaunchGridAsync
     * @see JCudaDriver#cuLaunchKernel
     *
     * @deprecated Marked as deprecated in the CUDA driver API
     * documentation; see cuLaunchKernel for the current launch mechanism.
     */
    @Deprecated
    public static int cuParamSetv(CUfunction hfunc, int offset, Pointer ptr, int numbytes)
    {
        return checkResult(cuParamSetvNative(hfunc, offset, ptr, numbytes));
    }

    private static native int cuParamSetvNative(CUfunction hfunc, int offset, Pointer ptr, int numbytes);


    /**
     * Adds a texture-reference to the function's argument list.
     *
     * <pre>
     * CUresult cuParamSetTexRef (
     *      CUfunction hfunc,
     *      int  texunit,
     *      CUtexref hTexRef )
     * </pre>
     *
     * <p>
     * Makes the CUDA array or linear memory bound to the texture
     * reference hTexRef available to a device program as a texture. In
     * this version of CUDA, the texture-reference must be obtained via
     * cuModuleGetTexRef() and the texunit parameter must be set to
     * CU_PARAM_TR_DEFAULT.
     *
     * <p>
     * Note: this function may also return error codes from previous,
     * asynchronous launches.
     *
     * @param hfunc Kernel to add texture-reference to
     * @param texunit Texture unit (must be CU_PARAM_TR_DEFAULT)
     * @param hTexRef Texture-reference to add to argument list
     *
     * @return CUDA_SUCCESS, CUDA_ERROR_DEINITIALIZED, CUDA_ERROR_NOT_INITIALIZED,
     * CUDA_ERROR_INVALID_CONTEXT, CUDA_ERROR_INVALID_VALUE
     *
     * @deprecated Marked as deprecated in the CUDA driver API
     * documentation.
     */
    @Deprecated
    public static int cuParamSetTexRef(CUfunction hfunc, int texunit, CUtexref hTexRef)
    {
        return checkResult(cuParamSetTexRefNative(hfunc, texunit, hTexRef));
    }

    private static native int cuParamSetTexRefNative(CUfunction hfunc, int texunit, CUtexref hTexRef);

    /**
     * Returns occupancy of a function.
     *
     * <p>
     * Returns in *numBlocks the number of the maximum active blocks per
     * streaming multiprocessor.
     *
     * <p>
     * Note: this function may also return error codes from previous,
     * asynchronous launches.
     *
     * @param numBlocks Returned occupancy
     * @param func Kernel for which occupancy is calculated
     * @param blockSize Block size the kernel is intended to be launched with
     * @param dynamicSMemSize Per-block dynamic shared memory usage intended, in bytes
     *
     * @return CUDA_SUCCESS, CUDA_ERROR_DEINITIALIZED, CUDA_ERROR_NOT_INITIALIZED,
     * CUDA_ERROR_INVALID_CONTEXT, CUDA_ERROR_INVALID_VALUE,
     * CUDA_ERROR_UNKNOWN
     */
    public static int cuOccupancyMaxActiveBlocksPerMultiprocessor(int numBlocks[], CUfunction func, int blockSize, long dynamicSMemSize)
    {
        // Run the native call first, then hand its status code to checkResult
        final int status = cuOccupancyMaxActiveBlocksPerMultiprocessorNative(
            numBlocks, func, blockSize, dynamicSMemSize);
        return checkResult(status);
    }
    private static native int cuOccupancyMaxActiveBlocksPerMultiprocessorNative(int numBlocks[], CUfunction func, int blockSize, long dynamicSMemSize);

    
    /**
     * Variant of {@link #cuOccupancyMaxActiveBlocksPerMultiprocessor}
     * that additionally forwards a flags argument to the native call.
     *
     * @param numBlocks Returned occupancy
     * @param func Kernel for which occupancy is calculated
     * @param blockSize Block size the kernel is intended to be launched with
     * @param dynamicSMemSize Per-block dynamic shared memory usage intended, in bytes
     * @param flags Passed through unchanged to the native implementation.
     * NOTE(review): the accepted flag values are not documented in this
     * file - confirm against the CUDA driver API documentation.
     *
     * @return The value that checkResult yields for the status code of
     * the native call
     */
    public static int cuOccupancyMaxActiveBlocksPerMultiprocessorWithFlags(int numBlocks[], CUfunction func, int blockSize, long dynamicSMemSize, int flags)
    {
        // Run the native call first, then hand its status code to checkResult
        final int status = cuOccupancyMaxActiveBlocksPerMultiprocessorWithFlagsNative(
            numBlocks, func, blockSize, dynamicSMemSize, flags);
        return checkResult(status);
    }
    private static native int cuOccupancyMaxActiveBlocksPerMultiprocessorWithFlagsNative(int numBlocks[], CUfunction func, int blockSize, long dynamicSMemSize, int flags);
    
    
    
    /**
     * Suggest a launch configuration with reasonable occupancy.
     *
     * <p>
     * Returns in *blockSize a reasonable block size that can achieve the
     * maximum occupancy (or, the maximum number of active warps with the
     * fewest blocks per multiprocessor), and in *minGridSize the minimum
     * grid size to achieve the maximum occupancy.
     *
     * <p>
     * If blockSizeLimit is 0, the configurator will use the maximum
     * block size permitted by the device / function instead.
     *
     * <p>
     * If per-block dynamic shared memory allocation is not needed, the
     * user should leave both blockSizeToDynamicSMemSize and
     * dynamicSMemSize as 0.
     *
     * <p>
     * If per-block dynamic shared memory allocation is needed, then if
     * the dynamic shared memory size is constant regardless of block
     * size, the size should be passed through dynamicSMemSize, and
     * blockSizeToDynamicSMemSize should be NULL.
     *
     * <p>
     * Otherwise, if the per-block dynamic shared memory size varies with
     * different block sizes, the user needs to provide a unary function
     * through blockSizeToDynamicSMemSize that computes the dynamic
     * shared memory needed by func for any given block size.
     * dynamicSMemSize is ignored. An example signature is:
     *
     * <pre>
     *    // Take block size, returns dynamic shared memory needed
     *    size_t blockToSmem(int blockSize);
     * </pre>
     *
     * <p>
     * Note: this function may also return error codes from previous,
     * asynchronous launches.
     *
     * @param minGridSize Returned minimum grid size needed to achieve the maximum occupancy
     * @param blockSize Returned maximum block size that can achieve the maximum occupancy
     * @param func Kernel for which launch configuration is calculated
     * @param blockSizeToDynamicSMemSize A function that calculates how much per-block dynamic shared memory func uses based on the block size
     * @param dynamicSMemSize Dynamic shared memory usage intended, in bytes
     * @param blockSizeLimit The maximum block size func is designed to handle
     *
     * @return CUDA_SUCCESS, CUDA_ERROR_DEINITIALIZED, CUDA_ERROR_NOT_INITIALIZED,
     * CUDA_ERROR_INVALID_CONTEXT, CUDA_ERROR_INVALID_VALUE,
     * CUDA_ERROR_UNKNOWN
     */
    public static int cuOccupancyMaxPotentialBlockSize(int minGridSize[], int blockSize[], CUfunction func, CUoccupancyB2DSize blockSizeToDynamicSMemSize, long dynamicSMemSize, int blockSizeLimit)
    {
        // The native side keeps state for the callback, so the call
        // and its result check are serialized on OCCUPANCY_LOCK
        synchronized (OCCUPANCY_LOCK)
        {
            final int status = cuOccupancyMaxPotentialBlockSizeNative(
                minGridSize, blockSize, func,
                blockSizeToDynamicSMemSize, dynamicSMemSize, blockSizeLimit);
            return checkResult(status);
        }
    }
    private static native int cuOccupancyMaxPotentialBlockSizeNative(int minGridSize[], int blockSize[], CUfunction func, CUoccupancyB2DSize blockSizeToDynamicSMemSize, long dynamicSMemSize, int blockSizeLimit);
    
    
    /**
     * Variant of {@link #cuOccupancyMaxPotentialBlockSize} that
     * additionally forwards a flags argument to the native call.
     *
     * @param minGridSize Returned minimum grid size needed to achieve the maximum occupancy
     * @param blockSize Returned maximum block size that can achieve the maximum occupancy
     * @param func Kernel for which launch configuration is calculated
     * @param blockSizeToDynamicSMemSize A function that calculates how much per-block dynamic shared memory func uses based on the block size
     * @param dynamicSMemSize Dynamic shared memory usage intended, in bytes
     * @param blockSizeLimit The maximum block size func is designed to handle
     * @param flags Passed through unchanged to the native implementation.
     * NOTE(review): the accepted flag values are not documented in this
     * file - confirm against the CUDA driver API documentation.
     *
     * @return The value that checkResult yields for the status code of
     * the native call
     */
    public static int cuOccupancyMaxPotentialBlockSizeWithFlags(int minGridSize[], int blockSize[], CUfunction func, CUoccupancyB2DSize blockSizeToDynamicSMemSize, long dynamicSMemSize, int blockSizeLimit, int flags)
    {
        // The native side keeps state for the callback, so the call
        // and its result check are serialized on OCCUPANCY_LOCK
        synchronized (OCCUPANCY_LOCK)
        {
            final int status = cuOccupancyMaxPotentialBlockSizeWithFlagsNative(
                minGridSize, blockSize, func,
                blockSizeToDynamicSMemSize, dynamicSMemSize, blockSizeLimit, flags);
            return checkResult(status);
        }
    }
    private static native int cuOccupancyMaxPotentialBlockSizeWithFlagsNative(int minGridSize[], int blockSize[], CUfunction func, CUoccupancyB2DSize blockSizeToDynamicSMemSize, long dynamicSMemSize, int blockSizeLimit, int flags);

    /**
     * Monitor object on which the cuOccupancyMaxPotentialBlockSize and
     * cuOccupancyMaxPotentialBlockSizeWithFlags wrappers synchronize,
     * because their callback involves a state on the native side.
     */
    private static final Object OCCUPANCY_LOCK = new Object();
    
    /**
     * Launches a CUDA function.
     *
     * <pre>
     * CUresult cuLaunch (
     *      CUfunction f )
     * </pre>
     *
     * <p>
     * Invokes the kernel f on a 1 x 1 x 1 grid of blocks. The block
     * contains the number of threads specified by a previous call to
     * cuFuncSetBlockShape().
     *
     * <p>
     * Note: this function may also return error codes from previous,
     * asynchronous launches.
     *
     * @param f Kernel to launch
     *
     * @return CUDA_SUCCESS, CUDA_ERROR_DEINITIALIZED, CUDA_ERROR_NOT_INITIALIZED,
     * CUDA_ERROR_INVALID_CONTEXT, CUDA_ERROR_INVALID_VALUE,
     * CUDA_ERROR_LAUNCH_FAILED, CUDA_ERROR_LAUNCH_OUT_OF_RESOURCES,
     * CUDA_ERROR_LAUNCH_TIMEOUT, CUDA_ERROR_LAUNCH_INCOMPATIBLE_TEXTURING,
     * CUDA_ERROR_SHARED_OBJECT_INIT_FAILED
     *
     * @see JCudaDriver#cuFuncSetBlockShape
     * @see JCudaDriver#cuFuncSetSharedSize
     * @see JCudaDriver#cuFuncGetAttribute
     * @see JCudaDriver#cuParamSetSize
     * @see JCudaDriver#cuParamSetf
     * @see JCudaDriver#cuParamSeti
     * @see JCudaDriver#cuParamSetv
     * @see JCudaDriver#cuLaunchGrid
     * @see JCudaDriver#cuLaunchGridAsync
     * @see JCudaDriver#cuLaunchKernel
     *
     * @deprecated Marked as deprecated in the CUDA driver API
     * documentation; see cuLaunchKernel for the current launch mechanism.
     */
    @Deprecated
    public static int cuLaunch(CUfunction f)
    {
        return checkResult(cuLaunchNative(f));
    }

    private static native int cuLaunchNative(CUfunction f);


    /**
     * Launches a CUDA function.
     *
     * <pre>
     * CUresult cuLaunchGrid (
     *      CUfunction f,
     *      int  grid_width,
     *      int  grid_height )
     * </pre>
     *
     * <p>
     * Invokes the kernel f on a grid_width x grid_height grid of blocks.
     * Each block contains the number of threads specified by a previous
     * call to cuFuncSetBlockShape().
     *
     * <p>
     * Note: this function may also return error codes from previous,
     * asynchronous launches.
     *
     * @param f Kernel to launch
     * @param grid_width Width of grid in blocks
     * @param grid_height Height of grid in blocks
     *
     * @return CUDA_SUCCESS, CUDA_ERROR_DEINITIALIZED, CUDA_ERROR_NOT_INITIALIZED,
     * CUDA_ERROR_INVALID_CONTEXT, CUDA_ERROR_INVALID_VALUE,
     * CUDA_ERROR_LAUNCH_FAILED, CUDA_ERROR_LAUNCH_OUT_OF_RESOURCES,
     * CUDA_ERROR_LAUNCH_TIMEOUT, CUDA_ERROR_LAUNCH_INCOMPATIBLE_TEXTURING,
     * CUDA_ERROR_SHARED_OBJECT_INIT_FAILED
     *
     * @see JCudaDriver#cuFuncSetBlockShape
     * @see JCudaDriver#cuFuncSetSharedSize
     * @see JCudaDriver#cuFuncGetAttribute
     * @see JCudaDriver#cuParamSetSize
     * @see JCudaDriver#cuParamSetf
     * @see JCudaDriver#cuParamSeti
     * @see JCudaDriver#cuParamSetv
     * @see JCudaDriver#cuLaunch
     * @see JCudaDriver#cuLaunchGridAsync
     * @see JCudaDriver#cuLaunchKernel
     *
     * @deprecated Marked as deprecated in the CUDA driver API
     * documentation; see cuLaunchKernel for the current launch mechanism.
     */
    @Deprecated
    public static int cuLaunchGrid(CUfunction f, int grid_width, int grid_height)
    {
        return checkResult(cuLaunchGridNative(f, grid_width, grid_height));
    }

    private static native int cuLaunchGridNative(CUfunction f, int grid_width, int grid_height);


    /**
     * Launches a CUDA function.
     *
     * <pre>
     * CUresult cuLaunchGridAsync (
     *      CUfunction f,
     *      int  grid_width,
     *      int  grid_height,
     *      CUstream hStream )
     * </pre>
     *
     * <p>
     * Invokes the kernel f on a grid_width x grid_height grid of blocks.
     * Each block contains the number of threads specified by a previous
     * call to cuFuncSetBlockShape().
     *
     * <p>
     * cuLaunchGridAsync() can optionally be associated to a stream by
     * passing a non-zero hStream argument.
     *
     * <p>
     * Note: this function may also return error codes from previous,
     * asynchronous launches.
     *
     * @param f Kernel to launch
     * @param grid_width Width of grid in blocks
     * @param grid_height Height of grid in blocks
     * @param hStream Stream identifier
     *
     * @return CUDA_SUCCESS, CUDA_ERROR_DEINITIALIZED, CUDA_ERROR_NOT_INITIALIZED,
     * CUDA_ERROR_INVALID_CONTEXT, CUDA_ERROR_INVALID_HANDLE,
     * CUDA_ERROR_INVALID_VALUE, CUDA_ERROR_LAUNCH_FAILED,
     * CUDA_ERROR_LAUNCH_OUT_OF_RESOURCES, CUDA_ERROR_LAUNCH_TIMEOUT,
     * CUDA_ERROR_LAUNCH_INCOMPATIBLE_TEXTURING,
     * CUDA_ERROR_SHARED_OBJECT_INIT_FAILED
     *
     * @see JCudaDriver#cuFuncSetBlockShape
     * @see JCudaDriver#cuFuncSetSharedSize
     * @see JCudaDriver#cuFuncGetAttribute
     * @see JCudaDriver#cuParamSetSize
     * @see JCudaDriver#cuParamSetf
     * @see JCudaDriver#cuParamSeti
     * @see JCudaDriver#cuParamSetv
     * @see JCudaDriver#cuLaunch
     * @see JCudaDriver#cuLaunchGrid
     * @see JCudaDriver#cuLaunchKernel
     *
     * @deprecated Marked as deprecated in the CUDA driver API
     * documentation; see cuLaunchKernel for the current launch mechanism.
     */
    @Deprecated
    public static int cuLaunchGridAsync(CUfunction f, int grid_width, int grid_height, CUstream hStream)
    {
        return checkResult(cuLaunchGridAsyncNative(f, grid_width, grid_height, hStream));
    }

    private static native int cuLaunchGridAsyncNative(CUfunction f, int grid_width, int grid_height, CUstream hStream);


    /**
     * Creates an event.
     *
     * <pre>
     * CUresult cuEventCreate (
     *      CUevent* phEvent,
     *      unsigned int  Flags )
     * </pre>
     *
     * <p>
     * Creates an event *phEvent with the flags specified via Flags.
     * Valid flags include:
     * <ul>
     *   <li>CU_EVENT_DEFAULT: Default event creation flag.</li>
     *   <li>CU_EVENT_BLOCKING_SYNC: Specifies that the created event
     *       should use blocking synchronization. A CPU thread that uses
     *       cuEventSynchronize() to wait on an event created with this
     *       flag will block until the event has actually been
     *       recorded.</li>
     *   <li>CU_EVENT_DISABLE_TIMING: Specifies that the created event
     *       does not need to record timing data. Events created with
     *       this flag specified and the CU_EVENT_BLOCKING_SYNC flag not
     *       specified will provide the best performance when used with
     *       cuStreamWaitEvent() and cuEventQuery().</li>
     *   <li>CU_EVENT_INTERPROCESS: Specifies that the created event may
     *       be used as an interprocess event by cuIpcGetEventHandle().
     *       CU_EVENT_INTERPROCESS must be specified along with
     *       CU_EVENT_DISABLE_TIMING.</li>
     * </ul>
     *
     * <p>
     * Note: this function may also return error codes from previous,
     * asynchronous launches.
     *
     * @param phEvent Returns newly created event
     * @param Flags Event creation flags
     *
     * @return CUDA_SUCCESS, CUDA_ERROR_DEINITIALIZED, CUDA_ERROR_NOT_INITIALIZED,
     * CUDA_ERROR_INVALID_CONTEXT, CUDA_ERROR_INVALID_VALUE,
     * CUDA_ERROR_OUT_OF_MEMORY
     *
     * @see JCudaDriver#cuEventRecord
     * @see JCudaDriver#cuEventQuery
     * @see JCudaDriver#cuEventSynchronize
     * @see JCudaDriver#cuEventDestroy
     * @see JCudaDriver#cuEventElapsedTime
     */
    public static int cuEventCreate(CUevent phEvent, int Flags)
    {
        // Run the native call first, then hand its status code to checkResult
        final int status = cuEventCreateNative(phEvent, Flags);
        return checkResult(status);
    }

    private static native int cuEventCreateNative(CUevent phEvent, int Flags);


    /**
     * Records an event.
     *
     * <pre>
     * CUresult cuEventRecord (
     *      CUevent hEvent,
     *      CUstream hStream )
     * </pre>
     *
     * <p>
     * If <code>hStream</code> is non-zero, the event is recorded after all
     * preceding operations in <code>hStream</code> have been completed;
     * otherwise, it is recorded after all preceding operations in the CUDA
     * context have been completed. Since this operation is asynchronous,
     * cuEventQuery() and/or cuEventSynchronize() must be used to determine
     * when the event has actually been recorded.
     * </p>
     * <p>
     * If cuEventRecord() has previously been called on <code>hEvent</code>,
     * then this call will overwrite any existing state in
     * <code>hEvent</code>. Any subsequent calls which examine the status of
     * <code>hEvent</code> will only examine the completion of this most
     * recent call to cuEventRecord().
     * </p>
     * <p>
     * It is necessary that <code>hEvent</code> and <code>hStream</code> be
     * created on the same context.
     * </p>
     * <p>
     * Note: this function may also return error codes from previous,
     * asynchronous launches.
     * </p>
     *
     * @param hEvent Event to record
     * @param hStream Stream to record event for
     *
     * @return CUDA_SUCCESS, CUDA_ERROR_DEINITIALIZED, CUDA_ERROR_NOT_INITIALIZED,
     * CUDA_ERROR_INVALID_CONTEXT, CUDA_ERROR_INVALID_HANDLE,
     * CUDA_ERROR_INVALID_VALUE
     *
     * @see JCudaDriver#cuEventCreate
     * @see JCudaDriver#cuEventQuery
     * @see JCudaDriver#cuEventSynchronize
     * @see JCudaDriver#cuStreamWaitEvent
     * @see JCudaDriver#cuEventDestroy
     * @see JCudaDriver#cuEventElapsedTime
     */
    public static int cuEventRecord(CUevent hEvent, CUstream hStream)
    {
        // The native status code is passed through checkResult before it is returned.
        final int status = cuEventRecordNative(hEvent, hStream);
        return checkResult(status);
    }

    private static native int cuEventRecordNative(CUevent hEvent, CUstream hStream);


    /**
     * Queries an event's status.
     *
     * <pre>
     * CUresult cuEventQuery (
     *      CUevent hEvent )
     * </pre>
     *
     * <p>
     * Query the status of all device work preceding the most recent call to
     * cuEventRecord() (in the appropriate compute streams, as specified by
     * the arguments to cuEventRecord()).
     * </p>
     * <p>
     * If this work has successfully been completed by the device, or if
     * cuEventRecord() has not been called on <code>hEvent</code>, then
     * CUDA_SUCCESS is returned. If this work has not yet been completed by
     * the device then CUDA_ERROR_NOT_READY is returned.
     * </p>
     * <p>
     * Note: this function may also return error codes from previous,
     * asynchronous launches.
     * </p>
     *
     * @param hEvent Event to query
     *
     * @return CUDA_SUCCESS, CUDA_ERROR_DEINITIALIZED, CUDA_ERROR_NOT_INITIALIZED,
     * CUDA_ERROR_INVALID_HANDLE, CUDA_ERROR_INVALID_VALUE,
     * CUDA_ERROR_NOT_READY
     *
     * @see JCudaDriver#cuEventCreate
     * @see JCudaDriver#cuEventRecord
     * @see JCudaDriver#cuEventSynchronize
     * @see JCudaDriver#cuEventDestroy
     * @see JCudaDriver#cuEventElapsedTime
     */
    public static int cuEventQuery(CUevent hEvent)
    {
        // The native status code is passed through checkResult before it is returned.
        final int status = cuEventQueryNative(hEvent);
        return checkResult(status);
    }

    private static native int cuEventQueryNative(CUevent hEvent);


    /**
     * Waits for an event to complete.
     *
     * <pre>
     * CUresult cuEventSynchronize (
     *      CUevent hEvent )
     * </pre>
     *
     * <p>
     * Wait until the completion of all device work preceding the most recent
     * call to cuEventRecord() (in the appropriate compute streams, as
     * specified by the arguments to cuEventRecord()).
     * </p>
     * <p>
     * If cuEventRecord() has not been called on <code>hEvent</code>,
     * CUDA_SUCCESS is returned immediately.
     * </p>
     * <p>
     * Waiting for an event that was created with the CU_EVENT_BLOCKING_SYNC
     * flag will cause the calling CPU thread to block until the event has
     * been completed by the device. If the CU_EVENT_BLOCKING_SYNC flag has
     * not been set, then the CPU thread will busy-wait until the event has
     * been completed by the device.
     * </p>
     * <p>
     * Note: this function may also return error codes from previous,
     * asynchronous launches.
     * </p>
     *
     * @param hEvent Event to wait for
     *
     * @return CUDA_SUCCESS, CUDA_ERROR_DEINITIALIZED, CUDA_ERROR_NOT_INITIALIZED,
     * CUDA_ERROR_INVALID_CONTEXT, CUDA_ERROR_INVALID_HANDLE
     *
     * @see JCudaDriver#cuEventCreate
     * @see JCudaDriver#cuEventRecord
     * @see JCudaDriver#cuEventQuery
     * @see JCudaDriver#cuEventDestroy
     * @see JCudaDriver#cuEventElapsedTime
     */
    public static int cuEventSynchronize(CUevent hEvent)
    {
        // The native status code is passed through checkResult before it is returned.
        final int status = cuEventSynchronizeNative(hEvent);
        return checkResult(status);
    }

    private static native int cuEventSynchronizeNative(CUevent hEvent);


    /**
     * Destroys an event.
     *
     * <pre>
     * CUresult cuEventDestroy (
     *      CUevent hEvent )
     * </pre>
     *
     * <p>
     * Destroys the event specified by <code>hEvent</code>.
     * </p>
     * <p>
     * In case <code>hEvent</code> has been recorded but has not yet been
     * completed when cuEventDestroy() is called, the function will return
     * immediately and the resources associated with <code>hEvent</code>
     * will be released automatically once the device has completed
     * <code>hEvent</code>.
     * </p>
     * <p>
     * Note: this function may also return error codes from previous,
     * asynchronous launches.
     * </p>
     *
     * @param hEvent Event to destroy
     *
     * @return CUDA_SUCCESS, CUDA_ERROR_DEINITIALIZED, CUDA_ERROR_NOT_INITIALIZED,
     * CUDA_ERROR_INVALID_CONTEXT, CUDA_ERROR_INVALID_HANDLE
     *
     * @see JCudaDriver#cuEventCreate
     * @see JCudaDriver#cuEventRecord
     * @see JCudaDriver#cuEventQuery
     * @see JCudaDriver#cuEventSynchronize
     * @see JCudaDriver#cuEventElapsedTime
     */
    public static int cuEventDestroy(CUevent hEvent)
    {
        // The native status code is passed through checkResult before it is returned.
        final int status = cuEventDestroyNative(hEvent);
        return checkResult(status);
    }

    private static native int cuEventDestroyNative(CUevent hEvent);


    /**
     * Computes the elapsed time between two events.
     *
     * <pre>
     * CUresult cuEventElapsedTime (
     *      float* pMilliseconds,
     *      CUevent hStart,
     *      CUevent hEnd )
     * </pre>
     *
     * <p>
     * Computes the elapsed time between two events (in milliseconds with a
     * resolution of around 0.5 microseconds).
     * </p>
     * <p>
     * If either event was last recorded in a non-NULL stream, the resulting
     * time may be greater than expected (even if both used the same stream
     * handle). This happens because the cuEventRecord() operation takes
     * place asynchronously and there is no guarantee that the measured
     * latency is actually just between the two events. Any number of other
     * different stream operations could execute in between the two measured
     * events, thus altering the timing in a significant way.
     * </p>
     * <p>
     * If cuEventRecord() has not been called on either event then
     * CUDA_ERROR_INVALID_HANDLE is returned. If cuEventRecord() has been
     * called on both events but one or both of them has not yet been
     * completed (that is, cuEventQuery() would return CUDA_ERROR_NOT_READY
     * on at least one of the events), CUDA_ERROR_NOT_READY is returned. If
     * either event was created with the CU_EVENT_DISABLE_TIMING flag, then
     * this function will return CUDA_ERROR_INVALID_HANDLE.
     * </p>
     * <p>
     * Note: this function may also return error codes from previous,
     * asynchronous launches.
     * </p>
     *
     * @param pMilliseconds Time between hStart and hEnd in ms
     * @param hStart Starting event
     * @param hEnd Ending event
     *
     * @return CUDA_SUCCESS, CUDA_ERROR_DEINITIALIZED, CUDA_ERROR_NOT_INITIALIZED,
     * CUDA_ERROR_INVALID_CONTEXT, CUDA_ERROR_INVALID_HANDLE,
     * CUDA_ERROR_NOT_READY
     *
     * @see JCudaDriver#cuEventCreate
     * @see JCudaDriver#cuEventRecord
     * @see JCudaDriver#cuEventQuery
     * @see JCudaDriver#cuEventSynchronize
     * @see JCudaDriver#cuEventDestroy
     */
    public static int cuEventElapsedTime(float pMilliseconds[], CUevent hStart, CUevent hEnd)
    {
        // The native status code is passed through checkResult before it is returned.
        final int status = cuEventElapsedTimeNative(pMilliseconds, hStart, hEnd);
        return checkResult(status);
    }

    private static native int cuEventElapsedTimeNative(float pMilliseconds[], CUevent hStart, CUevent hEnd);


    /**
     * Returns information about a pointer.
     *
     * <pre>
     * CUresult cuPointerGetAttribute (
     *      void* data,
     *      CUpointer_attribute attribute,
     *      CUdeviceptr ptr )
     * </pre>
     *
     * <p>
     * The supported attributes are:
     * </p>
     * <ul>
     *   <li>
     *     <b>CU_POINTER_ATTRIBUTE_CONTEXT</b>:
     *     <p>
     *     Returns in <code>*data</code> the CUcontext in which
     *     <code>ptr</code> was allocated or registered. The type of
     *     <code>data</code> must be <code>CUcontext *</code>.
     *     </p>
     *     <p>
     *     If <code>ptr</code> was not allocated by, mapped by, or registered
     *     with a CUcontext which uses unified virtual addressing then
     *     CUDA_ERROR_INVALID_VALUE is returned.
     *     </p>
     *   </li>
     *   <li>
     *     <b>CU_POINTER_ATTRIBUTE_MEMORY_TYPE</b>:
     *     <p>
     *     Returns in <code>*data</code> the physical memory type of the
     *     memory that <code>ptr</code> addresses as a CUmemorytype
     *     enumerated value. The type of <code>data</code> must be
     *     unsigned int.
     *     </p>
     *     <p>
     *     If <code>ptr</code> addresses device memory then
     *     <code>*data</code> is set to CU_MEMORYTYPE_DEVICE. The particular
     *     CUdevice on which the memory resides is the CUdevice of the
     *     CUcontext returned by the CU_POINTER_ATTRIBUTE_CONTEXT attribute
     *     of <code>ptr</code>.
     *     </p>
     *     <p>
     *     If <code>ptr</code> addresses host memory then <code>*data</code>
     *     is set to CU_MEMORYTYPE_HOST.
     *     </p>
     *     <p>
     *     If <code>ptr</code> was not allocated by, mapped by, or registered
     *     with a CUcontext which uses unified virtual addressing then
     *     CUDA_ERROR_INVALID_VALUE is returned.
     *     </p>
     *     <p>
     *     If the current CUcontext does not support unified virtual
     *     addressing then CUDA_ERROR_INVALID_CONTEXT is returned.
     *     </p>
     *   </li>
     *   <li>
     *     <b>CU_POINTER_ATTRIBUTE_DEVICE_POINTER</b>:
     *     <p>
     *     Returns in <code>*data</code> the device pointer value through
     *     which <code>ptr</code> may be accessed by kernels running in the
     *     current CUcontext. The type of <code>data</code> must be
     *     <code>CUdeviceptr *</code>.
     *     </p>
     *     <p>
     *     If there exists no device pointer value through which kernels
     *     running in the current CUcontext may access <code>ptr</code> then
     *     CUDA_ERROR_INVALID_VALUE is returned.
     *     </p>
     *     <p>
     *     If there is no current CUcontext then CUDA_ERROR_INVALID_CONTEXT
     *     is returned.
     *     </p>
     *     <p>
     *     Except in the exceptional disjoint addressing cases discussed
     *     below, the value returned in <code>*data</code> will equal the
     *     input value <code>ptr</code>.
     *     </p>
     *   </li>
     *   <li>
     *     <b>CU_POINTER_ATTRIBUTE_HOST_POINTER</b>:
     *     <p>
     *     Returns in <code>*data</code> the host pointer value through
     *     which <code>ptr</code> may be accessed by the host program. The
     *     type of <code>data</code> must be <code>void **</code>. If there
     *     exists no host pointer value through which the host program may
     *     directly access <code>ptr</code> then CUDA_ERROR_INVALID_VALUE is
     *     returned.
     *     </p>
     *     <p>
     *     Except in the exceptional disjoint addressing cases discussed
     *     below, the value returned in <code>*data</code> will equal the
     *     input value <code>ptr</code>.
     *     </p>
     *   </li>
     *   <li>
     *     <b>CU_POINTER_ATTRIBUTE_P2P_TOKENS</b>:
     *     <p>
     *     Returns in <code>*data</code> two tokens for use with the
     *     nv-p2p.h Linux kernel interface. <code>data</code> must be a
     *     struct of type CUDA_POINTER_ATTRIBUTE_P2P_TOKENS.
     *     </p>
     *     <p>
     *     <code>ptr</code> must be a pointer to memory obtained from
     *     cuMemAlloc(). Note that p2pToken and vaSpaceToken are only valid
     *     for the lifetime of the source allocation. A subsequent
     *     allocation at the same address may return completely different
     *     tokens.
     *     </p>
     *   </li>
     * </ul>
     * <p>
     * Note that for most allocations in the unified virtual address space
     * the host and device pointer for accessing the allocation will be the
     * same. The exceptions to this are
     * </p>
     * <ul>
     *   <li>user memory registered using cuMemHostRegister</li>
     *   <li>host memory allocated using cuMemHostAlloc with the
     *     CU_MEMHOSTALLOC_WRITECOMBINED flag</li>
     * </ul>
     * <p>
     * For these types of allocation there will exist separate, disjoint
     * host and device addresses for accessing the allocation. In particular
     * </p>
     * <ul>
     *   <li>The host address will correspond to an invalid unmapped device
     *     address (which will result in an exception if accessed from the
     *     device)</li>
     *   <li>The device address will correspond to an invalid unmapped host
     *     address (which will result in an exception if accessed from the
     *     host).</li>
     * </ul>
     * <p>
     * For these types of allocations, querying
     * CU_POINTER_ATTRIBUTE_HOST_POINTER and
     * CU_POINTER_ATTRIBUTE_DEVICE_POINTER may be used to retrieve the host
     * and device addresses from either address.
     * </p>
     * <p>
     * Note: this function may also return error codes from previous,
     * asynchronous launches.
     * </p>
     *
     * @param data Returned pointer attribute value
     * @param attribute Pointer attribute to query
     * @param ptr Pointer
     *
     * @return CUDA_SUCCESS, CUDA_ERROR_DEINITIALIZED, CUDA_ERROR_NOT_INITIALIZED,
     * CUDA_ERROR_INVALID_CONTEXT, CUDA_ERROR_INVALID_VALUE,
     * CUDA_ERROR_INVALID_DEVICE
     *
     * @see JCudaDriver#cuMemAlloc
     * @see JCudaDriver#cuMemFree
     * @see JCudaDriver#cuMemAllocHost
     * @see JCudaDriver#cuMemFreeHost
     * @see JCudaDriver#cuMemHostAlloc
     * @see JCudaDriver#cuMemHostRegister
     * @see JCudaDriver#cuMemHostUnregister
     */
    public static int cuPointerGetAttribute(Pointer data, int attribute, CUdeviceptr ptr)
    {
        // The native status code is passed through checkResult before it is returned.
        final int status = cuPointerGetAttributeNative(data, attribute, ptr);
        return checkResult(status);
    }

    private static native int cuPointerGetAttributeNative(Pointer data, int attribute, CUdeviceptr ptr);

    
    /**
     * Sets an attribute on the memory referenced by the given pointer.
     *
     * <pre>
     * CUresult cuPointerSetAttribute (
     *      const void* value,
     *      CUpointer_attribute attribute,
     *      CUdeviceptr ptr )
     * </pre>
     *
     * <p>
     * Forwards all arguments unchanged to the native binding and passes the
     * returned status code through checkResult.
     * </p>
     *
     * @param value Pointer to the attribute value to set
     * @param attribute Pointer attribute to set
     * @param ptr Pointer to the memory whose attribute is set
     *
     * @return The status code reported by the native call
     *
     * @see JCudaDriver#cuPointerGetAttribute
     */
    public static int cuPointerSetAttribute(Pointer value, int attribute, CUdeviceptr ptr)
    {
        // Must delegate to the *Native* method: calling cuPointerSetAttribute
        // here would recurse without end and never reach the driver.
        return checkResult(cuPointerSetAttributeNative(value, attribute, ptr));
    }
    private static native int cuPointerSetAttributeNative(Pointer value, int attribute, CUdeviceptr ptr);

    
    /**
     * Forwards a multi-attribute pointer query to the native binding and
     * passes the returned status code through checkResult.
     *
     * @param numAttributes presumably the number of entries in
     * <code>attributes</code> to query - confirm against the native
     * implementation
     * @param attributes presumably the pointer attributes to query - confirm
     * against the native implementation
     * @param data presumably receives the queried attribute values - confirm
     * against the native implementation
     * @param ptr Pointer to query
     *
     * @return The status code reported by the native call
     *
     * @see JCudaDriver#cuPointerGetAttribute
     */
    public static int cuPointerGetAttributes(int numAttributes, int attributes[], Pointer data, CUdeviceptr ptr)
    {
        final int status = cuPointerGetAttributesNative(numAttributes, attributes, data, ptr);
        return checkResult(status);
    }
    private static native int cuPointerGetAttributesNative(int numAttributes, int attributes[], Pointer data, CUdeviceptr ptr);
    

    /**
     * Create a stream.
     *
     * <pre>
     * CUresult cuStreamCreate (
     *      CUstream* phStream,
     *      unsigned int  Flags )
     * </pre>
     *
     * <p>
     * Creates a stream and returns a handle in <code>phStream</code>. The
     * <code>Flags</code> argument determines behaviors of the stream. Valid
     * values for <code>Flags</code> are:
     * </p>
     * <ul>
     *   <li>CU_STREAM_DEFAULT: Default stream creation flag.</li>
     *   <li>CU_STREAM_NON_BLOCKING: Specifies that work running in the
     *     created stream may run concurrently with work in stream 0 (the
     *     NULL stream), and that the created stream should perform no
     *     implicit synchronization with stream 0.</li>
     * </ul>
     * <p>
     * Note: this function may also return error codes from previous,
     * asynchronous launches.
     * </p>
     *
     * @param phStream Returned newly created stream
     * @param Flags Parameters for stream creation
     *
     * @return CUDA_SUCCESS, CUDA_ERROR_DEINITIALIZED, CUDA_ERROR_NOT_INITIALIZED,
     * CUDA_ERROR_INVALID_CONTEXT, CUDA_ERROR_INVALID_VALUE,
     * CUDA_ERROR_OUT_OF_MEMORY
     *
     * @see JCudaDriver#cuStreamDestroy
     * @see JCudaDriver#cuStreamWaitEvent
     * @see JCudaDriver#cuStreamQuery
     * @see JCudaDriver#cuStreamSynchronize
     * @see JCudaDriver#cuStreamAddCallback
     */
    public static int cuStreamCreate(CUstream phStream, int Flags)
    {
        // The native status code is passed through checkResult before it is returned.
        final int status = cuStreamCreateNative(phStream, Flags);
        return checkResult(status);
    }

    private static native int cuStreamCreateNative(CUstream phStream, int Flags);

    
    /**
     * Forwards a stream creation request with an explicit priority to the
     * native binding and passes the returned status code through
     * checkResult.
     *
     * @param phStream presumably receives the newly created stream - confirm
     * against the native implementation
     * @param flags Flags handed to the native call unchanged
     * @param priority Priority handed to the native call unchanged
     *
     * @return The status code reported by the native call
     *
     * @see JCudaDriver#cuStreamCreate
     * @see JCudaDriver#cuStreamGetPriority
     */
    public static int cuStreamCreateWithPriority(CUstream phStream, int flags, int priority)
    {
        final int status = cuStreamCreateWithPriorityNative(phStream, flags, priority);
        return checkResult(status);
    }
    private static native int cuStreamCreateWithPriorityNative(CUstream phStream, int flags, int priority);
    
    
    /**
     * Forwards a stream priority query to the native binding and passes the
     * returned status code through checkResult.
     *
     * @param hStream Stream to query
     * @param priority presumably receives the stream priority in its first
     * element - confirm against the native implementation
     *
     * @return The status code reported by the native call
     *
     * @see JCudaDriver#cuStreamCreateWithPriority
     * @see JCudaDriver#cuStreamGetFlags
     */
    public static int cuStreamGetPriority(CUstream hStream, int priority[])
    {
        final int status = cuStreamGetPriorityNative(hStream, priority);
        return checkResult(status);
    }
    private static native int cuStreamGetPriorityNative(CUstream hStream, int priority[]);
    
    /**
     * Forwards a stream flags query to the native binding and passes the
     * returned status code through checkResult.
     *
     * @param hStream Stream to query
     * @param flags presumably receives the stream flags in its first
     * element - confirm against the native implementation
     *
     * @return The status code reported by the native call
     *
     * @see JCudaDriver#cuStreamCreate
     * @see JCudaDriver#cuStreamGetPriority
     */
    public static int cuStreamGetFlags(CUstream hStream, int flags[])
    {
        final int status = cuStreamGetFlagsNative(hStream, flags);
        return checkResult(status);
    }
    private static native int cuStreamGetFlagsNative(CUstream hStream, int flags[]);
    

    /**
     * Make a compute stream wait on an event.
     *
     * <pre>
     * CUresult cuStreamWaitEvent (
     *      CUstream hStream,
     *      CUevent hEvent,
     *      unsigned int  Flags )
     * </pre>
     *
     * <p>
     * Makes all future work submitted to <code>hStream</code> wait until
     * <code>hEvent</code> reports completion before beginning execution.
     * This synchronization will be performed efficiently on the device. The
     * event <code>hEvent</code> may be from a different context than
     * <code>hStream</code>, in which case this function will perform
     * cross-device synchronization.
     * </p>
     * <p>
     * The stream <code>hStream</code> will wait only for the completion of
     * the most recent host call to cuEventRecord() on <code>hEvent</code>.
     * Once this call has returned, any functions (including cuEventRecord()
     * and cuEventDestroy()) may be called on <code>hEvent</code> again, and
     * subsequent calls will not have any effect on <code>hStream</code>.
     * </p>
     * <p>
     * If <code>hStream</code> is 0 (the NULL stream) any future work
     * submitted in any stream will wait for <code>hEvent</code> to complete
     * before beginning execution. This effectively creates a barrier for
     * all future work submitted to the context.
     * </p>
     * <p>
     * If cuEventRecord() has not been called on <code>hEvent</code>, this
     * call acts as if the record has already completed, and so is a
     * functional no-op.
     * </p>
     * <p>
     * Note: this function may also return error codes from previous,
     * asynchronous launches.
     * </p>
     *
     * @param hStream Stream to wait
     * @param hEvent Event to wait on (may not be NULL)
     * @param Flags Parameters for the operation (must be 0)
     *
     * @return CUDA_SUCCESS, CUDA_ERROR_DEINITIALIZED, CUDA_ERROR_NOT_INITIALIZED,
     * CUDA_ERROR_INVALID_CONTEXT, CUDA_ERROR_INVALID_HANDLE
     *
     * @see JCudaDriver#cuStreamCreate
     * @see JCudaDriver#cuEventRecord
     * @see JCudaDriver#cuStreamQuery
     * @see JCudaDriver#cuStreamSynchronize
     * @see JCudaDriver#cuStreamAddCallback
     * @see JCudaDriver#cuStreamDestroy
     */
    public static int cuStreamWaitEvent(CUstream hStream, CUevent hEvent, int Flags)
    {
        // The native status code is passed through checkResult before it is returned.
        final int status = cuStreamWaitEventNative(hStream, hEvent, Flags);
        return checkResult(status);
    }
    private static native int cuStreamWaitEventNative(CUstream hStream, CUevent hEvent, int Flags);

    
    /**
     * Add a callback to a compute stream.
     *
     * <pre>
     * CUresult cuStreamAddCallback (
     *      CUstream hStream,
     *      CUstreamCallback callback,
     *      void* userData,
     *      unsigned int  flags )
     * </pre>
     *
     * <p>
     * Adds a callback to be called on the host after all currently enqueued
     * items in the stream have completed. For each cuStreamAddCallback
     * call, the callback will be executed exactly once. The callback will
     * block later work in the stream until it is finished.
     * </p>
     * <p>
     * The callback may be passed CUDA_SUCCESS or an error code. In the
     * event of a device error, all subsequently executed callbacks will
     * receive an appropriate CUresult.
     * </p>
     * <p>
     * Callbacks must not make any CUDA API calls. Attempting to use a CUDA
     * API will result in CUDA_ERROR_NOT_PERMITTED. Callbacks must not
     * perform any synchronization that may depend on outstanding device
     * work or other callbacks that are not mandated to run earlier.
     * Callbacks without a mandated order (in independent streams) execute
     * in undefined order and may be serialized.
     * </p>
     * <p>
     * This API requires compute capability 1.1 or greater. See
     * cuDeviceGetAttribute or cuDeviceGetProperties to query compute
     * capability. Attempting to use this API with earlier compute versions
     * will return CUDA_ERROR_NOT_SUPPORTED.
     * </p>
     * <p>
     * Note: this function may also return error codes from previous,
     * asynchronous launches.
     * </p>
     *
     * @param hStream Stream to add callback to
     * @param callback The function to call once preceding stream operations are complete
     * @param userData User specified data to be passed to the callback function
     * @param flags Reserved for future use, must be 0
     *
     * @return CUDA_SUCCESS, CUDA_ERROR_DEINITIALIZED, CUDA_ERROR_NOT_INITIALIZED,
     * CUDA_ERROR_INVALID_CONTEXT, CUDA_ERROR_INVALID_HANDLE,
     * CUDA_ERROR_NOT_SUPPORTED
     *
     * @see JCudaDriver#cuStreamCreate
     * @see JCudaDriver#cuStreamQuery
     * @see JCudaDriver#cuStreamSynchronize
     * @see JCudaDriver#cuStreamWaitEvent
     * @see JCudaDriver#cuStreamDestroy
     */
    public static int cuStreamAddCallback(CUstream hStream, CUstreamCallback callback, Object userData, int flags)
    {
        // The native status code is passed through checkResult before it is returned.
        final int status = cuStreamAddCallbackNative(hStream, callback, userData, flags);
        return checkResult(status);
    }
    private static native int cuStreamAddCallbackNative(CUstream hStream, CUstreamCallback callback, Object userData, int flags);

    
    /**
     * Forwards a request to attach memory to a stream to the native binding
     * and passes the returned status code through checkResult.
     *
     * @param hStream Stream handed to the native call
     * @param dptr Device pointer handed to the native call
     * @param length presumably the length of the memory region in bytes -
     * confirm against the native implementation
     * @param flags Flags handed to the native call unchanged
     *
     * @return The status code reported by the native call
     */
    public static int cuStreamAttachMemAsync(CUstream hStream, CUdeviceptr dptr, long length, int flags)
    {
        final int status = cuStreamAttachMemAsyncNative(hStream, dptr, length, flags);
        return checkResult(status);
    }
    private static native int cuStreamAttachMemAsyncNative(CUstream hStream, CUdeviceptr dptr, long length, int flags);
    
    
    /**
     * Determine status of a compute stream.
     *
     * <pre>
     * CUresult cuStreamQuery (
     *      CUstream hStream )
     * </pre>
     *
     * <p>
     * Returns CUDA_SUCCESS if all operations in the stream specified by
     * <code>hStream</code> have completed, or CUDA_ERROR_NOT_READY if not.
     * </p>
     * <p>
     * Note: this function may also return error codes from previous,
     * asynchronous launches.
     * </p>
     *
     * @param hStream Stream to query status of
     *
     * @return CUDA_SUCCESS, CUDA_ERROR_DEINITIALIZED, CUDA_ERROR_NOT_INITIALIZED,
     * CUDA_ERROR_INVALID_CONTEXT, CUDA_ERROR_INVALID_HANDLE,
     * CUDA_ERROR_NOT_READY
     *
     * @see JCudaDriver#cuStreamCreate
     * @see JCudaDriver#cuStreamWaitEvent
     * @see JCudaDriver#cuStreamDestroy
     * @see JCudaDriver#cuStreamSynchronize
     * @see JCudaDriver#cuStreamAddCallback
     */
    public static int cuStreamQuery(CUstream hStream)
    {
        // The native status code is passed through checkResult before it is returned.
        final int status = cuStreamQueryNative(hStream);
        return checkResult(status);
    }

    private static native int cuStreamQueryNative(CUstream hStream);


    /**
     * Wait until a stream's tasks are completed.
     *
     * <pre>
     * CUresult cuStreamSynchronize (
     *      CUstream hStream )
     * </pre>
     *
     * <p>
     * Waits until the device has completed all operations in the stream
     * specified by <code>hStream</code>. If the context was created with
     * the CU_CTX_SCHED_BLOCKING_SYNC flag, the CPU thread will block until
     * the stream is finished with all of its tasks.
     * </p>
     * <p>
     * Note: this function may also return error codes from previous,
     * asynchronous launches.
     * </p>
     *
     * @param hStream Stream to wait for
     *
     * @return CUDA_SUCCESS, CUDA_ERROR_DEINITIALIZED, CUDA_ERROR_NOT_INITIALIZED,
     * CUDA_ERROR_INVALID_CONTEXT, CUDA_ERROR_INVALID_HANDLE
     *
     * @see JCudaDriver#cuStreamCreate
     * @see JCudaDriver#cuStreamDestroy
     * @see JCudaDriver#cuStreamWaitEvent
     * @see JCudaDriver#cuStreamQuery
     * @see JCudaDriver#cuStreamAddCallback
     */
    public static int cuStreamSynchronize(CUstream hStream)
    {
        // The native status code is passed through checkResult before it is returned.
        final int status = cuStreamSynchronizeNative(hStream);
        return checkResult(status);
    }

    private static native int cuStreamSynchronizeNative(CUstream hStream);


    /**
     * Destroys a stream.
     *
     * <pre>
     * CUresult cuStreamDestroy (
     *      CUstream hStream )
     * </pre>
     *
     * <p>
     * Destroys the stream specified by <code>hStream</code>.
     * </p>
     * <p>
     * In case the device is still doing work in the stream
     * <code>hStream</code> when cuStreamDestroy() is called, the function
     * will return immediately and the resources associated with
     * <code>hStream</code> will be released automatically once the device
     * has completed all work in <code>hStream</code>.
     * </p>
     * <p>
     * Note: this function may also return error codes from previous,
     * asynchronous launches.
     * </p>
     *
     * @param hStream Stream to destroy
     *
     * @return CUDA_SUCCESS, CUDA_ERROR_DEINITIALIZED, CUDA_ERROR_NOT_INITIALIZED,
     * CUDA_ERROR_INVALID_CONTEXT, CUDA_ERROR_INVALID_VALUE
     *
     * @see JCudaDriver#cuStreamCreate
     * @see JCudaDriver#cuStreamWaitEvent
     * @see JCudaDriver#cuStreamQuery
     * @see JCudaDriver#cuStreamSynchronize
     * @see JCudaDriver#cuStreamAddCallback
     */
    public static int cuStreamDestroy(CUstream hStream)
    {
        // The native status code is passed through checkResult before it is returned.
        final int status = cuStreamDestroyNative(hStream);
        return checkResult(status);
    }

    private static native int cuStreamDestroyNative(CUstream hStream);



    /**
     * Initializes OpenGL interoperability.
     *
     * <pre>
     * CUresult cuGLInit (
     *      void )
     * </pre>
     *
     * <p>
     * <b>Deprecated:</b> This function is deprecated as of Cuda 3.0.
     * </p>
     * <p>
     * Initializes OpenGL interoperability. This function is deprecated and
     * calling it is no longer required. It may fail if the needed OpenGL
     * driver facilities are not available.
     * </p>
     * <p>
     * Note: this function may also return error codes from previous,
     * asynchronous launches.
     * </p>
     *
     * @return CUDA_SUCCESS, CUDA_ERROR_DEINITIALIZED, CUDA_ERROR_NOT_INITIALIZED,
     * CUDA_ERROR_INVALID_CONTEXT, CUDA_ERROR_UNKNOWN
     *
     * @see JCudaDriver#cuGLMapBufferObject
     * @see JCudaDriver#cuGLRegisterBufferObject
     * @see JCudaDriver#cuGLUnmapBufferObject
     * @see JCudaDriver#cuGLUnregisterBufferObject
     * @see JCudaDriver#cuGLMapBufferObjectAsync
     * @see JCudaDriver#cuGLUnmapBufferObjectAsync
     * @see JCudaDriver#cuGLSetBufferObjectMapFlags
     */
    public static int cuGLInit()
    {
        // The native status code is passed through checkResult before it is returned.
        final int status = cuGLInitNative();
        return checkResult(status);
    }
    private static native int cuGLInitNative();


    /**
     * Create a CUDA context for interoperability with OpenGL.
     *
     * <pre>
     * CUresult cuGLCtxCreate (
     *      CUcontext* pCtx,
     *      unsigned int  Flags,
     *      CUdevice device )
     * </pre>
     *
     * <p>
     * <b>Deprecated:</b> This function is deprecated as of Cuda 5.0.
     * </p>
     * <p>
     * This function is deprecated and should no longer be used. It is no
     * longer necessary to associate a CUDA context with an OpenGL context
     * in order to achieve maximum interoperability performance.
     * </p>
     * <p>
     * Note: this function may also return error codes from previous,
     * asynchronous launches.
     * </p>
     *
     * @param pCtx Returned CUDA context
     * @param Flags Options for CUDA context creation
     * @param device Device on which to create the context
     *
     * @return CUDA_SUCCESS, CUDA_ERROR_DEINITIALIZED, CUDA_ERROR_NOT_INITIALIZED,
     * CUDA_ERROR_INVALID_CONTEXT, CUDA_ERROR_INVALID_VALUE,
     * CUDA_ERROR_OUT_OF_MEMORY
     *
     * @see JCudaDriver#cuCtxCreate
     * @see JCudaDriver#cuGLInit
     * @see JCudaDriver#cuGLMapBufferObject
     * @see JCudaDriver#cuGLRegisterBufferObject
     * @see JCudaDriver#cuGLUnmapBufferObject
     * @see JCudaDriver#cuGLUnregisterBufferObject
     * @see JCudaDriver#cuGLMapBufferObjectAsync
     * @see JCudaDriver#cuGLUnmapBufferObjectAsync
     * @see JCudaDriver#cuGLSetBufferObjectMapFlags
     */
    public static int cuGLCtxCreate(CUcontext pCtx, int Flags, CUdevice device)
    {
        // The native status code is passed through checkResult before it is returned.
        final int status = cuGLCtxCreateNative(pCtx, Flags, device);
        return checkResult(status);
    }
    private static native int cuGLCtxCreateNative(CUcontext pCtx, int Flags, CUdevice device);


    /**
     * Gets the CUDA devices associated with the current OpenGL context.
     *
     * <pre>
     * CUresult cuGLGetDevices (
     *      unsigned int* pCudaDeviceCount,
     *      CUdevice* pCudaDevices,
     *      unsigned int  cudaDeviceCount,
     *      CUGLDeviceList deviceList )
     * </pre>
     *
     * <p>
     * Returns in *pCudaDeviceCount the number of CUDA-compatible devices
     * corresponding to the current OpenGL context. Also returns in
     * *pCudaDevices at most cudaDeviceCount of the CUDA-compatible devices
     * corresponding to the current OpenGL context. If any of the GPUs being
     * used by the current OpenGL context are not CUDA capable, then the call
     * will return CUDA_ERROR_NO_DEVICE.
     *
     * <p>
     * The device list argument may be any of the following:
     * <ul>
     *   <li>CU_GL_DEVICE_LIST_ALL: Query all devices used by the current
     *       OpenGL context.</li>
     *   <li>CU_GL_DEVICE_LIST_CURRENT_FRAME: Query the devices used by the
     *       current OpenGL context to render the current frame (in SLI).</li>
     *   <li>CU_GL_DEVICE_LIST_NEXT_FRAME: Query the devices used by the
     *       current OpenGL context to render the next frame (in SLI). Note
     *       that this is a prediction; it can't be guaranteed that this is
     *       correct in all cases.</li>
     * </ul>
     *
     * <p>
     * Note: this function may also return error codes from previous,
     * asynchronous launches.
     *
     * @param pCudaDeviceCount Returned number of CUDA devices.
     * @param pCudaDevices Returned CUDA devices.
     * @param cudaDeviceCount The size of the output device array pCudaDevices.
     * @param CUGLDeviceList_deviceList The set of devices to return
     * (the {@code deviceList} argument of the C API).
     *
     * @return The status code of the native call, as passed through
     * {@code checkResult}. The CUDA documentation lists CUDA_SUCCESS,
     * CUDA_ERROR_NO_DEVICE, CUDA_ERROR_INVALID_VALUE,
     * CUDA_ERROR_INVALID_CONTEXT
     */
    public static int cuGLGetDevices(int pCudaDeviceCount[], CUdevice pCudaDevices[], int cudaDeviceCount, int CUGLDeviceList_deviceList)
    {
        final int status = cuGLGetDevicesNative(
            pCudaDeviceCount, pCudaDevices, cudaDeviceCount, CUGLDeviceList_deviceList);
        return checkResult(status);
    }
    private static native int cuGLGetDevicesNative(int pCudaDeviceCount[], CUdevice pCudaDevices[], int cudaDeviceCount, int CUGLDeviceList_deviceList);

    /**
     * Registers an OpenGL buffer object.
     *
     * <pre>
     * CUresult cuGraphicsGLRegisterBuffer (
     *      CUgraphicsResource* pCudaResource,
     *      GLuint buffer,
     *      unsigned int  Flags )
     * </pre>
     *
     * <p>
     * Registers the buffer object specified by buffer for access by CUDA.
     * A handle to the registered object is returned as pCudaResource.
     * The register flags Flags specify the intended usage, as follows:
     * <ul>
     *   <li>CU_GRAPHICS_REGISTER_FLAGS_NONE: Specifies no hints about how
     *       this resource will be used. It is therefore assumed that this
     *       resource will be read from and written to by CUDA. This is the
     *       default value.</li>
     *   <li>CU_GRAPHICS_REGISTER_FLAGS_READ_ONLY: Specifies that CUDA will
     *       not write to this resource.</li>
     *   <li>CU_GRAPHICS_REGISTER_FLAGS_WRITE_DISCARD: Specifies that CUDA
     *       will not read from this resource and will write over the entire
     *       contents of the resource, so none of the data previously stored
     *       in the resource will be preserved.</li>
     * </ul>
     *
     * <p>
     * Note: this function may also return error codes from previous,
     * asynchronous launches.
     *
     * @param pCudaResource Pointer to the returned object handle
     * @param buffer name of buffer object to be registered
     * @param Flags Register flags
     *
     * @return The status code of the native call, as passed through
     * {@code checkResult}. The CUDA documentation lists CUDA_SUCCESS,
     * CUDA_ERROR_INVALID_HANDLE, CUDA_ERROR_ALREADY_MAPPED,
     * CUDA_ERROR_INVALID_CONTEXT
     *
     * @see JCudaDriver#cuGraphicsUnregisterResource
     * @see JCudaDriver#cuGraphicsMapResources
     * @see JCudaDriver#cuGraphicsResourceGetMappedPointer
     */
    public static int cuGraphicsGLRegisterBuffer(CUgraphicsResource pCudaResource, int buffer, int Flags)
    {
        final int status = cuGraphicsGLRegisterBufferNative(pCudaResource, buffer, Flags);
        return checkResult(status);
    }
    private static native int cuGraphicsGLRegisterBufferNative(CUgraphicsResource pCudaResource, int buffer, int Flags);




    /**
     * Registers an OpenGL texture or renderbuffer object.
     *
     * <pre>
     * CUresult cuGraphicsGLRegisterImage (
     *      CUgraphicsResource* pCudaResource,
     *      GLuint image,
     *      GLenum target,
     *      unsigned int  Flags )
     * </pre>
     *
     * <p>
     * Registers the texture or renderbuffer object specified by image for
     * access by CUDA. A handle to the registered object is returned as
     * pCudaResource.
     *
     * <p>
     * target must match the type of the object, and must be one of
     * GL_TEXTURE_2D, GL_TEXTURE_RECTANGLE, GL_TEXTURE_CUBE_MAP,
     * GL_TEXTURE_3D, GL_TEXTURE_2D_ARRAY, or GL_RENDERBUFFER.
     *
     * <p>
     * The register flags Flags specify the intended usage, as follows:
     * <ul>
     *   <li>CU_GRAPHICS_REGISTER_FLAGS_NONE: Specifies no hints about how
     *       this resource will be used. It is therefore assumed that this
     *       resource will be read from and written to by CUDA. This is the
     *       default value.</li>
     *   <li>CU_GRAPHICS_REGISTER_FLAGS_READ_ONLY: Specifies that CUDA will
     *       not write to this resource.</li>
     *   <li>CU_GRAPHICS_REGISTER_FLAGS_WRITE_DISCARD: Specifies that CUDA
     *       will not read from this resource and will write over the entire
     *       contents of the resource, so none of the data previously stored
     *       in the resource will be preserved.</li>
     *   <li>CU_GRAPHICS_REGISTER_FLAGS_SURFACE_LDST: Specifies that CUDA
     *       will bind this resource to a surface reference.</li>
     *   <li>CU_GRAPHICS_REGISTER_FLAGS_TEXTURE_GATHER: Specifies that CUDA
     *       will perform texture gather operations on this resource.</li>
     * </ul>
     *
     * <p>
     * The following image formats are supported. For brevity's sake, the
     * list is abbreviated. For example, {GL_R, GL_RG} X {8, 16} would expand
     * to the following 4 formats: {GL_R8, GL_R16, GL_RG8, GL_RG16}.
     * <ul>
     *   <li>GL_RED, GL_RG, GL_RGBA, GL_LUMINANCE, GL_ALPHA,
     *       GL_LUMINANCE_ALPHA, GL_INTENSITY</li>
     *   <li>{GL_R, GL_RG, GL_RGBA} X {8, 16, 16F, 32F, 8UI, 16UI, 32UI,
     *       8I, 16I, 32I}</li>
     *   <li>{GL_LUMINANCE, GL_ALPHA, GL_LUMINANCE_ALPHA, GL_INTENSITY} X
     *       {8, 16, 16F_ARB, 32F_ARB, 8UI_EXT, 16UI_EXT, 32UI_EXT, 8I_EXT,
     *       16I_EXT, 32I_EXT}</li>
     * </ul>
     *
     * <p>
     * The following image classes are currently disallowed:
     * <ul>
     *   <li>Textures with borders</li>
     *   <li>Multisampled renderbuffers</li>
     * </ul>
     *
     * <p>
     * Note: this function may also return error codes from previous,
     * asynchronous launches.
     *
     * @param pCudaResource Pointer to the returned object handle
     * @param image name of texture or renderbuffer object to be registered
     * @param target Identifies the type of object specified by image
     * @param Flags Register flags
     *
     * @return The status code of the native call, as passed through
     * {@code checkResult}. The CUDA documentation lists CUDA_SUCCESS,
     * CUDA_ERROR_INVALID_HANDLE, CUDA_ERROR_ALREADY_MAPPED,
     * CUDA_ERROR_INVALID_CONTEXT
     *
     * @see JCudaDriver#cuGraphicsUnregisterResource
     * @see JCudaDriver#cuGraphicsMapResources
     * @see JCudaDriver#cuGraphicsSubResourceGetMappedArray
     */
    public static int cuGraphicsGLRegisterImage(CUgraphicsResource pCudaResource, int image, int target, int Flags)
    {
        final int status = cuGraphicsGLRegisterImageNative(pCudaResource, image, target, Flags);
        return checkResult(status);
    }
    private static native int cuGraphicsGLRegisterImageNative(CUgraphicsResource pCudaResource, int image, int target, int Flags);


    /**
     * Registers an OpenGL buffer object.
     *
     * <pre>
     * CUresult cuGLRegisterBufferObject (
     *      GLuint buffer )
     * </pre>
     *
     * <p>
     * Deprecated: this function is deprecated as of CUDA 3.0.
     *
     * <p>
     * Registers the buffer object specified by {@code bufferobj} (named
     * {@code buffer} in the C API) for access by CUDA. This function must be
     * called before CUDA can map the buffer object. There must be a valid
     * OpenGL context bound to the current thread when this function is
     * called, and the buffer name is resolved by that context.
     *
     * <p>
     * Note: this function may also return error codes from previous,
     * asynchronous launches.
     *
     * @param bufferobj The name of the buffer object to register.
     *
     * @return The status code of the native call, as passed through
     * {@code checkResult}. The CUDA documentation lists CUDA_SUCCESS,
     * CUDA_ERROR_DEINITIALIZED, CUDA_ERROR_NOT_INITIALIZED,
     * CUDA_ERROR_INVALID_CONTEXT, CUDA_ERROR_ALREADY_MAPPED
     *
     * @see JCudaDriver#cuGraphicsGLRegisterBuffer
     */
    public static int cuGLRegisterBufferObject(int bufferobj)
    {
        final int status = cuGLRegisterBufferObjectNative(bufferobj);
        return checkResult(status);
    }
    private static native int cuGLRegisterBufferObjectNative(int bufferobj);


    /**
     * Maps an OpenGL buffer object.
     *
     * <pre>
     * CUresult cuGLMapBufferObject (
     *      CUdeviceptr* dptr,
     *      size_t* size,
     *      GLuint buffer )
     * </pre>
     *
     * <p>
     * Deprecated: this function is deprecated as of CUDA 3.0.
     *
     * <p>
     * Maps the buffer object specified by {@code bufferobj} (named
     * {@code buffer} in the C API) into the address space of the current
     * CUDA context and returns in *dptr and *size the base pointer and size
     * of the resulting mapping.
     *
     * <p>
     * There must be a valid OpenGL context bound to the current thread when
     * this function is called. This must be the same context, or a member of
     * the same shareGroup, as the context that was bound when the buffer was
     * registered.
     *
     * <p>
     * All streams in the current CUDA context are synchronized with the
     * current GL context.
     *
     * <p>
     * Note: this function may also return error codes from previous,
     * asynchronous launches.
     *
     * @param dptr Returned mapped base pointer
     * @param size Returned size of mapping
     * @param bufferobj The name of the buffer object to map
     *
     * @return The status code of the native call, as passed through
     * {@code checkResult}. The CUDA documentation lists CUDA_SUCCESS,
     * CUDA_ERROR_DEINITIALIZED, CUDA_ERROR_NOT_INITIALIZED,
     * CUDA_ERROR_INVALID_CONTEXT, CUDA_ERROR_INVALID_VALUE,
     * CUDA_ERROR_MAP_FAILED
     *
     * @see JCudaDriver#cuGraphicsMapResources
     */
    public static int cuGLMapBufferObject(CUdeviceptr dptr, long size[], int bufferobj)
    {
        final int status = cuGLMapBufferObjectNative(dptr, size, bufferobj);
        return checkResult(status);
    }
    private static native int cuGLMapBufferObjectNative(CUdeviceptr dptr, long size[], int bufferobj);


    /**
     * Unmaps an OpenGL buffer object.
     *
     * <pre>
     * CUresult cuGLUnmapBufferObject (
     *      GLuint buffer )
     * </pre>
     *
     * <p>
     * Deprecated: this function is deprecated as of CUDA 3.0.
     *
     * <p>
     * Unmaps the buffer object specified by {@code bufferobj} (named
     * {@code buffer} in the C API) for access by CUDA.
     *
     * <p>
     * There must be a valid OpenGL context bound to the current thread when
     * this function is called. This must be the same context, or a member of
     * the same shareGroup, as the context that was bound when the buffer was
     * registered.
     *
     * <p>
     * All streams in the current CUDA context are synchronized with the
     * current GL context.
     *
     * <p>
     * Note: this function may also return error codes from previous,
     * asynchronous launches.
     *
     * @param bufferobj Buffer object to unmap
     *
     * @return The status code of the native call, as passed through
     * {@code checkResult}. The CUDA documentation lists CUDA_SUCCESS,
     * CUDA_ERROR_DEINITIALIZED, CUDA_ERROR_NOT_INITIALIZED,
     * CUDA_ERROR_INVALID_CONTEXT, CUDA_ERROR_INVALID_VALUE
     *
     * @see JCudaDriver#cuGraphicsUnmapResources
     */
    public static int cuGLUnmapBufferObject(int bufferobj)
    {
        final int status = cuGLUnmapBufferObjectNative(bufferobj);
        return checkResult(status);
    }
    private static native int cuGLUnmapBufferObjectNative(int bufferobj);


    /**
     * Unregisters an OpenGL buffer object.
     *
     * <pre>
     * CUresult cuGLUnregisterBufferObject (
     *      GLuint buffer )
     * </pre>
     *
     * <p>
     * Deprecated: this function is deprecated as of CUDA 3.0.
     *
     * <p>
     * Unregisters the buffer object specified by {@code bufferobj} (named
     * {@code buffer} in the C API). This releases any resources associated
     * with the registered buffer. After this call, the buffer may no longer
     * be mapped for access by CUDA.
     *
     * <p>
     * There must be a valid OpenGL context bound to the current thread when
     * this function is called. This must be the same context, or a member of
     * the same shareGroup, as the context that was bound when the buffer was
     * registered.
     *
     * <p>
     * Note: this function may also return error codes from previous,
     * asynchronous launches.
     *
     * @param bufferobj Name of the buffer object to unregister
     *
     * @return The status code of the native call, as passed through
     * {@code checkResult}. The CUDA documentation lists CUDA_SUCCESS,
     * CUDA_ERROR_DEINITIALIZED, CUDA_ERROR_NOT_INITIALIZED,
     * CUDA_ERROR_INVALID_CONTEXT, CUDA_ERROR_INVALID_VALUE
     *
     * @see JCudaDriver#cuGraphicsUnregisterResource
     */
    public static int cuGLUnregisterBufferObject(int bufferobj)
    {
        final int status = cuGLUnregisterBufferObjectNative(bufferobj);
        return checkResult(status);
    }
    private static native int cuGLUnregisterBufferObjectNative(int bufferobj);



    /**
     * Sets the map flags for an OpenGL buffer object.
     *
     * <pre>
     * CUresult cuGLSetBufferObjectMapFlags (
     *      GLuint buffer,
     *      unsigned int  Flags )
     * </pre>
     *
     * <p>
     * Deprecated: this function is deprecated as of CUDA 3.0.
     *
     * <p>
     * Sets the map flags for the buffer object specified by buffer.
     *
     * <p>
     * Changes to Flags will take effect the next time buffer is mapped.
     * The Flags argument may be any of the following:
     * <ul>
     *   <li>CU_GL_MAP_RESOURCE_FLAGS_NONE: Specifies no hints about how this
     *       resource will be used. It is therefore assumed that this
     *       resource will be read from and written to by CUDA kernels. This
     *       is the default value.</li>
     *   <li>CU_GL_MAP_RESOURCE_FLAGS_READ_ONLY: Specifies that CUDA kernels
     *       which access this resource will not write to this resource.</li>
     *   <li>CU_GL_MAP_RESOURCE_FLAGS_WRITE_DISCARD: Specifies that CUDA
     *       kernels which access this resource will not read from this
     *       resource and will write over the entire contents of the
     *       resource, so none of the data previously stored in the resource
     *       will be preserved.</li>
     * </ul>
     *
     * <p>
     * If buffer has not been registered for use with CUDA, then
     * CUDA_ERROR_INVALID_HANDLE is returned. If buffer is presently mapped
     * for access by CUDA, then CUDA_ERROR_ALREADY_MAPPED is returned.
     *
     * <p>
     * There must be a valid OpenGL context bound to the current thread when
     * this function is called. This must be the same context, or a member of
     * the same shareGroup, as the context that was bound when the buffer was
     * registered.
     *
     * <p>
     * Note: this function may also return error codes from previous,
     * asynchronous launches.
     *
     * @param buffer Buffer object whose map flags are set
     * @param Flags Map flags
     *
     * @return The status code of the native call, as passed through
     * {@code checkResult}. The CUDA documentation lists CUDA_SUCCESS,
     * CUDA_ERROR_NOT_INITIALIZED, CUDA_ERROR_INVALID_HANDLE,
     * CUDA_ERROR_ALREADY_MAPPED, CUDA_ERROR_INVALID_CONTEXT
     *
     * @see JCudaDriver#cuGraphicsResourceSetMapFlags
     */
    public static int cuGLSetBufferObjectMapFlags(int buffer, int Flags)
    {
        final int status = cuGLSetBufferObjectMapFlagsNative(buffer, Flags);
        return checkResult(status);
    }
    private static native int cuGLSetBufferObjectMapFlagsNative(int buffer, int Flags);


    /**
     * Maps an OpenGL buffer object, synchronizing a single stream.
     *
     * <pre>
     * CUresult cuGLMapBufferObjectAsync (
     *      CUdeviceptr* dptr,
     *      size_t* size,
     *      GLuint buffer,
     *      CUstream hStream )
     * </pre>
     *
     * <p>
     * Deprecated: this function is deprecated as of CUDA 3.0.
     *
     * <p>
     * Maps the buffer object specified by buffer into the address space of
     * the current CUDA context and returns in *dptr and *size the base
     * pointer and size of the resulting mapping.
     *
     * <p>
     * There must be a valid OpenGL context bound to the current thread when
     * this function is called. This must be the same context, or a member of
     * the same shareGroup, as the context that was bound when the buffer was
     * registered.
     *
     * <p>
     * Stream hStream in the current CUDA context is synchronized with the
     * current GL context.
     *
     * <p>
     * Note: this function may also return error codes from previous,
     * asynchronous launches.
     *
     * @param dptr Returned mapped base pointer
     * @param size Returned size of mapping
     * @param buffer The name of the buffer object to map
     * @param hStream Stream to synchronize
     *
     * @return The status code of the native call, as passed through
     * {@code checkResult}. The CUDA documentation lists CUDA_SUCCESS,
     * CUDA_ERROR_DEINITIALIZED, CUDA_ERROR_NOT_INITIALIZED,
     * CUDA_ERROR_INVALID_CONTEXT, CUDA_ERROR_INVALID_VALUE,
     * CUDA_ERROR_MAP_FAILED
     *
     * @see JCudaDriver#cuGraphicsMapResources
     */
    public static int cuGLMapBufferObjectAsync(CUdeviceptr dptr, long size[], int buffer, CUstream hStream)
    {
        final int status = cuGLMapBufferObjectAsyncNative(dptr, size, buffer, hStream);
        return checkResult(status);
    }
    private static native int cuGLMapBufferObjectAsyncNative(CUdeviceptr dptr, long size[], int buffer, CUstream hStream);


    /**
     * Unmaps an OpenGL buffer object, synchronizing a single stream.
     *
     * <pre>
     * CUresult cuGLUnmapBufferObjectAsync (
     *      GLuint buffer,
     *      CUstream hStream )
     * </pre>
     *
     * <p>
     * Deprecated: this function is deprecated as of CUDA 3.0.
     *
     * <p>
     * Unmaps the buffer object specified by buffer for access by CUDA.
     *
     * <p>
     * There must be a valid OpenGL context bound to the current thread when
     * this function is called. This must be the same context, or a member of
     * the same shareGroup, as the context that was bound when the buffer was
     * registered.
     *
     * <p>
     * Stream hStream in the current CUDA context is synchronized with the
     * current GL context.
     *
     * <p>
     * Note: this function may also return error codes from previous,
     * asynchronous launches.
     *
     * @param buffer Name of the buffer object to unmap
     * @param hStream Stream to synchronize
     *
     * @return The status code of the native call, as passed through
     * {@code checkResult}. The CUDA documentation lists CUDA_SUCCESS,
     * CUDA_ERROR_DEINITIALIZED, CUDA_ERROR_NOT_INITIALIZED,
     * CUDA_ERROR_INVALID_CONTEXT, CUDA_ERROR_INVALID_VALUE
     *
     * @see JCudaDriver#cuGraphicsUnmapResources
     */
    public static int cuGLUnmapBufferObjectAsync(int buffer, CUstream hStream)
    {
        final int status = cuGLUnmapBufferObjectAsyncNative(buffer, hStream);
        return checkResult(status);
    }
    private static native int cuGLUnmapBufferObjectAsyncNative(int buffer, CUstream hStream);




    /**
     * Unregisters a graphics resource for access by CUDA.
     *
     * <pre>
     * CUresult cuGraphicsUnregisterResource (
     *      CUgraphicsResource resource )
     * </pre>
     *
     * <p>
     * Unregisters the graphics resource resource so it is not accessible by
     * CUDA unless registered again.
     *
     * <p>
     * If resource is invalid then CUDA_ERROR_INVALID_HANDLE is returned.
     *
     * <p>
     * Note: this function may also return error codes from previous,
     * asynchronous launches.
     *
     * @param resource Resource to unregister
     *
     * @return The status code of the native call, as passed through
     * {@code checkResult}. The CUDA documentation lists CUDA_SUCCESS,
     * CUDA_ERROR_DEINITIALIZED, CUDA_ERROR_NOT_INITIALIZED,
     * CUDA_ERROR_INVALID_CONTEXT, CUDA_ERROR_INVALID_HANDLE,
     * CUDA_ERROR_UNKNOWN
     *
     * @see JCudaDriver#cuGraphicsGLRegisterBuffer
     * @see JCudaDriver#cuGraphicsGLRegisterImage
     */
    public static int cuGraphicsUnregisterResource(CUgraphicsResource resource)
    {
        final int status = cuGraphicsUnregisterResourceNative(resource);
        return checkResult(status);
    }
    private static native int cuGraphicsUnregisterResourceNative(CUgraphicsResource resource);


    /**
     * Gets an array through which to access a subresource of a mapped
     * graphics resource.
     *
     * <pre>
     * CUresult cuGraphicsSubResourceGetMappedArray (
     *      CUarray* pArray,
     *      CUgraphicsResource resource,
     *      unsigned int  arrayIndex,
     *      unsigned int  mipLevel )
     * </pre>
     *
     * <p>
     * Returns in *pArray an array through which the subresource of the
     * mapped graphics resource resource which corresponds to array index
     * arrayIndex and mipmap level mipLevel may be accessed. The value set in
     * *pArray may change every time that resource is mapped.
     *
     * <p>
     * If resource is not a texture then it cannot be accessed via an array
     * and CUDA_ERROR_NOT_MAPPED_AS_ARRAY is returned. If arrayIndex is not a
     * valid array index for resource then CUDA_ERROR_INVALID_VALUE is
     * returned. If mipLevel is not a valid mipmap level for resource then
     * CUDA_ERROR_INVALID_VALUE is returned. If resource is not mapped then
     * CUDA_ERROR_NOT_MAPPED is returned.
     *
     * <p>
     * Note: this function may also return error codes from previous,
     * asynchronous launches.
     *
     * @param pArray Returned array through which a subresource of resource
     * may be accessed
     * @param resource Mapped resource to access
     * @param arrayIndex Array index for array textures or cubemap face index
     * as defined by CUarray_cubemap_face for cubemap textures for the
     * subresource to access
     * @param mipLevel Mipmap level for the subresource to access
     *
     * @return The status code of the native call, as passed through
     * {@code checkResult}. The CUDA documentation lists CUDA_SUCCESS,
     * CUDA_ERROR_DEINITIALIZED, CUDA_ERROR_NOT_INITIALIZED,
     * CUDA_ERROR_INVALID_CONTEXT, CUDA_ERROR_INVALID_VALUE,
     * CUDA_ERROR_INVALID_HANDLE, CUDA_ERROR_NOT_MAPPED,
     * CUDA_ERROR_NOT_MAPPED_AS_ARRAY
     *
     * @see JCudaDriver#cuGraphicsResourceGetMappedPointer
     */
    public static int cuGraphicsSubResourceGetMappedArray(CUarray pArray, CUgraphicsResource resource, int arrayIndex, int mipLevel)
    {
        final int status = cuGraphicsSubResourceGetMappedArrayNative(
            pArray, resource, arrayIndex, mipLevel);
        return checkResult(status);
    }
    private static native int cuGraphicsSubResourceGetMappedArrayNative(CUarray pArray, CUgraphicsResource resource, int arrayIndex, int mipLevel);


    /**
     * Gets a mipmapped array through which to access a mapped graphics
     * resource.
     *
     * <pre>
     * CUresult cuGraphicsResourceGetMappedMipmappedArray (
     *      CUmipmappedArray* pMipmappedArray,
     *      CUgraphicsResource resource )
     * </pre>
     *
     * <p>
     * Returns in *pMipmappedArray a mipmapped array through which the mapped
     * graphics resource resource may be accessed. The value set in
     * *pMipmappedArray may change every time that resource is mapped.
     *
     * <p>
     * If resource is not a texture then it cannot be accessed via a
     * mipmapped array and CUDA_ERROR_NOT_MAPPED_AS_ARRAY is returned. If
     * resource is not mapped then CUDA_ERROR_NOT_MAPPED is returned.
     *
     * <p>
     * Note: this function may also return error codes from previous,
     * asynchronous launches.
     *
     * @param pMipmappedArray Returned mipmapped array through which resource
     * may be accessed
     * @param resource Mapped resource to access
     *
     * @return The status code of the native call, as passed through
     * {@code checkResult}. The CUDA documentation lists CUDA_SUCCESS,
     * CUDA_ERROR_DEINITIALIZED, CUDA_ERROR_NOT_INITIALIZED,
     * CUDA_ERROR_INVALID_CONTEXT, CUDA_ERROR_INVALID_VALUE,
     * CUDA_ERROR_INVALID_HANDLE, CUDA_ERROR_NOT_MAPPED,
     * CUDA_ERROR_NOT_MAPPED_AS_ARRAY
     *
     * @see JCudaDriver#cuGraphicsResourceGetMappedPointer
     */
    public static int cuGraphicsResourceGetMappedMipmappedArray(CUmipmappedArray pMipmappedArray, CUgraphicsResource resource)
    {
        final int status = cuGraphicsResourceGetMappedMipmappedArrayNative(
            pMipmappedArray, resource);
        return checkResult(status);
    }
    private static native int cuGraphicsResourceGetMappedMipmappedArrayNative(CUmipmappedArray pMipmappedArray, CUgraphicsResource resource);

    
    /**
     * Gets a device pointer through which to access a mapped graphics
     * resource.
     *
     * <pre>
     * CUresult cuGraphicsResourceGetMappedPointer (
     *      CUdeviceptr* pDevPtr,
     *      size_t* pSize,
     *      CUgraphicsResource resource )
     * </pre>
     *
     * <p>
     * Returns in *pDevPtr a pointer through which the mapped graphics
     * resource resource may be accessed. Returns in pSize the size of the
     * memory in bytes which may be accessed from that pointer. The value set
     * in pDevPtr may change every time that resource is mapped.
     *
     * <p>
     * If resource is not a buffer then it cannot be accessed via a pointer
     * and CUDA_ERROR_NOT_MAPPED_AS_POINTER is returned. If resource is not
     * mapped then CUDA_ERROR_NOT_MAPPED is returned.
     *
     * <p>
     * Note: this function may also return error codes from previous,
     * asynchronous launches.
     *
     * @param pDevPtr Returned pointer through which resource may be accessed
     * @param pSize Returned size of the buffer accessible starting at
     * *pDevPtr
     * @param resource Mapped resource to access
     *
     * @return The status code of the native call, as passed through
     * {@code checkResult}. The CUDA documentation lists CUDA_SUCCESS,
     * CUDA_ERROR_DEINITIALIZED, CUDA_ERROR_NOT_INITIALIZED,
     * CUDA_ERROR_INVALID_CONTEXT, CUDA_ERROR_INVALID_VALUE,
     * CUDA_ERROR_INVALID_HANDLE, CUDA_ERROR_NOT_MAPPED,
     * CUDA_ERROR_NOT_MAPPED_AS_POINTER
     *
     * @see JCudaDriver#cuGraphicsMapResources
     * @see JCudaDriver#cuGraphicsSubResourceGetMappedArray
     */
    public static int cuGraphicsResourceGetMappedPointer(CUdeviceptr pDevPtr, long pSize[], CUgraphicsResource resource)
    {
        final int status = cuGraphicsResourceGetMappedPointerNative(pDevPtr, pSize, resource);
        return checkResult(status);
    }
    private static native int cuGraphicsResourceGetMappedPointerNative(CUdeviceptr pDevPtr, long pSize[], CUgraphicsResource resource);


    /**
     * Sets usage flags for mapping a graphics resource.
     *
     * <pre>
     * CUresult cuGraphicsResourceSetMapFlags (
     *      CUgraphicsResource resource,
     *      unsigned int  flags )
     * </pre>
     *
     * <p>
     * Sets flags for mapping the graphics resource resource.
     *
     * <p>
     * Changes to flags will take effect the next time resource is mapped.
     * The flags argument may be any of the following:
     * <ul>
     *   <li>CU_GRAPHICS_MAP_RESOURCE_FLAGS_NONE: Specifies no hints about how
     *       this resource will be used. It is therefore assumed that this
     *       resource will be read from and written to by CUDA kernels. This
     *       is the default value.</li>
     *   <li>CU_GRAPHICS_MAP_RESOURCE_FLAGS_READONLY: Specifies that CUDA
     *       kernels which access this resource will not write to this
     *       resource.</li>
     *   <li>CU_GRAPHICS_MAP_RESOURCE_FLAGS_WRITEDISCARD: Specifies that CUDA
     *       kernels which access this resource will not read from this
     *       resource and will write over the entire contents of the
     *       resource, so none of the data previously stored in the resource
     *       will be preserved.</li>
     * </ul>
     *
     * <p>
     * If resource is presently mapped for access by CUDA then
     * CUDA_ERROR_ALREADY_MAPPED is returned. If flags is not one of the
     * above values then CUDA_ERROR_INVALID_VALUE is returned.
     *
     * <p>
     * Note: this function may also return error codes from previous,
     * asynchronous launches.
     *
     * @param resource Registered resource to set flags for
     * @param flags Parameters for resource mapping
     *
     * @return The status code of the native call, as passed through
     * {@code checkResult}. The CUDA documentation lists CUDA_SUCCESS,
     * CUDA_ERROR_DEINITIALIZED, CUDA_ERROR_NOT_INITIALIZED,
     * CUDA_ERROR_INVALID_CONTEXT, CUDA_ERROR_INVALID_VALUE,
     * CUDA_ERROR_INVALID_HANDLE, CUDA_ERROR_ALREADY_MAPPED
     *
     * @see JCudaDriver#cuGraphicsMapResources
     */
    public static int cuGraphicsResourceSetMapFlags(CUgraphicsResource resource, int flags)
    {
        final int status = cuGraphicsResourceSetMapFlagsNative(resource, flags);
        return checkResult(status);
    }
    private static native int cuGraphicsResourceSetMapFlagsNative(CUgraphicsResource resource, int flags);


    /**
     * Map graphics resources for access by CUDA.
     * 
     * <pre>
     * CUresult cuGraphicsMapResources (
     *      unsigned int  count,
     *      CUgraphicsResource* resources,
     *      CUstream hStream )
     * </pre>
     * 
     * 
     *   Map graphics resources for access by
     *     CUDA.  Maps the count graphics resources in resources
     *     for access by CUDA.
     *   
     *   The resources in resources
     *     may be accessed by CUDA until they are unmapped. The graphics API from
     *     which resources were registered should not access any
     *     resources while they are mapped by CUDA. If an application does so,
     *     the results are
     *     undefined.
     *   
     *   This function provides the synchronization
     *     guarantee that any graphics calls issued before cuGraphicsMapResources()
     *     will complete before any subsequent CUDA work issued in stream
     *     begins.
     *   
     *   If resources includes any
     *     duplicate entries then CUDA_ERROR_INVALID_HANDLE is returned. If any
     *     of resources are presently mapped for access by CUDA then
     *     CUDA_ERROR_ALREADY_MAPPED is returned.
     *   
     *   
     *     Note:
     *     Note that this
     *       function may also return error codes from previous, asynchronous
     *       launches.
     *     
     *   
     *   
     * 
     * 
     * @param count Number of resources to map
     * @param resources Resources to map for CUDA usage
     * @param hStream Stream with which to synchronize
     * 
     * @return CUDA_SUCCESS, CUDA_ERROR_DEINITIALIZED, CUDA_ERROR_NOT_INITIALIZED,
     * CUDA_ERROR_INVALID_CONTEXT, CUDA_ERROR_INVALID_HANDLE,
     * CUDA_ERROR_ALREADY_MAPPED, CUDA_ERROR_UNKNOWN
     * 
     * @see JCudaDriver#cuGraphicsResourceGetMappedPointer
     * @see JCudaDriver#cuGraphicsSubResourceGetMappedArray
     * @see JCudaDriver#cuGraphicsUnmapResources
     */    
    public static int cuGraphicsMapResources(int count, CUgraphicsResource resources[], CUstream hStream)
    {
        // Run the native mapping call, then route its status code through checkResult.
        final int status = cuGraphicsMapResourcesNative(count, resources, hStream);
        return checkResult(status);
    }

    /**
     * JNI entry point backing {@link #cuGraphicsMapResources}.
     * The public wrapper passes the returned int on to checkResult.
     */
    private static native int cuGraphicsMapResourcesNative(int count, CUgraphicsResource resources[], CUstream hStream);


    /**
     * Unmap graphics resources.
     * 
     * <pre>
     * CUresult cuGraphicsUnmapResources (
     *      unsigned int  count,
     *      CUgraphicsResource* resources,
     *      CUstream hStream )
     * </pre>
     * 
     * 
     *   Unmap graphics resources.  Unmaps the
     *     count graphics resources in resources.
     *   
     *   Once unmapped, the resources in resources may not be accessed by CUDA until they are mapped
     *     again.
     *   
     *   This function provides the synchronization
     *     guarantee that any CUDA work issued in stream before
     *     cuGraphicsUnmapResources() will complete before any subsequently issued
     *     graphics work begins.
     *   
     *   If resources includes any
     *     duplicate entries then CUDA_ERROR_INVALID_HANDLE is returned. If any
     *     of resources are not presently mapped for access by CUDA then
     *     CUDA_ERROR_NOT_MAPPED is returned.
     *   
     *   
     *     Note:
     *     Note that this
     *       function may also return error codes from previous, asynchronous
     *       launches.
     *     
     *   
     *   
     * 
     * 
     * @param count Number of resources to unmap
     * @param resources Resources to unmap
     * @param hStream Stream with which to synchronize
     * 
     * @return CUDA_SUCCESS, CUDA_ERROR_DEINITIALIZED, CUDA_ERROR_NOT_INITIALIZED,
     * CUDA_ERROR_INVALID_CONTEXT, CUDA_ERROR_INVALID_HANDLE,
     * CUDA_ERROR_NOT_MAPPED, CUDA_ERROR_UNKNOWN
     * 
     * @see JCudaDriver#cuGraphicsMapResources
     */    
    public static int cuGraphicsUnmapResources( int count, CUgraphicsResource resources[], CUstream hStream)
    {
        // Run the native unmapping call, then route its status code through checkResult.
        final int status = cuGraphicsUnmapResourcesNative(count, resources, hStream);
        return checkResult(status);
    }

    /**
     * JNI entry point backing {@link #cuGraphicsUnmapResources}.
     * The public wrapper passes the returned int on to checkResult.
     */
    private static native int cuGraphicsUnmapResourcesNative(int count, CUgraphicsResource resources[], CUstream hStream);



    /**
     * Set resource limits.
     * 
     * <pre>
     * CUresult cuCtxSetLimit (
     *      CUlimit limit,
     *      size_t value )
     * </pre>
     * 
     * 
     *   Set resource limits.  Setting limit to value is a request by the application to
     *     update the current limit maintained by the context. The driver is free
     *     to modify the requested
     *     value to meet h/w requirements (this
     *     could be clamping to minimum or maximum values, rounding up to nearest
     *     element size,
     *     etc). The application can use
     *     cuCtxGetLimit() to find out exactly what the limit has been set to.
     *   
     *   Setting each CUlimit has its own specific
     *     restrictions, so each is discussed here.
     *   
     *   
     *     
     *       CU_LIMIT_STACK_SIZE controls
     *         the stack size in bytes of each GPU thread. This limit is only
     *         applicable to devices of compute capability 2.0 and
     *         higher. Attempting to set this
     *         limit on devices of compute capability less than 2.0 will result in
     *         the error CUDA_ERROR_UNSUPPORTED_LIMIT being returned.
     *       
     *     
     *   
     *   
     *   
     *     
     *       CU_LIMIT_PRINTF_FIFO_SIZE
     *         controls the size in bytes of the FIFO used by the printf() device
     *         system call. Setting CU_LIMIT_PRINTF_FIFO_SIZE must be performed before
     *         launching any kernel that uses the printf() device system call,
     *         otherwise CUDA_ERROR_INVALID_VALUE will be returned. This limit is only
     *         applicable to devices of compute capability 2.0 and higher. Attempting
     *         to set this limit
     *         on devices of compute capability
     *         less than 2.0 will result in the error CUDA_ERROR_UNSUPPORTED_LIMIT
     *         being returned.
     *       
     *     
     *   
     *   
     *   
     *     
     *       CU_LIMIT_MALLOC_HEAP_SIZE
     *         controls the size in bytes of the heap used by the malloc() and free()
     *         device system calls. Setting CU_LIMIT_MALLOC_HEAP_SIZE must be performed
     *         before launching any kernel that uses the malloc() or free() device
     *         system calls, otherwise CUDA_ERROR_INVALID_VALUE will be returned. This
     *         limit is only applicable to devices of compute capability 2.0 and
     *         higher. Attempting to set this limit
     *         on devices of compute capability
     *         less than 2.0 will result in the error CUDA_ERROR_UNSUPPORTED_LIMIT
     *         being returned.
     *       
     *     
     *   
     *   
     *   
     *     
     *       CU_LIMIT_DEV_RUNTIME_SYNC_DEPTH
     *         controls the maximum nesting depth of a grid at which a thread can
     *         safely call cudaDeviceSynchronize(). Setting this limit
     *         must be performed before any
     *         launch of a kernel that uses the device runtime and calls
     *         cudaDeviceSynchronize() above the default
     *         sync depth, two levels of grids.
     *         Calls to cudaDeviceSynchronize() will fail with error code
     *         cudaErrorSyncDepthExceeded if
     *         the limitation is violated. This
     *         limit can be set smaller than the default or up to the maximum launch
     *         depth of 24. When setting
     *         this limit, keep in mind that
     *         additional levels of sync depth require the driver to reserve large
     *         amounts of device memory
     *         which can no longer be used for
     *         user allocations. If these reservations of device memory fail,
     *         cuCtxSetLimit will return CUDA_ERROR_OUT_OF_MEMORY, and the limit can
     *         be reset to a lower value. This limit is only applicable to devices of
     *         compute capability 3.5 and higher.
     *         Attempting to set this limit on
     *         devices of compute capability less than 3.5 will result in the error
     *         CUDA_ERROR_UNSUPPORTED_LIMIT being returned.
     *       
     *     
     *   
     *   
     *   
     *     
     *       CU_LIMIT_DEV_RUNTIME_PENDING_LAUNCH_COUNT controls the maximum number
     *         of outstanding device runtime launches that can be made from the
     *         current context. A grid is outstanding
     *         from the point of launch up
     *         until the grid is known to have been completed. Device runtime launches
     *         which violate this limitation
     *         fail and return
     *         cudaErrorLaunchPendingCountExceeded when cudaGetLastError() is called
     *         after launch. If more pending launches
     *         than the default (2048 launches)
     *         are needed for a module using the device runtime, this limit can be
     *         increased. Keep in mind
     *         that being able to sustain
     *         additional pending launches will require the driver to reserve larger
     *         amounts of device memory
     *         upfront which can no longer be
     *         used for allocations. If these reservations fail, cuCtxSetLimit will
     *         return CUDA_ERROR_OUT_OF_MEMORY, and the limit can be reset to a lower
     *         value. This limit is only applicable to devices of compute capability
     *         3.5 and higher.
     *         Attempting to set this limit on
     *         devices of compute capability less than 3.5 will result in the error
     *         CUDA_ERROR_UNSUPPORTED_LIMIT being returned.
     *       
     *     
     *   
     *   
     *   
     *     Note:
     *     Note that this
     *       function may also return error codes from previous, asynchronous
     *       launches.
     *     
     *   
     *   
     * 
     * 
     * @param limit Limit to set
     * @param value Size of limit
     * 
     * @return CUDA_SUCCESS, CUDA_ERROR_INVALID_VALUE, CUDA_ERROR_UNSUPPORTED_LIMIT,
     * CUDA_ERROR_OUT_OF_MEMORY
     * 
     * @see JCudaDriver#cuCtxCreate
     * @see JCudaDriver#cuCtxDestroy
     * @see JCudaDriver#cuCtxGetApiVersion
     * @see JCudaDriver#cuCtxGetCacheConfig
     * @see JCudaDriver#cuCtxGetDevice
     * @see JCudaDriver#cuCtxGetLimit
     * @see JCudaDriver#cuCtxPopCurrent
     * @see JCudaDriver#cuCtxPushCurrent
     * @see JCudaDriver#cuCtxSetCacheConfig
     * @see JCudaDriver#cuCtxSynchronize
     */    
    public static int cuCtxSetLimit(int limit, long value)
    {
        // Forward the limit/value pair to the JNI binding and pass the status to checkResult.
        final int status = cuCtxSetLimitNative(limit, value);
        return checkResult(status);
    }

    /**
     * JNI entry point backing {@link #cuCtxSetLimit}.
     * The public wrapper passes the returned int on to checkResult.
     */
    private static native int cuCtxSetLimitNative(int limit, long value);



    /**
     * Returns the preferred cache configuration for the current context.
     * 
     * <pre>
     * CUresult cuCtxGetCacheConfig (
     *      CUfunc_cache* pconfig )
     * </pre>
     * 
     * 
     *   Returns the preferred cache configuration
     *     for the current context.  On devices where the L1 cache and shared
     *     memory use the
     *     same hardware resources, this function
     *     returns through pconfig the preferred cache configuration
     *     for the current context. This is only a preference. The driver will
     *     use the requested configuration
     *     if possible, but it is free to choose a
     *     different configuration if required to execute functions.
     *   
     *   This will return a pconfig of
     *     CU_FUNC_CACHE_PREFER_NONE on devices where the size of the L1 cache
     *     and shared memory are fixed.
     *   
     *   The supported cache configurations are:
     *   

     *     
     *       CU_FUNC_CACHE_PREFER_NONE: no
     *         preference for shared memory or L1 (default)
     *       
     *     
     *     
     *       CU_FUNC_CACHE_PREFER_SHARED:
     *         prefer larger shared memory and smaller L1 cache
     *       
     *     
     *     
     *       CU_FUNC_CACHE_PREFER_L1: prefer
     *         larger L1 cache and smaller shared memory
     *       
     *     
     *     
     *       CU_FUNC_CACHE_PREFER_EQUAL:
     *         prefer equal sized L1 cache and shared memory
     *       
     *     
     *   
     *   
     *   
     *     Note:
     *     Note that this
     *       function may also return error codes from previous, asynchronous
     *       launches.
     *     
     *   
     *   
     * 
     * 
     * @param pconfig Returned cache configuration
     * 
     * @return CUDA_SUCCESS, CUDA_ERROR_DEINITIALIZED, CUDA_ERROR_NOT_INITIALIZED,
     * CUDA_ERROR_INVALID_CONTEXT, CUDA_ERROR_INVALID_VALUE
     * 
     * @see JCudaDriver#cuCtxCreate
     * @see JCudaDriver#cuCtxDestroy
     * @see JCudaDriver#cuCtxGetApiVersion
     * @see JCudaDriver#cuCtxGetDevice
     * @see JCudaDriver#cuCtxGetLimit
     * @see JCudaDriver#cuCtxPopCurrent
     * @see JCudaDriver#cuCtxPushCurrent
     * @see JCudaDriver#cuCtxSetCacheConfig
     * @see JCudaDriver#cuCtxSetLimit
     * @see JCudaDriver#cuCtxSynchronize
     * @see JCudaDriver#cuFuncSetCacheConfig
     */    
    public static int cuCtxGetCacheConfig(int pconfig[])
    {
        // The native call receives the pconfig array; its status code then goes through checkResult.
        final int status = cuCtxGetCacheConfigNative(pconfig);
        return checkResult(status);
    }

    /**
     * JNI entry point backing {@link #cuCtxGetCacheConfig}.
     * The public wrapper passes the returned int on to checkResult.
     */
    private static native int cuCtxGetCacheConfigNative(int[] pconfig);

    /**
     * Sets the preferred cache configuration for the current context.
     * 
     * <pre>
     * CUresult cuCtxSetCacheConfig (
     *      CUfunc_cache config )
     * </pre>
     * 
     * 
     *   Sets the preferred cache configuration
     *     for the current context.  On devices where the L1 cache and shared
     *     memory use the same
     *     hardware resources, this sets through
     *     config the preferred cache configuration for the current
     *     context. This is only a preference. The driver will use the requested
     *     configuration
     *     if possible, but it is free to choose a
     *     different configuration if required to execute the function. Any
     *     function preference
     *     set via cuFuncSetCacheConfig() will be
     *     preferred over this context-wide setting. Setting the context-wide
     *     cache configuration to CU_FUNC_CACHE_PREFER_NONE will cause subsequent
     *     kernel launches to prefer to not change the cache configuration unless
     *     required to launch the kernel.
     *   
     *   This setting does nothing on devices
     *     where the size of the L1 cache and shared memory are fixed.
     *   
     *   Launching a kernel with a different
     *     preference than the most recent preference setting may insert a
     *     device-side synchronization
     *     point.
     *   
     *   The supported cache configurations are:
     *   

     *     
     *       CU_FUNC_CACHE_PREFER_NONE: no
     *         preference for shared memory or L1 (default)
     *       
     *     
     *     
     *       CU_FUNC_CACHE_PREFER_SHARED:
     *         prefer larger shared memory and smaller L1 cache
     *       
     *     
     *     
     *       CU_FUNC_CACHE_PREFER_L1: prefer
     *         larger L1 cache and smaller shared memory
     *       
     *     
     *     
     *       CU_FUNC_CACHE_PREFER_EQUAL:
     *         prefer equal sized L1 cache and shared memory
     *       
     *     
     *   
     *   
     *   
     *     Note:
     *     Note that this
     *       function may also return error codes from previous, asynchronous
     *       launches.
     *     
     *   
     *   
     * 
     * 
     * @param config Requested cache configuration
     * 
     * @return CUDA_SUCCESS, CUDA_ERROR_DEINITIALIZED, CUDA_ERROR_NOT_INITIALIZED,
     * CUDA_ERROR_INVALID_CONTEXT, CUDA_ERROR_INVALID_VALUE
     * 
     * @see JCudaDriver#cuCtxCreate
     * @see JCudaDriver#cuCtxDestroy
     * @see JCudaDriver#cuCtxGetApiVersion
     * @see JCudaDriver#cuCtxGetCacheConfig
     * @see JCudaDriver#cuCtxGetDevice
     * @see JCudaDriver#cuCtxGetLimit
     * @see JCudaDriver#cuCtxPopCurrent
     * @see JCudaDriver#cuCtxPushCurrent
     * @see JCudaDriver#cuCtxSetLimit
     * @see JCudaDriver#cuCtxSynchronize
     * @see JCudaDriver#cuFuncSetCacheConfig
     */    
    public static int cuCtxSetCacheConfig(int config)
    {
        // Forward the requested configuration to the JNI binding and pass the status to checkResult.
        final int status = cuCtxSetCacheConfigNative(config);
        return checkResult(status);
    }

    /**
     * JNI entry point backing {@link #cuCtxSetCacheConfig}.
     * The public wrapper passes the returned int on to checkResult.
     */
    private static native int cuCtxSetCacheConfigNative(int config);


    /**
     * Returns the current shared memory configuration for the current context.
     * 
     * <pre>
     * CUresult cuCtxGetSharedMemConfig (
     *      CUsharedconfig* pConfig )
     * </pre>
     * 
     * 
     *   Returns the current shared memory
     *     configuration for the current context.  This function will return in
     *     pConfig the current size of shared memory banks in the
     *     current context. On devices with configurable shared memory banks,
     *     cuCtxSetSharedMemConfig can be used to change this setting, so that
     *     all subsequent kernel launches will by default use the new bank size.
     *     When cuCtxGetSharedMemConfig is called on devices without configurable
     *     shared memory, it will return the fixed bank size of the hardware.
     *   
     *   The returned bank configurations can be
     *     either:
     *   

     *     
     *       CU_SHARED_MEM_CONFIG_FOUR_BYTE_BANK_SIZE: shared memory bank width is
     *         four bytes.
     *       
     *     
     *     
     *       CU_SHARED_MEM_CONFIG_EIGHT_BYTE_BANK_SIZE: shared memory bank width
     *         is eight bytes.
     *       
     *     
     *   
     *   
     *   
     *     Note:
     *     Note that this
     *       function may also return error codes from previous, asynchronous
     *       launches.
     *     
     *   
     *   
     * 
     * 
     * @param pConfig returned shared memory configuration
     * 
     * @return CUDA_SUCCESS, CUDA_ERROR_DEINITIALIZED, CUDA_ERROR_NOT_INITIALIZED,
     * CUDA_ERROR_INVALID_CONTEXT, CUDA_ERROR_INVALID_VALUE
     * 
     * @see JCudaDriver#cuCtxCreate
     * @see JCudaDriver#cuCtxDestroy
     * @see JCudaDriver#cuCtxGetApiVersion
     * @see JCudaDriver#cuCtxGetCacheConfig
     * @see JCudaDriver#cuCtxGetDevice
     * @see JCudaDriver#cuCtxGetLimit
     * @see JCudaDriver#cuCtxPopCurrent
     * @see JCudaDriver#cuCtxPushCurrent
     * @see JCudaDriver#cuCtxSetLimit
     * @see JCudaDriver#cuCtxSynchronize
     * @see JCudaDriver#cuCtxSetSharedMemConfig
     * @see JCudaDriver#cuFuncSetCacheConfig
     */    
    public static int cuCtxGetSharedMemConfig(int pConfig[])
    {
        // Must delegate to the ...Native binding. Calling cuCtxGetSharedMemConfig
        // here would invoke this very method again and recurse until a
        // StackOverflowError, never reaching the native function.
        final int status = cuCtxGetSharedMemConfigNative(pConfig);
        return checkResult(status);
    }

    /**
     * JNI entry point backing {@link #cuCtxGetSharedMemConfig}.
     * The public wrapper passes the returned int on to checkResult.
     */
    private static native int cuCtxGetSharedMemConfigNative(int pConfig[]);
    
    
    /**
     * Sets the shared memory configuration for the current context.
     * 
     * <pre>
     * CUresult cuCtxSetSharedMemConfig (
     *      CUsharedconfig config )
     * </pre>
     * 
     * 
     *   Sets the shared memory configuration for
     *     the current context.  On devices with configurable shared memory banks,
     *     this function
     *     will set the context's shared memory bank
     *     size which is used for subsequent kernel launches.
     *   
     *   Changing the shared memory configuration
     *     between launches may insert a device side synchronization point between
     *     those launches.
     *   
     *   Changing the shared memory bank size
     *     will not increase shared memory usage or affect occupancy of kernels,
     *     but may have major
     *     effects on performance. Larger bank sizes
     *     will allow for greater potential bandwidth to shared memory, but will
     *     change what
     *     kinds of accesses to shared memory will
     *     result in bank conflicts.
     *   
     *   This function will do nothing on devices
     *     with fixed shared memory bank size.
     *   
     *   The supported bank configurations are:
     *   

     *     
     *       CU_SHARED_MEM_CONFIG_DEFAULT_BANK_SIZE:
     *         set bank width to the default initial setting (currently, four bytes).
     *       
     *     
     *     
     *       CU_SHARED_MEM_CONFIG_FOUR_BYTE_BANK_SIZE: set shared memory bank width
     *         to be natively four bytes.
     *       
     *     
     *     
     *       CU_SHARED_MEM_CONFIG_EIGHT_BYTE_BANK_SIZE: set shared memory bank
     *         width to be natively eight bytes.
     *       
     *     
     *   
     *   
     *   
     *     Note:
     *     Note that this
     *       function may also return error codes from previous, asynchronous
     *       launches.
     *     
     *   
     *   
     * 
     * 
     * @param config requested shared memory configuration
     * 
     * @return CUDA_SUCCESS, CUDA_ERROR_DEINITIALIZED, CUDA_ERROR_NOT_INITIALIZED,
     * CUDA_ERROR_INVALID_CONTEXT, CUDA_ERROR_INVALID_VALUE
     * 
     * @see JCudaDriver#cuCtxCreate
     * @see JCudaDriver#cuCtxDestroy
     * @see JCudaDriver#cuCtxGetApiVersion
     * @see JCudaDriver#cuCtxGetCacheConfig
     * @see JCudaDriver#cuCtxGetDevice
     * @see JCudaDriver#cuCtxGetLimit
     * @see JCudaDriver#cuCtxPopCurrent
     * @see JCudaDriver#cuCtxPushCurrent
     * @see JCudaDriver#cuCtxSetLimit
     * @see JCudaDriver#cuCtxSynchronize
     * @see JCudaDriver#cuCtxGetSharedMemConfig
     * @see JCudaDriver#cuFuncSetCacheConfig
     */    
    public static int cuCtxSetSharedMemConfig(int config)
    {
        // Forward the requested bank configuration to the JNI binding and pass the status to checkResult.
        final int status = cuCtxSetSharedMemConfigNative(config);
        return checkResult(status);
    }

    /**
     * JNI entry point backing {@link #cuCtxSetSharedMemConfig}.
     * The public wrapper passes the returned int on to checkResult.
     */
    private static native int cuCtxSetSharedMemConfigNative(int config);
    
    
    /**
     * Gets the context's API version.
     * 
     * <pre>
     * CUresult cuCtxGetApiVersion (
     *      CUcontext ctx,
     *      unsigned int* version )
     * </pre>
     * 
     * 
     *   Gets the context's API version.  Returns
     *     a version number in version corresponding to the capabilities
     *     of the context (e.g. 3010 or 3020), which library developers can use
     *     to direct callers
     *     to a specific API version. If ctx is NULL, returns the API version used to create the currently
     *     bound context.
     *   
     *   Note that new API versions are only
     *     introduced when context capabilities are changed that break binary
     *     compatibility, so the
     *     API version and driver version may be
     *     different. For example, it is valid for the API version to be 3020
     *     while the driver
     *     version is 4020.
     *   
     *   
     *     Note:
     *     Note that this
     *       function may also return error codes from previous, asynchronous
     *       launches.
     *     
     *   
     *   
     * 
     * 
     * @param ctx Context to check
     * @param version Pointer to version
     * 
     * @return CUDA_SUCCESS, CUDA_ERROR_DEINITIALIZED, CUDA_ERROR_NOT_INITIALIZED,
     * CUDA_ERROR_INVALID_CONTEXT, CUDA_ERROR_UNKNOWN
     * 
     * @see JCudaDriver#cuCtxCreate
     * @see JCudaDriver#cuCtxDestroy
     * @see JCudaDriver#cuCtxGetDevice
     * @see JCudaDriver#cuCtxGetLimit
     * @see JCudaDriver#cuCtxPopCurrent
     * @see JCudaDriver#cuCtxPushCurrent
     * @see JCudaDriver#cuCtxSetCacheConfig
     * @see JCudaDriver#cuCtxSetLimit
     * @see JCudaDriver#cuCtxSynchronize
     */    
    public static int cuCtxGetApiVersion(CUcontext ctx, int version[])
    {
        // The native call receives the context and the version array; its status goes through checkResult.
        final int status = cuCtxGetApiVersionNative(ctx, version);
        return checkResult(status);
    }

    /**
     * JNI entry point backing {@link #cuCtxGetApiVersion}.
     * The public wrapper passes the returned int on to checkResult.
     */
    private static native int cuCtxGetApiVersionNative(CUcontext ctx, int version[]);
    
    
    /**
     * Returns numerical values that correspond to the least and
     * greatest stream priorities. 

     *

     * Returns in *leastPriority and *greatestPriority the numerical values that correspond
     * to the least and greatest stream priorities respectively. Stream priorities
     * follow a convention where lower numbers imply greater priorities. The range of
     * meaningful stream priorities is given by [*greatestPriority, *leastPriority].
     * If the user attempts to create a stream with a priority value that is
     * outside the meaningful range as specified by this API, the priority is
     * automatically clamped down or up to either *leastPriority or *greatestPriority
     * respectively. See {@link JCudaDriver#cuStreamCreateWithPriority} for details on creating a
     * priority stream.

     * A NULL may be passed in for *leastPriority or *greatestPriority if the value
     * is not desired.

     * 

     * This function will return '0' in both *leastPriority and *greatestPriority if
     * the current context's device does not support stream priorities
     * (see {@code cuDeviceGetAttribute}).
     *
     * @param leastPriority    Pointer to an int in which the numerical value for least
     *                         stream priority is returned
     * @param greatestPriority Pointer to an int in which the numerical value for greatest
     *                         stream priority is returned
     *
     * @return CUDA_SUCCESS, CUDA_ERROR_INVALID_VALUE
     *
     * @see JCudaDriver#cuStreamCreateWithPriority
     * @see JCudaDriver#cuStreamGetPriority
     * @see JCudaDriver#cuCtxGetDevice
     * @see JCudaDriver#cuCtxSetLimit
     * @see JCudaDriver#cuCtxSynchronize
     */
    public static int cuCtxGetStreamPriorityRange(int leastPriority[], int greatestPriority[])
    {
        // Both arrays are handed to the JNI binding; its status code then goes through checkResult.
        final int status = cuCtxGetStreamPriorityRangeNative(leastPriority, greatestPriority);
        return checkResult(status);
    }

    /**
     * JNI entry point backing {@link #cuCtxGetStreamPriorityRange}.
     * The public wrapper passes the returned int on to checkResult.
     */
    private static native int cuCtxGetStreamPriorityRangeNative(int leastPriority[], int greatestPriority[]);
    
    
    /**
     * Launches a CUDA function.
     *
     * <pre>
     * CUresult cuLaunchKernel (
     *      CUfunction f,
     *      unsigned int gridDimX,
     *      unsigned int gridDimY,
     *      unsigned int gridDimZ,
     *      unsigned int blockDimX,
     *      unsigned int blockDimY,
     *      unsigned int blockDimZ,
     *      unsigned int sharedMemBytes,
     *      CUstream hStream,
     *      void** kernelParams,
     *      void** extra )
     * </pre>
     *
     * <p>
     * Invokes the kernel {@code f} on a {@code gridDimX} x {@code gridDimY}
     * x {@code gridDimZ} grid of blocks. Each block contains
     * {@code blockDimX} x {@code blockDimY} x {@code blockDimZ} threads.
     * </p>
     * <p>
     * {@code sharedMemBytes} sets the amount of dynamic shared memory that
     * will be available to each thread block.
     * </p>
     * <p>
     * cuLaunchKernel() can optionally be associated to a stream by passing a
     * non-zero {@code hStream} argument.
     * </p>
     * <p>
     * Kernel parameters to {@code f} can be specified in one of two ways:
     * </p>
     * <p>
     * 1) Kernel parameters can be specified via {@code kernelParams}. If
     * {@code f} has N parameters, then {@code kernelParams} needs to be an
     * array of N pointers. Each of {@code kernelParams[0]} through
     * {@code kernelParams[N-1]} must point to a region of memory from which
     * the actual kernel parameter will be copied. The number of kernel
     * parameters and their offsets and sizes do not need to be specified as
     * that information is retrieved directly from the kernel's image.
     * </p>
     * <p>
     * 2) Kernel parameters can also be packaged by the application into a
     * single buffer that is passed in via the {@code extra} parameter. This
     * places the burden on the application of knowing each kernel
     * parameter's size and alignment/padding within the buffer. Here is an
     * example of using the {@code extra} parameter in this manner:
     * </p>
     * <pre>{@code
     *     size_t argBufferSize;
     *     char argBuffer[256];
     *
     *     // populate argBuffer and argBufferSize
     *
     *     void *config[] = {
     *         CU_LAUNCH_PARAM_BUFFER_POINTER, argBuffer,
     *         CU_LAUNCH_PARAM_BUFFER_SIZE,    &argBufferSize,
     *         CU_LAUNCH_PARAM_END
     *     };
     *     status = cuLaunchKernel(f, gx, gy, gz, bx, by, bz, sh, s, NULL, config);
     * }</pre>
     * <p>
     * The {@code extra} parameter exists to allow cuLaunchKernel to take
     * additional less commonly used arguments. {@code extra} specifies a
     * list of names of extra settings and their corresponding values. Each
     * extra setting name is immediately followed by the corresponding value.
     * The list must be terminated with either NULL or CU_LAUNCH_PARAM_END.
     * </p>
     * <ul>
     *   <li>CU_LAUNCH_PARAM_END, which indicates the end of the
     *       {@code extra} array;</li>
     *   <li>CU_LAUNCH_PARAM_BUFFER_POINTER, which specifies that the next
     *       value in {@code extra} will be a pointer to a buffer containing
     *       all the kernel parameters for launching kernel {@code f};</li>
     *   <li>CU_LAUNCH_PARAM_BUFFER_SIZE, which specifies that the next value
     *       in {@code extra} will be a pointer to a size_t containing the
     *       size of the buffer specified with
     *       CU_LAUNCH_PARAM_BUFFER_POINTER;</li>
     * </ul>
     * <p>
     * The error CUDA_ERROR_INVALID_VALUE will be returned if kernel
     * parameters are specified with both {@code kernelParams} and
     * {@code extra} (i.e. both {@code kernelParams} and {@code extra} are
     * non-NULL).
     * </p>
     * <p>
     * Calling cuLaunchKernel() sets persistent function state that is the
     * same as function state set through the following deprecated APIs:
     * </p>
     * <p>
     * cuFuncSetBlockShape() cuFuncSetSharedSize() cuParamSetSize()
     * cuParamSeti() cuParamSetf() cuParamSetv()
     * </p>
     * <p>
     * When the kernel {@code f} is launched via cuLaunchKernel(), the
     * previous block shape, shared size and parameter info associated with
     * {@code f} is overwritten.
     * </p>
     * <p>
     * Note that to use cuLaunchKernel(), the kernel {@code f} must either
     * have been compiled with toolchain version 3.2 or later so that it will
     * contain kernel parameter information, or have no kernel parameters. If
     * either of these conditions is not met, then cuLaunchKernel() will
     * return CUDA_ERROR_INVALID_IMAGE.
     * </p>
     *
     * @param f Kernel to launch
     * @param gridDimX Width of grid in blocks
     * @param gridDimY Height of grid in blocks
     * @param gridDimZ Depth of grid in blocks
     * @param blockDimX X dimension of each thread block
     * @param blockDimY Y dimension of each thread block
     * @param blockDimZ Z dimension of each thread block
     * @param sharedMemBytes Dynamic shared-memory size per thread block in bytes
     * @param hStream Stream identifier
     * @param kernelParams Array of pointers to kernel parameters
     * @param extra Extra options
     *
     * @return CUDA_SUCCESS, CUDA_ERROR_DEINITIALIZED, CUDA_ERROR_NOT_INITIALIZED,
     * CUDA_ERROR_INVALID_CONTEXT, CUDA_ERROR_INVALID_HANDLE,
     * CUDA_ERROR_INVALID_IMAGE, CUDA_ERROR_INVALID_VALUE,
     * CUDA_ERROR_LAUNCH_FAILED, CUDA_ERROR_LAUNCH_OUT_OF_RESOURCES,
     * CUDA_ERROR_LAUNCH_TIMEOUT, CUDA_ERROR_LAUNCH_INCOMPATIBLE_TEXTURING,
     * CUDA_ERROR_SHARED_OBJECT_INIT_FAILED
     *
     * @see JCudaDriver#cuCtxGetCacheConfig
     * @see JCudaDriver#cuCtxSetCacheConfig
     * @see JCudaDriver#cuFuncSetCacheConfig
     * @see JCudaDriver#cuFuncGetAttribute
     */
    public static int cuLaunchKernel(
        CUfunction f,
        int gridDimX,
        int gridDimY,
        int gridDimZ,
        int blockDimX,
        int blockDimY,
        int blockDimZ,
        int sharedMemBytes,
        CUstream hStream,
        Pointer kernelParams,
        Pointer extra)
    {
        // Forward every argument unchanged to the native launcher ...
        final int status = cuLaunchKernelNative(
            f,
            gridDimX, gridDimY, gridDimZ,
            blockDimX, blockDimY, blockDimZ,
            sharedMemBytes,
            hStream,
            kernelParams,
            extra);
        // ... and route the resulting status code through checkResult.
        return checkResult(status);
    }

    // Native counterpart of cuLaunchKernel; takes the same arguments in the same order.
    private static native int cuLaunchKernelNative(
        CUfunction f,
        int gridDimX,
        int gridDimY,
        int gridDimZ,
        int blockDimX,
        int blockDimY,
        int blockDimZ,
        int sharedMemBytes,
        CUstream hStream,
        Pointer kernelParams,
        Pointer extra);

    /**
     * Returns resource limits.
     *
     * <pre>
     * CUresult cuCtxGetLimit (
     *      size_t* pvalue,
     *      CUlimit limit )
     * </pre>
     *
     * <p>
     * Returns in {@code *pvalue} the current size of {@code limit}. The
     * supported CUlimit values are:
     * </p>
     * <ul>
     *   <li>CU_LIMIT_STACK_SIZE: stack size in bytes of each GPU thread.</li>
     *   <li>CU_LIMIT_PRINTF_FIFO_SIZE: size in bytes of the FIFO used by the
     *       printf() device system call.</li>
     *   <li>CU_LIMIT_MALLOC_HEAP_SIZE: size in bytes of the heap used by the
     *       malloc() and free() device system calls.</li>
     *   <li>CU_LIMIT_DEV_RUNTIME_SYNC_DEPTH: maximum grid depth at which a
     *       thread can issue the device runtime call cudaDeviceSynchronize()
     *       to wait on child grid launches to complete.</li>
     *   <li>CU_LIMIT_DEV_RUNTIME_PENDING_LAUNCH_COUNT: maximum number of
     *       outstanding device runtime launches that can be made from this
     *       context.</li>
     * </ul>
     * <p>
     * Note: this function may also return error codes from previous,
     * asynchronous launches.
     * </p>
     *
     * @param pvalue Returned size of limit
     * @param limit Limit to query
     *
     * @return CUDA_SUCCESS, CUDA_ERROR_INVALID_VALUE,
     * CUDA_ERROR_UNSUPPORTED_LIMIT
     *
     * @see JCudaDriver#cuCtxCreate
     * @see JCudaDriver#cuCtxDestroy
     * @see JCudaDriver#cuCtxGetApiVersion
     * @see JCudaDriver#cuCtxGetCacheConfig
     * @see JCudaDriver#cuCtxGetDevice
     * @see JCudaDriver#cuCtxPopCurrent
     * @see JCudaDriver#cuCtxPushCurrent
     * @see JCudaDriver#cuCtxSetCacheConfig
     * @see JCudaDriver#cuCtxSetLimit
     * @see JCudaDriver#cuCtxSynchronize
     */
    public static int cuCtxGetLimit(long[] pvalue, int limit)
    {
        // Query through the native method, then pass its status code to checkResult.
        final int status = cuCtxGetLimitNative(pvalue, limit);
        return checkResult(status);
    }
    // Native counterpart of cuCtxGetLimit.
    private static native int cuCtxGetLimitNative(long[] pvalue, int limit);




    /**
     * Initialize the profiling.
     *
     * <pre>
     * CUresult cuProfilerInitialize (
     *      const char* configFile,
     *      const char* outputFile,
     *      CUoutput_mode outputMode )
     * </pre>
     *
     * <p>
     * Using this API user can initialize the CUDA profiler by specifying the
     * configuration file, output file and output file format. This API is
     * generally used to profile different set of counters by looping the
     * kernel launch. The {@code configFile} parameter can be used to select
     * profiling options including profiler counters. Refer to the "Compute
     * Command Line Profiler User Guide" for supported profiler options and
     * counters.
     * </p>
     * <p>
     * Limitation: The CUDA profiler cannot be initialized with this API if
     * another profiling tool is already active, as indicated by the
     * CUDA_ERROR_PROFILER_DISABLED return code.
     * </p>
     * <p>
     * Typical usage of the profiling APIs is as follows:
     * </p>
     * <pre>{@code
     * for each set of counters/options
     * {
     *     cuProfilerInitialize(); //Initialize profiling, set the counters or options in the config file
     *     ...
     *     cuProfilerStart();
     *     // code to be profiled
     *     cuProfilerStop();
     *     ...
     *     cuProfilerStart();
     *     // code to be profiled
     *     cuProfilerStop();
     *     ...
     * }
     * }</pre>
     * <p>
     * Note: this function may also return error codes from previous,
     * asynchronous launches.
     * </p>
     *
     * @param configFile Name of the config file that lists the counters/options for profiling.
     * @param outputFile Name of the outputFile where the profiling results will be stored.
     * @param outputMode outputMode, can be CU_OUT_KEY_VALUE_PAIR or CU_OUT_CSV.
     *
     * @return CUDA_SUCCESS, CUDA_ERROR_INVALID_CONTEXT, CUDA_ERROR_INVALID_VALUE,
     * CUDA_ERROR_PROFILER_DISABLED
     *
     * @see JCudaDriver#cuProfilerStart
     * @see JCudaDriver#cuProfilerStop
     */
    public static int cuProfilerInitialize(String configFile, String outputFile, int outputMode)
    {
        // Delegate to the native method, then pass its status code to checkResult.
        final int status = cuProfilerInitializeNative(configFile, outputFile, outputMode);
        return checkResult(status);
    }
    // Native counterpart of cuProfilerInitialize.
    private static native int cuProfilerInitializeNative(String configFile, String outputFile, int outputMode);

    /**
     * Enable profiling.
     *
     * <pre>
     * CUresult cuProfilerStart (
     *      void )
     * </pre>
     *
     * <p>
     * Enables profile collection by the active profiling tool. If profiling
     * is already enabled, then cuProfilerStart() has no effect.
     * </p>
     * <p>
     * cuProfilerStart and cuProfilerStop APIs are used to programmatically
     * control the profiling granularity by allowing profiling to be done
     * only on selective pieces of code.
     * </p>
     * <p>
     * Note: this function may also return error codes from previous,
     * asynchronous launches.
     * </p>
     *
     * @return CUDA_SUCCESS, CUDA_ERROR_INVALID_CONTEXT
     *
     * @see JCudaDriver#cuProfilerInitialize
     * @see JCudaDriver#cuProfilerStop
     */
    public static int cuProfilerStart()
    {
        // Delegate to the native method, then pass its status code to checkResult.
        final int status = cuProfilerStartNative();
        return checkResult(status);
    }
    // Native counterpart of cuProfilerStart.
    private static native int cuProfilerStartNative();

    /**
     * Disable profiling.
     *
     * <pre>
     * CUresult cuProfilerStop (
     *      void )
     * </pre>
     *
     * <p>
     * Disables profile collection by the active profiling tool. If profiling
     * is already disabled, then cuProfilerStop() has no effect.
     * </p>
     * <p>
     * cuProfilerStart and cuProfilerStop APIs are used to programmatically
     * control the profiling granularity by allowing profiling to be done
     * only on selective pieces of code.
     * </p>
     * <p>
     * Note: this function may also return error codes from previous,
     * asynchronous launches.
     * </p>
     *
     * @return CUDA_SUCCESS, CUDA_ERROR_INVALID_CONTEXT
     *
     * @see JCudaDriver#cuProfilerInitialize
     * @see JCudaDriver#cuProfilerStart
     */
    public static int cuProfilerStop()
    {
        // Delegate to the native method, then pass its status code to checkResult.
        final int status = cuProfilerStopNative();
        return checkResult(status);
    }
    // Native counterpart of cuProfilerStop.
    private static native int cuProfilerStopNative();







}
// Valid CUDA array extents. Each extent is given as
// {(width range in elements), (height range), (depth range)}.
//
// | CUDA array type | Valid extents that must always be met | Valid extents with CUDA_ARRAY3D_SURFACE_LDST set |
// |-----------------|---------------------------------------|--------------------------------------------------|
// | 1D              | { (1,TEXTURE1D_WIDTH), 0, 0 } | { (1,SURFACE1D_WIDTH), 0, 0 } |
// | 2D              | { (1,TEXTURE2D_WIDTH), (1,TEXTURE2D_HEIGHT), 0 } | { (1,SURFACE2D_WIDTH), (1,SURFACE2D_HEIGHT), 0 } |
// | 3D              | { (1,TEXTURE3D_WIDTH), (1,TEXTURE3D_HEIGHT), (1,TEXTURE3D_DEPTH) } OR { (1,TEXTURE3D_WIDTH_ALTERNATE), (1,TEXTURE3D_HEIGHT_ALTERNATE), (1,TEXTURE3D_DEPTH_ALTERNATE) } | { (1,SURFACE3D_WIDTH), (1,SURFACE3D_HEIGHT), (1,SURFACE3D_DEPTH) } |
// | 1D Layered      | { (1,TEXTURE1D_LAYERED_WIDTH), 0, (1,TEXTURE1D_LAYERED_LAYERS) } | { (1,SURFACE1D_LAYERED_WIDTH), 0, (1,SURFACE1D_LAYERED_LAYERS) } |
// | 2D Layered      | { (1,TEXTURE2D_LAYERED_WIDTH), (1,TEXTURE2D_LAYERED_HEIGHT), (1,TEXTURE2D_LAYERED_LAYERS) } | { (1,SURFACE2D_LAYERED_WIDTH), (1,SURFACE2D_LAYERED_HEIGHT), (1,SURFACE2D_LAYERED_LAYERS) } |
// | Cubemap         | { (1,TEXTURECUBEMAP_WIDTH), (1,TEXTURECUBEMAP_WIDTH), 6 } | { (1,SURFACECUBEMAP_WIDTH), (1,SURFACECUBEMAP_WIDTH), 6 } |
// | Cubemap Layered | { (1,TEXTURECUBEMAP_LAYERED_WIDTH), (1,TEXTURECUBEMAP_LAYERED_WIDTH), (1,TEXTURECUBEMAP_LAYERED_LAYERS) } | { (1,SURFACECUBEMAP_LAYERED_WIDTH), (1,SURFACECUBEMAP_LAYERED_WIDTH), (1,SURFACECUBEMAP_LAYERED_LAYERS) } |
// C signature of cuLaunchKernel:
//
// CUresult cuLaunchKernel (
//      CUfunction   f,
//      unsigned int gridDimX,
//      unsigned int gridDimY,
//      unsigned int gridDimZ,
//      unsigned int blockDimX,
//      unsigned int blockDimY,
//      unsigned int blockDimZ,
//      unsigned int sharedMemBytes,
//      CUstream     hStream,
//      void **      kernelParams,
//      void **      extra
// )