All Downloads are FREE. Search and download functionalities are using the official Maven repository.

jcuda.driver.JCudaDriver Maven / Gradle / Ivy

There is a newer version: 12.0.0
Show newest version
/*
 * JCuda - Java bindings for NVIDIA CUDA driver and runtime API
 *
 * Copyright (c) 2009-2015 Marco Hutter - http://www.jcuda.org
 *
 * Permission is hereby granted, free of charge, to any person
 * obtaining a copy of this software and associated documentation
 * files (the "Software"), to deal in the Software without
 * restriction, including without limitation the rights to use,
 * copy, modify, merge, publish, distribute, sublicense, and/or sell
 * copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following
 * conditions:
 *
 * The above copyright notice and this permission notice shall be
 * included in all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
 * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
 * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
 * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
 * OTHER DEALINGS IN THE SOFTWARE.
 */

package jcuda.driver;

import java.nio.charset.StandardCharsets;
import java.util.Arrays;

import jcuda.CudaException;
import jcuda.JCudaVersion;
import jcuda.LibUtils;
import jcuda.LibUtilsCuda;
import jcuda.LogLevel;
import jcuda.Pointer;
import jcuda.runtime.JCuda;

/**
 * Java bindings for the NVidia CUDA driver API.
*
* Most comments are extracted from the CUDA online documentation */ public class JCudaDriver { /** The CUDA version */ public static final int CUDA_VERSION = 11020; /** * If set, host memory is portable between CUDA contexts. * Flag for {@link JCudaDriver#cuMemHostAlloc} */ public static final int CU_MEMHOSTALLOC_PORTABLE = 0x01; /** * If set, host memory is mapped into CUDA address space and * JCudaDriver#cuMemHostGetDevicePointer may be called on the host pointer. * Flag for {@link JCudaDriver#cuMemHostAlloc} */ public static final int CU_MEMHOSTALLOC_DEVICEMAP = 0x02; /** * If set, host memory is allocated as write-combined - fast to write, * faster to DMA, slow to read except via SSE4 streaming load instruction * (MOVNTDQA). * Flag for {@link JCudaDriver#cuMemHostAlloc} */ public static final int CU_MEMHOSTALLOC_WRITECOMBINED = 0x04; /** * If set, host memory is portable between CUDA contexts. * Flag for ::cuMemHostRegister() */ public static final int CU_MEMHOSTREGISTER_PORTABLE = 0x01; /** * If set, host memory is mapped into CUDA address space and * ::cuMemHostGetDevicePointer() may be called on the host pointer. * Flag for ::cuMemHostRegister() */ public static final int CU_MEMHOSTREGISTER_DEVICEMAP = 0x02; /** * If set, peer memory is mapped into CUDA address space and * ::cuMemPeerGetDevicePointer() may be called on the host pointer. * Flag for ::cuMemPeerRegister() * @deprecated This value has been added in CUDA 4.0 RC, * and removed in CUDA 4.0 RC2 */ @Deprecated public static final int CU_MEMPEERREGISTER_DEVICEMAP = 0x02; /** * If set, the passed memory pointer is treated as pointing to some * memory-mapped I/O space, e.g. belonging to a third-party PCIe device. * On Windows the flag is a no-op. * On Linux that memory is marked as non cache-coherent for the GPU and * is expected to be physically contiguous. It may return * CUDA_ERROR_NOT_PERMITTED if run as an unprivileged user, * CUDA_ERROR_NOT_SUPPORTED on older Linux kernel versions. 
* On all other platforms, it is not supported and CUDA_ERROR_NOT_SUPPORTED * is returned. * Flag for ::cuMemHostRegister() */ public static final int CU_MEMHOSTREGISTER_IOMEMORY = 0x04; /** * If set, the passed memory pointer is treated as pointing to memory that is * considered read-only by the device. On platforms without * CU_DEVICE_ATTRIBUTE_PAGEABLE_MEMORY_ACCESS_USES_HOST_PAGE_TABLES, this flag is * required in order to register memory mapped to the CPU as read-only. Support * for the use of this flag can be queried from the device attribute * CU_DEVICE_ATTRIBUTE_READ_ONLY_HOST_REGISTER_SUPPORTED. Using this flag with * a current context associated with a device that does not have this attribute * set will cause ::cuMemHostRegister to error with CUDA_ERROR_NOT_SUPPORTED. */ public static final int CU_MEMHOSTREGISTER_READ_ONLY = 0x08; /** * Indicates that the layered sparse CUDA array or CUDA mipmapped array * has a single mip tail region for all layers */ public static final int CU_ARRAY_SPARSE_PROPERTIES_SINGLE_MIPTAIL = 0x1; /** * This flag if set indicates that the memory will be used as a tile pool. */ public static final int CU_MEM_CREATE_USAGE_TILE_POOL = 0x1; /** * If set, each kernel launched as part of * ::cuLaunchCooperativeKernelMultiDevice only waits for prior work in the * stream corresponding to that GPU to complete before the kernel begins * execution. */ public static final int CUDA_COOPERATIVE_LAUNCH_MULTI_DEVICE_NO_PRE_LAUNCH_SYNC = 0x01; /** * If set, any subsequent work pushed in a stream that participated in a * call to ::cuLaunchCooperativeKernelMultiDevice will only wait for the * kernel launched on the GPU corresponding to that stream to complete * before it begins execution. 
*/ public static final int CUDA_COOPERATIVE_LAUNCH_MULTI_DEVICE_NO_POST_LAUNCH_SYNC = 0x02; /** * If set, the CUDA array is a collection of layers, where each layer is either a 1D * or a 2D array and the Depth member of CUDA_ARRAY3D_DESCRIPTOR specifies the number * of layers, not the depth of a 3D array. */ public static final int CUDA_ARRAY3D_LAYERED = 0x01; /** * If set, the CUDA array contains an array of 2D slices * and the Depth member of CUDA_ARRAY3D_DESCRIPTOR specifies * the number of slices, not the depth of a 3D array. * @deprecated use CUDA_ARRAY3D_LAYERED */ @Deprecated public static final int CUDA_ARRAY3D_2DARRAY = 0x01; /** * This flag must be set in order to bind a surface reference * to the CUDA array */ public static final int CUDA_ARRAY3D_SURFACE_LDST = 0x02; /** * If set, the CUDA array is a collection of six 2D arrays, representing faces of a cube. The * width of such a CUDA array must be equal to its height, and Depth must be six. * If ::CUDA_ARRAY3D_LAYERED flag is also set, then the CUDA array is a collection of cubemaps * and Depth must be a multiple of six. */ public static final int CUDA_ARRAY3D_CUBEMAP = 0x04; /** * This flag must be set in order to perform texture gather operations * on a CUDA array. */ public static final int CUDA_ARRAY3D_TEXTURE_GATHER = 0x08; /** * This flag if set indicates that the CUDA * array is a DEPTH_TEXTURE. 
*/ public static final int CUDA_ARRAY3D_DEPTH_TEXTURE = 0x10; /** * This flag indicates that the CUDA array may be bound as a color target * in an external graphics API */ public static final int CUDA_ARRAY3D_COLOR_ATTACHMENT = 0x20; /** * This flag if set indicates that the CUDA array or CUDA mipmapped array * is a sparse CUDA array or CUDA mipmapped array respectively */ public static final int CUDA_ARRAY3D_SPARSE = 0x40; /** * For texture references loaded into the module, use default * texunit from texture reference */ public static final int CU_PARAM_TR_DEFAULT = -1; /** * Override the texref format with a format inferred from the array */ public static final int CU_TRSA_OVERRIDE_FORMAT = 0x01; /** * Read the texture as integers rather than promoting the values * to floats in the range [0,1] */ public static final int CU_TRSF_READ_AS_INTEGER = 0x01; /** * Use normalized texture coordinates in the range [0,1) instead of [0,dim) */ public static final int CU_TRSF_NORMALIZED_COORDINATES = 0x02; /** * Perform sRGB->linear conversion during texture read. * Flag for JCudaDriver#cuTexRefSetFlags() */ public static final int CU_TRSF_SRGB = 0x10; /** * Specifies a stream callback does not block the stream while * executing. This is the default behavior. * Flag for {@link JCudaDriver#cuStreamAddCallback(CUstream, CUstreamCallback, Object, int)} * * @deprecated This flag was only present in CUDA 5.0.25 (release candidate) * and may be removed (or added again) in future releases */ @Deprecated public static final int CU_STREAM_CALLBACK_NONBLOCKING = 0x00; /** * If set, the stream callback blocks the stream until it is * done executing. 
* Flag for {@link JCudaDriver#cuStreamAddCallback(CUstream, CUstreamCallback, Object, int)} * * @deprecated This flag was only present in CUDA 5.0.25 (release candidate) * and may be removed (or added again) in future releases */ @Deprecated public static final int CU_STREAM_CALLBACK_BLOCKING = 0x01; /** * Disable any trilinear filtering optimizations. * Flag for ::cuTexRefSetFlags() and ::cuTexObjectCreate() */ public static final int CU_TRSF_DISABLE_TRILINEAR_OPTIMIZATION = 0x20; /** * Private inner class for the constant pointer values * CU_LAUNCH_PARAM_END, CU_LAUNCH_PARAM_BUFFER_POINTER, * and CU_LAUNCH_PARAM_BUFFER_SIZE. * * TODO: These constants could be misused: There is no * mechanism for preventing these Pointers to be used * for memory allocation. However, at the moment there * is no other way for emulating these pointer constants. */ private static class ConstantPointer extends Pointer { private ConstantPointer(long value) { super(value); } } /** * End of array terminator for the \p extra parameter to * ::cuLaunchKernel */ public static final Pointer CU_LAUNCH_PARAM_END = new ConstantPointer(0); // ((void*)0x00) /** * Indicator that the next value in the \p extra parameter to * ::cuLaunchKernel will be a pointer to a buffer containing all kernel * parameters used for launching kernel \p f. This buffer needs to * honor all alignment/padding requirements of the individual parameters. * If ::CU_LAUNCH_PARAM_BUFFER_SIZE is not also specified in the * \p extra array, then ::CU_LAUNCH_PARAM_BUFFER_POINTER will have no * effect. */ public static final Pointer CU_LAUNCH_PARAM_BUFFER_POINTER = new ConstantPointer(1); //((void*)0x01) /** * Indicator that the next value in the \p extra parameter to * ::cuLaunchKernel will be a pointer to a size_t which contains the * size of the buffer specified with ::CU_LAUNCH_PARAM_BUFFER_POINTER. 
* It is required that ::CU_LAUNCH_PARAM_BUFFER_POINTER also be specified * in the \p extra array if the value associated with * ::CU_LAUNCH_PARAM_BUFFER_SIZE is not zero. */ public static final Pointer CU_LAUNCH_PARAM_BUFFER_SIZE = new ConstantPointer(2); // ((void*)0x02) /** * Device that represents the CPU */ public static final CUdevice CU_DEVICE_CPU = new CUdevice(-1); /** * Device that represents an invalid device */ public static final CUdevice CU_DEVICE_INVALID = new CUdevice(-2); /** * Stream handle that can be passed as a CUstream to use an implicit stream * with legacy synchronization behavior. */ public static final CUstream CU_STREAM_LEGACY = new CUstream(0x1); /** * Stream handle that can be passed as a CUstream to use an implicit stream * with per-thread synchronization behavior. */ public static final CUstream CU_STREAM_PER_THREAD = new CUstream(0x2); /** * Whether a CudaException should be thrown if a method is about * to return a result code that is not CUresult.CUDA_SUCCESS */ private static boolean exceptionsEnabled = false; static { String libraryBaseName = "JCudaDriver-" + JCudaVersion.get(); String libraryName = LibUtils.createPlatformLibraryName(libraryBaseName); LibUtilsCuda.loadLibrary(libraryName); } /* Private constructor to prevent instantiation */ private JCudaDriver() { } /** * Set the specified log level for the JCuda driver library.
*
* Currently supported log levels: *
* LOG_QUIET: Never print anything
* LOG_ERROR: Print error messages
* LOG_TRACE: Print a trace of all native function calls
* * @param logLevel The log level to use. */ public static void setLogLevel(LogLevel logLevel) { setLogLevel(logLevel.ordinal()); } private static native void setLogLevel(int logLevel); /** * Enables or disables exceptions. By default, the methods of this class * only return the CUresult error code from the underlying CUDA function. * If exceptions are enabled, a CudaException with a detailed error * message will be thrown if a method is about to return a result code * that is not CUresult.CUDA_SUCCESS * * @param enabled Whether exceptions are enabled */ public static void setExceptionsEnabled(boolean enabled) { exceptionsEnabled = enabled; } /** * If the given result is different to CUresult.CUDA_SUCCESS and * exceptions have been enabled, this method will throw a * CudaException with an error message that corresponds to the * given result code. Otherwise, the given result is simply * returned. * * @param result The result to check * @return The result that was given as the parameter * @throws CudaException If exceptions have been enabled and * the given result code is not CUresult.CUDA_SUCCESS */ private static int checkResult(int result) { if (exceptionsEnabled && result != CUresult.CUDA_SUCCESS) { throw new CudaException(CUresult.stringFor(result)); } return result; } /** * Returns the given (address) value, adjusted to have * the given alignment. This function may be used to * align the parameters for a kernel call according * to their alignment requirements. * * @param value The address value * @param alignment The desired alignment * @return The aligned address value * @deprecated This method was intended for a simpler * kernel parameter setup in earlier CUDA versions, * and should not be required any more. It may be * removed in future releases. 
*/ @Deprecated public static int align(int value, int alignment) { return (((value) + (alignment) - 1) & ~((alignment) - 1)); } /** * A wrapper function for * {@link JCudaDriver#cuModuleLoadDataEx(CUmodule, Pointer, int, int[], Pointer)} * which allows passing in the options for the JIT compiler, and obtaining * the output of the JIT compiler via a {@link JITOptions} object.
*
* Note: This method should be considered as preliminary, * and might change in future releases. * */ public static int cuModuleLoadDataJIT(CUmodule module, Pointer pointer, JITOptions jitOptions) { return cuModuleLoadDataJITNative(module, pointer, jitOptions); } private static native int cuModuleLoadDataJITNative(CUmodule module, Pointer pointer, JITOptions jitOptions); /** * A wrapper function for * {@link JCudaDriver#cuModuleLoadDataEx(CUmodule, Pointer, int, int[], Pointer)} * which allows passing in the image data as a string. * * @param module Returned module * @param image Module data to load * @param numOptions Number of options * @param options Options for JIT * @param optionValues Option values for JIT * @return The return code from cuModuleLoadDataEx * * @see #cuModuleLoadDataEx(CUmodule, Pointer, int, int[], Pointer) */ public static int cuModuleLoadDataEx(CUmodule phMod, String string, int numOptions, int options[], Pointer optionValues) { byte bytes[] = string.getBytes(); byte image[] = Arrays.copyOf(bytes, bytes.length+1); return cuModuleLoadDataEx(phMod, Pointer.to(image), numOptions, options, optionValues); } /** * A wrapper function for {@link #cuModuleLoadData(CUmodule, byte[])} * that converts the given string into a zero-terminated byte array. * * @param module The module * @param string The data. May not be null. * @return The return code from cuModuleLoadData * * @see #cuModuleLoadData(CUmodule, byte[]) */ public static int cuModuleLoadData(CUmodule module, String string) { byte bytes[] = string.getBytes(); byte image[] = Arrays.copyOf(bytes, bytes.length+1); return cuModuleLoadData(module, image); } /** *
     * Gets the string description of an error code
     *
     * Sets *pStr to the address of a NULL-terminated string description
     * of the error code error.
     * If the error code is not recognized, ::CUDA_ERROR_INVALID_VALUE
     * will be returned and *pStr will be set to the NULL address.
     * 
* * @param error - Error code to convert to string * @param pStr - Address of the string pointer. * * @return * ::CUDA_SUCCESS, * ::CUDA_ERROR_INVALID_VALUE * * @see CUresult */ public static int cuGetErrorString(int error, String pStr[]) { return checkResult(cuGetErrorStringNative(error, pStr)); } private static native int cuGetErrorStringNative(int error, String pStr[]); /** *
     * Gets the string representation of an error code enum name
     *
     * Sets *pStr to the address of a NULL-terminated string representation
     * of the name of the enum error code error.
     * If the error code is not recognized, ::CUDA_ERROR_INVALID_VALUE
     * will be returned and *pStr will be set to the NULL address.
     * 
* @param error - Error code to convert to string * @param pStr - Address of the string pointer. * * @return * ::CUDA_SUCCESS, * ::CUDA_ERROR_INVALID_VALUE * * @see CUresult */ public static int cuGetErrorName(int error, String pStr[]) { return checkResult(cuGetErrorNameNative(error, pStr)); } private static native int cuGetErrorNameNative(int error, String pStr[]); /** * Initialize the CUDA driver API. * *
     * CUresult cuInit (
     *      unsigned int  Flags )
     *
     * Initializes the driver API and must be called before any other
     * function from the driver API. Currently, the Flags parameter must
     * be 0. If cuInit() has not been called, any function from the
     * driver API will return CUDA_ERROR_NOT_INITIALIZED.
     *
     * Note that this function may also return error codes from previous,
     * asynchronous launches.
     *
     * @param Flags Initialization flag for CUDA.
     *
     * @return CUDA_SUCCESS, CUDA_ERROR_INVALID_VALUE,
     * CUDA_ERROR_INVALID_DEVICE
     */
    public static int cuInit(int Flags)
    {
        int result = cuInitNative(Flags);
        return checkResult(result);
    }

    private static native int cuInitNative(int Flags);

    /**
     * Returns a handle to a compute device.
     *
     *
     * CUresult cuDeviceGet (
     *      CUdevice* device,
     *      int  ordinal )
     * 
*
*

Returns a handle to a compute device. * Returns in *device a device handle given an ordinal in the * range [0, cuDeviceGetCount()-1]. *

*
* Note: *

Note that this * function may also return error codes from previous, asynchronous * launches. *

*
*

*
* * @param device Returned device handle * @param ordinal Device number to get handle for * * @return CUDA_SUCCESS, CUDA_ERROR_DEINITIALIZED, CUDA_ERROR_NOT_INITIALIZED, * CUDA_ERROR_INVALID_CONTEXT, CUDA_ERROR_INVALID_VALUE, * CUDA_ERROR_INVALID_DEVICE * * @see JCudaDriver#cuDeviceGetAttribute * @see JCudaDriver#cuDeviceGetCount * @see JCudaDriver#cuDeviceGetName * @see JCudaDriver#cuDeviceTotalMem */ public static int cuDeviceGet(CUdevice device, int ordinal) { return checkResult(cuDeviceGetNative(device, ordinal)); } private static native int cuDeviceGetNative(CUdevice device, int ordinal); /** * Returns the number of compute-capable devices. * *
     * CUresult cuDeviceGetCount (
     *      int* count )
     *
     * Returns in *count the number of devices with compute capability
     * greater than or equal to 2.0 that are available for execution.
     * If there is no such device, cuDeviceGetCount() returns 0.
     *
     * Note that this function may also return error codes from previous,
     * asynchronous launches.
     *
     * @param count Returned number of compute-capable devices
     *
     * @return CUDA_SUCCESS, CUDA_ERROR_DEINITIALIZED,
     * CUDA_ERROR_NOT_INITIALIZED, CUDA_ERROR_INVALID_CONTEXT,
     * CUDA_ERROR_INVALID_VALUE
     *
     * @see JCudaDriver#cuDeviceGetAttribute
     * @see JCudaDriver#cuDeviceGetName
     * @see JCudaDriver#cuDeviceGet
     * @see JCudaDriver#cuDeviceTotalMem
     */
    public static int cuDeviceGetCount(int count[])
    {
        int result = cuDeviceGetCountNative(count);
        return checkResult(result);
    }

    private static native int cuDeviceGetCountNative(int count[]);

    /**
     * Returns an identifier string for the device.
     *
     *
     * CUresult cuDeviceGetName (
     *      char* name,
     *      int  len,
     *      CUdevice dev )
     * 
*
*

Returns an identifer string for the * device. Returns an ASCII string identifying the device dev * in the NULL-terminated string pointed to by name. len specifies the maximum length of the string that may be * returned. *

*
* Note: *

Note that this * function may also return error codes from previous, asynchronous * launches. *

*
*

*
* * @param name Returned identifier string for the device * @param len Maximum length of string to store in name * @param dev Device to get identifier string for * * @return CUDA_SUCCESS, CUDA_ERROR_DEINITIALIZED, CUDA_ERROR_NOT_INITIALIZED, * CUDA_ERROR_INVALID_CONTEXT, CUDA_ERROR_INVALID_VALUE, * CUDA_ERROR_INVALID_DEVICE * * @see JCudaDriver#cuDeviceGetAttribute * @see JCudaDriver#cuDeviceGetCount * @see JCudaDriver#cuDeviceGet * @see JCudaDriver#cuDeviceTotalMem */ public static int cuDeviceGetName(byte name[], int len, CUdevice dev) { return checkResult(cuDeviceGetNameNative(name, len, dev)); } private static native int cuDeviceGetNameNative(byte name[], int len, CUdevice dev); /** * Return an UUID for the device. * * Returns 16-octets identifing the device \p dev in the structure * pointed by the \p uuid. * * @param uuid Returned UUID * @param dev Device to get identifier string for * * @return CUDA_SUCCESS, CUDA_ERROR_DEINITIALIZED, CUDA_ERROR_NOT_INITIALIZED, * CUDA_ERROR_INVALID_VALUE, CUDA_ERROR_INVALID_DEVICE * * * @see JCudaDriver#cuDeviceGetAttribute * JCudaDriver#cuDeviceGetCount * JCudaDriver#cuDeviceGetName * JCudaDriver#cuDeviceGet * JCudaDriver#cuDeviceTotalMem * JCudaDriver#cudaGetDeviceProperties */ public static int cuDeviceGetUuid(CUuuid uuid, CUdevice dev) { return checkResult(cuDeviceGetUuidNative(uuid, dev)); } private static native int cuDeviceGetUuidNative(CUuuid uuid, CUdevice dev); /** * Return an LUID and device node mask for the device. * * Return identifying information (\p luid and \p deviceNodeMask) to allow * matching device with graphics APIs. 
* * @param luid - Returned LUID * @param deviceNodeMask - Returned device node mask * @param dev - Device to get identifier string for * * @return CUDA_SUCCESS, * CUDA_ERROR_DEINITIALIZED, * CUDA_ERROR_NOT_INITIALIZED, * CUDA_ERROR_INVALID_VALUE, * CUDA_ERROR_INVALID_DEVICE * * @see JCudaDriver#cuDeviceGetAttribute * JCudaDriver#cuDeviceGetCount * JCudaDriver#cuDeviceGetName * JCudaDriver#cuDeviceGet * JCudaDriver#cuDeviceTotalMem * JCudaDriver#cudaGetDeviceProperties */ public static int cuDeviceGetLuid(byte luid[], int deviceNodeMask[], CUdevice dev) { return checkResult(cuDeviceGetLuidNative(luid, deviceNodeMask, dev)); } public static native int cuDeviceGetLuidNative(byte luid[], int deviceNodeMask[], CUdevice dev); /** * Returns the compute capability of the device. * *
     * CUresult cuDeviceComputeCapability (
     *      int* major,
     *      int* minor,
     *      CUdevice dev )
     * 
*
*

Returns the compute capability of the * device. * DeprecatedThis function was deprecated * as of CUDA 5.0 and its functionality superceded by * cuDeviceGetAttribute(). *

*

Returns in *major and *minor the major and minor revision numbers that define the * compute capability of the device dev. *

*
* Note: *

Note that this * function may also return error codes from previous, asynchronous * launches. *

*
*

*
* * @param major Major revision number * @param minor Minor revision number * @param dev Device handle * * @return CUDA_SUCCESS, CUDA_ERROR_DEINITIALIZED, CUDA_ERROR_NOT_INITIALIZED, * CUDA_ERROR_INVALID_CONTEXT, CUDA_ERROR_INVALID_VALUE, * CUDA_ERROR_INVALID_DEVICE * * @see JCudaDriver#cuDeviceGetAttribute * @see JCudaDriver#cuDeviceGetCount * @see JCudaDriver#cuDeviceGetName * @see JCudaDriver#cuDeviceGet * @see JCudaDriver#cuDeviceTotalMem * * @deprecated Deprecated as of CUDA 5.0, replaced with {@link JCudaDriver#cuDeviceGetAttribute(int[], int, CUdevice)} */ @Deprecated public static int cuDeviceComputeCapability(int major[], int minor[], CUdevice dev) { return checkResult(cuDeviceComputeCapabilityNative(major, minor, dev)); } private static native int cuDeviceComputeCapabilityNative(int major[], int minor[], CUdevice dev); /** * Retain the primary context on the GPU. * * Retains the primary context on the device. * Once the user successfully retains the primary context, the primary context * will be active and available to the user until the user releases it * with ::cuDevicePrimaryCtxRelease() or resets it with ::cuDevicePrimaryCtxReset(). * Unlike ::cuCtxCreate() the newly retained context is not pushed onto the stack. * * Retaining the primary context for the first time will fail with ::CUDA_ERROR_UNKNOWN * if the compute mode of the device is ::CU_COMPUTEMODE_PROHIBITED. The function * ::cuDeviceGetAttribute() can be used with ::CU_DEVICE_ATTRIBUTE_COMPUTE_MODE to * determine the compute mode of the device. * The nvidia-smi tool can be used to set the compute mode for * devices. Documentation for nvidia-smi can be obtained by passing a * -h option to it. * * Please note that the primary context always supports pinned allocations. Other * flags can be specified by ::cuDevicePrimaryCtxSetFlags(). 
* * @param pctx Returned context handle of the new context * @param dev - Device for which primary context is requested * * @return * CUDA_SUCCESS, * CUDA_ERROR_DEINITIALIZED, * CUDA_ERROR_NOT_INITIALIZED, * CUDA_ERROR_INVALID_CONTEXT, * CUDA_ERROR_INVALID_DEVICE, * CUDA_ERROR_INVALID_VALUE, * CUDA_ERROR_OUT_OF_MEMORY, * CUDA_ERROR_UNKNOWN * * @see JCudaDriver#cuDevicePrimaryCtxRelease * @see JCudaDriver#cuDevicePrimaryCtxSetFlags * @see JCudaDriver#cuCtxCreate * @see JCudaDriver#cuCtxGetApiVersion * @see JCudaDriver#cuCtxGetCacheConfig * @see JCudaDriver#cuCtxGetDevice * @see JCudaDriver#cuCtxGetFlags * @see JCudaDriver#cuCtxGetLimit * @see JCudaDriver#cuCtxPopCurrent * @see JCudaDriver#cuCtxPushCurrent * @see JCudaDriver#cuCtxSetCacheConfig * @see JCudaDriver#cuCtxSetLimit * @see JCudaDriver#cuCtxSynchronize */ public static int cuDevicePrimaryCtxRetain(CUcontext pctx, CUdevice dev) { return checkResult(cuDevicePrimaryCtxRetainNative(pctx, dev)); } private static native int cuDevicePrimaryCtxRetainNative(CUcontext pctx, CUdevice dev); /** * Release the primary context on the GPU. * * Releases the primary context interop on the device. * A retained context should always be released once the user is done using * it. The context is automatically reset once the last reference to it is * released. This behavior is different when the primary context was retained * by the CUDA runtime from CUDA 4.0 and earlier. In this case, the primary * context remains always active. * * Releasing a primary context that has not been previously retained will * fail with ::CUDA_ERROR_INVALID_CONTEXT. * * Please note that unlike ::cuCtxDestroy() this method does not pop the context * from stack in any circumstances. 
* * @param dev Device which primary context is released * * @return * CUDA_SUCCESS, * CUDA_ERROR_DEINITIALIZED, * CUDA_ERROR_NOT_INITIALIZED, * CUDA_ERROR_INVALID_DEVICE, * CUDA_ERROR_INVALID_CONTEXT * * @see JCudaDriver#cuDevicePrimaryCtxRetain * @see JCudaDriver#cuCtxDestroy * @see JCudaDriver#cuCtxGetApiVersion * @see JCudaDriver#cuCtxGetCacheConfig * @see JCudaDriver#cuCtxGetDevice * @see JCudaDriver#cuCtxGetFlags * @see JCudaDriver#cuCtxGetLimit * @see JCudaDriver#cuCtxPopCurrent * @see JCudaDriver#cuCtxPushCurrent * @see JCudaDriver#cuCtxSetCacheConfig * @see JCudaDriver#cuCtxSetLimit * @see JCudaDriver#cuCtxSynchronize */ public static int cuDevicePrimaryCtxRelease(CUdevice dev) { return checkResult(cuDevicePrimaryCtxReleaseNative(dev)); } private static native int cuDevicePrimaryCtxReleaseNative(CUdevice dev); /** * Set flags for the primary context. * * Sets the flags for the primary context on the device overwriting perviously * set ones. * * The three LSBs of the \p flags parameter can be used to control how the OS * thread, which owns the CUDA context at the time of an API call, interacts * with the OS scheduler when waiting for results from the GPU. Only one of * the scheduling flags can be set when creating a context. *
*
* CU_CTX_SCHED_SPIN: Instruct CUDA to actively spin when waiting for * results from the GPU. This can decrease latency when waiting for the GPU, * but may lower the performance of CPU threads if they are performing work in * parallel with the CUDA thread. *
*
* CU_CTX_SCHED_YIELD: Instruct CUDA to yield its thread when waiting for * results from the GPU. This can increase latency when waiting for the GPU, * but can increase the performance of CPU threads performing work in parallel * with the GPU. *
*
* CU_CTX_SCHED_BLOCKING_SYNC: Instruct CUDA to block the CPU thread on a * synchronization primitive when waiting for the GPU to finish work. *
*
* CU_CTX_BLOCKING_SYNC: Instruct CUDA to block the CPU thread on a * synchronization primitive when waiting for the GPU to finish work.
* Deprecated: This flag was deprecated as of CUDA 4.0 and was * replaced with ::CU_CTX_SCHED_BLOCKING_SYNC. *
*
* CU_CTX_SCHED_AUTO: The default value if the \p flags parameter is zero, * uses a heuristic based on the number of active CUDA contexts in the * process \e C and the number of logical processors in the system \e P. If * \e C > \e P, then CUDA will yield to other OS threads when waiting for * the GPU (::CU_CTX_SCHED_YIELD), otherwise CUDA will not yield while * waiting for results and actively spin on the processor (::CU_CTX_SCHED_SPIN). * Additionally, on Tegra devices, ::CU_CTX_SCHED_AUTO uses a heuristic based on * the power profile of the platform and may choose ::CU_CTX_SCHED_BLOCKING_SYNC * for low-powered devices. *
*
* CU_CTX_LMEM_RESIZE_TO_MAX: Instruct CUDA to not reduce local memory * after resizing local memory for a kernel. This can prevent thrashing by * local memory allocations when launching many kernels with high local * memory usage at the cost of potentially increased memory usage.
* Deprecated: This flag is deprecated and the behavior enabled * by this flag is now the default and cannot be disabled. * * @param dev Device for which the primary context flags are set * @param flags New flags for the device * * @return * CUDA_SUCCESS, * CUDA_ERROR_DEINITIALIZED, * CUDA_ERROR_NOT_INITIALIZED, * CUDA_ERROR_INVALID_DEVICE, * CUDA_ERROR_INVALID_VALUE, * * @see JCudaDriver#cuDevicePrimaryCtxRetain * @see JCudaDriver#cuDevicePrimaryCtxGetState * @see JCudaDriver#cuCtxCreate * @see JCudaDriver#cuCtxGetFlags * @see JCudaDriver#cudaSetDeviceFlags */ public static int cuDevicePrimaryCtxSetFlags(CUdevice dev, int flags) { return checkResult(cuDevicePrimaryCtxSetFlagsNative(dev, flags)); } private static native int cuDevicePrimaryCtxSetFlagsNative(CUdevice dev, int flags); /** * Get the state of the primary context. * * Returns in \p *flags the flags for the primary context of \p dev, and in * \p *active whether it is active. See ::cuDevicePrimaryCtxSetFlags for flag * values. * * @param dev Device to get primary context flags for * @param flags Pointer to store flags * @param active Pointer to store context state; 0 = inactive, 1 = active * * @return * CUDA_SUCCESS, * CUDA_ERROR_DEINITIALIZED, * CUDA_ERROR_NOT_INITIALIZED, * CUDA_ERROR_INVALID_DEVICE, * CUDA_ERROR_INVALID_VALUE, * * @see JCudaDriver#cuDevicePrimaryCtxSetFlags, * @see JCudaDriver#cuCtxGetFlags, * @see JCudaDriver#cudaGetDeviceFlags */ public static int cuDevicePrimaryCtxGetState(CUdevice dev, int flags[], int active[]) { return checkResult(cuDevicePrimaryCtxGetStateNative(dev, flags, active)); } private static native int cuDevicePrimaryCtxGetStateNative(CUdevice dev, int flags[], int active[]); /** * Destroy all allocations and reset all state on the primary context. * * Explicitly destroys and cleans up all resources associated with the current * device in the current process. 
* * Note that it is responsibility of the calling function to ensure that no * other module in the process is using the device any more. For that reason * it is recommended to use ::cuDevicePrimaryCtxRelease() in most cases. * However it is safe for other modules to call ::cuDevicePrimaryCtxRelease() * even after resetting the device. * Resetting the primary context does not release it, an application that has * retained the primary context should explicitly release its usage. * * @param dev Device for which primary context is destroyed * * @return * CUDA_SUCCESS, * CUDA_ERROR_DEINITIALIZED, * CUDA_ERROR_NOT_INITIALIZED, * CUDA_ERROR_INVALID_DEVICE, * CUDA_ERROR_PRIMARY_CONTEXT_ACTIVE * * @see JCudaDriver#cuDevicePrimaryCtxRetain * @see JCudaDriver#cuDevicePrimaryCtxRelease * @see JCudaDriver#cuCtxGetApiVersion * @see JCudaDriver#cuCtxGetCacheConfig * @see JCudaDriver#cuCtxGetDevice * @see JCudaDriver#cuCtxGetFlags * @see JCudaDriver#cuCtxGetLimit * @see JCudaDriver#cuCtxPopCurrent * @see JCudaDriver#cuCtxPushCurrent * @see JCudaDriver#cuCtxSetCacheConfig * @see JCudaDriver#cuCtxSetLimit * @see JCudaDriver#cuCtxSynchronize * @see JCudaDriver#cudaDeviceReset */ public static int cuDevicePrimaryCtxReset(CUdevice dev) { return checkResult(cuDevicePrimaryCtxResetNative(dev)); } private static native int cuDevicePrimaryCtxResetNative(CUdevice dev); /** * Returns the total amount of memory on the device. * *
     * CUresult cuDeviceTotalMem (
     *      size_t* bytes,
     *      CUdevice dev )
     * 
*
*

Returns the total amount of memory on * the device. Returns in *bytes the total amount of memory * available on the device dev in bytes. *

*
* Note: *

Note that this * function may also return error codes from previous, asynchronous * launches. *

*
*

*
* * @param bytes Returned memory available on device in bytes * @param dev Device handle * * @return CUDA_SUCCESS, CUDA_ERROR_DEINITIALIZED, CUDA_ERROR_NOT_INITIALIZED, * CUDA_ERROR_INVALID_CONTEXT, CUDA_ERROR_INVALID_VALUE, * CUDA_ERROR_INVALID_DEVICE * * @see JCudaDriver#cuDeviceGetAttribute * @see JCudaDriver#cuDeviceGetCount * @see JCudaDriver#cuDeviceGetName * @see JCudaDriver#cuDeviceGet */ public static int cuDeviceTotalMem(long bytes[], CUdevice dev) { return checkResult(cuDeviceTotalMemNative(bytes, dev)); } private static native int cuDeviceTotalMemNative(long bytes[], CUdevice dev); /** * Returns the maximum number of elements allocatable in a 1D linear * texture for a given texture element size. * * Returns in \p maxWidthInElements the maximum number of texture elements * allocatable in a 1D linear texture for given \p format and \p numChannels. * * @param maxWidthInElements Returned maximum number of texture elements allocatable for given \p format and \p numChannels. * @param format Texture format. * @param numChannels Number of channels per texture element. * @param dev Device handle. * * @return * CUDA_SUCCESS, CUDA_ERROR_DEINITIALIZED, CUDA_ERROR_NOT_INITIALIZED, * CUDA_ERROR_INVALID_CONTEXT, CUDA_ERROR_INVALID_VALUE, CUDA_ERROR_INVALID_DEVICE * * @see JCudaDriver#cuDeviceGetAttribute, * @see JCudaDriver#cuDeviceGetCount, * @see JCudaDriver#cuDeviceGetName, * @see JCudaDriver#cuDeviceGetUuid, * @see JCudaDriver#cuDeviceGet, * @see JCudaDriver#cudaMemGetInfo * @see JCudaDriver#cuDeviceTotalMem */ public static int cuDeviceGetTexture1DLinearMaxWidth(long maxWidthInElements[], int format, int numChannels, CUdevice dev) { return checkResult(cuDeviceGetTexture1DLinearMaxWidthNative(maxWidthInElements, format, numChannels, dev)); } private static native int cuDeviceGetTexture1DLinearMaxWidthNative(long maxWidthInElements[], int format, int numChannels, CUdevice dev); /** * Returns properties for a selected device. * *
     * CUresult cuDeviceGetProperties (
     *      CUdevprop* prop,
     *      CUdevice dev )
     * 
*
*

Returns properties for a selected device. * DeprecatedThis function was deprecated * as of CUDA 5.0 and replaced by cuDeviceGetAttribute(). *

*

Returns in *prop the properties * of device dev. The CUdevprop structure is defined as: *

*
     typedef struct CUdevprop_st {
     *      int maxThreadsPerBlock;
     *      int maxThreadsDim[3];
     *      int maxGridSize[3];
     *      int sharedMemPerBlock;
     *      int totalConstantMemory;
     *      int SIMDWidth;
     *      int memPitch;
     *      int regsPerBlock;
     *      int clockRate;
     *      int textureAlign
     *   } CUdevprop;
* where:

*
    *
  • *

    maxThreadsPerBlock is the * maximum number of threads per block; *

    *
  • *
  • *

    maxThreadsDim[3] is the maximum * sizes of each dimension of a block; *

    *
  • *
  • *

    maxGridSize[3] is the maximum * sizes of each dimension of a grid; *

    *
  • *
  • *

    sharedMemPerBlock is the total * amount of shared memory available per block in bytes; *

    *
  • *
  • *

    totalConstantMemory is the * total amount of constant memory available on the device in bytes; *

    *
  • *
  • *

    SIMDWidth is the warp * size; *

    *
  • *
  • *

    memPitch is the maximum pitch * allowed by the memory copy functions that involve memory regions * allocated through cuMemAllocPitch(); *

    *
  • *
  • *

    regsPerBlock is the total * number of registers available per block; *

    *
  • *
  • *

    clockRate is the clock frequency * in kilohertz; *

    *
  • *
  • *

    textureAlign is the alignment * requirement; texture base addresses that are aligned to textureAlign * bytes do not need an offset * applied to texture fetches. *

    *
  • *
*

*
* Note: *

Note that this * function may also return error codes from previous, asynchronous * launches. *

*
*

*
* * @param prop Returned properties of device * @param dev Device to get properties for * * @return CUDA_SUCCESS, CUDA_ERROR_DEINITIALIZED, CUDA_ERROR_NOT_INITIALIZED, * CUDA_ERROR_INVALID_CONTEXT, CUDA_ERROR_INVALID_VALUE, * CUDA_ERROR_INVALID_DEVICE * * @see JCudaDriver#cuDeviceGetAttribute * @see JCudaDriver#cuDeviceGetCount * @see JCudaDriver#cuDeviceGetName * @see JCudaDriver#cuDeviceGet * @see JCudaDriver#cuDeviceTotalMem * * @deprecated Deprecated as of CUDA 5.0, replaced with {@link JCudaDriver#cuDeviceGetAttribute(int[], int, CUdevice)} */ @Deprecated public static int cuDeviceGetProperties(CUdevprop prop, CUdevice dev) { return checkResult(cuDeviceGetPropertiesNative(prop, dev)); } private static native int cuDeviceGetPropertiesNative(CUdevprop prop, CUdevice dev); /** * Returns information about the device. * *
     * CUresult cuDeviceGetAttribute (
     *      int* pi,
     *      CUdevice_attribute attrib,
     *      CUdevice dev )
     * 
*
*

Returns information about the device. * Returns in *pi the integer value of the attribute attrib on device dev. The supported attributes are: *

    *
  • *

    CU_DEVICE_ATTRIBUTE_MAX_THREADS_PER_BLOCK: Maximum number of threads * per block; *

    *
  • *
  • *

    CU_DEVICE_ATTRIBUTE_MAX_BLOCK_DIM_X: * Maximum x-dimension of a block; *

    *
  • *
  • *

    CU_DEVICE_ATTRIBUTE_MAX_BLOCK_DIM_Y: * Maximum y-dimension of a block; *

    *
  • *
  • *

    CU_DEVICE_ATTRIBUTE_MAX_BLOCK_DIM_Z: * Maximum z-dimension of a block; *

    *
  • *
  • *

    CU_DEVICE_ATTRIBUTE_MAX_GRID_DIM_X: * Maximum x-dimension of a grid; *

    *
  • *
  • *

    CU_DEVICE_ATTRIBUTE_MAX_GRID_DIM_Y: * Maximum y-dimension of a grid; *

    *
  • *
  • *

    CU_DEVICE_ATTRIBUTE_MAX_GRID_DIM_Z: * Maximum z-dimension of a grid; *

    *
  • *
  • *

    CU_DEVICE_ATTRIBUTE_MAX_SHARED_MEMORY_PER_BLOCK: Maximum amount of * shared memory available to a thread block in bytes; this amount is * shared by all thread blocks simultaneously * resident on a multiprocessor; *

    *
  • *
  • *

    CU_DEVICE_ATTRIBUTE_TOTAL_CONSTANT_MEMORY: Memory available on device * for __constant__ variables in a CUDA C kernel in bytes; *

    *
  • *
  • *

    CU_DEVICE_ATTRIBUTE_WARP_SIZE: * Warp size in threads; *

    *
  • *
  • *

    CU_DEVICE_ATTRIBUTE_MAX_PITCH: * Maximum pitch in bytes allowed by the memory copy functions that * involve memory regions allocated through cuMemAllocPitch(); *

    *
  • *
  • *

    CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE1D_WIDTH: Maximum 1D texture * width; *

    *
  • *
  • *

    CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE1D_LINEAR_WIDTH: Maximum width for * a 1D texture bound to linear memory; *

    *
  • *
  • *

    CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE1D_MIPMAPPED_WIDTH: Maximum * mipmapped 1D texture width; *

    *
  • *
  • *

    CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE2D_WIDTH: Maximum 2D texture * width; *

    *
  • *
  • *

    CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE2D_HEIGHT: Maximum 2D texture * height; *

    *
  • *
  • *

    CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE2D_LINEAR_WIDTH: Maximum width for * a 2D texture bound to linear memory; *

    *
  • *
  • *

    CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE2D_LINEAR_HEIGHT: Maximum height * for a 2D texture bound to linear memory; *

    *
  • *
  • *

    CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE2D_LINEAR_PITCH: Maximum pitch in * bytes for a 2D texture bound to linear memory; *

    *
  • *
  • *

    CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE2D_MIPMAPPED_WIDTH: Maximum * mipmapped 2D texture width; *

    *
  • *
  • *

    CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE2D_MIPMAPPED_HEIGHT: Maximum * mipmapped 2D texture height; *

    *
  • *
  • *

    CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE3D_WIDTH: Maximum 3D texture * width; *

    *
  • *
  • *

    CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE3D_HEIGHT: Maximum 3D texture * height; *

    *
  • *
  • *

    CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE3D_DEPTH: Maximum 3D texture * depth; *

    *
  • *
  • *

    CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE3D_WIDTH_ALTERNATE: Alternate * maximum 3D texture width, 0 if no alternate maximum 3D texture size is * supported; *

    *
  • *
  • *

    CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE3D_HEIGHT_ALTERNATE: Alternate * maximum 3D texture height, 0 if no alternate maximum 3D texture size * is supported; *

    *
  • *
  • *

    CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE3D_DEPTH_ALTERNATE: Alternate * maximum 3D texture depth, 0 if no alternate maximum 3D texture size is * supported; *

    *
  • *
  • *

    CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURECUBEMAP_WIDTH: Maximum cubemap * texture width or height; *

    *
  • *
  • *

    CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE1D_LAYERED_WIDTH: Maximum 1D * layered texture width; *

    *
  • *
  • *

    CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE1D_LAYERED_LAYERS: Maximum layers * in a 1D layered texture; *

    *
  • *
  • *

    CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE2D_LAYERED_WIDTH: Maximum 2D * layered texture width; *

    *
  • *
  • *

    CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE2D_LAYERED_HEIGHT: Maximum 2D * layered texture height; *

    *
  • *
  • *

    CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE2D_LAYERED_LAYERS: Maximum layers * in a 2D layered texture; *

    *
  • *
  • *

    CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURECUBEMAP_LAYERED_WIDTH: Maximum * cubemap layered texture width or height; *

    *
  • *
  • *

    CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURECUBEMAP_LAYERED_LAYERS: Maximum * layers in a cubemap layered texture; *

    *
  • *
  • *

    CU_DEVICE_ATTRIBUTE_MAXIMUM_SURFACE1D_WIDTH: Maximum 1D surface * width; *

    *
  • *
  • *

    CU_DEVICE_ATTRIBUTE_MAXIMUM_SURFACE2D_WIDTH: Maximum 2D surface * width; *

    *
  • *
  • *

    CU_DEVICE_ATTRIBUTE_MAXIMUM_SURFACE2D_HEIGHT: Maximum 2D surface * height; *

    *
  • *
  • *

    CU_DEVICE_ATTRIBUTE_MAXIMUM_SURFACE3D_WIDTH: Maximum 3D surface * width; *

    *
  • *
  • *

    CU_DEVICE_ATTRIBUTE_MAXIMUM_SURFACE3D_HEIGHT: Maximum 3D surface * height; *

    *
  • *
  • *

    CU_DEVICE_ATTRIBUTE_MAXIMUM_SURFACE3D_DEPTH: Maximum 3D surface * depth; *

    *
  • *
  • *

    CU_DEVICE_ATTRIBUTE_MAXIMUM_SURFACE1D_LAYERED_WIDTH: Maximum 1D * layered surface width; *

    *
  • *
  • *

    CU_DEVICE_ATTRIBUTE_MAXIMUM_SURFACE1D_LAYERED_LAYERS: Maximum layers * in a 1D layered surface; *

    *
  • *
  • *

    CU_DEVICE_ATTRIBUTE_MAXIMUM_SURFACE2D_LAYERED_WIDTH: Maximum 2D * layered surface width; *

    *
  • *
  • *

    CU_DEVICE_ATTRIBUTE_MAXIMUM_SURFACE2D_LAYERED_HEIGHT: Maximum 2D * layered surface height; *

    *
  • *
  • *

    CU_DEVICE_ATTRIBUTE_MAXIMUM_SURFACE2D_LAYERED_LAYERS: Maximum layers * in a 2D layered surface; *

    *
  • *
  • *

    CU_DEVICE_ATTRIBUTE_MAXIMUM_SURFACECUBEMAP_WIDTH: Maximum cubemap * surface width; *

    *
  • *
  • *

    CU_DEVICE_ATTRIBUTE_MAXIMUM_SURFACECUBEMAP_LAYERED_WIDTH: Maximum * cubemap layered surface width; *

    *
  • *
  • *

    CU_DEVICE_ATTRIBUTE_MAXIMUM_SURFACECUBEMAP_LAYERED_LAYERS: Maximum * layers in a cubemap layered surface; *

    *
  • *
  • *

    CU_DEVICE_ATTRIBUTE_MAX_REGISTERS_PER_BLOCK: Maximum number of 32-bit * registers available to a thread block; this number is shared by all * thread blocks simultaneously * resident on a multiprocessor; *

    *
  • *
  • *

    CU_DEVICE_ATTRIBUTE_CLOCK_RATE: * Typical clock frequency in kilohertz; *

    *
  • *
  • *

    CU_DEVICE_ATTRIBUTE_TEXTURE_ALIGNMENT: * Alignment requirement; texture base addresses aligned to textureAlign * bytes do not need an offset applied to texture fetches; *

    *
  • *
  • *

    CU_DEVICE_ATTRIBUTE_TEXTURE_PITCH_ALIGNMENT: Pitch alignment * requirement for 2D texture references bound to pitched memory; *

    *
  • *
  • *

    CU_DEVICE_ATTRIBUTE_GPU_OVERLAP: * 1 if the device can concurrently copy memory between host and device * while executing a kernel, or 0 if not; *

    *
  • *
  • *

    CU_DEVICE_ATTRIBUTE_MULTIPROCESSOR_COUNT: Number of multiprocessors * on the device; *

    *
  • *
  • *

    CU_DEVICE_ATTRIBUTE_KERNEL_EXEC_TIMEOUT: * 1 if there is a run time limit for kernels executed on the device, or * 0 if not; *

    *
  • *
  • *

    CU_DEVICE_ATTRIBUTE_INTEGRATED: * 1 if the device is integrated with the memory subsystem, or 0 if not; *

    *
  • *
  • *

    CU_DEVICE_ATTRIBUTE_CAN_MAP_HOST_MEMORY: * 1 if the device can map host memory into the CUDA address space, or 0 * if not; *

    *
  • *
  • *
    * CU_DEVICE_ATTRIBUTE_COMPUTE_MODE: * Compute mode that device is currently in. Available modes are as * follows: *
      *
    • *

      CU_COMPUTEMODE_DEFAULT: * Default mode - Device is not restricted and can have multiple CUDA * contexts present at a single time. *

      *
    • *
    • *

      CU_COMPUTEMODE_EXCLUSIVE: * Compute-exclusive mode - Device can have only one CUDA context present * on it at a time. *

      *
    • *
    • *

      CU_COMPUTEMODE_PROHIBITED: * Compute-prohibited mode - Device is prohibited from creating new CUDA * contexts. *

      *
    • *
    • *

      CU_COMPUTEMODE_EXCLUSIVE_PROCESS: Compute-exclusive-process mode - * Device can have only one context used by a single process at a time. *

      *
    • *
    *
    *
  • *
  • *

    CU_DEVICE_ATTRIBUTE_CONCURRENT_KERNELS: * 1 if the device supports executing multiple kernels within the same * context simultaneously, or 0 if not. It is not guaranteed * that multiple kernels will be * resident on the device concurrently so this feature should not be * relied upon for correctness; *

    *
  • *
  • *

    CU_DEVICE_ATTRIBUTE_ECC_ENABLED: * 1 if error correction is enabled on the device, 0 if error correction * is disabled or not supported by the device; *

    *
  • *
  • *

    CU_DEVICE_ATTRIBUTE_PCI_BUS_ID: * PCI bus identifier of the device; *

    *
  • *
  • *

    CU_DEVICE_ATTRIBUTE_PCI_DEVICE_ID: * PCI device (also known as slot) identifier of the device; *

    *
  • *
  • *

    CU_DEVICE_ATTRIBUTE_PCI_DOMAIN_ID: * PCI domain identifier of the device *

    *
  • *
  • *

    CU_DEVICE_ATTRIBUTE_TCC_DRIVER: * 1 if the device is using a TCC driver. TCC is only available on Tesla * hardware running Windows Vista or later; *

    *
  • *
  • *

    CU_DEVICE_ATTRIBUTE_MEMORY_CLOCK_RATE: * Peak memory clock frequency in kilohertz; *

    *
  • *
  • *

    CU_DEVICE_ATTRIBUTE_GLOBAL_MEMORY_BUS_WIDTH: Global memory bus width * in bits; *

    *
  • *
  • *

    CU_DEVICE_ATTRIBUTE_L2_CACHE_SIZE: * Size of L2 cache in bytes. 0 if the device doesn't have L2 cache; *

    *
  • *
  • *

    CU_DEVICE_ATTRIBUTE_MAX_THREADS_PER_MULTIPROCESSOR: Maximum resident * threads per multiprocessor; *

    *
  • *
  • *

    CU_DEVICE_ATTRIBUTE_UNIFIED_ADDRESSING: * 1 if the device shares a unified address space with the host, or 0 if * not; *

    *
  • *
  • *

    CU_DEVICE_ATTRIBUTE_COMPUTE_CAPABILITY_MAJOR: Major compute capability * version number; *

    *
  • *
  • *

    CU_DEVICE_ATTRIBUTE_COMPUTE_CAPABILITY_MINOR: Minor compute capability * version number; *

    *
  • *
  • *

    CU_DEVICE_ATTRIBUTE_GLOBAL_L1_CACHE_SUPPORTED: 1 if device supports caching globals * in L1 cache, 0 if caching globals in L1 cache is not supported by the device *

    *
  • *
  • *

    CU_DEVICE_ATTRIBUTE_LOCAL_L1_CACHE_SUPPORTED: 1 if device supports caching locals * in L1 cache, 0 if caching locals in L1 cache is not supported by the device; *

    *
  • *
  • *

    CU_DEVICE_ATTRIBUTE_MAX_SHARED_MEMORY_PER_MULTIPROCESSOR: Maximum amount of * shared memory available to a multiprocessor in bytes; this amount is shared * by all thread blocks simultaneously resident on a multiprocessor; *

    *
  • *
  • *

    CU_DEVICE_ATTRIBUTE_MAX_REGISTERS_PER_MULTIPROCESSOR: Maximum number of 32-bit * registers available to a multiprocessor; this number is shared by all thread * blocks simultaneously resident on a multiprocessor; *

    *
  • *
  • *

    CU_DEVICE_ATTRIBUTE_MANAGED_MEMORY: 1 if device supports allocating managed memory * on this system, 0 if allocating managed memory is not supported by the device on this system. *

    *
  • *
  • *

    CU_DEVICE_ATTRIBUTE_MULTI_GPU_BOARD: 1 if device is on a multi-GPU board, 0 if not. *

    *
  • *
  • *

    CU_DEVICE_ATTRIBUTE_MULTI_GPU_BOARD_GROUP_ID: Unique identifier for a group of devices * associated with the same board. Devices on the same multi-GPU board will share the same identifier. *

    *
  • *
  • *

    CU_DEVICE_ATTRIBUTE_HOST_NATIVE_ATOMIC_SUPPORTED: 1 if Link between the device and the host * supports native atomic operations. *

    *
  • *
  • *

    CU_DEVICE_ATTRIBUTE_SINGLE_TO_DOUBLE_PRECISION_PERF_RATIO: Ratio of single precision performance * (in floating-point operations per second) to double precision performance. *

    *
  • *
  • *

    CU_DEVICE_ATTRIBUTE_PAGEABLE_MEMORY_ACCESS: Device suppports coherently accessing * pageable memory without calling cudaHostRegister on it. *

    *
  • *
  • *

    CU_DEVICE_ATTRIBUTE_CONCURRENT_MANAGED_ACCESS: Device can coherently access managed memory * concurrently with the CPU. *

    *
  • *
  • *

    CU_DEVICE_ATTRIBUTE_COMPUTE_PREEMPTION_SUPPORTED: Device supports Compute Preemption. *

    *
  • *
  • *

    CU_DEVICE_ATTRIBUTE_CAN_USE_HOST_POINTER_FOR_REGISTERED_MEM: Device can access host registered * memory at the same virtual address as the CPU. *

    *
  • *
  • *

    CU_DEVICE_ATTRIBUTE_MAX_SHARED_MEMORY_PER_BLOCK_OPTIN: The maximum per block shared memory size * suported on this device. This is the maximum value that can be opted into when using the cuFuncSetAttribute() call. * For more details see ::CU_FUNC_ATTRIBUTE_MAX_DYNAMIC_SHARED_SIZE_BYTES *

    *
  • *
*

*
* Note: *

Note that this * function may also return error codes from previous, asynchronous * launches. *

*
*

*
* * @param pi Returned device attribute value * @param attrib Device attribute to query * @param dev Device handle * * @return CUDA_SUCCESS, CUDA_ERROR_DEINITIALIZED, CUDA_ERROR_NOT_INITIALIZED, * CUDA_ERROR_INVALID_CONTEXT, CUDA_ERROR_INVALID_VALUE, * CUDA_ERROR_INVALID_DEVICE * * @see JCudaDriver#cuDeviceGetCount * @see JCudaDriver#cuDeviceGetName * @see JCudaDriver#cuDeviceGet * @see JCudaDriver#cuDeviceTotalMem */ public static int cuDeviceGetAttribute(int pi[], int attrib, CUdevice dev) { return checkResult(cuDeviceGetAttributeNative(pi, attrib, dev)); } private static native int cuDeviceGetAttributeNative(int pi[], int attrib, CUdevice dev); /** * Returns the latest CUDA version supported by driver. * *
     * <pre>
     * CUresult cuDriverGetVersion(int* driverVersion)
     * </pre>
     * <p>
     * Returns in <code>driverVersion[0]</code> the version of CUDA supported
     * by the driver. The version is returned as
     * <code>(1000 * major + 10 * minor)</code>; for example, CUDA 9.2 would
     * be represented by 9020. This function returns
     * CUDA_ERROR_INVALID_VALUE if the <code>driverVersion</code> argument
     * is NULL.
     * <p>
     * Note: This function may also return error codes from previous,
     * asynchronous launches.
     *
     * @param driverVersion Returns the CUDA driver version
     *
     * @return CUDA_SUCCESS, CUDA_ERROR_INVALID_VALUE
     */
    public static int cuDriverGetVersion (int driverVersion[])
    {
        int status = cuDriverGetVersionNative(driverVersion);
        return checkResult(status);
    }
    private static native int cuDriverGetVersionNative(int driverVersion[]);

    /**
     * Create a CUDA context.
     *
     * CUresult cuCtxCreate (
     *      CUcontext* pctx,
     *      unsigned int  flags,
     *      CUdevice dev )
     * 
*
*

Create a CUDA context. Creates a new * CUDA context and associates it with the calling thread. The flags parameter is described below. The context is created with * a usage count of 1 and the caller of cuCtxCreate() must call * cuCtxDestroy() or when done using the context. If a context is already * current to the thread, it is supplanted by the newly created context * and may be restored by a subsequent call * to cuCtxPopCurrent(). *

*

The three LSBs of the flags * parameter can be used to control how the OS thread, which owns the CUDA * context at the time of an API call, interacts with * the OS scheduler when waiting for results * from the GPU. Only one of the scheduling flags can be set when creating * a context. *

*
    *
  • *

    CU_CTX_SCHED_AUTO: The default * value if the flags parameter is zero, uses a heuristic based * on the number of active CUDA contexts in the process C and the number * of logical * processors in the system P. If * C > P, then CUDA will yield to other OS threads when waiting for * the GPU, otherwise CUDA will * not yield while waiting for * results and actively spin on the processor. *

    *
  • *
*

*
    *
  • *

    CU_CTX_SCHED_SPIN: Instruct * CUDA to actively spin when waiting for results from the GPU. This can * decrease latency when waiting for the GPU, * but may lower the performance * of CPU threads if they are performing work in parallel with the CUDA * thread. *

    *
  • *
*

*
    *
  • *

    CU_CTX_SCHED_YIELD: Instruct * CUDA to yield its thread when waiting for results from the GPU. This * can increase latency when waiting for the * GPU, but can increase the * performance of CPU threads performing work in parallel with the GPU. *

    *
  • *
*

*
    *
  • *

    CU_CTX_SCHED_BLOCKING_SYNC: * Instruct CUDA to block the CPU thread on a synchronization primitive * when waiting for the GPU to finish work. *

    *
  • *
*

*
    *
  • *

    CU_CTX_BLOCKING_SYNC: Instruct * CUDA to block the CPU thread on a synchronization primitive when * waiting for the GPU to finish work. *

    *

    Deprecated: * This flag was deprecated as of CUDA 4.0 and was replaced with * CU_CTX_SCHED_BLOCKING_SYNC. *

    *
  • *
*

*
    *
  • *

    CU_CTX_MAP_HOST: Instruct CUDA * to support mapped pinned allocations. This flag must be set in order * to allocate pinned host memory that is * accessible to the GPU. *

    *
  • *
*

*
    *
  • *

    CU_CTX_LMEM_RESIZE_TO_MAX: * Instruct CUDA to not reduce local memory after resizing local memory * for a kernel. This can prevent thrashing by local memory * allocations when launching many * kernels with high local memory usage at the cost of potentially * increased memory usage. *

    *
  • *
*

*

Context creation will fail with * CUDA_ERROR_UNKNOWN if the compute mode of the device is * CU_COMPUTEMODE_PROHIBITED. Similarly, context creation will also fail * with CUDA_ERROR_UNKNOWN if the compute mode for the device is set to * CU_COMPUTEMODE_EXCLUSIVE and there is already an active context on the * device. The function cuDeviceGetAttribute() can be used with * CU_DEVICE_ATTRIBUTE_COMPUTE_MODE to determine the compute mode of the * device. The nvidia-smi tool can be used to set the compute mode for * devices. Documentation * for nvidia-smi can be obtained by passing * a -h option to it. *

*
* Note: *

Note that this * function may also return error codes from previous, asynchronous * launches. *

*
*

*
* * @param pctx Returned context handle of the new context * @param flags Context creation flags * @param dev Device to create context on * * @return CUDA_SUCCESS, CUDA_ERROR_DEINITIALIZED, CUDA_ERROR_NOT_INITIALIZED, * CUDA_ERROR_INVALID_CONTEXT, CUDA_ERROR_INVALID_DEVICE, * CUDA_ERROR_INVALID_VALUE, CUDA_ERROR_OUT_OF_MEMORY, CUDA_ERROR_UNKNOWN * * @see JCudaDriver#cuCtxDestroy * @see JCudaDriver#cuCtxGetApiVersion * @see JCudaDriver#cuCtxGetCacheConfig * @see JCudaDriver#cuCtxGetDevice * @see JCudaDriver#cuCtxGetLimit * @see JCudaDriver#cuCtxPopCurrent * @see JCudaDriver#cuCtxPushCurrent * @see JCudaDriver#cuCtxSetCacheConfig * @see JCudaDriver#cuCtxSetLimit * @see JCudaDriver#cuCtxSynchronize */ public static int cuCtxCreate(CUcontext pctx, int flags, CUdevice dev) { return checkResult(cuCtxCreateNative(pctx, flags, dev)); } private static native int cuCtxCreateNative(CUcontext pctx, int flags, CUdevice dev); /** * Destroy a CUDA context. * *
     * CUresult cuCtxDestroy (
     *      CUcontext ctx )
     * 
*
*

Destroy a CUDA context. Destroys the * CUDA context specified by ctx. The context ctx will * be destroyed regardless of how many threads it is current to. It is * the responsibility of the calling function to ensure * that no API call issues using ctx while cuCtxDestroy() is executing. *

*

If ctx is current to the * calling thread then ctx will also be popped from the current * thread's context stack (as though cuCtxPopCurrent() were called). If * ctx is current to other threads, then ctx will * remain current to those threads, and attempting to access ctx * from those threads will result in the error * CUDA_ERROR_CONTEXT_IS_DESTROYED. *

*
* Note: *

Note that this * function may also return error codes from previous, asynchronous * launches. *

*
*

*
* * @param ctx Context to destroy * * @return CUDA_SUCCESS, CUDA_ERROR_DEINITIALIZED, CUDA_ERROR_NOT_INITIALIZED, * CUDA_ERROR_INVALID_CONTEXT, CUDA_ERROR_INVALID_VALUE * * @see JCudaDriver#cuCtxCreate * @see JCudaDriver#cuCtxGetApiVersion * @see JCudaDriver#cuCtxGetCacheConfig * @see JCudaDriver#cuCtxGetDevice * @see JCudaDriver#cuCtxGetLimit * @see JCudaDriver#cuCtxPopCurrent * @see JCudaDriver#cuCtxPushCurrent * @see JCudaDriver#cuCtxSetCacheConfig * @see JCudaDriver#cuCtxSetLimit * @see JCudaDriver#cuCtxSynchronize */ public static int cuCtxDestroy(CUcontext ctx) { return checkResult(cuCtxDestroyNative(ctx)); } private static native int cuCtxDestroyNative(CUcontext ctx); /** * Increment a context's usage-count. * *
     * CUresult cuCtxAttach (
     *      CUcontext* pctx,
     *      unsigned int  flags )
     * 
*
*

Increment a context's usage-count. * DeprecatedNote that this function is * deprecated and should not be used. *

*

Increments the usage count of the * context and passes back a context handle in *pctx that must * be passed to cuCtxDetach() when the application is done with the * context. cuCtxAttach() fails if there is no context current to the * thread. *

*

Currently, the flags parameter * must be 0. *

*
* Note: *

Note that this * function may also return error codes from previous, asynchronous * launches. *

*
*

*
* * @param pctx Returned context handle of the current context * @param flags Context attach flags (must be 0) * * @return CUDA_SUCCESS, CUDA_ERROR_DEINITIALIZED, CUDA_ERROR_NOT_INITIALIZED, * CUDA_ERROR_INVALID_CONTEXT, CUDA_ERROR_INVALID_VALUE * * @see JCudaDriver#cuCtxCreate * @see JCudaDriver#cuCtxDestroy * @see JCudaDriver#cuCtxDetach * @see JCudaDriver#cuCtxGetApiVersion * @see JCudaDriver#cuCtxGetCacheConfig * @see JCudaDriver#cuCtxGetDevice * @see JCudaDriver#cuCtxGetLimit * @see JCudaDriver#cuCtxPopCurrent * @see JCudaDriver#cuCtxPushCurrent * @see JCudaDriver#cuCtxSetCacheConfig * @see JCudaDriver#cuCtxSetLimit * @see JCudaDriver#cuCtxSynchronize * * @deprecated Deprecated in CUDA */ @Deprecated public static int cuCtxAttach(CUcontext pctx, int flags) { return checkResult(cuCtxAttachNative(pctx, flags)); } private static native int cuCtxAttachNative(CUcontext pctx, int flags); /** * Decrement a context's usage-count. * *
     * CUresult cuCtxDetach (
     *      CUcontext ctx )
     * 
*
*

Decrement a context's usage-count. * DeprecatedNote that this function is * deprecated and should not be used. *

*

Decrements the usage count of the * context ctx, and destroys the context if the usage count goes * to 0. The context must be a handle that was passed back by cuCtxCreate() * or cuCtxAttach(), and must be current to the calling thread. *

*
* Note: *

Note that this * function may also return error codes from previous, asynchronous * launches. *

*
*

*
* * @param ctx Context to destroy * * @return CUDA_SUCCESS, CUDA_ERROR_DEINITIALIZED, CUDA_ERROR_NOT_INITIALIZED, * CUDA_ERROR_INVALID_CONTEXT * * @see JCudaDriver#cuCtxCreate * @see JCudaDriver#cuCtxDestroy * @see JCudaDriver#cuCtxGetApiVersion * @see JCudaDriver#cuCtxGetCacheConfig * @see JCudaDriver#cuCtxGetDevice * @see JCudaDriver#cuCtxGetLimit * @see JCudaDriver#cuCtxPopCurrent * @see JCudaDriver#cuCtxPushCurrent * @see JCudaDriver#cuCtxSetCacheConfig * @see JCudaDriver#cuCtxSetLimit * @see JCudaDriver#cuCtxSynchronize * * @deprecated Deprecated in CUDA */ @Deprecated public static int cuCtxDetach(CUcontext ctx) { return checkResult(cuCtxDetachNative(ctx)); } private static native int cuCtxDetachNative(CUcontext ctx); /** * Pushes a context on the current CPU thread. * *
     * CUresult cuCtxPushCurrent (
     *      CUcontext ctx )
     * 
*
*

Pushes a context on the current CPU * thread. Pushes the given context ctx onto the CPU thread's * stack of current contexts. The specified context becomes the CPU * thread's current context, so all CUDA * functions that operate on the current * context are affected. *

*

The previous current context may be made * current again by calling cuCtxDestroy() or cuCtxPopCurrent(). *

*
* Note: *

Note that this * function may also return error codes from previous, asynchronous * launches. *

*
*

*
* * @param ctx Context to push * * @return CUDA_SUCCESS, CUDA_ERROR_DEINITIALIZED, CUDA_ERROR_NOT_INITIALIZED, * CUDA_ERROR_INVALID_CONTEXT, CUDA_ERROR_INVALID_VALUE * * @see JCudaDriver#cuCtxCreate * @see JCudaDriver#cuCtxDestroy * @see JCudaDriver#cuCtxGetApiVersion * @see JCudaDriver#cuCtxGetCacheConfig * @see JCudaDriver#cuCtxGetDevice * @see JCudaDriver#cuCtxGetLimit * @see JCudaDriver#cuCtxPopCurrent * @see JCudaDriver#cuCtxSetCacheConfig * @see JCudaDriver#cuCtxSetLimit * @see JCudaDriver#cuCtxSynchronize */ public static int cuCtxPushCurrent(CUcontext ctx) { return checkResult(cuCtxPushCurrentNative(ctx)); } private static native int cuCtxPushCurrentNative(CUcontext ctx); /** * Pops the current CUDA context from the current CPU thread. * *
     * CUresult cuCtxPopCurrent (
     *      CUcontext* pctx )
     * 
*
*

Pops the current CUDA context from the * current CPU thread. Pops the current CUDA context from the CPU thread * and passes back * the old context handle in *pctx. * That context may then be made current to a different CPU thread by * calling cuCtxPushCurrent(). *

*

If a context was current to the CPU * thread before cuCtxCreate() or cuCtxPushCurrent() was called, this * function makes that context current to the CPU thread again. *

*
* Note: *

Note that this * function may also return error codes from previous, asynchronous * launches. *

*
*

*
* * @param pctx Returned new context handle * * @return CUDA_SUCCESS, CUDA_ERROR_DEINITIALIZED, CUDA_ERROR_NOT_INITIALIZED, * CUDA_ERROR_INVALID_CONTEXT * * @see JCudaDriver#cuCtxCreate * @see JCudaDriver#cuCtxDestroy * @see JCudaDriver#cuCtxGetApiVersion * @see JCudaDriver#cuCtxGetCacheConfig * @see JCudaDriver#cuCtxGetDevice * @see JCudaDriver#cuCtxGetLimit * @see JCudaDriver#cuCtxPushCurrent * @see JCudaDriver#cuCtxSetCacheConfig * @see JCudaDriver#cuCtxSetLimit * @see JCudaDriver#cuCtxSynchronize */ public static int cuCtxPopCurrent(CUcontext pctx) { return checkResult(cuCtxPopCurrentNative(pctx)); } private static native int cuCtxPopCurrentNative(CUcontext pctx); /** * Binds the specified CUDA context to the calling CPU thread. * *
     * <pre>
     * CUresult cuCtxSetCurrent(CUcontext ctx)
     * </pre>
     * <p>
     * Binds the specified CUDA context to the calling CPU thread. If
     * <code>ctx</code> is NULL, then the CUDA context previously bound to
     * the calling CPU thread is unbound and CUDA_SUCCESS is returned.
     * <p>
     * If there exists a CUDA context stack on the calling CPU thread, this
     * will replace the top of that stack with <code>ctx</code>. If
     * <code>ctx</code> is NULL, this is equivalent to popping the top of
     * the calling CPU thread's CUDA context stack (or a no-op if the stack
     * is empty).
     * <p>
     * Note: This function may also return error codes from previous,
     * asynchronous launches.
     *
     * @param ctx Context to bind to the calling CPU thread
     *
     * @return CUDA_SUCCESS, CUDA_ERROR_DEINITIALIZED, CUDA_ERROR_NOT_INITIALIZED,
     * CUDA_ERROR_INVALID_CONTEXT
     *
     * @see JCudaDriver#cuCtxGetCurrent
     * @see JCudaDriver#cuCtxCreate
     * @see JCudaDriver#cuCtxDestroy
     */
    public static int cuCtxSetCurrent(CUcontext ctx)
    {
        int status = cuCtxSetCurrentNative(ctx);
        return checkResult(status);
    }
    private static native int cuCtxSetCurrentNative(CUcontext ctx);

    /**
     * Returns the CUDA context bound to the calling CPU thread.
     *
     * CUresult cuCtxGetCurrent (
     *      CUcontext* pctx )
     * 
*
*

Returns in *pctx the CUDA context bound * to the calling CPU thread. If no context is bound to the calling CPU * thread then *pctx is set to NULL and CUDA_SUCCESS is * returned. *

*
* Note: *

Note that this * function may also return error codes from previous, asynchronous * launches. *

*
*

*
* * @param pctx Returned context handle * * @return CUDA_SUCCESS, CUDA_ERROR_DEINITIALIZED, * CUDA_ERROR_NOT_INITIALIZED, * * @see JCudaDriver#cuCtxSetCurrent * @see JCudaDriver#cuCtxCreate * @see JCudaDriver#cuCtxDestroy */ public static int cuCtxGetCurrent(CUcontext pctx) { return checkResult(cuCtxGetCurrentNative(pctx)); } private static native int cuCtxGetCurrentNative(CUcontext pctx); /** * Returns the device ID for the current context. * *
     * CUresult cuCtxGetDevice (
     *      CUdevice* device )
     * 
*
*

Returns the device ID for the current * context. Returns in *device the ordinal of the current * context's device. *

*
* Note: *

Note that this * function may also return error codes from previous, asynchronous * launches. *

*
*

*
* * @param device Returned device ID for the current context * * @return CUDA_SUCCESS, CUDA_ERROR_DEINITIALIZED, CUDA_ERROR_NOT_INITIALIZED, * CUDA_ERROR_INVALID_CONTEXT, CUDA_ERROR_INVALID_VALUE, * * @see JCudaDriver#cuCtxCreate * @see JCudaDriver#cuCtxDestroy * @see JCudaDriver#cuCtxGetApiVersion * @see JCudaDriver#cuCtxGetCacheConfig * @see JCudaDriver#cuCtxGetLimit * @see JCudaDriver#cuCtxPopCurrent * @see JCudaDriver#cuCtxPushCurrent * @see JCudaDriver#cuCtxSetCacheConfig * @see JCudaDriver#cuCtxSetLimit * @see JCudaDriver#cuCtxSynchronize */ public static int cuCtxGetDevice(CUdevice device) { return checkResult(cuCtxGetDeviceNative(device)); } private static native int cuCtxGetDeviceNative(CUdevice device); public static int cuCtxGetFlags(int flags[]) { return checkResult(cuCtxGetFlagsNative(flags)); } private static native int cuCtxGetFlagsNative(int flags[]); /** * Block for a context's tasks to complete. * *
     * CUresult cuCtxSynchronize (
     *      void )
     * 
*
*

Block for a context's tasks to complete. * Blocks until the device has completed all preceding requested tasks. * cuCtxSynchronize() returns an error if one of the preceding tasks * failed. If the context was created with the CU_CTX_SCHED_BLOCKING_SYNC * flag, the CPU thread will block until the GPU context has finished its * work. *

*
* Note: *

Note that this * function may also return error codes from previous, asynchronous * launches. *

*
*

*
* * * @return CUDA_SUCCESS, CUDA_ERROR_DEINITIALIZED, CUDA_ERROR_NOT_INITIALIZED, * CUDA_ERROR_INVALID_CONTEXT * * @see JCudaDriver#cuCtxCreate * @see JCudaDriver#cuCtxDestroy * @see JCudaDriver#cuCtxGetApiVersion * @see JCudaDriver#cuCtxGetCacheConfig * @see JCudaDriver#cuCtxGetDevice * @see JCudaDriver#cuCtxGetLimit * @see JCudaDriver#cuCtxPopCurrent * @see JCudaDriver#cuCtxPushCurrent * @see JCudaDriver#cuCtxSetCacheConfig * @see JCudaDriver#cuCtxSetLimit */ public static int cuCtxSynchronize() { return checkResult(cuCtxSynchronizeNative()); } private static native int cuCtxSynchronizeNative(); /** * Loads a compute module. * *
     * CUresult cuModuleLoad (
     *      CUmodule* module,
     *      const char* fname )
     * 
*
*

Loads a compute module. Takes a filename * fname and loads the corresponding module module * into the current context. The CUDA driver API does not attempt to * lazily allocate the resources needed by a module; if the * memory for functions and data (constant * and global) needed by the module cannot be allocated, cuModuleLoad() * fails. The file should be a cubin file as output by nvcc, or a PTX file either as output by nvcc * or handwritten, or a fatbin file as output by nvcc * from toolchain 4.0 or later. *

*
* Note: *

Note that this * function may also return error codes from previous, asynchronous * launches. *

*
*

*
* * @param module Returned module * @param fname Filename of module to load * * @return CUDA_SUCCESS, CUDA_ERROR_DEINITIALIZED, CUDA_ERROR_NOT_INITIALIZED, * CUDA_ERROR_INVALID_CONTEXT, CUDA_ERROR_INVALID_VALUE, CUDA_ERROR_NOT_FOUND, * CUDA_ERROR_OUT_OF_MEMORY, CUDA_ERROR_FILE_NOT_FOUND, * CUDA_ERROR_SHARED_OBJECT_SYMBOL_NOT_FOUND, * CUDA_ERROR_SHARED_OBJECT_INIT_FAILED * * @see JCudaDriver#cuModuleGetFunction * @see JCudaDriver#cuModuleGetGlobal * @see JCudaDriver#cuModuleGetTexRef * @see JCudaDriver#cuModuleLoadData * @see JCudaDriver#cuModuleLoadDataEx * @see JCudaDriver#cuModuleLoadFatBinary * @see JCudaDriver#cuModuleUnload */ public static int cuModuleLoad(CUmodule module, String fname) { return checkResult(cuModuleLoadNative(module, fname)); } private static native int cuModuleLoadNative(CUmodule module, String fname); /** * Load a module's data. * *
     * CUresult cuModuleLoadData (
     *      CUmodule* module,
     *      const void* image )
     * 
*
*

Load a module's data. Takes a pointer * image and loads the corresponding module module * into the current context. The pointer may be obtained by mapping a * cubin or PTX or fatbin file, passing a cubin or PTX or * fatbin file as a NULL-terminated text * string, or incorporating a cubin or fatbin object into the executable * resources and * using operating system calls such as * Windows FindResource() to obtain the pointer. *

*
* Note: *

Note that this * function may also return error codes from previous, asynchronous * launches. *

*
*

*
* * @param module Returned module * @param image Module data to load * * @return CUDA_SUCCESS, CUDA_ERROR_DEINITIALIZED, CUDA_ERROR_NOT_INITIALIZED, * CUDA_ERROR_INVALID_CONTEXT, CUDA_ERROR_INVALID_VALUE, * CUDA_ERROR_OUT_OF_MEMORY, CUDA_ERROR_SHARED_OBJECT_SYMBOL_NOT_FOUND, * CUDA_ERROR_SHARED_OBJECT_INIT_FAILED * * @see JCudaDriver#cuModuleGetFunction * @see JCudaDriver#cuModuleGetGlobal * @see JCudaDriver#cuModuleGetTexRef * @see JCudaDriver#cuModuleLoad * @see JCudaDriver#cuModuleLoadDataEx * @see JCudaDriver#cuModuleLoadFatBinary * @see JCudaDriver#cuModuleUnload */ public static int cuModuleLoadData(CUmodule module, byte image[]) { return checkResult(cuModuleLoadDataNative(module, image)); } private static native int cuModuleLoadDataNative(CUmodule module, byte image[]); /** * Load a module's data with options.
*
* Note: It is hardly possible to properly pass in the required * option values for this method. Thus, the arguments here must be
* numOptions=0
* options=new int[0]
* optionValues=Pointer.to(new int[0]))
* For passing in real options, use * {@link #cuModuleLoadDataJIT(CUmodule, Pointer, JITOptions)} instead * *
     * CUresult cuModuleLoadDataEx (
     *      CUmodule* module,
     *      const void* image,
     *      unsigned int  numOptions,
     *      CUjit_option* options,
     *      void** optionValues )
     * 
*
*

Load a module's data with options. Takes * a pointer image and loads the corresponding module module into the current context. The pointer may be obtained by * mapping a cubin or PTX or fatbin file, passing a cubin or PTX or * fatbin file as a NULL-terminated text * string, or incorporating a cubin or fatbin object into the executable * resources and * using operating system calls such as * Windows FindResource() to obtain the pointer. Options are * passed as an array via options and any corresponding * parameters are passed in optionValues. The number of total * options is supplied via numOptions. Any outputs will be * returned via optionValues. Supported options are (types for * the option values are specified in parentheses after the option name): *

*
    *
  • *

    CU_JIT_MAX_REGISTERS: (unsigned * int) input specifies the maximum number of registers per thread; *

    *
  • *
  • *

    CU_JIT_THREADS_PER_BLOCK: * (unsigned int) input specifies number of threads per block to target * compilation for; output returns the number of threads * the compiler actually targeted; *

    *
  • *
  • *

    CU_JIT_WALL_TIME: (float) * output returns the float value of wall clock time, in milliseconds, * spent compiling the PTX code; *

    *
  • *
  • *

    CU_JIT_INFO_LOG_BUFFER: (char*) * input is a pointer to a buffer in which to print any informational log * messages from PTX assembly (the buffer size * is specified via option * CU_JIT_INFO_LOG_BUFFER_SIZE_BYTES); *

    *
  • *
  • *

    CU_JIT_INFO_LOG_BUFFER_SIZE_BYTES: * (unsigned int) input is the size in bytes of the buffer; output is the * number of bytes filled with messages; *

    *
  • *
  • *

    CU_JIT_ERROR_LOG_BUFFER: * (char*) input is a pointer to a buffer in which to print any error log * messages from PTX assembly (the buffer size is specified * via option * CU_JIT_ERROR_LOG_BUFFER_SIZE_BYTES); *

    *
  • *
  • *

    CU_JIT_ERROR_LOG_BUFFER_SIZE_BYTES: * (unsigned int) input is the size in bytes of the buffer; output is the * number of bytes filled with messages; *

    *
  • *
  • *

    CU_JIT_OPTIMIZATION_LEVEL: * (unsigned int) input is the level of optimization to apply to generated * code (0 - 4), with 4 being the default and highest * level; *

    *
  • *
  • *

    CU_JIT_TARGET_FROM_CUCONTEXT: * (No option value) causes compilation target to be determined based on * current attached context (default); *

    *
  • *
  • *
    * CU_JIT_TARGET: (unsigned int * for enumerated type CUjit_target_enum) input is the compilation target * based on supplied CUjit_target_enum; * possible values are: *
      *
    • *

      CU_TARGET_COMPUTE_10

      *
    • *
    • *

      CU_TARGET_COMPUTE_11

      *
    • *
    • *

      CU_TARGET_COMPUTE_12

      *
    • *
    • *

      CU_TARGET_COMPUTE_13

      *
    • *
    • *

      CU_TARGET_COMPUTE_20

      *
    • *
    *
    *
  • *
  • *
    * CU_JIT_FALLBACK_STRATEGY: * (unsigned int for enumerated type CUjit_fallback_enum) chooses fallback * strategy if matching cubin is not found; possible * values are: *
      *
    • *

      CU_PREFER_PTX

      *
    • *
    • *

      CU_PREFER_BINARY

      *
    • *
    *
    *
  • *
*

*
* Note: *

Note that this * function may also return error codes from previous, asynchronous * launches. *

*
*

*
* * @param module Returned module * @param image Module data to load * @param numOptions Number of options * @param options Options for JIT * @param optionValues Option values for JIT * * @return CUDA_SUCCESS, CUDA_ERROR_DEINITIALIZED, CUDA_ERROR_NOT_INITIALIZED, * CUDA_ERROR_INVALID_CONTEXT, CUDA_ERROR_INVALID_VALUE, * CUDA_ERROR_OUT_OF_MEMORY, CUDA_ERROR_NO_BINARY_FOR_GPU, * CUDA_ERROR_SHARED_OBJECT_SYMBOL_NOT_FOUND, * CUDA_ERROR_SHARED_OBJECT_INIT_FAILED * * @see JCudaDriver#cuModuleGetFunction * @see JCudaDriver#cuModuleGetGlobal * @see JCudaDriver#cuModuleGetTexRef * @see JCudaDriver#cuModuleLoad * @see JCudaDriver#cuModuleLoadData * @see JCudaDriver#cuModuleLoadFatBinary * @see JCudaDriver#cuModuleUnload */ public static int cuModuleLoadDataEx (CUmodule phMod, Pointer p, int numOptions, int options[], Pointer optionValues) { // Although it should be possible to pass 'null' for these parameters // when numOptions==0, the driver crashes when they are 'null', so // they are replaced by non-null (but empty) arrays here. // Also see the corresponding notes in the native method. if (numOptions == 0) { if (options == null) { options = new int[0]; } if (optionValues == null) { optionValues = Pointer.to(new int[0]); } } return checkResult(cuModuleLoadDataExNative( phMod, p, numOptions, options, optionValues)); } private static native int cuModuleLoadDataExNative(CUmodule phMod, Pointer p, int numOptions, int options[], Pointer optionValues); /** * Load a module's data. * *
     * CUresult cuModuleLoadFatBinary (
     *      CUmodule* module,
     *      const void* fatCubin )
     * 
*
*

Load a module's data. Takes a pointer * fatCubin and loads the corresponding module module * into the current context. The pointer represents a fat binary object, * which is a collection of different cubin and/or PTX * files, all representing the same device * code, but compiled and optimized for different architectures. *

*

Prior to CUDA 4.0, there was no * documented API for constructing and using fat binary objects by * programmers. Starting with * CUDA 4.0, fat binary objects can be * constructed by providing the -fatbin option to nvcc. * More information can be found in the nvcc document. *

*
* Note: *

Note that this * function may also return error codes from previous, asynchronous * launches. *

*
*

*
* * @param module Returned module * @param fatCubin Fat binary to load * * @return CUDA_SUCCESS, CUDA_ERROR_DEINITIALIZED, CUDA_ERROR_NOT_INITIALIZED, * CUDA_ERROR_INVALID_CONTEXT, CUDA_ERROR_INVALID_VALUE, CUDA_ERROR_NOT_FOUND, * CUDA_ERROR_OUT_OF_MEMORY, CUDA_ERROR_NO_BINARY_FOR_GPU, * CUDA_ERROR_SHARED_OBJECT_SYMBOL_NOT_FOUND, * CUDA_ERROR_SHARED_OBJECT_INIT_FAILED * * @see JCudaDriver#cuModuleGetFunction * @see JCudaDriver#cuModuleGetGlobal * @see JCudaDriver#cuModuleGetTexRef * @see JCudaDriver#cuModuleLoad * @see JCudaDriver#cuModuleLoadData * @see JCudaDriver#cuModuleLoadDataEx * @see JCudaDriver#cuModuleUnload */ public static int cuModuleLoadFatBinary(CUmodule module, byte fatCubin[]) { return checkResult(cuModuleLoadFatBinaryNative(module, fatCubin)); } private static native int cuModuleLoadFatBinaryNative(CUmodule module, byte fatCubin[]); /** * Unloads a module. * *
     * CUresult cuModuleUnload (
     *      CUmodule hmod )
     * 
*
*

Unloads a module. Unloads a module hmod from the current context. *

*
* Note: *

Note that this * function may also return error codes from previous, asynchronous * launches. *

*
*

*
* * @param hmod Module to unload * * @return CUDA_SUCCESS, CUDA_ERROR_DEINITIALIZED, CUDA_ERROR_NOT_INITIALIZED, * CUDA_ERROR_INVALID_CONTEXT, CUDA_ERROR_INVALID_VALUE * * @see JCudaDriver#cuModuleGetFunction * @see JCudaDriver#cuModuleGetGlobal * @see JCudaDriver#cuModuleGetTexRef * @see JCudaDriver#cuModuleLoad * @see JCudaDriver#cuModuleLoadData * @see JCudaDriver#cuModuleLoadDataEx * @see JCudaDriver#cuModuleLoadFatBinary */ public static int cuModuleUnload(CUmodule hmod) { return checkResult(cuModuleUnloadNative(hmod)); } private static native int cuModuleUnloadNative(CUmodule hmod); /** * Returns a function handle. * *
     * CUresult cuModuleGetFunction (
     *      CUfunction* hfunc,
     *      CUmodule hmod,
     *      const char* name )
     * 
*
*

Returns a function handle. Returns in * *hfunc the handle of the function of name name * located in module hmod. If no function of that name exists, * cuModuleGetFunction() returns CUDA_ERROR_NOT_FOUND. *

*
* Note: *

Note that this * function may also return error codes from previous, asynchronous * launches. *

*
*

*
* * @param hfunc Returned function handle * @param hmod Module to retrieve function from * @param name Name of function to retrieve * * @return CUDA_SUCCESS, CUDA_ERROR_DEINITIALIZED, CUDA_ERROR_NOT_INITIALIZED, * CUDA_ERROR_INVALID_CONTEXT, CUDA_ERROR_INVALID_VALUE, * CUDA_ERROR_NOT_FOUND * * @see JCudaDriver#cuModuleGetGlobal * @see JCudaDriver#cuModuleGetTexRef * @see JCudaDriver#cuModuleLoad * @see JCudaDriver#cuModuleLoadData * @see JCudaDriver#cuModuleLoadDataEx * @see JCudaDriver#cuModuleLoadFatBinary * @see JCudaDriver#cuModuleUnload */ public static int cuModuleGetFunction(CUfunction hfunc, CUmodule hmod, String name) { return checkResult(cuModuleGetFunctionNative(hfunc, hmod, name)); } private static native int cuModuleGetFunctionNative(CUfunction hfunc, CUmodule hmod, String name); /** * Returns a global pointer from a module. * *
     * CUresult cuModuleGetGlobal (
     *      CUdeviceptr* dptr,
     *      size_t* bytes,
     *      CUmodule hmod,
     *      const char* name )
     * 
*
*

Returns a global pointer from a module. * Returns in *dptr and *bytes the base pointer and * size of the global of name name located in module hmod. If no variable of that name exists, cuModuleGetGlobal() * returns CUDA_ERROR_NOT_FOUND. Both parameters dptr and bytes are optional. If one of them is NULL, it is ignored. *

*
* Note: *

Note that this * function may also return error codes from previous, asynchronous * launches. *

*
*

*
* * @param dptr Returned global device pointer * @param bytes Returned global size in bytes * @param hmod Module to retrieve global from * @param name Name of global to retrieve * * @return CUDA_SUCCESS, CUDA_ERROR_DEINITIALIZED, CUDA_ERROR_NOT_INITIALIZED, * CUDA_ERROR_INVALID_CONTEXT, CUDA_ERROR_INVALID_VALUE, * CUDA_ERROR_NOT_FOUND * * @see JCudaDriver#cuModuleGetFunction * @see JCudaDriver#cuModuleGetTexRef * @see JCudaDriver#cuModuleLoad * @see JCudaDriver#cuModuleLoadData * @see JCudaDriver#cuModuleLoadDataEx * @see JCudaDriver#cuModuleLoadFatBinary * @see JCudaDriver#cuModuleUnload */ public static int cuModuleGetGlobal(CUdeviceptr dptr, long bytes[], CUmodule hmod, String name) { return checkResult(cuModuleGetGlobalNative(dptr, bytes, hmod, name)); } private static native int cuModuleGetGlobalNative(CUdeviceptr dptr, long bytes[], CUmodule hmod, String name); /** * Returns a handle to a texture reference. * *
     * CUresult cuModuleGetTexRef (
     *      CUtexref* pTexRef,
     *      CUmodule hmod,
     *      const char* name )
     * 
*
*

Returns a handle to a texture reference. * Returns in *pTexRef the handle of the texture reference of * name name in the module hmod. If no texture * reference of that name exists, cuModuleGetTexRef() returns * CUDA_ERROR_NOT_FOUND. This texture reference handle should not be * destroyed, since it will be destroyed when the module is unloaded. *

*
* Note: *

Note that this * function may also return error codes from previous, asynchronous * launches. *

*
*

*
* * @param pTexRef Returned texture reference * @param hmod Module to retrieve texture reference from * @param name Name of texture reference to retrieve * * @return CUDA_SUCCESS, CUDA_ERROR_DEINITIALIZED, CUDA_ERROR_NOT_INITIALIZED, * CUDA_ERROR_INVALID_CONTEXT, CUDA_ERROR_INVALID_VALUE, * CUDA_ERROR_NOT_FOUND * * @see JCudaDriver#cuModuleGetFunction * @see JCudaDriver#cuModuleGetGlobal * @see JCudaDriver#cuModuleGetSurfRef * @see JCudaDriver#cuModuleLoad * @see JCudaDriver#cuModuleLoadData * @see JCudaDriver#cuModuleLoadDataEx * @see JCudaDriver#cuModuleLoadFatBinary * @see JCudaDriver#cuModuleUnload */ public static int cuModuleGetTexRef(CUtexref pTexRef, CUmodule hmod, String name) { return checkResult(cuModuleGetTexRefNative(pTexRef, hmod, name)); } private static native int cuModuleGetTexRefNative(CUtexref pTexRef, CUmodule hmod, String name); /** * Returns a handle to a surface reference. * *
     * CUresult cuModuleGetSurfRef (
     *      CUsurfref* pSurfRef,
     *      CUmodule hmod,
     *      const char* name )
     * 
*
*

Returns a handle to a surface reference. * Returns in *pSurfRef the handle of the surface reference of * name name in the module hmod. If no surface * reference of that name exists, cuModuleGetSurfRef() returns * CUDA_ERROR_NOT_FOUND. *

*
* Note: *

Note that this * function may also return error codes from previous, asynchronous * launches. *

*
*

*
* * @param pSurfRef Returned surface reference * @param hmod Module to retrieve surface reference from * @param name Name of surface reference to retrieve * * @return CUDA_SUCCESS, CUDA_ERROR_DEINITIALIZED, CUDA_ERROR_NOT_INITIALIZED, * CUDA_ERROR_INVALID_CONTEXT, CUDA_ERROR_INVALID_VALUE, * CUDA_ERROR_NOT_FOUND * * @see JCudaDriver#cuModuleGetFunction * @see JCudaDriver#cuModuleGetGlobal * @see JCudaDriver#cuModuleGetTexRef * @see JCudaDriver#cuModuleLoad * @see JCudaDriver#cuModuleLoadData * @see JCudaDriver#cuModuleLoadDataEx * @see JCudaDriver#cuModuleLoadFatBinary * @see JCudaDriver#cuModuleUnload */ public static int cuModuleGetSurfRef(CUsurfref pSurfRef, CUmodule hmod, String name) { return checkResult(cuModuleGetSurfRefNative(pSurfRef, hmod, name)); } private static native int cuModuleGetSurfRefNative(CUsurfref pSurfRef, CUmodule hmod, String name); public static int cuLinkCreate(JITOptions jitOptions, CUlinkState stateOut) { return checkResult(cuLinkCreateNative(jitOptions, stateOut)); } private static native int cuLinkCreateNative(JITOptions jitOptions, CUlinkState stateOut); public static int cuLinkAddData(CUlinkState state, int type, Pointer data, long size, String name, JITOptions jitOptions) { return checkResult(cuLinkAddDataNative(state, type, data, size, name, jitOptions)); } private static native int cuLinkAddDataNative(CUlinkState state, int type, Pointer data, long size, String name, JITOptions jitOptions); public static int cuLinkAddFile(CUlinkState state, int type, String path, JITOptions jitOptions) { return checkResult(cuLinkAddFileNative(state, type, path, jitOptions)); } private static native int cuLinkAddFileNative(CUlinkState state, int type, String path, JITOptions jitOptions); public static int cuLinkComplete(CUlinkState state, Pointer cubinOut, long sizeOut[]) { return checkResult(cuLinkCompleteNative(state, cubinOut, sizeOut)); } private static native int cuLinkCompleteNative(CUlinkState state, Pointer cubinOut, long sizeOut[]); 
public static int cuLinkDestroy(CUlinkState state) { return checkResult(cuLinkDestroyNative(state)); } private static native int cuLinkDestroyNative(CUlinkState state); /** * Gets free and total memory. * *
     * CUresult cuMemGetInfo (
     *      size_t* free,
     *      size_t* total )
     * 
*
*

Gets free and total memory. Returns in * *free and *total respectively, the free and total * amount of memory available for allocation by the CUDA context, in * bytes. *

*
* Note: *

Note that this * function may also return error codes from previous, asynchronous * launches. *

*
*

*
* * @param free Returned free memory in bytes * @param total Returned total memory in bytes * * @return CUDA_SUCCESS, CUDA_ERROR_DEINITIALIZED, CUDA_ERROR_NOT_INITIALIZED, * CUDA_ERROR_INVALID_CONTEXT, CUDA_ERROR_INVALID_VALUE * * @see JCudaDriver#cuArray3DCreate * @see JCudaDriver#cuArray3DGetDescriptor * @see JCudaDriver#cuArrayCreate * @see JCudaDriver#cuArrayDestroy * @see JCudaDriver#cuArrayGetDescriptor * @see JCudaDriver#cuMemAlloc * @see JCudaDriver#cuMemAllocHost * @see JCudaDriver#cuMemAllocPitch * @see JCudaDriver#cuMemcpy2D * @see JCudaDriver#cuMemcpy2DAsync * @see JCudaDriver#cuMemcpy2DUnaligned * @see JCudaDriver#cuMemcpy3D * @see JCudaDriver#cuMemcpy3DAsync * @see JCudaDriver#cuMemcpyAtoA * @see JCudaDriver#cuMemcpyAtoD * @see JCudaDriver#cuMemcpyAtoH * @see JCudaDriver#cuMemcpyAtoHAsync * @see JCudaDriver#cuMemcpyDtoA * @see JCudaDriver#cuMemcpyDtoD * @see JCudaDriver#cuMemcpyDtoDAsync * @see JCudaDriver#cuMemcpyDtoH * @see JCudaDriver#cuMemcpyDtoHAsync * @see JCudaDriver#cuMemcpyHtoA * @see JCudaDriver#cuMemcpyHtoAAsync * @see JCudaDriver#cuMemcpyHtoD * @see JCudaDriver#cuMemcpyHtoDAsync * @see JCudaDriver#cuMemFree * @see JCudaDriver#cuMemFreeHost * @see JCudaDriver#cuMemGetAddressRange * @see JCudaDriver#cuMemHostAlloc * @see JCudaDriver#cuMemHostGetDevicePointer * @see JCudaDriver#cuMemsetD2D8 * @see JCudaDriver#cuMemsetD2D16 * @see JCudaDriver#cuMemsetD2D32 * @see JCudaDriver#cuMemsetD8 * @see JCudaDriver#cuMemsetD16 * @see JCudaDriver#cuMemsetD32 */ public static int cuMemGetInfo(long free[], long total[]) { return checkResult(cuMemGetInfoNative(free, total)); } private static native int cuMemGetInfoNative(long free[], long total[]); /** * Allocates page-locked host memory. * *
     * CUresult cuMemHostAlloc (
     *      void** pp,
     *      size_t bytesize,
     *      unsigned int  Flags )
     * 
*
*

Allocates page-locked host memory. * Allocates bytesize bytes of host memory that is page-locked * and accessible to the device. The driver tracks the virtual memory * ranges allocated * with this function and automatically * accelerates calls to functions such as cuMemcpyHtoD(). Since the memory * can be accessed directly by the device, it can be read or written with * much higher bandwidth than pageable * memory obtained with functions such as * malloc(). Allocating excessive amounts of pinned memory may degrade * system performance, * since it reduces the amount of memory * available to the system for paging. As a result, this function is best * used sparingly * to allocate staging areas for data * exchange between host and device. *

*

The Flags parameter enables * different options to be specified that affect the allocation, as * follows. *

*
    *
  • *

    CU_MEMHOSTALLOC_PORTABLE: The * memory returned by this call will be considered as pinned memory by * all CUDA contexts, not just the one that performed * the allocation. *

    *
  • *
*

*
    *
  • *

    CU_MEMHOSTALLOC_DEVICEMAP: Maps * the allocation into the CUDA address space. The device pointer to the * memory may be obtained by calling cuMemHostGetDevicePointer(). This * feature is available only on GPUs with compute capability greater than * or equal to 1.1. *

    *
  • *
*

*
    *
  • *

    CU_MEMHOSTREGISTER_IOMEMORY: * The pointer is treated as pointing to some * I/O memory space, e.g. the PCI Express resource of a 3rd party device. *

    *
  • *
*

*
    *
  • *

    CU_MEMHOSTALLOC_WRITECOMBINED: * Allocates the memory as write-combined (WC). WC memory can be * transferred across the PCI Express bus more quickly on some * system configurations, but * cannot be read efficiently by most CPUs. WC memory is a good option * for buffers that will be written * by the CPU and read by the GPU * via mapped pinned memory or host->device transfers. *

    *
  • *
*

*

All of these flags are orthogonal to * one another: a developer may allocate memory that is portable, mapped * and/or write-combined * with no restrictions. *

*

The CUDA context must have been created * with the CU_CTX_MAP_HOST flag in order for the CU_MEMHOSTALLOC_DEVICEMAP * flag to have any effect. *

*

The CU_MEMHOSTALLOC_DEVICEMAP flag may * be specified on CUDA contexts for devices that do not support mapped * pinned memory. The failure is deferred to cuMemHostGetDevicePointer() * because the memory may be mapped into other CUDA contexts via the * CU_MEMHOSTALLOC_PORTABLE flag. *

*

The memory allocated by this function * must be freed with cuMemFreeHost(). *

*

Note all host memory allocated using * cuMemHostAlloc() will automatically be immediately accessible to all * contexts on all devices which support unified addressing (as may be * queried * using CU_DEVICE_ATTRIBUTE_UNIFIED_ADDRESSING). * Unless the flag CU_MEMHOSTALLOC_WRITECOMBINED is specified, the device * pointer that may be used to access this host memory from those contexts * is always equal to the returned * host pointer *pp. If the flag * CU_MEMHOSTALLOC_WRITECOMBINED is specified, then the function * cuMemHostGetDevicePointer() must be used to query the device pointer, * even if the context supports unified addressing. See Unified Addressing * for additional details. *

*
* Note: *

Note that this * function may also return error codes from previous, asynchronous * launches. *

*
*

*
* * @param pp Returned host pointer to page-locked memory * @param bytesize Requested allocation size in bytes * @param Flags Flags for allocation request * * @return CUDA_SUCCESS, CUDA_ERROR_DEINITIALIZED, CUDA_ERROR_NOT_INITIALIZED, * CUDA_ERROR_INVALID_CONTEXT, CUDA_ERROR_INVALID_VALUE, * CUDA_ERROR_OUT_OF_MEMORY, CUDA_ERROR_HOST_MEMORY_ALREADY_REGISTERED, * CUDA_ERROR_NOT_PERMITTED, CUDA_ERROR_NOT_SUPPORTED * * @see JCudaDriver#cuArray3DCreate * @see JCudaDriver#cuArray3DGetDescriptor * @see JCudaDriver#cuArrayCreate * @see JCudaDriver#cuArrayDestroy * @see JCudaDriver#cuArrayGetDescriptor * @see JCudaDriver#cuMemAlloc * @see JCudaDriver#cuMemAllocHost * @see JCudaDriver#cuMemAllocPitch * @see JCudaDriver#cuMemcpy2D * @see JCudaDriver#cuMemcpy2DAsync * @see JCudaDriver#cuMemcpy2DUnaligned * @see JCudaDriver#cuMemcpy3D * @see JCudaDriver#cuMemcpy3DAsync * @see JCudaDriver#cuMemcpyAtoA * @see JCudaDriver#cuMemcpyAtoD * @see JCudaDriver#cuMemcpyAtoH * @see JCudaDriver#cuMemcpyAtoHAsync * @see JCudaDriver#cuMemcpyDtoA * @see JCudaDriver#cuMemcpyDtoD * @see JCudaDriver#cuMemcpyDtoDAsync * @see JCudaDriver#cuMemcpyDtoH * @see JCudaDriver#cuMemcpyDtoHAsync * @see JCudaDriver#cuMemcpyHtoA * @see JCudaDriver#cuMemcpyHtoAAsync * @see JCudaDriver#cuMemcpyHtoD * @see JCudaDriver#cuMemcpyHtoDAsync * @see JCudaDriver#cuMemFree * @see JCudaDriver#cuMemFreeHost * @see JCudaDriver#cuMemGetAddressRange * @see JCudaDriver#cuMemGetInfo * @see JCudaDriver#cuMemHostGetDevicePointer * @see JCudaDriver#cuMemsetD2D8 * @see JCudaDriver#cuMemsetD2D16 * @see JCudaDriver#cuMemsetD2D32 * @see JCudaDriver#cuMemsetD8 * @see JCudaDriver#cuMemsetD16 * @see JCudaDriver#cuMemsetD32 */ public static int cuMemHostAlloc(Pointer pp, long bytes, int Flags) { return checkResult(cuMemHostAllocNative(pp, bytes, Flags)); } private static native int cuMemHostAllocNative(Pointer pp, long bytes, int Flags); /** * Passes back device pointer of mapped pinned memory. * *
     * CUresult cuMemHostGetDevicePointer (
     *      CUdeviceptr* pdptr,
     *      void* p,
     *      unsigned int  Flags )
     * 
*
*

Passes back device pointer of mapped * pinned memory. Passes back the device pointer pdptr * corresponding to the mapped, pinned host buffer p allocated * by cuMemHostAlloc. *

*

cuMemHostGetDevicePointer() will fail * if the CU_MEMHOSTALLOC_DEVICEMAP flag was not specified at the time * the memory was allocated, or if the function is called on a GPU that * does not support * mapped pinned memory. *

*

Flags provides for future * releases. For now, it must be set to 0. *

*
* Note: *

Note that this * function may also return error codes from previous, asynchronous * launches. *

*
*

*
* * @param pdptr Returned device pointer * @param p Host pointer * @param Flags Options (must be 0) * * @return CUDA_SUCCESS, CUDA_ERROR_DEINITIALIZED, CUDA_ERROR_NOT_INITIALIZED, * CUDA_ERROR_INVALID_CONTEXT, CUDA_ERROR_INVALID_VALUE * * @see JCudaDriver#cuArray3DCreate * @see JCudaDriver#cuArray3DGetDescriptor * @see JCudaDriver#cuArrayCreate * @see JCudaDriver#cuArrayDestroy * @see JCudaDriver#cuArrayGetDescriptor * @see JCudaDriver#cuMemAlloc * @see JCudaDriver#cuMemAllocHost * @see JCudaDriver#cuMemAllocPitch * @see JCudaDriver#cuMemcpy2D * @see JCudaDriver#cuMemcpy2DAsync * @see JCudaDriver#cuMemcpy2DUnaligned * @see JCudaDriver#cuMemcpy3D * @see JCudaDriver#cuMemcpy3DAsync * @see JCudaDriver#cuMemcpyAtoA * @see JCudaDriver#cuMemcpyAtoD * @see JCudaDriver#cuMemcpyAtoH * @see JCudaDriver#cuMemcpyAtoHAsync * @see JCudaDriver#cuMemcpyDtoA * @see JCudaDriver#cuMemcpyDtoD * @see JCudaDriver#cuMemcpyDtoDAsync * @see JCudaDriver#cuMemcpyDtoH * @see JCudaDriver#cuMemcpyDtoHAsync * @see JCudaDriver#cuMemcpyHtoA * @see JCudaDriver#cuMemcpyHtoAAsync * @see JCudaDriver#cuMemcpyHtoD * @see JCudaDriver#cuMemcpyHtoDAsync * @see JCudaDriver#cuMemFree * @see JCudaDriver#cuMemFreeHost * @see JCudaDriver#cuMemGetAddressRange * @see JCudaDriver#cuMemGetInfo * @see JCudaDriver#cuMemHostAlloc * @see JCudaDriver#cuMemsetD2D8 * @see JCudaDriver#cuMemsetD2D16 * @see JCudaDriver#cuMemsetD2D32 * @see JCudaDriver#cuMemsetD8 * @see JCudaDriver#cuMemsetD16 * @see JCudaDriver#cuMemsetD32 */ public static int cuMemHostGetDevicePointer(CUdeviceptr ret, Pointer p, int Flags) { return checkResult(cuMemHostGetDevicePointerNative(ret, p, Flags)); } private static native int cuMemHostGetDevicePointerNative(CUdeviceptr ret, Pointer p, int Flags); /** * Passes back flags that were used for a pinned allocation. * *
     * <pre>
     * CUresult cuMemHostGetFlags (
     *      unsigned int* pFlags,
     *      void* p )
     * </pre>
     * <p>
     * Passes back the flags pFlags that were specified when allocating the
     * pinned host buffer p with cuMemHostAlloc.
     * <p>
     * cuMemHostGetFlags() will fail if the pointer does not reside in an
     * allocation performed by cuMemAllocHost() or cuMemHostAlloc().
     * <p>
     * Note that this function may also return error codes from previous,
     * asynchronous launches.
     *
     * @param pFlags Returned flags word
     * @param p Host pointer
     *
     * @return CUDA_SUCCESS, CUDA_ERROR_DEINITIALIZED, CUDA_ERROR_NOT_INITIALIZED,
     * CUDA_ERROR_INVALID_CONTEXT, CUDA_ERROR_INVALID_VALUE
     *
     * @see JCudaDriver#cuMemAllocHost
     * @see JCudaDriver#cuMemHostAlloc
     */
    public static int cuMemHostGetFlags(int pFlags[], Pointer p)
    {
        int status = cuMemHostGetFlagsNative(pFlags, p);
        return checkResult(status);
    }
    private static native int cuMemHostGetFlagsNative(int pFlags[], Pointer p);

    /**
     * Returns a handle to a compute device.
     *
     * CUresult cuDeviceGetByPCIBusId (
     *      CUdevice* dev,
     *      char* pciBusId )
     * 
*
*

Returns a handle to a compute device. * Returns in *device a device handle given a PCI bus ID * string. *

*
* Note: *

Note that this * function may also return error codes from previous, asynchronous * launches. *

*
*

*
* * @param dev Returned device handle * @param pciBusId String in one of the following forms: [domain]:[bus]:[device].[function] [domain]:[bus]:[device] [bus]:[device].[function] where domain, bus, device, and function are all hexadecimal values * * @return CUDA_SUCCESS, CUDA_ERROR_DEINITIALIZED, CUDA_ERROR_NOT_INITIALIZED, * CUDA_ERROR_INVALID_VALUE, CUDA_ERROR_INVALID_DEVICE * * @see JCudaDriver#cuDeviceGet * @see JCudaDriver#cuDeviceGetAttribute * @see JCudaDriver#cuDeviceGetPCIBusId */ public static int cuDeviceGetByPCIBusId(CUdevice dev, String pciBusId) { return checkResult(cuDeviceGetByPCIBusIdNative(dev, pciBusId)); } private static native int cuDeviceGetByPCIBusIdNative(CUdevice dev, String pciBusId); /** *
     * CUresult cuMemAllocManaged (
     *      CUdeviceptr* dptr,
     *      size_t bytesize,
     *      unsigned int  flags )
     * 
* *
Allocates memory that will be automatically managed by the Unified * Memory system.
*
Description
*

* Allocates bytesize bytes of managed memory on the device and * returns in *dptr a pointer to the allocated memory. If the * device doesn't support allocating managed memory, * CUDA_ERROR_NOT_SUPPORTED is returned. Support for managed memory can be * queried using the device attribute CU_DEVICE_ATTRIBUTE_MANAGED_MEMORY. * The allocated memory is suitably aligned for any kind of variable. The * memory is not cleared. If bytesize is 0, cuMemAllocManaged * returns CUDA_ERROR_INVALID_VALUE. The pointer is valid on the CPU and on * all GPUs in the system that support managed memory. All accesses to this * pointer must obey the Unified Memory programming model. *

*

* flags specifies the default stream association for this * allocation. flags must be one of CU_MEM_ATTACH_GLOBAL or * CU_MEM_ATTACH_HOST. If CU_MEM_ATTACH_GLOBAL is specified, then this * memory is accessible from any stream on any device. If CU_MEM_ATTACH_HOST * is specified, then the allocation is created with initial visibility * restricted to host access only; an explicit call to * cuStreamAttachMemAsync will be required to enable access on the device. *

*

* If the association is later changed via cuStreamAttachMemAsync to a * single stream, the default association as specifed during * cuMemAllocManaged is restored when that stream is destroyed. For * __managed__ variables, the default association is always * CU_MEM_ATTACH_GLOBAL. Note that destroying a stream is an asynchronous * operation, and as a result, the change to default association won't * happen until all work in the stream has completed. *

*

* Memory allocated with cuMemAllocManaged should be released with * cuMemFree. *

*

* On a multi-GPU system with peer-to-peer support, where multiple GPUs * support managed memory, the physical storage is created on the GPU which * is active at the time cuMemAllocManaged is called. All other GPUs will * reference the data at reduced bandwidth via peer mappings over the PCIe * bus. The Unified Memory management system does not migrate memory between * GPUs. *

*

* On a multi-GPU system where multiple GPUs support managed memory, but not * all pairs of such GPUs have peer-to-peer support between them, the * physical storage is created in 'zero-copy' or system memory. All GPUs * will reference the data at reduced bandwidth over the PCIe bus. In these * circumstances, use of the environment variable, CUDA_VISIBLE_DEVICES, is * recommended to restrict CUDA to only use those GPUs that have * peer-to-peer support. Alternatively, users can also set * CUDA_MANAGED_FORCE_DEVICE_ALLOC to a non-zero value to force the driver * to always use device memory for physical storage. When this environment * variable is set to a non-zero value, all contexts created in that process * on devices that support managed memory have to be peer-to-peer compatible * with each other. Context creation will fail if a context is created on a * device that supports managed memory and is not peer-to-peer compatible * with any of the other managed memory supporting devices on which contexts * were previously created, even if those contexts have been destroyed. * These environment variables are described in the CUDA programming guide * under the "CUDA environment variables" section. *

*
Note: *

* Note that this function may also return error codes from previous, * asynchronous launches. *

*
*

*
* * @param dptr The device pointer * @param bytesize The size in bytes * @param flags The flags * * @return CUDA_SUCCESS, CUDA_ERROR_DEINITIALIZED, * CUDA_ERROR_NOT_INITIALIZED, CUDA_ERROR_INVALID_CONTEXT, * CUDA_ERROR_NOT_SUPPORTED, CUDA_ERROR_INVALID_VALUE, * CUDA_ERROR_OUT_OF_MEMORY * * @see JCudaDriver#cuArray3DCreate * @see JCudaDriver#cuArray3DGetDescriptor * @see JCudaDriver#cuArrayCreate * @see JCudaDriver#cuArrayDestroy * @see JCudaDriver#cuArrayGetDescriptor * @see JCudaDriver#cuMemAllocHost * @see JCudaDriver#cuMemAllocPitch * @see JCudaDriver#cuMemcpy2D * @see JCudaDriver#cuMemcpy2DAsync * @see JCudaDriver#cuMemcpy2DUnaligned * @see JCudaDriver#cuMemcpy3D * @see JCudaDriver#cuMemcpy3DAsync * @see JCudaDriver#cuMemcpyAtoA * @see JCudaDriver#cuMemcpyAtoD * @see JCudaDriver#cuMemcpyAtoH * @see JCudaDriver#cuMemcpyAtoHAsync * @see JCudaDriver#cuMemcpyDtoA * @see JCudaDriver#cuMemcpyDtoD * @see JCudaDriver#cuMemcpyDtoDAsync * @see JCudaDriver#cuMemcpyDtoH * @see JCudaDriver#cuMemcpyDtoHAsync * @see JCudaDriver#cuMemcpyHtoA * @see JCudaDriver#cuMemcpyHtoAAsync * @see JCudaDriver#cuMemcpyHtoD * @see JCudaDriver#cuMemcpyHtoDAsync * @see JCudaDriver#cuMemFree * @see JCudaDriver#cuMemFreeHost * @see JCudaDriver#cuMemGetAddressRange * @see JCudaDriver#cuMemGetInfo * @see JCudaDriver#cuMemHostAlloc * @see JCudaDriver#cuMemHostGetDevicePointer * @see JCudaDriver#cuMemsetD2D8 * @see JCudaDriver#cuMemsetD2D16 * @see JCudaDriver#cuMemsetD2D32 * @see JCudaDriver#cuMemsetD8 * @see JCudaDriver#cuMemsetD16 * @see JCudaDriver#cuMemsetD32 * @see JCudaDriver#cuDeviceGetAttribute * @see JCudaDriver#cuStreamAttachMemAsync */ public static int cuMemAllocManaged(CUdeviceptr dptr, long bytesize, int flags) { return checkResult(cuMemAllocManagedNative(dptr, bytesize, flags)); } private static native int cuMemAllocManagedNative(CUdeviceptr dptr, long bytesize, int flags); /** * Returns a PCI Bus Id string for the device. * *
     * CUresult cuDeviceGetPCIBusId (
     *      char* pciBusId,
     *      int  len,
     *      CUdevice dev )
     * 
*
*

Returns a PCI Bus Id string for the * device. Returns an ASCII string identifying the device dev * in the NULL-terminated string pointed to by pciBusId. len specifies the maximum length of the string that may be * returned. *

*
* Note: *

Note that this * function may also return error codes from previous, asynchronous * launches. *

*
*

*
* * @param pciBusId Returned identifier string for the device in the following format [domain]:[bus]:[device].[function] where domain, bus, device, and function are all hexadecimal values. pciBusId should be large enough to store 13 characters including the NULL-terminator. * @param len Maximum length of string to store in name * @param dev Device to get identifier string for * * @return CUDA_SUCCESS, CUDA_ERROR_DEINITIALIZED, CUDA_ERROR_NOT_INITIALIZED, * CUDA_ERROR_INVALID_VALUE, CUDA_ERROR_INVALID_DEVICE * * @see JCudaDriver#cuDeviceGet * @see JCudaDriver#cuDeviceGetAttribute * @see JCudaDriver#cuDeviceGetByPCIBusId */ public static int cuDeviceGetPCIBusId(String pciBusId[], int len, CUdevice dev) { return checkResult(cuDeviceGetPCIBusIdNative(pciBusId, len, dev)); } private static native int cuDeviceGetPCIBusIdNative(String pciBusId[], int len, CUdevice dev); /** * Gets an interprocess handle for a previously allocated event. * *
     * CUresult cuIpcGetEventHandle (
     *      CUipcEventHandle* pHandle,
     *      CUevent event )
     * 
*
*

Gets an interprocess handle for a * previously allocated event. Takes as input a previously allocated * event. This event must * have been created with the * CU_EVENT_INTERPROCESS and CU_EVENT_DISABLE_TIMING flags set. This * opaque handle may be copied into other processes and opened with * cuIpcOpenEventHandle to allow efficient hardware synchronization * between GPU work in different processes. *

*

After the event has been been opened in * the importing process, cuEventRecord, cuEventSynchronize, * cuStreamWaitEvent and cuEventQuery may be used in either process. * Performing operations on the imported event after the exported event * has been freed with cuEventDestroy will result in undefined behavior. *

*

IPC functionality is restricted to * devices with support for unified addressing on Linux operating * systems. *

*
* * @param pHandle Pointer to a user allocated CUipcEventHandle in which to return the opaque event handle * @param event Event allocated with CU_EVENT_INTERPROCESS and CU_EVENT_DISABLE_TIMING flags. * * @return CUDA_SUCCESS, CUDA_ERROR_INVALID_HANDLE, CUDA_ERROR_OUT_OF_MEMORY, * CUDA_ERROR_MAP_FAILED * * @see JCudaDriver#cuEventCreate * @see JCudaDriver#cuEventDestroy * @see JCudaDriver#cuEventSynchronize * @see JCudaDriver#cuEventQuery * @see JCudaDriver#cuStreamWaitEvent * @see JCudaDriver#cuIpcOpenEventHandle * @see JCudaDriver#cuIpcGetMemHandle * @see JCudaDriver#cuIpcOpenMemHandle * @see JCudaDriver#cuIpcCloseMemHandle */ public static int cuIpcGetEventHandle(CUipcEventHandle pHandle, CUevent event) { return checkResult(cuIpcGetEventHandleNative(pHandle, event)); } private static native int cuIpcGetEventHandleNative(CUipcEventHandle pHandle, CUevent event); /** * Opens an interprocess event handle for use in the current process. * *
     * CUresult cuIpcOpenEventHandle (
     *      CUevent* phEvent,
     *      CUipcEventHandle handle )
     * 
*
*

Opens an interprocess event handle for * use in the current process. Opens an interprocess event handle exported * from another * process with cuIpcGetEventHandle. This * function returns a CUevent that behaves like a locally created event * with the CU_EVENT_DISABLE_TIMING flag specified. This event must be * freed with cuEventDestroy. *

*

Performing operations on the imported * event after the exported event has been freed with cuEventDestroy will * result in undefined behavior. *

*

IPC functionality is restricted to * devices with support for unified addressing on Linux operating * systems. *

*
* * @param phEvent Returns the imported event * @param handle Interprocess handle to open * * @return CUDA_SUCCESS, CUDA_ERROR_INVALID_CONTEXT, CUDA_ERROR_MAP_FAILED, * CUDA_ERROR_PEER_ACCESS_UNSUPPORTED, CUDA_ERROR_INVALID_HANDLE * * @see JCudaDriver#cuEventCreate * @see JCudaDriver#cuEventDestroy * @see JCudaDriver#cuEventSynchronize * @see JCudaDriver#cuEventQuery * @see JCudaDriver#cuStreamWaitEvent * @see JCudaDriver#cuIpcGetEventHandle * @see JCudaDriver#cuIpcGetMemHandle * @see JCudaDriver#cuIpcOpenMemHandle * @see JCudaDriver#cuIpcCloseMemHandle */ public static int cuIpcOpenEventHandle(CUevent phEvent, CUipcEventHandle handle) { return checkResult(cuIpcOpenEventHandleNative(phEvent, handle)); } private static native int cuIpcOpenEventHandleNative(CUevent phEvent, CUipcEventHandle handle); /** * Gets an interprocess memory handle for an existing device memory * allocation. * *
     * CUresult cuIpcGetMemHandle (
     *      CUipcMemHandle* pHandle,
     *      CUdeviceptr dptr )
     * 
*
*

/brief Gets an interprocess memory * handle for an existing device memory allocation *

*

Takes a pointer to the base of an * existing device memory allocation created with cuMemAlloc and exports * it for use in another process. This is a lightweight operation and may * be called multiple times on an allocation * without adverse effects. *

*

If a region of memory is freed with * cuMemFree and a subsequent call to cuMemAlloc returns memory with the * same device address, cuIpcGetMemHandle will return a unique handle for * the new memory. *

*

IPC functionality is restricted to * devices with support for unified addressing on Linux operating * systems. *

*
* * @param pHandle Pointer to user allocated CUipcMemHandle to return the handle in. * @param dptr Base pointer to previously allocated device memory * * @return CUDA_SUCCESS, CUDA_ERROR_INVALID_HANDLE, CUDA_ERROR_OUT_OF_MEMORY, * CUDA_ERROR_MAP_FAILED, * * @see JCudaDriver#cuMemAlloc * @see JCudaDriver#cuMemFree * @see JCudaDriver#cuIpcGetEventHandle * @see JCudaDriver#cuIpcOpenEventHandle * @see JCudaDriver#cuIpcOpenMemHandle * @see JCudaDriver#cuIpcCloseMemHandle */ public static int cuIpcGetMemHandle(CUipcMemHandle pHandle, CUdeviceptr dptr) { return checkResult(cuIpcGetMemHandleNative(pHandle, dptr)); } private static native int cuIpcGetMemHandleNative(CUipcMemHandle pHandle, CUdeviceptr dptr); /** * *
     * CUresult cuIpcOpenMemHandle (
     *      CUdeviceptr* pdptr,
     *      CUipcMemHandle handle,
     *      unsigned int  Flags )
     * 
*
*

/brief Opens an interprocess memory * handle exported from another process and returns a device pointer * usable in the local * process. *

*

Maps memory exported from another * process with cuIpcGetMemHandle into the current device address space. * For contexts on different devices cuIpcOpenMemHandle can attempt to * enable peer access between the devices as if the user called * cuCtxEnablePeerAccess. This behavior is controlled by the * CU_IPC_MEM_LAZY_ENABLE_PEER_ACCESS flag. cuDeviceCanAccessPeer can * determine if a mapping is possible. *

*

Contexts that may open CUipcMemHandles * are restricted in the following way. CUipcMemHandles from each CUdevice * in a given process may only be opened by one CUcontext per CUdevice * per other process. *

* If the memory handle has already been opened by the current context, the * reference count on the handle is incremented by 1 and the existing device pointer * is returned. *

Memory returned from cuIpcOpenMemHandle * must be freed with cuIpcCloseMemHandle. *

*

Calling cuMemFree on an exported memory * region before calling cuIpcCloseMemHandle in the importing context will * result in undefined behavior. *

*

IPC functionality is restricted to * devices with support for unified addressing on Linux operating * systems. *

*
* * @param pdptr Returned device pointer * @param handle CUipcMemHandle to open * @param Flags Flags for this operation. Must be specified as CU_IPC_MEM_LAZY_ENABLE_PEER_ACCESS * * @return CUDA_SUCCESS, CUDA_ERROR_INVALID_CONTEXT, CUDA_ERROR_MAP_FAILED, * CUDA_ERROR_INVALID_HANDLE, CUDA_ERROR_TOO_MANY_PEERS * * @see JCudaDriver#cuMemAlloc * @see JCudaDriver#cuMemFree * @see JCudaDriver#cuIpcGetEventHandle * @see JCudaDriver#cuIpcOpenEventHandle * @see JCudaDriver#cuIpcGetMemHandle * @see JCudaDriver#cuIpcCloseMemHandle * @see JCudaDriver#cuCtxEnablePeerAccess * @see JCudaDriver#cuDeviceCanAccessPeer */ public static int cuIpcOpenMemHandle(CUdeviceptr pdptr, CUipcMemHandle handle, int Flags) { return checkResult(cuIpcOpenMemHandleNative(pdptr, handle, Flags)); } private static native int cuIpcOpenMemHandleNative(CUdeviceptr pdptr, CUipcMemHandle handle, int Flags); /** * Close memory mapped with cuIpcOpenMemHandle. * *
     * CUresult cuIpcCloseMemHandle (
     *      CUdeviceptr dptr )
     * 
*
*

Close memory mapped with cuIpcOpenMemHandle. * Decrements the reference count of the memory returned by ::cuIpcOpenMemHandle by 1. * When the reference count reaches 0, this API unmaps the memory. The original allocation * in the exporting process as well as imported mappings in other processes * will be unaffected. *

*

Any resources used to enable peer access * will be freed if this is the last mapping using them. *

*

IPC functionality is restricted to * devices with support for unified addressing on Linux operating * systems. *

*
* * @param dptr Device pointer returned by cuIpcOpenMemHandle * * @return CUDA_SUCCESS, CUDA_ERROR_INVALID_CONTEXT, CUDA_ERROR_MAP_FAILED, * CUDA_ERROR_INVALID_HANDLE, * * @see JCudaDriver#cuMemAlloc * @see JCudaDriver#cuMemFree * @see JCudaDriver#cuIpcGetEventHandle * @see JCudaDriver#cuIpcOpenEventHandle * @see JCudaDriver#cuIpcGetMemHandle * @see JCudaDriver#cuIpcOpenMemHandle */ public static int cuIpcCloseMemHandle(CUdeviceptr dptr) { return checkResult(cuIpcCloseMemHandleNative(dptr)); } private static native int cuIpcCloseMemHandleNative(CUdeviceptr dptr); /** * Registers an existing host memory range for use by CUDA. * *
     * CUresult cuMemHostRegister (
     *      void* p,
     *      size_t bytesize,
     *      unsigned int  Flags )
     * 
*
*

Registers an existing host memory range * for use by CUDA. Page-locks the memory range specified by p * and bytesize and maps it for the device(s) as specified by * Flags. This memory range also is added to the same tracking * mechanism as cuMemHostAlloc to automatically accelerate calls to * functions such as cuMemcpyHtoD(). Since the memory can be accessed * directly by the device, it can be read or written with much higher * bandwidth than pageable * memory that has not been registered. * Page-locking excessive amounts of memory may degrade system performance, * since it reduces * the amount of memory available to the * system for paging. As a result, this function is best used sparingly * to register staging * areas for data exchange between host and * device. *

*

This function has limited support on * Mac OS X. OS 10.7 or higher is required. *

*

The Flags parameter enables * different options to be specified that affect the allocation, as * follows. *

*
* - ::CU_MEMHOSTREGISTER_PORTABLE: The memory returned by this call will be * considered as pinned memory by all CUDA contexts, not just the one that * performed the allocation. *

* - ::CU_MEMHOSTREGISTER_DEVICEMAP: Maps the allocation into the CUDA address * space. The device pointer to the memory may be obtained by calling * ::cuMemHostGetDevicePointer(). *

* - ::CU_MEMHOSTREGISTER_IOMEMORY: The pointer is treated as pointing to some * I/O memory space, e.g. the PCI Express resource of a 3rd party device. *

* - ::CU_MEMHOSTREGISTER_READ_ONLY: The pointer is treated as pointing to memory * that is considered read-only by the device. On platforms without * CU_DEVICE_ATTRIBUTE_PAGEABLE_MEMORY_ACCESS_USES_HOST_PAGE_TABLES, this flag is * required in order to register memory mapped to the CPU as read-only. Support * for the use of this flag can be queried from the device attribute * CU_DEVICE_ATTRIBUTE_READ_ONLY_HOST_REGISTER_SUPPORTED. Using this flag with * a current context associated with a device that does not have this attribute * set will cause ::cuMemHostRegister to error with CUDA_ERROR_NOT_SUPPORTED. *

*

*

All of these flags are orthogonal to * one another: a developer may page-lock memory that is portable or * mapped with no restrictions. *

*

The CUDA context must have been created * with the CU_CTX_MAP_HOST flag in order for the CU_MEMHOSTREGISTER_DEVICEMAP * flag to have any effect. *

*

The CU_MEMHOSTREGISTER_DEVICEMAP flag * may be specified on CUDA contexts for devices that do not support * mapped pinned memory. The failure is deferred to cuMemHostGetDevicePointer() * because the memory may be mapped into other CUDA contexts via the * CU_MEMHOSTREGISTER_PORTABLE flag. *

*

The memory page-locked by this function * must be unregistered with cuMemHostUnregister(). *

*
* Note: *

Note that this * function may also return error codes from previous, asynchronous * launches. *

*
*

*
* * @param p Host pointer to memory to page-lock * @param bytesize Size in bytes of the address range to page-lock * @param Flags Flags for allocation request * * @return CUDA_SUCCESS, CUDA_ERROR_DEINITIALIZED, CUDA_ERROR_NOT_INITIALIZED, * CUDA_ERROR_INVALID_CONTEXT, CUDA_ERROR_INVALID_VALUE, * CUDA_ERROR_OUT_OF_MEMORY, CUDA_ERROR_HOST_MEMORY_ALREADY_REGISTERED * * @see JCudaDriver#cuMemHostUnregister * @see JCudaDriver#cuMemHostGetFlags * @see JCudaDriver#cuMemHostGetDevicePointer */ public static int cuMemHostRegister(Pointer p, long bytesize, int Flags) { return checkResult(cuMemHostRegisterNative(p, bytesize, Flags)); } private static native int cuMemHostRegisterNative(Pointer p, long bytesize, int Flags); /** * Unregisters a memory range that was registered with cuMemHostRegister. * *
     * <pre>
     * CUresult cuMemHostUnregister (
     *      void* p )
     * </pre>
     * <p>
     * Unmaps the memory range whose base address is specified by p, and
     * makes it pageable again.
     * <p>
     * The base address must be the same one specified to cuMemHostRegister().
     * <p>
     * Note that this function may also return error codes from previous,
     * asynchronous launches.
     *
     * @param p Host pointer to memory to unregister
     *
     * @return CUDA_SUCCESS, CUDA_ERROR_DEINITIALIZED, CUDA_ERROR_NOT_INITIALIZED,
     * CUDA_ERROR_INVALID_CONTEXT, CUDA_ERROR_INVALID_VALUE,
     * CUDA_ERROR_OUT_OF_MEMORY, CUDA_ERROR_HOST_MEMORY_NOT_REGISTERED
     *
     * @see JCudaDriver#cuMemHostRegister
     */
    public static int cuMemHostUnregister(Pointer p)
    {
        int status = cuMemHostUnregisterNative(p);
        return checkResult(status);
    }
    private static native int cuMemHostUnregisterNative(Pointer p);

    /**
     * Copies memory.
     *
     * CUresult cuMemcpy (
     *      CUdeviceptr dst,
     *      CUdeviceptr src,
     *      size_t ByteCount )
     * 
*
*

Copies memory. Copies data between two * pointers. dst and src are base pointers of the * destination and source, respectively. ByteCount specifies * the number of bytes to copy. Note that this function infers the type * of the transfer (host to host, host to device, * device to device, or device to host) from * the pointer values. This function is only allowed in contexts which * support unified * addressing. Note that this function is * synchronous. *

*
* Note: *

Note that this * function may also return error codes from previous, asynchronous * launches. *

*
*

*
* * @param dst Destination unified virtual address space pointer * @param src Source unified virtual address space pointer * @param ByteCount Size of memory copy in bytes * * @return CUDA_SUCCESS, CUDA_ERROR_DEINITIALIZED, CUDA_ERROR_NOT_INITIALIZED, * CUDA_ERROR_INVALID_CONTEXT, CUDA_ERROR_INVALID_VALUE * * @see JCudaDriver#cuArray3DCreate * @see JCudaDriver#cuArray3DGetDescriptor * @see JCudaDriver#cuArrayCreate * @see JCudaDriver#cuArrayDestroy * @see JCudaDriver#cuArrayGetDescriptor * @see JCudaDriver#cuMemAlloc * @see JCudaDriver#cuMemAllocHost * @see JCudaDriver#cuMemAllocPitch * @see JCudaDriver#cuMemcpy2D * @see JCudaDriver#cuMemcpy2DAsync * @see JCudaDriver#cuMemcpy2DUnaligned * @see JCudaDriver#cuMemcpy3D * @see JCudaDriver#cuMemcpy3DAsync * @see JCudaDriver#cuMemcpyAtoA * @see JCudaDriver#cuMemcpyAtoD * @see JCudaDriver#cuMemcpyAtoH * @see JCudaDriver#cuMemcpyAtoHAsync * @see JCudaDriver#cuMemcpyDtoA * @see JCudaDriver#cuMemcpyDtoH * @see JCudaDriver#cuMemcpyDtoHAsync * @see JCudaDriver#cuMemcpyHtoA * @see JCudaDriver#cuMemcpyHtoAAsync * @see JCudaDriver#cuMemcpyHtoD * @see JCudaDriver#cuMemcpyHtoDAsync * @see JCudaDriver#cuMemFree * @see JCudaDriver#cuMemFreeHost * @see JCudaDriver#cuMemGetAddressRange * @see JCudaDriver#cuMemGetInfo * @see JCudaDriver#cuMemHostAlloc * @see JCudaDriver#cuMemHostGetDevicePointer * @see JCudaDriver#cuMemsetD2D8 * @see JCudaDriver#cuMemsetD2D16 * @see JCudaDriver#cuMemsetD2D32 * @see JCudaDriver#cuMemsetD8 * @see JCudaDriver#cuMemsetD16 * @see JCudaDriver#cuMemsetD32 */ public static int cuMemcpy(CUdeviceptr dst, CUdeviceptr src, long ByteCount) { return checkResult(cuMemcpyNative(dst, src, ByteCount)); } private static native int cuMemcpyNative(CUdeviceptr dst, CUdeviceptr src, long ByteCount); /** * Copies device memory between two contexts. * *
     * CUresult cuMemcpyPeer (
     *      CUdeviceptr dstDevice,
     *      CUcontext dstContext,
     *      CUdeviceptr srcDevice,
     *      CUcontext srcContext,
     *      size_t ByteCount )
     * 
*
*

Copies device memory between two contexts. * Copies from device memory in one context to device memory in another * context. * dstDevice is the base device * pointer of the destination memory and dstContext is the * destination context. srcDevice is the base device pointer of * the source memory and srcContext is the source pointer. ByteCount specifies the number of bytes to copy. *

*

Note that this function is asynchronous * with respect to the host, but serialized with respect all pending and * future asynchronous * work in to the current context, srcContext, and dstContext (use cuMemcpyPeerAsync to * avoid this synchronization). *

*
* Note: *

Note that this * function may also return error codes from previous, asynchronous * launches. *

*
*

*
* * @param dstDevice Destination device pointer * @param dstContext Destination context * @param srcDevice Source device pointer * @param srcContext Source context * @param ByteCount Size of memory copy in bytes * * @return CUDA_SUCCESS, CUDA_ERROR_DEINITIALIZED, CUDA_ERROR_NOT_INITIALIZED, * CUDA_ERROR_INVALID_CONTEXT, CUDA_ERROR_INVALID_VALUE * * @see JCudaDriver#cuMemcpyDtoD * @see JCudaDriver#cuMemcpy3DPeer * @see JCudaDriver#cuMemcpyDtoDAsync * @see JCudaDriver#cuMemcpyPeerAsync * @see JCudaDriver#cuMemcpy3DPeerAsync */ public static int cuMemcpyPeer(CUdeviceptr dstDevice, CUcontext dstContext, CUdeviceptr srcDevice, CUcontext srcContext, long ByteCount) { return cuMemcpyPeerNative(dstDevice, dstContext, srcDevice, srcContext, ByteCount); } private static native int cuMemcpyPeerNative(CUdeviceptr dstDevice, CUcontext dstContext, CUdeviceptr srcDevice, CUcontext srcContext, long ByteCount); /** * Allocates device memory. * *
     * CUresult cuMemAlloc (
     *      CUdeviceptr* dptr,
     *      size_t bytesize )
     * 
*
*

Allocates device memory. Allocates bytesize bytes of linear memory on the device and returns in *dptr a pointer to the allocated memory. The allocated memory is * suitably aligned for any kind of variable. The memory is not cleared. * If bytesize is 0, cuMemAlloc() * returns CUDA_ERROR_INVALID_VALUE. *

*
* Note: *

Note that this * function may also return error codes from previous, asynchronous * launches. *

*
*

*
* * @param dptr Returned device pointer * @param bytesize Requested allocation size in bytes * * @return CUDA_SUCCESS, CUDA_ERROR_DEINITIALIZED, CUDA_ERROR_NOT_INITIALIZED, * CUDA_ERROR_INVALID_CONTEXT, CUDA_ERROR_INVALID_VALUE, * CUDA_ERROR_OUT_OF_MEMORY * * @see JCudaDriver#cuArray3DCreate * @see JCudaDriver#cuArray3DGetDescriptor * @see JCudaDriver#cuArrayCreate * @see JCudaDriver#cuArrayDestroy * @see JCudaDriver#cuArrayGetDescriptor * @see JCudaDriver#cuMemAllocHost * @see JCudaDriver#cuMemAllocPitch * @see JCudaDriver#cuMemcpy2D * @see JCudaDriver#cuMemcpy2DAsync * @see JCudaDriver#cuMemcpy2DUnaligned * @see JCudaDriver#cuMemcpy3D * @see JCudaDriver#cuMemcpy3DAsync * @see JCudaDriver#cuMemcpyAtoA * @see JCudaDriver#cuMemcpyAtoD * @see JCudaDriver#cuMemcpyAtoH * @see JCudaDriver#cuMemcpyAtoHAsync * @see JCudaDriver#cuMemcpyDtoA * @see JCudaDriver#cuMemcpyDtoD * @see JCudaDriver#cuMemcpyDtoDAsync * @see JCudaDriver#cuMemcpyDtoH * @see JCudaDriver#cuMemcpyDtoHAsync * @see JCudaDriver#cuMemcpyHtoA * @see JCudaDriver#cuMemcpyHtoAAsync * @see JCudaDriver#cuMemcpyHtoD * @see JCudaDriver#cuMemcpyHtoDAsync * @see JCudaDriver#cuMemFree * @see JCudaDriver#cuMemFreeHost * @see JCudaDriver#cuMemGetAddressRange * @see JCudaDriver#cuMemGetInfo * @see JCudaDriver#cuMemHostAlloc * @see JCudaDriver#cuMemHostGetDevicePointer * @see JCudaDriver#cuMemsetD2D8 * @see JCudaDriver#cuMemsetD2D16 * @see JCudaDriver#cuMemsetD2D32 * @see JCudaDriver#cuMemsetD8 * @see JCudaDriver#cuMemsetD16 * @see JCudaDriver#cuMemsetD32 */ public static int cuMemAlloc(CUdeviceptr dptr, long bytesize) { return checkResult(cuMemAllocNative(dptr, bytesize)); } private static native int cuMemAllocNative(CUdeviceptr dptr, long bytesize); /** * Allocates pitched device memory. * *
     * CUresult cuMemAllocPitch (
     *      CUdeviceptr* dptr,
     *      size_t* pPitch,
     *      size_t WidthInBytes,
     *      size_t Height,
     *      unsigned int  ElementSizeBytes )
     * 
*
*

Allocates pitched device memory. * Allocates at least WidthInBytes * Height bytes of * linear memory on the device and returns in *dptr a pointer * to the allocated memory. The function may pad the allocation to ensure * that corresponding pointers in any given * row will continue to meet the alignment * requirements for coalescing as the address is updated from row to row. * ElementSizeBytes specifies the size of the largest reads and * writes that will be performed on the memory range. ElementSizeBytes may be 4, 8 or 16 (since coalesced memory * transactions are not possible on other data sizes). If ElementSizeBytes is smaller than the actual read/write size of a * kernel, the kernel will run correctly, but possibly at reduced speed. * The * pitch returned in *pPitch by * cuMemAllocPitch() is the width in bytes of the allocation. The intended * usage of pitch is as a separate parameter of the allocation, used to * compute addresses within the 2D array. * Given the row and column of an array element of type T, * the address is computed as: *

   T* pElement = (T*)((char*)BaseAddress
     * + Row * Pitch) + Column;
*

*

The pitch returned by cuMemAllocPitch() * is guaranteed to work with cuMemcpy2D() under all circumstances. For * allocations of 2D arrays, it is recommended that programmers consider * performing pitch allocations * using cuMemAllocPitch(). Due to alignment * restrictions in the hardware, this is especially true if the application * will be performing 2D memory copies * between different regions of device * memory (whether linear memory or CUDA arrays). *

*

The byte alignment of the pitch returned * by cuMemAllocPitch() is guaranteed to match or exceed the alignment * requirement for texture binding with cuTexRefSetAddress2D(). *

*
* Note: *

Note that this * function may also return error codes from previous, asynchronous * launches. *

*
*

*
* * @param dptr Returned device pointer * @param pPitch Returned pitch of allocation in bytes * @param WidthInBytes Requested allocation width in bytes * @param Height Requested allocation height in rows * @param ElementSizeBytes Size of largest reads/writes for range * * @return CUDA_SUCCESS, CUDA_ERROR_DEINITIALIZED, CUDA_ERROR_NOT_INITIALIZED, * CUDA_ERROR_INVALID_CONTEXT, CUDA_ERROR_INVALID_VALUE, * CUDA_ERROR_OUT_OF_MEMORY * * @see JCudaDriver#cuArray3DCreate * @see JCudaDriver#cuArray3DGetDescriptor * @see JCudaDriver#cuArrayCreate * @see JCudaDriver#cuArrayDestroy * @see JCudaDriver#cuArrayGetDescriptor * @see JCudaDriver#cuMemAlloc * @see JCudaDriver#cuMemAllocHost * @see JCudaDriver#cuMemcpy2D * @see JCudaDriver#cuMemcpy2DAsync * @see JCudaDriver#cuMemcpy2DUnaligned * @see JCudaDriver#cuMemcpy3D * @see JCudaDriver#cuMemcpy3DAsync * @see JCudaDriver#cuMemcpyAtoA * @see JCudaDriver#cuMemcpyAtoD * @see JCudaDriver#cuMemcpyAtoH * @see JCudaDriver#cuMemcpyAtoHAsync * @see JCudaDriver#cuMemcpyDtoA * @see JCudaDriver#cuMemcpyDtoD * @see JCudaDriver#cuMemcpyDtoDAsync * @see JCudaDriver#cuMemcpyDtoH * @see JCudaDriver#cuMemcpyDtoHAsync * @see JCudaDriver#cuMemcpyHtoA * @see JCudaDriver#cuMemcpyHtoAAsync * @see JCudaDriver#cuMemcpyHtoD * @see JCudaDriver#cuMemcpyHtoDAsync * @see JCudaDriver#cuMemFree * @see JCudaDriver#cuMemFreeHost * @see JCudaDriver#cuMemGetAddressRange * @see JCudaDriver#cuMemGetInfo * @see JCudaDriver#cuMemHostAlloc * @see JCudaDriver#cuMemHostGetDevicePointer * @see JCudaDriver#cuMemsetD2D8 * @see JCudaDriver#cuMemsetD2D16 * @see JCudaDriver#cuMemsetD2D32 * @see JCudaDriver#cuMemsetD8 * @see JCudaDriver#cuMemsetD16 * @see JCudaDriver#cuMemsetD32 */ public static int cuMemAllocPitch(CUdeviceptr dptr, long pPitch[], long WidthInBytes, long Height, int ElementSizeBytes) { return checkResult(cuMemAllocPitchNative(dptr, pPitch, WidthInBytes, Height, ElementSizeBytes)); } private static native int cuMemAllocPitchNative(CUdeviceptr dptr, long 
pPitch[], long WidthInBytes, long Height, int ElementSizeBytes); /** * Frees device memory. * *
     * CUresult cuMemFree (
     *      CUdeviceptr dptr )
     * 
*
*

Frees device memory. Frees the memory * space pointed to by dptr, which must have been returned by a * previous call to cuMemAlloc() or cuMemAllocPitch(). *

*
* Note: *

Note that this * function may also return error codes from previous, asynchronous * launches. *

*
*

*
* * @param dptr Pointer to memory to free * * @return CUDA_SUCCESS, CUDA_ERROR_DEINITIALIZED, CUDA_ERROR_NOT_INITIALIZED, * CUDA_ERROR_INVALID_CONTEXT, CUDA_ERROR_INVALID_VALUE * * @see JCudaDriver#cuArray3DCreate * @see JCudaDriver#cuArray3DGetDescriptor * @see JCudaDriver#cuArrayCreate * @see JCudaDriver#cuArrayDestroy * @see JCudaDriver#cuArrayGetDescriptor * @see JCudaDriver#cuMemAlloc * @see JCudaDriver#cuMemAllocHost * @see JCudaDriver#cuMemAllocPitch * @see JCudaDriver#cuMemcpy2D * @see JCudaDriver#cuMemcpy2DAsync * @see JCudaDriver#cuMemcpy2DUnaligned * @see JCudaDriver#cuMemcpy3D * @see JCudaDriver#cuMemcpy3DAsync * @see JCudaDriver#cuMemcpyAtoA * @see JCudaDriver#cuMemcpyAtoD * @see JCudaDriver#cuMemcpyAtoH * @see JCudaDriver#cuMemcpyAtoHAsync * @see JCudaDriver#cuMemcpyDtoA * @see JCudaDriver#cuMemcpyDtoD * @see JCudaDriver#cuMemcpyDtoDAsync * @see JCudaDriver#cuMemcpyDtoH * @see JCudaDriver#cuMemcpyDtoHAsync * @see JCudaDriver#cuMemcpyHtoA * @see JCudaDriver#cuMemcpyHtoAAsync * @see JCudaDriver#cuMemcpyHtoD * @see JCudaDriver#cuMemcpyHtoDAsync * @see JCudaDriver#cuMemFreeHost * @see JCudaDriver#cuMemGetAddressRange * @see JCudaDriver#cuMemGetInfo * @see JCudaDriver#cuMemHostAlloc * @see JCudaDriver#cuMemHostGetDevicePointer * @see JCudaDriver#cuMemsetD2D8 * @see JCudaDriver#cuMemsetD2D16 * @see JCudaDriver#cuMemsetD2D32 * @see JCudaDriver#cuMemsetD8 * @see JCudaDriver#cuMemsetD16 * @see JCudaDriver#cuMemsetD32 */ public static int cuMemFree(CUdeviceptr dptr) { return checkResult(cuMemFreeNative(dptr)); } private static native int cuMemFreeNative(CUdeviceptr dptr); /** * Get information on memory allocations. * *
     * CUresult cuMemGetAddressRange (
     *      CUdeviceptr* pbase,
     *      size_t* psize,
     *      CUdeviceptr dptr )
     * 
*
*

Get information on memory allocations. * Returns the base address in *pbase and size in *psize * of the allocation by cuMemAlloc() or cuMemAllocPitch() that contains * the input pointer dptr. Both parameters pbase and * psize are optional. If one of them is NULL, it is ignored. *

*
* Note: *

Note that this * function may also return error codes from previous, asynchronous * launches. *

*
*

*
* * @param pbase Returned base address * @param psize Returned size of device memory allocation * @param dptr Device pointer to query * * @return CUDA_SUCCESS, CUDA_ERROR_DEINITIALIZED, CUDA_ERROR_NOT_INITIALIZED, * CUDA_ERROR_INVALID_CONTEXT, CUDA_ERROR_INVALID_VALUE * * @see JCudaDriver#cuArray3DCreate * @see JCudaDriver#cuArray3DGetDescriptor * @see JCudaDriver#cuArrayCreate * @see JCudaDriver#cuArrayDestroy * @see JCudaDriver#cuArrayGetDescriptor * @see JCudaDriver#cuMemAlloc * @see JCudaDriver#cuMemAllocHost * @see JCudaDriver#cuMemAllocPitch * @see JCudaDriver#cuMemcpy2D * @see JCudaDriver#cuMemcpy2DAsync * @see JCudaDriver#cuMemcpy2DUnaligned * @see JCudaDriver#cuMemcpy3D * @see JCudaDriver#cuMemcpy3DAsync * @see JCudaDriver#cuMemcpyAtoA * @see JCudaDriver#cuMemcpyAtoD * @see JCudaDriver#cuMemcpyAtoH * @see JCudaDriver#cuMemcpyAtoHAsync * @see JCudaDriver#cuMemcpyDtoA * @see JCudaDriver#cuMemcpyDtoD * @see JCudaDriver#cuMemcpyDtoDAsync * @see JCudaDriver#cuMemcpyDtoH * @see JCudaDriver#cuMemcpyDtoHAsync * @see JCudaDriver#cuMemcpyHtoA * @see JCudaDriver#cuMemcpyHtoAAsync * @see JCudaDriver#cuMemcpyHtoD * @see JCudaDriver#cuMemcpyHtoDAsync * @see JCudaDriver#cuMemFree * @see JCudaDriver#cuMemFreeHost * @see JCudaDriver#cuMemGetInfo * @see JCudaDriver#cuMemHostAlloc * @see JCudaDriver#cuMemHostGetDevicePointer * @see JCudaDriver#cuMemsetD2D8 * @see JCudaDriver#cuMemsetD2D16 * @see JCudaDriver#cuMemsetD2D32 * @see JCudaDriver#cuMemsetD8 * @see JCudaDriver#cuMemsetD16 * @see JCudaDriver#cuMemsetD32 */ public static int cuMemGetAddressRange(CUdeviceptr pbase, long psize[], CUdeviceptr dptr) { return checkResult(cuMemGetAddressRangeNative(pbase, psize, dptr)); } private static native int cuMemGetAddressRangeNative(CUdeviceptr pbase, long psize[], CUdeviceptr dptr); /** * Allocates page-locked host memory. * *
     * CUresult cuMemAllocHost (
     *      void** pp,
     *      size_t bytesize )
     * 
*
*

Allocates page-locked host memory. * Allocates bytesize bytes of host memory that is page-locked * and accessible to the device. The driver tracks the virtual memory * ranges allocated * with this function and automatically * accelerates calls to functions such as cuMemcpy(). Since the memory * can be accessed directly by the device, it can be read or written with * much higher bandwidth than pageable * memory obtained with functions such as * malloc(). Allocating excessive amounts of memory with cuMemAllocHost() * may degrade system performance, since it reduces the amount of memory * available to the system for paging. As a result, this * function is best used sparingly to * allocate staging areas for data exchange between host and device. *

*

Note all host memory allocated using * cuMemHostAlloc() will automatically be immediately accessible to all * contexts on all devices which support unified addressing (as may be * queried * using CU_DEVICE_ATTRIBUTE_UNIFIED_ADDRESSING). * The device pointer that may be used to access this host memory from * those contexts is always equal to the returned host * pointer *pp. See Unified * Addressing for additional details. *

*
* Note: *

Note that this * function may also return error codes from previous, asynchronous * launches. *

*
*

*
* * @param pp Returned host pointer to page-locked memory * @param bytesize Requested allocation size in bytes * * @return CUDA_SUCCESS, CUDA_ERROR_DEINITIALIZED, CUDA_ERROR_NOT_INITIALIZED, * CUDA_ERROR_INVALID_CONTEXT, CUDA_ERROR_INVALID_VALUE, * CUDA_ERROR_OUT_OF_MEMORY * * @see JCudaDriver#cuArray3DCreate * @see JCudaDriver#cuArray3DGetDescriptor * @see JCudaDriver#cuArrayCreate * @see JCudaDriver#cuArrayDestroy * @see JCudaDriver#cuArrayGetDescriptor * @see JCudaDriver#cuMemAlloc * @see JCudaDriver#cuMemAllocPitch * @see JCudaDriver#cuMemcpy2D * @see JCudaDriver#cuMemcpy2DAsync * @see JCudaDriver#cuMemcpy2DUnaligned * @see JCudaDriver#cuMemcpy3D * @see JCudaDriver#cuMemcpy3DAsync * @see JCudaDriver#cuMemcpyAtoA * @see JCudaDriver#cuMemcpyAtoD * @see JCudaDriver#cuMemcpyAtoH * @see JCudaDriver#cuMemcpyAtoHAsync * @see JCudaDriver#cuMemcpyDtoA * @see JCudaDriver#cuMemcpyDtoD * @see JCudaDriver#cuMemcpyDtoDAsync * @see JCudaDriver#cuMemcpyDtoH * @see JCudaDriver#cuMemcpyDtoHAsync * @see JCudaDriver#cuMemcpyHtoA * @see JCudaDriver#cuMemcpyHtoAAsync * @see JCudaDriver#cuMemcpyHtoD * @see JCudaDriver#cuMemcpyHtoDAsync * @see JCudaDriver#cuMemFree * @see JCudaDriver#cuMemFreeHost * @see JCudaDriver#cuMemGetAddressRange * @see JCudaDriver#cuMemGetInfo * @see JCudaDriver#cuMemHostAlloc * @see JCudaDriver#cuMemHostGetDevicePointer * @see JCudaDriver#cuMemsetD2D8 * @see JCudaDriver#cuMemsetD2D16 * @see JCudaDriver#cuMemsetD2D32 * @see JCudaDriver#cuMemsetD8 * @see JCudaDriver#cuMemsetD16 * @see JCudaDriver#cuMemsetD32 */ public static int cuMemAllocHost(Pointer pointer, long bytesize) { return checkResult(cuMemAllocHostNative(pointer, bytesize)); } private static native int cuMemAllocHostNative(Pointer pp, long bytesize); /** * Frees page-locked host memory. * *
     * CUresult cuMemFreeHost (
     *      void* p )
     * 
*
*

Frees page-locked host memory. Frees * the memory space pointed to by p, which must have been * returned by a previous call to cuMemAllocHost(). *

*
* Note: *

Note that this * function may also return error codes from previous, asynchronous * launches. *

*
*

*
* * @param p Pointer to memory to free * * @return CUDA_SUCCESS, CUDA_ERROR_DEINITIALIZED, CUDA_ERROR_NOT_INITIALIZED, * CUDA_ERROR_INVALID_CONTEXT, CUDA_ERROR_INVALID_VALUE * * @see JCudaDriver#cuArray3DCreate * @see JCudaDriver#cuArray3DGetDescriptor * @see JCudaDriver#cuArrayCreate * @see JCudaDriver#cuArrayDestroy * @see JCudaDriver#cuArrayGetDescriptor * @see JCudaDriver#cuMemAlloc * @see JCudaDriver#cuMemAllocHost * @see JCudaDriver#cuMemAllocPitch * @see JCudaDriver#cuMemcpy2D * @see JCudaDriver#cuMemcpy2DAsync * @see JCudaDriver#cuMemcpy2DUnaligned * @see JCudaDriver#cuMemcpy3D * @see JCudaDriver#cuMemcpy3DAsync * @see JCudaDriver#cuMemcpyAtoA * @see JCudaDriver#cuMemcpyAtoD * @see JCudaDriver#cuMemcpyAtoH * @see JCudaDriver#cuMemcpyAtoHAsync * @see JCudaDriver#cuMemcpyDtoA * @see JCudaDriver#cuMemcpyDtoD * @see JCudaDriver#cuMemcpyDtoDAsync * @see JCudaDriver#cuMemcpyDtoH * @see JCudaDriver#cuMemcpyDtoHAsync * @see JCudaDriver#cuMemcpyHtoA * @see JCudaDriver#cuMemcpyHtoAAsync * @see JCudaDriver#cuMemcpyHtoD * @see JCudaDriver#cuMemcpyHtoDAsync * @see JCudaDriver#cuMemFree * @see JCudaDriver#cuMemGetAddressRange * @see JCudaDriver#cuMemGetInfo * @see JCudaDriver#cuMemHostAlloc * @see JCudaDriver#cuMemHostGetDevicePointer * @see JCudaDriver#cuMemsetD2D8 * @see JCudaDriver#cuMemsetD2D16 * @see JCudaDriver#cuMemsetD2D32 * @see JCudaDriver#cuMemsetD8 * @see JCudaDriver#cuMemsetD16 * @see JCudaDriver#cuMemsetD32 */ public static int cuMemFreeHost(Pointer p) { return checkResult(cuMemFreeHostNative(p)); } private static native int cuMemFreeHostNative(Pointer p); /** * Copies memory from Host to Device. * *
     * CUresult cuMemcpyHtoD (
     *      CUdeviceptr dstDevice,
     *      const void* srcHost,
     *      size_t ByteCount )
     * 
*
*

Copies memory from Host to Device. * Copies from host memory to device memory. dstDevice and srcHost are the base addresses of the destination and source, * respectively. ByteCount specifies the number of bytes to * copy. Note that this function is synchronous. *

*
* Note: *

Note that this * function may also return error codes from previous, asynchronous * launches. *

*
*

*
* * @param dstDevice Destination device pointer * @param srcHost Source host pointer * @param ByteCount Size of memory copy in bytes * * @return CUDA_SUCCESS, CUDA_ERROR_DEINITIALIZED, CUDA_ERROR_NOT_INITIALIZED, * CUDA_ERROR_INVALID_CONTEXT, CUDA_ERROR_INVALID_VALUE * * @see JCudaDriver#cuArray3DCreate * @see JCudaDriver#cuArray3DGetDescriptor * @see JCudaDriver#cuArrayCreate * @see JCudaDriver#cuArrayDestroy * @see JCudaDriver#cuArrayGetDescriptor * @see JCudaDriver#cuMemAlloc * @see JCudaDriver#cuMemAllocHost * @see JCudaDriver#cuMemAllocPitch * @see JCudaDriver#cuMemcpy2D * @see JCudaDriver#cuMemcpy2DAsync * @see JCudaDriver#cuMemcpy2DUnaligned * @see JCudaDriver#cuMemcpy3D * @see JCudaDriver#cuMemcpy3DAsync * @see JCudaDriver#cuMemcpyAtoA * @see JCudaDriver#cuMemcpyAtoD * @see JCudaDriver#cuMemcpyAtoH * @see JCudaDriver#cuMemcpyAtoHAsync * @see JCudaDriver#cuMemcpyDtoA * @see JCudaDriver#cuMemcpyDtoD * @see JCudaDriver#cuMemcpyDtoDAsync * @see JCudaDriver#cuMemcpyDtoH * @see JCudaDriver#cuMemcpyDtoHAsync * @see JCudaDriver#cuMemcpyHtoA * @see JCudaDriver#cuMemcpyHtoAAsync * @see JCudaDriver#cuMemcpyHtoDAsync * @see JCudaDriver#cuMemFree * @see JCudaDriver#cuMemFreeHost * @see JCudaDriver#cuMemGetAddressRange * @see JCudaDriver#cuMemGetInfo * @see JCudaDriver#cuMemHostAlloc * @see JCudaDriver#cuMemHostGetDevicePointer * @see JCudaDriver#cuMemsetD2D8 * @see JCudaDriver#cuMemsetD2D16 * @see JCudaDriver#cuMemsetD2D32 * @see JCudaDriver#cuMemsetD8 * @see JCudaDriver#cuMemsetD16 * @see JCudaDriver#cuMemsetD32 */ public static int cuMemcpyHtoD(CUdeviceptr dstDevice, Pointer srcHost, long ByteCount) { return checkResult(cuMemcpyHtoDNative(dstDevice, srcHost, ByteCount)); } private static native int cuMemcpyHtoDNative(CUdeviceptr dstDevice, Pointer srcHost, long ByteCount); /** * Copies memory from Device to Host. * *
     * CUresult cuMemcpyDtoH (
     *      void* dstHost,
     *      CUdeviceptr srcDevice,
     *      size_t ByteCount )
     * 
*
*

Copies memory from Device to Host. * Copies from device to host memory. dstHost and srcDevice specify the base pointers of the destination and * source, respectively. ByteCount specifies the number of bytes * to copy. Note that this function is synchronous. *

*
* Note: *

Note that this * function may also return error codes from previous, asynchronous * launches. *

*
*

*
* * @param dstHost Destination host pointer * @param srcDevice Source device pointer * @param ByteCount Size of memory copy in bytes * * @return CUDA_SUCCESS, CUDA_ERROR_DEINITIALIZED, CUDA_ERROR_NOT_INITIALIZED, * CUDA_ERROR_INVALID_CONTEXT, CUDA_ERROR_INVALID_VALUE * * @see JCudaDriver#cuArray3DCreate * @see JCudaDriver#cuArray3DGetDescriptor * @see JCudaDriver#cuArrayCreate * @see JCudaDriver#cuArrayDestroy * @see JCudaDriver#cuArrayGetDescriptor * @see JCudaDriver#cuMemAlloc * @see JCudaDriver#cuMemAllocHost * @see JCudaDriver#cuMemAllocPitch * @see JCudaDriver#cuMemcpy2D * @see JCudaDriver#cuMemcpy2DAsync * @see JCudaDriver#cuMemcpy2DUnaligned * @see JCudaDriver#cuMemcpy3D * @see JCudaDriver#cuMemcpy3DAsync * @see JCudaDriver#cuMemcpyAtoA * @see JCudaDriver#cuMemcpyAtoD * @see JCudaDriver#cuMemcpyAtoH * @see JCudaDriver#cuMemcpyAtoHAsync * @see JCudaDriver#cuMemcpyDtoA * @see JCudaDriver#cuMemcpyDtoD * @see JCudaDriver#cuMemcpyDtoDAsync * @see JCudaDriver#cuMemcpyDtoHAsync * @see JCudaDriver#cuMemcpyHtoA * @see JCudaDriver#cuMemcpyHtoAAsync * @see JCudaDriver#cuMemcpyHtoD * @see JCudaDriver#cuMemcpyHtoDAsync * @see JCudaDriver#cuMemFree * @see JCudaDriver#cuMemFreeHost * @see JCudaDriver#cuMemGetAddressRange * @see JCudaDriver#cuMemGetInfo * @see JCudaDriver#cuMemHostAlloc * @see JCudaDriver#cuMemHostGetDevicePointer * @see JCudaDriver#cuMemsetD2D8 * @see JCudaDriver#cuMemsetD2D16 * @see JCudaDriver#cuMemsetD2D32 * @see JCudaDriver#cuMemsetD8 * @see JCudaDriver#cuMemsetD16 * @see JCudaDriver#cuMemsetD32 */ public static int cuMemcpyDtoH(Pointer dstHost, CUdeviceptr srcDevice, long ByteCount) { return checkResult(cuMemcpyDtoHNative(dstHost, srcDevice, ByteCount)); } private static native int cuMemcpyDtoHNative(Pointer dstHost, CUdeviceptr srcDevice, long ByteCount); /** * Copies memory from Device to Device. * *
     * CUresult cuMemcpyDtoD (
     *      CUdeviceptr dstDevice,
     *      CUdeviceptr srcDevice,
     *      size_t ByteCount )
     * 
*
*

Copies memory from Device to Device. * Copies from device memory to device memory. dstDevice and * srcDevice are the base pointers of the destination and * source, respectively. ByteCount specifies the number of bytes * to copy. Note that this function is asynchronous. *

*
* Note: *

Note that this * function may also return error codes from previous, asynchronous * launches. *

*
*

*
* * @param dstDevice Destination device pointer * @param srcDevice Source device pointer * @param ByteCount Size of memory copy in bytes * * @return CUDA_SUCCESS, CUDA_ERROR_DEINITIALIZED, CUDA_ERROR_NOT_INITIALIZED, * CUDA_ERROR_INVALID_CONTEXT, CUDA_ERROR_INVALID_VALUE * * @see JCudaDriver#cuArray3DCreate * @see JCudaDriver#cuArray3DGetDescriptor * @see JCudaDriver#cuArrayCreate * @see JCudaDriver#cuArrayDestroy * @see JCudaDriver#cuArrayGetDescriptor * @see JCudaDriver#cuMemAlloc * @see JCudaDriver#cuMemAllocHost * @see JCudaDriver#cuMemAllocPitch * @see JCudaDriver#cuMemcpy2D * @see JCudaDriver#cuMemcpy2DAsync * @see JCudaDriver#cuMemcpy2DUnaligned * @see JCudaDriver#cuMemcpy3D * @see JCudaDriver#cuMemcpy3DAsync * @see JCudaDriver#cuMemcpyAtoA * @see JCudaDriver#cuMemcpyAtoD * @see JCudaDriver#cuMemcpyAtoH * @see JCudaDriver#cuMemcpyAtoHAsync * @see JCudaDriver#cuMemcpyDtoA * @see JCudaDriver#cuMemcpyDtoH * @see JCudaDriver#cuMemcpyDtoHAsync * @see JCudaDriver#cuMemcpyHtoA * @see JCudaDriver#cuMemcpyHtoAAsync * @see JCudaDriver#cuMemcpyHtoD * @see JCudaDriver#cuMemcpyHtoDAsync * @see JCudaDriver#cuMemFree * @see JCudaDriver#cuMemFreeHost * @see JCudaDriver#cuMemGetAddressRange * @see JCudaDriver#cuMemGetInfo * @see JCudaDriver#cuMemHostAlloc * @see JCudaDriver#cuMemHostGetDevicePointer * @see JCudaDriver#cuMemsetD2D8 * @see JCudaDriver#cuMemsetD2D16 * @see JCudaDriver#cuMemsetD2D32 * @see JCudaDriver#cuMemsetD8 * @see JCudaDriver#cuMemsetD16 * @see JCudaDriver#cuMemsetD32 */ public static int cuMemcpyDtoD(CUdeviceptr dstDevice, CUdeviceptr srcDevice, long ByteCount) { return checkResult(cuMemcpyDtoDNative(dstDevice, srcDevice, ByteCount)); } private static native int cuMemcpyDtoDNative(CUdeviceptr dstDevice, CUdeviceptr srcDevice, long ByteCount); /** * Copies memory from Device to Array. * *
     * CUresult cuMemcpyDtoA (
     *      CUarray dstArray,
     *      size_t dstOffset,
     *      CUdeviceptr srcDevice,
     *      size_t ByteCount )
     * 
*
*

Copies memory from Device to Array. * Copies from device memory to a 1D CUDA array. dstArray and * dstOffset specify the CUDA array handle and starting index * of the destination data. srcDevice specifies the base pointer * of the source. ByteCount specifies the number of bytes to * copy. *

*
* Note: *

Note that this * function may also return error codes from previous, asynchronous * launches. *

*
*

*
* * @param dstArray Destination array * @param dstOffset Offset in bytes of destination array * @param srcDevice Source device pointer * @param ByteCount Size of memory copy in bytes * * @return CUDA_SUCCESS, CUDA_ERROR_DEINITIALIZED, CUDA_ERROR_NOT_INITIALIZED, * CUDA_ERROR_INVALID_CONTEXT, CUDA_ERROR_INVALID_VALUE * * @see JCudaDriver#cuArray3DCreate * @see JCudaDriver#cuArray3DGetDescriptor * @see JCudaDriver#cuArrayCreate * @see JCudaDriver#cuArrayDestroy * @see JCudaDriver#cuArrayGetDescriptor * @see JCudaDriver#cuMemAlloc * @see JCudaDriver#cuMemAllocHost * @see JCudaDriver#cuMemAllocPitch * @see JCudaDriver#cuMemcpy2D * @see JCudaDriver#cuMemcpy2DAsync * @see JCudaDriver#cuMemcpy2DUnaligned * @see JCudaDriver#cuMemcpy3D * @see JCudaDriver#cuMemcpy3DAsync * @see JCudaDriver#cuMemcpyAtoA * @see JCudaDriver#cuMemcpyAtoD * @see JCudaDriver#cuMemcpyAtoH * @see JCudaDriver#cuMemcpyAtoHAsync * @see JCudaDriver#cuMemcpyDtoD * @see JCudaDriver#cuMemcpyDtoDAsync * @see JCudaDriver#cuMemcpyDtoH * @see JCudaDriver#cuMemcpyDtoHAsync * @see JCudaDriver#cuMemcpyHtoA * @see JCudaDriver#cuMemcpyHtoAAsync * @see JCudaDriver#cuMemcpyHtoD * @see JCudaDriver#cuMemcpyHtoDAsync * @see JCudaDriver#cuMemFree * @see JCudaDriver#cuMemFreeHost * @see JCudaDriver#cuMemGetAddressRange * @see JCudaDriver#cuMemGetInfo * @see JCudaDriver#cuMemHostAlloc * @see JCudaDriver#cuMemHostGetDevicePointer * @see JCudaDriver#cuMemsetD2D8 * @see JCudaDriver#cuMemsetD2D16 * @see JCudaDriver#cuMemsetD2D32 * @see JCudaDriver#cuMemsetD8 * @see JCudaDriver#cuMemsetD16 * @see JCudaDriver#cuMemsetD32 */ public static int cuMemcpyDtoA(CUarray dstArray, long dstIndex, CUdeviceptr srcDevice, long ByteCount) { return checkResult(cuMemcpyDtoANative(dstArray, dstIndex, srcDevice, ByteCount)); } private static native int cuMemcpyDtoANative(CUarray dstArray, long dstIndex, CUdeviceptr srcDevice, long ByteCount); /** * Copies memory from Array to Device. * *
     * CUresult cuMemcpyAtoD (
     *      CUdeviceptr dstDevice,
     *      CUarray srcArray,
     *      size_t srcOffset,
     *      size_t ByteCount )
     * 
*
*

Copies memory from Array to Device. * Copies from one 1D CUDA array to device memory. dstDevice * specifies the base pointer of the destination and must be naturally * aligned with the CUDA array elements. srcArray and srcOffset specify the CUDA array handle and the offset in bytes * into the array where the copy is to begin. ByteCount specifies * the number of bytes to copy and must be evenly divisible by the array * element size. *

*
* Note: *

Note that this * function may also return error codes from previous, asynchronous * launches. *

*
*

*
* * @param dstDevice Destination device pointer * @param srcArray Source array * @param srcOffset Offset in bytes of source array * @param ByteCount Size of memory copy in bytes * * @return CUDA_SUCCESS, CUDA_ERROR_DEINITIALIZED, CUDA_ERROR_NOT_INITIALIZED, * CUDA_ERROR_INVALID_CONTEXT, CUDA_ERROR_INVALID_VALUE * * @see JCudaDriver#cuArray3DCreate * @see JCudaDriver#cuArray3DGetDescriptor * @see JCudaDriver#cuArrayCreate * @see JCudaDriver#cuArrayDestroy * @see JCudaDriver#cuArrayGetDescriptor * @see JCudaDriver#cuMemAlloc * @see JCudaDriver#cuMemAllocHost * @see JCudaDriver#cuMemAllocPitch * @see JCudaDriver#cuMemcpy2D * @see JCudaDriver#cuMemcpy2DAsync * @see JCudaDriver#cuMemcpy2DUnaligned * @see JCudaDriver#cuMemcpy3D * @see JCudaDriver#cuMemcpy3DAsync * @see JCudaDriver#cuMemcpyAtoA * @see JCudaDriver#cuMemcpyAtoH * @see JCudaDriver#cuMemcpyAtoHAsync * @see JCudaDriver#cuMemcpyDtoA * @see JCudaDriver#cuMemcpyDtoD * @see JCudaDriver#cuMemcpyDtoDAsync * @see JCudaDriver#cuMemcpyDtoH * @see JCudaDriver#cuMemcpyDtoHAsync * @see JCudaDriver#cuMemcpyHtoA * @see JCudaDriver#cuMemcpyHtoAAsync * @see JCudaDriver#cuMemcpyHtoD * @see JCudaDriver#cuMemcpyHtoDAsync * @see JCudaDriver#cuMemFree * @see JCudaDriver#cuMemFreeHost * @see JCudaDriver#cuMemGetAddressRange * @see JCudaDriver#cuMemGetInfo * @see JCudaDriver#cuMemHostAlloc * @see JCudaDriver#cuMemHostGetDevicePointer * @see JCudaDriver#cuMemsetD2D8 * @see JCudaDriver#cuMemsetD2D16 * @see JCudaDriver#cuMemsetD2D32 * @see JCudaDriver#cuMemsetD8 * @see JCudaDriver#cuMemsetD16 * @see JCudaDriver#cuMemsetD32 */ public static int cuMemcpyAtoD(CUdeviceptr dstDevice, CUarray hSrc, long SrcIndex, long ByteCount) { return checkResult(cuMemcpyAtoDNative(dstDevice, hSrc, SrcIndex, ByteCount)); } private static native int cuMemcpyAtoDNative(CUdeviceptr dstDevice, CUarray hSrc, long SrcIndex, long ByteCount); /** * Copies memory from Host to Array. * *
     * CUresult cuMemcpyHtoA (
     *      CUarray dstArray,
     *      size_t dstOffset,
     *      const void* srcHost,
     *      size_t ByteCount )
     * 
*
*

Copies memory from Host to Array. Copies * from host memory to a 1D CUDA array. dstArray and dstOffset specify the CUDA array handle and starting offset in * bytes of the destination data. pSrc specifies the base * address of the source. ByteCount specifies the number of * bytes to copy. *

*
* Note: *

Note that this * function may also return error codes from previous, asynchronous * launches. *

*
*

*
* * @param dstArray Destination array * @param dstOffset Offset in bytes of destination array * @param srcHost Source host pointer * @param ByteCount Size of memory copy in bytes * * @return CUDA_SUCCESS, CUDA_ERROR_DEINITIALIZED, CUDA_ERROR_NOT_INITIALIZED, * CUDA_ERROR_INVALID_CONTEXT, CUDA_ERROR_INVALID_VALUE * * @see JCudaDriver#cuArray3DCreate * @see JCudaDriver#cuArray3DGetDescriptor * @see JCudaDriver#cuArrayCreate * @see JCudaDriver#cuArrayDestroy * @see JCudaDriver#cuArrayGetDescriptor * @see JCudaDriver#cuMemAlloc * @see JCudaDriver#cuMemAllocHost * @see JCudaDriver#cuMemAllocPitch * @see JCudaDriver#cuMemcpy2D * @see JCudaDriver#cuMemcpy2DAsync * @see JCudaDriver#cuMemcpy2DUnaligned * @see JCudaDriver#cuMemcpy3D * @see JCudaDriver#cuMemcpy3DAsync * @see JCudaDriver#cuMemcpyAtoA * @see JCudaDriver#cuMemcpyAtoD * @see JCudaDriver#cuMemcpyAtoH * @see JCudaDriver#cuMemcpyAtoHAsync * @see JCudaDriver#cuMemcpyDtoA * @see JCudaDriver#cuMemcpyDtoD * @see JCudaDriver#cuMemcpyDtoDAsync * @see JCudaDriver#cuMemcpyDtoH * @see JCudaDriver#cuMemcpyDtoHAsync * @see JCudaDriver#cuMemcpyHtoAAsync * @see JCudaDriver#cuMemcpyHtoD * @see JCudaDriver#cuMemcpyHtoDAsync * @see JCudaDriver#cuMemFree * @see JCudaDriver#cuMemFreeHost * @see JCudaDriver#cuMemGetAddressRange * @see JCudaDriver#cuMemGetInfo * @see JCudaDriver#cuMemHostAlloc * @see JCudaDriver#cuMemHostGetDevicePointer * @see JCudaDriver#cuMemsetD2D8 * @see JCudaDriver#cuMemsetD2D16 * @see JCudaDriver#cuMemsetD2D32 * @see JCudaDriver#cuMemsetD8 * @see JCudaDriver#cuMemsetD16 * @see JCudaDriver#cuMemsetD32 */ public static int cuMemcpyHtoA(CUarray dstArray, long dstIndex, Pointer pSrc, long ByteCount) { return checkResult(cuMemcpyHtoANative(dstArray, dstIndex, pSrc, ByteCount)); } private static native int cuMemcpyHtoANative(CUarray dstArray, long dstIndex, Pointer pSrc, long ByteCount); /** * Copies memory from Array to Host. * *
     * CUresult cuMemcpyAtoH (
     *      void* dstHost,
     *      CUarray srcArray,
     *      size_t srcOffset,
     *      size_t ByteCount )
     * 
*
*

Copies memory from Array to Host. Copies * from one 1D CUDA array to host memory. dstHost specifies the * base pointer of the destination. srcArray and srcOffset specify the CUDA array handle and starting offset in * bytes of the source data. ByteCount specifies the number of * bytes to copy. *

*
* Note: *

Note that this * function may also return error codes from previous, asynchronous * launches. *

*
*

*
* * @param dstHost Destination device pointer * @param srcArray Source array * @param srcOffset Offset in bytes of source array * @param ByteCount Size of memory copy in bytes * * @return CUDA_SUCCESS, CUDA_ERROR_DEINITIALIZED, CUDA_ERROR_NOT_INITIALIZED, * CUDA_ERROR_INVALID_CONTEXT, CUDA_ERROR_INVALID_VALUE * * @see JCudaDriver#cuArray3DCreate * @see JCudaDriver#cuArray3DGetDescriptor * @see JCudaDriver#cuArrayCreate * @see JCudaDriver#cuArrayDestroy * @see JCudaDriver#cuArrayGetDescriptor * @see JCudaDriver#cuMemAlloc * @see JCudaDriver#cuMemAllocHost * @see JCudaDriver#cuMemAllocPitch * @see JCudaDriver#cuMemcpy2D * @see JCudaDriver#cuMemcpy2DAsync * @see JCudaDriver#cuMemcpy2DUnaligned * @see JCudaDriver#cuMemcpy3D * @see JCudaDriver#cuMemcpy3DAsync * @see JCudaDriver#cuMemcpyAtoA * @see JCudaDriver#cuMemcpyAtoD * @see JCudaDriver#cuMemcpyAtoHAsync * @see JCudaDriver#cuMemcpyDtoA * @see JCudaDriver#cuMemcpyDtoD * @see JCudaDriver#cuMemcpyDtoDAsync * @see JCudaDriver#cuMemcpyDtoH * @see JCudaDriver#cuMemcpyDtoHAsync * @see JCudaDriver#cuMemcpyHtoA * @see JCudaDriver#cuMemcpyHtoAAsync * @see JCudaDriver#cuMemcpyHtoD * @see JCudaDriver#cuMemcpyHtoDAsync * @see JCudaDriver#cuMemFree * @see JCudaDriver#cuMemFreeHost * @see JCudaDriver#cuMemGetAddressRange * @see JCudaDriver#cuMemGetInfo * @see JCudaDriver#cuMemHostAlloc * @see JCudaDriver#cuMemHostGetDevicePointer * @see JCudaDriver#cuMemsetD2D8 * @see JCudaDriver#cuMemsetD2D16 * @see JCudaDriver#cuMemsetD2D32 * @see JCudaDriver#cuMemsetD8 * @see JCudaDriver#cuMemsetD16 * @see JCudaDriver#cuMemsetD32 */ public static int cuMemcpyAtoH(Pointer dstHost, CUarray srcArray, long srcIndex, long ByteCount) { return checkResult(cuMemcpyAtoHNative(dstHost, srcArray, srcIndex, ByteCount)); } private static native int cuMemcpyAtoHNative(Pointer dstHost, CUarray srcArray, long srcIndex, long ByteCount); /** * Copies memory from Array to Array. * *
     * CUresult cuMemcpyAtoA (
     *      CUarray dstArray,
     *      size_t dstOffset,
     *      CUarray srcArray,
     *      size_t srcOffset,
     *      size_t ByteCount )
     * 
*
*

Copies memory from Array to Array. * Copies from one 1D CUDA array to another. dstArray and srcArray specify the handles of the destination and source CUDA * arrays for the copy, respectively. dstOffset and srcOffset specify the destination and source offsets in bytes * into the CUDA arrays. ByteCount is the number of bytes to be * copied. The size of the elements in the CUDA arrays need not be the * same format, but the elements * must be the same size; and count must be * evenly divisible by that size. *

*
* Note: *

Note that this * function may also return error codes from previous, asynchronous * launches. *

*
*

*
* * @param dstArray Destination array * @param dstOffset Offset in bytes of destination array * @param srcArray Source array * @param srcOffset Offset in bytes of source array * @param ByteCount Size of memory copy in bytes * * @return CUDA_SUCCESS, CUDA_ERROR_DEINITIALIZED, CUDA_ERROR_NOT_INITIALIZED, * CUDA_ERROR_INVALID_CONTEXT, CUDA_ERROR_INVALID_VALUE * * @see JCudaDriver#cuArray3DCreate * @see JCudaDriver#cuArray3DGetDescriptor * @see JCudaDriver#cuArrayCreate * @see JCudaDriver#cuArrayDestroy * @see JCudaDriver#cuArrayGetDescriptor * @see JCudaDriver#cuMemAlloc * @see JCudaDriver#cuMemAllocHost * @see JCudaDriver#cuMemAllocPitch * @see JCudaDriver#cuMemcpy2D * @see JCudaDriver#cuMemcpy2DAsync * @see JCudaDriver#cuMemcpy2DUnaligned * @see JCudaDriver#cuMemcpy3D * @see JCudaDriver#cuMemcpy3DAsync * @see JCudaDriver#cuMemcpyAtoD * @see JCudaDriver#cuMemcpyAtoH * @see JCudaDriver#cuMemcpyAtoHAsync * @see JCudaDriver#cuMemcpyDtoA * @see JCudaDriver#cuMemcpyDtoD * @see JCudaDriver#cuMemcpyDtoDAsync * @see JCudaDriver#cuMemcpyDtoH * @see JCudaDriver#cuMemcpyDtoHAsync * @see JCudaDriver#cuMemcpyHtoA * @see JCudaDriver#cuMemcpyHtoAAsync * @see JCudaDriver#cuMemcpyHtoD * @see JCudaDriver#cuMemcpyHtoDAsync * @see JCudaDriver#cuMemFree * @see JCudaDriver#cuMemFreeHost * @see JCudaDriver#cuMemGetAddressRange * @see JCudaDriver#cuMemGetInfo * @see JCudaDriver#cuMemHostAlloc * @see JCudaDriver#cuMemHostGetDevicePointer * @see JCudaDriver#cuMemsetD2D8 * @see JCudaDriver#cuMemsetD2D16 * @see JCudaDriver#cuMemsetD2D32 * @see JCudaDriver#cuMemsetD8 * @see JCudaDriver#cuMemsetD16 * @see JCudaDriver#cuMemsetD32 */ public static int cuMemcpyAtoA(CUarray dstArray, long dstIndex, CUarray srcArray, long srcIndex, long ByteCount) { return checkResult(cuMemcpyAtoANative(dstArray, dstIndex, srcArray, srcIndex, ByteCount)); } private static native int cuMemcpyAtoANative(CUarray dstArray, long dstIndex, CUarray srcArray, long srcIndex, long ByteCount); /** * Copies memory for 2D arrays. 
* *
     * CUresult cuMemcpy2D (
     *      const CUDA_MEMCPY2D* pCopy )
     * 
*
*

Copies memory for 2D arrays. Perform a * 2D memory copy according to the parameters specified in pCopy. * The CUDA_MEMCPY2D structure is defined as: *

*
   typedef struct CUDA_MEMCPY2D_st {
     *       unsigned int srcXInBytes, srcY;
     *       CUmemorytype srcMemoryType;
     *           const void *srcHost;
     *           CUdeviceptr srcDevice;
     *           CUarray srcArray;
     *           unsigned int srcPitch;
     *
     *       unsigned int dstXInBytes, dstY;
     *       CUmemorytype dstMemoryType;
     *           void *dstHost;
     *           CUdeviceptr dstDevice;
     *           CUarray dstArray;
     *           unsigned int dstPitch;
     *
     *       unsigned int WidthInBytes;
     *       unsigned int Height;
     *    } CUDA_MEMCPY2D;
     * </pre>
     * where:
     * <ul>
     * <li>
     * srcMemoryType and dstMemoryType specify the type of memory of the
     * source and destination, respectively; CUmemorytype_enum is defined as:
     * <pre>
     *    typedef enum CUmemorytype_enum {
     *       CU_MEMORYTYPE_HOST = 0x01,
     *       CU_MEMORYTYPE_DEVICE = 0x02,
     *       CU_MEMORYTYPE_ARRAY = 0x03,
     *       CU_MEMORYTYPE_UNIFIED = 0x04
     *    } CUmemorytype;
     * </pre>
     * If srcMemoryType is CU_MEMORYTYPE_UNIFIED, srcDevice and srcPitch
     * specify the (unified virtual address space) base address of the
     * source data and the bytes per row to apply. srcArray is ignored.
     * This value may be used only if unified addressing is supported in
     * the calling context.
     * <br />
     * If srcMemoryType is CU_MEMORYTYPE_HOST, srcHost and srcPitch specify
     * the (host) base address of the source data and the bytes per row to
     * apply. srcArray is ignored.
     * <br />
     * If srcMemoryType is CU_MEMORYTYPE_DEVICE, srcDevice and srcPitch
     * specify the (device) base address of the source data and the bytes
     * per row to apply. srcArray is ignored.
     * <br />
     * If srcMemoryType is CU_MEMORYTYPE_ARRAY, srcArray specifies the
     * handle of the source data. srcHost, srcDevice and srcPitch are
     * ignored.
     * <br />
     * The dstMemoryType and the dstHost, dstDevice, dstArray and dstPitch
     * fields are interpreted analogously for the destination of the copy.
     * </li>
     * <li>
     * srcXInBytes and srcY specify the base address of the source data for
     * the copy; dstXInBytes and dstY specify the base address of the
     * destination. For host pointers, the starting address is
     * <pre>
     *   void* Start = (void*)((char*)srcHost+srcY*srcPitch + srcXInBytes);
     * </pre>
     * For device pointers, the starting address is
     * <pre>
     *   CUdeviceptr Start = srcDevice+srcY*srcPitch+srcXInBytes;
     * </pre>
     * For CUDA arrays, srcXInBytes and dstXInBytes must be evenly
     * divisible by the array element size.
     * </li>
     * <li>
     * WidthInBytes and Height specify the width (in bytes) and height of
     * the 2D copy being performed.
     * </li>
     * <li>
     * If specified, srcPitch must be greater than or equal to
     * WidthInBytes + srcXInBytes, and dstPitch must be greater than or
     * equal to WidthInBytes + dstXInBytes.
     * </li>
     * </ul>
     * cuMemcpy2D() returns an error if any pitch is greater than the
     * maximum allowed (CU_DEVICE_ATTRIBUTE_MAX_PITCH). cuMemAllocPitch()
     * passes back pitches that always work with cuMemcpy2D(). On
     * intra-device memory copies (device to device, CUDA array to device,
     * CUDA array to CUDA array), cuMemcpy2D() may fail for pitches not
     * computed by cuMemAllocPitch(). cuMemcpy2DUnaligned() does not have
     * this restriction, but may run significantly slower in the cases
     * where cuMemcpy2D() would have returned an error code.
     * <p>
     * Note that this function may also return error codes from previous,
     * asynchronous launches.
     *
     * @param pCopy Parameters for the memory copy
     *
     * @return CUDA_SUCCESS, CUDA_ERROR_DEINITIALIZED,
     * CUDA_ERROR_NOT_INITIALIZED, CUDA_ERROR_INVALID_CONTEXT,
     * CUDA_ERROR_INVALID_VALUE
     *
     * @see JCudaDriver#cuMemcpy2DAsync
     * @see JCudaDriver#cuMemcpy2DUnaligned
     * @see JCudaDriver#cuMemcpy3D
     * @see JCudaDriver#cuMemAllocPitch
     */
    public static int cuMemcpy2D(CUDA_MEMCPY2D pCopy)
    {
        return checkResult(cuMemcpy2DNative(pCopy));
    }
    private static native int cuMemcpy2DNative(CUDA_MEMCPY2D pCopy);


    /**
     * Copies memory for 2D arrays.
     *
     * <pre>
     * CUresult cuMemcpy2DUnaligned (
     *      const CUDA_MEMCPY2D* pCopy )
     * </pre>
     * <p>
     * Copies memory for 2D arrays. Performs a 2D memory copy according to
     * the parameters specified in pCopy. The CUDA_MEMCPY2D structure is
     * defined as:
     * <pre>
     *    typedef struct CUDA_MEMCPY2D_st {
     *       unsigned int srcXInBytes, srcY;
     *       CUmemorytype srcMemoryType;
     *       const void *srcHost;
     *       CUdeviceptr srcDevice;
     *       CUarray srcArray;
     *       unsigned int srcPitch;
     *       unsigned int dstXInBytes, dstY;
     *       CUmemorytype dstMemoryType;
     *       void *dstHost;
     *       CUdeviceptr dstDevice;
     *       CUarray dstArray;
     *       unsigned int dstPitch;
     *       unsigned int WidthInBytes;
     *       unsigned int Height;
     *    } CUDA_MEMCPY2D;
     * </pre>
     * The interpretation of the fields (memory types, base addresses,
     * pitches, and the WidthInBytes and Height extents) is the same as for
     * {@link JCudaDriver#cuMemcpy2D}.
     * <p>
     * Unlike cuMemcpy2D(), cuMemcpy2DUnaligned() does not restrict the
     * pitches to those computed by cuMemAllocPitch() for intra-device
     * copies, but may run significantly slower in the cases where
     * cuMemcpy2D() would have returned an error code. cuMemcpy2D() returns
     * an error if any pitch is greater than the maximum allowed
     * (CU_DEVICE_ATTRIBUTE_MAX_PITCH).
     * <p>
     * Note that this function may also return error codes from previous,
     * asynchronous launches.
     *
     * @param pCopy Parameters for the memory copy
     *
     * @return CUDA_SUCCESS, CUDA_ERROR_DEINITIALIZED,
     * CUDA_ERROR_NOT_INITIALIZED, CUDA_ERROR_INVALID_CONTEXT,
     * CUDA_ERROR_INVALID_VALUE
     *
     * @see JCudaDriver#cuMemcpy2D
     * @see JCudaDriver#cuMemcpy2DAsync
     * @see JCudaDriver#cuMemAllocPitch
     */
    public static int cuMemcpy2DUnaligned(CUDA_MEMCPY2D pCopy)
    {
        return checkResult(cuMemcpy2DUnalignedNative(pCopy));
    }
    private static native int cuMemcpy2DUnalignedNative(CUDA_MEMCPY2D pCopy);


    /**
     * Copies memory for 3D arrays.
     *
     * <pre>
     * CUresult cuMemcpy3D (
     *      const CUDA_MEMCPY3D* pCopy )
     * </pre>
     * <p>
     * Copies memory for 3D arrays. Performs a 3D memory copy according to
     * the parameters specified in pCopy. The CUDA_MEMCPY3D structure is
     * defined as:
     * <pre>
     *         typedef struct CUDA_MEMCPY3D_st
     *         {
     *             unsigned int srcXInBytes, srcY, srcZ;
     *             unsigned int srcLOD;
     *             CUmemorytype srcMemoryType;
     *                 const void *srcHost;
     *                 CUdeviceptr srcDevice;
     *                 CUarray srcArray;
     *                 unsigned int srcPitch;  // ignored when src is array
     *                 unsigned int srcHeight; // ignored when src is array;
     *                                         // may be 0 if Depth==1
     *
     *             unsigned int dstXInBytes, dstY, dstZ;
     *             unsigned int dstLOD;
     *             CUmemorytype dstMemoryType;
     *                 void *dstHost;
     *                 CUdeviceptr dstDevice;
     *                 CUarray dstArray;
     *                 unsigned int dstPitch;  // ignored when dst is array
     *                 unsigned int dstHeight; // ignored when dst is array;
     *                                         // may be 0 if Depth==1
     *
     *             unsigned int WidthInBytes;
     *             unsigned int Height;
     *             unsigned int Depth;
     *         } CUDA_MEMCPY3D;
     * </pre>
     * where:
     * <ul>
     * <li>
     * srcMemoryType and dstMemoryType specify the type of memory of the
     * source and destination (CU_MEMORYTYPE_HOST, CU_MEMORYTYPE_DEVICE,
     * CU_MEMORYTYPE_ARRAY or CU_MEMORYTYPE_UNIFIED), and select which of
     * the source and destination address fields are used, analogously to
     * {@link JCudaDriver#cuMemcpy2D}. For host and device memory, srcPitch
     * and srcHeight (or dstPitch and dstHeight) additionally specify the
     * bytes per row and the height of each 2D slice of the 3D array; when
     * the source (or destination) is a CUDA array, srcHost, srcDevice,
     * srcPitch and srcHeight (or their dst counterparts) are ignored.
     * </li>
     * <li>
     * srcXInBytes, srcY and srcZ specify the base address of the source
     * data for the copy; dstXInBytes, dstY and dstZ specify the base
     * address of the destination. For host pointers, the starting address
     * is
     * <pre>
     *   void* Start = (void*)((char*)srcHost+(srcZ*srcHeight+srcY)*srcPitch
     *      + srcXInBytes);
     * </pre>
     * For device pointers, the starting address is
     * <pre>
     *   CUdeviceptr Start =
     *      srcDevice+(srcZ*srcHeight+srcY)*srcPitch+srcXInBytes;
     * </pre>
     * For CUDA arrays, srcXInBytes and dstXInBytes must be evenly
     * divisible by the array element size.
     * </li>
     * <li>
     * WidthInBytes, Height and Depth specify the width (in bytes), height
     * and depth of the 3D copy being performed.
     * </li>
     * <li>
     * If specified, srcPitch must be greater than or equal to
     * WidthInBytes + srcXInBytes, and dstPitch must be greater than or
     * equal to WidthInBytes + dstXInBytes. If specified, srcHeight must be
     * greater than or equal to Height + srcY, and dstHeight must be
     * greater than or equal to Height + dstY.
     * </li>
     * </ul>
     * cuMemcpy3D() returns an error if any pitch is greater than the
     * maximum allowed (CU_DEVICE_ATTRIBUTE_MAX_PITCH).
     * <p>
     * The srcLOD and dstLOD members of the CUDA_MEMCPY3D structure must be
     * set to 0.
     * <p>
     * Note that this function may also return error codes from previous,
     * asynchronous launches.
     *
     * @param pCopy Parameters for the memory copy
     *
     * @return CUDA_SUCCESS, CUDA_ERROR_DEINITIALIZED,
     * CUDA_ERROR_NOT_INITIALIZED, CUDA_ERROR_INVALID_CONTEXT,
     * CUDA_ERROR_INVALID_VALUE
     *
     * @see JCudaDriver#cuMemcpy3DAsync
     * @see JCudaDriver#cuMemcpy2D
     * @see JCudaDriver#cuMemAllocPitch
     */
    public static int cuMemcpy3D(CUDA_MEMCPY3D pCopy)
    {
        return checkResult(cuMemcpy3DNative(pCopy));
    }
    private static native int cuMemcpy3DNative(CUDA_MEMCPY3D pCopy);


    /**
     * Copies memory between contexts.
     *
     * <pre>
     * CUresult cuMemcpy3DPeer (
     *      const CUDA_MEMCPY3D_PEER* pCopy )
     * </pre>
     * <p>
     * Copies memory between contexts. Performs a 3D memory copy according
     * to the parameters specified in pCopy. See the definition of the
     * CUDA_MEMCPY3D_PEER structure for documentation of its parameters.
     * <p>
     * Note that this function is synchronous with respect to the host only
     * if the source or destination memory is of type CU_MEMORYTYPE_HOST.
     * Note also that this copy is serialized with respect to all pending
     * and future asynchronous work in the current context, the copy's
     * source context, and the copy's destination context (use
     * cuMemcpy3DPeerAsync to avoid this synchronization).
     * <p>
     * Note that this function may also return error codes from previous,
     * asynchronous launches.
     *
     * @param pCopy Parameters for the memory copy
     *
     * @return CUDA_SUCCESS, CUDA_ERROR_DEINITIALIZED,
     * CUDA_ERROR_NOT_INITIALIZED, CUDA_ERROR_INVALID_CONTEXT,
     * CUDA_ERROR_INVALID_VALUE
     *
     * @see JCudaDriver#cuMemcpyDtoD
     * @see JCudaDriver#cuMemcpyPeer
     * @see JCudaDriver#cuMemcpyDtoDAsync
     * @see JCudaDriver#cuMemcpyPeerAsync
     * @see JCudaDriver#cuMemcpy3DPeerAsync
     */
    public static int cuMemcpy3DPeer(CUDA_MEMCPY3D_PEER pCopy)
    {
        return checkResult(cuMemcpy3DPeerNative(pCopy));
    }
    private static native int cuMemcpy3DPeerNative(CUDA_MEMCPY3D_PEER pCopy);


    /**
     * Copies memory asynchronously.
     *
     * <pre>
     * CUresult cuMemcpyAsync (
     *      CUdeviceptr dst,
     *      CUdeviceptr src,
     *      size_t ByteCount,
     *      CUstream hStream )
     * </pre>
     * <p>
     * Copies memory asynchronously. Copies data between two pointers. dst
     * and src are base pointers of the destination and source,
     * respectively. ByteCount specifies the number of bytes to copy. Note
     * that this function infers the type of the transfer (host to host,
     * host to device, device to device, or device to host) from the
     * pointer values. This function is only allowed in contexts which
     * support unified addressing. Note that this function is asynchronous
     * and can optionally be associated to a stream by passing a non-zero
     * hStream argument.
     * <p>
     * Note that this function may also return error codes from previous,
     * asynchronous launches.
     *
     * @param dst Destination unified virtual address space pointer
     * @param src Source unified virtual address space pointer
     * @param ByteCount Size of memory copy in bytes
     * @param hStream Stream identifier
     *
     * @return CUDA_SUCCESS, CUDA_ERROR_DEINITIALIZED,
     * CUDA_ERROR_NOT_INITIALIZED, CUDA_ERROR_INVALID_CONTEXT,
     * CUDA_ERROR_INVALID_VALUE
     *
     * @see JCudaDriver#cuMemcpyDtoDAsync
     * @see JCudaDriver#cuMemcpyHtoDAsync
     * @see JCudaDriver#cuMemcpyDtoHAsync
     */
    public static int cuMemcpyAsync(CUdeviceptr dst, CUdeviceptr src, long ByteCount, CUstream hStream)
    {
        return checkResult(cuMemcpyAsyncNative(dst, src, ByteCount, hStream));
    }
    private static native int cuMemcpyAsyncNative(CUdeviceptr dst, CUdeviceptr src, long ByteCount, CUstream hStream);


    /**
     * Copies device memory between two contexts asynchronously.
     *
     * <pre>
     * CUresult cuMemcpyPeerAsync (
     *      CUdeviceptr dstDevice,
     *      CUcontext dstContext,
     *      CUdeviceptr srcDevice,
     *      CUcontext srcContext,
     *      size_t ByteCount,
     *      CUstream hStream )
     * </pre>
     * <p>
     * Copies device memory between two contexts asynchronously. Copies
     * from device memory in one context to device memory in another
     * context. dstDevice is the base device pointer of the destination
     * memory and dstContext is the destination context. srcDevice is the
     * base device pointer of the source memory and srcContext is the
     * source context. ByteCount specifies the number of bytes to copy.
     * Note that this function is asynchronous with respect to the host and
     * all work in other streams and in other devices.
     * <p>
     * Note that this function may also return error codes from previous,
     * asynchronous launches.
     *
     * @param dstDevice Destination device pointer
     * @param dstContext Destination context
     * @param srcDevice Source device pointer
     * @param srcContext Source context
     * @param ByteCount Size of memory copy in bytes
     * @param hStream Stream identifier
     *
     * @return CUDA_SUCCESS, CUDA_ERROR_DEINITIALIZED,
     * CUDA_ERROR_NOT_INITIALIZED, CUDA_ERROR_INVALID_CONTEXT,
     * CUDA_ERROR_INVALID_VALUE
     *
     * @see JCudaDriver#cuMemcpyDtoD
     * @see JCudaDriver#cuMemcpyPeer
     * @see JCudaDriver#cuMemcpy3DPeer
     * @see JCudaDriver#cuMemcpyDtoDAsync
     * @see JCudaDriver#cuMemcpy3DPeerAsync
     */
    public static int cuMemcpyPeerAsync(CUdeviceptr dstDevice, CUcontext dstContext, CUdeviceptr srcDevice, CUcontext srcContext, long ByteCount, CUstream hStream)
    {
        return checkResult(cuMemcpyPeerAsyncNative(dstDevice, dstContext, srcDevice, srcContext, ByteCount, hStream));
    }
    private static native int cuMemcpyPeerAsyncNative(CUdeviceptr dstDevice, CUcontext dstContext, CUdeviceptr srcDevice, CUcontext srcContext, long ByteCount, CUstream hStream);


    /**
     * Copies memory from Host to Device.
     *
     * <pre>
     * CUresult cuMemcpyHtoDAsync (
     *      CUdeviceptr dstDevice,
     *      const void* srcHost,
     *      size_t ByteCount,
     *      CUstream hStream )
     * </pre>
     * <p>
     * Copies memory from Host to Device. Copies from host memory to device
     * memory. dstDevice and srcHost are the base addresses of the
     * destination and source, respectively. ByteCount specifies the number
     * of bytes to copy.
     * <p>
     * cuMemcpyHtoDAsync() is asynchronous and can optionally be associated
     * to a stream by passing a non-zero hStream argument. It only works on
     * page-locked memory and returns an error if a pointer to pageable
     * memory is passed as input.
     * <p>
     * Note that this function may also return error codes from previous,
     * asynchronous launches.
     *
     * @param dstDevice Destination device pointer
     * @param srcHost Source host pointer
     * @param ByteCount Size of memory copy in bytes
     * @param hStream Stream identifier
     *
     * @return CUDA_SUCCESS, CUDA_ERROR_DEINITIALIZED,
     * CUDA_ERROR_NOT_INITIALIZED, CUDA_ERROR_INVALID_CONTEXT,
     * CUDA_ERROR_INVALID_VALUE
     *
     * @see JCudaDriver#cuMemcpyHtoD
     * @see JCudaDriver#cuMemcpyDtoHAsync
     * @see JCudaDriver#cuMemHostAlloc
     */
    public static int cuMemcpyHtoDAsync(CUdeviceptr dstDevice, Pointer srcHost, long ByteCount, CUstream hStream)
    {
        return checkResult(cuMemcpyHtoDAsyncNative(dstDevice, srcHost, ByteCount, hStream));
    }
    private static native int cuMemcpyHtoDAsyncNative(CUdeviceptr dstDevice, Pointer srcHost, long ByteCount, CUstream hStream);


    /**
     * Copies memory from Device to Host.
     *
     * <pre>
     * CUresult cuMemcpyDtoHAsync (
     *      void* dstHost,
     *      CUdeviceptr srcDevice,
     *      size_t ByteCount,
     *      CUstream hStream )
     * </pre>
     * <p>
     * Copies memory from Device to Host. Copies from device to host
     * memory. dstHost and srcDevice specify the base pointers of the
     * destination and source, respectively. ByteCount specifies the number
     * of bytes to copy.
     * <p>
     * cuMemcpyDtoHAsync() is asynchronous and can optionally be associated
     * to a stream by passing a non-zero hStream argument. It only works on
     * page-locked memory and returns an error if a pointer to pageable
     * memory is passed as input.
     * <p>
     * Note that this function may also return error codes from previous,
     * asynchronous launches.
     *
     * @param dstHost Destination host pointer
     * @param srcDevice Source device pointer
     * @param ByteCount Size of memory copy in bytes
     * @param hStream Stream identifier
     *
     * @return CUDA_SUCCESS, CUDA_ERROR_DEINITIALIZED,
     * CUDA_ERROR_NOT_INITIALIZED, CUDA_ERROR_INVALID_CONTEXT,
     * CUDA_ERROR_INVALID_VALUE
     *
     * @see JCudaDriver#cuMemcpyDtoH
     * @see JCudaDriver#cuMemcpyHtoDAsync
     * @see JCudaDriver#cuMemHostAlloc
     */
    public static int cuMemcpyDtoHAsync(Pointer dstHost, CUdeviceptr srcDevice, long ByteCount, CUstream hStream)
    {
        return checkResult(cuMemcpyDtoHAsyncNative(dstHost, srcDevice, ByteCount, hStream));
    }
    private static native int cuMemcpyDtoHAsyncNative(Pointer dstHost, CUdeviceptr srcDevice, long ByteCount, CUstream hStream);


    /**
     * Copies memory from Device to Device.
     *
     * <pre>
     * CUresult cuMemcpyDtoDAsync (
     *      CUdeviceptr dstDevice,
     *      CUdeviceptr srcDevice,
     *      size_t ByteCount,
     *      CUstream hStream )
     * </pre>
     * <p>
     * Copies memory from Device to Device. Copies from device memory to
     * device memory. dstDevice and srcDevice are the base pointers of the
     * destination and source, respectively. ByteCount specifies the number
     * of bytes to copy. Note that this function is asynchronous and can
     * optionally be associated to a stream by passing a non-zero hStream
     * argument.
     * <p>
     * Note that this function may also return error codes from previous,
     * asynchronous launches.
     *
     * @param dstDevice Destination device pointer
     * @param srcDevice Source device pointer
     * @param ByteCount Size of memory copy in bytes
     * @param hStream Stream identifier
     *
     * @return CUDA_SUCCESS, CUDA_ERROR_DEINITIALIZED,
     * CUDA_ERROR_NOT_INITIALIZED, CUDA_ERROR_INVALID_CONTEXT,
     * CUDA_ERROR_INVALID_VALUE
     *
     * @see JCudaDriver#cuMemcpyDtoD
     * @see JCudaDriver#cuMemcpyHtoDAsync
     * @see JCudaDriver#cuMemcpyDtoHAsync
     */
    public static int cuMemcpyDtoDAsync(CUdeviceptr dstDevice, CUdeviceptr srcDevice, long ByteCount, CUstream hStream)
    {
        return checkResult(cuMemcpyDtoDAsyncNative(dstDevice, srcDevice, ByteCount, hStream));
    }
    private static native int cuMemcpyDtoDAsyncNative(CUdeviceptr dstDevice, CUdeviceptr srcDevice, long ByteCount, CUstream hStream);


    /**
     * Copies memory from Host to Array.
     *
     * <pre>
     * CUresult cuMemcpyHtoAAsync (
     *      CUarray dstArray,
     *      size_t dstOffset,
     *      const void* srcHost,
     *      size_t ByteCount,
     *      CUstream hStream )
     * 
*
*

Copies memory from Host to Array. Copies * from host memory to a 1D CUDA array. dstArray and dstOffset specify the CUDA array handle and starting offset in * bytes of the destination data. srcHost specifies the base * address of the source. ByteCount specifies the number of * bytes to copy. *

*

cuMemcpyHtoAAsync() is asynchronous and * can optionally be associated to a stream by passing a non-zero hStream argument. It only works on page-locked memory and returns * an error if a pointer to pageable memory is passed as input. *

*
* Note: *

Note that this * function may also return error codes from previous, asynchronous * launches. *

*
*

*
* * @param dstArray Destination array * @param dstOffset Offset in bytes of destination array * @param srcHost Source host pointer * @param ByteCount Size of memory copy in bytes * @param hStream Stream identifier * * @return CUDA_SUCCESS, CUDA_ERROR_DEINITIALIZED, CUDA_ERROR_NOT_INITIALIZED, * CUDA_ERROR_INVALID_CONTEXT, CUDA_ERROR_INVALID_VALUE * * @see JCudaDriver#cuArray3DCreate * @see JCudaDriver#cuArray3DGetDescriptor * @see JCudaDriver#cuArrayCreate * @see JCudaDriver#cuArrayDestroy * @see JCudaDriver#cuArrayGetDescriptor * @see JCudaDriver#cuMemAlloc * @see JCudaDriver#cuMemAllocHost * @see JCudaDriver#cuMemAllocPitch * @see JCudaDriver#cuMemcpy2D * @see JCudaDriver#cuMemcpy2DAsync * @see JCudaDriver#cuMemcpy2DUnaligned * @see JCudaDriver#cuMemcpy3D * @see JCudaDriver#cuMemcpy3DAsync * @see JCudaDriver#cuMemcpyAtoA * @see JCudaDriver#cuMemcpyAtoD * @see JCudaDriver#cuMemcpyAtoH * @see JCudaDriver#cuMemcpyAtoHAsync * @see JCudaDriver#cuMemcpyDtoA * @see JCudaDriver#cuMemcpyDtoD * @see JCudaDriver#cuMemcpyDtoDAsync * @see JCudaDriver#cuMemcpyDtoH * @see JCudaDriver#cuMemcpyDtoHAsync * @see JCudaDriver#cuMemcpyHtoA * @see JCudaDriver#cuMemcpyHtoD * @see JCudaDriver#cuMemcpyHtoDAsync * @see JCudaDriver#cuMemFree * @see JCudaDriver#cuMemFreeHost * @see JCudaDriver#cuMemGetAddressRange * @see JCudaDriver#cuMemGetInfo * @see JCudaDriver#cuMemHostAlloc * @see JCudaDriver#cuMemHostGetDevicePointer * @see JCudaDriver#cuMemsetD2D8 * @see JCudaDriver#cuMemsetD2D8Async * @see JCudaDriver#cuMemsetD2D16 * @see JCudaDriver#cuMemsetD2D16Async * @see JCudaDriver#cuMemsetD2D32 * @see JCudaDriver#cuMemsetD2D32Async * @see JCudaDriver#cuMemsetD8 * @see JCudaDriver#cuMemsetD8Async * @see JCudaDriver#cuMemsetD16 * @see JCudaDriver#cuMemsetD16Async * @see JCudaDriver#cuMemsetD32 * @see JCudaDriver#cuMemsetD32Async */ public static int cuMemcpyHtoAAsync(CUarray dstArray, long dstIndex, Pointer pSrc, long ByteCount, CUstream hStream) { return 
checkResult(cuMemcpyHtoAAsyncNative(dstArray, dstIndex, pSrc, ByteCount, hStream)); } private static native int cuMemcpyHtoAAsyncNative(CUarray dstArray, long dstIndex, Pointer pSrc, long ByteCount, CUstream hStream); /** * Copies memory from Array to Host. * *
     * CUresult cuMemcpyAtoHAsync (
     *      void* dstHost,
     *      CUarray srcArray,
     *      size_t srcOffset,
     *      size_t ByteCount,
     *      CUstream hStream )
     * 
*
*

Copies memory from Array to Host. Copies * from one 1D CUDA array to host memory. dstHost specifies the * base pointer of the destination. srcArray and srcOffset specify the CUDA array handle and starting offset in * bytes of the source data. ByteCount specifies the number of * bytes to copy. *

*

cuMemcpyAtoHAsync() is asynchronous and * can optionally be associated to a stream by passing a non-zero stream argument. It only works on page-locked host memory and * returns an error if a pointer to pageable memory is passed as input. *

*
* Note: *

Note that this * function may also return error codes from previous, asynchronous * launches. *

*
*

*
* * @param dstHost Destination pointer * @param srcArray Source array * @param srcOffset Offset in bytes of source array * @param ByteCount Size of memory copy in bytes * @param hStream Stream identifier * * @return CUDA_SUCCESS, CUDA_ERROR_DEINITIALIZED, CUDA_ERROR_NOT_INITIALIZED, * CUDA_ERROR_INVALID_CONTEXT, CUDA_ERROR_INVALID_VALUE * * @see JCudaDriver#cuArray3DCreate * @see JCudaDriver#cuArray3DGetDescriptor * @see JCudaDriver#cuArrayCreate * @see JCudaDriver#cuArrayDestroy * @see JCudaDriver#cuArrayGetDescriptor * @see JCudaDriver#cuMemAlloc * @see JCudaDriver#cuMemAllocHost * @see JCudaDriver#cuMemAllocPitch * @see JCudaDriver#cuMemcpy2D * @see JCudaDriver#cuMemcpy2DAsync * @see JCudaDriver#cuMemcpy2DUnaligned * @see JCudaDriver#cuMemcpy3D * @see JCudaDriver#cuMemcpy3DAsync * @see JCudaDriver#cuMemcpyAtoA * @see JCudaDriver#cuMemcpyAtoD * @see JCudaDriver#cuMemcpyAtoH * @see JCudaDriver#cuMemcpyDtoA * @see JCudaDriver#cuMemcpyDtoD * @see JCudaDriver#cuMemcpyDtoDAsync * @see JCudaDriver#cuMemcpyDtoH * @see JCudaDriver#cuMemcpyDtoHAsync * @see JCudaDriver#cuMemcpyHtoA * @see JCudaDriver#cuMemcpyHtoAAsync * @see JCudaDriver#cuMemcpyHtoD * @see JCudaDriver#cuMemcpyHtoDAsync * @see JCudaDriver#cuMemFree * @see JCudaDriver#cuMemFreeHost * @see JCudaDriver#cuMemGetAddressRange * @see JCudaDriver#cuMemGetInfo * @see JCudaDriver#cuMemHostAlloc * @see JCudaDriver#cuMemHostGetDevicePointer * @see JCudaDriver#cuMemsetD2D8 * @see JCudaDriver#cuMemsetD2D8Async * @see JCudaDriver#cuMemsetD2D16 * @see JCudaDriver#cuMemsetD2D16Async * @see JCudaDriver#cuMemsetD2D32 * @see JCudaDriver#cuMemsetD2D32Async * @see JCudaDriver#cuMemsetD8 * @see JCudaDriver#cuMemsetD8Async * @see JCudaDriver#cuMemsetD16 * @see JCudaDriver#cuMemsetD16Async * @see JCudaDriver#cuMemsetD32 * @see JCudaDriver#cuMemsetD32Async */ public static int cuMemcpyAtoHAsync(Pointer dstHost, CUarray srcArray, long srcIndex, long ByteCount, CUstream hStream) { return checkResult(cuMemcpyAtoHAsyncNative(dstHost, 
srcArray, srcIndex, ByteCount, hStream)); } private static native int cuMemcpyAtoHAsyncNative(Pointer dstHost, CUarray srcArray, long srcIndex, long ByteCount, CUstream hStream); /** * Copies memory for 2D arrays. * *
     * CUresult cuMemcpy2DAsync (
     *      const CUDA_MEMCPY2D* pCopy,
     *      CUstream hStream )
     * 
*
*

Copies memory for 2D arrays. Perform a * 2D memory copy according to the parameters specified in pCopy. * The CUDA_MEMCPY2D structure is defined as: *

*
   typedef struct CUDA_MEMCPY2D_st {
     *       unsigned int srcXInBytes, srcY;
     *       CUmemorytype srcMemoryType;
     *       const void *srcHost;
     *       CUdeviceptr srcDevice;
     *       CUarray srcArray;
     *       unsigned int srcPitch;
     *       unsigned int dstXInBytes, dstY;
     *       CUmemorytype dstMemoryType;
     *       void *dstHost;
     *       CUdeviceptr dstDevice;
     *       CUarray dstArray;
     *       unsigned int dstPitch;
     *       unsigned int WidthInBytes;
     *       unsigned int Height;
     *    } CUDA_MEMCPY2D;
* where: *
    *
  • *

    srcMemoryType and dstMemoryType * specify the type of memory of the source and destination, respectively; * CUmemorytype_enum * is defined as: *

    *
  • *
*

*
   typedef enum CUmemorytype_enum {
     *       CU_MEMORYTYPE_HOST = 0x01,
     *       CU_MEMORYTYPE_DEVICE = 0x02,
     *       CU_MEMORYTYPE_ARRAY = 0x03,
     *       CU_MEMORYTYPE_UNIFIED = 0x04
     *    } CUmemorytype;
*

*

If srcMemoryType is CU_MEMORYTYPE_HOST, * srcHost and srcPitch specify the (host) base address of the source data * and the bytes per row to apply. srcArray is ignored. *

*

If srcMemoryType is CU_MEMORYTYPE_UNIFIED, * srcDevice and srcPitch specify the (unified virtual address space) base * address of the source data and the bytes per row * to apply. srcArray is ignored. This value * may be used only if unified addressing is supported in the calling * context. *

*

If srcMemoryType is CU_MEMORYTYPE_DEVICE, * srcDevice and srcPitch specify the (device) base address of the source * data and the bytes per row to apply. srcArray is * ignored. *

*

If srcMemoryType is CU_MEMORYTYPE_ARRAY, * srcArray specifies the handle of the source data. srcHost, srcDevice * and srcPitch are ignored. *

*

If dstMemoryType is CU_MEMORYTYPE_UNIFIED, * dstDevice and dstPitch specify the (unified virtual address space) base * address of the source data and the bytes per row * to apply. dstArray is ignored. This value * may be used only if unified addressing is supported in the calling * context. *

*

If dstMemoryType is CU_MEMORYTYPE_HOST, * dstHost and dstPitch specify the (host) base address of the destination * data and the bytes per row to apply. dstArray is * ignored. *

*

If dstMemoryType is CU_MEMORYTYPE_DEVICE, * dstDevice and dstPitch specify the (device) base address of the * destination data and the bytes per row to apply. dstArray * is ignored. *

*

If dstMemoryType is CU_MEMORYTYPE_ARRAY, * dstArray specifies the handle of the destination data. dstHost, * dstDevice and dstPitch are ignored. *

*
    *
  • *

    srcXInBytes and srcY specify * the base address of the source data for the copy. *

    *
  • *
*

*

For host pointers, the starting address * is *

  void* Start = (void*)((char*)srcHost+srcY*srcPitch +
     * srcXInBytes);
*

*

For device pointers, the starting * address is *

  CUdeviceptr Start =
     * srcDevice+srcY*srcPitch+srcXInBytes;
*

*

For CUDA arrays, srcXInBytes must be * evenly divisible by the array element size. *

*
    *
  • *

    dstXInBytes and dstY specify * the base address of the destination data for the copy. *

    *
  • *
*

*

For host pointers, the base address is *

  void* dstStart = (void*)((char*)dstHost+dstY*dstPitch +
     * dstXInBytes);
*

*

For device pointers, the starting * address is *

  CUdeviceptr dstStart =
     * dstDevice+dstY*dstPitch+dstXInBytes;
*

*

For CUDA arrays, dstXInBytes must be * evenly divisible by the array element size. *

*
    *
  • *

    WidthInBytes and Height specify * the width (in bytes) and height of the 2D copy being performed. *

    *
  • *
  • *

    If specified, srcPitch must be * greater than or equal to WidthInBytes + srcXInBytes, and dstPitch must * be greater than or equal * to WidthInBytes + dstXInBytes. *

    *
  • *
  • *

    If specified, srcPitch must be * greater than or equal to WidthInBytes + srcXInBytes, and dstPitch must * be greater than or equal * to WidthInBytes + dstXInBytes. *

    *
  • *
  • *

    If specified, srcHeight must * be greater than or equal to Height + srcY, and dstHeight must be * greater than or equal to Height * + dstY. *

    *
  • *
*

*

cuMemcpy2D() returns an error if any * pitch is greater than the maximum allowed (CU_DEVICE_ATTRIBUTE_MAX_PITCH). * cuMemAllocPitch() passes back pitches that always work with cuMemcpy2D(). * On intra-device memory copies (device to device, CUDA array to device, * CUDA array to CUDA array), cuMemcpy2D() may fail for pitches not * computed by cuMemAllocPitch(). cuMemcpy2DUnaligned() does not have this * restriction, but may run significantly slower in the cases where * cuMemcpy2D() would have returned an error code. *

*

cuMemcpy2DAsync() is asynchronous and * can optionally be associated to a stream by passing a non-zero hStream argument. It only works on page-locked host memory and * returns an error if a pointer to pageable memory is passed as input. *

*
* Note: *

Note that this * function may also return error codes from previous, asynchronous * launches. *

*
*

*
* * @param pCopy Parameters for the memory copy * @param hStream Stream identifier * * @return CUDA_SUCCESS, CUDA_ERROR_DEINITIALIZED, CUDA_ERROR_NOT_INITIALIZED, * CUDA_ERROR_INVALID_CONTEXT, CUDA_ERROR_INVALID_VALUE * * @see JCudaDriver#cuArray3DCreate * @see JCudaDriver#cuArray3DGetDescriptor * @see JCudaDriver#cuArrayCreate * @see JCudaDriver#cuArrayDestroy * @see JCudaDriver#cuArrayGetDescriptor * @see JCudaDriver#cuMemAlloc * @see JCudaDriver#cuMemAllocHost * @see JCudaDriver#cuMemAllocPitch * @see JCudaDriver#cuMemcpy2D * @see JCudaDriver#cuMemcpy2DUnaligned * @see JCudaDriver#cuMemcpy3D * @see JCudaDriver#cuMemcpy3DAsync * @see JCudaDriver#cuMemcpyAtoA * @see JCudaDriver#cuMemcpyAtoD * @see JCudaDriver#cuMemcpyAtoH * @see JCudaDriver#cuMemcpyAtoHAsync * @see JCudaDriver#cuMemcpyDtoA * @see JCudaDriver#cuMemcpyDtoD * @see JCudaDriver#cuMemcpyDtoDAsync * @see JCudaDriver#cuMemcpyDtoH * @see JCudaDriver#cuMemcpyDtoHAsync * @see JCudaDriver#cuMemcpyHtoA * @see JCudaDriver#cuMemcpyHtoAAsync * @see JCudaDriver#cuMemcpyHtoD * @see JCudaDriver#cuMemcpyHtoDAsync * @see JCudaDriver#cuMemFree * @see JCudaDriver#cuMemFreeHost * @see JCudaDriver#cuMemGetAddressRange * @see JCudaDriver#cuMemGetInfo * @see JCudaDriver#cuMemHostAlloc * @see JCudaDriver#cuMemHostGetDevicePointer * @see JCudaDriver#cuMemsetD2D8 * @see JCudaDriver#cuMemsetD2D8Async * @see JCudaDriver#cuMemsetD2D16 * @see JCudaDriver#cuMemsetD2D16Async * @see JCudaDriver#cuMemsetD2D32 * @see JCudaDriver#cuMemsetD2D32Async * @see JCudaDriver#cuMemsetD8 * @see JCudaDriver#cuMemsetD8Async * @see JCudaDriver#cuMemsetD16 * @see JCudaDriver#cuMemsetD16Async * @see JCudaDriver#cuMemsetD32 * @see JCudaDriver#cuMemsetD32Async */ public static int cuMemcpy2DAsync(CUDA_MEMCPY2D pCopy, CUstream hStream) { return checkResult(cuMemcpy2DAsyncNative(pCopy, hStream)); } private static native int cuMemcpy2DAsyncNative(CUDA_MEMCPY2D pCopy, CUstream hStream); /** * Copies memory for 3D arrays. * *
     * CUresult cuMemcpy3DAsync (
     *      const CUDA_MEMCPY3D* pCopy,
     *      CUstream hStream )
     * 
*
*

Copies memory for 3D arrays. Perform a * 3D memory copy according to the parameters specified in pCopy. * The CUDA_MEMCPY3D structure is defined as: *

*
        typedef struct CUDA_MEMCPY3D_st
     * {
     *
     *             unsigned int srcXInBytes, srcY, srcZ;
     *             unsigned int srcLOD;
     *             CUmemorytype srcMemoryType;
     *                 const void *srcHost;
     *                 CUdeviceptr srcDevice;
     *                 CUarray srcArray;
     *                 unsigned int srcPitch;  // ignored when src is array
     *                 unsigned int srcHeight; // ignored when src is array;
     * may be 0 if Depth==1
     *
     *             unsigned int dstXInBytes, dstY, dstZ;
     *             unsigned int dstLOD;
     *             CUmemorytype dstMemoryType;
     *                 void *dstHost;
     *                 CUdeviceptr dstDevice;
     *                 CUarray dstArray;
     *                 unsigned int dstPitch;  // ignored when dst is array
     *                 unsigned int dstHeight; // ignored when dst is array;
     * may be 0 if Depth==1
     *
     *             unsigned int WidthInBytes;
     *             unsigned int Height;
     *             unsigned int Depth;
     *         } CUDA_MEMCPY3D;
* where: *
    *
  • *

    srcMemoryType and dstMemoryType * specify the type of memory of the source and destination, respectively; * CUmemorytype_enum * is defined as: *

    *
  • *
*

*
   typedef enum CUmemorytype_enum {
     *       CU_MEMORYTYPE_HOST = 0x01,
     *       CU_MEMORYTYPE_DEVICE = 0x02,
     *       CU_MEMORYTYPE_ARRAY = 0x03,
     *       CU_MEMORYTYPE_UNIFIED = 0x04
     *    } CUmemorytype;
*

*

If srcMemoryType is CU_MEMORYTYPE_UNIFIED, * srcDevice and srcPitch specify the (unified virtual address space) base * address of the source data and the bytes per row * to apply. srcArray is ignored. This value * may be used only if unified addressing is supported in the calling * context. *

*

If srcMemoryType is CU_MEMORYTYPE_HOST, * srcHost, srcPitch and srcHeight specify the (host) base address of the * source data, the bytes per row, and the height of * each 2D slice of the 3D array. srcArray * is ignored. *

*

If srcMemoryType is CU_MEMORYTYPE_DEVICE, * srcDevice, srcPitch and srcHeight specify the (device) base address of * the source data, the bytes per row, and the height * of each 2D slice of the 3D array. srcArray * is ignored. *

*

If srcMemoryType is CU_MEMORYTYPE_ARRAY, * srcArray specifies the handle of the source data. srcHost, srcDevice, * srcPitch and srcHeight are ignored. *

*

If dstMemoryType is CU_MEMORYTYPE_UNIFIED, * dstDevice and dstPitch specify the (unified virtual address space) base * address of the source data and the bytes per row * to apply. dstArray is ignored. This value * may be used only if unified addressing is supported in the calling * context. *

*

If dstMemoryType is CU_MEMORYTYPE_HOST, * dstHost and dstPitch specify the (host) base address of the destination * data, the bytes per row, and the height of each * 2D slice of the 3D array. dstArray is * ignored. *

*

If dstMemoryType is CU_MEMORYTYPE_DEVICE, * dstDevice and dstPitch specify the (device) base address of the * destination data, the bytes per row, and the height of each * 2D slice of the 3D array. dstArray is * ignored. *

*

If dstMemoryType is CU_MEMORYTYPE_ARRAY, * dstArray specifies the handle of the destination data. dstHost, * dstDevice, dstPitch and dstHeight are ignored. *

*
    *
  • *

    srcXInBytes, srcY and srcZ * specify the base address of the source data for the copy. *

    *
  • *
*

*

For host pointers, the starting address * is *

  void* Start = (void*)((char*)srcHost+(srcZ*srcHeight+srcY)*srcPitch
     * + srcXInBytes);
*

*

For device pointers, the starting * address is *

  CUdeviceptr Start =
     * srcDevice+(srcZ*srcHeight+srcY)*srcPitch+srcXInBytes;
*

*

For CUDA arrays, srcXInBytes must be * evenly divisible by the array element size. *

*
    *
  • *

    dstXInBytes, dstY and dstZ * specify the base address of the destination data for the copy. *

    *
  • *
*

*

For host pointers, the base address is *

  void* dstStart = (void*)((char*)dstHost+(dstZ*dstHeight+dstY)*dstPitch
     * + dstXInBytes);
*

*

For device pointers, the starting * address is *

  CUdeviceptr dstStart =
     * dstDevice+(dstZ*dstHeight+dstY)*dstPitch+dstXInBytes;
*

*

For CUDA arrays, dstXInBytes must be * evenly divisible by the array element size. *

*
    *
  • *

    WidthInBytes, Height and Depth * specify the width (in bytes), height and depth of the 3D copy being * performed. *

    *
  • *
  • *

    If specified, srcPitch must be * greater than or equal to WidthInBytes + srcXInBytes, and dstPitch must * be greater than or equal * to WidthInBytes + dstXInBytes. *

    *
  • *
  • *

    If specified, srcHeight must * be greater than or equal to Height + srcY, and dstHeight must be * greater than or equal to Height * + dstY. *

    *
  • *
*

*

cuMemcpy3D() returns an error if any * pitch is greater than the maximum allowed * (CU_DEVICE_ATTRIBUTE_MAX_PITCH). *

*

cuMemcpy3DAsync() is asynchronous and * can optionally be associated to a stream by passing a non-zero hStream argument. It only works on page-locked host memory and * returns an error if a pointer to pageable memory is passed as input. *

*

The srcLOD and dstLOD members of the * CUDA_MEMCPY3D structure must be set to 0. *

*
* Note: *

Note that this * function may also return error codes from previous, asynchronous * launches. *

*
*

*
* * @param pCopy Parameters for the memory copy * @param hStream Stream identifier * * @return CUDA_SUCCESS, CUDA_ERROR_DEINITIALIZED, CUDA_ERROR_NOT_INITIALIZED, * CUDA_ERROR_INVALID_CONTEXT, CUDA_ERROR_INVALID_VALUE * * @see JCudaDriver#cuArray3DCreate * @see JCudaDriver#cuArray3DGetDescriptor * @see JCudaDriver#cuArrayCreate * @see JCudaDriver#cuArrayDestroy * @see JCudaDriver#cuArrayGetDescriptor * @see JCudaDriver#cuMemAlloc * @see JCudaDriver#cuMemAllocHost * @see JCudaDriver#cuMemAllocPitch * @see JCudaDriver#cuMemcpy2D * @see JCudaDriver#cuMemcpy2DAsync * @see JCudaDriver#cuMemcpy2DUnaligned * @see JCudaDriver#cuMemcpy3D * @see JCudaDriver#cuMemcpyAtoA * @see JCudaDriver#cuMemcpyAtoD * @see JCudaDriver#cuMemcpyAtoH * @see JCudaDriver#cuMemcpyAtoHAsync * @see JCudaDriver#cuMemcpyDtoA * @see JCudaDriver#cuMemcpyDtoD * @see JCudaDriver#cuMemcpyDtoDAsync * @see JCudaDriver#cuMemcpyDtoH * @see JCudaDriver#cuMemcpyDtoHAsync * @see JCudaDriver#cuMemcpyHtoA * @see JCudaDriver#cuMemcpyHtoAAsync * @see JCudaDriver#cuMemcpyHtoD * @see JCudaDriver#cuMemcpyHtoDAsync * @see JCudaDriver#cuMemFree * @see JCudaDriver#cuMemFreeHost * @see JCudaDriver#cuMemGetAddressRange * @see JCudaDriver#cuMemGetInfo * @see JCudaDriver#cuMemHostAlloc * @see JCudaDriver#cuMemHostGetDevicePointer * @see JCudaDriver#cuMemsetD2D8 * @see JCudaDriver#cuMemsetD2D8Async * @see JCudaDriver#cuMemsetD2D16 * @see JCudaDriver#cuMemsetD2D16Async * @see JCudaDriver#cuMemsetD2D32 * @see JCudaDriver#cuMemsetD2D32Async * @see JCudaDriver#cuMemsetD8 * @see JCudaDriver#cuMemsetD8Async * @see JCudaDriver#cuMemsetD16 * @see JCudaDriver#cuMemsetD16Async * @see JCudaDriver#cuMemsetD32 * @see JCudaDriver#cuMemsetD32Async */ public static int cuMemcpy3DAsync(CUDA_MEMCPY3D pCopy, CUstream hStream) { return checkResult(cuMemcpy3DAsyncNative(pCopy, hStream)); } private static native int cuMemcpy3DAsyncNative(CUDA_MEMCPY3D pCopy, CUstream hStream); /** * Copies memory between contexts asynchronously. * *
     * CUresult cuMemcpy3DPeerAsync (
     *      const CUDA_MEMCPY3D_PEER* pCopy,
     *      CUstream hStream )
     * 
*
*

Copies memory between contexts * asynchronously. Perform a 3D memory copy according to the parameters * specified in pCopy. See the definition of the CUDA_MEMCPY3D_PEER * structure for documentation of its parameters. *

*
* Note: *

Note that this * function may also return error codes from previous, asynchronous * launches. *

*
*

*
* * @param pCopy Parameters for the memory copy * @param hStream Stream identifier * * @return CUDA_SUCCESS, CUDA_ERROR_DEINITIALIZED, CUDA_ERROR_NOT_INITIALIZED, * CUDA_ERROR_INVALID_CONTEXT, CUDA_ERROR_INVALID_VALUE * * @see JCudaDriver#cuMemcpyDtoD * @see JCudaDriver#cuMemcpyPeer * @see JCudaDriver#cuMemcpyDtoDAsync * @see JCudaDriver#cuMemcpyPeerAsync * @see JCudaDriver#cuMemcpy3DPeerAsync */ public static int cuMemcpy3DPeerAsync(CUDA_MEMCPY3D_PEER pCopy, CUstream hStream) { return checkResult(cuMemcpy3DPeerAsyncNative(pCopy, hStream)); } private static native int cuMemcpy3DPeerAsyncNative(CUDA_MEMCPY3D_PEER pCopy, CUstream hStream); /** * Initializes device memory. * *
     * CUresult cuMemsetD8 (
     *      CUdeviceptr dstDevice,
     *      unsigned char  uc,
     *      size_t N )
     * 
*
*

Initializes device memory. Sets the * memory range of N 8-bit values to the specified value uc. *

*

Note that this function is asynchronous * with respect to the host unless dstDevice refers to pinned * host memory. *

*
* Note: *

Note that this * function may also return error codes from previous, asynchronous * launches. *

*
*

*
* * @param dstDevice Destination device pointer * @param uc Value to set * @param N Number of elements * * @return CUDA_SUCCESS, CUDA_ERROR_DEINITIALIZED, CUDA_ERROR_NOT_INITIALIZED, * CUDA_ERROR_INVALID_CONTEXT, CUDA_ERROR_INVALID_VALUE * * @see JCudaDriver#cuArray3DCreate * @see JCudaDriver#cuArray3DGetDescriptor * @see JCudaDriver#cuArrayCreate * @see JCudaDriver#cuArrayDestroy * @see JCudaDriver#cuArrayGetDescriptor * @see JCudaDriver#cuMemAlloc * @see JCudaDriver#cuMemAllocHost * @see JCudaDriver#cuMemAllocPitch * @see JCudaDriver#cuMemcpy2D * @see JCudaDriver#cuMemcpy2DAsync * @see JCudaDriver#cuMemcpy2DUnaligned * @see JCudaDriver#cuMemcpy3D * @see JCudaDriver#cuMemcpy3DAsync * @see JCudaDriver#cuMemcpyAtoA * @see JCudaDriver#cuMemcpyAtoD * @see JCudaDriver#cuMemcpyAtoH * @see JCudaDriver#cuMemcpyAtoHAsync * @see JCudaDriver#cuMemcpyDtoA * @see JCudaDriver#cuMemcpyDtoD * @see JCudaDriver#cuMemcpyDtoDAsync * @see JCudaDriver#cuMemcpyDtoH * @see JCudaDriver#cuMemcpyDtoHAsync * @see JCudaDriver#cuMemcpyHtoA * @see JCudaDriver#cuMemcpyHtoAAsync * @see JCudaDriver#cuMemcpyHtoD * @see JCudaDriver#cuMemcpyHtoDAsync * @see JCudaDriver#cuMemFree * @see JCudaDriver#cuMemFreeHost * @see JCudaDriver#cuMemGetAddressRange * @see JCudaDriver#cuMemGetInfo * @see JCudaDriver#cuMemHostAlloc * @see JCudaDriver#cuMemHostGetDevicePointer * @see JCudaDriver#cuMemsetD2D8 * @see JCudaDriver#cuMemsetD2D8Async * @see JCudaDriver#cuMemsetD2D16 * @see JCudaDriver#cuMemsetD2D16Async * @see JCudaDriver#cuMemsetD2D32 * @see JCudaDriver#cuMemsetD2D32Async * @see JCudaDriver#cuMemsetD8Async * @see JCudaDriver#cuMemsetD16 * @see JCudaDriver#cuMemsetD16Async * @see JCudaDriver#cuMemsetD32 * @see JCudaDriver#cuMemsetD32Async */ public static int cuMemsetD8(CUdeviceptr dstDevice, byte uc, long N) { return checkResult(cuMemsetD8Native(dstDevice, uc, N)); } private static native int cuMemsetD8Native(CUdeviceptr dstDevice, byte uc, long N); /** * Initializes device memory. * *
     * CUresult cuMemsetD16 (
     *      CUdeviceptr dstDevice,
     *      unsigned short us,
     *      size_t N )
     * 
*
*

Initializes device memory. Sets the * memory range of N 16-bit values to the specified value us. The dstDevice pointer must be two byte aligned. *

*

Note that this function is asynchronous * with respect to the host unless dstDevice refers to pinned * host memory. *

*
* Note: *

Note that this * function may also return error codes from previous, asynchronous * launches. *

*
*

*
* * @param dstDevice Destination device pointer * @param us Value to set * @param N Number of elements * * @return CUDA_SUCCESS, CUDA_ERROR_DEINITIALIZED, CUDA_ERROR_NOT_INITIALIZED, * CUDA_ERROR_INVALID_CONTEXT, CUDA_ERROR_INVALID_VALUE * * @see JCudaDriver#cuArray3DCreate * @see JCudaDriver#cuArray3DGetDescriptor * @see JCudaDriver#cuArrayCreate * @see JCudaDriver#cuArrayDestroy * @see JCudaDriver#cuArrayGetDescriptor * @see JCudaDriver#cuMemAlloc * @see JCudaDriver#cuMemAllocHost * @see JCudaDriver#cuMemAllocPitch * @see JCudaDriver#cuMemcpy2D * @see JCudaDriver#cuMemcpy2DAsync * @see JCudaDriver#cuMemcpy2DUnaligned * @see JCudaDriver#cuMemcpy3D * @see JCudaDriver#cuMemcpy3DAsync * @see JCudaDriver#cuMemcpyAtoA * @see JCudaDriver#cuMemcpyAtoD * @see JCudaDriver#cuMemcpyAtoH * @see JCudaDriver#cuMemcpyAtoHAsync * @see JCudaDriver#cuMemcpyDtoA * @see JCudaDriver#cuMemcpyDtoD * @see JCudaDriver#cuMemcpyDtoDAsync * @see JCudaDriver#cuMemcpyDtoH * @see JCudaDriver#cuMemcpyDtoHAsync * @see JCudaDriver#cuMemcpyHtoA * @see JCudaDriver#cuMemcpyHtoAAsync * @see JCudaDriver#cuMemcpyHtoD * @see JCudaDriver#cuMemcpyHtoDAsync * @see JCudaDriver#cuMemFree * @see JCudaDriver#cuMemFreeHost * @see JCudaDriver#cuMemGetAddressRange * @see JCudaDriver#cuMemGetInfo * @see JCudaDriver#cuMemHostAlloc * @see JCudaDriver#cuMemHostGetDevicePointer * @see JCudaDriver#cuMemsetD2D8 * @see JCudaDriver#cuMemsetD2D8Async * @see JCudaDriver#cuMemsetD2D16 * @see JCudaDriver#cuMemsetD2D16Async * @see JCudaDriver#cuMemsetD2D32 * @see JCudaDriver#cuMemsetD2D32Async * @see JCudaDriver#cuMemsetD8 * @see JCudaDriver#cuMemsetD8Async * @see JCudaDriver#cuMemsetD16Async * @see JCudaDriver#cuMemsetD32 * @see JCudaDriver#cuMemsetD32Async */ public static int cuMemsetD16(CUdeviceptr dstDevice, short us, long N) { return checkResult(cuMemsetD16Native(dstDevice, us, N)); } private static native int cuMemsetD16Native(CUdeviceptr dstDevice, short us, long N); /** * Initializes device memory. * *
     * CUresult cuMemsetD32 (
     *      CUdeviceptr dstDevice,
     *      unsigned int  ui,
     *      size_t N )
     * 
*
*

Initializes device memory. Sets the * memory range of N 32-bit values to the specified value ui. The dstDevice pointer must be four byte aligned. *

*

Note that this function is asynchronous * with respect to the host unless dstDevice refers to pinned * host memory. *

*
* Note: *

Note that this * function may also return error codes from previous, asynchronous * launches. *

*
*

*
* * @param dstDevice Destination device pointer * @param ui Value to set * @param N Number of elements * * @return CUDA_SUCCESS, CUDA_ERROR_DEINITIALIZED, CUDA_ERROR_NOT_INITIALIZED, * CUDA_ERROR_INVALID_CONTEXT, CUDA_ERROR_INVALID_VALUE * * @see JCudaDriver#cuArray3DCreate * @see JCudaDriver#cuArray3DGetDescriptor * @see JCudaDriver#cuArrayCreate * @see JCudaDriver#cuArrayDestroy * @see JCudaDriver#cuArrayGetDescriptor * @see JCudaDriver#cuMemAlloc * @see JCudaDriver#cuMemAllocHost * @see JCudaDriver#cuMemAllocPitch * @see JCudaDriver#cuMemcpy2D * @see JCudaDriver#cuMemcpy2DAsync * @see JCudaDriver#cuMemcpy2DUnaligned * @see JCudaDriver#cuMemcpy3D * @see JCudaDriver#cuMemcpy3DAsync * @see JCudaDriver#cuMemcpyAtoA * @see JCudaDriver#cuMemcpyAtoD * @see JCudaDriver#cuMemcpyAtoH * @see JCudaDriver#cuMemcpyAtoHAsync * @see JCudaDriver#cuMemcpyDtoA * @see JCudaDriver#cuMemcpyDtoD * @see JCudaDriver#cuMemcpyDtoDAsync * @see JCudaDriver#cuMemcpyDtoH * @see JCudaDriver#cuMemcpyDtoHAsync * @see JCudaDriver#cuMemcpyHtoA * @see JCudaDriver#cuMemcpyHtoAAsync * @see JCudaDriver#cuMemcpyHtoD * @see JCudaDriver#cuMemcpyHtoDAsync * @see JCudaDriver#cuMemFree * @see JCudaDriver#cuMemFreeHost * @see JCudaDriver#cuMemGetAddressRange * @see JCudaDriver#cuMemGetInfo * @see JCudaDriver#cuMemHostAlloc * @see JCudaDriver#cuMemHostGetDevicePointer * @see JCudaDriver#cuMemsetD2D8 * @see JCudaDriver#cuMemsetD2D8Async * @see JCudaDriver#cuMemsetD2D16 * @see JCudaDriver#cuMemsetD2D16Async * @see JCudaDriver#cuMemsetD2D32 * @see JCudaDriver#cuMemsetD2D32Async * @see JCudaDriver#cuMemsetD8 * @see JCudaDriver#cuMemsetD8Async * @see JCudaDriver#cuMemsetD16 * @see JCudaDriver#cuMemsetD16Async * @see JCudaDriver#cuMemsetD32Async */ public static int cuMemsetD32(CUdeviceptr dstDevice, int ui, long N) { return checkResult(cuMemsetD32Native(dstDevice, ui, N)); } private static native int cuMemsetD32Native(CUdeviceptr dstDevice, int ui, long N); /** * Initializes device memory. * *
     * CUresult cuMemsetD2D8 (
     *      CUdeviceptr dstDevice,
     *      size_t dstPitch,
     *      unsigned char  uc,
     *      size_t Width,
     *      size_t Height )
     * 
*
*

Initializes device memory. Sets the 2D * memory range of Width 8-bit values to the specified value * uc. Height specifies the number of rows to set, * and dstPitch specifies the number of bytes between each row. * This function performs fastest when the pitch is one that has been * passed * back by cuMemAllocPitch(). *

*

Note that this function is asynchronous * with respect to the host unless dstDevice refers to pinned * host memory. *

*
* Note: *

Note that this * function may also return error codes from previous, asynchronous * launches. *

*
*

*
* * @param dstDevice Destination device pointer * @param dstPitch Pitch of destination device pointer * @param uc Value to set * @param Width Width of row * @param Height Number of rows * * @return CUDA_SUCCESS, CUDA_ERROR_DEINITIALIZED, CUDA_ERROR_NOT_INITIALIZED, * CUDA_ERROR_INVALID_CONTEXT, CUDA_ERROR_INVALID_VALUE * * @see JCudaDriver#cuArray3DCreate * @see JCudaDriver#cuArray3DGetDescriptor * @see JCudaDriver#cuArrayCreate * @see JCudaDriver#cuArrayDestroy * @see JCudaDriver#cuArrayGetDescriptor * @see JCudaDriver#cuMemAlloc * @see JCudaDriver#cuMemAllocHost * @see JCudaDriver#cuMemAllocPitch * @see JCudaDriver#cuMemcpy2D * @see JCudaDriver#cuMemcpy2DAsync * @see JCudaDriver#cuMemcpy2DUnaligned * @see JCudaDriver#cuMemcpy3D * @see JCudaDriver#cuMemcpy3DAsync * @see JCudaDriver#cuMemcpyAtoA * @see JCudaDriver#cuMemcpyAtoD * @see JCudaDriver#cuMemcpyAtoH * @see JCudaDriver#cuMemcpyAtoHAsync * @see JCudaDriver#cuMemcpyDtoA * @see JCudaDriver#cuMemcpyDtoD * @see JCudaDriver#cuMemcpyDtoDAsync * @see JCudaDriver#cuMemcpyDtoH * @see JCudaDriver#cuMemcpyDtoHAsync * @see JCudaDriver#cuMemcpyHtoA * @see JCudaDriver#cuMemcpyHtoAAsync * @see JCudaDriver#cuMemcpyHtoD * @see JCudaDriver#cuMemcpyHtoDAsync * @see JCudaDriver#cuMemFree * @see JCudaDriver#cuMemFreeHost * @see JCudaDriver#cuMemGetAddressRange * @see JCudaDriver#cuMemGetInfo * @see JCudaDriver#cuMemHostAlloc * @see JCudaDriver#cuMemHostGetDevicePointer * @see JCudaDriver#cuMemsetD2D8Async * @see JCudaDriver#cuMemsetD2D16 * @see JCudaDriver#cuMemsetD2D16Async * @see JCudaDriver#cuMemsetD2D32 * @see JCudaDriver#cuMemsetD2D32Async * @see JCudaDriver#cuMemsetD8 * @see JCudaDriver#cuMemsetD8Async * @see JCudaDriver#cuMemsetD16 * @see JCudaDriver#cuMemsetD16Async * @see JCudaDriver#cuMemsetD32 * @see JCudaDriver#cuMemsetD32Async */ public static int cuMemsetD2D8(CUdeviceptr dstDevice, long dstPitch, byte uc, long Width, long Height) { return checkResult(cuMemsetD2D8Native(dstDevice, dstPitch, uc, Width, Height)); } 
private static native int cuMemsetD2D8Native(CUdeviceptr dstDevice, long dstPitch, byte uc, long Width, long Height); /** * Initializes device memory. * *
     * CUresult cuMemsetD2D16 (
     *      CUdeviceptr dstDevice,
     *      size_t dstPitch,
     *      unsigned short us,
     *      size_t Width,
     *      size_t Height )
     * 
*
*

Initializes device memory. Sets the 2D * memory range of Width 16-bit values to the specified value * us. Height specifies the number of rows to set, * and dstPitch specifies the number of bytes between each row. * The dstDevice pointer and dstPitch offset must be * two byte aligned. This function performs fastest when the pitch is one * that has been passed back by cuMemAllocPitch(). *

*

Note that this function is asynchronous * with respect to the host unless dstDevice refers to pinned * host memory. *

*
* Note: *

Note that this * function may also return error codes from previous, asynchronous * launches. *

*
*

*
* * @param dstDevice Destination device pointer * @param dstPitch Pitch of destination device pointer * @param us Value to set * @param Width Width of row * @param Height Number of rows * * @return CUDA_SUCCESS, CUDA_ERROR_DEINITIALIZED, CUDA_ERROR_NOT_INITIALIZED, * CUDA_ERROR_INVALID_CONTEXT, CUDA_ERROR_INVALID_VALUE * * @see JCudaDriver#cuArray3DCreate * @see JCudaDriver#cuArray3DGetDescriptor * @see JCudaDriver#cuArrayCreate * @see JCudaDriver#cuArrayDestroy * @see JCudaDriver#cuArrayGetDescriptor * @see JCudaDriver#cuMemAlloc * @see JCudaDriver#cuMemAllocHost * @see JCudaDriver#cuMemAllocPitch * @see JCudaDriver#cuMemcpy2D * @see JCudaDriver#cuMemcpy2DAsync * @see JCudaDriver#cuMemcpy2DUnaligned * @see JCudaDriver#cuMemcpy3D * @see JCudaDriver#cuMemcpy3DAsync * @see JCudaDriver#cuMemcpyAtoA * @see JCudaDriver#cuMemcpyAtoD * @see JCudaDriver#cuMemcpyAtoH * @see JCudaDriver#cuMemcpyAtoHAsync * @see JCudaDriver#cuMemcpyDtoA * @see JCudaDriver#cuMemcpyDtoD * @see JCudaDriver#cuMemcpyDtoDAsync * @see JCudaDriver#cuMemcpyDtoH * @see JCudaDriver#cuMemcpyDtoHAsync * @see JCudaDriver#cuMemcpyHtoA * @see JCudaDriver#cuMemcpyHtoAAsync * @see JCudaDriver#cuMemcpyHtoD * @see JCudaDriver#cuMemcpyHtoDAsync * @see JCudaDriver#cuMemFree * @see JCudaDriver#cuMemFreeHost * @see JCudaDriver#cuMemGetAddressRange * @see JCudaDriver#cuMemGetInfo * @see JCudaDriver#cuMemHostAlloc * @see JCudaDriver#cuMemHostGetDevicePointer * @see JCudaDriver#cuMemsetD2D8 * @see JCudaDriver#cuMemsetD2D8Async * @see JCudaDriver#cuMemsetD2D16Async * @see JCudaDriver#cuMemsetD2D32 * @see JCudaDriver#cuMemsetD2D32Async * @see JCudaDriver#cuMemsetD8 * @see JCudaDriver#cuMemsetD8Async * @see JCudaDriver#cuMemsetD16 * @see JCudaDriver#cuMemsetD16Async * @see JCudaDriver#cuMemsetD32 * @see JCudaDriver#cuMemsetD32Async */ public static int cuMemsetD2D16(CUdeviceptr dstDevice, long dstPitch, short us, long Width, long Height) { return checkResult(cuMemsetD2D16Native(dstDevice, dstPitch, us, Width, Height)); } 
private static native int cuMemsetD2D16Native(CUdeviceptr dstDevice, long dstPitch, short us, long Width, long Height); /** * Initializes device memory. * *
     * CUresult cuMemsetD2D32 (
     *      CUdeviceptr dstDevice,
     *      size_t dstPitch,
     *      unsigned int  ui,
     *      size_t Width,
     *      size_t Height )
     * 
*
*

Initializes device memory. Sets the 2D * memory range of Width 32-bit values to the specified value * ui. Height specifies the number of rows to set, * and dstPitch specifies the number of bytes between each row. * The dstDevice pointer and dstPitch offset must be * four byte aligned. This function performs fastest when the pitch is * one that has been passed back by cuMemAllocPitch(). *

*

Note that this function is asynchronous * with respect to the host unless dstDevice refers to pinned * host memory. *

*
* Note: *

Note that this * function may also return error codes from previous, asynchronous * launches. *

*
*

*
* * @param dstDevice Destination device pointer * @param dstPitch Pitch of destination device pointer * @param ui Value to set * @param Width Width of row * @param Height Number of rows * * @return CUDA_SUCCESS, CUDA_ERROR_DEINITIALIZED, CUDA_ERROR_NOT_INITIALIZED, * CUDA_ERROR_INVALID_CONTEXT, CUDA_ERROR_INVALID_VALUE * * @see JCudaDriver#cuArray3DCreate * @see JCudaDriver#cuArray3DGetDescriptor * @see JCudaDriver#cuArrayCreate * @see JCudaDriver#cuArrayDestroy * @see JCudaDriver#cuArrayGetDescriptor * @see JCudaDriver#cuMemAlloc * @see JCudaDriver#cuMemAllocHost * @see JCudaDriver#cuMemAllocPitch * @see JCudaDriver#cuMemcpy2D * @see JCudaDriver#cuMemcpy2DAsync * @see JCudaDriver#cuMemcpy2DUnaligned * @see JCudaDriver#cuMemcpy3D * @see JCudaDriver#cuMemcpy3DAsync * @see JCudaDriver#cuMemcpyAtoA * @see JCudaDriver#cuMemcpyAtoD * @see JCudaDriver#cuMemcpyAtoH * @see JCudaDriver#cuMemcpyAtoHAsync * @see JCudaDriver#cuMemcpyDtoA * @see JCudaDriver#cuMemcpyDtoD * @see JCudaDriver#cuMemcpyDtoDAsync * @see JCudaDriver#cuMemcpyDtoH * @see JCudaDriver#cuMemcpyDtoHAsync * @see JCudaDriver#cuMemcpyHtoA * @see JCudaDriver#cuMemcpyHtoAAsync * @see JCudaDriver#cuMemcpyHtoD * @see JCudaDriver#cuMemcpyHtoDAsync * @see JCudaDriver#cuMemFree * @see JCudaDriver#cuMemFreeHost * @see JCudaDriver#cuMemGetAddressRange * @see JCudaDriver#cuMemGetInfo * @see JCudaDriver#cuMemHostAlloc * @see JCudaDriver#cuMemHostGetDevicePointer * @see JCudaDriver#cuMemsetD2D8 * @see JCudaDriver#cuMemsetD2D8Async * @see JCudaDriver#cuMemsetD2D16 * @see JCudaDriver#cuMemsetD2D16Async * @see JCudaDriver#cuMemsetD2D32Async * @see JCudaDriver#cuMemsetD8 * @see JCudaDriver#cuMemsetD8Async * @see JCudaDriver#cuMemsetD16 * @see JCudaDriver#cuMemsetD16Async * @see JCudaDriver#cuMemsetD32 * @see JCudaDriver#cuMemsetD32Async */ public static int cuMemsetD2D32(CUdeviceptr dstDevice, long dstPitch, int ui, long Width, long Height) { return checkResult(cuMemsetD2D32Native(dstDevice, dstPitch, ui, Width, Height)); } 
private static native int cuMemsetD2D32Native(CUdeviceptr dstDevice, long dstPitch, int ui, long Width, long Height); /** * Sets device memory. * *
     * CUresult cuMemsetD8Async (
     *      CUdeviceptr dstDevice,
     *      unsigned char  uc,
     *      size_t N,
     *      CUstream hStream )
     * 
*
*

Sets device memory. Sets the memory * range of N 8-bit values to the specified value uc. *

*

cuMemsetD8Async() is asynchronous and * can optionally be associated to a stream by passing a non-zero stream argument. *

*
* Note: *

Note that this * function may also return error codes from previous, asynchronous * launches. *

*
*

*
* * @param dstDevice Destination device pointer * @param uc Value to set * @param N Number of elements * @param hStream Stream identifier * * @return CUDA_SUCCESS, CUDA_ERROR_DEINITIALIZED, CUDA_ERROR_NOT_INITIALIZED, * CUDA_ERROR_INVALID_CONTEXT, CUDA_ERROR_INVALID_VALUE * * @see JCudaDriver#cuArray3DCreate * @see JCudaDriver#cuArray3DGetDescriptor * @see JCudaDriver#cuArrayCreate * @see JCudaDriver#cuArrayDestroy * @see JCudaDriver#cuArrayGetDescriptor * @see JCudaDriver#cuMemAlloc * @see JCudaDriver#cuMemAllocHost * @see JCudaDriver#cuMemAllocPitch * @see JCudaDriver#cuMemcpy2D * @see JCudaDriver#cuMemcpy2DAsync * @see JCudaDriver#cuMemcpy2DUnaligned * @see JCudaDriver#cuMemcpy3D * @see JCudaDriver#cuMemcpy3DAsync * @see JCudaDriver#cuMemcpyAtoA * @see JCudaDriver#cuMemcpyAtoD * @see JCudaDriver#cuMemcpyAtoH * @see JCudaDriver#cuMemcpyAtoHAsync * @see JCudaDriver#cuMemcpyDtoA * @see JCudaDriver#cuMemcpyDtoD * @see JCudaDriver#cuMemcpyDtoDAsync * @see JCudaDriver#cuMemcpyDtoH * @see JCudaDriver#cuMemcpyDtoHAsync * @see JCudaDriver#cuMemcpyHtoA * @see JCudaDriver#cuMemcpyHtoAAsync * @see JCudaDriver#cuMemcpyHtoD * @see JCudaDriver#cuMemcpyHtoDAsync * @see JCudaDriver#cuMemFree * @see JCudaDriver#cuMemFreeHost * @see JCudaDriver#cuMemGetAddressRange * @see JCudaDriver#cuMemGetInfo * @see JCudaDriver#cuMemHostAlloc * @see JCudaDriver#cuMemHostGetDevicePointer * @see JCudaDriver#cuMemsetD2D8 * @see JCudaDriver#cuMemsetD2D8Async * @see JCudaDriver#cuMemsetD2D16 * @see JCudaDriver#cuMemsetD2D16Async * @see JCudaDriver#cuMemsetD2D32 * @see JCudaDriver#cuMemsetD2D32Async * @see JCudaDriver#cuMemsetD8 * @see JCudaDriver#cuMemsetD16 * @see JCudaDriver#cuMemsetD16Async * @see JCudaDriver#cuMemsetD32 * @see JCudaDriver#cuMemsetD32Async */ public static int cuMemsetD8Async(CUdeviceptr dstDevice, byte uc, long N, CUstream hStream) { return checkResult(cuMemsetD8AsyncNative(dstDevice, uc, N, hStream)); } private static native int cuMemsetD8AsyncNative(CUdeviceptr dstDevice, 
byte uc, long N, CUstream hStream); /** * Sets device memory. * *
     * CUresult cuMemsetD16Async (
     *      CUdeviceptr dstDevice,
     *      unsigned short us,
     *      size_t N,
     *      CUstream hStream )
     * 
*
*

Sets device memory. Sets the memory * range of N 16-bit values to the specified value us. * The dstDevice pointer must be two byte aligned. *

*

cuMemsetD16Async() is asynchronous and * can optionally be associated to a stream by passing a non-zero stream argument. *

*
* Note: *

Note that this * function may also return error codes from previous, asynchronous * launches. *

*
*

*
* * @param dstDevice Destination device pointer * @param us Value to set * @param N Number of elements * @param hStream Stream identifier * * @return CUDA_SUCCESS, CUDA_ERROR_DEINITIALIZED, CUDA_ERROR_NOT_INITIALIZED, * CUDA_ERROR_INVALID_CONTEXT, CUDA_ERROR_INVALID_VALUE * * @see JCudaDriver#cuArray3DCreate * @see JCudaDriver#cuArray3DGetDescriptor * @see JCudaDriver#cuArrayCreate * @see JCudaDriver#cuArrayDestroy * @see JCudaDriver#cuArrayGetDescriptor * @see JCudaDriver#cuMemAlloc * @see JCudaDriver#cuMemAllocHost * @see JCudaDriver#cuMemAllocPitch * @see JCudaDriver#cuMemcpy2D * @see JCudaDriver#cuMemcpy2DAsync * @see JCudaDriver#cuMemcpy2DUnaligned * @see JCudaDriver#cuMemcpy3D * @see JCudaDriver#cuMemcpy3DAsync * @see JCudaDriver#cuMemcpyAtoA * @see JCudaDriver#cuMemcpyAtoD * @see JCudaDriver#cuMemcpyAtoH * @see JCudaDriver#cuMemcpyAtoHAsync * @see JCudaDriver#cuMemcpyDtoA * @see JCudaDriver#cuMemcpyDtoD * @see JCudaDriver#cuMemcpyDtoDAsync * @see JCudaDriver#cuMemcpyDtoH * @see JCudaDriver#cuMemcpyDtoHAsync * @see JCudaDriver#cuMemcpyHtoA * @see JCudaDriver#cuMemcpyHtoAAsync * @see JCudaDriver#cuMemcpyHtoD * @see JCudaDriver#cuMemcpyHtoDAsync * @see JCudaDriver#cuMemFree * @see JCudaDriver#cuMemFreeHost * @see JCudaDriver#cuMemGetAddressRange * @see JCudaDriver#cuMemGetInfo * @see JCudaDriver#cuMemHostAlloc * @see JCudaDriver#cuMemHostGetDevicePointer * @see JCudaDriver#cuMemsetD2D8 * @see JCudaDriver#cuMemsetD2D8Async * @see JCudaDriver#cuMemsetD2D16 * @see JCudaDriver#cuMemsetD2D16Async * @see JCudaDriver#cuMemsetD2D32 * @see JCudaDriver#cuMemsetD2D32Async * @see JCudaDriver#cuMemsetD8 * @see JCudaDriver#cuMemsetD8Async * @see JCudaDriver#cuMemsetD16 * @see JCudaDriver#cuMemsetD32 * @see JCudaDriver#cuMemsetD32Async */ public static int cuMemsetD16Async(CUdeviceptr dstDevice, short us, long N, CUstream hStream) { return checkResult(cuMemsetD16AsyncNative(dstDevice, us, N, hStream)); } private static native int cuMemsetD16AsyncNative(CUdeviceptr dstDevice, 
short us, long N, CUstream hStream); /** * Sets device memory. * *
     * CUresult cuMemsetD32Async (
     *      CUdeviceptr dstDevice,
     *      unsigned int  ui,
     *      size_t N,
     *      CUstream hStream )
     * 
*
*

Sets device memory. Sets the memory * range of N 32-bit values to the specified value ui. * The dstDevice pointer must be four byte aligned. *

*

cuMemsetD32Async() is asynchronous and * can optionally be associated to a stream by passing a non-zero stream argument. *

*
* Note: *

Note that this * function may also return error codes from previous, asynchronous * launches. *

*
*

*
* * @param dstDevice Destination device pointer * @param ui Value to set * @param N Number of elements * @param hStream Stream identifier * * @return CUDA_SUCCESS, CUDA_ERROR_DEINITIALIZED, CUDA_ERROR_NOT_INITIALIZED, * CUDA_ERROR_INVALID_CONTEXT, CUDA_ERROR_INVALID_VALUE * * @see JCudaDriver#cuArray3DCreate * @see JCudaDriver#cuArray3DGetDescriptor * @see JCudaDriver#cuArrayCreate * @see JCudaDriver#cuArrayDestroy * @see JCudaDriver#cuArrayGetDescriptor * @see JCudaDriver#cuMemAlloc * @see JCudaDriver#cuMemAllocHost * @see JCudaDriver#cuMemAllocPitch * @see JCudaDriver#cuMemcpy2D * @see JCudaDriver#cuMemcpy2DAsync * @see JCudaDriver#cuMemcpy2DUnaligned * @see JCudaDriver#cuMemcpy3D * @see JCudaDriver#cuMemcpy3DAsync * @see JCudaDriver#cuMemcpyAtoA * @see JCudaDriver#cuMemcpyAtoD * @see JCudaDriver#cuMemcpyAtoH * @see JCudaDriver#cuMemcpyAtoHAsync * @see JCudaDriver#cuMemcpyDtoA * @see JCudaDriver#cuMemcpyDtoD * @see JCudaDriver#cuMemcpyDtoDAsync * @see JCudaDriver#cuMemcpyDtoH * @see JCudaDriver#cuMemcpyDtoHAsync * @see JCudaDriver#cuMemcpyHtoA * @see JCudaDriver#cuMemcpyHtoAAsync * @see JCudaDriver#cuMemcpyHtoD * @see JCudaDriver#cuMemcpyHtoDAsync * @see JCudaDriver#cuMemFree * @see JCudaDriver#cuMemFreeHost * @see JCudaDriver#cuMemGetAddressRange * @see JCudaDriver#cuMemGetInfo * @see JCudaDriver#cuMemHostAlloc * @see JCudaDriver#cuMemHostGetDevicePointer * @see JCudaDriver#cuMemsetD2D8 * @see JCudaDriver#cuMemsetD2D8Async * @see JCudaDriver#cuMemsetD2D16 * @see JCudaDriver#cuMemsetD2D16Async * @see JCudaDriver#cuMemsetD2D32 * @see JCudaDriver#cuMemsetD2D32Async * @see JCudaDriver#cuMemsetD8 * @see JCudaDriver#cuMemsetD8Async * @see JCudaDriver#cuMemsetD16 * @see JCudaDriver#cuMemsetD16Async * @see JCudaDriver#cuMemsetD32 */ public static int cuMemsetD32Async(CUdeviceptr dstDevice, int ui, long N, CUstream hStream) { return checkResult(cuMemsetD32AsyncNative(dstDevice, ui, N, hStream)); } private static native int cuMemsetD32AsyncNative(CUdeviceptr dstDevice, 
int ui, long N, CUstream hStream); /** * Sets device memory. * *
     * CUresult cuMemsetD2D8Async (
     *      CUdeviceptr dstDevice,
     *      size_t dstPitch,
     *      unsigned char  uc,
     *      size_t Width,
     *      size_t Height,
     *      CUstream hStream )
     * 
*
*

Sets device memory. Sets the 2D memory * range of Width 8-bit values to the specified value uc. Height specifies the number of rows to set, and * dstPitch specifies the number of bytes between each row. This * function performs fastest when the pitch is one that has been passed * back by cuMemAllocPitch(). *

*

cuMemsetD2D8Async() is asynchronous and * can optionally be associated to a stream by passing a non-zero stream argument. *

*
* Note: *

Note that this * function may also return error codes from previous, asynchronous * launches. *

*
*

*
* * @param dstDevice Destination device pointer * @param dstPitch Pitch of destination device pointer * @param uc Value to set * @param Width Width of row * @param Height Number of rows * @param hStream Stream identifier * * @return CUDA_SUCCESS, CUDA_ERROR_DEINITIALIZED, CUDA_ERROR_NOT_INITIALIZED, * CUDA_ERROR_INVALID_CONTEXT, CUDA_ERROR_INVALID_VALUE * * @see JCudaDriver#cuArray3DCreate * @see JCudaDriver#cuArray3DGetDescriptor * @see JCudaDriver#cuArrayCreate * @see JCudaDriver#cuArrayDestroy * @see JCudaDriver#cuArrayGetDescriptor * @see JCudaDriver#cuMemAlloc * @see JCudaDriver#cuMemAllocHost * @see JCudaDriver#cuMemAllocPitch * @see JCudaDriver#cuMemcpy2D * @see JCudaDriver#cuMemcpy2DAsync * @see JCudaDriver#cuMemcpy2DUnaligned * @see JCudaDriver#cuMemcpy3D * @see JCudaDriver#cuMemcpy3DAsync * @see JCudaDriver#cuMemcpyAtoA * @see JCudaDriver#cuMemcpyAtoD * @see JCudaDriver#cuMemcpyAtoH * @see JCudaDriver#cuMemcpyAtoHAsync * @see JCudaDriver#cuMemcpyDtoA * @see JCudaDriver#cuMemcpyDtoD * @see JCudaDriver#cuMemcpyDtoDAsync * @see JCudaDriver#cuMemcpyDtoH * @see JCudaDriver#cuMemcpyDtoHAsync * @see JCudaDriver#cuMemcpyHtoA * @see JCudaDriver#cuMemcpyHtoAAsync * @see JCudaDriver#cuMemcpyHtoD * @see JCudaDriver#cuMemcpyHtoDAsync * @see JCudaDriver#cuMemFree * @see JCudaDriver#cuMemFreeHost * @see JCudaDriver#cuMemGetAddressRange * @see JCudaDriver#cuMemGetInfo * @see JCudaDriver#cuMemHostAlloc * @see JCudaDriver#cuMemHostGetDevicePointer * @see JCudaDriver#cuMemsetD2D8 * @see JCudaDriver#cuMemsetD2D16 * @see JCudaDriver#cuMemsetD2D16Async * @see JCudaDriver#cuMemsetD2D32 * @see JCudaDriver#cuMemsetD2D32Async * @see JCudaDriver#cuMemsetD8 * @see JCudaDriver#cuMemsetD8Async * @see JCudaDriver#cuMemsetD16 * @see JCudaDriver#cuMemsetD16Async * @see JCudaDriver#cuMemsetD32 * @see JCudaDriver#cuMemsetD32Async */ public static int cuMemsetD2D8Async(CUdeviceptr dstDevice, long dstPitch, byte uc, long Width, long Height, CUstream hStream) { return 
checkResult(cuMemsetD2D8AsyncNative(dstDevice, dstPitch, uc, Width, Height, hStream)); } private static native int cuMemsetD2D8AsyncNative(CUdeviceptr dstDevice, long dstPitch, byte uc, long Width, long Height, CUstream hStream); /** * Sets device memory. * *
     * CUresult cuMemsetD2D16Async (
     *      CUdeviceptr dstDevice,
     *      size_t dstPitch,
     *      unsigned short us,
     *      size_t Width,
     *      size_t Height,
     *      CUstream hStream )
     * 
*
*

Sets device memory. Sets the 2D memory * range of Width 16-bit values to the specified value us. Height specifies the number of rows to set, and * dstPitch specifies the number of bytes between each row. The * dstDevice pointer and dstPitch offset must be two * byte aligned. This function performs fastest when the pitch is one that * has been passed back by cuMemAllocPitch(). *

*

cuMemsetD2D16Async() is asynchronous * and can optionally be associated to a stream by passing a non-zero stream argument. *

*
* Note: *

Note that this * function may also return error codes from previous, asynchronous * launches. *

*
*

*
* * @param dstDevice Destination device pointer * @param dstPitch Pitch of destination device pointer * @param us Value to set * @param Width Width of row * @param Height Number of rows * @param hStream Stream identifier * * @return CUDA_SUCCESS, CUDA_ERROR_DEINITIALIZED, CUDA_ERROR_NOT_INITIALIZED, * CUDA_ERROR_INVALID_CONTEXT, CUDA_ERROR_INVALID_VALUE * * @see JCudaDriver#cuArray3DCreate * @see JCudaDriver#cuArray3DGetDescriptor * @see JCudaDriver#cuArrayCreate * @see JCudaDriver#cuArrayDestroy * @see JCudaDriver#cuArrayGetDescriptor * @see JCudaDriver#cuMemAlloc * @see JCudaDriver#cuMemAllocHost * @see JCudaDriver#cuMemAllocPitch * @see JCudaDriver#cuMemcpy2D * @see JCudaDriver#cuMemcpy2DAsync * @see JCudaDriver#cuMemcpy2DUnaligned * @see JCudaDriver#cuMemcpy3D * @see JCudaDriver#cuMemcpy3DAsync * @see JCudaDriver#cuMemcpyAtoA * @see JCudaDriver#cuMemcpyAtoD * @see JCudaDriver#cuMemcpyAtoH * @see JCudaDriver#cuMemcpyAtoHAsync * @see JCudaDriver#cuMemcpyDtoA * @see JCudaDriver#cuMemcpyDtoD * @see JCudaDriver#cuMemcpyDtoDAsync * @see JCudaDriver#cuMemcpyDtoH * @see JCudaDriver#cuMemcpyDtoHAsync * @see JCudaDriver#cuMemcpyHtoA * @see JCudaDriver#cuMemcpyHtoAAsync * @see JCudaDriver#cuMemcpyHtoD * @see JCudaDriver#cuMemcpyHtoDAsync * @see JCudaDriver#cuMemFree * @see JCudaDriver#cuMemFreeHost * @see JCudaDriver#cuMemGetAddressRange * @see JCudaDriver#cuMemGetInfo * @see JCudaDriver#cuMemHostAlloc * @see JCudaDriver#cuMemHostGetDevicePointer * @see JCudaDriver#cuMemsetD2D8 * @see JCudaDriver#cuMemsetD2D8Async * @see JCudaDriver#cuMemsetD2D16 * @see JCudaDriver#cuMemsetD2D32 * @see JCudaDriver#cuMemsetD2D32Async * @see JCudaDriver#cuMemsetD8 * @see JCudaDriver#cuMemsetD8Async * @see JCudaDriver#cuMemsetD16 * @see JCudaDriver#cuMemsetD16Async * @see JCudaDriver#cuMemsetD32 * @see JCudaDriver#cuMemsetD32Async */ public static int cuMemsetD2D16Async(CUdeviceptr dstDevice, long dstPitch, short us, long Width, long Height, CUstream hStream) { return 
checkResult(cuMemsetD2D16AsyncNative(dstDevice, dstPitch, us, Width, Height, hStream)); } private static native int cuMemsetD2D16AsyncNative(CUdeviceptr dstDevice, long dstPitch, short us, long Width, long Height, CUstream hStream); /** * Sets device memory. * *
     * CUresult cuMemsetD2D32Async (
     *      CUdeviceptr dstDevice,
     *      size_t dstPitch,
     *      unsigned int  ui,
     *      size_t Width,
     *      size_t Height,
     *      CUstream hStream )
     * 
*
*

Sets device memory. Sets the 2D memory * range of Width 32-bit values to the specified value ui. Height specifies the number of rows to set, and * dstPitch specifies the number of bytes between each row. The * dstDevice pointer and dstPitch offset must be four * byte aligned. This function performs fastest when the pitch is one that * has been passed back by cuMemAllocPitch(). *

*

cuMemsetD2D32Async() is asynchronous * and can optionally be associated to a stream by passing a non-zero stream argument. *

*
* Note: *

Note that this * function may also return error codes from previous, asynchronous * launches. *

*
*

*
* * @param dstDevice Destination device pointer * @param dstPitch Pitch of destination device pointer * @param ui Value to set * @param Width Width of row * @param Height Number of rows * @param hStream Stream identifier * * @return CUDA_SUCCESS, CUDA_ERROR_DEINITIALIZED, CUDA_ERROR_NOT_INITIALIZED, * CUDA_ERROR_INVALID_CONTEXT, CUDA_ERROR_INVALID_VALUE * * @see JCudaDriver#cuArray3DCreate * @see JCudaDriver#cuArray3DGetDescriptor * @see JCudaDriver#cuArrayCreate * @see JCudaDriver#cuArrayDestroy * @see JCudaDriver#cuArrayGetDescriptor * @see JCudaDriver#cuMemAlloc * @see JCudaDriver#cuMemAllocHost * @see JCudaDriver#cuMemAllocPitch * @see JCudaDriver#cuMemcpy2D * @see JCudaDriver#cuMemcpy2DAsync * @see JCudaDriver#cuMemcpy2DUnaligned * @see JCudaDriver#cuMemcpy3D * @see JCudaDriver#cuMemcpy3DAsync * @see JCudaDriver#cuMemcpyAtoA * @see JCudaDriver#cuMemcpyAtoD * @see JCudaDriver#cuMemcpyAtoH * @see JCudaDriver#cuMemcpyAtoHAsync * @see JCudaDriver#cuMemcpyDtoA * @see JCudaDriver#cuMemcpyDtoD * @see JCudaDriver#cuMemcpyDtoDAsync * @see JCudaDriver#cuMemcpyDtoH * @see JCudaDriver#cuMemcpyDtoHAsync * @see JCudaDriver#cuMemcpyHtoA * @see JCudaDriver#cuMemcpyHtoAAsync * @see JCudaDriver#cuMemcpyHtoD * @see JCudaDriver#cuMemcpyHtoDAsync * @see JCudaDriver#cuMemFree * @see JCudaDriver#cuMemFreeHost * @see JCudaDriver#cuMemGetAddressRange * @see JCudaDriver#cuMemGetInfo * @see JCudaDriver#cuMemHostAlloc * @see JCudaDriver#cuMemHostGetDevicePointer * @see JCudaDriver#cuMemsetD2D8 * @see JCudaDriver#cuMemsetD2D8Async * @see JCudaDriver#cuMemsetD2D16 * @see JCudaDriver#cuMemsetD2D16Async * @see JCudaDriver#cuMemsetD2D32 * @see JCudaDriver#cuMemsetD8 * @see JCudaDriver#cuMemsetD8Async * @see JCudaDriver#cuMemsetD16 * @see JCudaDriver#cuMemsetD16Async * @see JCudaDriver#cuMemsetD32 * @see JCudaDriver#cuMemsetD32Async */ public static int cuMemsetD2D32Async(CUdeviceptr dstDevice, long dstPitch, int ui, long Width, long Height, CUstream hStream) { return 
checkResult(cuMemsetD2D32AsyncNative(dstDevice, dstPitch, ui, Width, Height, hStream)); } private static native int cuMemsetD2D32AsyncNative(CUdeviceptr dstDevice, long dstPitch, int ui, long Width, long Height, CUstream hStream); /** * Returns information about a function. * *
     * CUresult cuFuncGetAttribute (
     *      int* pi,
     *      CUfunction_attribute attrib,
     *      CUfunction hfunc )
     * 
*
*

Returns information about a function. * Returns in *pi the integer value of the attribute attrib on the kernel given by hfunc. The supported * attributes are: *

    *
  • *

    CU_FUNC_ATTRIBUTE_MAX_THREADS_PER_BLOCK: * The maximum number of threads per block, beyond which a launch of the * function would fail. This number depends on both the * function and the device on which * the function is currently loaded. *

    *
  • *
  • *

    CU_FUNC_ATTRIBUTE_SHARED_SIZE_BYTES: * The size in bytes of statically-allocated shared memory per block * required by this function. This does not include dynamically-allocated * shared memory requested by the * user at runtime. *

    *
  • *
  • *

    CU_FUNC_ATTRIBUTE_CONST_SIZE_BYTES: * The size in bytes of user-allocated constant memory required by this * function. *

    *
  • *
  • *

    CU_FUNC_ATTRIBUTE_LOCAL_SIZE_BYTES: * The size in bytes of local memory used by each thread of this * function. *

    *
  • *
  • *

    CU_FUNC_ATTRIBUTE_NUM_REGS: * The number of registers used by each thread of this function. *

    *
  • *
  • *

    CU_FUNC_ATTRIBUTE_PTX_VERSION: * The PTX virtual architecture version for which the function was * compiled. This value is the major PTX version * 10 + the * minor PTX version, so a PTX * version 1.3 function would return the value 13. Note that this may * return the undefined value * of 0 for cubins compiled prior * to CUDA 3.0. *

    *
  • *
  • *

    CU_FUNC_ATTRIBUTE_BINARY_VERSION: * The binary architecture version for which the function was compiled. * This value is the major binary version * 10 + the minor * binary version, so a binary * version 1.3 function would return the value 13. Note that this will * return a value of 10 for legacy * cubins that do not have a * properly-encoded binary architecture version. *

    *
  • *
*

*
* Note: *

Note that this * function may also return error codes from previous, asynchronous * launches. *

*
*

*
* * @param pi Returned attribute value * @param attrib Attribute requested * @param hfunc Function to query attribute of * * @return CUDA_SUCCESS, CUDA_ERROR_DEINITIALIZED, CUDA_ERROR_NOT_INITIALIZED, * CUDA_ERROR_INVALID_CONTEXT, CUDA_ERROR_INVALID_HANDLE, * CUDA_ERROR_INVALID_VALUE * * @see JCudaDriver#cuCtxGetCacheConfig * @see JCudaDriver#cuCtxSetCacheConfig * @see JCudaDriver#cuFuncSetCacheConfig * @see JCudaDriver#cuLaunchKernel */ public static int cuFuncGetAttribute (int pi[], int attrib, CUfunction func) { return checkResult(cuFuncGetAttributeNative(pi, attrib, func)); } private static native int cuFuncGetAttributeNative(int pi[], int attrib, CUfunction func); /** * Sets information about a function.
*
* This call sets the value of a specified attribute attrib on the kernel given * by hfunc to an integer value specified by val * This function returns CUDA_SUCCESS if the new value of the attribute could be * successfully set. If the set fails, this call will return an error. * Not all attributes can have values set. Attempting to set a value on a read-only * attribute will result in an error (CUDA_ERROR_INVALID_VALUE) *
* Supported attributes for the cuFuncSetAttribute call are: *
    *
  • CU_FUNC_ATTRIBUTE_MAX_DYNAMIC_SHARED_SIZE_BYTES: This maximum size in bytes of * dynamically-allocated shared memory. The value should contain the requested * maximum size of dynamically-allocated shared memory. The sum of this value and * the function attribute ::CU_FUNC_ATTRIBUTE_SHARED_SIZE_BYTES cannot exceed the * device attribute ::CU_DEVICE_ATTRIBUTE_MAX_SHARED_MEMORY_PER_BLOCK_OPTIN. * The maximal size of requestable dynamic shared memory may differ by GPU * architecture. *
  • *
  • CU_FUNC_ATTRIBUTE_PREFERRED_SHARED_MEMORY_CARVEOUT: On devices where the L1 * cache and shared memory use the same hardware resources, this sets the shared memory * carveout preference, in percent of the total resources. This is only a hint, and the * driver can choose a different ratio if required to execute the function. *
  • *
* * @param hfunc Function to query attribute of * @param attrib Attribute requested * @param value The value to set * * @return CUDA_SUCCESS, CUDA_ERROR_DEINITIALIZED, CUDA_ERROR_NOT_INITIALIZED, * CUDA_ERROR_INVALID_CONTEXT, CUDA_ERROR_INVALID_HANDLE, CUDA_ERROR_INVALID_VALUE * * @see JCudaDriver#cuCtxGetCacheConfig * @see JCudaDriver#cuCtxSetCacheConfig * @see JCudaDriver#cuFuncSetCacheConfig * @see JCudaDriver#cuLaunchKernel * @see JCuda#cudaFuncGetAttributes * @see JCuda#cudaFuncSetAttribute */ public static int cuFuncSetAttribute(CUfunction hfunc, int attrib, int value) { return checkResult(cuFuncSetAttributeNative(hfunc, attrib, value)); } private static native int cuFuncSetAttributeNative(CUfunction hfunc, int attrib, int value); /** * Sets the block-dimensions for the function. * *
     * CUresult cuFuncSetBlockShape (
     *      CUfunction hfunc,
     *      int  x,
     *      int  y,
     *      int  z )
     * 
*
*

Sets the block-dimensions for the * function. * Deprecated Specifies the x, y, and z dimensions of the thread blocks that are * created when the kernel given by hfunc is launched. *

*
* Note: *

Note that this * function may also return error codes from previous, asynchronous * launches. *

*
*

*
* * @param hfunc Kernel to specify dimensions of * @param x X dimension * @param y Y dimension * @param z Z dimension * * @return CUDA_SUCCESS, CUDA_ERROR_DEINITIALIZED, CUDA_ERROR_NOT_INITIALIZED, * CUDA_ERROR_INVALID_CONTEXT, CUDA_ERROR_INVALID_HANDLE, * CUDA_ERROR_INVALID_VALUE * * @see JCudaDriver#cuFuncSetSharedSize * @see JCudaDriver#cuFuncSetCacheConfig * @see JCudaDriver#cuFuncGetAttribute * @see JCudaDriver#cuParamSetSize * @see JCudaDriver#cuParamSeti * @see JCudaDriver#cuParamSetf * @see JCudaDriver#cuParamSetv * @see JCudaDriver#cuLaunch * @see JCudaDriver#cuLaunchGrid * @see JCudaDriver#cuLaunchGridAsync * @see JCudaDriver#cuLaunchKernel * * @deprecated Deprecated in CUDA */ @Deprecated public static int cuFuncSetBlockShape(CUfunction hfunc, int x, int y, int z) { return checkResult(cuFuncSetBlockShapeNative(hfunc, x, y, z)); } private static native int cuFuncSetBlockShapeNative(CUfunction hfunc, int x, int y, int z); /** * Sets the dynamic shared-memory size for the function. * *
     * CUresult cuFuncSetSharedSize (
     *      CUfunction hfunc,
     *      unsigned int  bytes )
     * 
*
*

Sets the dynamic shared-memory size for * the function. * Deprecated Sets through bytes * the amount of dynamic shared memory that will be available to each * thread block when the kernel given by hfunc is launched. *

*
* Note: *

Note that this * function may also return error codes from previous, asynchronous * launches. *

*
*

*
* * @param hfunc Kernel to specify dynamic shared-memory size for * @param bytes Dynamic shared-memory size per thread in bytes * * @return CUDA_SUCCESS, CUDA_ERROR_DEINITIALIZED, CUDA_ERROR_NOT_INITIALIZED, * CUDA_ERROR_INVALID_CONTEXT, CUDA_ERROR_INVALID_HANDLE, * CUDA_ERROR_INVALID_VALUE * * @see JCudaDriver#cuFuncSetBlockShape * @see JCudaDriver#cuFuncSetCacheConfig * @see JCudaDriver#cuFuncGetAttribute * @see JCudaDriver#cuParamSetSize * @see JCudaDriver#cuParamSeti * @see JCudaDriver#cuParamSetf * @see JCudaDriver#cuParamSetv * @see JCudaDriver#cuLaunch * @see JCudaDriver#cuLaunchGrid * @see JCudaDriver#cuLaunchGridAsync * @see JCudaDriver#cuLaunchKernel * * @deprecated Deprecated in CUDA */ @Deprecated public static int cuFuncSetSharedSize(CUfunction hfunc, int bytes) { return checkResult(cuFuncSetSharedSizeNative(hfunc, bytes)); } private static native int cuFuncSetSharedSizeNative(CUfunction hfunc, int bytes); /** * Sets the preferred cache configuration for a device function. * *
     * CUresult cuFuncSetCacheConfig (
     *      CUfunction hfunc,
     *      CUfunc_cache config )
     * 
*
*

Sets the preferred cache configuration * for a device function. On devices where the L1 cache and shared memory * use the same * hardware resources, this sets through * config the preferred cache configuration for the device * function hfunc. This is only a preference. The driver will * use the requested configuration if possible, but it is free to choose * a different * configuration if required to execute hfunc. Any context-wide preference set via cuCtxSetCacheConfig() * will be overridden by this per-function setting unless the per-function * setting is CU_FUNC_CACHE_PREFER_NONE. In that case, the current * context-wide setting will be used. *

*

This setting does nothing on devices * where the size of the L1 cache and shared memory are fixed. *

*

Launching a kernel with a different * preference than the most recent preference setting may insert a * device-side synchronization * point. *

*

The supported cache configurations are: *

    *
  • *

    CU_FUNC_CACHE_PREFER_NONE: no * preference for shared memory or L1 (default) *

    *
  • *
  • *

    CU_FUNC_CACHE_PREFER_SHARED: * prefer larger shared memory and smaller L1 cache *

    *
  • *
  • *

    CU_FUNC_CACHE_PREFER_L1: prefer * larger L1 cache and smaller shared memory *

    *
  • *
  • *

    CU_FUNC_CACHE_PREFER_EQUAL: * prefer equal sized L1 cache and shared memory *

    *
  • *
*

*
* Note: *

Note that this * function may also return error codes from previous, asynchronous * launches. *

*
*

*
* * @param hfunc Kernel to configure cache for * @param config Requested cache configuration * * @return CUDA_SUCCESS, CUDA_ERROR_INVALID_VALUE, CUDA_ERROR_DEINITIALIZED, * CUDA_ERROR_NOT_INITIALIZED, CUDA_ERROR_INVALID_CONTEXT * * @see JCudaDriver#cuCtxGetCacheConfig * @see JCudaDriver#cuCtxSetCacheConfig * @see JCudaDriver#cuFuncGetAttribute * @see JCudaDriver#cuLaunchKernel */ public static int cuFuncSetCacheConfig(CUfunction hfunc, int config) { return checkResult(cuFuncSetCacheConfigNative(hfunc, config)); } private static native int cuFuncSetCacheConfigNative(CUfunction hfunc, int config); /** * Sets the shared memory configuration for a device function. * *
     * CUresult cuFuncSetSharedMemConfig (
     *      CUfunction hfunc,
     *      CUsharedconfig config )
     * 
*
*

Sets the shared memory configuration for * a device function. On devices with configurable shared memory banks, * this function * will force all subsequent launches of * the specified device function to have the given shared memory bank size * configuration. * On any given launch of the function, the * shared memory configuration of the device will be temporarily changed * if needed to * suit the function's preferred * configuration. Changes in shared memory configuration between subsequent * launches of functions, * may introduce a device side synchronization * point. *

*

Any per-function setting of shared * memory bank size set via cuFuncSetSharedMemConfig will override the * context wide setting set with cuCtxSetSharedMemConfig. *

*

Changing the shared memory bank size * will not increase shared memory usage or affect occupancy of kernels, * but may have major * effects on performance. Larger bank sizes * will allow for greater potential bandwidth to shared memory, but will * change what * kinds of accesses to shared memory will * result in bank conflicts. *

*

This function will do nothing on devices * with fixed shared memory bank size. *

*

The supported bank configurations are: *

    *
  • *

    CU_SHARED_MEM_CONFIG_DEFAULT_BANK_SIZE: * use the context's shared memory configuration when launching this * function. *

    *
  • *
  • *

    CU_SHARED_MEM_CONFIG_FOUR_BYTE_BANK_SIZE: set shared memory bank width * to be natively four bytes when launching this function. *

    *
  • *
  • *

    CU_SHARED_MEM_CONFIG_EIGHT_BYTE_BANK_SIZE: set shared memory bank * width to be natively eight bytes when launching this function. *

    *
  • *
*

*
* Note: *

Note that this * function may also return error codes from previous, asynchronous * launches. *

*
*

*
* * @param hfunc kernel to be given a shared memory config * @param config requested shared memory configuration * * @return CUDA_SUCCESS, CUDA_ERROR_INVALID_VALUE, CUDA_ERROR_DEINITIALIZED, * CUDA_ERROR_NOT_INITIALIZED, CUDA_ERROR_INVALID_CONTEXT * * @see JCudaDriver#cuCtxGetCacheConfig * @see JCudaDriver#cuCtxSetCacheConfig * @see JCudaDriver#cuCtxGetSharedMemConfig * @see JCudaDriver#cuCtxSetSharedMemConfigcuFuncGetAttribute * @see JCudaDriver#cuLaunchKernel */ public static int cuFuncSetSharedMemConfig(CUfunction hfunc, int config) { return checkResult(cuFuncSetSharedMemConfigNative(hfunc, config)); } private static native int cuFuncSetSharedMemConfigNative(CUfunction hfunc, int config); /** * Creates a 1D or 2D CUDA array. * *
     * CUresult cuArrayCreate (
     *      CUarray* pHandle,
     *      const CUDA_ARRAY_DESCRIPTOR* pAllocateArray )
     * 
*
*

Creates a 1D or 2D CUDA array. Creates * a CUDA array according to the CUDA_ARRAY_DESCRIPTOR structure pAllocateArray and returns a handle to the new CUDA array in *pHandle. The CUDA_ARRAY_DESCRIPTOR is defined as: *

*
    typedef struct {
     *         unsigned int Width;
     *         unsigned int Height;
     *         CUarray_format Format;
     *         unsigned int NumChannels;
     *     } CUDA_ARRAY_DESCRIPTOR;
* where:

*
    *
  • *

    Width, and Height are the width, and height of the CUDA array (in elements); * the CUDA array is one-dimensional if height is 0, two-dimensional * otherwise; *

    *
  • *
  • *
    * Format specifies the format * of the elements; CUarray_format is defined as: *
        typedef enum
         * CUarray_format_enum {
         *         CU_AD_FORMAT_UNSIGNED_INT8 = 0x01,
         *         CU_AD_FORMAT_UNSIGNED_INT16 = 0x02,
         *         CU_AD_FORMAT_UNSIGNED_INT32 = 0x03,
         *         CU_AD_FORMAT_SIGNED_INT8 = 0x08,
         *         CU_AD_FORMAT_SIGNED_INT16 = 0x09,
         *         CU_AD_FORMAT_SIGNED_INT32 = 0x0a,
         *         CU_AD_FORMAT_HALF = 0x10,
         *         CU_AD_FORMAT_FLOAT = 0x20
         *     } CUarray_format;
    *
    *
  • *
  • *

    NumChannels specifies * the number of packed components per CUDA array element; it may be 1, * 2, or 4; *

    *
  • *
*

*

Here are examples of CUDA array * descriptions: *

*

Description for a CUDA array of 2048 * floats: *

    CUDA_ARRAY_DESCRIPTOR desc;
     *     desc.Format = CU_AD_FORMAT_FLOAT;
     *     desc.NumChannels = 1;
     *     desc.Width = 2048;
     *     desc.Height = 1;
*

*

Description for a 64 x 64 CUDA array of * floats: *

    CUDA_ARRAY_DESCRIPTOR desc;
     *     desc.Format = CU_AD_FORMAT_FLOAT;
     *     desc.NumChannels = 1;
     *     desc.Width = 64;
     *     desc.Height = 64;
*

*

Description for a width x height CUDA array of 64-bit, 4x16-bit float16's: *

     * CUDA_ARRAY_DESCRIPTOR desc;
     *     desc.FormatFlags = CU_AD_FORMAT_HALF;
     *     desc.NumChannels = 4;
     *     desc.Width = width;
     *     desc.Height = height;
*

*

Description for a width x height CUDA array of 16-bit elements, each of which is two 8-bit * unsigned chars: *

    CUDA_ARRAY_DESCRIPTOR arrayDesc;
     *     desc.FormatFlags = CU_AD_FORMAT_UNSIGNED_INT8;
     *     desc.NumChannels = 2;
     *     desc.Width = width;
     *     desc.Height = height;
*

*
* Note: *

Note that this * function may also return error codes from previous, asynchronous * launches. *

*
*

*
* * @param pHandle Returned array * @param pAllocateArray Array descriptor * * @return CUDA_SUCCESS, CUDA_ERROR_DEINITIALIZED, CUDA_ERROR_NOT_INITIALIZED, * CUDA_ERROR_INVALID_CONTEXT, CUDA_ERROR_INVALID_VALUE, * CUDA_ERROR_OUT_OF_MEMORY, CUDA_ERROR_UNKNOWN * * @see JCudaDriver#cuArray3DCreate * @see JCudaDriver#cuArray3DGetDescriptor * @see JCudaDriver#cuArrayDestroy * @see JCudaDriver#cuArrayGetDescriptor * @see JCudaDriver#cuMemAlloc * @see JCudaDriver#cuMemAllocHost * @see JCudaDriver#cuMemAllocPitch * @see JCudaDriver#cuMemcpy2D * @see JCudaDriver#cuMemcpy2DAsync * @see JCudaDriver#cuMemcpy2DUnaligned * @see JCudaDriver#cuMemcpy3D * @see JCudaDriver#cuMemcpy3DAsync * @see JCudaDriver#cuMemcpyAtoA * @see JCudaDriver#cuMemcpyAtoD * @see JCudaDriver#cuMemcpyAtoH * @see JCudaDriver#cuMemcpyAtoHAsync * @see JCudaDriver#cuMemcpyDtoA * @see JCudaDriver#cuMemcpyDtoD * @see JCudaDriver#cuMemcpyDtoDAsync * @see JCudaDriver#cuMemcpyDtoH * @see JCudaDriver#cuMemcpyDtoHAsync * @see JCudaDriver#cuMemcpyHtoA * @see JCudaDriver#cuMemcpyHtoAAsync * @see JCudaDriver#cuMemcpyHtoD * @see JCudaDriver#cuMemcpyHtoDAsync * @see JCudaDriver#cuMemFree * @see JCudaDriver#cuMemFreeHost * @see JCudaDriver#cuMemGetAddressRange * @see JCudaDriver#cuMemGetInfo * @see JCudaDriver#cuMemHostAlloc * @see JCudaDriver#cuMemHostGetDevicePointer * @see JCudaDriver#cuMemsetD2D8 * @see JCudaDriver#cuMemsetD2D16 * @see JCudaDriver#cuMemsetD2D32 * @see JCudaDriver#cuMemsetD8 * @see JCudaDriver#cuMemsetD16 * @see JCudaDriver#cuMemsetD32 */ public static int cuArrayCreate(CUarray pHandle, CUDA_ARRAY_DESCRIPTOR pAllocateArray) { return checkResult(cuArrayCreateNative(pHandle, pAllocateArray)); } private static native int cuArrayCreateNative(CUarray pHandle, CUDA_ARRAY_DESCRIPTOR pAllocateArray); /** * Get a 1D or 2D CUDA array descriptor. * *
     * CUresult cuArrayGetDescriptor (
     *      CUDA_ARRAY_DESCRIPTOR* pArrayDescriptor,
     *      CUarray hArray )
     * 
*
*

Get a 1D or 2D CUDA array descriptor. * Returns in *pArrayDescriptor a descriptor containing * information on the format and dimensions of the CUDA array hArray. It is useful for subroutines that have been passed a CUDA * array, but need to know the CUDA array parameters for validation * or other purposes. *

*
* Note: *

Note that this * function may also return error codes from previous, asynchronous * launches. *

*
*

*
* * @param pArrayDescriptor Returned array descriptor * @param hArray Array to get descriptor of * * @return CUDA_SUCCESS, CUDA_ERROR_DEINITIALIZED, CUDA_ERROR_NOT_INITIALIZED, * CUDA_ERROR_INVALID_CONTEXT, CUDA_ERROR_INVALID_VALUE, * CUDA_ERROR_INVALID_HANDLE * * @see JCudaDriver#cuArray3DCreate * @see JCudaDriver#cuArray3DGetDescriptor * @see JCudaDriver#cuArrayCreate * @see JCudaDriver#cuArrayDestroy * @see JCudaDriver#cuMemAlloc * @see JCudaDriver#cuMemAllocHost * @see JCudaDriver#cuMemAllocPitch * @see JCudaDriver#cuMemcpy2D * @see JCudaDriver#cuMemcpy2DAsync * @see JCudaDriver#cuMemcpy2DUnaligned * @see JCudaDriver#cuMemcpy3D * @see JCudaDriver#cuMemcpy3DAsync * @see JCudaDriver#cuMemcpyAtoA * @see JCudaDriver#cuMemcpyAtoD * @see JCudaDriver#cuMemcpyAtoH * @see JCudaDriver#cuMemcpyAtoHAsync * @see JCudaDriver#cuMemcpyDtoA * @see JCudaDriver#cuMemcpyDtoD * @see JCudaDriver#cuMemcpyDtoDAsync * @see JCudaDriver#cuMemcpyDtoH * @see JCudaDriver#cuMemcpyDtoHAsync * @see JCudaDriver#cuMemcpyHtoA * @see JCudaDriver#cuMemcpyHtoAAsync * @see JCudaDriver#cuMemcpyHtoD * @see JCudaDriver#cuMemcpyHtoDAsync * @see JCudaDriver#cuMemFree * @see JCudaDriver#cuMemFreeHost * @see JCudaDriver#cuMemGetAddressRange * @see JCudaDriver#cuMemGetInfo * @see JCudaDriver#cuMemHostAlloc * @see JCudaDriver#cuMemHostGetDevicePointer * @see JCudaDriver#cuMemsetD2D8 * @see JCudaDriver#cuMemsetD2D16 * @see JCudaDriver#cuMemsetD2D32 * @see JCudaDriver#cuMemsetD8 * @see JCudaDriver#cuMemsetD16 * @see JCudaDriver#cuMemsetD32 */ public static int cuArrayGetDescriptor(CUDA_ARRAY_DESCRIPTOR pArrayDescriptor, CUarray hArray) { return checkResult(cuArrayGetDescriptorNative(pArrayDescriptor, hArray)); } private static native int cuArrayGetDescriptorNative(CUDA_ARRAY_DESCRIPTOR pArrayDescriptor, CUarray hArray); /** * Returns the layout properties of a sparse CUDA array. 
* * Returns the layout properties of a sparse CUDA array in \p sparseProperties * If the CUDA array is not allocated with flag ::CUDA_ARRAY3D_SPARSE * ::CUDA_ERROR_INVALID_VALUE will be returned. * * If the returned value in ::CUDA_ARRAY_SPARSE_PROPERTIES::flags contains ::CU_ARRAY_SPARSE_PROPERTIES_SINGLE_MIPTAIL, * then ::CUDA_ARRAY_SPARSE_PROPERTIES::miptailSize represents the total size of the array. Otherwise, it will be zero. * Also, the returned value in ::CUDA_ARRAY_SPARSE_PROPERTIES::miptailFirstLevel is always zero. * Note that the \p array must have been allocated using ::cuArrayCreate or ::cuArray3DCreate. For CUDA arrays obtained * using ::cuMipmappedArrayGetLevel, ::CUDA_ERROR_INVALID_VALUE will be returned. Instead, ::cuMipmappedArrayGetSparseProperties * must be used to obtain the sparse properties of the entire CUDA mipmapped array to which \p array belongs to. * * @return * CUDA_SUCCESS * CUDA_ERROR_INVALID_VALUE * * @param sparseProperties Pointer to ::CUDA_ARRAY_SPARSE_PROPERTIES * @param array CUDA array to get the sparse properties of * * @see JCudaDriver#cuMipmappedArrayGetSparseProperties * @see JCudaDriver#cuMemMapArrayAsync */ public static int cuArrayGetSparseProperties(CUDA_ARRAY_SPARSE_PROPERTIES sparseProperties, CUarray array) { return checkResult(cuArrayGetSparsePropertiesNative(sparseProperties, array)); } private static native int cuArrayGetSparsePropertiesNative(CUDA_ARRAY_SPARSE_PROPERTIES sparseProperties, CUarray array); /** * Returns the layout properties of a sparse CUDA mipmapped array. * * Returns the sparse array layout properties in \p sparseProperties * If the CUDA mipmapped array is not allocated with flag ::CUDA_ARRAY3D_SPARSE * ::CUDA_ERROR_INVALID_VALUE will be returned. * * For non-layered CUDA mipmapped arrays, ::CUDA_ARRAY_SPARSE_PROPERTIES::miptailSize returns the * size of the mip tail region. The mip tail region includes all mip levels whose width, height or depth * is less than that of the tile. 
* For layered CUDA mipmapped arrays, if ::CUDA_ARRAY_SPARSE_PROPERTIES::flags contains ::CU_ARRAY_SPARSE_PROPERTIES_SINGLE_MIPTAIL, * then ::CUDA_ARRAY_SPARSE_PROPERTIES::miptailSize specifies the size of the mip tail of all layers combined. * Otherwise, ::CUDA_ARRAY_SPARSE_PROPERTIES::miptailSize specifies mip tail size per layer. * The returned value of ::CUDA_ARRAY_SPARSE_PROPERTIES::miptailFirstLevel is valid only if ::CUDA_ARRAY_SPARSE_PROPERTIES::miptailSize is non-zero. * * @return * CUDA_SUCCESS * CUDA_ERROR_INVALID_VALUE * * @param sparseProperties - Pointer to ::CUDA_ARRAY_SPARSE_PROPERTIES * @param mipmap - CUDA mipmapped array to get the sparse properties of * * @see JCudaDriver#cuArrayGetSparseProperties * @see JCudaDriver#cuMemMapArrayAsync */ public static int cuMipmappedArrayGetSparseProperties(CUDA_ARRAY_SPARSE_PROPERTIES sparseProperties, CUmipmappedArray mipmap) { return checkResult(cuMipmappedArrayGetSparsePropertiesNative(sparseProperties, mipmap)); } private static native int cuMipmappedArrayGetSparsePropertiesNative(CUDA_ARRAY_SPARSE_PROPERTIES sparseProperties, CUmipmappedArray mipmap); /** *

     * \brief Gets a CUDA array plane from a CUDA array
     *
     * Returns in \p pPlaneArray a CUDA array that represents a single format plane
     * of the CUDA array \p hArray.
     *
     * If \p planeIdx is greater than the maximum number of planes in this array or if the array does
     * not have a multi-planar format e.g: ::CU_AD_FORMAT_NV12, then ::CUDA_ERROR_INVALID_VALUE is returned.
     *
     * Note that if the \p hArray has format ::CU_AD_FORMAT_NV12, then passing in 0 for \p planeIdx returns
     * a CUDA array of the same size as \p hArray but with one channel and ::CU_AD_FORMAT_UNSIGNED_INT8 as its format.
     * If 1 is passed for \p planeIdx, then the returned CUDA array has half the height and width
     * of \p hArray with two channels and ::CU_AD_FORMAT_UNSIGNED_INT8 as its format.
     *
     * \param pPlaneArray   - Returned CUDA array referenced by the \p planeIdx
     * \param hArray        - Multiplanar CUDA array
     * \param planeIdx      - Plane index
     *
     * \return
     * ::CUDA_SUCCESS,
     * ::CUDA_ERROR_DEINITIALIZED,
     * ::CUDA_ERROR_NOT_INITIALIZED,
     * ::CUDA_ERROR_INVALID_CONTEXT,
     * ::CUDA_ERROR_INVALID_VALUE,
     * ::CUDA_ERROR_INVALID_HANDLE
     * \notefnerr
     *
     * \sa
     * ::cuArrayCreate,
     * ::cudaGetArrayPlane
     * 
*/ public static int cuArrayGetPlane(CUarray pPlaneArray, CUarray hArray, int planeIdx) { return checkResult(cuArrayGetPlaneNative(pPlaneArray, hArray, planeIdx)); } private static native int cuArrayGetPlaneNative(CUarray pPlaneArray, CUarray hArray, int planeIdx); /** * Destroys a CUDA array. * *
     * CUresult cuArrayDestroy (
     *      CUarray hArray )
     * 
*
*

Destroys a CUDA array. Destroys the CUDA * array hArray. *

*
* Note: *

Note that this * function may also return error codes from previous, asynchronous * launches. *

*
*

*
* * @param hArray Array to destroy * * @return CUDA_SUCCESS, CUDA_ERROR_DEINITIALIZED, CUDA_ERROR_NOT_INITIALIZED, * CUDA_ERROR_INVALID_CONTEXT, CUDA_ERROR_INVALID_HANDLE, * CUDA_ERROR_ARRAY_IS_MAPPED * * @see JCudaDriver#cuArray3DCreate * @see JCudaDriver#cuArray3DGetDescriptor * @see JCudaDriver#cuArrayCreate * @see JCudaDriver#cuArrayGetDescriptor * @see JCudaDriver#cuMemAlloc * @see JCudaDriver#cuMemAllocHost * @see JCudaDriver#cuMemAllocPitch * @see JCudaDriver#cuMemcpy2D * @see JCudaDriver#cuMemcpy2DAsync * @see JCudaDriver#cuMemcpy2DUnaligned * @see JCudaDriver#cuMemcpy3D * @see JCudaDriver#cuMemcpy3DAsync * @see JCudaDriver#cuMemcpyAtoA * @see JCudaDriver#cuMemcpyAtoD * @see JCudaDriver#cuMemcpyAtoH * @see JCudaDriver#cuMemcpyAtoHAsync * @see JCudaDriver#cuMemcpyDtoA * @see JCudaDriver#cuMemcpyDtoD * @see JCudaDriver#cuMemcpyDtoDAsync * @see JCudaDriver#cuMemcpyDtoH * @see JCudaDriver#cuMemcpyDtoHAsync * @see JCudaDriver#cuMemcpyHtoA * @see JCudaDriver#cuMemcpyHtoAAsync * @see JCudaDriver#cuMemcpyHtoD * @see JCudaDriver#cuMemcpyHtoDAsync * @see JCudaDriver#cuMemFree * @see JCudaDriver#cuMemFreeHost * @see JCudaDriver#cuMemGetAddressRange * @see JCudaDriver#cuMemGetInfo * @see JCudaDriver#cuMemHostAlloc * @see JCudaDriver#cuMemHostGetDevicePointer * @see JCudaDriver#cuMemsetD2D8 * @see JCudaDriver#cuMemsetD2D16 * @see JCudaDriver#cuMemsetD2D32 * @see JCudaDriver#cuMemsetD8 * @see JCudaDriver#cuMemsetD16 * @see JCudaDriver#cuMemsetD32 */ public static int cuArrayDestroy(CUarray hArray) { return checkResult(cuArrayDestroyNative(hArray)); } private static native int cuArrayDestroyNative(CUarray hArray); /** * Creates a 3D CUDA array. * *
     * CUresult cuArray3DCreate (
     *      CUarray* pHandle,
     *      const CUDA_ARRAY3D_DESCRIPTOR* pAllocateArray )
     * 
*
*

Creates a 3D CUDA array. Creates a CUDA * array according to the CUDA_ARRAY3D_DESCRIPTOR structure pAllocateArray and returns a handle to the new CUDA array in *pHandle. The CUDA_ARRAY3D_DESCRIPTOR is defined as: *

*
    typedef struct {
     *         unsigned int Width;
     *         unsigned int Height;
     *         unsigned int Depth;
     *         CUarray_format Format;
     *         unsigned int NumChannels;
     *         unsigned int Flags;
     *     } CUDA_ARRAY3D_DESCRIPTOR;
* where:

*
    *
  • *
    * Width, Height, and Depth are the width, height, and depth of * the CUDA array (in elements); the following types of CUDA arrays can * be allocated: *
      *
    • *

      A 1D array is allocated * if Height and Depth extents are both zero. *

      *
    • *
    • *

      A 2D array is allocated * if only Depth extent is zero. *

      *
    • *
    • *

      A 3D array is allocated * if all three extents are non-zero. *

      *
    • *
    • *

      A 1D layered CUDA * array is allocated if only Height is zero and the * CUDA_ARRAY3D_LAYERED flag is set. Each layer is a 1D array. The number * of layers is determined by the depth extent. *

      *
    • *
    • *

      A 2D layered CUDA * array is allocated if all three extents are non-zero and the * CUDA_ARRAY3D_LAYERED flag is set. Each layer is a 2D array. The number * of layers is determined by the depth extent. *

      *
    • *
    • *

      A cubemap CUDA array * is allocated if all three extents are non-zero and the CUDA_ARRAY3D_CUBEMAP * flag is set. Width must be equal to Height, and * Depth must be six. A cubemap is a special type of 2D layered * CUDA array, where the six layers represent the six faces of a cube. * The order of the six * layers in memory is the same as that listed in CUarray_cubemap_face. *

      *
    • *
    • *

      A cubemap layered CUDA * array is allocated if all three extents are non-zero, and both, * CUDA_ARRAY3D_CUBEMAP and CUDA_ARRAY3D_LAYERED flags are set. Width must be equal to Height, and Depth must * be a multiple of six. A cubemap layered CUDA array is a special type * of 2D layered CUDA array that consists of a collection * of cubemaps. The first * six layers represent the first cubemap, the next six layers form the * second cubemap, and so on. *

      *
    • *
    *
    *
  • *
*

*
    *
  • *
    * Format specifies the format * of the elements; CUarray_format is defined as: *
        typedef enum
         * CUarray_format_enum {
         *         CU_AD_FORMAT_UNSIGNED_INT8 = 0x01,
         *         CU_AD_FORMAT_UNSIGNED_INT16 = 0x02,
         *         CU_AD_FORMAT_UNSIGNED_INT32 = 0x03,
         *         CU_AD_FORMAT_SIGNED_INT8 = 0x08,
         *         CU_AD_FORMAT_SIGNED_INT16 = 0x09,
         *         CU_AD_FORMAT_SIGNED_INT32 = 0x0a,
         *         CU_AD_FORMAT_HALF = 0x10,
         *         CU_AD_FORMAT_FLOAT = 0x20
         *     } CUarray_format;
    *
    *
  • *
*

*
    *
  • *

    NumChannels specifies * the number of packed components per CUDA array element; it may be 1, * 2, or 4; *

    *
  • *
*

*
    *
  • *
    * Flags may be set to *
      *
    • *

      CUDA_ARRAY3D_LAYERED * to enable creation of layered CUDA arrays. If this flag is set, Depth specifies the number of layers, not the depth of a 3D * array. *

      *
    • *
    • *

      CUDA_ARRAY3D_SURFACE_LDST * to enable surface references to be bound to the CUDA array. If this * flag is not set, cuSurfRefSetArray will fail when attempting to bind * the CUDA array to a surface reference. *

      *
    • *
    • *

      CUDA_ARRAY3D_CUBEMAP * to enable creation of cubemaps. If this flag is set, Width * must be equal to Height, and Depth must be six. If * the CUDA_ARRAY3D_LAYERED flag is also set, then Depth must * be a multiple of six. *

      *
    • *
    • *

      CUDA_ARRAY3D_TEXTURE_GATHER * to indicate that the CUDA array will be used for texture gather. * Texture gather can only be performed on 2D CUDA arrays. *

      *
    • *
    *
    *
  • *
*

*

Width, Height and * Depth must meet certain size requirements as listed in the * following table. All values are specified in elements. Note that for * brevity's sake, the full name of the * device attribute is not specified. For ex., TEXTURE1D_WIDTH refers to * the device attribute * CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE1D_WIDTH. *

*

Note that 2D CUDA arrays have different * size requirements if the CUDA_ARRAY3D_TEXTURE_GATHER flag is set. Width and Height must not be greater than * CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE2D_GATHER_WIDTH and * CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE2D_GATHER_HEIGHT respectively, in * that case. *

*
* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * *
*

CUDA array * type *

*
*

Valid extents * that must always be met * {(width range in * elements), (height range), (depth range)} *

*
*

Valid extents * with CUDA_ARRAY3D_SURFACE_LDST set * {(width range in * elements), (height range), (depth range)} *

*
*

1D

*
*

{ (1,TEXTURE1D_WIDTH), * 0, 0 } *

*
*

{ (1,SURFACE1D_WIDTH), * 0, 0 } *

*
*

2D

*
*

{ (1,TEXTURE2D_WIDTH), * (1,TEXTURE2D_HEIGHT), 0 } *

*
*

{ (1,SURFACE2D_WIDTH), * (1,SURFACE2D_HEIGHT), 0 } *

*
*

3D

*
*

{ (1,TEXTURE3D_WIDTH), * (1,TEXTURE3D_HEIGHT), (1,TEXTURE3D_DEPTH) } * OR * { * (1,TEXTURE3D_WIDTH_ALTERNATE), (1,TEXTURE3D_HEIGHT_ALTERNATE), * (1,TEXTURE3D_DEPTH_ALTERNATE) } *

*
*

{ (1,SURFACE3D_WIDTH), * (1,SURFACE3D_HEIGHT), (1,SURFACE3D_DEPTH) } *

*
*

1D Layered

*
*

{ * (1,TEXTURE1D_LAYERED_WIDTH), 0, (1,TEXTURE1D_LAYERED_LAYERS) } *

*
*

{ * (1,SURFACE1D_LAYERED_WIDTH), 0, (1,SURFACE1D_LAYERED_LAYERS) } *

*
*

2D Layered

*
*

{ * (1,TEXTURE2D_LAYERED_WIDTH), (1,TEXTURE2D_LAYERED_HEIGHT), * (1,TEXTURE2D_LAYERED_LAYERS) } *

*
*

{ * (1,SURFACE2D_LAYERED_WIDTH), (1,SURFACE2D_LAYERED_HEIGHT), * (1,SURFACE2D_LAYERED_LAYERS) } *

*
*

Cubemap

*
*

{ (1,TEXTURECUBEMAP_WIDTH), * (1,TEXTURECUBEMAP_WIDTH), 6 } *

*
*

{ (1,SURFACECUBEMAP_WIDTH), * (1,SURFACECUBEMAP_WIDTH), 6 } *

*
*

Cubemap Layered

*
*

{ * (1,TEXTURECUBEMAP_LAYERED_WIDTH), (1,TEXTURECUBEMAP_LAYERED_WIDTH), * (1,TEXTURECUBEMAP_LAYERED_LAYERS) } *

*
*

{ * (1,SURFACECUBEMAP_LAYERED_WIDTH), (1,SURFACECUBEMAP_LAYERED_WIDTH), * (1,SURFACECUBEMAP_LAYERED_LAYERS) } *

*
*
*

*

Here are examples of CUDA array * descriptions: *

*

Description for a CUDA array of 2048 * floats: *

    CUDA_ARRAY3D_DESCRIPTOR desc;
     *     desc.Format = CU_AD_FORMAT_FLOAT;
     *     desc.NumChannels = 1;
     *     desc.Width = 2048;
     *     desc.Height = 0;
     *     desc.Depth = 0;
*

*

Description for a 64 x 64 CUDA array of * floats: *

    CUDA_ARRAY3D_DESCRIPTOR desc;
     *     desc.Format = CU_AD_FORMAT_FLOAT;
     *     desc.NumChannels = 1;
     *     desc.Width = 64;
     *     desc.Height = 64;
     *     desc.Depth = 0;
*

*

Description for a width x height x depth CUDA array of 64-bit, 4x16-bit float16's: *

    CUDA_ARRAY3D_DESCRIPTOR desc;
     *     desc.FormatFlags = CU_AD_FORMAT_HALF;
     *     desc.NumChannels = 4;
     *     desc.Width = width;
     *     desc.Height = height;
     *     desc.Depth = depth;
*

*
* Note: *

Note that this * function may also return error codes from previous, asynchronous * launches. *

*
*

*
* * @param pHandle Returned array * @param pAllocateArray 3D array descriptor * * @return CUDA_SUCCESS, CUDA_ERROR_DEINITIALIZED, CUDA_ERROR_NOT_INITIALIZED, * CUDA_ERROR_INVALID_CONTEXT, CUDA_ERROR_INVALID_VALUE, * CUDA_ERROR_OUT_OF_MEMORY, CUDA_ERROR_UNKNOWN * * @see JCudaDriver#cuArray3DGetDescriptor * @see JCudaDriver#cuArrayCreate * @see JCudaDriver#cuArrayDestroy * @see JCudaDriver#cuArrayGetDescriptor * @see JCudaDriver#cuMemAlloc * @see JCudaDriver#cuMemAllocHost * @see JCudaDriver#cuMemAllocPitch * @see JCudaDriver#cuMemcpy2D * @see JCudaDriver#cuMemcpy2DAsync * @see JCudaDriver#cuMemcpy2DUnaligned * @see JCudaDriver#cuMemcpy3D * @see JCudaDriver#cuMemcpy3DAsync * @see JCudaDriver#cuMemcpyAtoA * @see JCudaDriver#cuMemcpyAtoD * @see JCudaDriver#cuMemcpyAtoH * @see JCudaDriver#cuMemcpyAtoHAsync * @see JCudaDriver#cuMemcpyDtoA * @see JCudaDriver#cuMemcpyDtoD * @see JCudaDriver#cuMemcpyDtoDAsync * @see JCudaDriver#cuMemcpyDtoH * @see JCudaDriver#cuMemcpyDtoHAsync * @see JCudaDriver#cuMemcpyHtoA * @see JCudaDriver#cuMemcpyHtoAAsync * @see JCudaDriver#cuMemcpyHtoD * @see JCudaDriver#cuMemcpyHtoDAsync * @see JCudaDriver#cuMemFree * @see JCudaDriver#cuMemFreeHost * @see JCudaDriver#cuMemGetAddressRange * @see JCudaDriver#cuMemGetInfo * @see JCudaDriver#cuMemHostAlloc * @see JCudaDriver#cuMemHostGetDevicePointer * @see JCudaDriver#cuMemsetD2D8 * @see JCudaDriver#cuMemsetD2D16 * @see JCudaDriver#cuMemsetD2D32 * @see JCudaDriver#cuMemsetD8 * @see JCudaDriver#cuMemsetD16 * @see JCudaDriver#cuMemsetD32 */ public static int cuArray3DCreate(CUarray pHandle, CUDA_ARRAY3D_DESCRIPTOR pAllocateArray) { return checkResult(cuArray3DCreateNative(pHandle, pAllocateArray)); } private static native int cuArray3DCreateNative(CUarray pHandle, CUDA_ARRAY3D_DESCRIPTOR pAllocateArray); /** * Get a 3D CUDA array descriptor. * *
     * CUresult cuArray3DGetDescriptor (
     *      CUDA_ARRAY3D_DESCRIPTOR* pArrayDescriptor,
     *      CUarray hArray )
     * 
*
*

Get a 3D CUDA array descriptor. Returns * in *pArrayDescriptor a descriptor containing information on * the format and dimensions of the CUDA array hArray. It is * useful for subroutines that have been passed a CUDA array, but need to * know the CUDA array parameters for validation * or other purposes. *

*

This function may be called on 1D and * 2D arrays, in which case the Height and/or Depth * members of the descriptor struct will be set to 0. *

*
* Note: *

Note that this * function may also return error codes from previous, asynchronous * launches. *

*
*

*
* * @param pArrayDescriptor Returned 3D array descriptor * @param hArray 3D array to get descriptor of * * @return CUDA_SUCCESS, CUDA_ERROR_DEINITIALIZED, CUDA_ERROR_NOT_INITIALIZED, * CUDA_ERROR_INVALID_CONTEXT, CUDA_ERROR_INVALID_VALUE, * CUDA_ERROR_INVALID_HANDLE * * @see JCudaDriver#cuArray3DCreate * @see JCudaDriver#cuArrayCreate * @see JCudaDriver#cuArrayDestroy * @see JCudaDriver#cuArrayGetDescriptor * @see JCudaDriver#cuMemAlloc * @see JCudaDriver#cuMemAllocHost * @see JCudaDriver#cuMemAllocPitch * @see JCudaDriver#cuMemcpy2D * @see JCudaDriver#cuMemcpy2DAsync * @see JCudaDriver#cuMemcpy2DUnaligned * @see JCudaDriver#cuMemcpy3D * @see JCudaDriver#cuMemcpy3DAsync * @see JCudaDriver#cuMemcpyAtoA * @see JCudaDriver#cuMemcpyAtoD * @see JCudaDriver#cuMemcpyAtoH * @see JCudaDriver#cuMemcpyAtoHAsync * @see JCudaDriver#cuMemcpyDtoA * @see JCudaDriver#cuMemcpyDtoD * @see JCudaDriver#cuMemcpyDtoDAsync * @see JCudaDriver#cuMemcpyDtoH * @see JCudaDriver#cuMemcpyDtoHAsync * @see JCudaDriver#cuMemcpyHtoA * @see JCudaDriver#cuMemcpyHtoAAsync * @see JCudaDriver#cuMemcpyHtoD * @see JCudaDriver#cuMemcpyHtoDAsync * @see JCudaDriver#cuMemFree * @see JCudaDriver#cuMemFreeHost * @see JCudaDriver#cuMemGetAddressRange * @see JCudaDriver#cuMemGetInfo * @see JCudaDriver#cuMemHostAlloc * @see JCudaDriver#cuMemHostGetDevicePointer * @see JCudaDriver#cuMemsetD2D8 * @see JCudaDriver#cuMemsetD2D16 * @see JCudaDriver#cuMemsetD2D32 * @see JCudaDriver#cuMemsetD8 * @see JCudaDriver#cuMemsetD16 * @see JCudaDriver#cuMemsetD32 */ public static int cuArray3DGetDescriptor(CUDA_ARRAY3D_DESCRIPTOR pArrayDescriptor, CUarray hArray) { return checkResult(cuArray3DGetDescriptorNative(pArrayDescriptor, hArray)); } private static native int cuArray3DGetDescriptorNative(CUDA_ARRAY3D_DESCRIPTOR pArrayDescriptor, CUarray hArray); /** * Creates a CUDA mipmapped array. * *
     * CUresult cuMipmappedArrayCreate (
     *      CUmipmappedArray* pHandle,
     *      const CUDA_ARRAY3D_DESCRIPTOR* pMipmappedArrayDesc,
     *      unsigned int  numMipmapLevels )
     * 
*
*

Creates a CUDA mipmapped array. Creates * a CUDA mipmapped array according to the CUDA_ARRAY3D_DESCRIPTOR * structure pMipmappedArrayDesc and returns a handle to the * new CUDA mipmapped array in *pHandle. numMipmapLevels * specifies the number of mipmap levels to be allocated. This value is * clamped to the range [1, 1 + floor(log2(max(width, height, * depth)))]. *

*

The CUDA_ARRAY3D_DESCRIPTOR is defined * as: *

*
    typedef struct {
     *         unsigned int Width;
     *         unsigned int Height;
     *         unsigned int Depth;
     *         CUarray_format Format;
     *         unsigned int NumChannels;
     *         unsigned int Flags;
     *     } CUDA_ARRAY3D_DESCRIPTOR;
* where:

*
    *
  • *
    * Width, Height, and Depth are the width, height, and depth of * the CUDA array (in elements); the following types of CUDA arrays can * be allocated: *
      *
    • *

      A 1D mipmapped array * is allocated if Height and Depth extents are both * zero. *

      *
    • *
    • *

      A 2D mipmapped array * is allocated if only Depth extent is zero. *

      *
    • *
    • *

      A 3D mipmapped array * is allocated if all three extents are non-zero. *

      *
    • *
    • *

      A 1D layered CUDA * mipmapped array is allocated if only Height is zero and the * CUDA_ARRAY3D_LAYERED flag is set. Each layer is a 1D array. The number * of layers is determined by the depth extent. *

      *
    • *
    • *

      A 2D layered CUDA * mipmapped array is allocated if all three extents are non-zero and the * CUDA_ARRAY3D_LAYERED flag is set. Each layer is a 2D array. The number * of layers is determined by the depth extent. *

      *
    • *
    • *

      A cubemap CUDA * mipmapped array is allocated if all three extents are non-zero and the * CUDA_ARRAY3D_CUBEMAP flag is set. Width must be equal to Height, and Depth must be six. A cubemap is a special * type of 2D layered CUDA array, where the six layers represent the six * faces of a cube. * The order of the six * layers in memory is the same as that listed in CUarray_cubemap_face. *

      *
    • *
    • *

      A cubemap layered CUDA * mipmapped array is allocated if all three extents are non-zero, and * both, CUDA_ARRAY3D_CUBEMAP and CUDA_ARRAY3D_LAYERED flags are set. Width must be equal to Height, and Depth must * be a multiple of six. A cubemap layered CUDA array is a special type * of 2D layered CUDA array that consists of a collection * of cubemaps. The first * six layers represent the first cubemap, the next six layers form the * second cubemap, and so on. *

      *
    • *
    *
    *
  • *
*

*
    *
  • *
    * Format specifies the format * of the elements; CUarray_format is defined as: *
        typedef enum
         * CUarray_format_enum {
         *         CU_AD_FORMAT_UNSIGNED_INT8 = 0x01,
         *         CU_AD_FORMAT_UNSIGNED_INT16 = 0x02,
         *         CU_AD_FORMAT_UNSIGNED_INT32 = 0x03,
         *         CU_AD_FORMAT_SIGNED_INT8 = 0x08,
         *         CU_AD_FORMAT_SIGNED_INT16 = 0x09,
         *         CU_AD_FORMAT_SIGNED_INT32 = 0x0a,
         *         CU_AD_FORMAT_HALF = 0x10,
         *         CU_AD_FORMAT_FLOAT = 0x20
         *     } CUarray_format;
    *
    *
  • *
*

*
    *
  • *

    NumChannels specifies * the number of packed components per CUDA array element; it may be 1, * 2, or 4; *

    *
  • *
*

*
    *
  • *
    * Flags may be set to *
      *
    • *

      CUDA_ARRAY3D_LAYERED * to enable creation of layered CUDA mipmapped arrays. If this flag is * set, Depth specifies the number of layers, not the depth of * a 3D array. *

      *
    • *
    • *

      CUDA_ARRAY3D_SURFACE_LDST * to enable surface references to be bound to individual mipmap levels * of the CUDA mipmapped array. If this flag is not set, * cuSurfRefSetArray will * fail when attempting to bind a mipmap level of the CUDA mipmapped array * to a surface reference. *

      *
    • *
    • *

      CUDA_ARRAY3D_CUBEMAP * to enable creation of mipmapped cubemaps. If this flag is set, Width must be equal to Height, and Depth must * be six. If the CUDA_ARRAY3D_LAYERED flag is also set, then Depth must be a multiple of six. *

      *
    • *
    • *

      CUDA_ARRAY3D_TEXTURE_GATHER * to indicate that the CUDA mipmapped array will be used for texture * gather. Texture gather can only be performed on 2D CUDA * mipmapped arrays. *

      *
    • *
    *
    *
  • *
*

*

Width, Height and * Depth must meet certain size requirements as listed in the * following table. All values are specified in elements. Note that for * brevity's sake, the full name of the * device attribute is not specified. For ex., TEXTURE1D_MIPMAPPED_WIDTH * refers to the device * attribute * CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE1D_MIPMAPPED_WIDTH. *

*
* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * *
*

CUDA array * type *

*
*

Valid extents * that must always be met * {(width range in * elements), (height range), (depth range)} *

*
*

1D

*
*

{ * (1,TEXTURE1D_MIPMAPPED_WIDTH), 0, 0 } *

*
*

2D

*
*

{ * (1,TEXTURE2D_MIPMAPPED_WIDTH), (1,TEXTURE2D_MIPMAPPED_HEIGHT), 0 } *

*
*

3D

*
*

{ (1,TEXTURE3D_WIDTH), * (1,TEXTURE3D_HEIGHT), (1,TEXTURE3D_DEPTH) } * OR * { * (1,TEXTURE3D_WIDTH_ALTERNATE), (1,TEXTURE3D_HEIGHT_ALTERNATE), * (1,TEXTURE3D_DEPTH_ALTERNATE) } *

*
*

1D Layered

*
*

{ * (1,TEXTURE1D_LAYERED_WIDTH), 0, (1,TEXTURE1D_LAYERED_LAYERS) } *

*
*

2D Layered

*
*

{ * (1,TEXTURE2D_LAYERED_WIDTH), (1,TEXTURE2D_LAYERED_HEIGHT), * (1,TEXTURE2D_LAYERED_LAYERS) } *

*
*

Cubemap

*
*

{ (1,TEXTURECUBEMAP_WIDTH), * (1,TEXTURECUBEMAP_WIDTH), 6 } *

*
*

Cubemap Layered

*
*

{ * (1,TEXTURECUBEMAP_LAYERED_WIDTH), (1,TEXTURECUBEMAP_LAYERED_WIDTH), * (1,TEXTURECUBEMAP_LAYERED_LAYERS) } *

*
*
*

*
* Note: *

Note that this * function may also return error codes from previous, asynchronous * launches. *

*
*

*
* * @param pHandle Returned mipmapped array * @param pMipmappedArrayDesc mipmapped array descriptor * @param numMipmapLevels Number of mipmap levels * * @return CUDA_SUCCESS, CUDA_ERROR_DEINITIALIZED, CUDA_ERROR_NOT_INITIALIZED, * CUDA_ERROR_INVALID_CONTEXT, CUDA_ERROR_INVALID_VALUE, * CUDA_ERROR_OUT_OF_MEMORY, CUDA_ERROR_UNKNOWN * * @see JCudaDriver#cuMipmappedArrayDestroy * @see JCudaDriver#cuMipmappedArrayGetLevel * @see JCudaDriver#cuArrayCreate */ public static int cuMipmappedArrayCreate(CUmipmappedArray pHandle, CUDA_ARRAY3D_DESCRIPTOR pMipmappedArrayDesc, int numMipmapLevels) { return checkResult(cuMipmappedArrayCreateNative(pHandle, pMipmappedArrayDesc, numMipmapLevels)); } private static native int cuMipmappedArrayCreateNative(CUmipmappedArray pHandle, CUDA_ARRAY3D_DESCRIPTOR pMipmappedArrayDesc, int numMipmapLevels); /** * Gets a mipmap level of a CUDA mipmapped array. * *
     * CUresult cuMipmappedArrayGetLevel (
     *      CUarray* pLevelArray,
     *      CUmipmappedArray hMipmappedArray,
     *      unsigned int  level )
     * 
*
*

Gets a mipmap level of a CUDA mipmapped * array. Returns in *pLevelArray a CUDA array that represents * a single mipmap level of the CUDA mipmapped array hMipmappedArray. *

*

If level is greater than the * maximum number of levels in this mipmapped array, CUDA_ERROR_INVALID_VALUE * is returned. *

*
* Note: *

Note that this * function may also return error codes from previous, asynchronous * launches. *

*
*

*
* * @param pLevelArray Returned mipmap level CUDA array * @param hMipmappedArray CUDA mipmapped array * @param level Mipmap level * * @return CUDA_SUCCESS, CUDA_ERROR_DEINITIALIZED, CUDA_ERROR_NOT_INITIALIZED, * CUDA_ERROR_INVALID_CONTEXT, CUDA_ERROR_INVALID_VALUE, * CUDA_ERROR_INVALID_HANDLE * * @see JCudaDriver#cuMipmappedArrayCreate * @see JCudaDriver#cuMipmappedArrayDestroy * @see JCudaDriver#cuArrayCreate */ public static int cuMipmappedArrayGetLevel(CUarray pLevelArray, CUmipmappedArray hMipmappedArray, int level) { return checkResult(cuMipmappedArrayGetLevelNative(pLevelArray, hMipmappedArray, level)); } private static native int cuMipmappedArrayGetLevelNative(CUarray pLevelArray, CUmipmappedArray hMipmappedArray, int level); /** * Destroys a CUDA mipmapped array. * *
     * CUresult cuMipmappedArrayDestroy (
     *      CUmipmappedArray hMipmappedArray )
     * </pre>
     * <p>
     * Destroys the CUDA mipmapped array hMipmappedArray.
     * <p>
     * Note that this function may also return error codes from previous,
     * asynchronous launches.
     *
     * @param hMipmappedArray Mipmapped array to destroy
     *
     * @return CUDA_SUCCESS, CUDA_ERROR_DEINITIALIZED, CUDA_ERROR_NOT_INITIALIZED,
     * CUDA_ERROR_INVALID_CONTEXT, CUDA_ERROR_INVALID_HANDLE,
     * CUDA_ERROR_ARRAY_IS_MAPPED
     *
     * @see JCudaDriver#cuMipmappedArrayCreate
     * @see JCudaDriver#cuMipmappedArrayGetLevel
     * @see JCudaDriver#cuArrayCreate
     */
    public static int cuMipmappedArrayDestroy(CUmipmappedArray hMipmappedArray)
    {
        final int result = cuMipmappedArrayDestroyNative(hMipmappedArray);
        return checkResult(result);
    }
    private static native int cuMipmappedArrayDestroyNative(CUmipmappedArray hMipmappedArray);

    /**
     * Allocate an address range reservation.
*
* Reserves a virtual address range based on the given parameters, giving * the starting address of the range in \p ptr. This API requires a system that * supports UVA. The size and address parameters must be a multiple of the * host page size and the alignment must be a power of two or zero for default * alignment. * * @param ptr Resulting pointer to start of virtual address range allocated * @param size Size of the reserved virtual address range requested * @param alignment - Alignment of the reserved virtual address range requested * @param addr Fixed starting address range requested * @param flags Currently unused, must be zero * @return CUDA_SUCCESS, CUDA_ERROR_INVALID_VALUE, CUDA_ERROR_OUT_OF_MEMORY, * CUDA_ERROR_NOT_INITIALIZED, CUDA_ERROR_DEINITIALIZED, CUDA_ERROR_NOT_PERMITTED, * CUDA_ERROR_NOT_SUPPORTED * * @see JCudaDriver#cuMemAddressFree */ public static int cuMemAddressReserve(CUdeviceptr ptr, long size, long alignment, CUdeviceptr addr, long flags) { return checkResult(cuMemAddressReserveNative(ptr, size, alignment, addr, flags)); } private static native int cuMemAddressReserveNative(CUdeviceptr ptr, long size, long alignment, CUdeviceptr addr, long flags); /** * Free an address range reservation.
*
* Frees a virtual address range reserved by cuMemAddressReserve. The size * must match what was given to memAddressReserve and the ptr given must * match what was returned from memAddressReserve. * * @param ptr Starting address of the virtual address range to free * @param size Size of the virtual address region to free * @return CUDA_SUCCESS, CUDA_ERROR_INVALID_VALUE, CUDA_ERROR_OUT_OF_MEMORY, * CUDA_ERROR_NOT_INITIALIZED, CUDA_ERROR_DEINITIALIZED, CUDA_ERROR_NOT_PERMITTED, * CUDA_ERROR_NOT_SUPPORTED * * @see JCudaDriver#cuMemAddressReserve */ public static int cuMemAddressFree(CUdeviceptr ptr, long size) { return checkResult(cuMemAddressFreeNative(ptr, size)); } private static native int cuMemAddressFreeNative(CUdeviceptr ptr, long size); /** * Create a shareable memory handle representing a memory allocation of a * given size described by the given properties.
*
* This creates a memory allocation on the target device specified through the * \p prop strcuture. The created allocation will not have any device or host * mappings. The generic memory \p handle for the allocation can be * mapped to the address space of calling process via ::cuMemMap. This handle * cannot be transmitted directly to other processes (see * ::cuMemExportToShareableHandle). On Windows, the caller must also pass * an LPSECURITYATTRIBUTE in \p prop to be associated with this handle which * limits or allows access to this handle for a recepient process (see * ::CUmemAllocationProp::win32HandleMetaData for more). The \p size of this * allocation must be a multiple of the the value given via * ::cuMemGetAllocationGranularity with the ::CU_MEM_ALLOC_GRANULARITY_MINIMUM * flag. * * @param handle Value of handle returned. All operations on this allocation are to be performed using this handle. * @param size Size of the allocation requested * @param prop Properties of the allocation to create. * @param flags flags for future use, must be zero now. * @return CUDA_SUCCESS, CUDA_ERROR_INVALID_VALUE, CUDA_ERROR_OUT_OF_MEMORY, * CUDA_ERROR_INVALID_DEVICE, CUDA_ERROR_NOT_INITIALIZED, CUDA_ERROR_DEINITIALIZED, * CUDA_ERROR_NOT_PERMITTED, CUDA_ERROR_NOT_SUPPORTED * * @see JCudaDriver#cuMemRelease * @see JCudaDriver#cuMemExportToShareableHandle * @see JCudaDriver#cuMemImportFromShareableHandle */ public static int cuMemCreate(CUmemGenericAllocationHandle handle, long size, CUmemAllocationProp prop, long flags) { return checkResult(cuMemCreateNative(handle, size, prop, flags)); } private static native int cuMemCreateNative(CUmemGenericAllocationHandle handle, long size, CUmemAllocationProp prop, long flags); /** * Release a memory handle representing a memory allocation which was * previously allocated through cuMemCreate.
*
* Frees the memory that was allocated on a device through cuMemCreate.
*
* The memory allocation will be freed when all outstanding mappings to the memory * are unmapped and when all outstanding references to the handle (including it's * shareable counterparts) are also released. The generic memory handle can be * freed when there are still outstanding mappings made with this handle. Each * time a recepient process imports a shareable handle, it needs to pair it with * ::cuMemRelease for the handle to be freed. If \p handle is not a valid handle * the behavior is undefined. * * @param handle Value of handle which was returned previously by cuMemCreate. * @return CUDA_SUCCESS, CUDA_ERROR_INVALID_VALUE, * CUDA_ERROR_NOT_INITIALIZED, CUDA_ERROR_DEINITIALIZED, CUDA_ERROR_NOT_PERMITTED, * CUDA_ERROR_NOT_SUPPORTED * * @see JCudaDriver cuMemCreate */ public static int cuMemRelease(CUmemGenericAllocationHandle handle) { return checkResult(cuMemReleaseNative(handle)); } private static native int cuMemReleaseNative(CUmemGenericAllocationHandle handle); /** * Maps an allocation handle to a reserved virtual address range.
*
* Maps bytes of memory represented by \p handle starting from byte \p offset to * \p size to address range [\p addr, \p addr + \p size]. This range must be an * address reservation previously reserved with ::cuMemAddressReserve, and * \p offset + \p size must be less than the size of the memory allocation. * Both \p ptr, \p size, and \p offset must be a multiple of the value given via * ::cuMemGetAllocationGranularity with the ::CU_MEM_ALLOC_GRANULARITY_MINIMUM flag.
*
* Please note calling ::cuMemMap does not make the address accessible, * the caller needs to update accessibility of a contiguous mapped VA * range by calling ::cuMemSetAccess.
*
* Once a recipient process obtains a shareable memory handle * from ::cuMemImportFromShareableHandle, the process must * use ::cuMemMap to map the memory into its address ranges before * setting accessibility with ::cuMemSetAccess.
*
* ::cuMemMap can only create mappings on VA range reservations * that are not currently mapped. * * @param ptr Address where memory will be mapped. * @param size Size of the memory mapping. * @param offset Offset into the memory represented by * - \p handle from which to start mapping * - Note: currently must be zero. * @param handle Handle to a shareable memory * @param flags flags for future use, must be zero now. * @return CUDA_SUCCESS, CUDA_ERROR_INVALID_VALUE, CUDA_ERROR_OUT_OF_MEMORY, * CUDA_ERROR_INVALID_DEVICE, CUDA_ERROR_NOT_INITIALIZED, CUDA_ERROR_DEINITIALIZED, * CUDA_ERROR_NOT_PERMITTED, CUDA_ERROR_NOT_SUPPORTED * * @see JCudaDriver#cuMemUnmap * @see JCudaDriver#cuMemSetAccess * @see JCudaDriver#cuMemCreate * @see JCudaDriver#cuMemAddressReserve * @see JCudaDriver#cuMemImportFromShareableHandle */ public static int cuMemMap(CUdeviceptr ptr, long size, long offset, CUmemGenericAllocationHandle handle, long flags) { return checkResult(cuMemMapNative(ptr, size, offset, handle, flags)); } private static native int cuMemMapNative(CUdeviceptr ptr, long size, long offset, CUmemGenericAllocationHandle handle, long flags); /** * Maps or unmaps subregions of sparse CUDA arrays and sparse CUDA mipmapped arrays. *

* Performs map or unmap operations on subregions of sparse CUDA arrays and sparse CUDA mipmapped arrays. * Each operation is specified by a ::CUarrayMapInfo entry in the \p mapInfoList array of size \p count. * The structure ::CUarrayMapInfo is defined in {@link CUarrayMapInfo} * where ::CUarrayMapInfo::resourceType specifies the type of resource to be operated on. * If ::CUarrayMapInfo::resourceType is set to ::CUresourcetype::CU_RESOURCE_TYPE_ARRAY then * ::CUarrayMapInfo::resource::array must be set to a valid sparse CUDA array handle. * The CUDA array must be either a 2D, 2D layered or 3D CUDA array and must have been allocated using * ::cuArrayCreate or ::cuArray3DCreate with the flag ::CUDA_ARRAY3D_SPARSE. * For CUDA arrays obtained using ::cuMipmappedArrayGetLevel, ::CUDA_ERROR_INVALID_VALUE will be returned. * If ::CUarrayMapInfo::resourceType is set to ::CUresourcetype::CU_RESOURCE_TYPE_MIPMAPPED_ARRAY * then ::CUarrayMapInfo::resource::mipmap must be set to a valid sparse CUDA mipmapped array handle. * The CUDA mipmapped array must be either a 2D, 2D layered or 3D CUDA mipmapped array and must have been * allocated using ::cuMipmappedArrayCreate with the flag ::CUDA_ARRAY3D_SPARSE. *

* ::CUarrayMapInfo::subresourceType specifies the type of subresource within the resource. * ::CUarraySparseSubresourceType_enum is defined as {@link CUarraySparseSubresourceType} * where ::CUarraySparseSubresourceType::CU_ARRAY_SPARSE_SUBRESOURCE_TYPE_SPARSE_LEVEL indicates a * sparse-miplevel which spans at least one tile in every dimension. The remaining miplevels which * are too small to span at least one tile in any dimension constitute the mip tail region as indicated by * ::CUarraySparseSubresourceType::CU_ARRAY_SPARSE_SUBRESOURCE_TYPE_MIPTAIL subresource type. *

* If ::CUarrayMapInfo::subresourceType is set to ::CUarraySparseSubresourceType::CU_ARRAY_SPARSE_SUBRESOURCE_TYPE_SPARSE_LEVEL * then ::CUarrayMapInfo::subresource::sparseLevel struct must contain valid array subregion offsets and extents. * The ::CUarrayMapInfo::subresource::sparseLevel::offsetX, ::CUarrayMapInfo::subresource::sparseLevel::offsetY * and ::CUarrayMapInfo::subresource::sparseLevel::offsetZ must specify valid X, Y and Z offsets respectively. * The ::CUarrayMapInfo::subresource::sparseLevel::extentWidth, ::CUarrayMapInfo::subresource::sparseLevel::extentHeight * and ::CUarrayMapInfo::subresource::sparseLevel::extentDepth must specify valid width, height and depth extents respectively. * These offsets and extents must be aligned to the corresponding tile dimension. * For CUDA mipmapped arrays ::CUarrayMapInfo::subresource::sparseLevel::level must specify a valid mip level index. Otherwise, * must be zero. * For layered CUDA arrays and layered CUDA mipmapped arrays ::CUarrayMapInfo::subresource::sparseLevel::layer must specify a valid layer index. Otherwise, * must be zero. * ::CUarrayMapInfo::subresource::sparseLevel::offsetZ must be zero and ::CUarrayMapInfo::subresource::sparseLevel::extentDepth * must be set to 1 for 2D and 2D layered CUDA arrays and CUDA mipmapped arrays. * Tile extents can be obtained by calling ::cuArrayGetSparseProperties and ::cuMipmappedArrayGetSparseProperties *

* If ::CUarrayMapInfo::subresourceType is set to ::CUarraySparseSubresourceType::CU_ARRAY_SPARSE_SUBRESOURCE_TYPE_MIPTAIL * then ::CUarrayMapInfo::subresource::miptail struct must contain valid mip tail offset in * ::CUarrayMapInfo::subresource::miptail::offset and size in ::CUarrayMapInfo::subresource::miptail::size. * Both, mip tail offset and mip tail size must be aligned to the tile size. * For layered CUDA mipmapped arrays which don't have the flag ::CU_ARRAY_SPARSE_PROPERTIES_SINGLE_MIPTAIL set in ::CUDA_ARRAY_SPARSE_PROPERTIES::flags * as returned by ::cuMipmappedArrayGetSparseProperties, ::CUarrayMapInfo::subresource::miptail::layer must specify a valid layer index. * Otherwise, must be zero. *

* ::CUarrayMapInfo::memOperationType specifies the type of operation. ::CUmemOperationType is defined as * {@link CUmemOperationType}. * If ::CUarrayMapInfo::memOperationType is set to ::CUmemOperationType::CU_MEM_OPERATION_TYPE_MAP then the subresource * will be mapped onto the tile pool memory specified by ::CUarrayMapInfo::memHandle at offset ::CUarrayMapInfo::offset. * The tile pool allocation has to be created by specifying the ::CU_MEM_CREATE_USAGE_TILE_POOL flag when calling ::cuMemCreate. Also, * ::CUarrayMapInfo::memHandleType must be set to ::CUmemHandleType::CU_MEM_HANDLE_TYPE_GENERIC. *

* If ::CUarrayMapInfo::memOperationType is set to ::CUmemOperationType::CU_MEM_OPERATION_TYPE_UNMAP then an unmapping operation * is performed. ::CUarrayMapInfo::memHandle must be NULL. *

* ::CUarrayMapInfo::deviceBitMask specifies the list of devices that must map or unmap physical memory. * Currently, this mask must have exactly one bit set, and the corresponding device must match the device associated with the stream. * If ::CUarrayMapInfo::memOperationType is set to ::CUmemOperationType::CU_MEM_OPERATION_TYPE_MAP, the device must also match * the device associated with the tile pool memory allocation as specified by ::CUarrayMapInfo::memHandle. *

* ::CUarrayMapInfo::flags and ::CUarrayMapInfo::reserved[] are unused and must be set to zero. * * @return * CUDA_SUCCESS, * CUDA_ERROR_INVALID_VALUE, * CUDA_ERROR_INVALID_HANDLE * * @param mapInfoList List of ::CUarrayMapInfo * @param count Count of ::CUarrayMapInfo in \p mapInfoList * @param hStream Stream identifier for the stream to use for map or unmap operations * * @see JCudaDriver#cuMipmappedArrayCreate * @see JCudaDriver#cuArrayCreate * @see JCudaDriver#cuArray3DCreate * @see JCudaDriver#cuMemCreate * @see JCudaDriver#cuArrayGetSparseProperties * @see JCudaDriver#cuMipmappedArrayGetSparseProperties */ public static int cuMemMapArrayAsync(CUarrayMapInfo mapInfoList[], int count, CUstream hStream) { return checkResult(cuMemMapArrayAsyncNative(mapInfoList, count, hStream)); } private static native int cuMemMapArrayAsyncNative(CUarrayMapInfo mapInfoList[], int count, CUstream hStream); /** * Unmap the backing memory of a given address range.
*
* The range must be the entire contiguous address range that was mapped to. In * other words, ::cuMemUnmap cannot unmap a sub-range of an address range mapped * by ::cuMemCreate / ::cuMemMap. Any backing memory allocations will be freed * if there are no existing mappings and there are no unreleased memory handles.
*
* When ::cuMemUnmap returns successfully the address range is converted to an * address reservation and can be used for a future calls to ::cuMemMap. Any new * mapping to this virtual address will need to have access granted through * ::cuMemSetAccess, as all mappings start with no accessibility setup. * * @param ptr Starting address for the virtual address range to unmap * @param size Size of the virtual address range to unmap * @return CUDA_SUCCESS, CUDA_ERROR_INVALID_VALUE, * CUDA_ERROR_INVALID_VALUE, CUDA_ERROR_NOT_INITIALIZED, CUDA_ERROR_DEINITIALIZED, * CUDA_ERROR_NOT_PERMITTED, CUDA_ERROR_NOT_SUPPORTED * * @see JCudaDriver#cuMemCreate * @see JCudaDriver#cuMemAddressReserve */ public static int cuMemUnmap(CUdeviceptr ptr, long size) { return checkResult(cuMemUnmapNative(ptr, size)); } private static native int cuMemUnmapNative(CUdeviceptr ptr, long size); /** * Set the access flags for each location specified in \p desc for the given virtual address range.
*
* Given the virtual address range via \p ptr and \p size, and the locations * in the array given by \p desc and \p count, set the access flags for the * target locations. The range must be a fully mapped address range * containing all allocations created by ::cuMemMap / ::cuMemCreate.
* * @param ptr Starting address for the virtual address range * @param size Length of the virtual address range * @param desc Array of ::CUmemAccessDesc that describe how to change the * mapping for each location specified * @param count Number of ::CUmemAccessDesc in \p desc * @return CUDA_SUCCESS, CUDA_ERROR_INVALID_VALUE, CUDA_ERROR_INVALID_DEVICE, * CUDA_ERROR_NOT_SUPPORTED * * @see JCudaDriver#cuMemSetAccess * @see JCudaDriver#cuMemCreate * @see JCudaDriver#cuMemMap */ public static int cuMemSetAccess(CUdeviceptr ptr, long size, CUmemAccessDesc desc[], long count) { return checkResult(cuMemSetAccessNative(ptr, size, desc, count)); } private static native int cuMemSetAccessNative(CUdeviceptr ptr, long size, CUmemAccessDesc desc[], long count); /** * Get the access \p flags set for the given \p location and \p ptr
*
     * @param flags Flags set for this location
     * @param location Location in which to check the flags for
     * @param ptr Address in which to check the access flags for
     * @return CUDA_SUCCESS, CUDA_ERROR_INVALID_VALUE, CUDA_ERROR_INVALID_DEVICE,
     * CUDA_ERROR_NOT_INITIALIZED, CUDA_ERROR_DEINITIALIZED, CUDA_ERROR_NOT_PERMITTED,
     * CUDA_ERROR_NOT_SUPPORTED
     *
     * @see JCudaDriver#cuMemSetAccess
     */
    public static int cuMemGetAccess(long flags[], CUmemLocation location, CUdeviceptr ptr)
    {
        final int result = cuMemGetAccessNative(flags, location, ptr);
        return checkResult(result);
    }
    private static native int cuMemGetAccessNative(long flags[], CUmemLocation location, CUdeviceptr ptr);

    /**
     * Exports an allocation to a requested shareable handle type.
*
* Given a CUDA memory handle, create a shareable memory * allocation handle that can be used to share the memory with other * processes. The recipient process can convert the shareable handle back into a * CUDA memory handle using ::cuMemImportFromShareableHandle and map * it with ::cuMemMap. The implementation of what this handle is and how it * can be transferred is defined by the requested handle type in \p handleType
*
* Once all shareable handles are closed and the allocation is released, the allocated * memory referenced will be released back to the OS and uses of the CUDA handle afterward * will lead to undefined behavior.
*
* This API can also be used in conjunction with other APIs (e.g. Vulkan, OpenGL) * that support importing memory from the shareable type
*
* @param shareableHandle Pointer to the location in which to store the requested handle type * @param handle CUDA handle for the memory allocation * @param handleType Type of shareable handle requested (defines type and size of the \p shareableHandle output parameter) * @param flags Reserved, must be zero * @return CUDA_SUCCESS, CUDA_ERROR_INVALID_VALUE, CUDA_ERROR_NOT_INITIALIZED, * CUDA_ERROR_DEINITIALIZED, CUDA_ERROR_NOT_PERMITTED, CUDA_ERROR_NOT_SUPPORTED * * @see JCudaDriver#cuMemImportFromShareableHandle */ public static int cuMemExportToShareableHandle(Pointer shareableHandle, CUmemGenericAllocationHandle handle, int handleType, long flags) { return checkResult(cuMemExportToShareableHandleNative(shareableHandle, handle, handleType, flags)); } private static native int cuMemExportToShareableHandleNative(Pointer shareableHandle, CUmemGenericAllocationHandle handle, int handleType, long flags); /** * Imports an allocation from a requested shareable handle type.
*
* If the current process cannot support the memory described by this shareable * handle, this API will error as CUDA_ERROR_NOT_SUPPORTED.
*
* \note Importing shareable handles exported from some graphics APIs(Vulkan, OpenGL, etc) * created on devices under an SLI group may not be supported, and thus this API will * return CUDA_ERROR_NOT_SUPPORTED. * There is no guarantee that the contents of \p handle will be the same CUDA memory handle * for the same given OS shareable handle, or the same underlying allocation.
* * @param handle CUDA Memory handle for the memory allocation. * @param osHandle Shareable Handle representing the memory allocation that is to be imported. * @param shHandleType handle type of the exported handle ::CUmemAllocationHandleType. * @return CUDA_SUCCESS, CUDA_ERROR_INVALID_VALUE, CUDA_ERROR_NOT_INITIALIZED, * CUDA_ERROR_DEINITIALIZED, CUDA_ERROR_NOT_PERMITTED, CUDA_ERROR_NOT_SUPPORTED * * @see JCudaDriver#cuMemExportToShareableHandle * @see JCudaDriver#cuMemMap * @see JCudaDriver#cuMemRelease */ public static int cuMemImportFromShareableHandle(CUmemGenericAllocationHandle handle, Pointer osHandle, int shHandleType) { return checkResult(cuMemImportFromShareableHandleNative(handle, osHandle, shHandleType)); } private static native int cuMemImportFromShareableHandleNative(CUmemGenericAllocationHandle handle, Pointer osHandle, int shHandleType); /** * Calculates either the minimal or recommended granularity
*
* Calculates either the minimal or recommended granularity * for a given allocation specification and returns it in granularity. This * granularity can be used as a multiple for alignment, size, or address mapping. * * @param granularity Returned granularity. * @param prop Property for which to determine the granularity for * @param option Determines which granularity to return * @return CUDA_SUCCESS, CUDA_ERROR_INVALID_VALUE, CUDA_ERROR_NOT_INITIALIZED, * CUDA_ERROR_DEINITIALIZED, CUDA_ERROR_NOT_PERMITTED, CUDA_ERROR_NOT_SUPPORTED * * @see JCudaDriver#cuMemCreate * @see JCudaDriver#cuMemMap */ public static int cuMemGetAllocationGranularity(long granularity[], CUmemAllocationProp prop, int option) { return checkResult(cuMemGetAllocationGranularityNative(granularity, prop, option)); } private static native int cuMemGetAllocationGranularityNative(long granularity[], CUmemAllocationProp prop, int option); /** * Retrieve the contents of the property structure defining properties for this handle * * @param prop Pointer to a properties structure which will hold the information about this handle * @param handle Handle which to perform the query on * @return CUDA_SUCCESS, CUDA_ERROR_INVALID_VALUE, CUDA_ERROR_NOT_INITIALIZED, * CUDA_ERROR_DEINITIALIZED, CUDA_ERROR_NOT_PERMITTED, CUDA_ERROR_NOT_SUPPORTED * * @see JCudaDriver#cuMemCreate * @see JCudaDriver#cuMemImportFromShareableHandle */ public static int cuMemGetAllocationPropertiesFromHandle(CUmemAllocationProp prop, CUmemGenericAllocationHandle handle) { return checkResult(cuMemGetAllocationPropertiesFromHandleNative(prop, handle)); } private static native int cuMemGetAllocationPropertiesFromHandleNative(CUmemAllocationProp prop, CUmemGenericAllocationHandle handle); /** * Given an address addr, returns the allocation handle of the backing memory allocation. * * The handle is guaranteed to be the same handle value used to map the memory. If the address * requested is not mapped, the function will fail. 
The returned handle must be released with * corresponding number of calls to ::cuMemRelease. * * The address addr, can be any address in a range previously mapped * by ::cuMemMap, and not necessarily the start address. * * @param handle CUDA Memory handle for the backing memory allocation. * @param addr Memory address to query, that has been mapped previously. * @return CUDA_SUCCESS, CUDA_ERROR_INVALID_VALUE, CUDA_ERROR_NOT_INITIALIZED, * CUDA_ERROR_DEINITIALIZED, CUDA_ERROR_NOT_PERMITTED, CUDA_ERROR_NOT_SUPPORTED * * @see JCudaDriver#cuMemCreate * @see JCudaDriver#cuMemRelease * @see JCudaDriver#cuMemMap */ public static int cuMemRetainAllocationHandle(CUmemGenericAllocationHandle handle, Pointer addr) { return checkResult(cuMemRetainAllocationHandleNative(handle, addr)); } private static native int cuMemRetainAllocationHandleNative(CUmemGenericAllocationHandle handle, Pointer addr); /** *
     * Frees memory with stream ordered semantics.
     *
     * Inserts a free operation into \p hStream.
     * The allocation must not be accessed after stream execution reaches the free.
     * After this API returns, accessing the memory from any subsequent work launched on the GPU
     * or querying its pointer attributes results in undefined behavior.
     * 
     * \param dptr - memory to free
     * \param hStream - The stream establishing the stream ordering contract. 
     * \returns
     * ::CUDA_SUCCESS,
     * ::CUDA_ERROR_INVALID_VALUE,
     * ::CUDA_ERROR_NOT_INITIALIZED,
     * ::CUDA_ERROR_INVALID_CONTEXT (default stream specified with no current context),
     * ::CUDA_ERROR_NOT_SUPPORTED
     * 
*/ public static int cuMemFreeAsync( CUdeviceptr dptr, CUstream hStream) { return checkResult(cuMemFreeAsyncNative(dptr, hStream)); } private static native int cuMemFreeAsyncNative( CUdeviceptr dptr, CUstream hStream); /** * Creates a texture reference. * *
     * CUresult cuTexRefCreate (
     *      CUtexref* pTexRef )
     * </pre>
     * <p>
     * Deprecated. Creates a texture reference and returns its handle in
     * *pTexRef. Once created, the application must call cuTexRefSetArray() or
     * cuTexRefSetAddress() to associate the reference with allocated memory.
     * Other texture reference functions are used to specify the format and
     * interpretation (addressing, filtering, etc.) to be used when the memory
     * is read through this texture reference.
     *
     * @param pTexRef Returned texture reference
     *
     * @return CUDA_SUCCESS, CUDA_ERROR_DEINITIALIZED, CUDA_ERROR_NOT_INITIALIZED,
     * CUDA_ERROR_INVALID_CONTEXT, CUDA_ERROR_INVALID_VALUE
     *
     * @see JCudaDriver#cuTexRefDestroy
     *
     * @deprecated Deprecated in CUDA
     */
    @Deprecated
    public static int cuTexRefCreate(CUtexref pTexRef)
    {
        final int result = cuTexRefCreateNative(pTexRef);
        return checkResult(result);
    }
    private static native int cuTexRefCreateNative(CUtexref pTexRef);

    /**
     * Destroys a texture reference.
     *
     * <pre>
     * CUresult cuTexRefDestroy (
     *      CUtexref hTexRef )
     * </pre>
     * <p>
     * Deprecated. Destroys the texture reference specified by hTexRef.
     *
     * @param hTexRef Texture reference to destroy
     *
     * @return CUDA_SUCCESS, CUDA_ERROR_DEINITIALIZED, CUDA_ERROR_NOT_INITIALIZED,
     * CUDA_ERROR_INVALID_CONTEXT, CUDA_ERROR_INVALID_VALUE
     *
     * @see JCudaDriver#cuTexRefCreate
     *
     * @deprecated Deprecated in CUDA
     */
    @Deprecated
    public static int cuTexRefDestroy(CUtexref hTexRef)
    {
        final int result = cuTexRefDestroyNative(hTexRef);
        return checkResult(result);
    }
    private static native int cuTexRefDestroyNative(CUtexref hTexRef);

    /**
     * Binds an array as a texture reference.
     *
     * <pre>
     * CUresult cuTexRefSetArray (
     *      CUtexref hTexRef,
     *      CUarray hArray,
     *      unsigned int  Flags )
     * 
*
*

Binds an array as a texture reference. * Binds the CUDA array hArray to the texture reference hTexRef. Any previous address or CUDA array state associated with * the texture reference is superseded by this function. Flags * must be set to CU_TRSA_OVERRIDE_FORMAT. Any CUDA array previously bound * to hTexRef is unbound. *

*
* * @param hTexRef Texture reference to bind * @param hArray Array to bind * @param Flags Options (must be CU_TRSA_OVERRIDE_FORMAT) * * @return CUDA_SUCCESS, CUDA_ERROR_DEINITIALIZED, CUDA_ERROR_NOT_INITIALIZED, * CUDA_ERROR_INVALID_CONTEXT, CUDA_ERROR_INVALID_VALUE * * @see JCudaDriver#cuTexRefSetAddress * @see JCudaDriver#cuTexRefSetAddress2D * @see JCudaDriver#cuTexRefSetAddressMode * @see JCudaDriver#cuTexRefSetFilterMode * @see JCudaDriver#cuTexRefSetFlags * @see JCudaDriver#cuTexRefSetFormat * @see JCudaDriver#cuTexRefGetAddress * @see JCudaDriver#cuTexRefGetAddressMode * @see JCudaDriver#cuTexRefGetArray * @see JCudaDriver#cuTexRefGetFilterMode * @see JCudaDriver#cuTexRefGetFlags * @see JCudaDriver#cuTexRefGetFormat * * @deprecated Deprecated as of CUDA 10.1 */ public static int cuTexRefSetArray(CUtexref hTexRef, CUarray hArray, int Flags) { return checkResult(cuTexRefSetArrayNative(hTexRef, hArray, Flags)); } private static native int cuTexRefSetArrayNative(CUtexref hTexRef, CUarray hArray, int Flags); /** * Binds a mipmapped array to a texture reference. * *
     * CUresult cuTexRefSetMipmappedArray (
     *      CUtexref hTexRef,
     *      CUmipmappedArray hMipmappedArray,
     *      unsigned int  Flags )
     * 
*
*

Binds a mipmapped array to a texture * reference. Binds the CUDA mipmapped array hMipmappedArray * to the texture reference hTexRef. Any previous address or * CUDA array state associated with the texture reference is superseded * by this function. Flags must be set to CU_TRSA_OVERRIDE_FORMAT. * Any CUDA array previously bound to hTexRef is unbound. *

*
* * @param hTexRef Texture reference to bind * @param hMipmappedArray Mipmapped array to bind * @param Flags Options (must be CU_TRSA_OVERRIDE_FORMAT) * * @return CUDA_SUCCESS, CUDA_ERROR_DEINITIALIZED, CUDA_ERROR_NOT_INITIALIZED, * CUDA_ERROR_INVALID_CONTEXT, CUDA_ERROR_INVALID_VALUE * * @see JCudaDriver#cuTexRefSetAddress * @see JCudaDriver#cuTexRefSetAddress2D * @see JCudaDriver#cuTexRefSetAddressMode * @see JCudaDriver#cuTexRefSetFilterMode * @see JCudaDriver#cuTexRefSetFlags * @see JCudaDriver#cuTexRefSetFormat * @see JCudaDriver#cuTexRefGetAddress * @see JCudaDriver#cuTexRefGetAddressMode * @see JCudaDriver#cuTexRefGetArray * @see JCudaDriver#cuTexRefGetFilterMode * @see JCudaDriver#cuTexRefGetFlags * @see JCudaDriver#cuTexRefGetFormat * * @deprecated Deprecated as of CUDA 10.1 */ public static int cuTexRefSetMipmappedArray(CUtexref hTexRef, CUmipmappedArray hMipmappedArray, int Flags) { return checkResult(cuTexRefSetMipmappedArrayNative(hTexRef, hMipmappedArray, Flags)); } private static native int cuTexRefSetMipmappedArrayNative(CUtexref hTexRef, CUmipmappedArray hMipmappedArray, int Flags); /** * Binds an address as a texture reference. * *
     * CUresult cuTexRefSetAddress (
     *      size_t* ByteOffset,
     *      CUtexref hTexRef,
     *      CUdeviceptr dptr,
     *      size_t bytes )
     * 
*
*

Binds an address as a texture reference. * Binds a linear address range to the texture reference hTexRef. * Any previous address or CUDA array state associated with the texture * reference is superseded by this function. Any memory * previously bound to hTexRef is * unbound. *

*

Since the hardware enforces an alignment * requirement on texture base addresses, cuTexRefSetAddress() passes back * a byte offset in *ByteOffset that must be applied to texture * fetches in order to read from the desired memory. This offset must be * divided by the texel * size and passed to kernels that read from * the texture so they can be applied to the tex1Dfetch() function. *

*

If the device memory pointer was returned * from cuMemAlloc(), the offset is guaranteed to be 0 and NULL may be * passed as the ByteOffset parameter. *

*

The total number of elements (or texels) * in the linear address range cannot exceed * CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE1D_LINEAR_WIDTH. The number of * elements is computed as (bytes / bytesPerElement), where * bytesPerElement is determined from the data format and number of * components set using cuTexRefSetFormat(). *

*
* * @param ByteOffset Returned byte offset * @param hTexRef Texture reference to bind * @param dptr Device pointer to bind * @param bytes Size of memory to bind in bytes * * @return CUDA_SUCCESS, CUDA_ERROR_DEINITIALIZED, CUDA_ERROR_NOT_INITIALIZED, * CUDA_ERROR_INVALID_CONTEXT, CUDA_ERROR_INVALID_VALUE * * @see JCudaDriver#cuTexRefSetAddress2D * @see JCudaDriver#cuTexRefSetAddressMode * @see JCudaDriver#cuTexRefSetArray * @see JCudaDriver#cuTexRefSetFilterMode * @see JCudaDriver#cuTexRefSetFlags * @see JCudaDriver#cuTexRefSetFormat * @see JCudaDriver#cuTexRefGetAddress * @see JCudaDriver#cuTexRefGetAddressMode * @see JCudaDriver#cuTexRefGetArray * @see JCudaDriver#cuTexRefGetFilterMode * @see JCudaDriver#cuTexRefGetFlags * @see JCudaDriver#cuTexRefGetFormat * * @deprecated Deprecated as of CUDA 10.1 */ public static int cuTexRefSetAddress(long ByteOffset[], CUtexref hTexRef, CUdeviceptr dptr, long bytes) { return checkResult(cuTexRefSetAddressNative(ByteOffset, hTexRef, dptr, bytes)); } private static native int cuTexRefSetAddressNative(long ByteOffset[], CUtexref hTexRef, CUdeviceptr dptr, long bytes); /** * Sets the format for a texture reference. * *
     * CUresult cuTexRefSetFormat (
     *      CUtexref hTexRef,
     *      CUarray_format fmt,
     *      int  NumPackedComponents )
     * 
*
*

Sets the format for a texture reference. * Specifies the format of the data to be read by the texture reference * hTexRef. fmt and NumPackedComponents are * exactly analogous to the Format and NumChannels members of the * CUDA_ARRAY_DESCRIPTOR structure: They specify the format of each * component and the number of components per array element. *

*
* * @param hTexRef Texture reference * @param fmt Format to set * @param NumPackedComponents Number of components per array element * * @return CUDA_SUCCESS, CUDA_ERROR_DEINITIALIZED, CUDA_ERROR_NOT_INITIALIZED, * CUDA_ERROR_INVALID_CONTEXT, CUDA_ERROR_INVALID_VALUE * * @see JCudaDriver#cuTexRefSetAddress * @see JCudaDriver#cuTexRefSetAddress2D * @see JCudaDriver#cuTexRefSetAddressMode * @see JCudaDriver#cuTexRefSetArray * @see JCudaDriver#cuTexRefSetFilterMode * @see JCudaDriver#cuTexRefSetFlags * @see JCudaDriver#cuTexRefGetAddress * @see JCudaDriver#cuTexRefGetAddressMode * @see JCudaDriver#cuTexRefGetArray * @see JCudaDriver#cuTexRefGetFilterMode * @see JCudaDriver#cuTexRefGetFlags * @see JCudaDriver#cuTexRefGetFormat * * @deprecated Deprecated as of CUDA 10.1 */ public static int cuTexRefSetFormat(CUtexref hTexRef, int fmt, int NumPackedComponents) { return checkResult(cuTexRefSetFormatNative(hTexRef, fmt, NumPackedComponents)); } private static native int cuTexRefSetFormatNative(CUtexref hTexRef, int fmt, int NumPackedComponents); /** * Binds an address as a 2D texture reference. * *
     * CUresult cuTexRefSetAddress2D (
     *      CUtexref hTexRef,
     *      const CUDA_ARRAY_DESCRIPTOR* desc,
     *      CUdeviceptr dptr,
     *      size_t Pitch )
     * 
*
*

Binds an address as a 2D texture * reference. Binds a linear address range to the texture reference hTexRef. Any previous address or CUDA array state associated with * the texture reference is superseded by this function. Any memory * previously bound to hTexRef is * unbound. *

*

Using a tex2D() function inside a kernel * requires a call to either cuTexRefSetArray() to bind the corresponding * texture reference to an array, or cuTexRefSetAddress2D() to bind the * texture reference to linear memory. *

*

Function calls to cuTexRefSetFormat() * cannot follow calls to cuTexRefSetAddress2D() for the same texture * reference. *

*

It is required that dptr be * aligned to the appropriate hardware-specific texture alignment. You * can query this value using the device attribute * CU_DEVICE_ATTRIBUTE_TEXTURE_ALIGNMENT. If an unaligned dptr * is supplied, CUDA_ERROR_INVALID_VALUE is returned. *

*

Pitch has to be aligned to * the hardware-specific texture pitch alignment. This value can be * queried using the device attribute * CU_DEVICE_ATTRIBUTE_TEXTURE_PITCH_ALIGNMENT. * If an unaligned Pitch is supplied, CUDA_ERROR_INVALID_VALUE * is returned. *

*

Width and Height, which are specified * in elements (or texels), cannot exceed * CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE2D_LINEAR_WIDTH and * CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE2D_LINEAR_HEIGHT respectively. Pitch, which is specified in bytes, cannot exceed * CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE2D_LINEAR_PITCH. *

*
* * @param hTexRef Texture reference to bind * @param desc Descriptor of CUDA array * @param dptr Device pointer to bind * @param Pitch Line pitch in bytes * * @return CUDA_SUCCESS, CUDA_ERROR_DEINITIALIZED, CUDA_ERROR_NOT_INITIALIZED, * CUDA_ERROR_INVALID_CONTEXT, CUDA_ERROR_INVALID_VALUE * * @see JCudaDriver#cuTexRefSetAddress * @see JCudaDriver#cuTexRefSetAddressMode * @see JCudaDriver#cuTexRefSetArray * @see JCudaDriver#cuTexRefSetFilterMode * @see JCudaDriver#cuTexRefSetFlags * @see JCudaDriver#cuTexRefSetFormat * @see JCudaDriver#cuTexRefGetAddress * @see JCudaDriver#cuTexRefGetAddressMode * @see JCudaDriver#cuTexRefGetArray * @see JCudaDriver#cuTexRefGetFilterMode * @see JCudaDriver#cuTexRefGetFlags * @see JCudaDriver#cuTexRefGetFormat * * @deprecated Deprecated as of CUDA 10.1 */ public static int cuTexRefSetAddress2D(CUtexref hTexRef, CUDA_ARRAY_DESCRIPTOR desc, CUdeviceptr dptr, long PitchInBytes) { return checkResult(cuTexRefSetAddress2DNative(hTexRef, desc, dptr, PitchInBytes)); } private static native int cuTexRefSetAddress2DNative(CUtexref hTexRef, CUDA_ARRAY_DESCRIPTOR desc, CUdeviceptr dptr, long PitchInBytes); /** * Sets the addressing mode for a texture reference. * *
     * CUresult cuTexRefSetAddressMode (
     *      CUtexref hTexRef,
     *      int  dim,
     *      CUaddress_mode am )
     * 
*
*

Sets the addressing mode for a texture * reference. Specifies the addressing mode am for the given * dimension dim of the texture reference hTexRef. If * dim is zero, the addressing mode is applied to the first * parameter of the functions used to fetch from the texture; if dim is 1, the second, and so on. CUaddress_mode is defined as: *

   typedef enum CUaddress_mode_enum {
     *       CU_TR_ADDRESS_MODE_WRAP = 0,
     *       CU_TR_ADDRESS_MODE_CLAMP = 1,
     *       CU_TR_ADDRESS_MODE_MIRROR = 2,
     *       CU_TR_ADDRESS_MODE_BORDER = 3
     *    } CUaddress_mode;
*

*

Note that this call has no effect if * hTexRef is bound to linear memory. Also, if the flag, * CU_TRSF_NORMALIZED_COORDINATES, is not set, the only supported address * mode is CU_TR_ADDRESS_MODE_CLAMP. *

*
* * @param hTexRef Texture reference * @param dim Dimension * @param am Addressing mode to set * * @return CUDA_SUCCESS, CUDA_ERROR_DEINITIALIZED, CUDA_ERROR_NOT_INITIALIZED, * CUDA_ERROR_INVALID_CONTEXT, CUDA_ERROR_INVALID_VALUE * * @see JCudaDriver#cuTexRefSetAddress * @see JCudaDriver#cuTexRefSetAddress2D * @see JCudaDriver#cuTexRefSetArray * @see JCudaDriver#cuTexRefSetFilterMode * @see JCudaDriver#cuTexRefSetFlags * @see JCudaDriver#cuTexRefSetFormat * @see JCudaDriver#cuTexRefGetAddress * @see JCudaDriver#cuTexRefGetAddressMode * @see JCudaDriver#cuTexRefGetArray * @see JCudaDriver#cuTexRefGetFilterMode * @see JCudaDriver#cuTexRefGetFlags * @see JCudaDriver#cuTexRefGetFormat * * @deprecated Deprecated as of CUDA 10.1 */ public static int cuTexRefSetAddressMode(CUtexref hTexRef, int dim, int am) { return checkResult(cuTexRefSetAddressModeNative(hTexRef, dim, am)); } private static native int cuTexRefSetAddressModeNative(CUtexref hTexRef, int dim, int am); /** * Sets the filtering mode for a texture reference. * *
     * CUresult cuTexRefSetFilterMode (
     *      CUtexref hTexRef,
     *      CUfilter_mode fm )
     * 
*
*

Sets the filtering mode for a texture * reference. Specifies the filtering mode fm to be used when * reading memory through the texture reference hTexRef. * CUfilter_mode_enum is defined as: *

*
   typedef enum CUfilter_mode_enum {
     *       CU_TR_FILTER_MODE_POINT = 0,
     *       CU_TR_FILTER_MODE_LINEAR = 1
     *    } CUfilter_mode;
*

*

Note that this call has no effect if * hTexRef is bound to linear memory. *

*
* * @param hTexRef Texture reference * @param fm Filtering mode to set * * @return CUDA_SUCCESS, CUDA_ERROR_DEINITIALIZED, CUDA_ERROR_NOT_INITIALIZED, * CUDA_ERROR_INVALID_CONTEXT, CUDA_ERROR_INVALID_VALUE * * @see JCudaDriver#cuTexRefSetAddress * @see JCudaDriver#cuTexRefSetAddress2D * @see JCudaDriver#cuTexRefSetAddressMode * @see JCudaDriver#cuTexRefSetArray * @see JCudaDriver#cuTexRefSetFlags * @see JCudaDriver#cuTexRefSetFormat * @see JCudaDriver#cuTexRefGetAddress * @see JCudaDriver#cuTexRefGetAddressMode * @see JCudaDriver#cuTexRefGetArray * @see JCudaDriver#cuTexRefGetFilterMode * @see JCudaDriver#cuTexRefGetFlags * @see JCudaDriver#cuTexRefGetFormat * * @deprecated Deprecated as of CUDA 10.1 */ public static int cuTexRefSetFilterMode(CUtexref hTexRef, int fm) { return checkResult(cuTexRefSetFilterModeNative(hTexRef, fm)); } private static native int cuTexRefSetFilterModeNative(CUtexref hTexRef, int fm); /** * Sets the mipmap filtering mode for a texture reference. * *
     * CUresult cuTexRefSetMipmapFilterMode (
     *      CUtexref hTexRef,
     *      CUfilter_mode fm )
     * 
*
*

Sets the mipmap filtering mode for a * texture reference. Specifies the mipmap filtering mode fm * to be used when reading memory through the texture reference hTexRef. CUfilter_mode_enum is defined as: *

*
   typedef enum CUfilter_mode_enum {
     *       CU_TR_FILTER_MODE_POINT = 0,
     *       CU_TR_FILTER_MODE_LINEAR = 1
     *    } CUfilter_mode;
*

*

Note that this call has no effect if * hTexRef is not bound to a mipmapped array. *

*
* * @param hTexRef Texture reference * @param fm Filtering mode to set * * @return CUDA_SUCCESS, CUDA_ERROR_DEINITIALIZED, CUDA_ERROR_NOT_INITIALIZED, * CUDA_ERROR_INVALID_CONTEXT, CUDA_ERROR_INVALID_VALUE * * @see JCudaDriver#cuTexRefSetAddress * @see JCudaDriver#cuTexRefSetAddress2D * @see JCudaDriver#cuTexRefSetAddressMode * @see JCudaDriver#cuTexRefSetArray * @see JCudaDriver#cuTexRefSetFlags * @see JCudaDriver#cuTexRefSetFormat * @see JCudaDriver#cuTexRefGetAddress * @see JCudaDriver#cuTexRefGetAddressMode * @see JCudaDriver#cuTexRefGetArray * @see JCudaDriver#cuTexRefGetFilterMode * @see JCudaDriver#cuTexRefGetFlags * @see JCudaDriver#cuTexRefGetFormat * * @deprecated Deprecated as of CUDA 10.1 */ public static int cuTexRefSetMipmapFilterMode(CUtexref hTexRef, int fm) { return checkResult(cuTexRefSetMipmapFilterModeNative(hTexRef, fm)); } private static native int cuTexRefSetMipmapFilterModeNative(CUtexref hTexRef, int fm); /** * Sets the mipmap level bias for a texture reference. * *
     * CUresult cuTexRefSetMipmapLevelBias (
     *      CUtexref hTexRef,
     *      float  bias )
     * 
*
*

Sets the mipmap level bias for a texture * reference. Specifies the mipmap level bias bias to be added * to the specified mipmap level when reading memory through the texture * reference hTexRef. *

*

Note that this call has no effect if * hTexRef is not bound to a mipmapped array. *

*
* * @param hTexRef Texture reference * @param bias Mipmap level bias * * @return CUDA_SUCCESS, CUDA_ERROR_DEINITIALIZED, CUDA_ERROR_NOT_INITIALIZED, * CUDA_ERROR_INVALID_CONTEXT, CUDA_ERROR_INVALID_VALUE * * @see JCudaDriver#cuTexRefSetAddress * @see JCudaDriver#cuTexRefSetAddress2D * @see JCudaDriver#cuTexRefSetAddressMode * @see JCudaDriver#cuTexRefSetArray * @see JCudaDriver#cuTexRefSetFlags * @see JCudaDriver#cuTexRefSetFormat * @see JCudaDriver#cuTexRefGetAddress * @see JCudaDriver#cuTexRefGetAddressMode * @see JCudaDriver#cuTexRefGetArray * @see JCudaDriver#cuTexRefGetFilterMode * @see JCudaDriver#cuTexRefGetFlags * @see JCudaDriver#cuTexRefGetFormat * * @deprecated Deprecated as of CUDA 10.1 */ public static int cuTexRefSetMipmapLevelBias(CUtexref hTexRef, float bias) { return checkResult(cuTexRefSetMipmapLevelBiasNative(hTexRef, bias)); } private static native int cuTexRefSetMipmapLevelBiasNative(CUtexref hTexRef, float bias); /** * Sets the mipmap min/max mipmap level clamps for a texture reference. * *
     * CUresult cuTexRefSetMipmapLevelClamp (
     *      CUtexref hTexRef,
     *      float  minMipmapLevelClamp,
     *      float  maxMipmapLevelClamp )
     * 
*
*

Sets the mipmap min/max mipmap level * clamps for a texture reference. Specifies the min/max mipmap level * clamps, minMipmapLevelClamp and maxMipmapLevelClamp * respectively, to be used when reading memory through the texture * reference hTexRef. *

*

Note that this call has no effect if * hTexRef is not bound to a mipmapped array. *

*
* * @param hTexRef Texture reference * @param minMipmapLevelClamp Mipmap min level clamp * @param maxMipmapLevelClamp Mipmap max level clamp * * @return CUDA_SUCCESS, CUDA_ERROR_DEINITIALIZED, CUDA_ERROR_NOT_INITIALIZED, * CUDA_ERROR_INVALID_CONTEXT, CUDA_ERROR_INVALID_VALUE * * @see JCudaDriver#cuTexRefSetAddress * @see JCudaDriver#cuTexRefSetAddress2D * @see JCudaDriver#cuTexRefSetAddressMode * @see JCudaDriver#cuTexRefSetArray * @see JCudaDriver#cuTexRefSetFlags * @see JCudaDriver#cuTexRefSetFormat * @see JCudaDriver#cuTexRefGetAddress * @see JCudaDriver#cuTexRefGetAddressMode * @see JCudaDriver#cuTexRefGetArray * @see JCudaDriver#cuTexRefGetFilterMode * @see JCudaDriver#cuTexRefGetFlags * @see JCudaDriver#cuTexRefGetFormat * * @deprecated Deprecated as of CUDA 10.1 */ public static int cuTexRefSetMipmapLevelClamp(CUtexref hTexRef, float minMipmapLevelClamp, float maxMipmapLevelClamp) { return checkResult(cuTexRefSetMipmapLevelClampNative(hTexRef, minMipmapLevelClamp, maxMipmapLevelClamp)); } private static native int cuTexRefSetMipmapLevelClampNative(CUtexref hTexRef, float minMipmapLevelClamp, float maxMipmapLevelClamp); /** * Sets the maximum anistropy for a texture reference. * *
     * CUresult cuTexRefSetMaxAnisotropy (
     *      CUtexref hTexRef,
     *      unsigned int  maxAniso )
     * 
*
*

Sets the maximum anistropy for a texture * reference. Specifies the maximum aniostropy maxAniso to be * used when reading memory through the texture reference hTexRef. *

*

Note that this call has no effect if * hTexRef is bound to linear memory. *

*
* * @param hTexRef Texture reference * @param maxAniso Maximum anisotropy * * @return CUDA_SUCCESS, CUDA_ERROR_DEINITIALIZED, CUDA_ERROR_NOT_INITIALIZED, * CUDA_ERROR_INVALID_CONTEXT, CUDA_ERROR_INVALID_VALUE * * @see JCudaDriver#cuTexRefSetAddress * @see JCudaDriver#cuTexRefSetAddress2D * @see JCudaDriver#cuTexRefSetAddressMode * @see JCudaDriver#cuTexRefSetArray * @see JCudaDriver#cuTexRefSetFlags * @see JCudaDriver#cuTexRefSetFormat * @see JCudaDriver#cuTexRefGetAddress * @see JCudaDriver#cuTexRefGetAddressMode * @see JCudaDriver#cuTexRefGetArray * @see JCudaDriver#cuTexRefGetFilterMode * @see JCudaDriver#cuTexRefGetFlags * @see JCudaDriver#cuTexRefGetFormat * * @deprecated Deprecated as of CUDA 10.1 */ public static int cuTexRefSetMaxAnisotropy(CUtexref hTexRef, int maxAniso) { return checkResult(cuTexRefSetMaxAnisotropyNative(hTexRef, maxAniso)); } private static native int cuTexRefSetMaxAnisotropyNative(CUtexref hTexRef, int maxAniso); /** * Sets the border color for a texture reference
*
* Specifies the value of the RGBA color via the pBorderColor to the texture reference * hTexRef. The color value supports only float type and holds color components in * the following sequence:
* pBorderColor[0] holds 'R' component
* pBorderColor[1] holds 'G' component
* pBorderColor[2] holds 'B' component
* pBorderColor[3] holds 'A' component
*
* Note that the color values can be set only when the Address mode is set to * CU_TR_ADDRESS_MODE_BORDER using ::cuTexRefSetAddressMode. * Applications using integer border color values have to "reinterpret_cast" their values to float. * * @param hTexRef Texture reference * @param pBorderColor RGBA color * @return CUDA_SUCCESS, CUDA_ERROR_DEINITIALIZED, CUDA_ERROR_NOT_INITIALIZED, * CUDA_ERROR_INVALID_CONTEXT, CUDA_ERROR_INVALID_VALUE * * @see JCudaDriver#cuTexRefSetAddressMode * @see JCudaDriver#cuTexRefGetAddressMode * @see JCudaDriver#cuTexRefGetBorderColor * * @deprecated Deprecated as of CUDA 10.1 */ public static int cuTexRefSetBorderColor(CUtexref hTexRef, float pBorderColor[]) { return checkResult(cuTexRefSetBorderColorNative(hTexRef, pBorderColor)); } private static native int cuTexRefSetBorderColorNative(CUtexref hTexRef, float pBorderColor[]); /** * Sets the flags for a texture reference. * *
     * CUresult cuTexRefSetFlags (
     *      CUtexref hTexRef,
     *      unsigned int  Flags )
     * 
*
*

Sets the flags for a texture reference. * Specifies optional flags via Flags to specify the behavior * of data returned through the texture reference hTexRef. The * valid flags are: *

*
    *
  • *

    CU_TRSF_READ_AS_INTEGER, which * suppresses the default behavior of having the texture promote integer * data to floating point data in the range [0, * 1]. Note that texture with * 32-bit integer format would not be promoted, regardless of whether or * not this flag is specified; *

    *
  • *
  • *

    CU_TRSF_NORMALIZED_COORDINATES, * which suppresses the default behavior of having the texture coordinates * range from [0, Dim) where Dim is the width or height * of the CUDA array. Instead, the * texture coordinates [0, 1.0) reference the entire breadth of the array * dimension; *

    *
  • *
*

*
* * @param hTexRef Texture reference * @param Flags Optional flags to set * * @return CUDA_SUCCESS, CUDA_ERROR_DEINITIALIZED, CUDA_ERROR_NOT_INITIALIZED, * CUDA_ERROR_INVALID_CONTEXT, CUDA_ERROR_INVALID_VALUE * * @see JCudaDriver#cuTexRefSetAddress * @see JCudaDriver#cuTexRefSetAddress2D * @see JCudaDriver#cuTexRefSetAddressMode * @see JCudaDriver#cuTexRefSetArray * @see JCudaDriver#cuTexRefSetFilterMode * @see JCudaDriver#cuTexRefSetFormat * @see JCudaDriver#cuTexRefGetAddress * @see JCudaDriver#cuTexRefGetAddressMode * @see JCudaDriver#cuTexRefGetArray * @see JCudaDriver#cuTexRefGetFilterMode * @see JCudaDriver#cuTexRefGetFlags * @see JCudaDriver#cuTexRefGetFormat * * @deprecated Deprecated as of CUDA 10.1 */ public static int cuTexRefSetFlags(CUtexref hTexRef, int Flags) { return checkResult(cuTexRefSetFlagsNative(hTexRef, Flags)); } private static native int cuTexRefSetFlagsNative(CUtexref hTexRef, int Flags); /** * Gets the address associated with a texture reference. * *
     * CUresult cuTexRefGetAddress (
     *      CUdeviceptr* pdptr,
     *      CUtexref hTexRef )
     * 
*
*

Gets the address associated with a * texture reference. Returns in *pdptr the base address bound * to the texture reference hTexRef, or returns * CUDA_ERROR_INVALID_VALUE if the texture reference is not bound to any * device memory range. *

*
* * @param pdptr Returned device address * @param hTexRef Texture reference * * @return CUDA_SUCCESS, CUDA_ERROR_DEINITIALIZED, CUDA_ERROR_NOT_INITIALIZED, * CUDA_ERROR_INVALID_CONTEXT, CUDA_ERROR_INVALID_VALUE * * @see JCudaDriver#cuTexRefSetAddress * @see JCudaDriver#cuTexRefSetAddress2D * @see JCudaDriver#cuTexRefSetAddressMode * @see JCudaDriver#cuTexRefSetArray * @see JCudaDriver#cuTexRefSetFilterMode * @see JCudaDriver#cuTexRefSetFlags * @see JCudaDriver#cuTexRefSetFormat * @see JCudaDriver#cuTexRefGetAddressMode * @see JCudaDriver#cuTexRefGetArray * @see JCudaDriver#cuTexRefGetFilterMode * @see JCudaDriver#cuTexRefGetFlags * @see JCudaDriver#cuTexRefGetFormat * * @deprecated Deprecated as of CUDA 10.1 */ public static int cuTexRefGetAddress(CUdeviceptr pdptr, CUtexref hTexRef) { return checkResult(cuTexRefGetAddressNative(pdptr, hTexRef)); } private static native int cuTexRefGetAddressNative(CUdeviceptr pdptr, CUtexref hTexRef); /** * Gets the array bound to a texture reference. * *
     * CUresult cuTexRefGetArray (
     *      CUarray* phArray,
     *      CUtexref hTexRef )
     * 
*
*

Gets the array bound to a texture * reference. Returns in *phArray the CUDA array bound to the * texture reference hTexRef, or returns CUDA_ERROR_INVALID_VALUE * if the texture reference is not bound to any CUDA array. *

*
* * @param phArray Returned array * @param hTexRef Texture reference * * @return CUDA_SUCCESS, CUDA_ERROR_DEINITIALIZED, CUDA_ERROR_NOT_INITIALIZED, * CUDA_ERROR_INVALID_CONTEXT, CUDA_ERROR_INVALID_VALUE * * @see JCudaDriver#cuTexRefSetAddress * @see JCudaDriver#cuTexRefSetAddress2D * @see JCudaDriver#cuTexRefSetAddressMode * @see JCudaDriver#cuTexRefSetArray * @see JCudaDriver#cuTexRefSetFilterMode * @see JCudaDriver#cuTexRefSetFlags * @see JCudaDriver#cuTexRefSetFormat * @see JCudaDriver#cuTexRefGetAddress * @see JCudaDriver#cuTexRefGetAddressMode * @see JCudaDriver#cuTexRefGetFilterMode * @see JCudaDriver#cuTexRefGetFlags * @see JCudaDriver#cuTexRefGetFormat * * @deprecated Deprecated as of CUDA 10.1 */ public static int cuTexRefGetArray(CUarray phArray, CUtexref hTexRef) { return checkResult(cuTexRefGetArrayNative(phArray, hTexRef)); } private static native int cuTexRefGetArrayNative(CUarray phArray, CUtexref hTexRef); /** * Gets the mipmapped array bound to a texture reference. * *
     * CUresult cuTexRefGetMipmappedArray (
     *      CUmipmappedArray* phMipmappedArray,
     *      CUtexref hTexRef )
     * 
*
*

Gets the mipmapped array bound to a * texture reference. Returns in *phMipmappedArray the CUDA * mipmapped array bound to the texture reference hTexRef, or * returns CUDA_ERROR_INVALID_VALUE if the texture reference is not bound * to any CUDA mipmapped array. *

*
* * @param phMipmappedArray Returned mipmapped array * @param hTexRef Texture reference * * @return CUDA_SUCCESS, CUDA_ERROR_DEINITIALIZED, CUDA_ERROR_NOT_INITIALIZED, * CUDA_ERROR_INVALID_CONTEXT, CUDA_ERROR_INVALID_VALUE * * @see JCudaDriver#cuTexRefSetAddress * @see JCudaDriver#cuTexRefSetAddress2D * @see JCudaDriver#cuTexRefSetAddressMode * @see JCudaDriver#cuTexRefSetArray * @see JCudaDriver#cuTexRefSetFilterMode * @see JCudaDriver#cuTexRefSetFlags * @see JCudaDriver#cuTexRefSetFormat * @see JCudaDriver#cuTexRefGetAddress * @see JCudaDriver#cuTexRefGetAddressMode * @see JCudaDriver#cuTexRefGetFilterMode * @see JCudaDriver#cuTexRefGetFlags * @see JCudaDriver#cuTexRefGetFormat * * @deprecated Deprecated as of CUDA 10.1 */ public static int cuTexRefGetMipmappedArray(CUmipmappedArray phMipmappedArray, CUtexref hTexRef) { return checkResult(cuTexRefGetMipmappedArrayNative(phMipmappedArray, hTexRef)); } private static native int cuTexRefGetMipmappedArrayNative(CUmipmappedArray phMipmappedArray, CUtexref hTexRef); /** * Gets the addressing mode used by a texture reference. * *
     * CUresult cuTexRefGetAddressMode (
     *      CUaddress_mode* pam,
     *      CUtexref hTexRef,
     *      int  dim )
     * 
*
*

Gets the addressing mode used by a * texture reference. Returns in *pam the addressing mode * corresponding to the dimension dim of the texture reference * hTexRef. Currently, the only valid value for dim * are 0 and 1. *

*
* * @param pam Returned addressing mode * @param hTexRef Texture reference * @param dim Dimension * * @return CUDA_SUCCESS, CUDA_ERROR_DEINITIALIZED, CUDA_ERROR_NOT_INITIALIZED, * CUDA_ERROR_INVALID_CONTEXT, CUDA_ERROR_INVALID_VALUE * * @see JCudaDriver#cuTexRefSetAddress * @see JCudaDriver#cuTexRefSetAddress2D * @see JCudaDriver#cuTexRefSetAddressMode * @see JCudaDriver#cuTexRefSetArray * @see JCudaDriver#cuTexRefSetFilterMode * @see JCudaDriver#cuTexRefSetFlags * @see JCudaDriver#cuTexRefSetFormat * @see JCudaDriver#cuTexRefGetAddress * @see JCudaDriver#cuTexRefGetArray * @see JCudaDriver#cuTexRefGetFilterMode * @see JCudaDriver#cuTexRefGetFlags * @see JCudaDriver#cuTexRefGetFormat * * @deprecated Deprecated as of CUDA 10.1 */ public static int cuTexRefGetAddressMode(int pam[], CUtexref hTexRef, int dim) { return checkResult(cuTexRefGetAddressModeNative(pam, hTexRef, dim)); } private static native int cuTexRefGetAddressModeNative(int pam[], CUtexref hTexRef, int dim); /** * Gets the filter-mode used by a texture reference. * *
     * CUresult cuTexRefGetFilterMode (
     *      CUfilter_mode* pfm,
     *      CUtexref hTexRef )
     * 
*
*

Gets the filter-mode used by a texture * reference. Returns in *pfm the filtering mode of the texture * reference hTexRef. *

*
* * @param pfm Returned filtering mode * @param hTexRef Texture reference * * @return CUDA_SUCCESS, CUDA_ERROR_DEINITIALIZED, CUDA_ERROR_NOT_INITIALIZED, * CUDA_ERROR_INVALID_CONTEXT, CUDA_ERROR_INVALID_VALUE * * @see JCudaDriver#cuTexRefSetAddress * @see JCudaDriver#cuTexRefSetAddress2D * @see JCudaDriver#cuTexRefSetAddressMode * @see JCudaDriver#cuTexRefSetArray * @see JCudaDriver#cuTexRefSetFilterMode * @see JCudaDriver#cuTexRefSetFlags * @see JCudaDriver#cuTexRefSetFormat * @see JCudaDriver#cuTexRefGetAddress * @see JCudaDriver#cuTexRefGetAddressMode * @see JCudaDriver#cuTexRefGetArray * @see JCudaDriver#cuTexRefGetFlags * @see JCudaDriver#cuTexRefGetFormat * * @deprecated Deprecated as of CUDA 10.1 */ public static int cuTexRefGetFilterMode(int pfm[], CUtexref hTexRef) { return checkResult(cuTexRefGetFilterModeNative(pfm, hTexRef)); } private static native int cuTexRefGetFilterModeNative(int pfm[], CUtexref hTexRef); /** * Gets the format used by a texture reference. * *
     * CUresult cuTexRefGetFormat (
     *      CUarray_format* pFormat,
     *      int* pNumChannels,
     *      CUtexref hTexRef )
     * 
*
*

Gets the format used by a texture * reference. Returns in *pFormat and *pNumChannels * the format and number of components of the CUDA array bound to the * texture reference hTexRef. If pFormat or pNumChannels is NULL, it will be ignored. *

*
* * @param pFormat Returned format * @param pNumChannels Returned number of components * @param hTexRef Texture reference * * @return CUDA_SUCCESS, CUDA_ERROR_DEINITIALIZED, CUDA_ERROR_NOT_INITIALIZED, * CUDA_ERROR_INVALID_CONTEXT, CUDA_ERROR_INVALID_VALUE * * @see JCudaDriver#cuTexRefSetAddress * @see JCudaDriver#cuTexRefSetAddress2D * @see JCudaDriver#cuTexRefSetAddressMode * @see JCudaDriver#cuTexRefSetArray * @see JCudaDriver#cuTexRefSetFilterMode * @see JCudaDriver#cuTexRefSetFlags * @see JCudaDriver#cuTexRefSetFormat * @see JCudaDriver#cuTexRefGetAddress * @see JCudaDriver#cuTexRefGetAddressMode * @see JCudaDriver#cuTexRefGetArray * @see JCudaDriver#cuTexRefGetFilterMode * @see JCudaDriver#cuTexRefGetFlags * * @deprecated Deprecated as of CUDA 10.1 */ public static int cuTexRefGetFormat(int pFormat[], int pNumChannels[], CUtexref hTexRef) { return checkResult(cuTexRefGetFormatNative(pFormat, pNumChannels, hTexRef)); } private static native int cuTexRefGetFormatNative(int pFormat[], int pNumChannels[], CUtexref hTexRef); /** * Gets the mipmap filtering mode for a texture reference. * *
     * CUresult cuTexRefGetMipmapFilterMode (
     *      CUfilter_mode* pfm,
     *      CUtexref hTexRef )
     * 
*
*

Gets the mipmap filtering mode for a * texture reference. Returns the mipmap filtering mode in pfm * that's used when reading memory through the texture reference hTexRef. *

*
* * @param pfm Returned mipmap filtering mode * @param hTexRef Texture reference * * @return CUDA_SUCCESS, CUDA_ERROR_DEINITIALIZED, CUDA_ERROR_NOT_INITIALIZED, * CUDA_ERROR_INVALID_CONTEXT, CUDA_ERROR_INVALID_VALUE * * @see JCudaDriver#cuTexRefSetAddress * @see JCudaDriver#cuTexRefSetAddress2D * @see JCudaDriver#cuTexRefSetAddressMode * @see JCudaDriver#cuTexRefSetArray * @see JCudaDriver#cuTexRefSetFlags * @see JCudaDriver#cuTexRefSetFormat * @see JCudaDriver#cuTexRefGetAddress * @see JCudaDriver#cuTexRefGetAddressMode * @see JCudaDriver#cuTexRefGetArray * @see JCudaDriver#cuTexRefGetFilterMode * @see JCudaDriver#cuTexRefGetFlags * @see JCudaDriver#cuTexRefGetFormat * * @deprecated Deprecated as of CUDA 10.1 */ public static int cuTexRefGetMipmapFilterMode(int pfm[], CUtexref hTexRef) { return checkResult(cuTexRefGetMipmapFilterModeNative(pfm, hTexRef)); } private static native int cuTexRefGetMipmapFilterModeNative(int pfm[], CUtexref hTexRef); /** * Gets the mipmap level bias for a texture reference. * *
     * CUresult cuTexRefGetMipmapLevelBias (
     *      float* pbias,
     *      CUtexref hTexRef )
     * 
*
*

Gets the mipmap level bias for a texture * reference. Returns the mipmap level bias in pBias that's * added to the specified mipmap level when reading memory through the * texture reference hTexRef. *

*
* * @param pbias Returned mipmap level bias * @param hTexRef Texture reference * * @return CUDA_SUCCESS, CUDA_ERROR_DEINITIALIZED, CUDA_ERROR_NOT_INITIALIZED, * CUDA_ERROR_INVALID_CONTEXT, CUDA_ERROR_INVALID_VALUE * * @see JCudaDriver#cuTexRefSetAddress * @see JCudaDriver#cuTexRefSetAddress2D * @see JCudaDriver#cuTexRefSetAddressMode * @see JCudaDriver#cuTexRefSetArray * @see JCudaDriver#cuTexRefSetFlags * @see JCudaDriver#cuTexRefSetFormat * @see JCudaDriver#cuTexRefGetAddress * @see JCudaDriver#cuTexRefGetAddressMode * @see JCudaDriver#cuTexRefGetArray * @see JCudaDriver#cuTexRefGetFilterMode * @see JCudaDriver#cuTexRefGetFlags * @see JCudaDriver#cuTexRefGetFormat * * @deprecated Deprecated as of CUDA 10.1 */ public static int cuTexRefGetMipmapLevelBias(float pbias[], CUtexref hTexRef) { return checkResult(cuTexRefGetMipmapLevelBiasNative(pbias, hTexRef)); } private static native int cuTexRefGetMipmapLevelBiasNative(float pbias[], CUtexref hTexRef); /** * Gets the min/max mipmap level clamps for a texture reference. * *
     * CUresult cuTexRefGetMipmapLevelClamp (
     *      float* pminMipmapLevelClamp,
     *      float* pmaxMipmapLevelClamp,
     *      CUtexref hTexRef )
     * 
*
*

Gets the min/max mipmap level clamps for * a texture reference. Returns the min/max mipmap level clamps in pminMipmapLevelClamp and pmaxMipmapLevelClamp that's * used when reading memory through the texture reference hTexRef. *

*
* * @param pminMipmapLevelClamp Returned mipmap min level clamp * @param pmaxMipmapLevelClamp Returned mipmap max level clamp * @param hTexRef Texture reference * * @return CUDA_SUCCESS, CUDA_ERROR_DEINITIALIZED, CUDA_ERROR_NOT_INITIALIZED, * CUDA_ERROR_INVALID_CONTEXT, CUDA_ERROR_INVALID_VALUE * * @see JCudaDriver#cuTexRefSetAddress * @see JCudaDriver#cuTexRefSetAddress2D * @see JCudaDriver#cuTexRefSetAddressMode * @see JCudaDriver#cuTexRefSetArray * @see JCudaDriver#cuTexRefSetFlags * @see JCudaDriver#cuTexRefSetFormat * @see JCudaDriver#cuTexRefGetAddress * @see JCudaDriver#cuTexRefGetAddressMode * @see JCudaDriver#cuTexRefGetArray * @see JCudaDriver#cuTexRefGetFilterMode * @see JCudaDriver#cuTexRefGetFlags * @see JCudaDriver#cuTexRefGetFormat * * @deprecated Deprecated as of CUDA 10.1 */ public static int cuTexRefGetMipmapLevelClamp(float pminMipmapLevelClamp[], float pmaxMipmapLevelClamp[], CUtexref hTexRef) { return checkResult(cuTexRefGetMipmapLevelClampNative(pminMipmapLevelClamp, pmaxMipmapLevelClamp, hTexRef)); } private static native int cuTexRefGetMipmapLevelClampNative(float pminMipmapLevelClamp[], float pmaxMipmapLevelClamp[], CUtexref hTexRef); /** * Gets the maximum anistropy for a texture reference. * *
     * CUresult cuTexRefGetMaxAnisotropy (
     *      int* pmaxAniso,
     *      CUtexref hTexRef )
     * 
*
*

Gets the maximum anistropy for a texture * reference. Returns the maximum aniostropy in pmaxAniso * that's used when reading memory through the texture reference hTexRef. *

*
* * @param pmaxAniso Returned maximum anisotropy * @param hTexRef Texture reference * * @return CUDA_SUCCESS, CUDA_ERROR_DEINITIALIZED, CUDA_ERROR_NOT_INITIALIZED, * CUDA_ERROR_INVALID_CONTEXT, CUDA_ERROR_INVALID_VALUE * * @see JCudaDriver#cuTexRefSetAddress * @see JCudaDriver#cuTexRefSetAddress2D * @see JCudaDriver#cuTexRefSetAddressMode * @see JCudaDriver#cuTexRefSetArray * @see JCudaDriver#cuTexRefSetFlags * @see JCudaDriver#cuTexRefSetFormat * @see JCudaDriver#cuTexRefGetAddress * @see JCudaDriver#cuTexRefGetAddressMode * @see JCudaDriver#cuTexRefGetArray * @see JCudaDriver#cuTexRefGetFilterMode * @see JCudaDriver#cuTexRefGetFlags * @see JCudaDriver#cuTexRefGetFormat * * @deprecated Deprecated as of CUDA 10.1 */ public static int cuTexRefGetMaxAnisotropy(int pmaxAniso[], CUtexref hTexRef) { return checkResult(cuTexRefGetMaxAnisotropyNative(pmaxAniso, hTexRef)); } private static native int cuTexRefGetMaxAnisotropyNative(int pmaxAniso[], CUtexref hTexRef); /** * brief Gets the border color used by a texture reference
*
* Returns in pBorderColor, values of the RGBA color used by * the texture reference hTexRef. * The color value is of type float and holds color components in * the following sequence:
* pBorderColor[0] holds 'R' component
* pBorderColor[1] holds 'G' component
* pBorderColor[2] holds 'B' component
* pBorderColor[3] holds 'A' component
* * @param hTexRef Texture reference * @param pBorderColor Returned Type and Value of RGBA color * @return CUDA_SUCCESS, CUDA_ERROR_DEINITIALIZED, CUDA_ERROR_NOT_INITIALIZED, * CUDA_ERROR_INVALID_CONTEXT, CUDA_ERROR_INVALID_VALUE * * @see JCudaDriver#cuTexRefSetAddressMode * @see JCudaDriver#cuTexRefSetAddressMode * @see JCudaDriver#cuTexRefSetBorderColor * * @deprecated Deprecated as of CUDA 10.1 */ public static int cuTexRefGetBorderColor(float pBorderColor[], CUtexref hTexRef) { return checkResult(cuTexRefGetBorderColorNative(pBorderColor, hTexRef)); } private static native int cuTexRefGetBorderColorNative(float pBorderColor[], CUtexref hTexRef); /** * Gets the flags used by a texture reference. * *
     * CUresult cuTexRefGetFlags (
     *      unsigned int* pFlags,
     *      CUtexref hTexRef )
     * 
*
*

Gets the flags used by a texture * reference. Returns in *pFlags the flags of the texture * reference hTexRef. *

*
* * @param pFlags Returned flags * @param hTexRef Texture reference * * @return CUDA_SUCCESS, CUDA_ERROR_DEINITIALIZED, CUDA_ERROR_NOT_INITIALIZED, * CUDA_ERROR_INVALID_CONTEXT, CUDA_ERROR_INVALID_VALUE * * @see JCudaDriver#cuTexRefSetAddress * @see JCudaDriver#cuTexRefSetAddress2D * @see JCudaDriver#cuTexRefSetAddressMode * @see JCudaDriver#cuTexRefSetArray * @see JCudaDriver#cuTexRefSetFilterMode * @see JCudaDriver#cuTexRefSetFlags * @see JCudaDriver#cuTexRefSetFormat * @see JCudaDriver#cuTexRefGetAddress * @see JCudaDriver#cuTexRefGetAddressMode * @see JCudaDriver#cuTexRefGetArray * @see JCudaDriver#cuTexRefGetFilterMode * @see JCudaDriver#cuTexRefGetFormat * * @deprecated Deprecated as of CUDA 10.1 */ public static int cuTexRefGetFlags(int pFlags[], CUtexref hTexRef) { return checkResult(cuTexRefGetFlagsNative(pFlags, hTexRef)); } private static native int cuTexRefGetFlagsNative(int pFlags[], CUtexref hTexRef); /** * Sets the CUDA array for a surface reference. * *
     * CUresult cuSurfRefSetArray (
     *      CUsurfref hSurfRef,
     *      CUarray hArray,
     *      unsigned int  Flags )
     * 
*
*

Sets the CUDA array for a surface * reference. Sets the CUDA array hArray to be read and written * by the surface reference hSurfRef. Any previous CUDA array * state associated with the surface reference is superseded by this * function. Flags must be set to 0. The CUDA_ARRAY3D_SURFACE_LDST * flag must have been set for the CUDA array. Any CUDA array previously * bound to hSurfRef is unbound. *

*
* * @param hSurfRef Surface reference handle * @param hArray CUDA array handle * @param Flags set to 0 * * @return CUDA_SUCCESS, CUDA_ERROR_DEINITIALIZED, CUDA_ERROR_NOT_INITIALIZED, * CUDA_ERROR_INVALID_CONTEXT, CUDA_ERROR_INVALID_VALUE * * @see JCudaDriver#cuModuleGetSurfRef * @see JCudaDriver#cuSurfRefGetArray * * @deprecated Deprecated as of CUDA 10.1 */ public static int cuSurfRefSetArray(CUsurfref hSurfRef, CUarray hArray, int Flags ) { return checkResult(cuSurfRefSetArrayNative(hSurfRef, hArray, Flags)); } private static native int cuSurfRefSetArrayNative(CUsurfref hSurfRef, CUarray hArray, int Flags ); /** * Passes back the CUDA array bound to a surface reference. * *
     * CUresult cuSurfRefGetArray (
     *      CUarray* phArray,
     *      CUsurfref hSurfRef )
     * </pre>
     * <p>
     * Returns in phArray the CUDA array bound to the surface reference
     * hSurfRef, or returns CUDA_ERROR_INVALID_VALUE if the surface
     * reference is not bound to any CUDA array.
     *
     * @param phArray Returned CUDA array handle
     * @param hSurfRef Surface reference handle
     *
     * @return CUDA_SUCCESS, CUDA_ERROR_DEINITIALIZED, CUDA_ERROR_NOT_INITIALIZED,
     * CUDA_ERROR_INVALID_CONTEXT, CUDA_ERROR_INVALID_VALUE
     *
     * @see JCudaDriver#cuModuleGetSurfRef
     * @see JCudaDriver#cuSurfRefSetArray
     *
     * @deprecated Deprecated as of CUDA 10.1
     */
    @Deprecated
    public static int cuSurfRefGetArray(CUarray phArray, CUsurfref hSurfRef)
    {
        // Forward to the native implementation and check the result code
        return checkResult(cuSurfRefGetArrayNative(phArray, hSurfRef));
    }
    private static native int cuSurfRefGetArrayNative(CUarray phArray, CUsurfref hSurfRef);

    /**
     * Creates a texture object.
     *
     * <pre>
     * CUresult cuTexObjectCreate (
     *      CUtexObject* pTexObject,
     *      const CUDA_RESOURCE_DESC* pResDesc,
     *      const CUDA_TEXTURE_DESC* pTexDesc,
     *      const CUDA_RESOURCE_VIEW_DESC* pResViewDesc )
     * 
*
*

Creates a texture object. Creates a * texture object and returns it in pTexObject. pResDesc * describes the data to texture from. pTexDesc describes how * the data should be sampled. pResViewDesc is an optional * argument that specifies an alternate format for the data described by * pResDesc, and also describes the subresource region to * restrict access to when texturing. pResViewDesc can only be * specified if the type of resource is a CUDA array or a CUDA mipmapped * array. *

*

Texture objects are only supported on * devices of compute capability 3.0 or higher. *

*

The CUDA_RESOURCE_DESC structure is * defined as: *

        typedef struct CUDA_RESOURCE_DESC_st
     *         {
     *             CUresourcetype resType;
     *
     *             union {
     *                 struct {
     *                     CUarray hArray;
     *                 } array;
     *                 struct {
     *                     CUmipmappedArray hMipmappedArray;
     *                 } mipmap;
     *                 struct {
     *                     CUdeviceptr devPtr;
     *                     CUarray_format format;
     *                     unsigned int numChannels;
     *                     size_t sizeInBytes;
     *                 } linear;
     *                 struct {
     *                     CUdeviceptr devPtr;
     *                     CUarray_format format;
     *                     unsigned int numChannels;
     *                     size_t width;
     *                     size_t height;
     *                     size_t pitchInBytes;
     *                 } pitch2D;
     *             } res;
     *
     *             unsigned int flags;
     *         } CUDA_RESOURCE_DESC;
* where: *
    *
  • *
    * CUDA_RESOURCE_DESC::resType * specifies the type of resource to texture from. CUresourceType is * defined as: *
            typedef enum CUresourcetype_enum {
         *             CU_RESOURCE_TYPE_ARRAY           = 0x00,
         *             CU_RESOURCE_TYPE_MIPMAPPED_ARRAY = 0x01,
         *             CU_RESOURCE_TYPE_LINEAR          = 0x02,
         *             CU_RESOURCE_TYPE_PITCH2D         = 0x03
         *         } CUresourcetype;
    *
    *
  • *
*

*

If CUDA_RESOURCE_DESC::resType is set * to CU_RESOURCE_TYPE_ARRAY, CUDA_RESOURCE_DESC::res::array::hArray must * be set to a valid CUDA array handle. *

*

If CUDA_RESOURCE_DESC::resType is set * to CU_RESOURCE_TYPE_MIPMAPPED_ARRAY, * CUDA_RESOURCE_DESC::res::mipmap::hMipmappedArray must be set to a valid * CUDA mipmapped array handle. *

*

If CUDA_RESOURCE_DESC::resType is set * to CU_RESOURCE_TYPE_LINEAR, CUDA_RESOURCE_DESC::res::linear::devPtr * must be set to a valid device pointer, that is aligned to * CU_DEVICE_ATTRIBUTE_TEXTURE_ALIGNMENT. CUDA_RESOURCE_DESC::res::linear::format * and CUDA_RESOURCE_DESC::res::linear::numChannels describe the format * of each component * and the number of components per array * element. CUDA_RESOURCE_DESC::res::linear::sizeInBytes specifies the * size of the array * in bytes. The total number of elements * in the linear address range cannot exceed * CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE1D_LINEAR_WIDTH. The number of * elements is computed as (sizeInBytes / (sizeof(format) * * numChannels)). *

*

If CUDA_RESOURCE_DESC::resType is set * to CU_RESOURCE_TYPE_PITCH2D, CUDA_RESOURCE_DESC::res::pitch2D::devPtr * must be set to a valid device pointer, that is aligned to * CU_DEVICE_ATTRIBUTE_TEXTURE_ALIGNMENT. CUDA_RESOURCE_DESC::res::pitch2D::format * and CUDA_RESOURCE_DESC::res::pitch2D::numChannels describe the format * of each component * and the number of components per array * element. CUDA_RESOURCE_DESC::res::pitch2D::width and * CUDA_RESOURCE_DESC::res::pitch2D::height * specify the width and height of the array * in elements, and cannot exceed CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE2D_LINEAR_WIDTH * and CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE2D_LINEAR_HEIGHT respectively. * CUDA_RESOURCE_DESC::res::pitch2D::pitchInBytes specifies the pitch * between two rows in bytes and has to be * aligned to * CU_DEVICE_ATTRIBUTE_TEXTURE_PITCH_ALIGNMENT. Pitch cannot exceed * CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE2D_LINEAR_PITCH. *

*
    *
  • *

    flags must be set to zero.

    *
  • *
*

*

The CUDA_TEXTURE_DESC struct is defined * as *

        typedef struct CUDA_TEXTURE_DESC_st {
     *             CUaddress_mode addressMode[3];
     *             CUfilter_mode filterMode;
     *             unsigned int flags;
     *             unsigned int maxAnisotropy;
     *             CUfilter_mode mipmapFilterMode;
     *             float mipmapLevelBias;
     *             float minMipmapLevelClamp;
     *             float maxMipmapLevelClamp;
     *         } CUDA_TEXTURE_DESC;
* where *
    *
  • *
    * CUDA_TEXTURE_DESC::addressMode * specifies the addressing mode for each dimension of the texture data. * CUaddress_mode is defined as: *
            typedef enum
         * CUaddress_mode_enum {
         *             CU_TR_ADDRESS_MODE_WRAP = 0,
         *             CU_TR_ADDRESS_MODE_CLAMP = 1,
         *             CU_TR_ADDRESS_MODE_MIRROR = 2,
         *             CU_TR_ADDRESS_MODE_BORDER = 3
         *         } CUaddress_mode;
    * This is ignored if * CUDA_RESOURCE_DESC::resType is CU_RESOURCE_TYPE_LINEAR. Also, if the * flag, CU_TRSF_NORMALIZED_COORDINATES is not set, the only supported * address mode is CU_TR_ADDRESS_MODE_CLAMP. *
    *
  • *
*

*
    *
  • *
    * CUDA_TEXTURE_DESC::filterMode * specifies the filtering mode to be used when fetching from the texture. * CUfilter_mode is defined as: *
            typedef enum CUfilter_mode_enum
         * {
         *             CU_TR_FILTER_MODE_POINT = 0,
         *             CU_TR_FILTER_MODE_LINEAR = 1
         *         } CUfilter_mode;
    * This is ignored if * CUDA_RESOURCE_DESC::resType is CU_RESOURCE_TYPE_LINEAR. *
    *
  • *
*

*
    *
  • *
    * CUDA_TEXTURE_DESC::flags can * be any combination of the following: *
      *
    • *

      CU_TRSF_READ_AS_INTEGER, * which suppresses the default behavior of having the texture promote * integer data to floating point data in the range [0, * 1]. Note that texture * with 32-bit integer format would not be promoted, regardless of whether * or not this flag is specified. *

      *
    • *
    • *

      CU_TRSF_NORMALIZED_COORDINATES, which suppresses the default behavior * of having the texture coordinates range from [0, Dim) where Dim is the * width or height * of the CUDA array. * Instead, the texture coordinates [0, 1.0) reference the entire breadth * of the array dimension; Note that * for CUDA mipmapped * arrays, this flag has to be set. *

      *
    • *
    *
    *
  • *
*

*
    *
  • *

    CUDA_TEXTURE_DESC::maxAnisotropy * specifies the maximum anistropy ratio to be used when doing anisotropic * filtering. This value will be clamped to the range * [1,16]. *

    *
  • *
*

*
    *
  • *

    CUDA_TEXTURE_DESC::mipmapFilterMode * specifies the filter mode when the calculated mipmap level lies between * two defined mipmap levels. *

    *
  • *
*

*
    *
  • *

    CUDA_TEXTURE_DESC::mipmapLevelBias * specifies the offset to be applied to the calculated mipmap level. *

    *
  • *
*

*
    *
  • *

    CUDA_TEXTURE_DESC::minMipmapLevelClamp * specifies the lower end of the mipmap level range to clamp access to. *

    *
  • *
*

*
    *
  • *

    CUDA_TEXTURE_DESC::maxMipmapLevelClamp * specifies the upper end of the mipmap level range to clamp access to. *

    *
  • *
*

*

The CUDA_RESOURCE_VIEW_DESC struct is * defined as *

        typedef struct CUDA_RESOURCE_VIEW_DESC_st
     *         {
     *             CUresourceViewFormat format;
     *             size_t width;
     *             size_t height;
     *             size_t depth;
     *             unsigned int firstMipmapLevel;
     *             unsigned int lastMipmapLevel;
     *             unsigned int firstLayer;
     *             unsigned int lastLayer;
     *         } CUDA_RESOURCE_VIEW_DESC;
* where: *
    *
  • *

    CUDA_RESOURCE_VIEW_DESC::format * specifies how the data contained in the CUDA array or CUDA mipmapped * array should be interpreted. Note that this can incur * a change in size of the texture * data. If the resource view format is a block compressed format, then * the underlying CUDA array * or CUDA mipmapped array has to * have a base of format CU_AD_FORMAT_UNSIGNED_INT32. with 2 or 4 channels, * depending on the block compressed format. For ex., BC1 and BC4 require * the underlying CUDA array to * have a format of * CU_AD_FORMAT_UNSIGNED_INT32 with 2 channels. The other BC formats * require the underlying resource to have the same base format but with * 4 channels. *

    *
  • *
*

*
    *
  • *

    CUDA_RESOURCE_VIEW_DESC::width * specifies the new width of the texture data. If the resource view * format is a block compressed format, this value has to * be 4 times the original width * of the resource. For non block compressed formats, this value has to * be equal to that of the * original resource. *

    *
  • *
*

*
    *
  • *

    CUDA_RESOURCE_VIEW_DESC::height * specifies the new height of the texture data. If the resource view * format is a block compressed format, this value has to * be 4 times the original height * of the resource. For non block compressed formats, this value has to * be equal to that of the * original resource. *

    *
  • *
*

*
    *
  • *

    CUDA_RESOURCE_VIEW_DESC::depth * specifies the new depth of the texture data. This value has to be equal * to that of the original resource. *

    *
  • *
*

*
    *
  • *

    CUDA_RESOURCE_VIEW_DESC::firstMipmapLevel specifies the most detailed * mipmap level. This will be the new mipmap level zero. For non-mipmapped * resources, this value * has to be * zero.CUDA_TEXTURE_DESC::minMipmapLevelClamp and * CUDA_TEXTURE_DESC::maxMipmapLevelClamp will be relative to this value. * For ex., if the firstMipmapLevel is set to 2, and a minMipmapLevelClamp * of 1.2 is specified, * then the actual minimum mipmap * level clamp will be 3.2. *

    *
  • *
*

*
    *
  • *

    CUDA_RESOURCE_VIEW_DESC::lastMipmapLevel * specifies the least detailed mipmap level. For non-mipmapped resources, * this value has to be zero. *

    *
  • *
*

*
    *
  • *

    CUDA_RESOURCE_VIEW_DESC::firstLayer * specifies the first layer index for layered textures. This will be the * new layer zero. For non-layered resources, this value * has to be zero. *

    *
  • *
*

*
    *
  • *

    CUDA_RESOURCE_VIEW_DESC::lastLayer * specifies the last layer index for layered textures. For non-layered * resources, this value has to be zero. *

    *
  • *
*

*
* * @param pTexObject Texture object to create * @param pResDesc Resource descriptor * @param pTexDesc Texture descriptor * @param pResViewDesc Resource view descriptor * * @return CUDA_SUCCESS, CUDA_ERROR_DEINITIALIZED, CUDA_ERROR_NOT_INITIALIZED, * CUDA_ERROR_INVALID_CONTEXT, CUDA_ERROR_INVALID_VALUE * * @see JCudaDriver#cuTexObjectDestroy */ public static int cuTexObjectCreate(CUtexObject pTexObject, CUDA_RESOURCE_DESC pResDesc, CUDA_TEXTURE_DESC pTexDesc, CUDA_RESOURCE_VIEW_DESC pResViewDesc) { return checkResult(cuTexObjectCreateNative(pTexObject, pResDesc, pTexDesc, pResViewDesc)); } private static native int cuTexObjectCreateNative(CUtexObject pTexObject, CUDA_RESOURCE_DESC pResDesc, CUDA_TEXTURE_DESC pTexDesc, CUDA_RESOURCE_VIEW_DESC pResViewDesc); /** * Destroys a texture object. * *
     * CUresult cuTexObjectDestroy (
     *      CUtexObject texObject )
     * </pre>
     * <p>
     * Destroys the texture object specified by texObject.
     *
     * @param texObject Texture object to destroy
     *
     * @return CUDA_SUCCESS, CUDA_ERROR_DEINITIALIZED, CUDA_ERROR_NOT_INITIALIZED,
     * CUDA_ERROR_INVALID_CONTEXT, CUDA_ERROR_INVALID_VALUE
     *
     * @see JCudaDriver#cuTexObjectCreate
     */
    public static int cuTexObjectDestroy(CUtexObject texObject)
    {
        // Forward to the native implementation and check the result code
        int status = cuTexObjectDestroyNative(texObject);
        return checkResult(status);
    }
    private static native int cuTexObjectDestroyNative(CUtexObject texObject);

    /**
     * Returns a texture object's resource descriptor.
     *
     * <pre>
     * CUresult cuTexObjectGetResourceDesc (
     *      CUDA_RESOURCE_DESC* pResDesc,
     *      CUtexObject texObject )
     * </pre>
     * <p>
     * Returns the resource descriptor for the texture object specified by
     * texObject.
     *
     * @param pResDesc Resource descriptor
     * @param texObject Texture object
     *
     * @return CUDA_SUCCESS, CUDA_ERROR_DEINITIALIZED, CUDA_ERROR_NOT_INITIALIZED,
     * CUDA_ERROR_INVALID_CONTEXT, CUDA_ERROR_INVALID_VALUE
     *
     * @see JCudaDriver#cuTexObjectCreate
     */
    public static int cuTexObjectGetResourceDesc(CUDA_RESOURCE_DESC pResDesc, CUtexObject texObject)
    {
        // Forward to the native implementation and check the result code
        int status = cuTexObjectGetResourceDescNative(pResDesc, texObject);
        return checkResult(status);
    }
    private static native int cuTexObjectGetResourceDescNative(CUDA_RESOURCE_DESC pResDesc, CUtexObject texObject);

    /**
     * Returns a texture object's texture descriptor.
     *
     * <pre>
     * CUresult cuTexObjectGetTextureDesc (
     *      CUDA_TEXTURE_DESC* pTexDesc,
     *      CUtexObject texObject )
     * </pre>
     * <p>
     * Returns the texture descriptor for the texture object specified by
     * texObject.
     *
     * @param pTexDesc Texture descriptor
     * @param texObject Texture object
     *
     * @return CUDA_SUCCESS, CUDA_ERROR_DEINITIALIZED, CUDA_ERROR_NOT_INITIALIZED,
     * CUDA_ERROR_INVALID_CONTEXT, CUDA_ERROR_INVALID_VALUE
     *
     * @see JCudaDriver#cuTexObjectCreate
     */
    public static int cuTexObjectGetTextureDesc(CUDA_TEXTURE_DESC pTexDesc, CUtexObject texObject)
    {
        // Forward to the native implementation and check the result code
        int status = cuTexObjectGetTextureDescNative(pTexDesc, texObject);
        return checkResult(status);
    }
    private static native int cuTexObjectGetTextureDescNative(CUDA_TEXTURE_DESC pTexDesc, CUtexObject texObject);

    /**
     * Returns a texture object's resource view descriptor.
     *
     * <pre>
     * CUresult cuTexObjectGetResourceViewDesc (
     *      CUDA_RESOURCE_VIEW_DESC* pResViewDesc,
     *      CUtexObject texObject )
     * </pre>
     * <p>
     * Returns the resource view descriptor for the texture object specified
     * by texObject. If no resource view was set for texObject,
     * CUDA_ERROR_INVALID_VALUE is returned.
     *
     * @param pResViewDesc Resource view descriptor
     * @param texObject Texture object
     *
     * @return CUDA_SUCCESS, CUDA_ERROR_DEINITIALIZED, CUDA_ERROR_NOT_INITIALIZED,
     * CUDA_ERROR_INVALID_CONTEXT, CUDA_ERROR_INVALID_VALUE
     *
     * @see JCudaDriver#cuTexObjectCreate
     */
    public static int cuTexObjectGetResourceViewDesc(CUDA_RESOURCE_VIEW_DESC pResViewDesc, CUtexObject texObject)
    {
        // Forward to the native implementation and check the result code
        int status = cuTexObjectGetResourceViewDescNative(pResViewDesc, texObject);
        return checkResult(status);
    }
    private static native int cuTexObjectGetResourceViewDescNative(CUDA_RESOURCE_VIEW_DESC pResViewDesc, CUtexObject texObject);

    /**
     * Creates a surface object.
     *
     * <pre>
     * CUresult cuSurfObjectCreate (
     *      CUsurfObject* pSurfObject,
     *      const CUDA_RESOURCE_DESC* pResDesc )
     * </pre>
     * <p>
     * Creates a surface object and returns it in pSurfObject. pResDesc
     * describes the data to perform surface load/stores on.
     * CUDA_RESOURCE_DESC::resType must be CU_RESOURCE_TYPE_ARRAY and
     * CUDA_RESOURCE_DESC::res::array::hArray must be set to a valid CUDA
     * array handle. CUDA_RESOURCE_DESC::flags must be set to zero.
     * <p>
     * Surface objects are only supported on devices of compute capability
     * 3.0 or higher.
     *
     * @param pSurfObject Surface object to create
     * @param pResDesc Resource descriptor
     *
     * @return CUDA_SUCCESS, CUDA_ERROR_DEINITIALIZED, CUDA_ERROR_NOT_INITIALIZED,
     * CUDA_ERROR_INVALID_CONTEXT, CUDA_ERROR_INVALID_VALUE
     *
     * @see JCudaDriver#cuSurfObjectDestroy
     */
    public static int cuSurfObjectCreate(CUsurfObject pSurfObject, CUDA_RESOURCE_DESC pResDesc)
    {
        // Forward to the native implementation and check the result code
        int status = cuSurfObjectCreateNative(pSurfObject, pResDesc);
        return checkResult(status);
    }
    private static native int cuSurfObjectCreateNative(CUsurfObject pSurfObject, CUDA_RESOURCE_DESC pResDesc);

    /**
     * Destroys a surface object.
     *
     * <pre>
     * CUresult cuSurfObjectDestroy (
     *      CUsurfObject surfObject )
     * </pre>
     * <p>
     * Destroys the surface object specified by surfObject.
     *
     * @param surfObject Surface object to destroy
     *
     * @return CUDA_SUCCESS, CUDA_ERROR_DEINITIALIZED, CUDA_ERROR_NOT_INITIALIZED,
     * CUDA_ERROR_INVALID_CONTEXT, CUDA_ERROR_INVALID_VALUE
     *
     * @see JCudaDriver#cuSurfObjectCreate
     */
    public static int cuSurfObjectDestroy(CUsurfObject surfObject)
    {
        // Forward to the native implementation and check the result code
        int status = cuSurfObjectDestroyNative(surfObject);
        return checkResult(status);
    }
    private static native int cuSurfObjectDestroyNative(CUsurfObject surfObject);

    /**
     * Returns a surface object's resource descriptor.
     *
     * <pre>
     * CUresult cuSurfObjectGetResourceDesc (
     *      CUDA_RESOURCE_DESC* pResDesc,
     *      CUsurfObject surfObject )
     * </pre>
     * <p>
     * Returns the resource descriptor for the surface object specified by
     * surfObject.
     *
     * @param pResDesc Resource descriptor
     * @param surfObject Surface object
     *
     * @return CUDA_SUCCESS, CUDA_ERROR_DEINITIALIZED, CUDA_ERROR_NOT_INITIALIZED,
     * CUDA_ERROR_INVALID_CONTEXT, CUDA_ERROR_INVALID_VALUE
     *
     * @see JCudaDriver#cuSurfObjectCreate
     */
    public static int cuSurfObjectGetResourceDesc(CUDA_RESOURCE_DESC pResDesc, CUsurfObject surfObject)
    {
        // Forward to the native implementation and check the result code
        int status = cuSurfObjectGetResourceDescNative(pResDesc, surfObject);
        return checkResult(status);
    }
    private static native int cuSurfObjectGetResourceDescNative(CUDA_RESOURCE_DESC pResDesc, CUsurfObject surfObject);

    /**
     * Queries if a device may directly access a peer device's memory.
     *
     * <pre>
     * CUresult cuDeviceCanAccessPeer (
     *      int* canAccessPeer,
     *      CUdevice dev,
     *      CUdevice peerDev )
     * 
*
*

Queries if a device may directly access * a peer device's memory. Returns in *canAccessPeer a value * of 1 if contexts on dev are capable of directly accessing * memory from contexts on peerDev and 0 otherwise. If direct * access of peerDev from dev is possible, then access * may be enabled on two specific contexts by calling * cuCtxEnablePeerAccess(). *

*
* Note: *

Note that this * function may also return error codes from previous, asynchronous * launches. *

*
*

*
* * @param canAccessPeer Returned access capability * @param dev Device from which allocations on peerDev are to be directly accessed. * @param peerDev Device on which the allocations to be directly accessed by dev reside. * * @return CUDA_SUCCESS, CUDA_ERROR_DEINITIALIZED, CUDA_ERROR_NOT_INITIALIZED, * CUDA_ERROR_INVALID_DEVICE * * @see JCudaDriver#cuCtxEnablePeerAccess * @see JCudaDriver#cuCtxDisablePeerAccess */ public static int cuDeviceCanAccessPeer(int canAccessPeer[], CUdevice dev, CUdevice peerDev) { return checkResult(cuDeviceCanAccessPeerNative(canAccessPeer, dev, peerDev)); } private static native int cuDeviceCanAccessPeerNative(int canAccessPeer[], CUdevice dev, CUdevice peerDev); /** * Queries attributes of the link between two devices.
*
* Returns in *value the value of the requested attribute attrib of the * link between srcDevice and dstDevice. The supported attributes are: *
    *
  • CU_DEVICE_P2P_ATTRIBUTE_PERFORMANCE_RANK: A relative value indicating the * performance of the link between two devices.
  • *
  • CU_DEVICE_P2P_ATTRIBUTE_ACCESS_SUPPORTED P2P: 1 if P2P Access is enable.
  • *
  • CU_DEVICE_P2P_ATTRIBUTE_NATIVE_ATOMIC_SUPPORTED: 1 if Atomic operations over * the link are supported.
  • *
* Returns ::CUDA_ERROR_INVALID_DEVICE if srcDevice or dstDevice are not valid * or if they represent the same device.
*
* Returns ::CUDA_ERROR_INVALID_VALUE if attrib is not valid or if value is * a null pointer.
* * @param value Returned value of the requested attribute * @param attrib The requested attribute of the link between \p srcDevice and \p dstDevice. * @param srcDevice The source device of the target link. * @param dstDevice The destination device of the target link. * * @return CUDA_SUCCESS, CUDA_ERROR_DEINITIALIZED, CUDA_ERROR_NOT_INITIALIZED, * CUDA_ERROR_INVALID_DEVICE, CUDA_ERROR_INVALID_VALUE * * @see JCudaDriver#cuCtxEnablePeerAccess * @see JCudaDriver#cuCtxDisablePeerAccess * @see JCudaDriver#cuCtxCanAccessPeer */ public static int cuDeviceGetP2PAttribute(int value[], int attrib, CUdevice srcDevice, CUdevice dstDevice) { return checkResult(cuDeviceGetP2PAttributeNative(value, attrib, srcDevice, dstDevice)); } private static native int cuDeviceGetP2PAttributeNative(int value[], int attrib, CUdevice srcDevice, CUdevice dstDevice); /** * Enables direct access to memory allocations in a peer context. * *
     * CUresult cuCtxEnablePeerAccess (
     *      CUcontext peerContext,
     *      unsigned int  Flags )
     * 
*
*

Enables direct access to memory * allocations in a peer context. If both the current context and peerContext are on devices which support unified addressing (as * may be queried using CU_DEVICE_ATTRIBUTE_UNIFIED_ADDRESSING) and same * major compute capability, then on success all allocations from peerContext will immediately be accessible by the current context. * See Unified Addressing for additional details. *

*

Note that access granted by this call * is unidirectional and that in order to access memory from the current * context in peerContext, a separate symmetric call to * cuCtxEnablePeerAccess() is required. *

* There is a system-wide maximum of eight peer connections per device. *

*

Returns CUDA_ERROR_PEER_ACCESS_UNSUPPORTED * if cuDeviceCanAccessPeer() indicates that the CUdevice of the current * context cannot directly access memory from the CUdevice of peerContext. *

*

Returns CUDA_ERROR_PEER_ACCESS_ALREADY_ENABLED * if direct access of peerContext from the current context has * already been enabled. *

*

Returns CUDA_ERROR_TOO_MANY_PEERS if * direct peer access is not possible because hardware resources required * for peer access have been exhausted. *

*

Returns CUDA_ERROR_INVALID_CONTEXT if * there is no current context, peerContext is not a valid * context, or if the current context is peerContext. *

*

Returns CUDA_ERROR_INVALID_VALUE if Flags is not 0. *

*
* Note: *

Note that this * function may also return error codes from previous, asynchronous * launches. *

*
*

*
* * @param peerContext Peer context to enable direct access to from the current context * @param Flags Reserved for future use and must be set to 0 * * @return CUDA_SUCCESS, CUDA_ERROR_DEINITIALIZED, CUDA_ERROR_NOT_INITIALIZED, * CUDA_ERROR_PEER_ACCESS_ALREADY_ENABLED, CUDA_ERROR_TOO_MANY_PEERS, * CUDA_ERROR_INVALID_CONTEXT, CUDA_ERROR_PEER_ACCESS_UNSUPPORTED, * CUDA_ERROR_INVALID_VALUE * * @see JCudaDriver#cuDeviceCanAccessPeer * @see JCudaDriver#cuCtxDisablePeerAccess */ public static int cuCtxEnablePeerAccess(CUcontext peerContext, int Flags) { return checkResult(cuCtxEnablePeerAccessNative(peerContext, Flags)); } private static native int cuCtxEnablePeerAccessNative(CUcontext peerContext, int Flags); /** * Disables direct access to memory allocations in a peer context and unregisters any registered allocations. * *
     * <pre>
     * CUresult cuCtxDisablePeerAccess ( CUcontext peerContext )
     * </pre>
     * <p>
     * Returns CUDA_ERROR_PEER_ACCESS_NOT_ENABLED if direct peer access has
     * not yet been enabled from <code>peerContext</code> to the current
     * context, and CUDA_ERROR_INVALID_CONTEXT if there is no current context
     * or <code>peerContext</code> is not a valid context.
     * <p>
     * Note that this function may also return error codes from previous,
     * asynchronous launches.
     *
     * @param peerContext Peer context to disable direct access to
     *
     * @return CUDA_SUCCESS, CUDA_ERROR_DEINITIALIZED, CUDA_ERROR_NOT_INITIALIZED,
     * CUDA_ERROR_PEER_ACCESS_NOT_ENABLED, CUDA_ERROR_INVALID_CONTEXT
     *
     * @see JCudaDriver#cuDeviceCanAccessPeer
     * @see JCudaDriver#cuCtxEnablePeerAccess
     */
    public static int cuCtxDisablePeerAccess(CUcontext peerContext)
    {
        final int status = cuCtxDisablePeerAccessNative(peerContext);
        return checkResult(status);
    }
    private static native int cuCtxDisablePeerAccessNative(CUcontext peerContext);


    /**
     * Sets the parameter size for the function.
     *
     * CUresult cuParamSetSize (
     *      CUfunction hfunc,
     *      unsigned int  numbytes )
     * 
*
*

Sets the parameter size for the function. * Deprecated Sets through numbytes * the total size in bytes needed by the function parameters of the kernel * corresponding to hfunc. *

*
* Note: *

Note that this * function may also return error codes from previous, asynchronous * launches. *

*
*

*
* * @param hfunc Kernel to set parameter size for * @param numbytes Size of parameter list in bytes * * @return CUDA_SUCCESS, CUDA_ERROR_DEINITIALIZED, CUDA_ERROR_NOT_INITIALIZED, * CUDA_ERROR_INVALID_CONTEXT, CUDA_ERROR_INVALID_VALUE * * @see JCudaDriver#cuFuncSetBlockShape * @see JCudaDriver#cuFuncSetSharedSize * @see JCudaDriver#cuFuncGetAttribute * @see JCudaDriver#cuParamSetf * @see JCudaDriver#cuParamSeti * @see JCudaDriver#cuParamSetv * @see JCudaDriver#cuLaunch * @see JCudaDriver#cuLaunchGrid * @see JCudaDriver#cuLaunchGridAsync * @see JCudaDriver#cuLaunchKernel * * @deprecated Deprecated in CUDA */ @Deprecated public static int cuParamSetSize(CUfunction hfunc, int numbytes) { return checkResult(cuParamSetSizeNative(hfunc, numbytes)); } private static native int cuParamSetSizeNative(CUfunction hfunc, int numbytes); /** * Adds an integer parameter to the function's argument list. * *
     * CUresult cuParamSeti (
     *      CUfunction hfunc,
     *      int  offset,
     *      unsigned int  value )
     * 
*
*

Adds an integer parameter to the * function's argument list. * Deprecated Sets an integer parameter that * will be specified the next time the kernel corresponding to hfunc will be invoked. offset is a byte offset. *

*
* Note: *

Note that this * function may also return error codes from previous, asynchronous * launches. *

*
*

*
* * @param hfunc Kernel to add parameter to * @param offset Offset to add parameter to argument list * @param value Value of parameter * * @return CUDA_SUCCESS, CUDA_ERROR_DEINITIALIZED, CUDA_ERROR_NOT_INITIALIZED, * CUDA_ERROR_INVALID_CONTEXT, CUDA_ERROR_INVALID_VALUE * * @see JCudaDriver#cuFuncSetBlockShape * @see JCudaDriver#cuFuncSetSharedSize * @see JCudaDriver#cuFuncGetAttribute * @see JCudaDriver#cuParamSetSize * @see JCudaDriver#cuParamSetf * @see JCudaDriver#cuParamSetv * @see JCudaDriver#cuLaunch * @see JCudaDriver#cuLaunchGrid * @see JCudaDriver#cuLaunchGridAsync * @see JCudaDriver#cuLaunchKernel * * @deprecated Deprecated in CUDA */ @Deprecated public static int cuParamSeti(CUfunction hfunc, int offset, int value) { return checkResult(cuParamSetiNative(hfunc, offset, value)); } private static native int cuParamSetiNative(CUfunction hfunc, int offset, int value); /** * Adds a floating-point parameter to the function's argument list. * *
     * CUresult cuParamSetf (
     *      CUfunction hfunc,
     *      int  offset,
     *      float  value )
     * 
*
*

Adds a floating-point parameter to the * function's argument list. * Deprecated Sets a floating-point parameter * that will be specified the next time the kernel corresponding to hfunc will be invoked. offset is a byte offset. *

*
* Note: *

Note that this * function may also return error codes from previous, asynchronous * launches. *

*
*

*
* * @param hfunc Kernel to add parameter to * @param offset Offset to add parameter to argument list * @param value Value of parameter * * @return CUDA_SUCCESS, CUDA_ERROR_DEINITIALIZED, CUDA_ERROR_NOT_INITIALIZED, * CUDA_ERROR_INVALID_CONTEXT, CUDA_ERROR_INVALID_VALUE * * @see JCudaDriver#cuFuncSetBlockShape * @see JCudaDriver#cuFuncSetSharedSize * @see JCudaDriver#cuFuncGetAttribute * @see JCudaDriver#cuParamSetSize * @see JCudaDriver#cuParamSeti * @see JCudaDriver#cuParamSetv * @see JCudaDriver#cuLaunch * @see JCudaDriver#cuLaunchGrid * @see JCudaDriver#cuLaunchGridAsync * @see JCudaDriver#cuLaunchKernel * * @deprecated Deprecated in CUDA */ @Deprecated public static int cuParamSetf(CUfunction hfunc, int offset, float value) { return checkResult(cuParamSetfNative(hfunc, offset, value)); } private static native int cuParamSetfNative(CUfunction hfunc, int offset, float value); /** * Adds arbitrary data to the function's argument list. * *
     * CUresult cuParamSetv (
     *      CUfunction hfunc,
     *      int  offset,
     *      void* ptr,
     *      unsigned int  numbytes )
     * 
*
*

Adds arbitrary data to the function's * argument list. * Deprecated Copies an arbitrary amount of * data (specified in numbytes) from ptr into the * parameter space of the kernel corresponding to hfunc. offset is a byte offset. *

*
* Note: *

Note that this * function may also return error codes from previous, asynchronous * launches. *

*
*

*
* * @param hfunc Kernel to add data to * @param offset Offset to add data to argument list * @param ptr Pointer to arbitrary data * @param numbytes Size of data to copy in bytes * * @return CUDA_SUCCESS, CUDA_ERROR_DEINITIALIZED, CUDA_ERROR_NOT_INITIALIZED, * CUDA_ERROR_INVALID_CONTEXT, CUDA_ERROR_INVALID_VALUE * * @see JCudaDriver#cuFuncSetBlockShape * @see JCudaDriver#cuFuncSetSharedSize * @see JCudaDriver#cuFuncGetAttribute * @see JCudaDriver#cuParamSetSize * @see JCudaDriver#cuParamSetf * @see JCudaDriver#cuParamSeti * @see JCudaDriver#cuLaunch * @see JCudaDriver#cuLaunchGrid * @see JCudaDriver#cuLaunchGridAsync * @see JCudaDriver#cuLaunchKernel * * @deprecated Deprecated in CUDA */ @Deprecated public static int cuParamSetv(CUfunction hfunc, int offset, Pointer ptr, int numbytes) { return checkResult(cuParamSetvNative(hfunc, offset, ptr, numbytes)); } private static native int cuParamSetvNative(CUfunction hfunc, int offset, Pointer ptr, int numbytes); /** * Adds a texture-reference to the function's argument list. * *
     * <pre>
     * CUresult cuParamSetTexRef ( CUfunction hfunc, int texunit, CUtexref hTexRef )
     * </pre>
     * <p>
     * Makes the CUDA array or linear memory bound to the texture reference
     * <code>hTexRef</code> available to a device program as a texture. In this
     * version of CUDA, the texture-reference must be obtained via
     * cuModuleGetTexRef() and the <code>texunit</code> parameter must be set
     * to CU_PARAM_TR_DEFAULT.
     * <p>
     * Note that this function may also return error codes from previous,
     * asynchronous launches.
     *
     * @param hfunc Kernel to add texture-reference to
     * @param texunit Texture unit (must be CU_PARAM_TR_DEFAULT)
     * @param hTexRef Texture-reference to add to argument list
     *
     * @return CUDA_SUCCESS, CUDA_ERROR_DEINITIALIZED, CUDA_ERROR_NOT_INITIALIZED,
     * CUDA_ERROR_INVALID_CONTEXT, CUDA_ERROR_INVALID_VALUE
     *
     * @deprecated Deprecated in CUDA
     */
    @Deprecated
    public static int cuParamSetTexRef(CUfunction hfunc, int texunit, CUtexref hTexRef)
    {
        final int status = cuParamSetTexRefNative(hfunc, texunit, hTexRef);
        return checkResult(status);
    }
    private static native int cuParamSetTexRefNative(CUfunction hfunc, int texunit, CUtexref hTexRef);


    /**
     * Creates a graph.
*
* Creates an empty graph, which is returned via \p phGraph. * * @param phGraph - Returns newly created graph * @param flags - Graph creation flags, must be 0 * * @return * CUDA_SUCCESS, * CUDA_ERROR_DEINITIALIZED, * CUDA_ERROR_NOT_INITIALIZED, * CUDA_ERROR_INVALID_VALUE, * CUDA_ERROR_OUT_OF_MEMORY * * * @see JCudaDriver#cuGraphAddChildGraphNode * @see JCudaDriver#cuGraphAddEmptyNode * @see JCudaDriver#cuGraphAddKernelNode * @see JCudaDriver#cuGraphAddHostNode * @see JCudaDriver#cuGraphAddMemcpyNode * @see JCudaDriver#cuGraphAddMemsetNode * @see JCudaDriver#cuGraphInstantiate * @see JCudaDriver#cuGraphDestroy * @see JCudaDriver#cuGraphGetNodes * @see JCudaDriver#cuGraphGetRootNodes * @see JCudaDriver#cuGraphGetEdges * @see JCudaDriver#cuGraphClone */ public static int cuGraphCreate(CUgraph phGraph, int flags) { return checkResult(cuGraphCreateNative(phGraph, flags)); } private static native int cuGraphCreateNative(CUgraph phGraph, int flags); /** * Creates a kernel execution node and adds it to a graph.
*
* Creates a new kernel execution node and adds it to \p hGraph with \p numDependencies * dependencies specified via \p dependencies and arguments specified in \p nodeParams. * It is possible for \p numDependencies to be 0, in which case the node will be placed * at the root of the graph. \p dependencies may not have any duplicate entries. * A handle to the new node will be returned in \p phGraphNode.
*
* The CUDA_KERNEL_NODE_PARAMS structure is defined as:
*
*

     *  typedef struct CUDA_KERNEL_NODE_PARAMS_st {
     *      CUfunction func;
     *      unsigned int gridDimX;
     *      unsigned int gridDimY;
     *      unsigned int gridDimZ;
     *      unsigned int blockDimX;
     *      unsigned int blockDimY;
     *      unsigned int blockDimZ;
     *      unsigned int sharedMemBytes;
     *      void **kernelParams;
     *      void **extra;
     *  } CUDA_KERNEL_NODE_PARAMS;
     * 
*
* When the graph is launched, the node will invoke kernel \p func on a (\p gridDimX x * \p gridDimY x \p gridDimZ) grid of blocks. Each block contains * (\p blockDimX x \p blockDimY x \p blockDimZ) threads.
*
* \p sharedMemBytes sets the amount of dynamic shared memory that will be * available to each thread block.
*
* Kernel parameters to \p func can be specified in one of two ways:
*
* 1) Kernel parameters can be specified via \p kernelParams. If the kernel has N * parameters, then \p kernelParams needs to be an array of N pointers. Each pointer, * from \p kernelParams[0] to \p kernelParams[N-1], points to the region of memory from which the actual * parameter will be copied. The number of kernel parameters and their offsets and sizes do not need * to be specified as that information is retrieved directly from the kernel's image.
*
* 2) Kernel parameters can also be packaged by the application into a single buffer that is passed in * via \p extra. This places the burden on the application of knowing each kernel * parameter's size and alignment/padding within the buffer. The \p extra parameter exists * to allow this function to take additional less commonly used arguments. \p extra specifies * a list of names of extra settings and their corresponding values. Each extra setting name is * immediately followed by the corresponding value. The list must be terminated with either NULL or * CU_LAUNCH_PARAM_END.
*
*
    *
  • ::CU_LAUNCH_PARAM_END, which indicates the end of the \p extra * array;
  • *
  • ::CU_LAUNCH_PARAM_BUFFER_POINTER, which specifies that the next * value in \p extra will be a pointer to a buffer * containing all the kernel parameters for launching kernel * \p func;
  • *
  • ::CU_LAUNCH_PARAM_BUFFER_SIZE, which specifies that the next * value in \p extra will be a pointer to a size_t * containing the size of the buffer specified with * ::CU_LAUNCH_PARAM_BUFFER_POINTER;
  • *
*
* The error ::CUDA_ERROR_INVALID_VALUE will be returned if kernel parameters are specified with both * \p kernelParams and \p extra (i.e. both \p kernelParams and * \p extra are non-NULL).
*
* The \p kernelParams or \p extra array, as well as the argument values it points to, * are copied during this call. *
* Kernels launched using graphs must not use texture and surface references. Reading or * writing through any texture or surface reference is undefined behavior. * This restriction does not apply to texture and surface objects. * * @param phGraphNode - Returns newly created node * @param hGraph - Graph to which to add the node * @param dependencies - Dependencies of the node * @param numDependencies - Number of dependencies * @param nodeParams - Parameters for the GPU execution node * * @return * CUDA_SUCCESS, * CUDA_ERROR_DEINITIALIZED, * CUDA_ERROR_NOT_INITIALIZED, * CUDA_ERROR_INVALID_VALUE * * * @see JCudaDriver#cuLaunchKernel * @see JCudaDriver#cuGraphKernelNodeGetParams * @see JCudaDriver#cuGraphKernelNodeSetParams * @see JCudaDriver#cuGraphCreate * @see JCudaDriver#cuGraphDestroyNode * @see JCudaDriver#cuGraphAddChildGraphNode * @see JCudaDriver#cuGraphAddEmptyNode * @see JCudaDriver#cuGraphAddHostNode * @see JCudaDriver#cuGraphAddMemcpyNode * @see JCudaDriver#cuGraphAddMemsetNode */ public static int cuGraphAddKernelNode(CUgraphNode phGraphNode, CUgraph hGraph, CUgraphNode dependencies[], long numDependencies, CUDA_KERNEL_NODE_PARAMS nodeParams) { return checkResult(cuGraphAddKernelNodeNative(phGraphNode, hGraph, dependencies, numDependencies, nodeParams)); } private static native int cuGraphAddKernelNodeNative(CUgraphNode phGraphNode, CUgraph hGraph, CUgraphNode dependencies[], long numDependencies, CUDA_KERNEL_NODE_PARAMS nodeParams); /** * Returns a kernel node's parameters.
*
* Returns the parameters of kernel node \p hNode in \p nodeParams. * The \p kernelParams or \p extra array returned in \p nodeParams, * as well as the argument values it points to, are owned by the node. * This memory remains valid until the node is destroyed or its * parameters are modified, and should not be modified * directly. Use ::cuGraphKernelNodeSetParams to update the * parameters of this node.
*
* The params will contain either \p kernelParams or \p extra, * according to which of these was most recently set on the node. * * @param hNode - Node to get the parameters for * @param nodeParams - Pointer to return the parameters * * @return * CUDA_SUCCESS, * CUDA_ERROR_DEINITIALIZED, * CUDA_ERROR_NOT_INITIALIZED, * CUDA_ERROR_INVALID_VALUE * * @see JCudaDriver#cuLaunchKernel * @see JCudaDriver#cuGraphAddKernelNode * @see JCudaDriver#cuGraphKernelNodeSetParams */ public static int cuGraphKernelNodeGetParams(CUgraphNode hNode, CUDA_KERNEL_NODE_PARAMS nodeParams) { return checkResult(cuGraphKernelNodeGetParamsNative(hNode, nodeParams)); } private static native int cuGraphKernelNodeGetParamsNative(CUgraphNode hNode, CUDA_KERNEL_NODE_PARAMS nodeParams); /** * Sets a kernel node's parameters. * * Sets the parameters of kernel node \p hNode to \p nodeParams. * * @param hNode - Node to set the parameters for * @param nodeParams - Parameters to copy * * @return * CUDA_SUCCESS, * CUDA_ERROR_INVALID_VALUE, * CUDA_ERROR_INVALID_HANDLE, * CUDA_ERROR_OUT_OF_MEMORY * * * @see JCudaDriver#cuLaunchKernel * @see JCudaDriver#cuGraphAddKernelNode * @see JCudaDriver#cuGraphKernelNodeGetParams */ public static int cuGraphKernelNodeSetParams(CUgraphNode hNode, CUDA_KERNEL_NODE_PARAMS nodeParams) { return checkResult(cuGraphKernelNodeSetParamsNative(hNode, nodeParams)); } private static native int cuGraphKernelNodeSetParamsNative(CUgraphNode hNode, CUDA_KERNEL_NODE_PARAMS nodeParams); /** * Creates a memcpy node and adds it to a graph.
*
* Creates a new memcpy node and adds it to \p hGraph with \p numDependencies * dependencies specified via \p dependencies. * It is possible for \p numDependencies to be 0, in which case the node will be placed * at the root of the graph. \p dependencies may not have any duplicate entries. * A handle to the new node will be returned in \p phGraphNode.
*
* When the graph is launched, the node will perform the memcpy described by \p copyParams. * See ::cuMemcpy3D() for a description of the structure and its restrictions.
*
* Memcpy nodes have some additional restrictions with regards to managed memory, if the * system contains at least one device which has a zero value for the device attribute * ::CU_DEVICE_ATTRIBUTE_CONCURRENT_MANAGED_ACCESS. If one or more of the operands refer * to managed memory, then using the memory type ::CU_MEMORYTYPE_UNIFIED is disallowed * for those operand(s). The managed memory will be treated as residing on either the * host or the device, depending on which memory type is specified. * * @param phGraphNode - Returns newly created node * @param hGraph - Graph to which to add the node * @param dependencies - Dependencies of the node * @param numDependencies - Number of dependencies * @param copyParams - Parameters for the memory copy * @param ctx - Context on which to run the node * * @return * CUDA_SUCCESS, * CUDA_ERROR_DEINITIALIZED, * CUDA_ERROR_NOT_INITIALIZED, * CUDA_ERROR_INVALID_VALUE * * * @see JCudaDriver#cuMemcpy3D * @see JCudaDriver#cuGraphMemcpyNodeGetParams * @see JCudaDriver#cuGraphMemcpyNodeSetParams * @see JCudaDriver#cuGraphCreate * @see JCudaDriver#cuGraphDestroyNode * @see JCudaDriver#cuGraphAddChildGraphNode * @see JCudaDriver#cuGraphAddEmptyNode * @see JCudaDriver#cuGraphAddKernelNode * @see JCudaDriver#cuGraphAddHostNode * @see JCudaDriver#cuGraphAddMemsetNode */ public static int cuGraphAddMemcpyNode(CUgraphNode phGraphNode, CUgraph hGraph, CUgraphNode dependencies[], long numDependencies, CUDA_MEMCPY3D copyParams, CUcontext ctx) { return checkResult(cuGraphAddMemcpyNodeNative(phGraphNode, hGraph, dependencies, numDependencies, copyParams, ctx)); } private static native int cuGraphAddMemcpyNodeNative(CUgraphNode phGraphNode, CUgraph hGraph, CUgraphNode dependencies[], long numDependencies, CUDA_MEMCPY3D copyParams, CUcontext ctx); /** * Returns a memcpy node's parameters.
*
* Returns the parameters of memcpy node \p hNode in \p nodeParams. * * @param hNode - Node to get the parameters for * @param nodeParams - Pointer to return the parameters * * @return * CUDA_SUCCESS, * CUDA_ERROR_DEINITIALIZED, * CUDA_ERROR_NOT_INITIALIZED, * CUDA_ERROR_INVALID_VALUE * * * @see JCudaDriver#cuMemcpy3D * @see JCudaDriver#cuGraphAddMemcpyNode * @see JCudaDriver#cuGraphMemcpyNodeSetParams */ public static int cuGraphMemcpyNodeGetParams(CUgraphNode hNode, CUDA_MEMCPY3D nodeParams) { return checkResult(cuGraphMemcpyNodeGetParamsNative(hNode, nodeParams)); } private static native int cuGraphMemcpyNodeGetParamsNative(CUgraphNode hNode, CUDA_MEMCPY3D nodeParams); /** * Sets a memcpy node's parameters.
*
* Sets the parameters of memcpy node \p hNode to \p nodeParams. * * @param hNode - Node to set the parameters for * @param nodeParams - Parameters to copy * * @return * CUDA_SUCCESS, * CUDA_ERROR_DEINITIALIZED, * CUDA_ERROR_NOT_INITIALIZED, * CUDA_ERROR_INVALID_VALUE, * * * @see JCudaDriver#cuMemcpy3D * @see JCudaDriver#cuGraphAddMemcpyNode * @see JCudaDriver#cuGraphMemcpyNodeGetParams */ public static int cuGraphMemcpyNodeSetParams(CUgraphNode hNode, CUDA_MEMCPY3D nodeParams) { return checkResult(cuGraphMemcpyNodeSetParamsNative(hNode, nodeParams)); } private static native int cuGraphMemcpyNodeSetParamsNative(CUgraphNode hNode, CUDA_MEMCPY3D nodeParams); /** * Creates a memset node and adds it to a graph.
*
* Creates a new memset node and adds it to \p hGraph with \p numDependencies * dependencies specified via \p dependencies. * It is possible for \p numDependencies to be 0, in which case the node will be placed * at the root of the graph. \p dependencies may not have any duplicate entries. * A handle to the new node will be returned in \p phGraphNode.
*
* The element size must be 1, 2, or 4 bytes. * When the graph is launched, the node will perform the memset described by \p memsetParams. * * @param phGraphNode - Returns newly created node * @param hGraph - Graph to which to add the node * @param dependencies - Dependencies of the node * @param numDependencies - Number of dependencies * @param memsetParams - Parameters for the memory set * @param ctx - Context on which to run the node * * @return * CUDA_SUCCESS, * CUDA_ERROR_DEINITIALIZED, * CUDA_ERROR_NOT_INITIALIZED, * CUDA_ERROR_INVALID_VALUE, * CUDA_ERROR_INVALID_CONTEXT * * * @see JCudaDriver#cuMemsetD2D32 * @see JCudaDriver#cuGraphMemsetNodeGetParams * @see JCudaDriver#cuGraphMemsetNodeSetParams * @see JCudaDriver#cuGraphCreate * @see JCudaDriver#cuGraphDestroyNode * @see JCudaDriver#cuGraphAddChildGraphNode * @see JCudaDriver#cuGraphAddEmptyNode * @see JCudaDriver#cuGraphAddKernelNode * @see JCudaDriver#cuGraphAddHostNode * @see JCudaDriver#cuGraphAddMemcpyNode */ public static int cuGraphAddMemsetNode(CUgraphNode phGraphNode, CUgraph hGraph, CUgraphNode dependencies[], long numDependencies, CUDA_MEMSET_NODE_PARAMS memsetParams, CUcontext ctx) { return checkResult(cuGraphAddMemsetNodeNative(phGraphNode, hGraph, dependencies, numDependencies, memsetParams, ctx)); } private static native int cuGraphAddMemsetNodeNative(CUgraphNode phGraphNode, CUgraph hGraph, CUgraphNode dependencies[], long numDependencies, CUDA_MEMSET_NODE_PARAMS memsetParams, CUcontext ctx); /** * Returns a memset node's parameters.
*
* Returns the parameters of memset node \p hNode in \p nodeParams. * * @param hNode - Node to get the parameters for * @param nodeParams - Pointer to return the parameters * * @return * CUDA_SUCCESS, * CUDA_ERROR_DEINITIALIZED, * CUDA_ERROR_NOT_INITIALIZED, * CUDA_ERROR_INVALID_VALUE * * * @see JCudaDriver#cuMemsetD2D32 * @see JCudaDriver#cuGraphAddMemsetNode * @see JCudaDriver#cuGraphMemsetNodeSetParams */ public static int cuGraphMemsetNodeGetParams(CUgraphNode hNode, CUDA_MEMSET_NODE_PARAMS nodeParams) { return checkResult(cuGraphMemsetNodeGetParamsNative(hNode, nodeParams)); } private static native int cuGraphMemsetNodeGetParamsNative(CUgraphNode hNode, CUDA_MEMSET_NODE_PARAMS nodeParams); /** * Sets a memset node's parameters.
*
* Sets the parameters of memset node \p hNode to \p nodeParams. * * @param hNode - Node to set the parameters for * @param nodeParams - Parameters to copy * * @return * CUDA_SUCCESS, * CUDA_ERROR_DEINITIALIZED, * CUDA_ERROR_NOT_INITIALIZED, * CUDA_ERROR_INVALID_VALUE * * * @see JCudaDriver#cuMemsetD2D32 * @see JCudaDriver#cuGraphAddMemsetNode * @see JCudaDriver#cuGraphMemsetNodeGetParams */ public static int cuGraphMemsetNodeSetParams(CUgraphNode hNode, CUDA_MEMSET_NODE_PARAMS nodeParams) { return checkResult(cuGraphMemsetNodeSetParamsNative(hNode, nodeParams)); } private static native int cuGraphMemsetNodeSetParamsNative(CUgraphNode hNode, CUDA_MEMSET_NODE_PARAMS nodeParams); /** * Creates a host execution node and adds it to a graph.
*
* Creates a new CPU execution node and adds it to \p hGraph with \p numDependencies * dependencies specified via \p dependencies and arguments specified in \p nodeParams. * It is possible for \p numDependencies to be 0, in which case the node will be placed * at the root of the graph. \p dependencies may not have any duplicate entries. * A handle to the new node will be returned in \p phGraphNode.
*
* When the graph is launched, the node will invoke the specified CPU function. * * @param phGraphNode - Returns newly created node * @param hGraph - Graph to which to add the node * @param dependencies - Dependencies of the node * @param numDependencies - Number of dependencies * @param nodeParams - Parameters for the host node * * @return * CUDA_SUCCESS, * CUDA_ERROR_DEINITIALIZED, * CUDA_ERROR_NOT_INITIALIZED, * CUDA_ERROR_INVALID_VALUE * * * @see JCudaDriver#cuLaunchHostFunc * @see JCudaDriver#cuGraphHostNodeGetParams * @see JCudaDriver#cuGraphHostNodeSetParams * @see JCudaDriver#cuGraphCreate * @see JCudaDriver#cuGraphDestroyNode * @see JCudaDriver#cuGraphAddChildGraphNode * @see JCudaDriver#cuGraphAddEmptyNode * @see JCudaDriver#cuGraphAddKernelNode * @see JCudaDriver#cuGraphAddMemcpyNode * @see JCudaDriver#cuGraphAddMemsetNode */ public static int cuGraphAddHostNode(CUgraphNode phGraphNode, CUgraph hGraph, CUgraphNode dependencies[], long numDependencies, CUDA_HOST_NODE_PARAMS nodeParams) { return checkResult(cuGraphAddHostNodeNative(phGraphNode, hGraph, dependencies, numDependencies, nodeParams)); } private static native int cuGraphAddHostNodeNative(CUgraphNode phGraphNode, CUgraph hGraph, CUgraphNode dependencies[], long numDependencies, CUDA_HOST_NODE_PARAMS nodeParams); /** * Returns a host node's parameters.
*
* Returns the parameters of host node \p hNode in \p nodeParams. * * @param hNode - Node to get the parameters for * @param nodeParams - Pointer to return the parameters * * @return * CUDA_SUCCESS, * CUDA_ERROR_DEINITIALIZED, * CUDA_ERROR_NOT_INITIALIZED, * CUDA_ERROR_INVALID_VALUE * * * @see JCudaDriver#cuLaunchHostFunc * @see JCudaDriver#cuGraphAddHostNode * @see JCudaDriver#cuGraphHostNodeSetParams */ public static int cuGraphHostNodeGetParams(CUgraphNode hNode, CUDA_HOST_NODE_PARAMS nodeParams) { return checkResult(cuGraphHostNodeGetParamsNative(hNode, nodeParams)); } private static native int cuGraphHostNodeGetParamsNative(CUgraphNode hNode, CUDA_HOST_NODE_PARAMS nodeParams); /** * Sets a host node's parameters.
*
* Sets the parameters of host node \p hNode to \p nodeParams. * * @param hNode - Node to set the parameters for * @param nodeParams - Parameters to copy * * @return * CUDA_SUCCESS, * CUDA_ERROR_DEINITIALIZED, * CUDA_ERROR_NOT_INITIALIZED, * CUDA_ERROR_INVALID_VALUE * * * @see JCudaDriver#cuLaunchHostFunc * @see JCudaDriver#cuGraphAddHostNode * @see JCudaDriver#cuGraphHostNodeGetParams */ public static int cuGraphHostNodeSetParams(CUgraphNode hNode, CUDA_HOST_NODE_PARAMS nodeParams) { return checkResult(cuGraphHostNodeSetParamsNative(hNode, nodeParams)); } private static native int cuGraphHostNodeSetParamsNative(CUgraphNode hNode, CUDA_HOST_NODE_PARAMS nodeParams); /** * Creates a child graph node and adds it to a graph.
*
* Creates a new node which executes an embedded graph, and adds it to \p hGraph with * \p numDependencies dependencies specified via \p dependencies. * It is possible for \p numDependencies to be 0, in which case the node will be placed * at the root of the graph. \p dependencies may not have any duplicate entries. * A handle to the new node will be returned in \p phGraphNode.
*
* The node executes an embedded child graph. The child graph is cloned in this call. * * @param phGraphNode - Returns newly created node * @param hGraph - Graph to which to add the node * @param dependencies - Dependencies of the node * @param numDependencies - Number of dependencies * @param childGraph - The graph to clone into this node * * @return * CUDA_SUCCESS, * CUDA_ERROR_DEINITIALIZED, * CUDA_ERROR_NOT_INITIALIZED, * CUDA_ERROR_INVALID_VALUE, * * * @see JCudaDriver#cuGraphChildGraphNodeGetGraph * @see JCudaDriver#cuGraphCreate * @see JCudaDriver#cuGraphDestroyNode * @see JCudaDriver#cuGraphAddEmptyNode * @see JCudaDriver#cuGraphAddKernelNode * @see JCudaDriver#cuGraphAddHostNode * @see JCudaDriver#cuGraphAddMemcpyNode * @see JCudaDriver#cuGraphAddMemsetNode * @see JCudaDriver#cuGraphClone */ public static int cuGraphAddChildGraphNode(CUgraphNode phGraphNode, CUgraph hGraph, CUgraphNode dependencies[], long numDependencies, CUgraph childGraph) { return checkResult(cuGraphAddChildGraphNodeNative(phGraphNode, hGraph, dependencies, numDependencies, childGraph)); } private static native int cuGraphAddChildGraphNodeNative(CUgraphNode phGraphNode, CUgraph hGraph, CUgraphNode dependencies[], long numDependencies, CUgraph childGraph); /** * Gets a handle to the embedded graph of a child graph node.
*
* Gets a handle to the embedded graph in a child graph node. This call * does not clone the graph. Changes to the graph will be reflected in * the node, and the node retains ownership of the graph. * * @param hNode - Node to get the embedded graph for * @param phGraph - Location to store a handle to the graph * * @return * CUDA_SUCCESS, * CUDA_ERROR_DEINITIALIZED, * CUDA_ERROR_NOT_INITIALIZED, * CUDA_ERROR_INVALID_VALUE, * * * @see JCudaDriver#cuGraphAddChildGraphNode * @see JCudaDriver#cuGraphNodeFindInClone */ public static int cuGraphChildGraphNodeGetGraph(CUgraphNode hNode, CUgraph phGraph) { return checkResult(cuGraphChildGraphNodeGetGraphNative(hNode, phGraph)); } private static native int cuGraphChildGraphNodeGetGraphNative(CUgraphNode hNode, CUgraph phGraph); /** * Creates an empty node and adds it to a graph.
*
* Creates a new node which performs no operation, and adds it to \p hGraph with * \p numDependencies dependencies specified via \p dependencies. * It is possible for \p numDependencies to be 0, in which case the node will be placed * at the root of the graph. \p dependencies may not have any duplicate entries. * A handle to the new node will be returned in \p phGraphNode. * * An empty node performs no operation during execution, but can be used for * transitive ordering. For example, a phased execution graph with 2 groups of n * nodes with a barrier between them can be represented using an empty node and * 2*n dependency edges, rather than no empty node and n^2 dependency edges. * * @param phGraphNode - Returns newly created node * @param hGraph - Graph to which to add the node * @param dependencies - Dependencies of the node * @param numDependencies - Number of dependencies * * @return * CUDA_SUCCESS, * CUDA_ERROR_DEINITIALIZED, * CUDA_ERROR_NOT_INITIALIZED, * CUDA_ERROR_INVALID_VALUE, * * * @see JCudaDriver#cuGraphCreate * @see JCudaDriver#cuGraphDestroyNode * @see JCudaDriver#cuGraphAddChildGraphNode * @see JCudaDriver#cuGraphAddKernelNode * @see JCudaDriver#cuGraphAddHostNode * @see JCudaDriver#cuGraphAddMemcpyNode * @see JCudaDriver#cuGraphAddMemsetNode */ public static int cuGraphAddEmptyNode(CUgraphNode phGraphNode, CUgraph hGraph, CUgraphNode dependencies[], long numDependencies) { return checkResult(cuGraphAddEmptyNodeNative(phGraphNode, hGraph, dependencies, numDependencies)); } private static native int cuGraphAddEmptyNodeNative(CUgraphNode phGraphNode, CUgraph hGraph, CUgraphNode dependencies[], long numDependencies); /** * Creates an event record node and adds it to a graph.
.
* * Creates a new event record node and adds it to \p hGraph with \p numDependencies * dependencies specified via \p dependencies and arguments specified in \p params. * It is possible for \p numDependencies to be 0, in which case the node will be placed * at the root of the graph. \p dependencies may not have any duplicate entries. * A handle to the new node will be returned in \p phGraphNode. * * Each launch of the graph will record \p event to capture execution of the * node's dependencies. * * @param phGraphNode Returns newly created node * @param hGraph Graph to which to add the node * @param dependencies Dependencies of the node * @param numDependencies Number of dependencies * @param event Event for the node * * @return * CUDA_SUCCESS, * CUDA_ERROR_DEINITIALIZED, * CUDA_ERROR_NOT_INITIALIZED, * CUDA_ERROR_NOT_SUPPORTED, * CUDA_ERROR_INVALID_VALUE * * * @see JCudaDriver#cuGraphAddEventWaitNode, * @see JCudaDriver#cuEventRecord, * @see JCudaDriver#cuStreamWaitEvent, * @see JCudaDriver#cuGraphCreate, * @see JCudaDriver#cuGraphDestroyNode, * @see JCudaDriver#cuGraphAddChildGraphNode, * @see JCudaDriver#cuGraphAddEmptyNode, * @see JCudaDriver#cuGraphAddKernelNode, * @see JCudaDriver#cuGraphAddMemcpyNode, * @see JCudaDriver#cuGraphAddMemsetNode, */ public static int cuGraphAddEventRecordNode(CUgraphNode phGraphNode, CUgraph hGraph, CUgraphNode dependencies[], long numDependencies, CUevent event) { return checkResult(cuGraphAddEventRecordNodeNative(phGraphNode, hGraph, dependencies, numDependencies, event)); } private static native int cuGraphAddEventRecordNodeNative(CUgraphNode phGraphNode, CUgraph hGraph, CUgraphNode dependencies[], long numDependencies, CUevent event); /** * Returns the event associated with an event record node.
* * Returns the event of event record node \p hNode in \p event_out. * * @param hNode Node to get the event for * @param event_out Pointer to return the event * * @return * CUDA_SUCCESS, * CUDA_ERROR_DEINITIALIZED, * CUDA_ERROR_NOT_INITIALIZED, * CUDA_ERROR_INVALID_VALUE * * @see JCudaDriver#cuGraphAddEventRecordNode, * @see JCudaDriver#cuGraphEventRecordNodeSetEvent, * @see JCudaDriver#cuGraphEventWaitNodeGetEvent, * @see JCudaDriver#cuEventRecord, * @see JCudaDriver#cuStreamWaitEvent */ public static int cuGraphEventRecordNodeGetEvent(CUgraphNode hNode, CUevent event_out) { return checkResult(cuGraphEventRecordNodeGetEventNative(hNode, event_out)); } private static native int cuGraphEventRecordNodeGetEventNative(CUgraphNode hNode, CUevent event_out); /** * Sets an event record node's event.
* * Sets the event of event record node \p hNode to \p event. * * @param hNode Node to set the event for * @param event Event to use * * @return * CUDA_SUCCESS, * CUDA_ERROR_INVALID_VALUE, * CUDA_ERROR_INVALID_HANDLE, * CUDA_ERROR_OUT_OF_MEMORY * * @see JCudaDriver#cuGraphAddEventRecordNode, * @see JCudaDriver#cuGraphEventRecordNodeGetEvent, * @see JCudaDriver#cuGraphEventWaitNodeSetEvent, * @see JCudaDriver#cuEventRecord, * @see JCudaDriver#cuStreamWaitEvent */ public static int cuGraphEventRecordNodeSetEvent(CUgraphNode hNode, CUevent event) { return checkResult(cuGraphEventRecordNodeSetEventNative(hNode, event)); } private static native int cuGraphEventRecordNodeSetEventNative(CUgraphNode hNode, CUevent event); /** * Creates an event wait node and adds it to a graph.
* * Creates a new event wait node and adds it to \p hGraph with \p numDependencies * dependencies specified via \p dependencies and arguments specified in \p params. * It is possible for \p numDependencies to be 0, in which case the node will be placed * at the root of the graph. \p dependencies may not have any duplicate entries. * A handle to the new node will be returned in \p phGraphNode. * * The graph node will wait for all work captured in \p event. See @see JCudaDriver#cuEventRecord() * for details on what is captured by an event. \p event may be from a different context * or device than the launch stream. * * @param phGraphNode Returns newly created node * @param hGraph Graph to which to add the node * @param dependencies Dependencies of the node * @param numDependencies Number of dependencies * @param event Event for the node * * @return * CUDA_SUCCESS, * CUDA_ERROR_DEINITIALIZED, * CUDA_ERROR_NOT_INITIALIZED, * CUDA_ERROR_NOT_SUPPORTED, * CUDA_ERROR_INVALID_VALUE * * @see JCudaDriver#cuGraphAddEventRecordNode, * @see JCudaDriver#cuEventRecord, * @see JCudaDriver#cuStreamWaitEvent, * @see JCudaDriver#cuGraphCreate, * @see JCudaDriver#cuGraphDestroyNode, * @see JCudaDriver#cuGraphAddChildGraphNode, * @see JCudaDriver#cuGraphAddEmptyNode, * @see JCudaDriver#cuGraphAddKernelNode, * @see JCudaDriver#cuGraphAddMemcpyNode, * @see JCudaDriver#cuGraphAddMemsetNode, */ public static int cuGraphAddEventWaitNode(CUgraphNode phGraphNode, CUgraph hGraph, CUgraphNode dependencies[], long numDependencies, CUevent event) { return checkResult(cuGraphAddEventWaitNodeNative(phGraphNode, hGraph, dependencies, numDependencies, event)); } private static native int cuGraphAddEventWaitNodeNative(CUgraphNode phGraphNode, CUgraph hGraph, CUgraphNode dependencies[], long numDependencies, CUevent event); /** * Returns the event associated with an event wait node.
* * Returns the event of event wait node \p hNode in \p event_out. * * @param hNode Node to get the event for * @param event_out Pointer to return the event * * @return * CUDA_SUCCESS, * CUDA_ERROR_DEINITIALIZED, * CUDA_ERROR_NOT_INITIALIZED, * CUDA_ERROR_INVALID_VALUE * * @see JCudaDriver#cuGraphAddEventWaitNode, * @see JCudaDriver#cuGraphEventWaitNodeSetEvent, * @see JCudaDriver#cuGraphEventRecordNodeGetEvent, * @see JCudaDriver#cuEventRecord, * @see JCudaDriver#cuStreamWaitEvent */ public static int cuGraphEventWaitNodeGetEvent(CUgraphNode hNode, CUevent event_out) { return checkResult(cuGraphEventWaitNodeGetEventNative(hNode, event_out)); } private static native int cuGraphEventWaitNodeGetEventNative(CUgraphNode hNode, CUevent event_out); /** * Sets an event wait node's event.
* * Sets the event of event wait node \p hNode to \p event. * * @param hNode Node to set the event for * @param event Event to use * * @return * CUDA_SUCCESS, * CUDA_ERROR_INVALID_VALUE, * CUDA_ERROR_INVALID_HANDLE, * CUDA_ERROR_OUT_OF_MEMORY * * @see JCudaDriver#cuGraphAddEventWaitNode, * @see JCudaDriver#cuGraphEventWaitNodeGetEvent, * @see JCudaDriver#cuGraphEventRecordNodeSetEvent, * @see JCudaDriver#cuEventRecord, * @see JCudaDriver#cuStreamWaitEvent */ public static int cuGraphEventWaitNodeSetEvent(CUgraphNode hNode, CUevent event) { return checkResult(cuGraphEventWaitNodeSetEventNative(hNode, event)); } private static native int cuGraphEventWaitNodeSetEventNative(CUgraphNode hNode, CUevent event); /** * Clones a graph.
*
 * This function creates a copy of originalGraph and returns it in
 * phGraphClone. All parameters are copied into the cloned graph. The
 * original graph may be modified after this call without affecting
 * the clone.
 *
 * Child graph nodes in the original graph are recursively copied into
 * the clone.
 *
 * @param phGraphClone - Returns newly created cloned graph
 * @param originalGraph - Graph to clone
 *
 * @return
 * CUDA_SUCCESS,
 * CUDA_ERROR_INVALID_VALUE,
 * CUDA_ERROR_OUT_OF_MEMORY
 *
 * @see JCudaDriver#cuGraphCreate
 * @see JCudaDriver#cuGraphNodeFindInClone
 */
public static int cuGraphClone(CUgraph phGraphClone, CUgraph originalGraph)
{
    // Delegate to the native binding and translate the status code
    final int status = cuGraphCloneNative(phGraphClone, originalGraph);
    return checkResult(status);
}
private static native int cuGraphCloneNative(
    CUgraph phGraphClone, CUgraph originalGraph);

/**
 * Finds a cloned version of a node.
*
* This function returns the node in \p hClonedGraph corresponding to \p hOriginalNode * in the original graph.
*
* \p hClonedGraph must have been cloned from \p hOriginalGraph via ::cuGraphClone. * \p hOriginalNode must have been in \p hOriginalGraph at the time of the call to * ::cuGraphClone, and the corresponding cloned node in \p hClonedGraph must not have * been removed. The cloned node is then returned via \p phClonedNode. * * @param phNode - Returns handle to the cloned node * @param hOriginalNode - Handle to the original node * @param hClonedGraph - Cloned graph to query * * @return * CUDA_SUCCESS, * CUDA_ERROR_INVALID_VALUE, * * @see JCudaDriver#cuGraphClone */ public static int cuGraphNodeFindInClone(CUgraphNode phNode, CUgraphNode hOriginalNode, CUgraph hClonedGraph) { return checkResult(cuGraphNodeFindInCloneNative(phNode, hOriginalNode, hClonedGraph)); } private static native int cuGraphNodeFindInCloneNative(CUgraphNode phNode, CUgraphNode hOriginalNode, CUgraph hClonedGraph); /** * Returns a node's type.
*
* Returns the node type of \p hNode in \p type. * * @param hNode - Node to query * @param type - Pointer to return the node type * * @return * CUDA_SUCCESS, * CUDA_ERROR_DEINITIALIZED, * CUDA_ERROR_NOT_INITIALIZED, * CUDA_ERROR_INVALID_VALUE * * * @see JCudaDriver#cuGraphGetNodes * @see JCudaDriver#cuGraphGetRootNodes * @see JCudaDriver#cuGraphChildGraphNodeGetGraph * @see JCudaDriver#cuGraphKernelNodeGetParams * @see JCudaDriver#cuGraphKernelNodeSetParams * @see JCudaDriver#cuGraphHostNodeGetParams * @see JCudaDriver#cuGraphHostNodeSetParams * @see JCudaDriver#cuGraphMemcpyNodeGetParams * @see JCudaDriver#cuGraphMemcpyNodeSetParams * @see JCudaDriver#cuGraphMemsetNodeGetParams * @see JCudaDriver#cuGraphMemsetNodeSetParams */ public static int cuGraphNodeGetType(CUgraphNode hNode, int type[]) { return checkResult(cuGraphNodeGetTypeNative(hNode, type)); } private static native int cuGraphNodeGetTypeNative(CUgraphNode hNode, int type[]); /** * Returns a graph's nodes.
*
* Returns a list of \p hGraph's nodes. \p nodes may be NULL, in which case this * function will return the number of nodes in \p numNodes. Otherwise, * \p numNodes entries will be filled in. If \p numNodes is higher than the actual * number of nodes, the remaining entries in \p nodes will be set to NULL, and the * number of nodes actually obtained will be returned in \p numNodes. * * @param hGraph - Graph to query * @param nodes - Pointer to return the nodes * @param numNodes - See description * * @return * CUDA_SUCCESS, * CUDA_ERROR_DEINITIALIZED, * CUDA_ERROR_NOT_INITIALIZED, * CUDA_ERROR_INVALID_VALUE * * * @see JCudaDriver#cuGraphCreate * @see JCudaDriver#cuGraphGetRootNodes * @see JCudaDriver#cuGraphGetEdges * @see JCudaDriver#cuGraphNodeGetType * @see JCudaDriver#cuGraphNodeGetDependencies * @see JCudaDriver#cuGraphNodeGetDependentNodes */ public static int cuGraphGetNodes(CUgraph hGraph, CUgraphNode nodes[], long numNodes[]) { return checkResult(cuGraphGetNodesNative(hGraph, nodes, numNodes)); } private static native int cuGraphGetNodesNative(CUgraph hGraph, CUgraphNode nodes[], long numNodes[]); /** * Returns a graph's root nodes.
*
* Returns a list of \p hGraph's root nodes. \p rootNodes may be NULL, in which case this * function will return the number of root nodes in \p numRootNodes. Otherwise, * \p numRootNodes entries will be filled in. If \p numRootNodes is higher than the actual * number of root nodes, the remaining entries in \p rootNodes will be set to NULL, and the * number of nodes actually obtained will be returned in \p numRootNodes. * * @param hGraph - Graph to query * @param rootNodes - Pointer to return the root nodes * @param numRootNodes - See description * * @return * CUDA_SUCCESS, * CUDA_ERROR_DEINITIALIZED, * CUDA_ERROR_NOT_INITIALIZED, * CUDA_ERROR_INVALID_VALUE * * * @see JCudaDriver#cuGraphCreate * @see JCudaDriver#cuGraphGetNodes * @see JCudaDriver#cuGraphGetEdges * @see JCudaDriver#cuGraphNodeGetType * @see JCudaDriver#cuGraphNodeGetDependencies * @see JCudaDriver#cuGraphNodeGetDependentNodes */ public static int cuGraphGetRootNodes(CUgraph hGraph, CUgraphNode rootNodes[], long numRootNodes[]) { return checkResult(cuGraphGetRootNodesNative(hGraph, rootNodes, numRootNodes)); } private static native int cuGraphGetRootNodesNative(CUgraph hGraph, CUgraphNode rootNodes[], long numRootNodes[]); /** * Returns a graph's dependency edges.
*
* Returns a list of \p hGraph's dependency edges. Edges are returned via corresponding * indices in \p from and \p to; that is, the node in \p to[i] has a dependency on the * node in \p from[i]. \p from and \p to may both be NULL, in which * case this function only returns the number of edges in \p numEdges. Otherwise, * \p numEdges entries will be filled in. If \p numEdges is higher than the actual * number of edges, the remaining entries in \p from and \p to will be set to NULL, and * the number of edges actually returned will be written to \p numEdges. * * @param hGraph - Graph to get the edges from * @param from - Location to return edge endpoints * @param to - Location to return edge endpoints * @param numEdges - See description * * @return * CUDA_SUCCESS, * CUDA_ERROR_DEINITIALIZED, * CUDA_ERROR_NOT_INITIALIZED, * CUDA_ERROR_INVALID_VALUE * * * @see JCudaDriver#cuGraphGetNodes * @see JCudaDriver#cuGraphGetRootNodes * @see JCudaDriver#cuGraphAddDependencies * @see JCudaDriver#cuGraphRemoveDependencies * @see JCudaDriver#cuGraphNodeGetDependencies * @see JCudaDriver#cuGraphNodeGetDependentNodes */ public static int cuGraphGetEdges(CUgraph hGraph, CUgraphNode from[], CUgraphNode to[], long numEdges[]) { return checkResult(cuGraphGetEdgesNative(hGraph, from, to, numEdges)); } private static native int cuGraphGetEdgesNative(CUgraph hGraph, CUgraphNode from[], CUgraphNode to[], long numEdges[]); /** * Returns a node's dependencies.
*
* Returns a list of \p node's dependencies. \p dependencies may be NULL, in which case this * function will return the number of dependencies in \p numDependencies. Otherwise, * \p numDependencies entries will be filled in. If \p numDependencies is higher than the actual * number of dependencies, the remaining entries in \p dependencies will be set to NULL, and the * number of nodes actually obtained will be returned in \p numDependencies. * * @param hNode - Node to query * @param dependencies - Pointer to return the dependencies * @param numDependencies - See description * * @return * CUDA_SUCCESS, * CUDA_ERROR_DEINITIALIZED, * CUDA_ERROR_NOT_INITIALIZED, * CUDA_ERROR_INVALID_VALUE * * * @see JCudaDriver#cuGraphNodeGetDependentNodes * @see JCudaDriver#cuGraphGetNodes * @see JCudaDriver#cuGraphGetRootNodes * @see JCudaDriver#cuGraphGetEdges * @see JCudaDriver#cuGraphAddDependencies * @see JCudaDriver#cuGraphRemoveDependencies */ public static int cuGraphNodeGetDependencies(CUgraphNode hNode, CUgraphNode dependencies[], long numDependencies[]) { return checkResult(cuGraphNodeGetDependenciesNative(hNode, dependencies, numDependencies)); } private static native int cuGraphNodeGetDependenciesNative(CUgraphNode hNode, CUgraphNode dependencies[], long numDependencies[]); /** * Returns a node's dependent nodes.
*
* Returns a list of \p node's dependent nodes. \p dependentNodes may be NULL, in which * case this function will return the number of dependent nodes in \p numDependentNodes. * Otherwise, \p numDependentNodes entries will be filled in. If \p numDependentNodes is * higher than the actual number of dependent nodes, the remaining entries in * \p dependentNodes will be set to NULL, and the number of nodes actually obtained will * be returned in \p numDependentNodes. * * @param hNode - Node to query * @param dependentNodes - Pointer to return the dependent nodes * @param numDependentNodes - See description * * @return * CUDA_SUCCESS, * CUDA_ERROR_DEINITIALIZED, * CUDA_ERROR_NOT_INITIALIZED, * CUDA_ERROR_INVALID_VALUE * * * @see JCudaDriver#cuGraphNodeGetDependencies * @see JCudaDriver#cuGraphGetNodes * @see JCudaDriver#cuGraphGetRootNodes * @see JCudaDriver#cuGraphGetEdges * @see JCudaDriver#cuGraphAddDependencies * @see JCudaDriver#cuGraphRemoveDependencies */ public static int cuGraphNodeGetDependentNodes(CUgraphNode hNode, CUgraphNode dependentNodes[], long numDependentNodes[]) { return checkResult(cuGraphNodeGetDependentNodesNative(hNode, dependentNodes, numDependentNodes)); } private static native int cuGraphNodeGetDependentNodesNative(CUgraphNode hNode, CUgraphNode dependentNodes[], long numDependentNodes[]); /** * Adds dependency edges to a graph.
*
* The number of dependencies to be added is defined by \p numDependencies * Elements in \p from and \p to at corresponding indices define a dependency. * Each node in \p from and \p to must belong to \p hGraph.
*
* If \p numDependencies is 0, elements in \p from and \p to will be ignored. * Specifying an existing dependency will return an error.
* * @param hGraph - Graph to which dependencies are added * @param from - Array of nodes that provide the dependencies * @param to - Array of dependent nodes * @param numDependencies - Number of dependencies to be added * * @return * CUDA_SUCCESS, * CUDA_ERROR_INVALID_VALUE * * * @see JCudaDriver#cuGraphRemoveDependencies * @see JCudaDriver#cuGraphGetEdges * @see JCudaDriver#cuGraphNodeGetDependencies * @see JCudaDriver#cuGraphNodeGetDependentNodes */ public static int cuGraphAddDependencies(CUgraph hGraph, CUgraphNode from[], CUgraphNode to[], long numDependencies) { return checkResult(cuGraphAddDependenciesNative(hGraph, from, to, numDependencies)); } private static native int cuGraphAddDependenciesNative(CUgraph hGraph, CUgraphNode from[], CUgraphNode to[], long numDependencies); /** * Removes dependency edges from a graph.
*
* The number of \p dependencies to be removed is defined by \p numDependencies. * Elements in \p from and \p to at corresponding indices define a dependency. * Each node in \p from and \p to must belong to \p hGraph.
*
* If \p numDependencies is 0, elements in \p from and \p to will be ignored. * Specifying a non-existing dependency will return an error. * * @param hGraph - Graph from which to remove dependencies * @param from - Array of nodes that provide the dependencies * @param to - Array of dependent nodes * @param numDependencies - Number of dependencies to be removed * * @return * CUDA_SUCCESS, * CUDA_ERROR_INVALID_VALUE * * * @see JCudaDriver#cuGraphAddDependencies * @see JCudaDriver#cuGraphGetEdges * @see JCudaDriver#cuGraphNodeGetDependencies * @see JCudaDriver#cuGraphNodeGetDependentNodes */ public static int cuGraphRemoveDependencies(CUgraph hGraph, CUgraphNode from[], CUgraphNode to[], long numDependencies) { return checkResult(cuGraphRemoveDependenciesNative(hGraph, from, to, numDependencies)); } private static native int cuGraphRemoveDependenciesNative(CUgraph hGraph, CUgraphNode from[], CUgraphNode to[], long numDependencies); /** * Remove a node from the graph.
*
* Removes \p hNode from its graph. This operation also severs any dependencies of other nodes * on \p hNode and vice versa.
* * @param hNode - Node to remove * * @return * CUDA_SUCCESS, * CUDA_ERROR_INVALID_VALUE * * * @see JCudaDriver#cuGraphAddChildGraphNode * @see JCudaDriver#cuGraphAddEmptyNode * @see JCudaDriver#cuGraphAddKernelNode * @see JCudaDriver#cuGraphAddHostNode * @see JCudaDriver#cuGraphAddMemcpyNode * @see JCudaDriver#cuGraphAddMemsetNode */ public static int cuGraphDestroyNode(CUgraphNode hNode) { return checkResult(cuGraphDestroyNodeNative(hNode)); } private static native int cuGraphDestroyNodeNative(CUgraphNode hNode); /** * Creates an executable graph from a graph.
*
* Instantiates \p hGraph as an executable graph. The graph is validated for any * structural constraints or intra-node constraints which were not previously * validated. If instantiation is successful, a handle to the instantiated graph * is returned in \p graphExec.
*
* If there are any errors, diagnostic information may be returned in \p errorNode and * \p logBuffer. This is the primary way to inspect instantiation errors. The output * will be null terminated unless the diagnostics overflow * the buffer. In this case, they will be truncated, and the last byte can be * inspected to determine if truncation occurred.
* * @param phGraphExec - Returns instantiated graph * @param hGraph - Graph to instantiate * @param phErrorNode - In case of an instantiation error, this may be modified to * indicate a node contributing to the error * @param logBuffer - A character buffer to store diagnostic messages * @param bufferSize - Size of the log buffer in bytes * * @return * CUDA_SUCCESS, * CUDA_ERROR_DEINITIALIZED, * CUDA_ERROR_NOT_INITIALIZED, * CUDA_ERROR_INVALID_VALUE * * * @see JCudaDriver#cuGraphCreate * @see JCudaDriver#cuGraphLaunch * @see JCudaDriver#cuGraphExecDestroy */ public static int cuGraphInstantiate(CUgraphExec phGraphExec, CUgraph hGraph, CUgraphNode phErrorNode, byte logBuffer[], long bufferSize) { return checkResult(cuGraphInstantiateNative(phGraphExec, hGraph, phErrorNode, logBuffer, bufferSize)); } private static native int cuGraphInstantiateNative(CUgraphExec phGraphExec, CUgraph hGraph, CUgraphNode phErrorNode, byte logBuffer[], long bufferSize); /** * Sets the parameters for a kernel node in the given graphExec.
*
* Sets the parameters of a kernel node in an executable graph \p hGraphExec. * The node is identified by the corresponding node \p hNode in the * non-executable graph, from which the executable graph was instantiated.
*
* \p hNode must not have been removed from the original graph. The \p func field * of \p nodeParams cannot be modified and must match the original value. * All other values can be modified.
*
* The modifications take effect at the next launch of \p hGraphExec. Already * enqueued or running launches of \p hGraphExec are not affected by this call. * \p hNode is also not modified by this call.
*
* @param hGraphExec - The executable graph in which to set the specified node * @param hNode - kernel node from the graph from which graphExec was instantiated * @param nodeParams - Updated Parameters to set * * @return * CUDA_SUCCESS, * CUDA_ERROR_INVALID_VALUE, * * @see JCudaDriver#cuGraphAddKernelNode, * @see JCudaDriver#cuGraphKernelNodeSetParams, * @see JCudaDriver#cuGraphInstantiate */ public static int cuGraphExecKernelNodeSetParams(CUgraphExec hGraphExec, CUgraphNode hNode, CUDA_KERNEL_NODE_PARAMS nodeParams) { return checkResult(cuGraphExecKernelNodeSetParamsNative(hGraphExec, hNode, nodeParams)); } private static native int cuGraphExecKernelNodeSetParamsNative(CUgraphExec hGraphExec, CUgraphNode hNode, CUDA_KERNEL_NODE_PARAMS nodeParams); /** * Sets the parameters for a memcpy node in the given graphExec.
*
* Updates the work represented by \p hNode in \p hGraphExec as though \p hNode had * contained \p copyParams at instantiation. hNode must remain in the graph which was * used to instantiate \p hGraphExec. Changed edges to and from hNode are ignored.
*
* The source and destination memory in \p copyParams must be allocated from the same * contexts as the original source and destination memory. Both the instantiation-time * memory operands and the memory operands in \p copyParams must be 1-dimensional. * Zero-length operations are not supported.
*
* The modifications only affect future launches of \p hGraphExec. Already enqueued * or running launches of \p hGraphExec are not affected by this call. hNode is also * not modified by this call.
*
* Returns CUDA_ERROR_INVALID_VALUE if the memory operands’ mappings changed or * either the original or new memory operands are multidimensional. *
* @param hGraphExec The executable graph in which to set the specified node * @param hNode Memcpy node from the graph which was used to instantiate graphExec * @param copyParams The updated parameters to set * @param ctx Context on which to run the node * * @return CUDA_SUCCESS, CUDA_ERROR_INVALID_VALUE * * @see JCudaDriver#cuGraphInstantiate, * @see JCudaDriver#cuGraphExecKernelNodeSetParams * @see JCudaDriver#cuGraphExecMemsetNodeSetParams * @see JCudaDriver#cuGraphExecHostNodeSetParams */ public static int cuGraphExecMemcpyNodeSetParams(CUgraphExec hGraphExec, CUgraphNode hNode, CUDA_MEMCPY3D copyParams, CUcontext ctx) { return checkResult(cuGraphExecMemcpyNodeSetParamsNative(hGraphExec, hNode, copyParams, ctx)); } private static native int cuGraphExecMemcpyNodeSetParamsNative(CUgraphExec hGraphExec, CUgraphNode hNode, CUDA_MEMCPY3D copyParams, CUcontext ctx); /** * Sets the parameters for a memset node in the given graphExec.
*
* Updates the work represented by \p hNode in \p hGraphExec as though \p hNode had * contained \p memsetParams at instantiation. hNode must remain in the graph which was * used to instantiate \p hGraphExec. Changed edges to and from hNode are ignored.
*
* The destination memory in \p memsetParams must be allocated from the same * contexts as the original destination memory. Both the instantiation-time * memory operand and the memory operand in \p memsetParams must be 1-dimensional. * Zero-length operations are not supported.
*
* The modifications only affect future launches of \p hGraphExec. Already enqueued * or running launches of \p hGraphExec are not affected by this call. hNode is also * not modified by this call.
*
* Returns CUDA_ERROR_INVALID_VALUE if the memory operand’s mappings changed or * either the original or new memory operand are multidimensional. * * @param hGraphExec The executable graph in which to set the specified node * @param hNode Memset node from the graph which was used to instantiate graphExec * @param memsetParams The updated parameters to set * @param ctx Context on which to run the node * * @return CUDA_SUCCESS, CUDA_ERROR_INVALID_VALUE * * @see JCudaDriver#cuGraphInstantiate * @see JCudaDriver#cuGraphExecKernelNodeSetParams * @see JCudaDriver#cuGraphExecMemcpyNodeSetParams * @see JCudaDriver#cuGraphExecHostNodeSetParams */ public static int cuGraphExecMemsetNodeSetParams(CUgraphExec hGraphExec, CUgraphNode hNode, CUDA_MEMSET_NODE_PARAMS memsetParams, CUcontext ctx) { return checkResult(cuGraphExecMemsetNodeSetParamsNative(hGraphExec, hNode, memsetParams, ctx)); } private static native int cuGraphExecMemsetNodeSetParamsNative(CUgraphExec hGraphExec, CUgraphNode hNode, CUDA_MEMSET_NODE_PARAMS memsetParams, CUcontext ctx); /** * Sets the parameters for a host node in the given graphExec.
*
* Updates the work represented by \p hNode in \p hGraphExec as though \p hNode had * contained \p nodeParams at instantiation. hNode must remain in the graph which was * used to instantiate \p hGraphExec. Changed edges to and from hNode are ignored.
*
* The modifications only affect future launches of \p hGraphExec. Already enqueued * or running launches of \p hGraphExec are not affected by this call. hNode is also * not modified by this call.
* * @param hGraphExec The executable graph in which to set the specified node * @param hNode Host node from the graph which was used to instantiate graphExec * @param nodeParams The updated parameters to set * * @return CUDA_SUCCESS, CUDA_ERROR_INVALID_VALUE * * @see JCudaDriver#cuGraphInstantiate * @see JCudaDriver#cuGraphExecKernelNodeSetParams * @see JCudaDriver#cuGraphExecMemcpyNodeSetParams * @see JCudaDriver#cuGraphExecMemsetNodeSetParams */ public static int cuGraphExecHostNodeSetParams(CUgraphExec hGraphExec, CUgraphNode hNode, CUDA_HOST_NODE_PARAMS nodeParams) { return checkResult(cuGraphExecHostNodeSetParamsNative(hGraphExec, hNode, nodeParams)); } private static native int cuGraphExecHostNodeSetParamsNative(CUgraphExec hGraphExec, CUgraphNode hNode, CUDA_HOST_NODE_PARAMS nodeParams); /** * Updates node parameters in the child graph node in the given graphExec.

* * Updates the work represented by \p hNode in \p hGraphExec as though the nodes contained * in \p hNode's graph had the parameters contained in \p childGraph's nodes at instantiation. * \p hNode must remain in the graph which was used to instantiate \p hGraphExec. * Changed edges to and from \p hNode are ignored.

* * The modifications only affect future launches of \p hGraphExec. Already enqueued * or running launches of \p hGraphExec are not affected by this call. \p hNode is also * not modified by this call.

* * The topology of \p childGraph, as well as the node insertion order, must match that * of the graph contained in \p hNode. See ::cuGraphExecUpdate() for a list of restrictions * on what can be updated in an instantiated graph. The update is recursive, so child graph * nodes contained within the top level child graph will also be updated.

* * @param hGraphExec The executable graph in which to set the specified node * @param hNode Host node from the graph which was used to instantiate graphExec * @param childGraph The graph supplying the updated parameters * * @return * CUDA_SUCCESS, * CUDA_ERROR_INVALID_VALUE, * * @see JCudaDriver#cuGraphInstantiate * @see JCudaDriver#cuGraphExecUpdate * @see JCudaDriver#cuGraphExecKernelNodeSetParams * @see JCudaDriver#cuGraphExecMemcpyNodeSetParams * @see JCudaDriver#cuGraphExecMemsetNodeSetParams */ public static int cuGraphExecChildGraphNodeSetParams(CUgraphExec hGraphExec, CUgraphNode hNode, CUgraph childGraph) { return checkResult(cuGraphExecChildGraphNodeSetParamsNative(hGraphExec, hNode, childGraph)); } private static native int cuGraphExecChildGraphNodeSetParamsNative(CUgraphExec hGraphExec, CUgraphNode hNode, CUgraph childGraph); /** * Sets the event for an event record node in the given graphExec.

* * Sets the event of an event record node in an executable graph \p hGraphExec. * The node is identified by the corresponding node \p hNode in the * non-executable graph, from which the executable graph was instantiated.

* * The modifications only affect future launches of \p hGraphExec. Already * enqueued or running launches of \p hGraphExec are not affected by this call. * \p hNode is also not modified by this call.

* * @param hGraphExec The executable graph in which to set the specified node * @param hNode event record node from the graph from which graphExec was instantiated * @param event Updated event to use * * @return * CUDA_SUCCESS, * CUDA_ERROR_INVALID_VALUE, * * @see JCudaDriver#cuGraphAddEventRecordNode * @see JCudaDriver#cuGraphEventRecordNodeGetEvent * @see JCudaDriver#cuGraphEventWaitNodeSetEvent * @see JCudaDriver#cuEventRecord * @see JCudaDriver#cuStreamWaitEvent * @see JCudaDriver#cuGraphCreate * @see JCudaDriver#cuGraphDestroyNode * @see JCudaDriver#cuGraphInstantiate */ public static int cuGraphExecEventRecordNodeSetEvent(CUgraphExec hGraphExec, CUgraphNode hNode, CUevent event) { return checkResult(cuGraphExecEventRecordNodeSetEventNative(hGraphExec, hNode, event)); } private static native int cuGraphExecEventRecordNodeSetEventNative(CUgraphExec hGraphExec, CUgraphNode hNode, CUevent event); /** * Sets the event for an event record node in the given graphExec.

* * Sets the event of an event record node in an executable graph \p hGraphExec. * The node is identified by the corresponding node \p hNode in the * non-executable graph, from which the executable graph was instantiated.

* * The modifications only affect future launches of \p hGraphExec. Already * enqueued or running launches of \p hGraphExec are not affected by this call. * \p hNode is also not modified by this call.

* * @param hGraphExec The executable graph in which to set the specified node * @param hNode event wait node from the graph from which graphExec was instantiated * @param event Updated event to use * * @return * CUDA_SUCCESS, * CUDA_ERROR_INVALID_VALUE, * * @see JCudaDriver#cuGraphAddEventWaitNode * @see JCudaDriver#cuGraphEventWaitNodeGetEvent * @see JCudaDriver#cuGraphEventRecordNodeSetEvent * @see JCudaDriver#cuEventRecord * @see JCudaDriver#cuStreamWaitEvent * @see JCudaDriver#cuGraphCreate * @see JCudaDriver#cuGraphDestroyNode * @see JCudaDriver#cuGraphInstantiate */ public static int cuGraphExecEventWaitNodeSetEvent(CUgraphExec hGraphExec, CUgraphNode hNode, CUevent event) { return checkResult(cuGraphExecEventWaitNodeSetEventNative(hGraphExec, hNode, event)); } private static native int cuGraphExecEventWaitNodeSetEventNative(CUgraphExec hGraphExec, CUgraphNode hNode, CUevent event); /** *
      * Sets the parameters for an external semaphore signal node in the
 * given graphExec.
 *
 * Sets the parameters of an external semaphore signal node in an
 * executable graph hGraphExec. The node is identified by the
 * corresponding node hNode in the non-executable graph, from which
 * the executable graph was instantiated.
 *
 * The hNode must not have been removed from the original graph.
 *
 * The modifications only affect future launches of hGraphExec.
 * Already enqueued or running launches of hGraphExec are not affected
 * by this call. The hNode is also not modified by this call.
 *
 * Changing the numExtSems field of nodeParams is not supported.
 *
 * @param hGraphExec - The executable graph in which to set the specified node
 * @param hNode - semaphore signal node from the graph from which graphExec was instantiated
 * @param nodeParams - Updated Parameters to set
 *
 * @return
 * CUDA_SUCCESS,
 * CUDA_ERROR_INVALID_VALUE
 *
 * @see JCudaDriver#cuGraphCreate
 * @see JCudaDriver#cuGraphDestroyNode
 * @see JCudaDriver#cuGraphInstantiate
 */
public static int cuGraphExecExternalSemaphoresSignalNodeSetParams(CUgraphExec hGraphExec, CUgraphNode hNode, CUDA_EXT_SEM_SIGNAL_NODE_PARAMS nodeParams[])
{
    // TODO Not supported. Pull requests welcome.
    // NOTE(review): this binding is intentionally unimplemented;
    // every call unconditionally throws.
    throw new UnsupportedOperationException("The cuGraphExecExternalSemaphoresSignalNodeSetParams function is not supported in JCuda");
}

/**
      * Sets the parameters for an external semaphore wait node in the
 * given graphExec.
 *
 * Sets the parameters of an external semaphore wait node in an
 * executable graph hGraphExec. The node is identified by the
 * corresponding node hNode in the non-executable graph, from which
 * the executable graph was instantiated.
 *
 * The hNode must not have been removed from the original graph.
 *
 * The modifications only affect future launches of hGraphExec.
 * Already enqueued or running launches of hGraphExec are not affected
 * by this call. The hNode is also not modified by this call.
 *
 * Changing the numExtSems field of nodeParams is not supported.
 *
 * @param hGraphExec - The executable graph in which to set the specified node
 * @param hNode - semaphore wait node from the graph from which graphExec was instantiated
 * @param nodeParams - Updated Parameters to set
 *
 * @return
 * CUDA_SUCCESS,
 * CUDA_ERROR_INVALID_VALUE
 *
 * @see JCudaDriver#cuGraphCreate
 * @see JCudaDriver#cuGraphDestroyNode
 * @see JCudaDriver#cuGraphInstantiate
 */
public static int cuGraphExecExternalSemaphoresWaitNodeSetParams(CUgraphExec hGraphExec, CUgraphNode hNode, CUDA_EXT_SEM_WAIT_NODE_PARAMS nodeParams[])
{
    // TODO Not supported. Pull requests welcome.
    // NOTE(review): this binding is intentionally unimplemented;
    // every call unconditionally throws.
    throw new UnsupportedOperationException("The cuGraphExecExternalSemaphoresWaitNodeSetParams function is not supported in JCuda");
}

/**
 * Uploads an executable graph in a stream.

* * Uploads \p hGraphExec to the device in \p hStream without executing it. Uploads of * the same \p hGraphExec will be serialized. Each upload is ordered behind both any * previous work in \p hStream and any previous launches of \p hGraphExec.

* * @param hGraphExec Executable graph to upload * @param hStream Stream in which to upload the graph * * @return * CUDA_SUCCESS, * CUDA_ERROR_DEINITIALIZED, * CUDA_ERROR_NOT_INITIALIZED, * CUDA_ERROR_INVALID_VALUE * * @see JCudaDriver#cuGraphInstantiate * @see JCudaDriver#cuGraphLaunch * @see JCudaDriver#cuGraphExecDestroy */ public static int cuGraphUpload(CUgraphExec hGraphExec, CUstream hStream) { return checkResult(cuGraphUploadNative(hGraphExec, hStream)); } private static native int cuGraphUploadNative(CUgraphExec hGraphExec, CUstream hStream); /** * Launches an executable graph in a stream.
*
* Executes \p hGraphExec in \p hStream. Only one instance of \p hGraphExec may be executing * at a time. Each launch is ordered behind both any previous work in \p hStream * and any previous launches of \p hGraphExec. To execute a graph concurrently, it must be * instantiated multiple times into multiple executable graphs. * * @param hGraphExec - Executable graph to launch * @param hStream - Stream in which to launch the graph * * @return * CUDA_SUCCESS, * CUDA_ERROR_DEINITIALIZED, * CUDA_ERROR_NOT_INITIALIZED, * CUDA_ERROR_INVALID_VALUE * * @see * JCudaDriver#cuGraphInstantiate * JCudaDriver#cuGraphExecDestroy */ public static int cuGraphLaunch(CUgraphExec hGraphExec, CUstream hStream) { return checkResult(cuGraphLaunchNative(hGraphExec, hStream)); } private static native int cuGraphLaunchNative(CUgraphExec hGraphExec, CUstream hStream); /** * Destroys an executable graph.
*
* Destroys the executable graph specified by \p hGraphExec, as well * as all of its executable nodes. If the executable graph is * in-flight, it will not be terminated, but rather freed * asynchronously on completion. * * @param hGraphExec - Executable graph to destroy * * @return * CUDA_SUCCESS, * CUDA_ERROR_DEINITIALIZED, * CUDA_ERROR_NOT_INITIALIZED, * CUDA_ERROR_INVALID_VALUE * * @see * JCudaDriver#cuGraphInstantiate * JCudaDriver#cuGraphLaunch */ public static int cuGraphExecDestroy(CUgraphExec hGraphExec) { return checkResult(cuGraphExecDestroyNative(hGraphExec)); } private static native int cuGraphExecDestroyNative(CUgraphExec hGraphExec); /** * Destroys a graph.
*
* Destroys the graph specified by \p hGraph, as well as all of its nodes. * * @param hGraph - Graph to destroy * * @return * CUDA_SUCCESS, * CUDA_ERROR_DEINITIALIZED, * CUDA_ERROR_NOT_INITIALIZED, * CUDA_ERROR_INVALID_VALUE * * @see * JCudaDriver#cuGraphCreate */ public static int cuGraphDestroy(CUgraph hGraph) { return checkResult(cuGraphDestroyNative(hGraph)); } private static native int cuGraphDestroyNative(CUgraph hGraph); /** * Check whether an executable graph can be updated with a graph and perform the update if possible.
*
* Updates the node parameters in the instantiated graph specified by \p hGraphExec with the * node parameters in a topologically identical graph specified by \p hGraph.
* * Limitations: *
     * - Kernel nodes:
     *   - The function must not change (same restriction as cuGraphExecKernelNodeSetParams())
     * - Memset and memcpy nodes:
     *   - The CUDA device(s) to which the operand(s) was allocated/mapped cannot change.
     *   - The source/destination memory must be allocated from the same contexts as the original
     *     source/destination memory.
     *   - Only 1D memsets can be changed.
     * - Additional memcpy node restrictions:
     *   - Changing either the source or destination memory type(i.e. CU_MEMORYTYPE_DEVICE,
     *     CU_MEMORYTYPE_ARRAY, etc.) is not supported.
     * 
* Note: The API may add further restrictions in future releases. The return code should always be checked. *
 
     * Some node types are not currently supported:
     * - Empty graph nodes(CU_GRAPH_NODE_TYPE_EMPTY)
     * - Child graphs(CU_GRAPH_NODE_TYPE_GRAPH).
     * 
* cuGraphExecUpdate sets \p updateResult_out to CU_GRAPH_EXEC_UPDATE_ERROR_TOPOLOGY_CHANGED under * the following conditions: *
     * - The count of nodes directly in \p hGraphExec and \p hGraph differ, in which case \p hErrorNode_out
     *   is NULL.
     * - A node is deleted in \p hGraph but not not its pair from \p hGraphExec, in which case \p hErrorNode_out
     *   is NULL.
     * - A node is deleted in \p hGraphExec but not its pair from \p hGraph, in which case \p hErrorNode_out is
     *   the pairless node from \p hGraph.
     * - The dependent nodes of a pair differ, in which case \p hErrorNode_out is the node from \p hGraph.
     * 
* cuGraphExecUpdate sets \p updateResult_out to: *
     * - CU_GRAPH_EXEC_UPDATE_ERROR if passed an invalid value.
     * - CU_GRAPH_EXEC_UPDATE_ERROR_TOPOLOGY_CHANGED if the graph topology changed
     * - CU_GRAPH_EXEC_UPDATE_ERROR_NODE_TYPE_CHANGED if the type of a node changed, in which case
     *   \p hErrorNode_out is set to the node from \p hGraph.
     * - CU_GRAPH_EXEC_UPDATE_ERROR_FUNCTION_CHANGED if the func field of a kernel changed, in which
     *   case \p hErrorNode_out is set to the node from \p hGraph
     * - CU_GRAPH_EXEC_UPDATE_ERROR_PARAMETERS_CHANGED if any parameters to a node changed in a way 
     *   that is not supported, in which case \p hErrorNode_out is set to the node from \p hGraph.
     * - CU_GRAPH_EXEC_UPDATE_ERROR_NOT_SUPPORTED if something about a node is unsupported, like 
     *   the node’s type or configuration, in which case \p hErrorNode_out is set to the node from \p hGraph
     * 
* If \p updateResult_out isn’t set in one of the situations described above, the update check passes * and cuGraphExecUpdate updates \p hGraphExec to match the contents of \p hGraph. If an error happens * during the update, \p updateResult_out will be set to CU_GRAPH_EXEC_UPDATE_ERROR; otherwise, * \p updateResult_out is set to CU_GRAPH_EXEC_UPDATE_SUCCESS.
*
* cuGraphExecUpdate returns CUDA_SUCCESS when the updated was performed successfully. It returns * CUDA_ERROR_GRAPH_EXEC_UPDATE_FAILURE if the graph update was not performed because it included * changes which violated constraints specific to instantiated graph update.
* * @param hGraphExec The instantiated graph to be updated * @param hGraph The graph containing the updated parameters * @param hErrorNode_out The node which caused the permissibility check to forbid the update, if any * @param updateResult_out Whether the graph update was permitted. If was forbidden, the reason why * * @return CUDA_SUCCESS, CUDA_ERROR_GRAPH_EXEC_UPDATE_FAILURE * * @see JCudaDriver#cuGraphInstantiate */ public static int cuGraphExecUpdate(CUgraphExec hGraphExec, CUgraph hGraph, CUgraphNode hErrorNode_out, int updateResult_out[]) { return checkResult(cuGraphExecUpdateNative(hGraphExec, hGraph, hErrorNode_out, updateResult_out)); } private static native int cuGraphExecUpdateNative(CUgraphExec hGraphExec, CUgraph hGraph, CUgraphNode hErrorNode_out, int updateResult_out[]); /** * Copies attributes from source node to destination node. * * Copies attributes from source node src to destination node dst. * Both node must have the same context. * * @param dst Destination node * @param src Source node * For list of attributes see ::CUkernelNodeAttrID * * @return CUDA_SUCCESS, UDA_ERROR_INVALID_VALUE * * @see CUaccessPolicyWindow */ public static int cuGraphKernelNodeCopyAttributes(CUgraphNode dst, CUgraphNode src) { return checkResult(cuGraphKernelNodeCopyAttributesNative(dst, src)); } private static native int cuGraphKernelNodeCopyAttributesNative(CUgraphNode dst, CUgraphNode src); /** * Queries node attribute. * * Queries attribute attr from node hNode and stores it in corresponding * member of value_out. 
* * @param hNode * @param attr * @param value_out * * @return CUDA_SUCCESS, UDA_ERROR_INVALID_VALUE, CUDA_ERROR_INVALID_HANDLE * * @see CUaccessPolicyWindow */ public static int cuGraphKernelNodeGetAttribute(CUgraphNode hNode, int attr, CUkernelNodeAttrValue value_out) { return checkResult(cuGraphKernelNodeGetAttributeNative(hNode, attr, value_out)); } private static native int cuGraphKernelNodeGetAttributeNative(CUgraphNode hNode, int attr, CUkernelNodeAttrValue value_out); /** * Sets node attribute. * * Sets attribute attr on node hNode from corresponding attribute of * value. * * @param hNode * @param attr * @param value * * @return CUDA_SUCCESS, UDA_ERROR_INVALID_VALUE, CUDA_ERROR_INVALID_HANDLE * * @see CUaccessPolicyWindow */ public static int cuGraphKernelNodeSetAttribute(CUgraphNode hNode, int attr, CUkernelNodeAttrValue value) { return checkResult(cuGraphKernelNodeSetAttributeNative(hNode, attr, value)); } private static native int cuGraphKernelNodeSetAttributeNative(CUgraphNode hNode, int attr, CUkernelNodeAttrValue value); /** *
     * \brief Returns occupancy of a function
     *
     * Returns in \p *numBlocks the number of the maximum active blocks per
     * streaming multiprocessor.
     *
     * \param numBlocks       - Returned occupancy
     * \param func            - Kernel for which occupancy is calulated
     * \param blockSize       - Block size the kernel is intended to be launched with
     * \param dynamicSMemSize - Per-block dynamic shared memory usage intended, in bytes
     *
     * \return
     * ::CUDA_SUCCESS,
     * ::CUDA_ERROR_DEINITIALIZED,
     * ::CUDA_ERROR_NOT_INITIALIZED,
     * ::CUDA_ERROR_INVALID_CONTEXT,
     * ::CUDA_ERROR_INVALID_VALUE,
     * ::CUDA_ERROR_UNKNOWN
     * \notefnerr
     * 
*/ public static int cuOccupancyMaxActiveBlocksPerMultiprocessor(int numBlocks[], CUfunction func, int blockSize, long dynamicSMemSize) { return checkResult(cuOccupancyMaxActiveBlocksPerMultiprocessorNative(numBlocks, func, blockSize, dynamicSMemSize)); } private static native int cuOccupancyMaxActiveBlocksPerMultiprocessorNative(int numBlocks[], CUfunction func, int blockSize, long dynamicSMemSize); /** *
     * \brief Suggest a launch configuration with reasonable occupancy
     *
     * An extended version of ::cuOccupancyMaxPotentialBlockSize. In
     * addition to arguments passed to ::cuOccupancyMaxPotentialBlockSize,
     * ::cuOccupancyMaxPotentialBlockSizeWithFlags also takes a \p Flags
     * parameter.
     *
     * The \p Flags parameter controls how special cases are handled. The
     * valid flags are:
     *
     * - ::CU_OCCUPANCY_DEFAULT, which maintains the default behavior as
     *   ::cuOccupancyMaxPotentialBlockSize;
     *
     * - ::CU_OCCUPANCY_DISABLE_CACHING_OVERRIDE, which suppresses the
     *   default behavior on platform where global caching affects
     *   occupancy. On such platforms, the launch configurations that
     *   produces maximal occupancy might not support global
     *   caching. Setting ::CU_OCCUPANCY_DISABLE_CACHING_OVERRIDE
     *   guarantees that the the produced launch configuration is global
     *   caching compatible at a potential cost of occupancy. More information
     *   can be found about this feature in the "Unified L1/Texture Cache"
     *   section of the Maxwell tuning guide.
     *
     * \param minGridSize - Returned minimum grid size needed to achieve the maximum occupancy
     * \param blockSize   - Returned maximum block size that can achieve the maximum occupancy
     * \param func        - Kernel for which launch configuration is calculated
     * \param blockSizeToDynamicSMemSize - A function that calculates how much per-block dynamic shared memory \p func uses based on the block size
     * \param dynamicSMemSize - Dynamic shared memory usage intended, in bytes
     * \param blockSizeLimit  - The maximum block size \p func is designed to handle
     * \param flags       - Options
     *
     * \return
     * ::CUDA_SUCCESS,
     * ::CUDA_ERROR_DEINITIALIZED,
     * ::CUDA_ERROR_NOT_INITIALIZED,
     * ::CUDA_ERROR_INVALID_CONTEXT,
     * ::CUDA_ERROR_INVALID_VALUE,
     * ::CUDA_ERROR_UNKNOWN
     * \notefnerr
     *
     * \sa
     * ::cudaOccupancyMaxPotentialBlockSizeWithFlags
     * 
*/ public static int cuOccupancyMaxActiveBlocksPerMultiprocessorWithFlags(int numBlocks[], CUfunction func, int blockSize, long dynamicSMemSize, int flags) { return checkResult(cuOccupancyMaxActiveBlocksPerMultiprocessorWithFlagsNative(numBlocks, func, blockSize, dynamicSMemSize, flags)); } private static native int cuOccupancyMaxActiveBlocksPerMultiprocessorWithFlagsNative(int numBlocks[], CUfunction func, int blockSize, long dynamicSMemSize, int flags); /** *
     * \brief Returns dynamic shared memory available per block when launching \p numBlocks blocks on SM 
     *
     * Returns in \p *dynamicSmemSize the maximum size of dynamic shared memory to allow \p numBlocks blocks per SM. 
     *
     * \param dynamicSmemSize - Returned maximum dynamic shared memory 
     * \param func            - Kernel function for which occupancy is calculated
     * \param numBlocks       - Number of blocks to fit on SM 
     * \param blockSize       - Size of the blocks
     *
     * \return
     * ::CUDA_SUCCESS,
     * ::CUDA_ERROR_DEINITIALIZED,
     * ::CUDA_ERROR_NOT_INITIALIZED,
     * ::CUDA_ERROR_INVALID_CONTEXT,
     * ::CUDA_ERROR_INVALID_VALUE,
     * ::CUDA_ERROR_UNKNOWN
     * \notefnerr
     *
     * \sa
     * 
*/ public static int cuOccupancyAvailableDynamicSMemPerBlock(long dynamicSmemSize[], CUfunction func, int numBlocks, int blockSize) { return checkResult(cuOccupancyAvailableDynamicSMemPerBlockNative(dynamicSmemSize, func, numBlocks, blockSize)); } private static native int cuOccupancyAvailableDynamicSMemPerBlockNative(long dynamicSmemSize[], CUfunction func, int numBlocks, int blockSize); /** *
     * \brief Suggest a launch configuration with reasonable occupancy
     *
     * Returns in \p *blockSize a reasonable block size that can achieve
     * the maximum occupancy (or, the maximum number of active warps with
     * the fewest blocks per multiprocessor), and in \p *minGridSize the
     * minimum grid size to achieve the maximum occupancy.
     *
     * If \p blockSizeLimit is 0, the configurator will use the maximum
     * block size permitted by the device / function instead.
     *
     * If per-block dynamic shared memory allocation is not needed, the
     * user should leave both \p blockSizeToDynamicSMemSize and \p
     * dynamicSMemSize as 0.
     *
     * If per-block dynamic shared memory allocation is needed, then if
     * the dynamic shared memory size is constant regardless of block
     * size, the size should be passed through \p dynamicSMemSize, and \p
     * blockSizeToDynamicSMemSize should be NULL.
     *
     * Otherwise, if the per-block dynamic shared memory size varies with
     * different block sizes, the user needs to provide a unary function
     * through \p blockSizeToDynamicSMemSize that computes the dynamic
     * shared memory needed by \p func for any given block size. \p
     * dynamicSMemSize is ignored. An example signature is:
     *
     * \code
     *    // Take block size, returns dynamic shared memory needed
     *    size_t blockToSmem(int blockSize);
     * \endcode
     *
     * \param minGridSize - Returned minimum grid size needed to achieve the maximum occupancy
     * \param blockSize   - Returned maximum block size that can achieve the maximum occupancy
     * \param func        - Kernel for which launch configuration is calulated
     * \param blockSizeToDynamicSMemSize - A function that calculates how much per-block dynamic shared memory \p func uses based on the block size
     * \param dynamicSMemSize - Dynamic shared memory usage intended, in bytes
     * \param blockSizeLimit  - The maximum block size \p func is designed to handle
     *
     * \return
     * ::CUDA_SUCCESS,
     * ::CUDA_ERROR_DEINITIALIZED,
     * ::CUDA_ERROR_NOT_INITIALIZED,
     * ::CUDA_ERROR_INVALID_CONTEXT,
     * ::CUDA_ERROR_INVALID_VALUE,
     * ::CUDA_ERROR_UNKNOWN
     * \notefnerr
     * 
*/ public static int cuOccupancyMaxPotentialBlockSize(int minGridSize[], int blockSize[], CUfunction func, CUoccupancyB2DSize blockSizeToDynamicSMemSize, long dynamicSMemSize, int blockSizeLimit) { // The callback involves a state on the native side, // so ensure synchronization here synchronized (OCCUPANCY_LOCK) { return checkResult(cuOccupancyMaxPotentialBlockSizeNative(minGridSize, blockSize, func, blockSizeToDynamicSMemSize, dynamicSMemSize, blockSizeLimit)); } } private static native int cuOccupancyMaxPotentialBlockSizeNative(int minGridSize[], int blockSize[], CUfunction func, CUoccupancyB2DSize blockSizeToDynamicSMemSize, long dynamicSMemSize, int blockSizeLimit); public static int cuOccupancyMaxPotentialBlockSizeWithFlags(int minGridSize[], int blockSize[], CUfunction func, CUoccupancyB2DSize blockSizeToDynamicSMemSize, long dynamicSMemSize, int blockSizeLimit, int flags) { // The callback involves a state on the native side, // so ensure synchronization here synchronized (OCCUPANCY_LOCK) { return checkResult(cuOccupancyMaxPotentialBlockSizeWithFlagsNative(minGridSize, blockSize, func, blockSizeToDynamicSMemSize, dynamicSMemSize, blockSizeLimit, flags)); } } private static native int cuOccupancyMaxPotentialBlockSizeWithFlagsNative(int minGridSize[], int blockSize[], CUfunction func, CUoccupancyB2DSize blockSizeToDynamicSMemSize, long dynamicSMemSize, int blockSizeLimit, int flags); private static final Object OCCUPANCY_LOCK = new Object(); /** * Launches a CUDA function. * *
     * CUresult cuLaunch (
     *      CUfunction f )
     * 
*
*

Launches a CUDA function. * Deprecated Invokes the kernel f * on a 1 x 1 x 1 grid of blocks. The block contains the number of threads * specified by a previous call to cuFuncSetBlockShape(). *

*
* Note: *

Note that this * function may also return error codes from previous, asynchronous * launches. *

*
*

*
* * @param f Kernel to launch * * @return CUDA_SUCCESS, CUDA_ERROR_DEINITIALIZED, CUDA_ERROR_NOT_INITIALIZED, * CUDA_ERROR_INVALID_CONTEXT, CUDA_ERROR_INVALID_VALUE, * CUDA_ERROR_LAUNCH_FAILED, CUDA_ERROR_LAUNCH_OUT_OF_RESOURCES, * CUDA_ERROR_LAUNCH_TIMEOUT, CUDA_ERROR_LAUNCH_INCOMPATIBLE_TEXTURING, * CUDA_ERROR_SHARED_OBJECT_INIT_FAILED * * @see JCudaDriver#cuFuncSetBlockShape * @see JCudaDriver#cuFuncSetSharedSize * @see JCudaDriver#cuFuncGetAttribute * @see JCudaDriver#cuParamSetSize * @see JCudaDriver#cuParamSetf * @see JCudaDriver#cuParamSeti * @see JCudaDriver#cuParamSetv * @see JCudaDriver#cuLaunchGrid * @see JCudaDriver#cuLaunchGridAsync * @see JCudaDriver#cuLaunchKernel * * @deprecated Deprecated in CUDA */ @Deprecated public static int cuLaunch(CUfunction f) { return checkResult(cuLaunchNative(f)); } private static native int cuLaunchNative(CUfunction f); /** * Launches a CUDA function. * *
     * CUresult cuLaunchGrid (
     *      CUfunction f,
     *      int  grid_width,
     *      int  grid_height )
     * 
*
*

Launches a CUDA function. * Deprecated Invokes the kernel f * on a grid_width x grid_height grid of blocks. Each * block contains the number of threads specified by a previous call to * cuFuncSetBlockShape(). *

*
* Note: *

Note that this * function may also return error codes from previous, asynchronous * launches. *

*
*

*
* * @param f Kernel to launch * @param grid_width Width of grid in blocks * @param grid_height Height of grid in blocks * * @return CUDA_SUCCESS, CUDA_ERROR_DEINITIALIZED, CUDA_ERROR_NOT_INITIALIZED, * CUDA_ERROR_INVALID_CONTEXT, CUDA_ERROR_INVALID_VALUE, * CUDA_ERROR_LAUNCH_FAILED, CUDA_ERROR_LAUNCH_OUT_OF_RESOURCES, * CUDA_ERROR_LAUNCH_TIMEOUT, CUDA_ERROR_LAUNCH_INCOMPATIBLE_TEXTURING, * CUDA_ERROR_SHARED_OBJECT_INIT_FAILED * * @see JCudaDriver#cuFuncSetBlockShape * @see JCudaDriver#cuFuncSetSharedSize * @see JCudaDriver#cuFuncGetAttribute * @see JCudaDriver#cuParamSetSize * @see JCudaDriver#cuParamSetf * @see JCudaDriver#cuParamSeti * @see JCudaDriver#cuParamSetv * @see JCudaDriver#cuLaunch * @see JCudaDriver#cuLaunchGridAsync * @see JCudaDriver#cuLaunchKernel * * @deprecated Deprecated in CUDA */ @Deprecated public static int cuLaunchGrid(CUfunction f, int grid_width, int grid_height) { return checkResult(cuLaunchGridNative(f, grid_width, grid_height)); } private static native int cuLaunchGridNative(CUfunction f, int grid_width, int grid_height); /** * Launches a CUDA function. * *
     * CUresult cuLaunchGridAsync (
     *      CUfunction f,
     *      int  grid_width,
     *      int  grid_height,
     *      CUstream hStream )
     * 
*
*

Launches a CUDA function. * Deprecated Invokes the kernel f * on a grid_width x grid_height grid of blocks. Each * block contains the number of threads specified by a previous call to * cuFuncSetBlockShape(). *

*

cuLaunchGridAsync() can optionally be * associated to a stream by passing a non-zero hStream * argument. *

*
* Note: *

Note that this * function may also return error codes from previous, asynchronous * launches. *

*
*

*
* * @param f Kernel to launch * @param grid_width Width of grid in blocks * @param grid_height Height of grid in blocks * @param hStream Stream identifier * * @return CUDA_SUCCESS, CUDA_ERROR_DEINITIALIZED, CUDA_ERROR_NOT_INITIALIZED, * CUDA_ERROR_INVALID_CONTEXT, CUDA_ERROR_INVALID_HANDLE, * CUDA_ERROR_INVALID_VALUE, CUDA_ERROR_LAUNCH_FAILED, * CUDA_ERROR_LAUNCH_OUT_OF_RESOURCES, CUDA_ERROR_LAUNCH_TIMEOUT, * CUDA_ERROR_LAUNCH_INCOMPATIBLE_TEXTURING, * CUDA_ERROR_SHARED_OBJECT_INIT_FAILED * * @see JCudaDriver#cuFuncSetBlockShape * @see JCudaDriver#cuFuncSetSharedSize * @see JCudaDriver#cuFuncGetAttribute * @see JCudaDriver#cuParamSetSize * @see JCudaDriver#cuParamSetf * @see JCudaDriver#cuParamSeti * @see JCudaDriver#cuParamSetv * @see JCudaDriver#cuLaunch * @see JCudaDriver#cuLaunchGrid * @see JCudaDriver#cuLaunchKernel * * @deprecated Deprecated in CUDA */ @Deprecated public static int cuLaunchGridAsync(CUfunction f, int grid_width, int grid_height, CUstream hStream) { return checkResult(cuLaunchGridAsyncNative(f, grid_width, grid_height, hStream)); } private static native int cuLaunchGridAsyncNative(CUfunction f, int grid_width, int grid_height, CUstream hStream); /** * Creates an event. * *
     * CUresult cuEventCreate (
     *      CUevent* phEvent,
     *      unsigned int  Flags )
     * 
*
*

Creates an event. Creates an event * *phEvent with the flags specified via Flags. Valid flags * include: *

    *
  • *

    CU_EVENT_DEFAULT: Default event * creation flag. *

    *
  • *
  • *

    CU_EVENT_BLOCKING_SYNC: * Specifies that the created event should use blocking synchronization. * A CPU thread that uses cuEventSynchronize() to wait on an event created * with this flag will block until the event has actually been recorded. *

    *
  • *
  • *

    CU_EVENT_DISABLE_TIMING: * Specifies that the created event does not need to record timing data. * Events created with this flag specified and the CU_EVENT_BLOCKING_SYNC * flag not specified will provide the best performance when used with * cuStreamWaitEvent() and cuEventQuery(). *

    *
  • *
  • *

    CU_EVENT_INTERPROCESS: Specifies * that the created event may be used as an interprocess event by * cuIpcGetEventHandle(). CU_EVENT_INTERPROCESS must be specified along * with CU_EVENT_DISABLE_TIMING. *

    *
  • *
*

*
* Note: *

Note that this * function may also return error codes from previous, asynchronous * launches. *

*
*

*
* * @param phEvent Returns newly created event * @param Flags Event creation flags * * @return CUDA_SUCCESS, CUDA_ERROR_DEINITIALIZED, CUDA_ERROR_NOT_INITIALIZED, * CUDA_ERROR_INVALID_CONTEXT, CUDA_ERROR_INVALID_VALUE, * CUDA_ERROR_OUT_OF_MEMORY * * @see JCudaDriver#cuEventRecord * @see JCudaDriver#cuEventQuery * @see JCudaDriver#cuEventSynchronize * @see JCudaDriver#cuEventDestroy * @see JCudaDriver#cuEventElapsedTime */ public static int cuEventCreate(CUevent phEvent, int Flags) { return checkResult(cuEventCreateNative(phEvent, Flags)); } private static native int cuEventCreateNative(CUevent phEvent, int Flags); /** * Records an event. * *
     * CUresult cuEventRecord (
     *      CUevent hEvent,
     *      CUstream hStream )
     * 
*
*

Records an event. Records an event. If * hStream is non-zero, the event is recorded after all preceding * operations in hStream have been completed; otherwise, it is * recorded after all preceding operations in the CUDA context have been * completed. Since * operation is asynchronous, cuEventQuery * and/or cuEventSynchronize() must be used to determine when the event * has actually been recorded. *

*

If cuEventRecord() has previously been * called on hEvent, then this call will overwrite any existing * state in hEvent. Any subsequent calls which examine the * status of hEvent will only examine the completion of this * most recent call to cuEventRecord(). *

*

It is necessary that hEvent * and hStream be created on the same context. *

*
* Note: *

Note that this * function may also return error codes from previous, asynchronous * launches. *

*
*

*
* * @param hEvent Event to record * @param hStream Stream to record event for * * @return CUDA_SUCCESS, CUDA_ERROR_DEINITIALIZED, CUDA_ERROR_NOT_INITIALIZED, * CUDA_ERROR_INVALID_CONTEXT, CUDA_ERROR_INVALID_HANDLE, * CUDA_ERROR_INVALID_VALUE * * @see JCudaDriver#cuEventCreate * @see JCudaDriver#cuEventQuery * @see JCudaDriver#cuEventSynchronize * @see JCudaDriver#cuStreamWaitEvent * @see JCudaDriver#cuEventDestroy * @see JCudaDriver#cuEventElapsedTime */ public static int cuEventRecord(CUevent hEvent, CUstream hStream) { return checkResult(cuEventRecordNative(hEvent, hStream)); } private static native int cuEventRecordNative(CUevent hEvent, CUstream hStream); /** * Records an event. *

* Captures in \p hEvent the contents of \p hStream at the time of this call. * \p hEvent and \p hStream must be from the same context. * Calls such as ::cuEventQuery() or ::cuStreamWaitEvent() will then * examine or wait for completion of the work that was captured. Uses of * \p hStream after this call do not modify \p hEvent. See note on default * stream behavior for what is captured in the default case. *

* ::cuEventRecordWithFlags() can be called multiple times on the same event and * will overwrite the previously captured state. Other APIs such as * ::cuStreamWaitEvent() use the most recently captured state at the time * of the API call, and are not affected by later calls to * ::cuEventRecordWithFlags(). Before the first call to ::cuEventRecordWithFlags(), an * event represents an empty set of work, so for example ::cuEventQuery() * would return ::CUDA_SUCCESS. *

* flags include: * - ::CU_EVENT_RECORD_DEFAULT: Default event creation flag. * - ::CU_EVENT_RECORD_EXTERNAL: Event is captured in the graph as an external * event node when performing stream capture. This flag is invalid outside * of stream capture. * * @param hEvent Event to record * @param hStream Stream to record event for * @param flags See ::CUevent_capture_flags * * @return * CUDA_SUCCESS, * CUDA_ERROR_DEINITIALIZED, * CUDA_ERROR_NOT_INITIALIZED, * CUDA_ERROR_INVALID_CONTEXT, * CUDA_ERROR_INVALID_HANDLE, * CUDA_ERROR_INVALID_VALUE * * @see JCudaDriver#cuEventCreate * @see JCudaDriver#cuEventQuery * @see JCudaDriver#cuEventSynchronize * @see JCudaDriver#cuStreamWaitEvent * @see JCudaDriver#cuEventDestroy * @see JCudaDriver#cuEventElapsedTime * @see JCudaDriver#cuEventRecord * @see JCudaDriver#cudaEventRecord */ public static int cuEventRecordWithFlags(CUevent hEvent, CUstream hStream, int flags) { return checkResult(cuEventRecordWithFlagsNative(hEvent, hStream, flags)); } private static native int cuEventRecordWithFlagsNative(CUevent hEvent, CUstream hStream, int flags); /** * Queries an event's status. * *
     * CUresult cuEventQuery (
     *      CUevent hEvent )
     * 
*
*

Queries an event's status. Query the * status of all device work preceding the most recent call to * cuEventRecord() (in the appropriate compute streams, as specified by * the arguments to cuEventRecord()). *

*

If this work has successfully been * completed by the device, or if cuEventRecord() has not been called on * hEvent, then CUDA_SUCCESS is returned. If this work has not * yet been completed by the device then CUDA_ERROR_NOT_READY is * returned. *

*
* Note: *

Note that this * function may also return error codes from previous, asynchronous * launches. *

*
*

*
* * @param hEvent Event to query * * @return CUDA_SUCCESS, CUDA_ERROR_DEINITIALIZED, CUDA_ERROR_NOT_INITIALIZED, * CUDA_ERROR_INVALID_HANDLE, CUDA_ERROR_INVALID_VALUE, * CUDA_ERROR_NOT_READY * * @see JCudaDriver#cuEventCreate * @see JCudaDriver#cuEventRecord * @see JCudaDriver#cuEventSynchronize * @see JCudaDriver#cuEventDestroy * @see JCudaDriver#cuEventElapsedTime */ public static int cuEventQuery(CUevent hEvent) { return checkResult(cuEventQueryNative(hEvent)); } private static native int cuEventQueryNative(CUevent hEvent); /** * Waits for an event to complete. * *
     * CUresult cuEventSynchronize (
     *      CUevent hEvent )
     * 
*
*

Waits for an event to complete. Wait * until the completion of all device work preceding the most recent call * to cuEventRecord() (in the appropriate compute streams, as specified * by the arguments to cuEventRecord()). *

*

If cuEventRecord() has not been called * on hEvent, CUDA_SUCCESS is returned immediately. *

*

Waiting for an event that was created * with the CU_EVENT_BLOCKING_SYNC flag will cause the calling CPU thread * to block until the event has been completed by the device. If the * CU_EVENT_BLOCKING_SYNC flag has not been set, then the CPU thread will * busy-wait until the event has been completed by the device. *

*
* Note: *

Note that this * function may also return error codes from previous, asynchronous * launches. *

*
*

*
* * @param hEvent Event to wait for * * @return CUDA_SUCCESS, CUDA_ERROR_DEINITIALIZED, CUDA_ERROR_NOT_INITIALIZED, * CUDA_ERROR_INVALID_CONTEXT, CUDA_ERROR_INVALID_HANDLE * * @see JCudaDriver#cuEventCreate * @see JCudaDriver#cuEventRecord * @see JCudaDriver#cuEventQuery * @see JCudaDriver#cuEventDestroy * @see JCudaDriver#cuEventElapsedTime */ public static int cuEventSynchronize(CUevent hEvent) { return checkResult(cuEventSynchronizeNative(hEvent)); } private static native int cuEventSynchronizeNative(CUevent hEvent); /** * Destroys an event. * *
     * CUresult cuEventDestroy (
     *      CUevent hEvent )
     *
     * <p>
     * Destroys the event specified by hEvent.
     * <p>
     * In case hEvent has been recorded but has not yet been completed
     * when cuEventDestroy() is called, the function will return
     * immediately and the resources associated with hEvent will be
     * released automatically once the device has completed hEvent.
     * <p>
     * Note that this function may also return error codes from previous,
     * asynchronous launches.
     *
     * @param hEvent Event to destroy
     *
     * @return CUDA_SUCCESS, CUDA_ERROR_DEINITIALIZED, CUDA_ERROR_NOT_INITIALIZED,
     * CUDA_ERROR_INVALID_CONTEXT, CUDA_ERROR_INVALID_HANDLE
     *
     * @see JCudaDriver#cuEventCreate
     * @see JCudaDriver#cuEventRecord
     * @see JCudaDriver#cuEventQuery
     * @see JCudaDriver#cuEventSynchronize
     * @see JCudaDriver#cuEventElapsedTime
     */
    public static int cuEventDestroy(CUevent hEvent)
    {
        return checkResult(cuEventDestroyNative(hEvent));
    }
    private static native int cuEventDestroyNative(CUevent hEvent);

    /**
     * Computes the elapsed time between two events.
     *
     * CUresult cuEventElapsedTime (
     *      float* pMilliseconds,
     *      CUevent hStart,
     *      CUevent hEnd )
     *
     * <p>
     * Computes the elapsed time between two events (in milliseconds with
     * a resolution of around 0.5 microseconds).
     * <p>
     * If either event was last recorded in a non-NULL stream, the
     * resulting time may be greater than expected (even if both used the
     * same stream handle). This happens because the cuEventRecord()
     * operation takes place asynchronously and there is no guarantee
     * that the measured latency is actually just between the two events.
     * Any number of other different stream operations could execute in
     * between the two measured events, thus altering the timing in a
     * significant way.
     * <p>
     * If cuEventRecord() has not been called on either event then
     * CUDA_ERROR_INVALID_HANDLE is returned. If cuEventRecord() has been
     * called on both events but one or both of them has not yet been
     * completed (that is, cuEventQuery() would return
     * CUDA_ERROR_NOT_READY on at least one of the events),
     * CUDA_ERROR_NOT_READY is returned. If either event was created with
     * the CU_EVENT_DISABLE_TIMING flag, then this function will return
     * CUDA_ERROR_INVALID_HANDLE.
     * <p>
     * Note that this function may also return error codes from previous,
     * asynchronous launches.
     *
     * @param pMilliseconds Time between hStart and hEnd in ms
     * @param hStart Starting event
     * @param hEnd Ending event
     *
     * @return CUDA_SUCCESS, CUDA_ERROR_DEINITIALIZED, CUDA_ERROR_NOT_INITIALIZED,
     * CUDA_ERROR_INVALID_CONTEXT, CUDA_ERROR_INVALID_HANDLE,
     * CUDA_ERROR_NOT_READY
     *
     * @see JCudaDriver#cuEventCreate
     * @see JCudaDriver#cuEventRecord
     * @see JCudaDriver#cuEventQuery
     * @see JCudaDriver#cuEventSynchronize
     * @see JCudaDriver#cuEventDestroy
     */
    public static int cuEventElapsedTime(float pMilliseconds[], CUevent hStart, CUevent hEnd)
    {
        return checkResult(cuEventElapsedTimeNative(pMilliseconds, hStart, hEnd));
    }
    private static native int cuEventElapsedTimeNative(float pMilliseconds[], CUevent hStart, CUevent hEnd);

    /**
     * Wait on a memory location.
     * <p>
     * Enqueues a synchronization of the stream on the given memory
     * location. Work ordered after the operation will block until the
     * given condition on the memory is satisfied. By default, the
     * condition is to wait for (int32_t)(*addr - value) >= 0, a cyclic
     * greater-or-equal. Other condition types can be specified via
     * flags.
     * <p>
     * If the memory was registered via cuMemHostRegister(), the device
     * pointer should be obtained with cuMemHostGetDevicePointer(). This
     * function cannot be used with managed memory (cuMemAllocManaged).
     * <p>
     * On Windows, the device must be using TCC, or the operation is not
     * supported. See cuDeviceGetAttribute().
     *
     * @param stream The stream to synchronize on the memory location.
     * @param addr The memory location to wait on.
     * @param value The value to compare with the memory location.
     * @param flags See {@link CUstreamWaitValue_flags}
     * @return CUDA_SUCCESS, CUDA_ERROR_INVALID_VALUE, CUDA_ERROR_NOT_SUPPORTED
     *
     * @see JCudaDriver#cuStreamWriteValue32
     * @see JCudaDriver#cuStreamBatchMemOp
     * @see JCudaDriver#cuMemHostRegister
     * @see JCudaDriver#cuStreamWaitEvent
     */
    public static int cuStreamWaitValue32(CUstream stream, CUdeviceptr addr, int value, int flags)
    {
        return checkResult(cuStreamWaitValue32Native(stream, addr, value, flags));
    }
    private static native int cuStreamWaitValue32Native(CUstream stream, CUdeviceptr addr, int value, int flags);

    /**
     * Write a value to memory.
     * <p>
     * Write a value to memory. Unless the
     * CU_STREAM_WRITE_VALUE_NO_MEMORY_BARRIER flag is passed, the write
     * is preceded by a system-wide memory fence, equivalent to a
     * __threadfence_system() but scoped to the stream rather than a CUDA
     * thread.
     * <p>
     * If the memory was registered via cuMemHostRegister(), the device
     * pointer should be obtained with cuMemHostGetDevicePointer(). This
     * function cannot be used with managed memory (cuMemAllocManaged).
     * <p>
     * On Windows, the device must be using TCC, or the operation is not
     * supported. See cuDeviceGetAttribute().
     *
     * @param stream The stream to do the write in.
     * @param addr The device address to write to.
     * @param value The value to write.
     * @param flags See {@link CUstreamWriteValue_flags}
     * @return CUDA_SUCCESS, CUDA_ERROR_INVALID_VALUE, CUDA_ERROR_NOT_SUPPORTED
     *
     * @see JCudaDriver#cuStreamWaitValue32
     * @see JCudaDriver#cuStreamBatchMemOp
     * @see JCudaDriver#cuMemHostRegister
     * @see JCudaDriver#cuEventRecord
     */
    public static int cuStreamWriteValue32(CUstream stream, CUdeviceptr addr, int value, int flags)
    {
        return checkResult(cuStreamWriteValue32Native(stream, addr, value, flags));
    }
    private static native int cuStreamWriteValue32Native(CUstream stream, CUdeviceptr addr, int value, int flags);

    /**
     * Wait on a memory location.
     * <p>
     * Enqueues a synchronization of the stream on the given memory
     * location. Work ordered after the operation will block until the
     * given condition on the memory is satisfied. By default, the
     * condition is to wait for (int64_t)(*addr - value) >= 0, a cyclic
     * greater-or-equal. Other condition types can be specified via
     * flags.
     * <p>
     * If the memory was registered via cuMemHostRegister(), the device
     * pointer should be obtained with cuMemHostGetDevicePointer(). This
     * function cannot be used with managed memory (cuMemAllocManaged).
     * <p>
     * On Windows, the device must be using TCC, or the operation is not
     * supported. See cuDeviceGetAttribute().
     *
     * @param stream The stream to synchronize on the memory location.
     * @param addr The memory location to wait on.
     * @param value The value to compare with the memory location.
     * @param flags See {@link CUstreamWaitValue_flags}
     * @return CUDA_SUCCESS, CUDA_ERROR_INVALID_VALUE, CUDA_ERROR_NOT_SUPPORTED
     *
     * @see JCudaDriver#cuStreamWriteValue64
     * @see JCudaDriver#cuStreamBatchMemOp
     * @see JCudaDriver#cuMemHostRegister
     * @see JCudaDriver#cuStreamWaitEvent
     */
    public static int cuStreamWaitValue64(CUstream stream, CUdeviceptr addr, long value, int flags)
    {
        return checkResult(cuStreamWaitValue64Native(stream, addr, value, flags));
    }
    private static native int cuStreamWaitValue64Native(CUstream stream, CUdeviceptr addr, long value, int flags);

    /**
     * Write a value to memory.
     * <p>
     * Write a value to memory. Unless the
     * CU_STREAM_WRITE_VALUE_NO_MEMORY_BARRIER flag is passed, the write
     * is preceded by a system-wide memory fence, equivalent to a
     * __threadfence_system() but scoped to the stream rather than a CUDA
     * thread.
     * <p>
     * If the memory was registered via cuMemHostRegister(), the device
     * pointer should be obtained with cuMemHostGetDevicePointer(). This
     * function cannot be used with managed memory (cuMemAllocManaged).
     * <p>
     * On Windows, the device must be using TCC, or the operation is not
     * supported. See cuDeviceGetAttribute().
     *
     * @param stream The stream to do the write in.
     * @param addr The device address to write to.
     * @param value The value to write.
     * @param flags See {@link CUstreamWriteValue_flags}
     * @return CUDA_SUCCESS, CUDA_ERROR_INVALID_VALUE, CUDA_ERROR_NOT_SUPPORTED
     *
     * @see JCudaDriver#cuStreamWaitValue64
     * @see JCudaDriver#cuStreamBatchMemOp
     * @see JCudaDriver#cuMemHostRegister
     * @see JCudaDriver#cuEventRecord
     */
    public static int cuStreamWriteValue64(CUstream stream, CUdeviceptr addr, long value, int flags)
    {
        return checkResult(cuStreamWriteValue64Native(stream, addr, value, flags));
    }
    private static native int cuStreamWriteValue64Native(CUstream stream, CUdeviceptr addr, long value, int flags);

    /**
     * NOTE: This function is not yet supported in JCuda, and will throw
     * an UnsupportedOperationException!
     * <p>
     * Batch operations to synchronize the stream via memory operations.
     * <p>
     * This is a batch version of cuStreamWaitValue32() and
     * cuStreamWriteValue32(). Batching operations may avoid some
     * performance overhead in both the API call and the device execution
     * versus adding them to the stream in separate API calls. The
     * operations are enqueued in the order they appear in the array.
     * <p>
     * See CUstreamBatchMemOpType for the full set of supported
     * operations, and cuStreamWaitValue32() and cuStreamWriteValue32()
     * for details of specific operations.
     * <p>
     * On Windows, the device must be using TCC, or this call is not
     * supported. See cuDeviceGetAttribute().
     *
     * @param stream The stream to enqueue the operations in.
     * @param count The number of operations in the array. Must be less
     * than 256.
     * @param paramArray The types and parameters of the individual
     * operations.
     * @param flags Reserved for future expansion; must be 0.
     * @return CUDA_SUCCESS, CUDA_ERROR_INVALID_VALUE, CUDA_ERROR_NOT_SUPPORTED
     *
     * @see JCudaDriver#cuStreamWaitValue32
     * @see JCudaDriver#cuStreamWriteValue32
     * @see JCudaDriver#cuMemHostRegister
     */
    public static int cuStreamBatchMemOp(CUstream stream, int count, CUstreamBatchMemOpParams paramArray[], int flags)
    {
        // TODO Implement cuStreamBatchMemOp
        throw new UnsupportedOperationException("The cuStreamBatchMemOp function is not yet supported in JCuda");
    }

    /**
     * Returns information about a pointer.
     *
     * CUresult cuPointerGetAttribute (
     *      void* data,
     *      CUpointer_attribute attribute,
     *      CUdeviceptr ptr )
     *
     * <p>
     * Returns information about a pointer. The supported attributes are:
     * <ul>
     * <li>CU_POINTER_ATTRIBUTE_CONTEXT: Returns in *data the CUcontext
     * in which ptr was allocated or registered. The type of data must be
     * CUcontext *. If ptr was not allocated by, mapped by, or registered
     * with a CUcontext which uses unified virtual addressing then
     * CUDA_ERROR_INVALID_VALUE is returned.</li>
     * <li>CU_POINTER_ATTRIBUTE_MEMORY_TYPE: Returns in *data the
     * physical memory type of the memory that ptr addresses as a
     * CUmemorytype enumerated value. The type of data must be unsigned
     * int. If ptr addresses device memory then *data is set to
     * CU_MEMORYTYPE_DEVICE; the particular CUdevice on which the memory
     * resides is the CUdevice of the CUcontext returned by the
     * CU_POINTER_ATTRIBUTE_CONTEXT attribute of ptr. If ptr addresses
     * host memory then *data is set to CU_MEMORYTYPE_HOST. If ptr was
     * not allocated by, mapped by, or registered with a CUcontext which
     * uses unified virtual addressing then CUDA_ERROR_INVALID_VALUE is
     * returned. If the current CUcontext does not support unified
     * virtual addressing then CUDA_ERROR_INVALID_CONTEXT is
     * returned.</li>
     * <li>CU_POINTER_ATTRIBUTE_DEVICE_POINTER: Returns in *data the
     * device pointer value through which ptr may be accessed by kernels
     * running in the current CUcontext. The type of data must be
     * CUdeviceptr *. If there exists no device pointer value through
     * which kernels running in the current CUcontext may access ptr then
     * CUDA_ERROR_INVALID_VALUE is returned. If there is no current
     * CUcontext then CUDA_ERROR_INVALID_CONTEXT is returned. Except in
     * the exceptional disjoint addressing cases discussed below, the
     * value returned in *data will equal the input value ptr.</li>
     * <li>CU_POINTER_ATTRIBUTE_HOST_POINTER: Returns in *data the host
     * pointer value through which ptr may be accessed by the host
     * program. The type of data must be void **. If there exists no host
     * pointer value through which the host program may directly access
     * ptr then CUDA_ERROR_INVALID_VALUE is returned. Except in the
     * exceptional disjoint addressing cases discussed below, the value
     * returned in *data will equal the input value ptr.</li>
     * <li>CU_POINTER_ATTRIBUTE_P2P_TOKENS: Returns in *data two tokens
     * for use with the nv-p2p.h Linux kernel interface. data must be a
     * struct of type CUDA_POINTER_ATTRIBUTE_P2P_TOKENS. ptr must be a
     * pointer to memory obtained from cuMemAlloc(). Note that p2pToken
     * and vaSpaceToken are only valid for the lifetime of the source
     * allocation; a subsequent allocation at the same address may return
     * completely different tokens.</li>
     * </ul>
     * <p>
     * Note that for most allocations in the unified virtual address
     * space the host and device pointer for accessing the allocation
     * will be the same. The exceptions to this are
     * <ul>
     * <li>user memory registered using cuMemHostRegister</li>
     * <li>host memory allocated using cuMemHostAlloc with the
     * CU_MEMHOSTALLOC_WRITECOMBINED flag</li>
     * </ul>
     * For these types of allocation there will exist separate, disjoint
     * host and device addresses for accessing the allocation. In
     * particular
     * <ul>
     * <li>The host address will correspond to an invalid unmapped device
     * address (which will result in an exception if accessed from the
     * device)</li>
     * <li>The device address will correspond to an invalid unmapped host
     * address (which will result in an exception if accessed from the
     * host).</li>
     * </ul>
     * For these types of allocations, querying
     * CU_POINTER_ATTRIBUTE_HOST_POINTER and
     * CU_POINTER_ATTRIBUTE_DEVICE_POINTER may be used to retrieve the
     * host and device addresses from either address.
     * <p>
     * Note that this function may also return error codes from previous,
     * asynchronous launches.
     *
     * @param data Returned pointer attribute value
     * @param attribute Pointer attribute to query
     * @param ptr Pointer
     *
     * @return CUDA_SUCCESS, CUDA_ERROR_DEINITIALIZED, CUDA_ERROR_NOT_INITIALIZED,
     * CUDA_ERROR_INVALID_CONTEXT, CUDA_ERROR_INVALID_VALUE,
     * CUDA_ERROR_INVALID_DEVICE
     *
     * @see JCudaDriver#cuMemAlloc
     * @see JCudaDriver#cuMemFree
     * @see JCudaDriver#cuMemAllocHost
     * @see JCudaDriver#cuMemFreeHost
     * @see JCudaDriver#cuMemHostAlloc
     * @see JCudaDriver#cuMemHostRegister
     * @see JCudaDriver#cuMemHostUnregister
     */
    public static int cuPointerGetAttribute(Pointer data, int attribute, CUdeviceptr ptr)
    {
        return checkResult(cuPointerGetAttributeNative(data, attribute, ptr));
    }
    private static native int cuPointerGetAttributeNative(Pointer data, int attribute, CUdeviceptr ptr);

    /**
     * Prefetches memory to the specified destination device
     * <p>
     * Prefetches memory to the specified destination device. devPtr is
     * the base device pointer of the memory to be prefetched and
     * dstDevice is the destination device. count specifies the number of
     * bytes to copy. hStream is the stream in which the operation is
     * enqueued.
     * <p>
     * Passing in CU_DEVICE_CPU for dstDevice will prefetch the data to
     * CPU memory.
     * <p>
     * If no physical memory has been allocated for this region, then
     * this memory region will be populated and mapped on the destination
     * device. If there's insufficient memory to prefetch the desired
     * region, the Unified Memory driver may evict pages belonging to
     * other memory regions to make room. If there's no memory that can
     * be evicted, then the Unified Memory driver will prefetch less than
     * what was requested.
     * <p>
     * In the normal case, any mappings to the previous location of the
     * migrated pages are removed and mappings for the new location are
     * only setup on the dstDevice. The application can exercise finer
     * control on these mappings using cuMemAdvise.
     * <p>
     * Note that this function is asynchronous with respect to the host
     * and all work on other devices.
     *
     * @param devPtr Pointer to be prefetched
     * @param count Size in bytes
     * @param dstDevice Destination device to prefetch to
     * @param hStream Stream to enqueue prefetch operation
     *
     * @return CUDA_SUCCESS, CUDA_ERROR_INVALID_VALUE, CUDA_ERROR_INVALID_DEVICE
     *
     * @see JCudaDriver#cuMemcpy
     * @see JCudaDriver#cuMemcpyPeer
     * @see JCudaDriver#cuMemcpyAsync
     * @see JCudaDriver#cuMemcpy3DPeerAsync
     * @see JCudaDriver#cuMemAdvise
     */
    public static int cuMemPrefetchAsync(CUdeviceptr devPtr, long count, CUdevice dstDevice, CUstream hStream)
    {
        return checkResult(cuMemPrefetchAsyncNative(devPtr, count, dstDevice, hStream));
    }
    private static native int cuMemPrefetchAsyncNative(CUdeviceptr devPtr, long count, CUdevice dstDevice, CUstream hStream);

    /**
     * Advise about the usage of a given memory range
     * <p>
     * Advise the Unified Memory subsystem about the usage pattern for
     * the memory range starting at devPtr with a size of count bytes.
     * <p>
     * The advice parameter can take the following values:
     * <ul>
     * <li>CU_MEM_ADVISE_SET_READ_MOSTLY: This implies that the data is
     * mostly going to be read from and only occasionally written to.
     * This allows the driver to create read-only copies of the data in a
     * processor's memory when that processor accesses it. Similarly, if
     * cuMemPrefetchAsync is called on this region, it will create a
     * read-only copy of the data on the destination processor. When a
     * processor writes to this data, all copies of the corresponding
     * page are invalidated except for the one where the write occurred.
     * The device argument is ignored for this advice.</li>
     * <li>CU_MEM_ADVISE_UNSET_READ_MOSTLY: Undoes the effect of
     * CU_MEM_ADVISE_SET_READ_MOSTLY. Any read duplicated copies of the
     * data will be freed no later than the next write access to that
     * data.</li>
     * <li>CU_MEM_ADVISE_SET_PREFERRED_LOCATION: This advice sets the
     * preferred location for the data to be the memory belonging to
     * device. Passing in CU_DEVICE_CPU for device sets the preferred
     * location as CPU memory. Setting the preferred location does not
     * cause data to migrate to that location immediately. Instead, it
     * guides the migration policy when a fault occurs on that memory
     * region. If the data is already in its preferred location and the
     * faulting processor can establish a mapping without requiring the
     * data to be migrated, then the migration will be avoided. On the
     * other hand, if the data is not in its preferred location or if a
     * direct mapping cannot be established, then it will be migrated to
     * the processor accessing it. Note that setting the preferred
     * location does not prevent data prefetching done using
     * cuMemPrefetchAsync. Having a preferred location can override the
     * thrash detection and resolution logic in the Unified Memory
     * driver: if a page is detected to be constantly thrashing between
     * CPU and GPU memory, the page would normally eventually be pinned
     * to CPU memory, but if the preferred location is set as GPU memory
     * the page will continue to thrash indefinitely. When the Unified
     * Memory driver has to evict pages from a certain location on
     * account of that memory being oversubscribed, the preferred
     * location will be used to decide the destination to which a page
     * should be evicted. If CU_MEM_ADVISE_SET_READ_MOSTLY is also set on
     * this memory region or any subset of it, the preferred location
     * will be ignored for that subset.</li>
     * <li>CU_MEM_ADVISE_UNSET_PREFERRED_LOCATION: Undoes the effect of
     * CU_MEM_ADVISE_SET_PREFERRED_LOCATION and changes the preferred
     * location to none.</li>
     * <li>CU_MEM_ADVISE_SET_ACCESSED_BY: This advice implies that the
     * data will be accessed by device. This does not cause data
     * migration and has no impact on the location of the data per se.
     * Instead, it causes the data to always be mapped in the specified
     * processor's page tables, as long as the location of the data
     * permits a mapping to be established. If the data gets migrated for
     * any reason, the mappings are updated accordingly. This advice is
     * useful in scenarios where data locality is not important, but
     * avoiding faults is. Note that on CPU access of this data, the data
     * may be migrated to CPU memory because the CPU typically cannot
     * access GPU memory directly; any GPU that had the
     * CU_MEM_ADVISE_SET_ACCESSED_BY flag set for this data will then
     * have its mapping updated to point to the page in CPU memory.</li>
     * <li>CU_MEM_ADVISE_UNSET_ACCESSED_BY: Undoes the effect of
     * CU_MEM_ADVISE_SET_ACCESSED_BY. The current set of mappings may be
     * removed at any time causing accesses to result in page
     * faults.</li>
     * </ul>
     * <p>
     * Passing in CU_DEVICE_CPU for device will set the advice for the
     * CPU.
     * <p>
     * Note that this function is asynchronous with respect to the host
     * and all work on other devices.
     *
     * @param devPtr Pointer to memory to set the advice for
     * @param count Size in bytes of the memory range
     * @param advice Advice to be applied for the specified memory range
     * @param device Device to apply the advice for
     *
     * @return CUDA_SUCCESS, CUDA_ERROR_INVALID_VALUE, CUDA_ERROR_INVALID_DEVICE
     *
     * @see JCudaDriver#cuMemcpy
     * @see JCudaDriver#cuMemcpyPeer
     * @see JCudaDriver#cuMemcpyAsync
     * @see JCudaDriver#cuMemcpy3DPeerAsync
     * @see JCudaDriver#cuMemPrefetchAsync
     */
    public static int cuMemAdvise(CUdeviceptr devPtr, long count, int advice, CUdevice device)
    {
        return checkResult(cuMemAdviseNative(devPtr, count, advice, device));
    }
    private static native int cuMemAdviseNative(CUdeviceptr devPtr, long count, int advice, CUdevice device);

    /**
     * Query an attribute of a given memory range.
     * <p>
     * Query an attribute about the memory range starting at devPtr with
     * a size of count bytes. The memory range must refer to managed
     * memory allocated via cuMemAllocManaged or declared via __managed__
     * variables.
     * <p>
     * The attribute parameter can take the following values:
     * <ul>
     * <li>CU_MEM_RANGE_ATTRIBUTE_READ_MOSTLY: If this attribute is
     * specified, data will be interpreted as a 32-bit integer, and
     * dataSize must be 4. The result returned will be 1 if all pages in
     * the given memory range have read-duplication enabled, or 0
     * otherwise.</li>
     * <li>CU_MEM_RANGE_ATTRIBUTE_PREFERRED_LOCATION: If this attribute
     * is specified, data will be interpreted as a 32-bit integer, and
     * dataSize must be 4. The result returned will be a GPU device id if
     * all pages in the memory range have that GPU as their preferred
     * location, or it will be CU_DEVICE_CPU if all pages in the memory
     * range have the CPU as their preferred location, or it will be
     * CU_DEVICE_INVALID if either all the pages don't have the same
     * preferred location or some of the pages don't have a preferred
     * location at all. Note that the actual location of the pages in the
     * memory range at the time of the query may be different from the
     * preferred location.</li>
     * <li>CU_MEM_RANGE_ATTRIBUTE_ACCESSED_BY: If this attribute is
     * specified, data will be interpreted as an array of 32-bit
     * integers, and dataSize must be a non-zero multiple of 4. The
     * result returned will be a list of device ids that had
     * CU_MEM_ADVISE_SET_ACCESSED_BY set for that entire memory range. If
     * any device does not have that advice set for the entire memory
     * range, that device will not be included. If data is larger than
     * the number of devices that have that advice set for that memory
     * range, CU_DEVICE_INVALID will be returned in all the extra space
     * provided. For example, if dataSize is 12 (i.e. data has 3
     * elements) and only device 0 has the advice set, then the result
     * returned will be { 0, CU_DEVICE_INVALID, CU_DEVICE_INVALID }. If
     * data is smaller than the number of devices that have that advice
     * set, then only as many devices will be returned as can fit in the
     * array. There is no guarantee on which specific devices will be
     * returned, however.</li>
     * <li>CU_MEM_RANGE_ATTRIBUTE_LAST_PREFETCH_LOCATION: If this
     * attribute is specified, data will be interpreted as a 32-bit
     * integer, and dataSize must be 4. The result returned will be the
     * last location to which all pages in the memory range were
     * prefetched explicitly via cuMemPrefetchAsync. This will either be
     * a GPU id or CU_DEVICE_CPU depending on whether the last location
     * for prefetch was a GPU or the CPU respectively. If any page in the
     * memory range was never explicitly prefetched or if all pages were
     * not prefetched to the same location, CU_DEVICE_INVALID will be
     * returned. Note that this simply returns the last location that the
     * application requested to prefetch the memory range to. It gives no
     * indication as to whether the prefetch operation to that location
     * has completed or even begun.</li>
     * </ul>
     *
     * @param data A pointer to a memory location where the result of the
     * attribute query will be written to.
     * @param dataSize Size in bytes of the memory pointed to by data
     * @param attribute The attribute to query
     * @param devPtr Start of the range to query
     * @param count Size of the range to query
     * @return CUDA_SUCCESS, CUDA_ERROR_INVALID_VALUE, CUDA_ERROR_INVALID_DEVICE
     *
     * @see JCudaDriver#cuMemRangeGetAttributes
     * @see JCudaDriver#cuMemPrefetchAsync
     * @see JCudaDriver#cuMemAdvise
     */
    public static int cuMemRangeGetAttribute(Pointer data, long dataSize, int attribute, CUdeviceptr devPtr, long count)
    {
        return checkResult(cuMemRangeGetAttributeNative(data, dataSize, attribute, devPtr, count));
    }
    private static native int cuMemRangeGetAttributeNative(Pointer data, long dataSize, int attribute, CUdeviceptr devPtr, long count);

    /**
     * Query attributes of a given memory range.
     * <p>
     * Query attributes of the memory range starting at devPtr with a
     * size of count bytes. The memory range must refer to managed memory
     * allocated via cuMemAllocManaged or declared via __managed__
     * variables. The attributes array will be interpreted to have
     * numAttributes entries. The dataSizes array will also be
     * interpreted to have numAttributes entries. The results of the
     * query will be stored in data.
     * <p>
     * The list of supported attributes is given below. Please refer to
     * {@link JCudaDriver#cuMemRangeGetAttribute} for attribute
     * descriptions and restrictions.
     * <ul>
     * <li>CU_MEM_RANGE_ATTRIBUTE_READ_MOSTLY</li>
     * <li>CU_MEM_RANGE_ATTRIBUTE_PREFERRED_LOCATION</li>
     * <li>CU_MEM_RANGE_ATTRIBUTE_ACCESSED_BY</li>
     * <li>CU_MEM_RANGE_ATTRIBUTE_LAST_PREFETCH_LOCATION</li>
     * </ul>
     *
     * @param data A two-dimensional array containing pointers to memory
     * locations where the result of each attribute query will be written
     * to.
     * @param dataSizes Array containing the sizes of each result
     * @param attributes An array of attributes to query (numAttributes
     * and the number of attributes in this array should match)
     * @param numAttributes Number of attributes to query
     * @param devPtr Start of the range to query
     * @param count Size of the range to query
     * @return CUDA_SUCCESS, CUDA_ERROR_DEINITIALIZED,
     * CUDA_ERROR_INVALID_CONTEXT, CUDA_ERROR_INVALID_VALUE,
     * CUDA_ERROR_INVALID_DEVICE
     */
    public static int cuMemRangeGetAttributes(Pointer data[], long dataSizes[], int attributes[], long numAttributes, CUdeviceptr devPtr, long count)
    {
        return checkResult(cuMemRangeGetAttributesNative(data, dataSizes, attributes, numAttributes, devPtr, count));
    }
    private static native int cuMemRangeGetAttributesNative(Pointer data[], long dataSizes[], int attributes[], long numAttributes, CUdeviceptr devPtr, long count);

    /**
     * Set attributes on a previously allocated memory region
     * <p>
     * The supported attributes are:
     * <ul>
     * <li>CU_POINTER_ATTRIBUTE_SYNC_MEMOPS: A boolean attribute that can
     * either be set (1) or unset (0). When set, the region of memory
     * that ptr points to is guaranteed to always synchronize memory
     * operations that are synchronous. If there are some previously
     * initiated synchronous memory operations that are pending when this
     * attribute is set, the function does not return until those memory
     * operations are complete. See further documentation in the section
     * titled "API synchronization behavior" to learn more about cases
     * when synchronous memory operations can exhibit asynchronous
     * behavior. value will be considered as a pointer to an unsigned
     * integer to which this attribute is to be set.</li>
     * </ul>
     *
     * @param value Pointer to memory containing the value to be set
     * @param attribute Pointer attribute to set
     * @param ptr Pointer to a memory region allocated using CUDA memory allocation APIs
     *
     * @return CUDA_SUCCESS, CUDA_ERROR_DEINITIALIZED, CUDA_ERROR_NOT_INITIALIZED,
     * CUDA_ERROR_INVALID_CONTEXT, CUDA_ERROR_INVALID_VALUE, CUDA_ERROR_INVALID_DEVICE
     *
     * @see JCudaDriver#cuPointerGetAttribute
     * @see JCudaDriver#cuPointerGetAttributes
     * @see JCudaDriver#cuMemAlloc
     * @see JCudaDriver#cuMemFree
     * @see JCudaDriver#cuMemAllocHost
     * @see JCudaDriver#cuMemFreeHost
     * @see JCudaDriver#cuMemHostAlloc
     * @see JCudaDriver#cuMemHostRegister
     * @see JCudaDriver#cuMemHostUnregister
     */
    public static int cuPointerSetAttribute(Pointer value, int attribute, CUdeviceptr ptr)
    {
        return checkResult(cuPointerSetAttributeNative(value, attribute, ptr));
    }
    private static native int cuPointerSetAttributeNative(Pointer value, int attribute, CUdeviceptr ptr);

    /**
     * Returns information about a pointer.
     * <p>
     * The supported attributes are (refer to cuPointerGetAttribute for
     * attribute descriptions and restrictions):
     * <ul>
     * <li>CU_POINTER_ATTRIBUTE_CONTEXT</li>
     * <li>CU_POINTER_ATTRIBUTE_MEMORY_TYPE</li>
     * <li>CU_POINTER_ATTRIBUTE_DEVICE_POINTER</li>
     * <li>CU_POINTER_ATTRIBUTE_HOST_POINTER</li>
     * <li>CU_POINTER_ATTRIBUTE_SYNC_MEMOPS</li>
     * <li>CU_POINTER_ATTRIBUTE_BUFFER_ID</li>
     * <li>CU_POINTER_ATTRIBUTE_IS_MANAGED</li>
     * </ul>
     * <p>
     * Unlike cuPointerGetAttribute, this function will not return an
     * error when the ptr encountered is not a valid CUDA pointer.
     * Instead, the attributes are assigned default NULL values and
     * CUDA_SUCCESS is returned.
     * <p>
     * If ptr was not allocated by, mapped by, or registered with a
     * CUcontext which uses UVA (Unified Virtual Addressing),
     * CUDA_ERROR_INVALID_CONTEXT is returned.
     *
     * @param numAttributes Number of attributes to query
     * @param attributes An array of attributes to query
     * (numAttributes and the number of attributes in this array should match)
     * @param data A pointer to an array of memory locations where the
     * result of each attribute query will be written to.
     * @param ptr Pointer to query
     *
     * @return CUDA_SUCCESS, CUDA_ERROR_DEINITIALIZED, CUDA_ERROR_INVALID_CONTEXT,
     * CUDA_ERROR_INVALID_VALUE, CUDA_ERROR_INVALID_DEVICE
     *
     * @see JCudaDriver#cuPointerGetAttribute
     * @see JCudaDriver#cuPointerSetAttribute
     */
    public static int cuPointerGetAttributes(int numAttributes, int attributes[], Pointer data, CUdeviceptr ptr)
    {
        return checkResult(cuPointerGetAttributesNative(numAttributes, attributes, data, ptr));
    }
    private static native int cuPointerGetAttributesNative(int numAttributes, int attributes[], Pointer data, CUdeviceptr ptr);

    /**
     * Create a stream.
     *
     * CUresult cuStreamCreate (
     *      CUstream* phStream,
     *      unsigned int  Flags )
     *
     * <p>
     * Creates a stream and returns a handle in phStream. The Flags
     * argument determines behaviors of the stream. Valid values for
     * Flags are:
     * <ul>
     * <li>CU_STREAM_DEFAULT: Default stream creation flag.</li>
     * <li>CU_STREAM_NON_BLOCKING: Specifies that work running in the
     * created stream may run concurrently with work in stream 0 (the
     * NULL stream), and that the created stream should perform no
     * implicit synchronization with stream 0.</li>
     * </ul>
     * <p>
     * Note that this function may also return error codes from previous,
     * asynchronous launches.
     *
     * @param phStream Returned newly created stream
     * @param Flags Parameters for stream creation
     *
     * @return CUDA_SUCCESS, CUDA_ERROR_DEINITIALIZED, CUDA_ERROR_NOT_INITIALIZED,
     * CUDA_ERROR_INVALID_CONTEXT, CUDA_ERROR_INVALID_VALUE,
     * CUDA_ERROR_OUT_OF_MEMORY
     *
     * @see JCudaDriver#cuStreamDestroy
     * @see JCudaDriver#cuStreamWaitEvent
     * @see JCudaDriver#cuStreamQuery
     * @see JCudaDriver#cuStreamSynchronize
     * @see JCudaDriver#cuStreamAddCallback
     */
    public static int cuStreamCreate(CUstream phStream, int Flags)
    {
        return checkResult(cuStreamCreateNative(phStream, Flags));
    }
    private static native int cuStreamCreateNative(CUstream phStream, int Flags);

    /**
     * Create a stream with the given priority
     *
     * Creates a stream with the specified priority and returns a handle in phStream.
     * This API alters the scheduler priority of work in the stream. Work in a higher
     * priority stream may preempt work already executing in a low priority stream.
     *
     * priority follows a convention where lower numbers represent higher priorities.
     * '0' represents default priority. The range of meaningful numerical priorities can
     * be queried using ::cuCtxGetStreamPriorityRange. If the specified priority is
     * outside the numerical range returned by ::cuCtxGetStreamPriorityRange,
     * it will automatically be clamped to the lowest or the highest number in the range.
     *
     * @param phStream Returned newly created stream
     * @param flags Flags for stream creation. See ::cuStreamCreate for a list of valid flags
     * @param priority Stream priority. Lower numbers represent higher priorities.
     * See ::cuCtxGetStreamPriorityRange for more information about
     * meaningful stream priorities that can be passed.
     *
     * Note: Stream priorities are supported only on GPUs
     * with compute capability 3.5 or higher.
     *
     * Note: In the current implementation, only compute kernels launched in
     * priority streams are affected by the stream's priority. Stream priorities have
     * no effect on host-to-device and device-to-host memory operations.
* * @return CUDA_SUCCESS, CUDA_ERROR_DEINITIALIZED, * CUDA_ERROR_NOT_INITIALIZED, CUDA_ERROR_INVALID_CONTEXT, * CUDA_ERROR_INVALID_VALUE, CUDA_ERROR_OUT_OF_MEMORY * * @see JCudaDriver#cuStreamDestroy * @see JCudaDriver#cuStreamCreate * @see JCudaDriver#cuStreamGetPriority * @see JCudaDriver#cuCtxGetStreamPriorityRange * @see JCudaDriver#cuStreamGetFlags * @see JCudaDriver#cuStreamWaitEvent * @see JCudaDriver#cuStreamQuery * @see JCudaDriver#cuStreamSynchronize * @see JCudaDriver#cuStreamAddCallback * @see JCudaDriver#cudaStreamCreateWithPriority */ public static int cuStreamCreateWithPriority(CUstream phStream, int flags, int priority) { return checkResult(cuStreamCreateWithPriorityNative(phStream, flags, priority)); } private static native int cuStreamCreateWithPriorityNative(CUstream phStream, int flags, int priority); /** * Query the priority of a given stream. * * Query the priority of a stream created using ::cuStreamCreate or ::cuStreamCreateWithPriority * and return the priority in priority. Note that if the stream was created with a * priority outside the numerical range returned by ::cuCtxGetStreamPriorityRange, * this function returns the clamped priority. * See ::cuStreamCreateWithPriority for details about priority clamping. 
* * @param hStream Handle to the stream to be queried * @param priority Pointer to a signed integer in which the stream's priority is returned * @return CUDA_SUCCESS, CUDA_ERROR_DEINITIALIZED, * CUDA_ERROR_NOT_INITIALIZED, CUDA_ERROR_INVALID_CONTEXT, * CUDA_ERROR_INVALID_VALUE, CUDA_ERROR_INVALID_HANDLE, * CUDA_ERROR_OUT_OF_MEMORY * * @see JCudaDriver#cuStreamDestroy * @see JCudaDriver#cuStreamCreate * @see JCudaDriver#cuStreamCreateWithPriority * @see JCudaDriver#cuCtxGetStreamPriorityRange * @see JCudaDriver#cuStreamGetFlags * @see JCudaDriver#cudaStreamGetPriority */ public static int cuStreamGetPriority(CUstream hStream, int priority[]) { return checkResult(cuStreamGetPriorityNative(hStream, priority)); } private static native int cuStreamGetPriorityNative(CUstream hStream, int priority[]); /** * Query the flags of a given stream. * * Query the flags of a stream created using ::cuStreamCreate or * ::cuStreamCreateWithPriority and return the flags in flags. * * @param hStream Handle to the stream to be queried * @param flags Pointer to an unsigned integer in which the stream's flags * are returned The value returned in flags is a logical 'OR' of * all flags that were used while creating this stream. See * ::cuStreamCreate for the list of valid flags * @return CUDA_SUCCESS, CUDA_ERROR_DEINITIALIZED, * CUDA_ERROR_NOT_INITIALIZED, CUDA_ERROR_INVALID_CONTEXT, * CUDA_ERROR_INVALID_VALUE, CUDA_ERROR_INVALID_HANDLE, * CUDA_ERROR_OUT_OF_MEMORY * * @see JCudaDriver#cuStreamDestroy * @see JCudaDriver#cuStreamCreate * @see JCudaDriver#cuStreamGetPriority * @see JCudaDriver#cudaStreamGetFlags */ public static int cuStreamGetFlags(CUstream hStream, int flags[]) { return checkResult(cuStreamGetFlagsNative(hStream, flags)); } private static native int cuStreamGetFlagsNative(CUstream hStream, int flags[]); /** * Query the context associated with a stream. * * Returns the CUDA context that the stream is associated with. 
* * The stream handle hStream can refer to any of the following: *
    *
  • * a stream created via any of the CUDA driver APIs such as ::cuStreamCreate * and ::cuStreamCreateWithPriority, or their runtime API equivalents such as * ::cudaStreamCreate, ::cudaStreamCreateWithFlags and ::cudaStreamCreateWithPriority. * The returned context is the context that was active in the calling thread when the * stream was created. Passing an invalid handle will result in undefined behavior. *
  • *
  • * any of the special streams such as the NULL stream, ::CU_STREAM_LEGACY * and ::CU_STREAM_PER_THREAD. The runtime API equivalents of these are * also accepted, which are NULL, ::cudaStreamLegacy and ::cudaStreamPerThread * respectively. Specifying any of the special handles will return the context * current to the calling thread. If no context is current to the calling thread, * ::CUDA_ERROR_INVALID_CONTEXT is returned. *
  • *
* * @param hStream Handle to the stream to be queried * @param pctx Returned context associated with the stream * * @return CUDA_SUCCESS, CUDA_ERROR_DEINITIALIZED, * CUDA_ERROR_NOT_INITIALIZED, CUDA_ERROR_INVALID_CONTEXT, * CUDA_ERROR_INVALID_HANDLE, * * @see JCudaDriver#cuStreamDestroy * @see JCudaDriver#cuStreamCreateWithPriority * @see JCudaDriver#cuStreamGetPriority * @see JCudaDriver#cuStreamGetFlags * @see JCudaDriver#cuStreamWaitEvent * @see JCudaDriver#cuStreamQuery * @see JCudaDriver#cuStreamSynchronize * @see JCudaDriver#cuStreamAddCallback * @see JCudaDriver#cudaStreamCreate * @see JCudaDriver#cudaStreamCreateWithFlags */ public static int cuStreamGetCtx(CUstream hStream, CUcontext pctx) { return checkResult(cuStreamGetCtxNative(hStream, pctx)); } private static native int cuStreamGetCtxNative(CUstream hStream, CUcontext pctx); /** * Make a compute stream wait on an event. * *
     * CUresult cuStreamWaitEvent (
     *      CUstream hStream,
     *      CUevent hEvent,
     *      unsigned int  Flags )
     * 
*
*

Make a compute stream wait on an event. * Makes all future work submitted to hStream wait until hEvent reports completion before beginning execution. This * synchronization will be performed efficiently on the device. The event * hEvent may be from a different * context than hStream, in which case this function will * perform cross-device synchronization. *

*

The stream hStream will wait * only for the completion of the most recent host call to cuEventRecord() * on hEvent. Once this call has returned, any functions * (including cuEventRecord() and cuEventDestroy()) may be called on hEvent again, and subsequent calls will not have any effect on * hStream. *

*

If hStream is 0 (the NULL * stream) any future work submitted in any stream will wait for hEvent to complete before beginning execution. This effectively * creates a barrier for all future work submitted to the context. *

*

If cuEventRecord() has not been called * on hEvent, this call acts as if the record has already * completed, and so is a functional no-op. *

*
* Note: *

Note that this * function may also return error codes from previous, asynchronous * launches. *

*
*

*
* * @param hStream Stream to wait * @param hEvent Event to wait on (may not be NULL) * @param Flags Parameters for the operation (must be 0) * * @return CUDA_SUCCESS, CUDA_ERROR_DEINITIALIZED, CUDA_ERROR_NOT_INITIALIZED, * CUDA_ERROR_INVALID_CONTEXT, CUDA_ERROR_INVALID_HANDLE, * * @see JCudaDriver#cuStreamCreate * @see JCudaDriver#cuEventRecord * @see JCudaDriver#cuStreamQuery * @see JCudaDriver#cuStreamSynchronize * @see JCudaDriver#cuStreamAddCallback * @see JCudaDriver#cuStreamDestroy */ public static int cuStreamWaitEvent(CUstream hStream, CUevent hEvent, int Flags) { return checkResult(cuStreamWaitEventNative(hStream, hEvent, Flags)); } private static native int cuStreamWaitEventNative(CUstream hStream, CUevent hEvent, int Flags); /** * Add a callback to a compute stream. * * This function is slated for eventual deprecation and removal. If * you do not require the callback to execute in case of a device error, * consider using ::cuLaunchHostFunc. Additionally, this function is not * supported with ::cuStreamBeginCapture and ::cuStreamEndCapture, unlike * ::cuLaunchHostFunc. * *
     * CUresult cuStreamAddCallback (
     *      CUstream hStream,
     *      CUstreamCallback callback,
     *      void* userData,
     *      unsigned int  flags )
     * 
*
*

Add a callback to a compute stream. Adds * a callback to be called on the host after all currently enqueued items * in the stream * have completed. For each cuStreamAddCallback * call, the callback will be executed exactly once. The callback will * block later * work in the stream until it is finished. *

*

The callback may be passed CUDA_SUCCESS * or an error code. In the event of a device error, all subsequently * executed callbacks will receive an appropriate CUresult. *

*

Callbacks must not make any CUDA API * calls. Attempting to use a CUDA API will result in CUDA_ERROR_NOT_PERMITTED. * Callbacks must not perform any synchronization that may depend on * outstanding device work or other callbacks that are not * mandated to run earlier. Callbacks * without a mandated order (in independent streams) execute in undefined * order and may be * serialized. *

*

This API requires compute capability * 1.1 or greater. See cuDeviceGetAttribute or cuDeviceGetProperties to * query compute capability. Attempting to use this API with earlier * compute versions will return CUDA_ERROR_NOT_SUPPORTED. *

*
* Note: *

Note that this * function may also return error codes from previous, asynchronous * launches. *

*
*

*
* * @param hStream Stream to add callback to * @param callback The function to call once preceding stream operations are complete * @param userData User specified data to be passed to the callback function * @param flags Reserved for future use, must be 0 * * @return CUDA_SUCCESS, CUDA_ERROR_DEINITIALIZED, CUDA_ERROR_NOT_INITIALIZED, * CUDA_ERROR_INVALID_CONTEXT, CUDA_ERROR_INVALID_HANDLE, * CUDA_ERROR_NOT_SUPPORTED * * @see JCudaDriver#cuStreamCreate * @see JCudaDriver#cuStreamQuery * @see JCudaDriver#cuStreamSynchronize * @see JCudaDriver#cuStreamWaitEvent * @see JCudaDriver#cuStreamDestroy */ public static int cuStreamAddCallback(CUstream hStream, CUstreamCallback callback, Object userData, int flags) { return checkResult(cuStreamAddCallbackNative(hStream, callback, userData, flags)); } private static native int cuStreamAddCallbackNative(CUstream hStream, CUstreamCallback callback, Object userData, int flags); /** * Begins graph capture on a stream. * * Begin graph capture on \p hStream. When a stream is in capture mode, all operations * pushed into the stream will not be executed, but will instead be captured into * a graph, which will be returned via ::cuStreamEndCapture. Capture may not be initiated * if \p stream is CU_STREAM_LEGACY. Capture must be ended on the same stream in which * it was initiated, and it may only be initiated if the stream is not already in capture * mode. The capture mode may be queried via ::cuStreamIsCapturing. * * @param hStream - Stream in which to initiate capture * * Kernels captured using this API must not use texture and surface references. * Reading or writing through any texture or surface reference is undefined * behavior. This restriction does not apply to texture and surface objects. 
* * @return * CUDA_SUCCESS, * CUDA_ERROR_DEINITIALIZED, * CUDA_ERROR_NOT_INITIALIZED, * CUDA_ERROR_INVALID_VALUE * * @see * JCudaDriver#cuStreamCreate * JCudaDriver#cuStreamIsCapturing * JCudaDriver#cuStreamEndCapture */ public static int cuStreamBeginCapture(CUstream hStream, int mode) { return checkResult(cuStreamBeginCaptureNative(hStream, mode)); } private static native int cuStreamBeginCaptureNative(CUstream hStream, int mode); /** * Swaps the stream capture interaction mode for a thread. * * Sets the calling thread's stream capture interaction mode to the value contained * in \p *mode, and overwrites \p *mode with the previous mode for the thread. To * facilitate deterministic behavior across function or module boundaries, callers * are encouraged to use this API in a push-pop fashion:
*
 
     * CUstreamCaptureMode mode = desiredMode;
     * cuThreadExchangeStreamCaptureMode(&mode);
     * ...
     * cuThreadExchangeStreamCaptureMode(&mode); // restore previous mode
     * 

*
* * During stream capture (see ::cuStreamBeginCapture), some actions, such as a call * to ::cudaMalloc, may be unsafe. In the case of ::cudaMalloc, the operation is * not enqueued asynchronously to a stream, and is not observed by stream capture. * Therefore, if the sequence of operations captured via ::cuStreamBeginCapture * depended on the allocation being replayed whenever the graph is launched, the * captured graph would be invalid.
*
* Therefore, stream capture places restrictions on API calls that can be made within * or concurrently to a ::cuStreamBeginCapture-::cuStreamEndCapture sequence. This * behavior can be controlled via this API and flags to ::cuStreamBeginCapture. *
* A thread's mode is one of the following: *
    *
  • CU_STREAM_CAPTURE_MODE_GLOBAL: This is the default mode. If the local thread has * an ongoing capture sequence that was not initiated with * \p CU_STREAM_CAPTURE_MODE_RELAXED at \p cuStreamBeginCapture, or if any other thread * has a concurrent capture sequence initiated with \p CU_STREAM_CAPTURE_MODE_GLOBAL, * this thread is prohibited from potentially unsafe API calls. *
  • *
  • CU_STREAM_CAPTURE_MODE_THREAD_LOCAL: If the local thread has an ongoing capture * sequence not initiated with \p CU_STREAM_CAPTURE_MODE_RELAXED, it is prohibited * from potentially unsafe API calls. Concurrent capture sequences in other threads * are ignored. *
  • *
  • CU_STREAM_CAPTURE_MODE_RELAXED: The local thread is not prohibited from potentially * unsafe API calls. Note that the thread is still prohibited from API calls which * necessarily conflict with stream capture, for example, attempting ::cuEventQuery * on an event that was last recorded inside a capture sequence. *
  • *
* * @param mode - Pointer to mode value to swap with the current mode * * @return * CUDA_SUCCESS, * CUDA_ERROR_DEINITIALIZED, * CUDA_ERROR_NOT_INITIALIZED, * CUDA_ERROR_INVALID_VALUE * * @see JCudaDriver#cuStreamBeginCapture */ public static int cuThreadExchangeStreamCaptureMode(int mode[]) { return checkResult(cuThreadExchangeStreamCaptureModeNative(mode)); } private static native int cuThreadExchangeStreamCaptureModeNative(int mode[]); /** * Ends capture on a stream, returning the captured graph. * * End capture on \p hStream, returning the captured graph via \p phGraph. * Capture must have been initiated on \p hStream via a call to ::cuStreamBeginCapture. * If capture was invalidated, due to a violation of the rules of stream capture, then * a NULL graph will be returned. * * If the \p mode argument to ::cuStreamBeginCapture was not * ::CU_STREAM_CAPTURE_MODE_RELAXED, this call must be from the same thread as * ::cuStreamBeginCapture. * * @param hStream - Stream to query * @param phGraph - The captured graph * * @return * CUDA_SUCCESS, * CUDA_ERROR_DEINITIALIZED, * CUDA_ERROR_NOT_INITIALIZED, * CUDA_ERROR_INVALID_VALUE * CUDA_ERROR_STREAM_CAPTURE_WRONG_THREAD * * @see * JCudaDriver#cuStreamCreate * JCudaDriver#cuStreamBeginCapture * JCudaDriver#cuStreamIsCapturing */ public static int cuStreamEndCapture(CUstream hStream, CUgraph phGraph) { return checkResult(cuStreamEndCaptureNative(hStream, phGraph)); } private static native int cuStreamEndCaptureNative(CUstream hStream, CUgraph phGraph); /** * Returns a stream's capture status * * Return the capture status of \p hStream via \p captureStatus. After a successful * call, \p *captureStatus will contain one of the following: *
    *
  • ::CU_STREAM_CAPTURE_STATUS_NONE: The stream is not capturing.
  • *
  • ::CU_STREAM_CAPTURE_STATUS_ACTIVE: The stream is capturing.
  • *
  • ::CU_STREAM_CAPTURE_STATUS_INVALIDATED: The stream was capturing but an error * has invalidated the capture sequence. The capture sequence must be terminated * with ::cuStreamEndCapture on the stream where it was initiated in order to * continue using \p hStream.
  • *
* Note that, if this is called on ::CU_STREAM_LEGACY (the "null stream") while * a blocking stream in the same context is capturing, it will return * ::CUDA_ERROR_STREAM_CAPTURE_IMPLICIT and \p *captureStatus is unspecified * after the call. The blocking stream capture is not invalidated.
*
* When a blocking stream is capturing, the legacy stream is in an * unusable state until the blocking stream capture is terminated. The legacy * stream is not supported for stream capture, but attempted use would have an * implicit dependency on the capturing stream(s). * * @param hStream - Stream to query * @param captureStatus - Returns the stream's capture status * * @return * CUDA_SUCCESS, * CUDA_ERROR_DEINITIALIZED, * CUDA_ERROR_NOT_INITIALIZED, * CUDA_ERROR_INVALID_VALUE, * CUDA_ERROR_STREAM_CAPTURE_IMPLICIT * * @see * JCudaDriver#cuStreamCreate * JCudaDriver#cuStreamBeginCapture * JCudaDriver#cuStreamEndCapture */ public static int cuStreamIsCapturing(CUstream hStream, int captureStatus[]) { return checkResult(cuStreamIsCapturingNative(hStream, captureStatus)); } private static native int cuStreamIsCapturingNative(CUstream hStream, int captureStatus[]); /** * Query capture status of a stream * * Query the capture status of a stream and and get an id for * the capture sequence, which is unique over the lifetime of the process. * * If called on ::CU_STREAM_LEGACY (the "null stream") while a stream not created * with ::CU_STREAM_NON_BLOCKING is capturing, returns ::CUDA_ERROR_STREAM_CAPTURE_IMPLICIT. * * A valid id is returned only if both of the following are true: * - the call returns CUDA_SUCCESS * - captureStatus is set to ::CU_STREAM_CAPTURE_STATUS_ACTIVE * * @return * CUDA_SUCCESS, * CUDA_ERROR_STREAM_CAPTURE_IMPLICIT * * @see JCudaDriver#cuStreamBeginCapture, * @see JCudaDriver#cuStreamIsCapturing */ public static int cuStreamGetCaptureInfo(CUstream hStream, int captureStatus[], long id[]) { return checkResult(cuStreamGetCaptureInfoNative(hStream, captureStatus, id)); } private static native int cuStreamGetCaptureInfoNative(CUstream hStream, int captureStatus[], long id[]); /** * Attach memory to a stream asynchronously. * * Enqueues an operation in hStream to specify stream association of * length bytes of memory starting from dptr. 
This function is a * stream-ordered operation, meaning that it is dependent on, and will * only take effect when, previous work in stream has completed. Any * previous association is automatically replaced. * * dptr must point to one of the following types of memories: *
    *
  • managed memory declared using the __managed__ keyword or allocated with * ::cuMemAllocManaged.
  • *
  • a valid host-accessible region of system-allocated pageable memory. This * type of memory may only be specified if the device associated with the * stream reports a non-zero value for the device attribute * ::CU_DEVICE_ATTRIBUTE_PAGEABLE_MEMORY_ACCESS.
  • *
* * For managed allocations, length must be either zero or the entire * allocation's size. Both indicate that the entire allocation's stream * association is being changed. Currently, it is not possible to change stream * association for a portion of a managed allocation.
*
* For pageable host allocations, length must be non-zero.
*
* The stream association is specified using flags which must be * one of ::CUmemAttach_flags. * If the ::CU_MEM_ATTACH_GLOBAL flag is specified, the memory can be accessed * by any stream on any device. * If the ::CU_MEM_ATTACH_HOST flag is specified, the program makes a guarantee * that it won't access the memory on the device from any stream on a device that * has a zero value for the device attribute ::CU_DEVICE_ATTRIBUTE_CONCURRENT_MANAGED_ACCESS. * If the ::CU_MEM_ATTACH_SINGLE flag is specified and hStream is associated with * a device that has a zero value for the device attribute ::CU_DEVICE_ATTRIBUTE_CONCURRENT_MANAGED_ACCESS, * the program makes a guarantee that it will only access the memory on the device * from hStream. It is illegal to attach singly to the NULL stream, because the * NULL stream is a virtual global stream and not a specific stream. An error will * be returned in this case.
*
* When memory is associated with a single stream, the Unified Memory system will * allow CPU access to this memory region so long as all operations in hStream * have completed, regardless of whether other streams are active. In effect, * this constrains exclusive ownership of the managed memory region by * an active GPU to per-stream activity instead of whole-GPU activity.
*
* Accessing memory on the device from streams that are not associated with * it will produce undefined results. No error checking is performed by the * Unified Memory system to ensure that kernels launched into other streams * do not access this region.
*
* It is a program's responsibility to order calls to ::cuStreamAttachMemAsync * via events, synchronization or other means to ensure legal access to memory * at all times. Data visibility and coherency will be changed appropriately * for all kernels which follow a stream-association change.
*
* If hStream is destroyed while data is associated with it, the association is * removed and the association reverts to the default visibility of the allocation * as specified at ::cuMemAllocManaged. For __managed__ variables, the default * association is always ::CU_MEM_ATTACH_GLOBAL. Note that destroying a stream is an * asynchronous operation, and as a result, the change to default association won't * happen until all work in the stream has completed. * * @param hStream - Stream in which to enqueue the attach operation * @param dptr - Pointer to memory (must be a pointer to managed memory or * to a valid host-accessible region of system-allocated * pageable memory) * @param length - Length of memory * @param flags - Must be one of ::CUmemAttach_flags * * @return * CUDA_SUCCESS, * CUDA_ERROR_DEINITIALIZED, * CUDA_ERROR_NOT_INITIALIZED, * CUDA_ERROR_INVALID_CONTEXT, * CUDA_ERROR_INVALID_HANDLE, * CUDA_ERROR_NOT_SUPPORTED * * @see JCudaDriver#cuStreamCreate * JCudaDriver#cuStreamQuery * JCudaDriver#cuStreamSynchronize * JCudaDriver#cuStreamWaitEvent * JCudaDriver#cuStreamDestroy * JCudaDriver#cuMemAllocManaged * JCudaDriver#cudaStreamAttachMemAsync */ public static int cuStreamAttachMemAsync(CUstream hStream, CUdeviceptr dptr, long length, int flags) { return checkResult(cuStreamAttachMemAsyncNative(hStream, dptr, length, flags)); } private static native int cuStreamAttachMemAsyncNative(CUstream hStream, CUdeviceptr dptr, long length, int flags); /** * Determine status of a compute stream. * *
     * CUresult cuStreamQuery (
     *      CUstream hStream )
     * 
*
*

Determine status of a compute stream. * Returns CUDA_SUCCESS if all operations in the stream specified by hStream have completed, or CUDA_ERROR_NOT_READY if not. *

*
* Note: *

Note that this * function may also return error codes from previous, asynchronous * launches. *

*
*

*
* * @param hStream Stream to query status of * * @return CUDA_SUCCESS, CUDA_ERROR_DEINITIALIZED, CUDA_ERROR_NOT_INITIALIZED, * CUDA_ERROR_INVALID_CONTEXT, CUDA_ERROR_INVALID_HANDLE, * CUDA_ERROR_NOT_READY * * @see JCudaDriver#cuStreamCreate * @see JCudaDriver#cuStreamWaitEvent * @see JCudaDriver#cuStreamDestroy * @see JCudaDriver#cuStreamSynchronize * @see JCudaDriver#cuStreamAddCallback */ public static int cuStreamQuery(CUstream hStream) { return checkResult(cuStreamQueryNative(hStream)); } private static native int cuStreamQueryNative(CUstream hStream); /** * Wait until a stream's tasks are completed. * *
     * CUresult cuStreamSynchronize (
     *      CUstream hStream )
     * 
*
*

Wait until a stream's tasks are completed. * Waits until the device has completed all operations in the stream * specified by * hStream. If the context was * created with the CU_CTX_SCHED_BLOCKING_SYNC flag, the CPU thread will * block until the stream is finished with all of its tasks. *

*
* Note: *

Note that this * function may also return error codes from previous, asynchronous * launches. *

*
*

*
* * @param hStream Stream to wait for * * @return CUDA_SUCCESS, CUDA_ERROR_DEINITIALIZED, CUDA_ERROR_NOT_INITIALIZED, * CUDA_ERROR_INVALID_CONTEXT, CUDA_ERROR_INVALID_HANDLE * * @see JCudaDriver#cuStreamCreate * @see JCudaDriver#cuStreamDestroy * @see JCudaDriver#cuStreamWaitEvent * @see JCudaDriver#cuStreamQuery * @see JCudaDriver#cuStreamAddCallback */ public static int cuStreamSynchronize(CUstream hStream) { return checkResult(cuStreamSynchronizeNative(hStream)); } private static native int cuStreamSynchronizeNative(CUstream hStream); /** * Destroys a stream. * *
     * CUresult cuStreamDestroy (
     *      CUstream hStream )
     * 
*
*

Destroys a stream. Destroys the stream * specified by hStream. *

*

In case the device is still doing work * in the stream hStream when cuStreamDestroy() is called, the * function will return immediately and the resources associated with hStream will be released automatically once the device has * completed all work in hStream. *

*
* Note: *

Note that this * function may also return error codes from previous, asynchronous * launches. *

*
*

*
* * @param hStream Stream to destroy * * @return CUDA_SUCCESS, CUDA_ERROR_DEINITIALIZED, CUDA_ERROR_NOT_INITIALIZED, * CUDA_ERROR_INVALID_CONTEXT, CUDA_ERROR_INVALID_VALUE * * @see JCudaDriver#cuStreamCreate * @see JCudaDriver#cuStreamWaitEvent * @see JCudaDriver#cuStreamQuery * @see JCudaDriver#cuStreamSynchronize * @see JCudaDriver#cuStreamAddCallback */ public static int cuStreamDestroy(CUstream hStream) { return checkResult(cuStreamDestroyNative(hStream)); } private static native int cuStreamDestroyNative(CUstream hStream); /** * Copies attributes from source stream to destination stream * * Copies attributes from source stream \p src to destination stream \p dst. * Both streams must have the same context. * * @param dst Destination stream * @param src Source stream * * For list of attributes see ::CUstreamAttrID * * @return CUDA_SUCCESS, CUDA_ERROR_INVALID_VALUE * * @see CUaccessPolicyWindow */ public static int cuStreamCopyAttributes(CUstream dst, CUstream src) { return checkResult(cuStreamCopyAttributesNative(dst, src)); } private static native int cuStreamCopyAttributesNative(CUstream dst, CUstream src); /** * Queries stream attribute. * * Queries attribute attr from hStream and stores it in corresponding * member of value_out. * * @param hStream * @param attr * @param value_out * * @return CUDA_SUCCESS, CUDA_ERROR_INVALID_VALUE, CUDA_ERROR_INVALID_HANDLE * * @see CUaccessPolicyWindow */ public static int cuStreamGetAttribute(CUstream hStream, int attr, CUstreamAttrValue value_out) { return checkResult(cuStreamGetAttributeNative(hStream, attr, value_out)); } private static native int cuStreamGetAttributeNative(CUstream hStream, int attr, CUstreamAttrValue value_out); /** * Sets stream attribute. * * Sets attribute attr on hStream from corresponding attribute of * value. The updated attribute will be applied to subsequent work * submitted to the stream. It will not affect previously submitted work. 
* * @param hStream * @param attr * @param value * * @return CUDA_SUCCESS, CUDA_ERROR_INVALID_VALUE, CUDA_ERROR_INVALID_HANDLE * * @see CUaccessPolicyWindow */ public static int cuStreamSetAttribute(CUstream hStream, int attr, CUstreamAttrValue value) { return checkResult(cuStreamSetAttributeNative(hStream, attr, value)); } private static native int cuStreamSetAttributeNative(CUstream hStream, int attr, CUstreamAttrValue value); /** * Initializes OpenGL interoperability. * *
     * CUresult cuGLInit (
     *      void )
     * 
*
*

Initializes OpenGL interoperability. * DeprecatedThis function is * deprecated as of Cuda 3.0.Initializes OpenGL interoperability. * This function is deprecated and calling it is no longer required. It * may fail if the * needed OpenGL driver facilities are * not available. *

*
* Note: *

Note that * this function may also return error codes from previous, asynchronous * launches. *

*
*

*
* * * @return CUDA_SUCCESS, CUDA_ERROR_DEINITIALIZED, CUDA_ERROR_NOT_INITIALIZED, * CUDA_ERROR_INVALID_CONTEXT, CUDA_ERROR_UNKNOWN * * @see JCudaDriver#cuGLMapBufferObject * @see JCudaDriver#cuGLRegisterBufferObject * @see JCudaDriver#cuGLUnmapBufferObject * @see JCudaDriver#cuGLUnregisterBufferObject * @see JCudaDriver#cuGLMapBufferObjectAsync * @see JCudaDriver#cuGLUnmapBufferObjectAsync * @see JCudaDriver#cuGLSetBufferObjectMapFlags * * @deprecated Deprecated as of CUDA 3.0 */ @Deprecated public static int cuGLInit() { return checkResult(cuGLInitNative()); } private static native int cuGLInitNative(); /** * Create a CUDA context for interoperability with OpenGL. * *
     * CUresult cuGLCtxCreate (
     *      CUcontext* pCtx,
     *      unsigned int  Flags,
     *      CUdevice device )
     * 
*
*

Create a CUDA context for * interoperability with OpenGL. * DeprecatedThis function is * deprecated as of Cuda 5.0.This function is deprecated and should * no longer be used. It is no longer necessary to associate a CUDA * context with an OpenGL * context in order to achieve maximum * interoperability performance. *

*
* Note: *

Note that * this function may also return error codes from previous, asynchronous * launches. *

*
*

*
* * @param pCtx Returned CUDA context * @param Flags Options for CUDA context creation * @param device Device on which to create the context * * @return CUDA_SUCCESS, CUDA_ERROR_DEINITIALIZED, CUDA_ERROR_NOT_INITIALIZED, * CUDA_ERROR_INVALID_CONTEXT, CUDA_ERROR_INVALID_VALUE, * CUDA_ERROR_OUT_OF_MEMORY * * @see JCudaDriver#cuCtxCreate * @see JCudaDriver#cuGLInit * @see JCudaDriver#cuGLMapBufferObject * @see JCudaDriver#cuGLRegisterBufferObject * @see JCudaDriver#cuGLUnmapBufferObject * @see JCudaDriver#cuGLUnregisterBufferObject * @see JCudaDriver#cuGLMapBufferObjectAsync * @see JCudaDriver#cuGLUnmapBufferObjectAsync * @see JCudaDriver#cuGLSetBufferObjectMapFlags * * @deprecated Deprecated as of CUDA 5.0 */ @Deprecated public static int cuGLCtxCreate( CUcontext pCtx, int Flags, CUdevice device ) { return checkResult(cuGLCtxCreateNative(pCtx, Flags, device)); } private static native int cuGLCtxCreateNative(CUcontext pCtx, int Flags, CUdevice device); /** * Gets the CUDA devices associated with the current OpenGL context. * *
     * CUresult cuGLGetDevices (
     *      unsigned int* pCudaDeviceCount,
     *      CUdevice* pCudaDevices,
     *      unsigned int  cudaDeviceCount,
     *      CUGLDeviceList deviceList )
     * 
*
*

Gets the CUDA devices associated with * the current OpenGL context. Returns in *pCudaDeviceCount * the number of CUDA-compatible devices corresponding to the current * OpenGL context. Also returns in *pCudaDevices at most * cudaDeviceCount of the CUDA-compatible devices corresponding to the * current OpenGL context. If any of the GPUs being * used by the current OpenGL context are * not CUDA capable then the call will return CUDA_ERROR_NO_DEVICE. *

*

The deviceList argument may * be any of the following: *

    *
  • *

    CU_GL_DEVICE_LIST_ALL: Query * all devices used by the current OpenGL context. *

    *
  • *
  • *

    CU_GL_DEVICE_LIST_CURRENT_FRAME: * Query the devices used by the current OpenGL context to render the * current frame (in SLI). *

    *
  • *
  • *

    CU_GL_DEVICE_LIST_NEXT_FRAME: * Query the devices used by the current OpenGL context to render the next * frame (in SLI). Note that this is a prediction, * it can't be guaranteed that this * is correct in all cases. *

    *
  • *
*

*
* Note: *

Note that this * function may also return error codes from previous, asynchronous * launches. *

*
*

*
* * @param pCudaDeviceCount Returned number of CUDA devices. * @param pCudaDevices Returned CUDA devices. * @param cudaDeviceCount The size of the output device array pCudaDevices. * @param deviceList The set of devices to return. * * @return CUDA_SUCCESS, CUDA_ERROR_NO_DEVICE, * CUDA_ERROR_INVALID_VALUECUDA_ERROR_INVALID_CONTEXT * */ public static int cuGLGetDevices(int pCudaDeviceCount[], CUdevice pCudaDevices[], int cudaDeviceCount, int CUGLDeviceList_deviceList) { return checkResult(cuGLGetDevicesNative(pCudaDeviceCount, pCudaDevices, cudaDeviceCount, CUGLDeviceList_deviceList)); } private static native int cuGLGetDevicesNative(int pCudaDeviceCount[], CUdevice pCudaDevices[], int cudaDeviceCount, int CUGLDeviceList_deviceList); /** * Registers an OpenGL buffer object. * *
     * CUresult cuGraphicsGLRegisterBuffer (
     *      CUgraphicsResource* pCudaResource,
     *      GLuint buffer,
     *      unsigned int  Flags )
     * 
*
*

Registers an OpenGL buffer object. * Registers the buffer object specified by buffer for access * by CUDA. A handle to the registered object is returned as pCudaResource. The register flags Flags specify the * intended usage, as follows: *

*
    *
  • *

    CU_GRAPHICS_REGISTER_FLAGS_NONE: * Specifies no hints about how this resource will be used. It is therefore * assumed that this * resource will be read from and * written to by CUDA. This is the default value. *

    *
  • *
  • *

    CU_GRAPHICS_REGISTER_FLAGS_READ_ONLY: * Specifies that CUDA will not write to this resource. *

    *
  • *
  • *

    CU_GRAPHICS_REGISTER_FLAGS_WRITE_DISCARD: Specifies that CUDA will * not read from this resource and will write over the entire * contents of the resource, so * none of the data previously stored in the resource will be preserved. *

    *
  • *
*

*
* Note: *

Note that this * function may also return error codes from previous, asynchronous * launches. *

*
*

*
* * @param pCudaResource Pointer to the returned object handle * @param buffer name of buffer object to be registered * @param Flags Register flags * * @return CUDA_SUCCESS, CUDA_ERROR_INVALID_HANDLE, CUDA_ERROR_ALREADY_MAPPED, * CUDA_ERROR_INVALID_CONTEXT, * * @see JCudaDriver#cuGraphicsUnregisterResource * @see JCudaDriver#cuGraphicsMapResources * @see JCudaDriver#cuGraphicsResourceGetMappedPointer */ public static int cuGraphicsGLRegisterBuffer(CUgraphicsResource pCudaResource, int buffer, int Flags) { return checkResult(cuGraphicsGLRegisterBufferNative(pCudaResource, buffer, Flags)); } private static native int cuGraphicsGLRegisterBufferNative(CUgraphicsResource pCudaResource, int buffer, int Flags); /** * Register an OpenGL texture or renderbuffer object. * *
     * CUresult cuGraphicsGLRegisterImage (
     *      CUgraphicsResource* pCudaResource,
     *      GLuint image,
     *      GLenum target,
     *      unsigned int  Flags )
     * 
*
*

Register an OpenGL texture or renderbuffer * object. Registers the texture or renderbuffer object specified by image for access by CUDA. A handle to the registered object is * returned as pCudaResource. *

*

target must match the type of * the object, and must be one of GL_TEXTURE_2D, GL_TEXTURE_RECTANGLE, * GL_TEXTURE_CUBE_MAP, GL_TEXTURE_3D, * GL_TEXTURE_2D_ARRAY, or GL_RENDERBUFFER. *

*

The register flags Flags * specify the intended usage, as follows: *

*
    *
  • *

    CU_GRAPHICS_REGISTER_FLAGS_NONE: * Specifies no hints about how this resource will be used. It is therefore * assumed that this * resource will be read from and * written to by CUDA. This is the default value. *

    *
  • *
  • *

    CU_GRAPHICS_REGISTER_FLAGS_READ_ONLY: * Specifies that CUDA will not write to this resource. *

    *
  • *
  • *

    CU_GRAPHICS_REGISTER_FLAGS_WRITE_DISCARD: Specifies that CUDA will * not read from this resource and will write over the entire * contents of the resource, so * none of the data previously stored in the resource will be preserved. *

    *
  • *
  • *

    CU_GRAPHICS_REGISTER_FLAGS_SURFACE_LDST: * Specifies that CUDA will bind this resource to a surface * reference. *

    *
  • *
  • *

    CU_GRAPHICS_REGISTER_FLAGS_TEXTURE_GATHER: Specifies that CUDA will * perform texture gather operations on this resource. *

    *
  • *
*

*

The following image formats are * supported. For brevity's sake, the list is abbreviated. For ex., {GL_R, * GL_RG} X {8, 16} would * expand to the following 4 formats {GL_R8, * GL_R16, GL_RG8, GL_RG16} : *

    *
  • *

    GL_RED, GL_RG, GL_RGBA, * GL_LUMINANCE, GL_ALPHA, GL_LUMINANCE_ALPHA, GL_INTENSITY *

    *
  • *
  • *

    {GL_R, GL_RG, GL_RGBA} X {8, * 16, 16F, 32F, 8UI, 16UI, 32UI, 8I, 16I, 32I} *

    *
  • *
  • *

    {GL_LUMINANCE, GL_ALPHA, * GL_LUMINANCE_ALPHA, GL_INTENSITY} X {8, 16, 16F_ARB, 32F_ARB, 8UI_EXT, * 16UI_EXT, 32UI_EXT, 8I_EXT, * 16I_EXT, 32I_EXT} *

    *
  • *
*

*

The following image classes are currently * disallowed: *

    *
  • *

    Textures with borders

    *
  • *
  • *

    Multisampled renderbuffers

    *
  • *
*

*
* Note: *

Note that this * function may also return error codes from previous, asynchronous * launches. *

*
*

*
* * @param pCudaResource Pointer to the returned object handle * @param image name of texture or renderbuffer object to be registered * @param target Identifies the type of object specified by image * @param Flags Register flags * * @return CUDA_SUCCESS, CUDA_ERROR_INVALID_HANDLE, CUDA_ERROR_ALREADY_MAPPED, * CUDA_ERROR_INVALID_CONTEXT, * * @see JCudaDriver#cuGraphicsUnregisterResource * @see JCudaDriver#cuGraphicsMapResources * @see JCudaDriver#cuGraphicsSubResourceGetMappedArray */ public static int cuGraphicsGLRegisterImage(CUgraphicsResource pCudaResource, int image, int target, int Flags ) { return checkResult(cuGraphicsGLRegisterImageNative(pCudaResource, image, target, Flags)); } private static native int cuGraphicsGLRegisterImageNative(CUgraphicsResource pCudaResource, int image, int target, int Flags); /** * Registers an OpenGL buffer object. * *
     * CUresult cuGLRegisterBufferObject (
     *      GLuint buffer )
     * </pre>
     * <p>
     * Registers an OpenGL buffer object.
     * <p>
     * <i>Deprecated:</i> This function is deprecated as of CUDA 3.0.
     * Registers the buffer object specified by buffer for access by CUDA.
     * This function must be called before CUDA can map the buffer object.
     * There must be a valid OpenGL context bound to the current thread
     * when this function is called, and the buffer name is resolved by
     * that context.
     * <p>
     * Note that this function may also return error codes from previous,
     * asynchronous launches.
     *
     * @param bufferobj The name of the buffer object to register.
     *
     * @return CUDA_SUCCESS, CUDA_ERROR_DEINITIALIZED, CUDA_ERROR_NOT_INITIALIZED,
     * CUDA_ERROR_INVALID_CONTEXT, CUDA_ERROR_ALREADY_MAPPED
     *
     * @see JCudaDriver#cuGraphicsGLRegisterBuffer
     *
     * @deprecated Deprecated as of CUDA 3.0
     */
    @Deprecated
    public static int cuGLRegisterBufferObject( int bufferobj )
    {
        // The underlying driver function was removed, so this binding
        // unconditionally fails instead of calling into native code.
        throw new UnsupportedOperationException(
            "This function is deprecated as of CUDA 3.0");
    }

    /**
     * Maps an OpenGL buffer object.
     *
     * <pre>
     * CUresult cuGLMapBufferObject (
     *      CUdeviceptr* dptr,
     *      size_t* size,
     *      GLuint buffer )
     * 
*
*

Maps an OpenGL buffer object. * DeprecatedThis function is * deprecated as of Cuda 3.0.Maps the buffer object specified by * buffer into the address space of the current CUDA context * and returns in *dptr and *size the base pointer * and size of the resulting mapping. *

*

There must be a valid OpenGL context * bound to the current thread when this function is called. This must be * the same context, * or a member of the same shareGroup, * as the context that was bound when the buffer was registered. *

*

All streams in the current CUDA * context are synchronized with the current GL context. *

*
* Note: *

Note that * this function may also return error codes from previous, asynchronous * launches. *

*
*

*
* * @param dptr Returned mapped base pointer * @param size Returned size of mapping * @param buffer The name of the buffer object to map * * @return CUDA_SUCCESS, CUDA_ERROR_DEINITIALIZED, CUDA_ERROR_NOT_INITIALIZED, * CUDA_ERROR_INVALID_CONTEXT, CUDA_ERROR_INVALID_VALUE, * CUDA_ERROR_MAP_FAILED * * @see JCudaDriver#cuGraphicsMapResources * * @deprecated Deprecated as of CUDA 3.0 */ @Deprecated public static int cuGLMapBufferObject( CUdeviceptr dptr, long size[], int bufferobj ) { return checkResult(cuGLMapBufferObjectNative(dptr, size, bufferobj)); } private static native int cuGLMapBufferObjectNative(CUdeviceptr dptr, long size[], int bufferobj); /** * Unmaps an OpenGL buffer object. * *
     * CUresult cuGLUnmapBufferObject (
     *      GLuint buffer )
     * </pre>
     * <p>
     * Unmaps an OpenGL buffer object.
     * <p>
     * <i>Deprecated:</i> This function is deprecated as of CUDA 3.0.
     * Unmaps the buffer object specified by buffer for access by CUDA.
     * <p>
     * There must be a valid OpenGL context bound to the current thread
     * when this function is called. This must be the same context, or a
     * member of the same shareGroup, as the context that was bound when
     * the buffer was registered.
     * <p>
     * All streams in the current CUDA context are synchronized with the
     * current GL context.
     * <p>
     * Note that this function may also return error codes from previous,
     * asynchronous launches.
     *
     * @param bufferobj Buffer object to unmap
     *
     * @return CUDA_SUCCESS, CUDA_ERROR_DEINITIALIZED, CUDA_ERROR_NOT_INITIALIZED,
     * CUDA_ERROR_INVALID_CONTEXT, CUDA_ERROR_INVALID_VALUE
     *
     * @see JCudaDriver#cuGraphicsUnmapResources
     *
     * @deprecated Deprecated as of CUDA 3.0
     */
    @Deprecated
    public static int cuGLUnmapBufferObject( int bufferobj )
    {
        // Delegate to the native binding and translate the status code
        int status = cuGLUnmapBufferObjectNative(bufferobj);
        return checkResult(status);
    }
    private static native int cuGLUnmapBufferObjectNative(int bufferobj);

    /**
     * Unregister an OpenGL buffer object.
     *
     * <pre>
     * CUresult cuGLUnregisterBufferObject (
     *      GLuint buffer )
     * </pre>
     * <p>
     * Unregister an OpenGL buffer object.
     * <p>
     * <i>Deprecated:</i> This function is deprecated as of CUDA 3.0.
     * Unregisters the buffer object specified by buffer. This releases any
     * resources associated with the registered buffer. After this call,
     * the buffer may no longer be mapped for access by CUDA.
     * <p>
     * There must be a valid OpenGL context bound to the current thread
     * when this function is called. This must be the same context, or a
     * member of the same shareGroup, as the context that was bound when
     * the buffer was registered.
     * <p>
     * Note that this function may also return error codes from previous,
     * asynchronous launches.
     *
     * @param bufferobj Name of the buffer object to unregister
     *
     * @return CUDA_SUCCESS, CUDA_ERROR_DEINITIALIZED, CUDA_ERROR_NOT_INITIALIZED,
     * CUDA_ERROR_INVALID_CONTEXT, CUDA_ERROR_INVALID_VALUE
     *
     * @see JCudaDriver#cuGraphicsUnregisterResource
     *
     * @deprecated Deprecated as of CUDA 3.0
     */
    @Deprecated
    public static int cuGLUnregisterBufferObject( int bufferobj )
    {
        // Delegate to the native binding and translate the status code
        int status = cuGLUnregisterBufferObjectNative(bufferobj);
        return checkResult(status);
    }
    private static native int cuGLUnregisterBufferObjectNative(int bufferobj);

    /**
     * Set the map flags for an OpenGL buffer object.
     *
     * <pre>
     * CUresult cuGLSetBufferObjectMapFlags (
     *      GLuint buffer,
     *      unsigned int  Flags )
     * </pre>
     * <p>
     * Set the map flags for an OpenGL buffer object.
     * <p>
     * <i>Deprecated:</i> This function is deprecated as of CUDA 3.0.
     * Sets the map flags for the buffer object specified by buffer.
     * <p>
     * Changes to Flags will take effect the next time buffer is mapped.
     * The Flags argument may be any of the following:
     * <ul>
     *   <li>CU_GL_MAP_RESOURCE_FLAGS_NONE: Specifies no hints about how
     *   this resource will be used. It is therefore assumed that this
     *   resource will be read from and written to by CUDA kernels. This is
     *   the default value.</li>
     *   <li>CU_GL_MAP_RESOURCE_FLAGS_READ_ONLY: Specifies that CUDA
     *   kernels which access this resource will not write to this
     *   resource.</li>
     *   <li>CU_GL_MAP_RESOURCE_FLAGS_WRITE_DISCARD: Specifies that CUDA
     *   kernels which access this resource will not read from this
     *   resource and will write over the entire contents of the resource,
     *   so none of the data previously stored in the resource will be
     *   preserved.</li>
     * </ul>
     * <p>
     * If buffer has not been registered for use with CUDA, then
     * CUDA_ERROR_INVALID_HANDLE is returned. If buffer is presently mapped
     * for access by CUDA, then CUDA_ERROR_ALREADY_MAPPED is returned.
     * <p>
     * There must be a valid OpenGL context bound to the current thread
     * when this function is called. This must be the same context, or a
     * member of the same shareGroup, as the context that was bound when
     * the buffer was registered.
     * <p>
     * Note that this function may also return error codes from previous,
     * asynchronous launches.
     *
     * @param buffer Buffer object to unmap
     * @param Flags Map flags
     *
     * @return CUDA_SUCCESS, CUDA_ERROR_NOT_INITIALIZED, CUDA_ERROR_INVALID_HANDLE,
     * CUDA_ERROR_ALREADY_MAPPED, CUDA_ERROR_INVALID_CONTEXT
     *
     * @see JCudaDriver#cuGraphicsResourceSetMapFlags
     *
     * @deprecated Deprecated as of CUDA 3.0
     */
    @Deprecated
    public static int cuGLSetBufferObjectMapFlags( int buffer, int Flags )
    {
        // Delegate to the native binding and translate the status code
        int status = cuGLSetBufferObjectMapFlagsNative(buffer, Flags);
        return checkResult(status);
    }
    private static native int cuGLSetBufferObjectMapFlagsNative( int buffer, int Flags );

    /**
     * Maps an OpenGL buffer object.
     *
     * <pre>
     * CUresult cuGLMapBufferObjectAsync (
     *      CUdeviceptr* dptr,
     *      size_t* size,
     *      GLuint buffer,
     *      CUstream hStream )
     * 
*
*

Maps an OpenGL buffer object. * DeprecatedThis function is * deprecated as of Cuda 3.0.Maps the buffer object specified by * buffer into the address space of the current CUDA context * and returns in *dptr and *size the base pointer * and size of the resulting mapping. *

*

There must be a valid OpenGL context * bound to the current thread when this function is called. This must be * the same context, * or a member of the same shareGroup, * as the context that was bound when the buffer was registered. *

*

Stream hStream in the * current CUDA context is synchronized with the current GL context. *

*
* Note: *

Note that * this function may also return error codes from previous, asynchronous * launches. *

*
*

*
* * @param dptr Returned mapped base pointer * @param size Returned size of mapping * @param buffer The name of the buffer object to map * @param hStream Stream to synchronize * * @return CUDA_SUCCESS, CUDA_ERROR_DEINITIALIZED, CUDA_ERROR_NOT_INITIALIZED, * CUDA_ERROR_INVALID_CONTEXT, CUDA_ERROR_INVALID_VALUE, * CUDA_ERROR_MAP_FAILED * * @see JCudaDriver#cuGraphicsMapResources * * @deprecated Deprecated as of CUDA 3.0 */ @Deprecated public static int cuGLMapBufferObjectAsync( CUdeviceptr dptr, long size[], int buffer, CUstream hStream) { return checkResult((cuGLMapBufferObjectAsyncNative(dptr, size, buffer, hStream))); } private static native int cuGLMapBufferObjectAsyncNative( CUdeviceptr dptr, long size[], int buffer, CUstream hStream); /** * Unmaps an OpenGL buffer object. * *
     * CUresult cuGLUnmapBufferObjectAsync (
     *      GLuint buffer,
     *      CUstream hStream )
     * </pre>
     * <p>
     * Unmaps an OpenGL buffer object.
     * <p>
     * <i>Deprecated:</i> This function is deprecated as of CUDA 3.0.
     * Unmaps the buffer object specified by buffer for access by CUDA.
     * <p>
     * There must be a valid OpenGL context bound to the current thread
     * when this function is called. This must be the same context, or a
     * member of the same shareGroup, as the context that was bound when
     * the buffer was registered.
     * <p>
     * Stream hStream in the current CUDA context is synchronized with the
     * current GL context.
     * <p>
     * Note that this function may also return error codes from previous,
     * asynchronous launches.
     *
     * @param buffer Name of the buffer object to unmap
     * @param hStream Stream to synchronize
     *
     * @return CUDA_SUCCESS, CUDA_ERROR_DEINITIALIZED, CUDA_ERROR_NOT_INITIALIZED,
     * CUDA_ERROR_INVALID_CONTEXT, CUDA_ERROR_INVALID_VALUE
     *
     * @see JCudaDriver#cuGraphicsUnmapResources
     *
     * @deprecated Deprecated as of CUDA 3.0
     */
    @Deprecated
    public static int cuGLUnmapBufferObjectAsync( int buffer, CUstream hStream )
    {
        // Delegate to the native binding and translate the status code
        int status = cuGLUnmapBufferObjectAsyncNative(buffer, hStream);
        return checkResult(status);
    }
    private static native int cuGLUnmapBufferObjectAsyncNative( int buffer, CUstream hStream );

    /**
     * Unregisters a graphics resource for access by CUDA.
     *
     * <pre>
     * CUresult cuGraphicsUnregisterResource (
     *      CUgraphicsResource resource )
     * </pre>
     * <p>
     * Unregisters the graphics resource resource so it is not accessible
     * by CUDA unless registered again.
     * <p>
     * If resource is invalid then CUDA_ERROR_INVALID_HANDLE is returned.
     * <p>
     * Note that this function may also return error codes from previous,
     * asynchronous launches.
     *
     * @param resource Resource to unregister
     *
     * @return CUDA_SUCCESS, CUDA_ERROR_DEINITIALIZED, CUDA_ERROR_NOT_INITIALIZED,
     * CUDA_ERROR_INVALID_CONTEXT, CUDA_ERROR_INVALID_HANDLE,
     * CUDA_ERROR_UNKNOWN
     *
     * @see JCudaDriver#cuGraphicsGLRegisterBuffer
     * @see JCudaDriver#cuGraphicsGLRegisterImage
     */
    public static int cuGraphicsUnregisterResource(CUgraphicsResource resource)
    {
        // Delegate to the native binding and translate the status code
        int status = cuGraphicsUnregisterResourceNative(resource);
        return checkResult(status);
    }
    private static native int cuGraphicsUnregisterResourceNative(CUgraphicsResource resource);

    /**
     * Get an array through which to access a subresource of a mapped
     * graphics resource.
     *
     * <pre>
     * CUresult cuGraphicsSubResourceGetMappedArray (
     *      CUarray* pArray,
     *      CUgraphicsResource resource,
     *      unsigned int  arrayIndex,
     *      unsigned int  mipLevel )
     * 
*
*

Get an array through which to access a * subresource of a mapped graphics resource. Returns in *pArray * an array through which the subresource of the mapped graphics resource * resource which corresponds to array index arrayIndex * and mipmap level mipLevel may be accessed. The value set in * *pArray may change every time that resource is * mapped. *

*

If resource is not a texture * then it cannot be accessed via an array and CUDA_ERROR_NOT_MAPPED_AS_ARRAY * is returned. If arrayIndex is not a valid array index for * resource then CUDA_ERROR_INVALID_VALUE is returned. If mipLevel is not a valid mipmap level for resource then * CUDA_ERROR_INVALID_VALUE is returned. If resource is not * mapped then CUDA_ERROR_NOT_MAPPED is returned. *

*
* Note: *

Note that this * function may also return error codes from previous, asynchronous * launches. *

*
*

*
* * @param pArray Returned array through which a subresource of resource may be accessed * @param resource Mapped resource to access * @param arrayIndex Array index for array textures or cubemap face index as defined by CUarray_cubemap_face for cubemap textures for the subresource to access * @param mipLevel Mipmap level for the subresource to access * * @return CUDA_SUCCESS, CUDA_ERROR_DEINITIALIZED, CUDA_ERROR_NOT_INITIALIZED, * CUDA_ERROR_INVALID_CONTEXT, CUDA_ERROR_INVALID_VALUE, * CUDA_ERROR_INVALID_HANDLE, * CUDA_ERROR_NOT_MAPPEDCUDA_ERROR_NOT_MAPPED_AS_ARRAY * * @see JCudaDriver#cuGraphicsResourceGetMappedPointer */ public static int cuGraphicsSubResourceGetMappedArray(CUarray pArray, CUgraphicsResource resource, int arrayIndex, int mipLevel) { return checkResult(cuGraphicsSubResourceGetMappedArrayNative(pArray, resource, arrayIndex, mipLevel)); } private static native int cuGraphicsSubResourceGetMappedArrayNative(CUarray pArray, CUgraphicsResource resource, int arrayIndex, int mipLevel); /** * Get a mipmapped array through which to access a mapped graphics resource. * *
     * CUresult cuGraphicsResourceGetMappedMipmappedArray (
     *      CUmipmappedArray* pMipmappedArray,
     *      CUgraphicsResource resource )
     * </pre>
     * <p>
     * Returns in *pMipmappedArray a mipmapped array through which the
     * mapped graphics resource resource may be accessed. The value set in
     * *pMipmappedArray may change every time that resource is mapped.
     * <p>
     * If resource is not a texture then it cannot be accessed via a
     * mipmapped array and CUDA_ERROR_NOT_MAPPED_AS_ARRAY is returned. If
     * resource is not mapped then CUDA_ERROR_NOT_MAPPED is returned.
     * <p>
     * Note that this function may also return error codes from previous,
     * asynchronous launches.
     *
     * @param pMipmappedArray Returned mipmapped array through which resource may be accessed
     * @param resource Mapped resource to access
     *
     * @return CUDA_SUCCESS, CUDA_ERROR_DEINITIALIZED, CUDA_ERROR_NOT_INITIALIZED,
     * CUDA_ERROR_INVALID_CONTEXT, CUDA_ERROR_INVALID_VALUE,
     * CUDA_ERROR_INVALID_HANDLE, CUDA_ERROR_NOT_MAPPED,
     * CUDA_ERROR_NOT_MAPPED_AS_ARRAY
     *
     * @see JCudaDriver#cuGraphicsResourceGetMappedPointer
     */
    public static int cuGraphicsResourceGetMappedMipmappedArray(CUmipmappedArray pMipmappedArray, CUgraphicsResource resource)
    {
        // Delegate to the native binding and translate the status code
        int status = cuGraphicsResourceGetMappedMipmappedArrayNative(
            pMipmappedArray, resource);
        return checkResult(status);
    }
    private static native int cuGraphicsResourceGetMappedMipmappedArrayNative(CUmipmappedArray pMipmappedArray, CUgraphicsResource resource);

    /**
     * Get a device pointer through which to access a mapped graphics
     * resource.
     *
     * <pre>
     * CUresult cuGraphicsResourceGetMappedPointer (
     *      CUdeviceptr* pDevPtr,
     *      size_t* pSize,
     *      CUgraphicsResource resource )
     * 
*
*

Get a device pointer through which to * access a mapped graphics resource. Returns in *pDevPtr a * pointer through which the mapped graphics resource resource * may be accessed. Returns in pSize the size of the memory in * bytes which may be accessed from that pointer. The value set in pPointer may change every time that resource is * mapped. *

*

If resource is not a buffer * then it cannot be accessed via a pointer and CUDA_ERROR_NOT_MAPPED_AS_POINTER * is returned. If resource is not mapped then CUDA_ERROR_NOT_MAPPED * is returned. * *

*
* Note: *

Note that this * function may also return error codes from previous, asynchronous * launches. *

*
*

*
* * @param pDevPtr Returned pointer through which resource may be accessed * @param pSize Returned size of the buffer accessible starting at *pPointer * @param resource Mapped resource to access * * @return CUDA_SUCCESS, CUDA_ERROR_DEINITIALIZED, CUDA_ERROR_NOT_INITIALIZED, * CUDA_ERROR_INVALID_CONTEXT, CUDA_ERROR_INVALID_VALUE, * CUDA_ERROR_INVALID_HANDLE, * CUDA_ERROR_NOT_MAPPEDCUDA_ERROR_NOT_MAPPED_AS_POINTER * * @see JCudaDriver#cuGraphicsMapResources * @see JCudaDriver#cuGraphicsSubResourceGetMappedArray */ public static int cuGraphicsResourceGetMappedPointer( CUdeviceptr pDevPtr, long pSize[], CUgraphicsResource resource ) { return checkResult(cuGraphicsResourceGetMappedPointerNative(pDevPtr, pSize, resource)); } private static native int cuGraphicsResourceGetMappedPointerNative(CUdeviceptr pDevPtr, long pSize[], CUgraphicsResource resource); /** * Set usage flags for mapping a graphics resource. * *
     * CUresult cuGraphicsResourceSetMapFlags (
     *      CUgraphicsResource resource,
     *      unsigned int  flags )
     * </pre>
     * <p>
     * Set flags for mapping the graphics resource resource.
     * <p>
     * Changes to flags will take effect the next time resource is mapped.
     * The flags argument may be any of the following:
     * <ul>
     *   <li>CU_GRAPHICS_MAP_RESOURCE_FLAGS_NONE: Specifies no hints about
     *   how this resource will be used. It is therefore assumed that this
     *   resource will be read from and written to by CUDA kernels. This is
     *   the default value.</li>
     *   <li>CU_GRAPHICS_MAP_RESOURCE_FLAGS_READONLY: Specifies that CUDA
     *   kernels which access this resource will not write to this
     *   resource.</li>
     *   <li>CU_GRAPHICS_MAP_RESOURCE_FLAGS_WRITEDISCARD: Specifies that
     *   CUDA kernels which access this resource will not read from this
     *   resource and will write over the entire contents of the resource,
     *   so none of the data previously stored in the resource will be
     *   preserved.</li>
     * </ul>
     * <p>
     * If resource is presently mapped for access by CUDA then
     * CUDA_ERROR_ALREADY_MAPPED is returned. If flags is not one of the
     * above values then CUDA_ERROR_INVALID_VALUE is returned.
     * <p>
     * Note that this function may also return error codes from previous,
     * asynchronous launches.
     *
     * @param resource Registered resource to set flags for
     * @param flags Parameters for resource mapping
     *
     * @return CUDA_SUCCESS, CUDA_ERROR_DEINITIALIZED, CUDA_ERROR_NOT_INITIALIZED,
     * CUDA_ERROR_INVALID_CONTEXT, CUDA_ERROR_INVALID_VALUE,
     * CUDA_ERROR_INVALID_HANDLE, CUDA_ERROR_ALREADY_MAPPED
     *
     * @see JCudaDriver#cuGraphicsMapResources
     */
    public static int cuGraphicsResourceSetMapFlags( CUgraphicsResource resource, int flags )
    {
        // Delegate to the native binding and translate the status code
        int status = cuGraphicsResourceSetMapFlagsNative(resource, flags);
        return checkResult(status);
    }
    private static native int cuGraphicsResourceSetMapFlagsNative( CUgraphicsResource resource, int flags );

    /**
     * Map graphics resources for access by CUDA.
     *
     * <pre>
     * CUresult cuGraphicsMapResources (
     *      unsigned int  count,
     *      CUgraphicsResource* resources,
     *      CUstream hStream )
     * 
*
*

Map graphics resources for access by * CUDA. Maps the count graphics resources in resources * for access by CUDA. *

*

The resources in resources * may be accessed by CUDA until they are unmapped. The graphics API from * which resources were registered should not access any * resources while they are mapped by CUDA. If an application does so, * the results are * undefined. *

*

This function provides the synchronization * guarantee that any graphics calls issued before cuGraphicsMapResources() * will complete before any subsequent CUDA work issued in stream * begins. *

*

If resources includes any * duplicate entries then CUDA_ERROR_INVALID_HANDLE is returned. If any * of resources are presently mapped for access by CUDA then * CUDA_ERROR_ALREADY_MAPPED is returned. *

*
* Note: *

Note that this * function may also return error codes from previous, asynchronous * launches. *

*
*

*
* * @param count Number of resources to map * @param resources Resources to map for CUDA usage * @param hStream Stream with which to synchronize * * @return CUDA_SUCCESS, CUDA_ERROR_DEINITIALIZED, CUDA_ERROR_NOT_INITIALIZED, * CUDA_ERROR_INVALID_CONTEXT, CUDA_ERROR_INVALID_HANDLE, * CUDA_ERROR_ALREADY_MAPPED, CUDA_ERROR_UNKNOWN * * @see JCudaDriver#cuGraphicsResourceGetMappedPointer * @see JCudaDriver#cuGraphicsSubResourceGetMappedArray * @see JCudaDriver#cuGraphicsUnmapResources */ public static int cuGraphicsMapResources(int count, CUgraphicsResource resources[], CUstream hStream) { return checkResult(cuGraphicsMapResourcesNative(count, resources, hStream)); } private static native int cuGraphicsMapResourcesNative(int count, CUgraphicsResource resources[], CUstream hStream); /** * Unmap graphics resources. * *
     * CUresult cuGraphicsUnmapResources (
     *      unsigned int  count,
     *      CUgraphicsResource* resources,
     *      CUstream hStream )
     * 
*
*

Unmap graphics resources. Unmaps the * count graphics resources in resources. *

*

Once unmapped, the resources in resources may not be accessed by CUDA until they are mapped * again. *

*

This function provides the synchronization * guarantee that any CUDA work issued in stream before * cuGraphicsUnmapResources() will complete before any subsequently issued * graphics work begins. *

*

If resources includes any * duplicate entries then CUDA_ERROR_INVALID_HANDLE is returned. If any * of resources are not presently mapped for access by CUDA then * CUDA_ERROR_NOT_MAPPED is returned. *

*
* Note: *

Note that this * function may also return error codes from previous, asynchronous * launches. *

*
*

*
* * @param count Number of resources to unmap * @param resources Resources to unmap * @param hStream Stream with which to synchronize * * @return CUDA_SUCCESS, CUDA_ERROR_DEINITIALIZED, CUDA_ERROR_NOT_INITIALIZED, * CUDA_ERROR_INVALID_CONTEXT, CUDA_ERROR_INVALID_HANDLE, * CUDA_ERROR_NOT_MAPPED, CUDA_ERROR_UNKNOWN * * @see JCudaDriver#cuGraphicsMapResources */ public static int cuGraphicsUnmapResources( int count, CUgraphicsResource resources[], CUstream hStream) { return checkResult(cuGraphicsUnmapResourcesNative(count, resources, hStream)); } private static native int cuGraphicsUnmapResourcesNative(int count, CUgraphicsResource resources[], CUstream hStream); /** * Returns a module handle * * Returns in *hmod the handle of the module that function hfunc * is located in. The lifetime of the module corresponds to the lifetime of * the context it was loaded in or until the module is explicitly unloaded. * * The CUDA runtime manages its own modules loaded into the primary context. * If the handle returned by this API refers to a module loaded by the CUDA runtime, * calling ::cuModuleUnload() on that module will result in undefined behavior. * * @param hmod - Returned module handle * @param hfunc - Function to retrieve module for * * @return CUDA_SUCCESS, CUDA_ERROR_DEINITIALIZED, CUDA_ERROR_NOT_INITIALIZED, * CUDA_ERROR_INVALID_CONTEXT, CUDA_ERROR_INVALID_VALUE, CUDA_ERROR_NOT_FOUND */ public static int cuFuncGetModule(CUmodule hmod, CUfunction hfunc) { return checkResult(cuFuncGetModuleNative(hmod, hfunc)); } private static native int cuFuncGetModuleNative(CUmodule hmod, CUfunction hfunc); /** * Set resource limits. * *
     * CUresult cuCtxSetLimit (
     *      CUlimit limit,
     *      size_t value )
     * 
*
*

Set resource limits. Setting limit to value is a request by the application to * update the current limit maintained by the context. The driver is free * to modify the requested * value to meet h/w requirements (this * could be clamping to minimum or maximum values, rounding up to nearest * element size, * etc). The application can use * cuCtxGetLimit() to find out exactly what the limit has been set to. *

*

Setting each CUlimit has its own specific * restrictions, so each is discussed here. *

*
    *
  • *

    CU_LIMIT_STACK_SIZE controls * the stack size in bytes of each GPU thread. This limit is only * applicable to devices of compute capability 2.0 and * higher. Attempting to set this * limit on devices of compute capability less than 2.0 will result in * the error CUDA_ERROR_UNSUPPORTED_LIMIT being returned. *

    *
  • *
*

*
    *
  • *

    CU_LIMIT_PRINTF_FIFO_SIZE * controls the size in bytes of the FIFO used by the printf() device * system call. Setting CU_LIMIT_PRINTF_FIFO_SIZE must be performed before * launching any kernel that uses the printf() device system call, * otherwise CUDA_ERROR_INVALID_VALUE will be returned. This limit is only * applicable to devices of compute capability 2.0 and higher. Attempting * to set this limit * on devices of compute capability * less than 2.0 will result in the error CUDA_ERROR_UNSUPPORTED_LIMIT * being returned. *

    *
  • *
*

*
    *
  • *

    CU_LIMIT_MALLOC_HEAP_SIZE * controls the size in bytes of the heap used by the malloc() and free() * device system calls. Setting CU_LIMIT_MALLOC_HEAP_SIZE must be performed * before launching any kernel that uses the malloc() or free() device * system calls, otherwise CUDA_ERROR_INVALID_VALUE will be returned. This * limit is only applicable to devices of compute capability 2.0 and * higher. Attempting to set this limit * on devices of compute capability * less than 2.0 will result in the error CUDA_ERROR_UNSUPPORTED_LIMIT * being returned. *

    *
  • *
*

*
    *
  • *

    CU_LIMIT_DEV_RUNTIME_SYNC_DEPTH * controls the maximum nesting depth of a grid at which a thread can * safely call cudaDeviceSynchronize(). Setting this limit * must be performed before any * launch of a kernel that uses the device runtime and calls * cudaDeviceSynchronize() above the default * sync depth, two levels of grids. * Calls to cudaDeviceSynchronize() will fail with error code * cudaErrorSyncDepthExceeded if * the limitation is violated. This * limit can be set smaller than the default or up the maximum launch * depth of 24. When setting * this limit, keep in mind that * additional levels of sync depth require the driver to reserve large * amounts of device memory * which can no longer be used for * user allocations. If these reservations of device memory fail, * cuCtxSetLimit will return CUDA_ERROR_OUT_OF_MEMORY, and the limit can * be reset to a lower value. This limit is only applicable to devices of * compute capability 3.5 and higher. * Attempting to set this limit on * devices of compute capability less than 3.5 will result in the error * CUDA_ERROR_UNSUPPORTED_LIMIT being returned. *

    *
  • *
*

*
    *
  • *

    CU_LIMIT_DEV_RUNTIME_PENDING_LAUNCH_COUNT controls the maximum number * of outstanding device runtime launches that can be made from the * current context. A grid is outstanding * from the point of launch up * until the grid is known to have been completed. Device runtime launches * which violate this limitation * fail and return * cudaErrorLaunchPendingCountExceeded when cudaGetLastError() is called * after launch. If more pending launches * than the default (2048 launches) * are needed for a module using the device runtime, this limit can be * increased. Keep in mind * that being able to sustain * additional pending launches will require the driver to reserve larger * amounts of device memory * upfront which can no longer be * used for allocations. If these reservations fail, cuCtxSetLimit will * return CUDA_ERROR_OUT_OF_MEMORY, and the limit can be reset to a lower * value. This limit is only applicable to devices of compute capability * 3.5 and higher. * Attempting to set this limit on * devices of compute capability less than 3.5 will result in the error * CUDA_ERROR_UNSUPPORTED_LIMIT being returned. *

    *
  • *
*
    *
  • *

    CU_LIMIT_MAX_L2_FETCH_GRANULARITY controls the L2 cache fetch granularity. * Values can range from 0B to 128B. This is purely a performance hint and * it can be ignored or clamped depending on the platform. *

    *
  • *
*

*
* Note: *

Note that this * function may also return error codes from previous, asynchronous * launches. *

*
*

*
* * @param limit Limit to set * @param value Size of limit * * @return CUDA_SUCCESS, CUDA_ERROR_INVALID_VALUE, CUDA_ERROR_UNSUPPORTED_LIMIT, * CUDA_ERROR_OUT_OF_MEMORY * * @see JCudaDriver#cuCtxCreate * @see JCudaDriver#cuCtxDestroy * @see JCudaDriver#cuCtxGetApiVersion * @see JCudaDriver#cuCtxGetCacheConfig * @see JCudaDriver#cuCtxGetDevice * @see JCudaDriver#cuCtxGetLimit * @see JCudaDriver#cuCtxPopCurrent * @see JCudaDriver#cuCtxPushCurrent * @see JCudaDriver#cuCtxSetCacheConfig * @see JCudaDriver#cuCtxSynchronize */ public static int cuCtxSetLimit(int limit, long value) { return checkResult(cuCtxSetLimitNative(limit, value)); } private static native int cuCtxSetLimitNative(int limit, long value); /** * Returns the preferred cache configuration for the current context. * *
     * CUresult cuCtxGetCacheConfig (
     *      CUfunc_cache* pconfig )
     * 
*
*

Returns the preferred cache configuration * for the current context. On devices where the L1 cache and shared * memory use the * same hardware resources, this function * returns through pconfig the preferred cache configuration * for the current context. This is only a preference. The driver will * use the requested configuration * if possible, but it is free to choose a * different configuration if required to execute functions. *

*

This will return a pconfig of * CU_FUNC_CACHE_PREFER_NONE on devices where the size of the L1 cache * and shared memory are fixed. *

*

The supported cache configurations are: *

    *
  • *

    CU_FUNC_CACHE_PREFER_NONE: no * preference for shared memory or L1 (default) *

    *
  • *
  • *

    CU_FUNC_CACHE_PREFER_SHARED: * prefer larger shared memory and smaller L1 cache *

    *
  • *
  • *

    CU_FUNC_CACHE_PREFER_L1: prefer * larger L1 cache and smaller shared memory *

    *
  • *
  • *

    CU_FUNC_CACHE_PREFER_EQUAL: * prefer equal sized L1 cache and shared memory *

    *
  • *
*

*
* Note: *

Note that this * function may also return error codes from previous, asynchronous * launches. *

*
*

*
* * @param pconfig Returned cache configuration * * @return CUDA_SUCCESS, CUDA_ERROR_DEINITIALIZED, CUDA_ERROR_NOT_INITIALIZED, * CUDA_ERROR_INVALID_CONTEXT, CUDA_ERROR_INVALID_VALUE * * @see JCudaDriver#cuCtxCreate * @see JCudaDriver#cuCtxDestroy * @see JCudaDriver#cuCtxGetApiVersion * @see JCudaDriver#cuCtxGetDevice * @see JCudaDriver#cuCtxGetLimit * @see JCudaDriver#cuCtxPopCurrent * @see JCudaDriver#cuCtxPushCurrent * @see JCudaDriver#cuCtxSetCacheConfig * @see JCudaDriver#cuCtxSetLimit * @see JCudaDriver#cuCtxSynchronize * @see JCudaDriver#cuFuncSetCacheConfig */ public static int cuCtxGetCacheConfig(int pconfig[]) { return checkResult(cuCtxGetCacheConfigNative(pconfig)); } private static native int cuCtxGetCacheConfigNative(int[] pconfig); /** * Sets the preferred cache configuration for the current context. * *
     * CUresult cuCtxSetCacheConfig (
     *      CUfunc_cache config )
     * 
*
*

Sets the preferred cache configuration * for the current context. On devices where the L1 cache and shared * memory use the same * hardware resources, this sets through * config the preferred cache configuration for the current * context. This is only a preference. The driver will use the requested * configuration * if possible, but it is free to choose a * different configuration if required to execute the function. Any * function preference * set via cuFuncSetCacheConfig() will be * preferred over this context-wide setting. Setting the context-wide * cache configuration to CU_FUNC_CACHE_PREFER_NONE will cause subsequent * kernel launches to prefer to not change the cache configuration unless * required to launch the kernel. *

*

This setting does nothing on devices * where the size of the L1 cache and shared memory are fixed. *

*

Launching a kernel with a different * preference than the most recent preference setting may insert a * device-side synchronization * point. *

*

The supported cache configurations are: *

    *
  • *

    CU_FUNC_CACHE_PREFER_NONE: no * preference for shared memory or L1 (default) *

    *
  • *
  • *

    CU_FUNC_CACHE_PREFER_SHARED: * prefer larger shared memory and smaller L1 cache *

    *
  • *
  • *

    CU_FUNC_CACHE_PREFER_L1: prefer * larger L1 cache and smaller shared memory *

    *
  • *
  • *

    CU_FUNC_CACHE_PREFER_EQUAL: * prefer equal sized L1 cache and shared memory *

    *
  • *
*

*
* Note: *

Note that this * function may also return error codes from previous, asynchronous * launches. *

*
*

*
* * @param config Requested cache configuration * * @return CUDA_SUCCESS, CUDA_ERROR_DEINITIALIZED, CUDA_ERROR_NOT_INITIALIZED, * CUDA_ERROR_INVALID_CONTEXT, CUDA_ERROR_INVALID_VALUE * * @see JCudaDriver#cuCtxCreate * @see JCudaDriver#cuCtxDestroy * @see JCudaDriver#cuCtxGetApiVersion * @see JCudaDriver#cuCtxGetCacheConfig * @see JCudaDriver#cuCtxGetDevice * @see JCudaDriver#cuCtxGetLimit * @see JCudaDriver#cuCtxPopCurrent * @see JCudaDriver#cuCtxPushCurrent * @see JCudaDriver#cuCtxSetLimit * @see JCudaDriver#cuCtxSynchronize * @see JCudaDriver#cuFuncSetCacheConfig */ public static int cuCtxSetCacheConfig(int config) { return checkResult(cuCtxSetCacheConfigNative(config)); } private static native int cuCtxSetCacheConfigNative(int config); /** * Returns the current shared memory configuration for the current context. * *
     * CUresult cuCtxGetSharedMemConfig (
     *      CUsharedconfig* pConfig )
     * 
*
*

Returns the current shared memory * configuration for the current context. This function will return in * pConfig the current size of shared memory banks in the * current context. On devices with configurable shared memory banks, * cuCtxSetSharedMemConfig can be used to change this setting, so that * all subsequent kernel launches will by default use the new bank size. * When cuCtxGetSharedMemConfig is called on devices without configurable * shared memory, it will return the fixed bank size of the hardware. *

*

The returned bank configurations can be * either: *

    *
  • *

    CU_SHARED_MEM_CONFIG_FOUR_BYTE_BANK_SIZE: shared memory bank width is * four bytes. *

    *
  • *
  • *

    CU_SHARED_MEM_CONFIG_EIGHT_BYTE_BANK_SIZE: shared memory bank width * will eight bytes. *

    *
  • *
*

*
* Note: *

Note that this * function may also return error codes from previous, asynchronous * launches. *

*
*

*
* * @param pConfig returned shared memory configuration * * @return CUDA_SUCCESS, CUDA_ERROR_DEINITIALIZED, CUDA_ERROR_NOT_INITIALIZED, * CUDA_ERROR_INVALID_CONTEXT, CUDA_ERROR_INVALID_VALUE * * @see JCudaDriver#cuCtxCreate * @see JCudaDriver#cuCtxDestroy * @see JCudaDriver#cuCtxGetApiVersion * @see JCudaDriver#cuCtxGetCacheConfig * @see JCudaDriver#cuCtxGetDevice * @see JCudaDriver#cuCtxGetLimit * @see JCudaDriver#cuCtxPopCurrent * @see JCudaDriver#cuCtxPushCurrent * @see JCudaDriver#cuCtxSetLimit * @see JCudaDriver#cuCtxSynchronize * @see JCudaDriver#cuCtxGetSharedMemConfig * @see JCudaDriver#cuFuncSetCacheConfig */ public static int cuCtxGetSharedMemConfig(int pConfig[]) { return checkResult(cuCtxGetSharedMemConfigNative(pConfig)); } private static native int cuCtxGetSharedMemConfigNative(int pConfig[]); /** * Sets the shared memory configuration for the current context. * *
     * CUresult cuCtxSetSharedMemConfig (
     *      CUsharedconfig config )
     * 
*
*

Sets the shared memory configuration for * the current context. On devices with configurable shared memory banks, * this function * will set the context's shared memory bank * size which is used for subsequent kernel launches. *

*

Changed the shared memory configuration * between launches may insert a device side synchronization point between * those launches. *

*

Changing the shared memory bank size * will not increase shared memory usage or affect occupancy of kernels, * but may have major * effects on performance. Larger bank sizes * will allow for greater potential bandwidth to shared memory, but will * change what * kinds of accesses to shared memory will * result in bank conflicts. *

*

This function will do nothing on devices * with fixed shared memory bank size. *

*

The supported bank configurations are: *

    *
  • *

    CU_SHARED_MEM_CONFIG_DEFAULT_BANK_SIZE: * set bank width to the default initial setting (currently, four bytes). *

    *
  • *
  • *

    CU_SHARED_MEM_CONFIG_FOUR_BYTE_BANK_SIZE: set shared memory bank width * to be natively four bytes. *

    *
  • *
  • *

    CU_SHARED_MEM_CONFIG_EIGHT_BYTE_BANK_SIZE: set shared memory bank * width to be natively eight bytes. *

    *
  • *
*

*
* Note: *

Note that this * function may also return error codes from previous, asynchronous * launches. *

*
*

*
* * @param config requested shared memory configuration * * @return CUDA_SUCCESS, CUDA_ERROR_DEINITIALIZED, CUDA_ERROR_NOT_INITIALIZED, * CUDA_ERROR_INVALID_CONTEXT, CUDA_ERROR_INVALID_VALUE * * @see JCudaDriver#cuCtxCreate * @see JCudaDriver#cuCtxDestroy * @see JCudaDriver#cuCtxGetApiVersion * @see JCudaDriver#cuCtxGetCacheConfig * @see JCudaDriver#cuCtxGetDevice * @see JCudaDriver#cuCtxGetLimit * @see JCudaDriver#cuCtxPopCurrent * @see JCudaDriver#cuCtxPushCurrent * @see JCudaDriver#cuCtxSetLimit * @see JCudaDriver#cuCtxSynchronize * @see JCudaDriver#cuCtxGetSharedMemConfig * @see JCudaDriver#cuFuncSetCacheConfig */ public static int cuCtxSetSharedMemConfig(int config) { return checkResult(cuCtxSetSharedMemConfigNative(config)); } private static native int cuCtxSetSharedMemConfigNative(int config); /** * Gets the context's API version. * *
     * CUresult cuCtxGetApiVersion (
     *      CUcontext ctx,
     *      unsigned int* version )
     * 
*
*

Gets the context's API version. Returns * a version number in version corresponding to the capabilities * of the context (e.g. 3010 or 3020), which library developers can use * to direct callers * to a specific API version. If ctx is NULL, returns the API version used to create the currently * bound context. *

*

Note that new API versions are only * introduced when context capabilities are changed that break binary * compatibility, so the * API version and driver version may be * different. For example, it is valid for the API version to be 3020 * while the driver * version is 4020. *

*
* Note: *

Note that this * function may also return error codes from previous, asynchronous * launches. *

*
*

*
* * @param ctx Context to check * @param version Pointer to version * * @return CUDA_SUCCESS, CUDA_ERROR_DEINITIALIZED, CUDA_ERROR_NOT_INITIALIZED, * CUDA_ERROR_INVALID_CONTEXT, CUDA_ERROR_UNKNOWN * * @see JCudaDriver#cuCtxCreate * @see JCudaDriver#cuCtxDestroy * @see JCudaDriver#cuCtxGetDevice * @see JCudaDriver#cuCtxGetLimit * @see JCudaDriver#cuCtxPopCurrent * @see JCudaDriver#cuCtxPushCurrent * @see JCudaDriver#cuCtxSetCacheConfig * @see JCudaDriver#cuCtxSetLimit * @see JCudaDriver#cuCtxSynchronize */ public static int cuCtxGetApiVersion(CUcontext ctx, int version[]) { return checkResult(cuCtxGetApiVersionNative(ctx, version)); } private static native int cuCtxGetApiVersionNative(CUcontext ctx, int version[]); /** * Returns numerical values that correspond to the least and * greatest stream priorities.
*
* Returns in *leastPriority and *greatestPriority the numerical values that correspond * to the least and greatest stream priorities respectively. Stream priorities * follow a convention where lower numbers imply greater priorities. The range of * meaningful stream priorities is given by [*greatestPriority, *leastPriority]. * If the user attempts to create a stream with a priority value that is * outside the meaningful range as specified by this API, the priority is * automatically clamped down or up to either *leastPriority or *greatestPriority * respectively. See ::cuStreamCreateWithPriority for details on creating a * priority stream.
* A NULL may be passed in for *leastPriority or *greatestPriority if the value * is not desired.
*
* This function will return '0' in both \p *leastPriority and \p *greatestPriority if * the current context's device does not support stream priorities * (see ::cuDeviceGetAttribute). * * @param leastPriority Pointer to an int in which the numerical value for least * stream priority is returned * @param greatestPriority Pointer to an int in which the numerical value for greatest * stream priority is returned * * @return CUDA_SUCCESS, CUDA_ERROR_INVALID_VALUE * * @see JCudaDriver#cuStreamCreateWithPriority * @see JCudaDriver#cuStreamGetPriority, * @see JCudaDriver#cuCtxGetDevice, * @see JCudaDriver#cuCtxSetLimit, * @see JCudaDriver#cuCtxSynchronize */ public static int cuCtxGetStreamPriorityRange(int leastPriority[], int greatestPriority[]) { return checkResult(cuCtxGetStreamPriorityRangeNative(leastPriority, greatestPriority)); } private static native int cuCtxGetStreamPriorityRangeNative(int leastPriority[], int greatestPriority[]); /** * Resets all persisting lines in cache to normal status. * * ::cuCtxResetPersistingL2Cache Resets all persisting lines in cache to normal * status. Takes effect on function return. * * @return CUDA_SUCCESS, CUDA_ERROR_NOT_SUPPORTED * * @see CUaccessPolicyWindow */ public static int cuCtxResetPersistingL2Cache() { return checkResult(cuCtxResetPersistingL2CacheNative()); } private static native int cuCtxResetPersistingL2CacheNative(); /** * Launches a CUDA function. * *
*
* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * *
CUresult cuLaunchKernel (CUfunction  f,
unsigned int  gridDimX,
unsigned int  gridDimY,
unsigned int  gridDimZ,
unsigned int  blockDimX,
unsigned int  blockDimY,
unsigned int  blockDimZ,
unsigned int  sharedMemBytes,
CUstream  hStream,
void **  kernelParams,
void **  extra 
)
*
*
*

* Invokes the kernel f on a gridDimX x * gridDimY x gridDimZ grid of blocks. Each * block contains blockDimX x blockDimY x * blockDimZ threads. *

* sharedMemBytes sets the amount of dynamic shared memory * that will be available to each thread block. *

* cuLaunchKernel() can optionally be associated to a stream by passing a * non-zero hStream argument. *

* Kernel parameters to f can be specified in one of two * ways: *

* 1) Kernel parameters can be specified via kernelParams. * If f has N parameters, then kernelParams * needs to be an array of N pointers. Each of kernelParams[0] * through kernelParams[N-1] must point to a region of memory * from which the actual kernel parameter will be copied. The number of * kernel parameters and their offsets and sizes do not need to be * specified as that information is retrieved directly from the kernel's * image. *

* 2) Kernel parameters can also be packaged by the application into a * single buffer that is passed in via the extra parameter. * This places the burden on the application of knowing each kernel * parameter's size and alignment/padding within the buffer. Here is an * example of using the extra parameter in this manner: *

*
    size_t argBufferSize;
     *     char argBuffer[256];
     *
     *     // populate argBuffer and argBufferSize
     *
     *     void *config[] = {
     *         CU_LAUNCH_PARAM_BUFFER_POINTER, argBuffer,
     *         CU_LAUNCH_PARAM_BUFFER_SIZE,    &argBufferSize,
     *         CU_LAUNCH_PARAM_END
     *     };
     *     status = cuLaunchKernel(f, gx, gy, gz, bx, by, bz, sh, s, NULL,
     * config);
     * 
*
*

* The extra parameter exists to allow cuLaunchKernel to take * additional less commonly used arguments. extra specifies * a list of names of extra settings and their corresponding values. Each * extra setting name is immediately followed by the corresponding value. * The list must be terminated with either NULL or * CU_LAUNCH_PARAM_END. *

*

    *
  • CU_LAUNCH_PARAM_END, which indicates the end of the extra * array; *
  • *
  • CU_LAUNCH_PARAM_BUFFER_POINTER, which specifies that * the next value in extra will be a pointer to a buffer * containing all the kernel parameters for launching kernel * f; *
  • *
  • CU_LAUNCH_PARAM_BUFFER_SIZE, which specifies * that the next value in extra will be a pointer to a size_t * containing the size of the buffer specified with * CU_LAUNCH_PARAM_BUFFER_POINTER; *
  • *
*

* The error CUDA_ERROR_INVALID_VALUE will be returned if kernel parameters * are specified with both kernelParams and extra * (i.e. both kernelParams and extra are * non-NULL). *

* Calling cuLaunchKernel() sets persistent function state that is the * same as function state set through the following deprecated APIs: *

* cuFuncSetBlockShape() cuFuncSetSharedSize() cuParamSetSize() * cuParamSeti() cuParamSetf() cuParamSetv() *

* When the kernel f is launched via cuLaunchKernel(), the * previous block shape, shared size and parameter info associated with * f is overwritten. *

* Note that to use cuLaunchKernel(), the kernel f must * either have been compiled with toolchain version 3.2 or later so that * it will contain kernel parameter information, or have no kernel * parameters. If either of these conditions is not met, then * cuLaunchKernel() will return CUDA_ERROR_INVALID_IMAGE. *

*

*
* * @return CUDA_SUCCESS, CUDA_ERROR_DEINITIALIZED, CUDA_ERROR_NOT_INITIALIZED, * CUDA_ERROR_INVALID_CONTEXT, CUDA_ERROR_INVALID_HANDLE, * CUDA_ERROR_INVALID_IMAGE, CUDA_ERROR_INVALID_VALUE, * CUDA_ERROR_LAUNCH_FAILED, CUDA_ERROR_LAUNCH_OUT_OF_RESOURCES, * CUDA_ERROR_LAUNCH_TIMEOUT, CUDA_ERROR_LAUNCH_INCOMPATIBLE_TEXTURING, * CUDA_ERROR_SHARED_OBJECT_INIT_FAILED * * @see JCudaDriver#cuCtxGetCacheConfig * @see JCudaDriver#cuCtxSetCacheConfig * @see JCudaDriver#cuFuncSetCacheConfig * @see JCudaDriver#cuFuncGetAttribute */ public static int cuLaunchKernel( CUfunction f, int gridDimX, int gridDimY, int gridDimZ, int blockDimX, int blockDimY, int blockDimZ, int sharedMemBytes, CUstream hStream, Pointer kernelParams, Pointer extra) { return checkResult(cuLaunchKernelNative(f, gridDimX, gridDimY, gridDimZ, blockDimX, blockDimY, blockDimZ, sharedMemBytes, hStream, kernelParams, extra)); } private static native int cuLaunchKernelNative( CUfunction f, int gridDimX, int gridDimY, int gridDimZ, int blockDimX, int blockDimY, int blockDimZ, int sharedMemBytes, CUstream hStream, Pointer kernelParams, Pointer extra); /** * Launches a CUDA function where thread blocks can cooperate and synchronize as they execute. * *
     * CUresult cuLaunchCooperativeKernel (
     *      CUfunction f,
     *      unsigned int  gridDimX,
     *      unsigned int  gridDimY,
     *      unsigned int  gridDimZ,
     *      unsigned int  blockDimX,
     *      unsigned int  blockDimY,
     *      unsigned int  blockDimZ,
     *      unsigned int  sharedMemBytes,
     *      CUstream hStream,
     *      void** kernelParams )
     * 
*
Launches a CUDA function where thread blocks can cooperate * and synchronize as they execute. *
*
*
Description
*

Invokes the kernel f on a * gridDimX x gridDimY x gridDimZ grid of * blocks. Each block contains blockDimX x blockDimY * x blockDimZ threads. *

*

sharedMemBytes sets the * amount of dynamic shared memory that will be available to each thread * block. *

*

The device on which this kernel is * invoked must have a non-zero value for the device attribute * CU_DEVICE_ATTRIBUTE_COOPERATIVE_LAUNCH. *

*

The total number of blocks launched * cannot exceed the maximum number of blocks per multiprocessor as * returned by cuOccupancyMaxActiveBlocksPerMultiprocessor (or * cuOccupancyMaxActiveBlocksPerMultiprocessorWithFlags) times the number * of multiprocessors as specified by the device attribute * CU_DEVICE_ATTRIBUTE_MULTIPROCESSOR_COUNT. *

*

The kernel cannot make use of CUDA * dynamic parallelism. *

*

Kernel parameters must be specified * via kernelParams. If f has N parameters, then kernelParams needs to be an array of N pointers. Each of kernelParams[0] through kernelParams[N-1] must point * to a region of memory from which the actual kernel parameter will be * copied. The number of kernel parameters * and their offsets and sizes do not * need to be specified as that information is retrieved directly from * the kernel's image. *

*

Calling cuLaunchCooperativeKernel() * sets persistent function state that is the same as function state set * through cuLaunchKernel API *

*

When the kernel f is * launched via cuLaunchCooperativeKernel(), the previous block shape, * shared size and parameter info associated with f is * overwritten. *

*

Note that to use * cuLaunchCooperativeKernel(), the kernel f must either have * been compiled with toolchain version 3.2 or later so that it will * contain kernel parameter information, * or have no kernel parameters. If * either of these conditions is not met, then cuLaunchCooperativeKernel() * will return CUDA_ERROR_INVALID_IMAGE. *

*
* Note: *
    *
  • *

    This function uses * standard default stream semantics. *

    *
  • *
  • *

    Note that this function * may also return error codes from previous, asynchronous launches. *

    *
  • *
*
*

*
* * @param f Kernel to launch * @param gridDimX Width of grid in blocks * @param gridDimY Height of grid in blocks * @param gridDimZ Depth of grid in blocks * @param blockDimX X dimension of each thread block * @param blockDimY Y dimension of each thread block * @param blockDimZ Z dimension of each thread block * @param sharedMemBytes Dynamic shared-memory size per thread block in bytes * @param hStream Stream identifier * @param kernelParams Array of pointers to kernel parameters * * @return CUDA_SUCCESS, CUDA_ERROR_DEINITIALIZED, CUDA_ERROR_NOT_INITIALIZED, * CUDA_ERROR_INVALID_CONTEXT, CUDA_ERROR_INVALID_HANDLE, * CUDA_ERROR_INVALID_IMAGE, CUDA_ERROR_INVALID_VALUE, * CUDA_ERROR_LAUNCH_FAILED, CUDA_ERROR_LAUNCH_OUT_OF_RESOURCES, * CUDA_ERROR_LAUNCH_TIMEOUT, CUDA_ERROR_LAUNCH_INCOMPATIBLE_TEXTURING, * CUDA_ERROR_COOPERATIVE_LAUNCH_TOO_LARGE, * CUDA_ERROR_SHARED_OBJECT_INIT_FAILED * * @see JCudaDriver#cuCtxGetCacheConfig * @see JCudaDriver#cuCtxSetCacheConfig * @see JCudaDriver#cuFuncSetCacheConfig * @see JCudaDriver#cuFuncGetAttribute * @see JCudaDriver#cuLaunchCooperativeKernelMultiDevice * @see JCudaDriver#cudaLaunchCooperativeKernel */ public static int cuLaunchCooperativeKernel( CUfunction f, int gridDimX, int gridDimY, int gridDimZ, int blockDimX, int blockDimY, int blockDimZ, int sharedMemBytes, CUstream hStream, Pointer kernelParams) { return checkResult(cuLaunchCooperativeKernelNative(f, gridDimX, gridDimY, gridDimZ, blockDimX, blockDimY, blockDimZ, sharedMemBytes, hStream, kernelParams)); } private static native int cuLaunchCooperativeKernelNative( CUfunction f, int gridDimX, int gridDimY, int gridDimZ, int blockDimX, int blockDimY, int blockDimZ, int sharedMemBytes, CUstream hStream, Pointer kernelParams); /** * Launches CUDA functions on multiple devices where thread blocks can cooperate and synchronize as they execute. * *
     * CUresult cuLaunchCooperativeKernelMultiDevice (
     *      CUDA_LAUNCH_PARAMS* launchParamsList,
     *      unsigned int  numDevices,
     *      unsigned int  flags )
     * 
*
Launches CUDA functions on multiple devices where thread * blocks can cooperate and synchronize as they execute. *
*
*
Description
*

Invokes kernels as specified in the * launchParamsList array where each element of the array * specifies all the parameters required to perform a single kernel * launch. These kernels * can cooperate and synchronize as they * execute. The size of the array is specified by numDevices. *

*

No two kernels can be launched on * the same device. All the devices targeted by this multi-device launch * must be identical. * All devices must have a non-zero value * for the device attribute * CU_DEVICE_ATTRIBUTE_COOPERATIVE_MULTI_DEVICE_LAUNCH. *

*

All kernels launched must be identical * with respect to the compiled code. Note that any __device__, __constant__ * or __managed__ * variables present in the module that * owns the kernel launched on each device, are independently instantiated * on every device. * It is the application's responsiblity * to ensure these variables are initialized and used appropriately. *

*

The size of the grids as specified * in blocks, the size of the blocks themselves and the amount of shared * memory used by each * thread block must also match across * all launched kernels. *

*

The streams used to launch these * kernels must have been created via either cuStreamCreate or * cuStreamCreateWithPriority. The NULL stream or CU_STREAM_LEGACY or * CU_STREAM_PER_THREAD cannot be used. *

*

The total number of blocks launched * per kernel cannot exceed the maximum number of blocks per multiprocessor * as returned by * cuOccupancyMaxActiveBlocksPerMultiprocessor * (or cuOccupancyMaxActiveBlocksPerMultiprocessorWithFlags) times the * number of multiprocessors as specified by the device attribute * CU_DEVICE_ATTRIBUTE_MULTIPROCESSOR_COUNT. Since the total number of * blocks launched per device has to match across all devices, the maximum * number of blocks that * can be launched per device will be * limited by the device with the least number of multiprocessors. *

*

The kernels cannot make use of CUDA * dynamic parallelism. *

*

The CUDA_LAUNCH_PARAMS structure is * defined as: *

        typedef struct CUDA_LAUNCH_PARAMS_st
     *               {
     *                   CUfunction function;
     *                   unsigned int gridDimX;
     *                   unsigned int gridDimY;
     *                   unsigned int gridDimZ;
     *                   unsigned int blockDimX;
     *                   unsigned int blockDimY;
     *                   unsigned int blockDimZ;
     *                   unsigned int sharedMemBytes;
     *                   CUstream hStream;
     *                   void **kernelParams;
     *               } CUDA_LAUNCH_PARAMS;
* where: *
    *
  • *

    CUDA_LAUNCH_PARAMS::function * specifies the kernel to be launched. All functions must be identical * with respect to the compiled code. *

    *
  • *
  • *

    CUDA_LAUNCH_PARAMS::gridDimX * is the width of the grid in blocks. This must match across all kernels * launched. *

    *
  • *
  • *

    CUDA_LAUNCH_PARAMS::gridDimY * is the height of the grid in blocks. This must match across all kernels * launched. *

    *
  • *
  • *

    CUDA_LAUNCH_PARAMS::gridDimZ * is the depth of the grid in blocks. This must match across all kernels * launched. *

    *
  • *
  • *

    CUDA_LAUNCH_PARAMS::blockDimX * is the X dimension of each thread block. This must match across all * kernels launched. *

    *
  • *
  • *

    CUDA_LAUNCH_PARAMS::blockDimX * is the Y dimension of each thread block. This must match across all * kernels launched. *

    *
  • *
  • *

    CUDA_LAUNCH_PARAMS::blockDimZ * is the Z dimension of each thread block. This must match across all * kernels launched. *

    *
  • *
  • *

    CUDA_LAUNCH_PARAMS::sharedMemBytes * is the dynamic shared-memory size per thread block in bytes. This must * match across all kernels launched. *

    *
  • *
  • *

    CUDA_LAUNCH_PARAMS::hStream * is the handle to the stream to perform the launch in. This cannot be * the NULL stream or CU_STREAM_LEGACY or CU_STREAM_PER_THREAD. The CUDA * context associated with this stream must match that associated with * CUDA_LAUNCH_PARAMS::function. *

    *
  • *
  • *

    CUDA_LAUNCH_PARAMS::kernelParams * is an array of pointers to kernel parameters. If CUDA_LAUNCH_PARAMS::function * has N parameters, then CUDA_LAUNCH_PARAMS::kernelParams needs to be an * array of N pointers. Each of CUDA_LAUNCH_PARAMS::kernelParams[0] * through CUDA_LAUNCH_PARAMS::kernelParams[N-1] must point to a region * of memory from which the actual kernel parameter will be copied. The * number of kernel parameters * and their offsets and sizes * do not need to be specified as that information is retrieved directly * from the kernel's image. *

    *
  • *
*

*

By default, the kernel won't begin * execution on any GPU until all prior work in all the specified streams * has completed. This * behavior can be overridden by * specifying the flag CUDA_COOPERATIVE_LAUNCH_MULTI_DEVICE_NO_PRE_LAUNCH_SYNC. * When this flag is specified, each kernel will only wait for prior work * in the stream corresponding to that GPU to complete * before it begins execution. *

*

Similarly, by default, any subsequent * work pushed in any of the specified streams will not begin execution * until the kernels * on all GPUs have completed. This * behavior can be overridden by specifying the flag * CUDA_COOPERATIVE_LAUNCH_MULTI_DEVICE_NO_POST_LAUNCH_SYNC. When this * flag is specified, any subsequent work pushed in any of the specified * streams will only wait for the kernel launched * on the GPU corresponding to that * stream to complete before it begins execution. *

*

Calling * cuLaunchCooperativeKernelMultiDevice() sets persistent function state * that is the same as function state set through cuLaunchKernel API when * called individually for each element in launchParamsList. *

*

When kernels are launched via * cuLaunchCooperativeKernelMultiDevice(), the previous block shape, * shared size and parameter info associated with each * CUDA_LAUNCH_PARAMS::function in launchParamsList is * overwritten. *

*

Note that to use * cuLaunchCooperativeKernelMultiDevice(), the kernels must either have * been compiled with toolchain version 3.2 or later so that it will * contain kernel parameter * information, or have no kernel * parameters. If either of these conditions is not met, then * cuLaunchCooperativeKernelMultiDevice() will return * CUDA_ERROR_INVALID_IMAGE. *

*
* Note: *
    *
  • *

    This function uses * standard default stream semantics. *

    *
  • *
  • *

    Note that this function * may also return error codes from previous, asynchronous launches. *

    *
  • *
*
*

*
* * @param launchParamsList List of launch parameters, one per device * @param numDevices Size of the launchParamsList array * @param flags Flags to control launch behavior * * @return CUDA_SUCCESS, CUDA_ERROR_DEINITIALIZED, CUDA_ERROR_NOT_INITIALIZED, * CUDA_ERROR_INVALID_CONTEXT, CUDA_ERROR_INVALID_HANDLE, * CUDA_ERROR_INVALID_IMAGE, CUDA_ERROR_INVALID_VALUE, * CUDA_ERROR_LAUNCH_FAILED, CUDA_ERROR_LAUNCH_OUT_OF_RESOURCES, * CUDA_ERROR_LAUNCH_TIMEOUT, CUDA_ERROR_LAUNCH_INCOMPATIBLE_TEXTURING, * CUDA_ERROR_COOPERATIVE_LAUNCH_TOO_LARGE, * CUDA_ERROR_SHARED_OBJECT_INIT_FAILED * * @see JCudaDriver#cuCtxGetCacheConfig * @see JCudaDriver#cuCtxSetCacheConfig * @see JCudaDriver#cuFuncSetCacheConfig * @see JCudaDriver#cuFuncGetAttribute * @see JCudaDriver#cuLaunchCooperativeKernel * @see JCudaDriver#cudaLaunchCooperativeKernelMultiDevice */ public static int cuLaunchCooperativeKernelMultiDevice(CUDA_LAUNCH_PARAMS launchParamsList[], int numDevices, int flags) { return checkResult(cuLaunchCooperativeKernelMultiDeviceNative(launchParamsList, numDevices, flags)); } private static native int cuLaunchCooperativeKernelMultiDeviceNative(CUDA_LAUNCH_PARAMS launchParamsList[], int numDevices, int flags); /** * Enqueues a host function call in a stream. * * Enqueues a host function to run in a stream. The function will be called * after currently enqueued work and will block work added after it.
*
* The host function must not make any CUDA API calls. Attempting to use a * CUDA API may result in ::CUDA_ERROR_NOT_PERMITTED, but this is not required. * The host function must not perform any synchronization that may depend on * outstanding CUDA work not mandated to run earlier. Host functions without a * mandated order (such as in independent streams) execute in undefined order * and may be serialized.
*
* For the purposes of Unified Memory, execution makes a number of guarantees: *
    *
  • The stream is considered idle for the duration of the function's * execution. Thus, for example, the function may always use memory attached * to the stream it was enqueued in.
  • *
  • The start of execution of the function has the same effect as * synchronizing an event recorded in the same stream immediately prior to * the function. It thus synchronizes streams which have been "joined" * prior to the function.
  • *
  • Adding device work to any stream does not have the effect of making * the stream active until all preceding host functions and stream callbacks * have executed. Thus, for * example, a function might use global attached memory even if work has * been added to another stream, if the work has been ordered behind the * function call with an event.
  • *
  • Completion of the function does not cause a stream to become * active except as described above. The stream will remain idle * if no device work follows the function, and will remain idle across * consecutive host functions or stream callbacks without device work in * between. Thus, for example, * stream synchronization can be done by signaling from a host function at the * end of the stream.
  • *
* * Note that, in contrast to ::cuStreamAddCallback, the function will not be * called in the event of an error in the CUDA context. * * @param hStream - Stream to enqueue function call in * @param fn - The function to call once preceding stream operations are complete * @param userData - User-specified data to be passed to the function * * @return * CUDA_SUCCESS, * CUDA_ERROR_DEINITIALIZED, * CUDA_ERROR_NOT_INITIALIZED, * CUDA_ERROR_INVALID_CONTEXT, * CUDA_ERROR_INVALID_HANDLE, * CUDA_ERROR_NOT_SUPPORTED * * @see * JCudaDriver#cuStreamCreate * JCudaDriver#cuStreamQuery * JCudaDriver#cuStreamSynchronize * JCudaDriver#cuStreamWaitEvent * JCudaDriver#cuStreamDestroy * JCudaDriver#cuMemAllocManaged * JCudaDriver#cuStreamAttachMemAsync * JCudaDriver#cuStreamAddCallback */ public static int cuLaunchHostFunc(CUstream hStream, CUhostFn fn, Object userData) { return checkResult(cuLaunchHostFuncNative(hStream, fn, userData)); } private static native int cuLaunchHostFuncNative(CUstream hStream, CUhostFn fn, Object userData); /** * Returns resource limits. * *
     * CUresult cuCtxGetLimit (
     *      size_t* pvalue,
     *      CUlimit limit )
     * 
*
*

Returns resource limits. Returns in *pvalue the current size of limit. The supported * CUlimit values are: *

    *
  • *

    CU_LIMIT_STACK_SIZE: stack size * in bytes of each GPU thread. *

    *
  • *
  • *

    CU_LIMIT_PRINTF_FIFO_SIZE: size * in bytes of the FIFO used by the printf() device system call. *

    *
  • *
  • *

    CU_LIMIT_MALLOC_HEAP_SIZE: size * in bytes of the heap used by the malloc() and free() device system * calls. *

    *
  • *
  • *

    CU_LIMIT_DEV_RUNTIME_SYNC_DEPTH: * maximum grid depth at which a thread can issue the device runtime call * cudaDeviceSynchronize() to wait on child grid launches * to complete. *

    *
  • *
  • *

    CU_LIMIT_DEV_RUNTIME_PENDING_LAUNCH_COUNT: maximum number of * outstanding device runtime launches that can be made from this * context. *

    *
  • *
  • *

    CU_LIMIT_MAX_L2_FETCH_GRANULARITY: L2 cache fetch granularity *

    *
  • *
*

*
* Note: *

Note that this * function may also return error codes from previous, asynchronous * launches. *

*
*

*
* * @param pvalue Returned size of limit * @param limit Limit to query * * @return CUDA_SUCCESS, CUDA_ERROR_INVALID_VALUE, * CUDA_ERROR_UNSUPPORTED_LIMIT * * @see JCudaDriver#cuCtxCreate * @see JCudaDriver#cuCtxDestroy * @see JCudaDriver#cuCtxGetApiVersion * @see JCudaDriver#cuCtxGetCacheConfig * @see JCudaDriver#cuCtxGetDevice * @see JCudaDriver#cuCtxPopCurrent * @see JCudaDriver#cuCtxPushCurrent * @see JCudaDriver#cuCtxSetCacheConfig * @see JCudaDriver#cuCtxSetLimit * @see JCudaDriver#cuCtxSynchronize */ public static int cuCtxGetLimit(long pvalue[], int limit) { return checkResult(cuCtxGetLimitNative(pvalue, limit)); } private static native int cuCtxGetLimitNative(long pvalue[], int limit); /** * Initialize the profiling. * *
     * CUresult cuProfilerInitialize (
     *      const char* configFile,
     *      const char* outputFile,
     *      CUoutput_mode outputMode )
     * 
*
*

Initialize the profiling. Using this * API user can initialize the CUDA profiler by specifying the configuration * file, output * file and output file format. This API is * generally used to profile different set of counters by looping the * kernel launch. * The configFile parameter can * be used to select profiling options including profiler counters. Refer * to the "Compute Command Line Profiler * User Guide" for supported profiler * options and counters. *

*

Limitation: The CUDA profiler cannot be * initialized with this API if another profiling tool is already active, * as indicated * by the CUDA_ERROR_PROFILER_DISABLED * return code. *

*

Typical usage of the profiling APIs is * as follows: *

*

for each set of counters/options * { * cuProfilerInitialize(); //Initialize * profiling, set the counters or options in the config file * ... * cuProfilerStart(); * // code to be profiled * cuProfilerStop(); * ... * cuProfilerStart(); * // code to be profiled * cuProfilerStop(); * ... * } *

*
* Note: *

Note that this * function may also return error codes from previous, asynchronous * launches. *

*
*

*
* * @param configFile Name of the config file that lists the counters/options for profiling. * @param outputFile Name of the outputFile where the profiling results will be stored. * @param outputMode outputMode, can be CU_OUT_KEY_VALUE_PAIR or CU_OUT_CSV. * * @return CUDA_SUCCESS, CUDA_ERROR_INVALID_CONTEXT, CUDA_ERROR_INVALID_VALUE, * CUDA_ERROR_PROFILER_DISABLED * * @see JCudaDriver#cuProfilerStart * @see JCudaDriver#cuProfilerStop * * @deprecated Deprecated as of CUDA 11.0 */ public static int cuProfilerInitialize(String configFile, String outputFile, int outputMode) { return checkResult(cuProfilerInitializeNative(configFile, outputFile, outputMode)); } private static native int cuProfilerInitializeNative(String configFile, String outputFile, int outputMode); /** * Enable profiling. * *
     * CUresult cuProfilerStart (
     *      void )
     * </pre>
     * <p>
     * Enable profiling. Enables profile collection by the active profiling
     * tool. If profiling is already enabled, then cuProfilerStart() has no
     * effect.
     * <p>
     * The cuProfilerStart and cuProfilerStop APIs are used to
     * programmatically control the profiling granularity by allowing
     * profiling to be done only on selective pieces of code.
     * <p>
     * Note: This function may also return error codes from previous,
     * asynchronous launches.
     *
     * @return CUDA_SUCCESS, CUDA_ERROR_INVALID_CONTEXT
     *
     * @see JCudaDriver#cuProfilerInitialize
     * @see JCudaDriver#cuProfilerStop
     */
    public static int cuProfilerStart()
    {
        int status = cuProfilerStartNative();
        return checkResult(status);
    }

    private static native int cuProfilerStartNative();

    /**
     * Disable profiling.
     *
     * <pre>
     * CUresult cuProfilerStop (
     *      void )
     * </pre>
     * <p>
     * Disable profiling. Disables profile collection by the active
     * profiling tool. If profiling is already disabled, then
     * cuProfilerStop() has no effect.
     * <p>
     * The cuProfilerStart and cuProfilerStop APIs are used to
     * programmatically control the profiling granularity by allowing
     * profiling to be done only on selective pieces of code.
     * <p>
     * Note: This function may also return error codes from previous,
     * asynchronous launches.
     *
     * @return CUDA_SUCCESS, CUDA_ERROR_INVALID_CONTEXT
     *
     * @see JCudaDriver#cuProfilerInitialize
     * @see JCudaDriver#cuProfilerStart
     */
    public static int cuProfilerStop()
    {
        int status = cuProfilerStopNative();
        return checkResult(status);
    }

    private static native int cuProfilerStopNative();

}




© 2015 - 2024 Weber Informatics LLC | Privacy Policy