All Downloads are FREE. Search and download functionalities are using the official Maven repository.

jcuda.driver.JCudaDriver Maven / Gradle / Ivy

The newest version!
/*
 * JCuda - Java bindings for NVIDIA CUDA driver and runtime API
 *
 * Copyright (c) 2009-2012 Marco Hutter - http://www.jcuda.org
 *
 * Permission is hereby granted, free of charge, to any person
 * obtaining a copy of this software and associated documentation
 * files (the "Software"), to deal in the Software without
 * restriction, including without limitation the rights to use,
 * copy, modify, merge, publish, distribute, sublicense, and/or sell
 * copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following
 * conditions:
 *
 * The above copyright notice and this permission notice shall be
 * included in all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
 * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
 * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
 * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
 * OTHER DEALINGS IN THE SOFTWARE.
 */

package jcuda.driver;

import jcuda.*;

/**
 * Java bindings for the NVidia CUDA driver API.
 *
 * Most comments are extracted from the CUDA online documentation
 */
public class JCudaDriver
{
    /** The CUDA version */
    public static final int CUDA_VERSION = 6050;

    /**
     * If set, host memory is portable between CUDA contexts.
     * Flag for {@link JCudaDriver#cuMemHostAlloc}
     */
    public static final int CU_MEMHOSTALLOC_PORTABLE = 0x01;

    /**
     * If set, host memory is mapped into CUDA address space and
     * JCudaDriver#cuMemHostGetDevicePointer may be called on the host pointer.
     * Flag for {@link JCudaDriver#cuMemHostAlloc}
     */
    public static final int CU_MEMHOSTALLOC_DEVICEMAP = 0x02;

    /**
     * If set, host memory is allocated as write-combined - fast to write,
     * faster to DMA, slow to read except via SSE4 streaming load instruction
     * (MOVNTDQA).
     * Flag for {@link JCudaDriver#cuMemHostAlloc}
     */
    public static final int CU_MEMHOSTALLOC_WRITECOMBINED = 0x04;

    /**
     * If set, host memory is portable between CUDA contexts.
     * Flag for ::cuMemHostRegister()
     */
    public static final int CU_MEMHOSTREGISTER_PORTABLE = 0x01;

    /**
     * If set, host memory is mapped into CUDA address space and
     * ::cuMemHostGetDevicePointer() may be called on the host pointer.
     * Flag for ::cuMemHostRegister()
     */
    public static final int CU_MEMHOSTREGISTER_DEVICEMAP = 0x02;

    /**
     * If set, peer memory is mapped into CUDA address space and
     * ::cuMemPeerGetDevicePointer() may be called on the host pointer.
     * Flag for ::cuMemPeerRegister()
     * @deprecated This value has been added in CUDA 4.0 RC,
     * and removed in CUDA 4.0 RC2
     */
    @Deprecated
    public static final int CU_MEMPEERREGISTER_DEVICEMAP = 0x02;

    /**
     * If set, the CUDA array is a collection of layers, where each layer is either a 1D
     * or a 2D array and the Depth member of CUDA_ARRAY3D_DESCRIPTOR specifies the number
     * of layers, not the depth of a 3D array.
     */
    public static final int CUDA_ARRAY3D_LAYERED = 0x01;

    /**
     * If set, the CUDA array contains an array of 2D slices
     * and the Depth member of CUDA_ARRAY3D_DESCRIPTOR specifies
     * the number of slices, not the depth of a 3D array.
* @deprecated use CUDA_ARRAY3D_LAYERED */ public static final int CUDA_ARRAY3D_2DARRAY = 0x01; /** * This flag must be set in order to bind a surface reference * to the CUDA array */ public static final int CUDA_ARRAY3D_SURFACE_LDST = 0x02; /** * If set, the CUDA array is a collection of six 2D arrays, representing faces of a cube. The * width of such a CUDA array must be equal to its height, and Depth must be six. * If ::CUDA_ARRAY3D_LAYERED flag is also set, then the CUDA array is a collection of cubemaps * and Depth must be a multiple of six. */ public static final int CUDA_ARRAY3D_CUBEMAP = 0x04; /** * This flag must be set in order to perform texture gather operations * on a CUDA array. */ public static final int CUDA_ARRAY3D_TEXTURE_GATHER = 0x08; /** * This flag if set indicates that the CUDA * array is a DEPTH_TEXTURE. */ public static final int CUDA_ARRAY3D_DEPTH_TEXTURE = 0x10; /** * For texture references loaded into the module, use default * texunit from texture reference */ public static final int CU_PARAM_TR_DEFAULT = -1; /** * Override the texref format with a format inferred from the array */ public static final int CU_TRSA_OVERRIDE_FORMAT = 0x01; /** * Read the texture as integers rather than promoting the values * to floats in the range [0,1] */ public static final int CU_TRSF_READ_AS_INTEGER = 0x01; /** * Use normalized texture coordinates in the range [0,1) instead of [0,dim) */ public static final int CU_TRSF_NORMALIZED_COORDINATES = 0x02; /** * Perform sRGB->linear conversion during texture read. * Flag for JCudaDriver#cuTexRefSetFlags() */ public static final int CU_TRSF_SRGB = 0x10; /** * Specifies a stream callback does not block the stream while * executing. This is the default behavior. 
* Flag for {@link JCudaDriver#cuStreamAddCallback(CUstream, CUstreamCallback, Object, int)} * * @deprecated This flag was only present in CUDA 5.0.25 (release candidate) * and may be removed (or added again) in future releases */ public static final int CU_STREAM_CALLBACK_NONBLOCKING = 0x00; /** * If set, the stream callback blocks the stream until it is * done executing. * Flag for {@link JCudaDriver#cuStreamAddCallback(CUstream, CUstreamCallback, Object, int)} * * @deprecated This flag was only present in CUDA 5.0.25 (release candidate) * and may be removed (or added again) in future releases */ public static final int CU_STREAM_CALLBACK_BLOCKING = 0x01; /** * Private inner class for the constant pointer values * CU_LAUNCH_PARAM_END, CU_LAUNCH_PARAM_BUFFER_POINTER, * and CU_LAUNCH_PARAM_BUFFER_SIZE. * * TODO: These constants could be misused: There is no * mechanism for preventing these Pointers to be used * for memory allocation. However, at the moment there * is no other way for emulating these pointer constants. */ private static class ConstantPointer extends Pointer { private ConstantPointer(long value) { super(value); } } /** * End of array terminator for the \p extra parameter to * ::cuLaunchKernel */ public static final Pointer CU_LAUNCH_PARAM_END = new ConstantPointer(0); // ((void*)0x00) /** * Indicator that the next value in the \p extra parameter to * ::cuLaunchKernel will be a pointer to a buffer containing all kernel * parameters used for launching kernel \p f. This buffer needs to * honor all alignment/padding requirements of the individual parameters. * If ::CU_LAUNCH_PARAM_BUFFER_SIZE is not also specified in the * \p extra array, then ::CU_LAUNCH_PARAM_BUFFER_POINTER will have no * effect. 
*/ public static final Pointer CU_LAUNCH_PARAM_BUFFER_POINTER = new ConstantPointer(1); //((void*)0x01) /** * Indicator that the next value in the \p extra parameter to * ::cuLaunchKernel will be a pointer to a size_t which contains the * size of the buffer specified with ::CU_LAUNCH_PARAM_BUFFER_POINTER. * It is required that ::CU_LAUNCH_PARAM_BUFFER_POINTER also be specified * in the \p extra array if the value associated with * ::CU_LAUNCH_PARAM_BUFFER_SIZE is not zero. */ public static final Pointer CU_LAUNCH_PARAM_BUFFER_SIZE = new ConstantPointer(2); // ((void*)0x02) /** * Whether a CudaException should be thrown if a method is about * to return a result code that is not CUresult.CUDA_SUCCESS */ private static boolean exceptionsEnabled = false; static { LibUtils.loadLibrary("JCudaDriver"); } /* Private constructor to prevent instantiation */ private JCudaDriver() { } /** * Set the specified log level for the JCuda driver library.
*
* Currently supported log levels: *
* LOG_QUIET: Never print anything
* LOG_ERROR: Print error messages
* LOG_TRACE: Print a trace of all native function calls
* * @param logLevel The log level to use. */ public static void setLogLevel(LogLevel logLevel) { setLogLevel(logLevel.ordinal()); } private static native void setLogLevel(int logLevel); /** * Enables or disables exceptions. By default, the methods of this class * only return the CUresult error code from the underlying CUDA function. * If exceptions are enabled, a CudaException with a detailed error * message will be thrown if a method is about to return a result code * that is not CUresult.CUDA_SUCCESS * * @param enabled Whether exceptions are enabled */ public static void setExceptionsEnabled(boolean enabled) { exceptionsEnabled = enabled; } /** * If the given result is different to CUresult.CUDA_SUCCESS and * exceptions have been enabled, this method will throw a * CudaException with an error message that corresponds to the * given result code. Otherwise, the given result is simply * returned. * * @param result The result to check * @return The result that was given as the parameter * @throws CudaException If exceptions have been enabled and * the given result code is not CUresult.CUDA_SUCCESS */ private static int checkResult(int result) { if (exceptionsEnabled && result != CUresult.CUDA_SUCCESS) { throw new CudaException(CUresult.stringFor(result)); } return result; } /** * Returns the given (address) value, adjusted to have * the given alignment. This function may be used to * align the parameters for a kernel call according * to their alignment requirements. * * @param value The address value * @param alignment The desired alignment * @return The aligned address value * @deprecated This method was intended for a simpler * kernel parameter setup in earlier CUDA versions, * and should not be required any more. It may be * removed in future releases. 
*/
    @Deprecated
    public static int align(int value, int alignment)
    {
        // Rounds value up using a bit mask built from (alignment - 1).
        // The mask only yields a multiple of the alignment when the
        // alignment is a power of two.
        return (value + alignment - 1) & ~(alignment - 1);
    }

    /**
     * A wrapper function for
     * {@link JCudaDriver#cuModuleLoadDataEx(CUmodule, Pointer, int, int[], Pointer)}
     * which allows passing in the options for the JIT compiler, and obtaining
     * the output of the JIT compiler via a {@link JITOptions} object.
*
* Note: This method should be considered as preliminary,
     * and might change in future releases.
     *
     */
    public static int cuModuleLoadDataJIT(CUmodule module, Pointer pointer, JITOptions jitOptions)
    {
        // NOTE(review): the result is returned as-is and is not passed
        // through checkResult, unlike the other wrappers in this class.
        // Confirm whether that is intentional.
        int result = cuModuleLoadDataJITNative(module, pointer, jitOptions);
        return result;
    }

    private static native int cuModuleLoadDataJITNative(CUmodule module, Pointer pointer, JITOptions jitOptions);

    /**
     *
     * Gets the string description of an error code
     *
     * Sets *pStr to the address of a NULL-terminated string description
     * of the error code error.
     * If the error code is not recognized, ::CUDA_ERROR_INVALID_VALUE
     * will be returned and *pStr will be set to the NULL address.
     * 
* * @param error - Error code to convert to string * @param pStr - Address of the string pointer. * * @return * ::CUDA_SUCCESS, * ::CUDA_ERROR_INVALID_VALUE * * @see CUresult */ public static int cuGetErrorString(int error, String pStr[]) { return checkResult(cuGetErrorStringNative(error, pStr)); } private static native int cuGetErrorStringNative(int error, String pStr[]); /** *
     * Gets the string representation of an error code enum name
     *
     * Sets *pStr to the address of a NULL-terminated string representation
     * of the name of the enum error code error.
     * If the error code is not recognized, ::CUDA_ERROR_INVALID_VALUE
     * will be returned and *pStr will be set to the NULL address.
     * 
* @param error - Error code to convert to string * @param pStr - Address of the string pointer. * * @return * ::CUDA_SUCCESS, * ::CUDA_ERROR_INVALID_VALUE * * @see CUresult */ public static int cuGetErrorName(int error, String pStr[]) { return checkResult(cuGetErrorNameNative(error, pStr)); } private static native int cuGetErrorNameNative(int error, String pStr[]); /** * Initialize the CUDA driver API. * *
     * CUresult cuInit (
     *      unsigned int  Flags )
     * 
*
*

Initialize the CUDA driver API. * Initializes the driver API and must be called before any other function * from the driver API. * Currently, the Flags parameter * must be 0. If cuInit() has not been called, any function from the * driver API will return CUDA_ERROR_NOT_INITIALIZED. *

*
* Note: *

Note that this * function may also return error codes from previous, asynchronous * launches. *

*
*

*
*
     * @param Flags Initialization flag for CUDA.
     *
     * @return CUDA_SUCCESS, CUDA_ERROR_INVALID_VALUE,
     * CUDA_ERROR_INVALID_DEVICE
     *
     */
    public static int cuInit(int Flags)
    {
        // Call the native binding, then let checkResult decide whether
        // the status is returned or reported as an exception
        int status = cuInitNative(Flags);
        return checkResult(status);
    }

    private static native int cuInitNative(int Flags);

    /**
     * Returns a handle to a compute device.
     *
     *
     * CUresult cuDeviceGet (
     *      CUdevice* device,
     *      int  ordinal )
     * 
*
*

Returns a handle to a compute device. * Returns in *device a device handle given an ordinal in the * range [0, cuDeviceGetCount()-1]. *

*
* Note: *

Note that this * function may also return error codes from previous, asynchronous * launches. *

*
*

*
* * @param device Returned device handle * @param ordinal Device number to get handle for * * @return CUDA_SUCCESS, CUDA_ERROR_DEINITIALIZED, CUDA_ERROR_NOT_INITIALIZED, * CUDA_ERROR_INVALID_CONTEXT, CUDA_ERROR_INVALID_VALUE, * CUDA_ERROR_INVALID_DEVICE * * @see JCudaDriver#cuDeviceGetAttribute * @see JCudaDriver#cuDeviceGetCount * @see JCudaDriver#cuDeviceGetName * @see JCudaDriver#cuDeviceTotalMem */ public static int cuDeviceGet(CUdevice device, int ordinal) { return checkResult(cuDeviceGetNative(device, ordinal)); } private static native int cuDeviceGetNative(CUdevice device, int ordinal); /** * Returns the number of compute-capable devices. * *
     * CUresult cuDeviceGetCount (
     *      int* count )
     * 
*
*

Returns the number of compute-capable * devices. Returns in *count the number of devices with * compute capability greater than or equal to 1.0 that are available for * execution. If there is * no such device, cuDeviceGetCount() * returns 0. *

*
* Note: *

Note that this * function may also return error codes from previous, asynchronous * launches. *

*
*

*
*
     * @param count Returned number of compute-capable devices
     *
     * @return CUDA_SUCCESS, CUDA_ERROR_DEINITIALIZED, CUDA_ERROR_NOT_INITIALIZED,
     * CUDA_ERROR_INVALID_CONTEXT, CUDA_ERROR_INVALID_VALUE
     *
     * @see JCudaDriver#cuDeviceGetAttribute
     * @see JCudaDriver#cuDeviceGetName
     * @see JCudaDriver#cuDeviceGet
     * @see JCudaDriver#cuDeviceTotalMem
     */
    public static int cuDeviceGetCount(int count[])
    {
        // Call the native binding, then let checkResult decide whether
        // the status is returned or reported as an exception
        int status = cuDeviceGetCountNative(count);
        return checkResult(status);
    }

    private static native int cuDeviceGetCountNative(int count[]);

    /**
     * Returns an identifier string for the device.
     *
     *
     * CUresult cuDeviceGetName (
     *      char* name,
     *      int  len,
     *      CUdevice dev )
     * 
*
*

Returns an identifier string for the * device. Returns an ASCII string identifying the device dev * in the NULL-terminated string pointed to by name. len specifies the maximum length of the string that may be * returned. *

*
* Note: *

Note that this * function may also return error codes from previous, asynchronous * launches. *

*
*

*
* * @param name Returned identifier string for the device * @param len Maximum length of string to store in name * @param dev Device to get identifier string for * * @return CUDA_SUCCESS, CUDA_ERROR_DEINITIALIZED, CUDA_ERROR_NOT_INITIALIZED, * CUDA_ERROR_INVALID_CONTEXT, CUDA_ERROR_INVALID_VALUE, * CUDA_ERROR_INVALID_DEVICE * * @see JCudaDriver#cuDeviceGetAttribute * @see JCudaDriver#cuDeviceGetCount * @see JCudaDriver#cuDeviceGet * @see JCudaDriver#cuDeviceTotalMem */ public static int cuDeviceGetName(byte name[], int len, CUdevice dev) { return checkResult(cuDeviceGetNameNative(name, len, dev)); } private static native int cuDeviceGetNameNative(byte name[], int len, CUdevice dev); /** * Returns the compute capability of the device. * *
     * CUresult cuDeviceComputeCapability (
     *      int* major,
     *      int* minor,
     *      CUdevice dev )
     * 
*
*

Returns the compute capability of the * device. * Deprecated: This function was deprecated * as of CUDA 5.0 and its functionality superseded by * cuDeviceGetAttribute(). *

*

Returns in *major and *minor the major and minor revision numbers that define the * compute capability of the device dev. *

*
* Note: *

Note that this * function may also return error codes from previous, asynchronous * launches. *

*
*

*
* * @param major Major revision number * @param minor Minor revision number * @param dev Device handle * * @return CUDA_SUCCESS, CUDA_ERROR_DEINITIALIZED, CUDA_ERROR_NOT_INITIALIZED, * CUDA_ERROR_INVALID_CONTEXT, CUDA_ERROR_INVALID_VALUE, * CUDA_ERROR_INVALID_DEVICE * * @see JCudaDriver#cuDeviceGetAttribute * @see JCudaDriver#cuDeviceGetCount * @see JCudaDriver#cuDeviceGetName * @see JCudaDriver#cuDeviceGet * @see JCudaDriver#cuDeviceTotalMem */ public static int cuDeviceComputeCapability(int major[], int minor[], CUdevice dev) { return checkResult(cuDeviceComputeCapabilityNative(major, minor, dev)); } private static native int cuDeviceComputeCapabilityNative(int major[], int minor[], CUdevice dev); /** * Returns the total amount of memory on the device. * *
     * CUresult cuDeviceTotalMem (
     *      size_t* bytes,
     *      CUdevice dev )
     * 
*
*

Returns the total amount of memory on * the device. Returns in *bytes the total amount of memory * available on the device dev in bytes. *

*
* Note: *

Note that this * function may also return error codes from previous, asynchronous * launches. *

*
*

*
* * @param bytes Returned memory available on device in bytes * @param dev Device handle * * @return CUDA_SUCCESS, CUDA_ERROR_DEINITIALIZED, CUDA_ERROR_NOT_INITIALIZED, * CUDA_ERROR_INVALID_CONTEXT, CUDA_ERROR_INVALID_VALUE, * CUDA_ERROR_INVALID_DEVICE * * @see JCudaDriver#cuDeviceGetAttribute * @see JCudaDriver#cuDeviceGetCount * @see JCudaDriver#cuDeviceGetName * @see JCudaDriver#cuDeviceGet */ public static int cuDeviceTotalMem(long bytes[], CUdevice dev) { return checkResult(cuDeviceTotalMemNative(bytes, dev)); } private static native int cuDeviceTotalMemNative(long bytes[], CUdevice dev); /** * Returns properties for a selected device. * *
     * CUresult cuDeviceGetProperties (
     *      CUdevprop* prop,
     *      CUdevice dev )
     * 
*
*

Returns properties for a selected device. * Deprecated: This function was deprecated * as of CUDA 5.0 and replaced by cuDeviceGetAttribute(). *

*

Returns in *prop the properties * of device dev. The CUdevprop structure is defined as: *

*
     typedef struct CUdevprop_st {
     *      int maxThreadsPerBlock;
     *      int maxThreadsDim[3];
     *      int maxGridSize[3];
     *      int sharedMemPerBlock;
     *      int totalConstantMemory;
     *      int SIMDWidth;
     *      int memPitch;
     *      int regsPerBlock;
     *      int clockRate;
     *      int textureAlign
     *   } CUdevprop;
* where:

*
    *
  • *

    maxThreadsPerBlock is the * maximum number of threads per block; *

    *
  • *
  • *

    maxThreadsDim[3] is the maximum * sizes of each dimension of a block; *

    *
  • *
  • *

    maxGridSize[3] is the maximum * sizes of each dimension of a grid; *

    *
  • *
  • *

    sharedMemPerBlock is the total * amount of shared memory available per block in bytes; *

    *
  • *
  • *

    totalConstantMemory is the * total amount of constant memory available on the device in bytes; *

    *
  • *
  • *

    SIMDWidth is the warp * size; *

    *
  • *
  • *

    memPitch is the maximum pitch * allowed by the memory copy functions that involve memory regions * allocated through cuMemAllocPitch(); *

    *
  • *
  • *

    regsPerBlock is the total * number of registers available per block; *

    *
  • *
  • *

    clockRate is the clock frequency * in kilohertz; *

    *
  • *
  • *

    textureAlign is the alignment * requirement; texture base addresses that are aligned to textureAlign * bytes do not need an offset * applied to texture fetches. *

    *
  • *
*

*
* Note: *

Note that this * function may also return error codes from previous, asynchronous * launches. *

*
*

*
* * @param prop Returned properties of device * @param dev Device to get properties for * * @return CUDA_SUCCESS, CUDA_ERROR_DEINITIALIZED, CUDA_ERROR_NOT_INITIALIZED, * CUDA_ERROR_INVALID_CONTEXT, CUDA_ERROR_INVALID_VALUE, * CUDA_ERROR_INVALID_DEVICE * * @see JCudaDriver#cuDeviceGetAttribute * @see JCudaDriver#cuDeviceGetCount * @see JCudaDriver#cuDeviceGetName * @see JCudaDriver#cuDeviceGet * @see JCudaDriver#cuDeviceTotalMem */ public static int cuDeviceGetProperties(CUdevprop prop, CUdevice dev) { return checkResult(cuDeviceGetPropertiesNative(prop, dev)); } private static native int cuDeviceGetPropertiesNative(CUdevprop prop, CUdevice dev); /** * Returns information about the device. * *
     * CUresult cuDeviceGetAttribute (
     *      int* pi,
     *      CUdevice_attribute attrib,
     *      CUdevice dev )
     * 
*
*

Returns information about the device. * Returns in *pi the integer value of the attribute attrib on device dev. The supported attributes are: *

    *
  • *

    CU_DEVICE_ATTRIBUTE_MAX_THREADS_PER_BLOCK: Maximum number of threads * per block; *

    *
  • *
  • *

    CU_DEVICE_ATTRIBUTE_MAX_BLOCK_DIM_X: * Maximum x-dimension of a block; *

    *
  • *
  • *

    CU_DEVICE_ATTRIBUTE_MAX_BLOCK_DIM_Y: * Maximum y-dimension of a block; *

    *
  • *
  • *

    CU_DEVICE_ATTRIBUTE_MAX_BLOCK_DIM_Z: * Maximum z-dimension of a block; *

    *
  • *
  • *

    CU_DEVICE_ATTRIBUTE_MAX_GRID_DIM_X: * Maximum x-dimension of a grid; *

    *
  • *
  • *

    CU_DEVICE_ATTRIBUTE_MAX_GRID_DIM_Y: * Maximum y-dimension of a grid; *

    *
  • *
  • *

    CU_DEVICE_ATTRIBUTE_MAX_GRID_DIM_Z: * Maximum z-dimension of a grid; *

    *
  • *
  • *

    CU_DEVICE_ATTRIBUTE_MAX_SHARED_MEMORY_PER_BLOCK: Maximum amount of * shared memory available to a thread block in bytes; this amount is * shared by all thread blocks simultaneously * resident on a multiprocessor; *

    *
  • *
  • *

    CU_DEVICE_ATTRIBUTE_TOTAL_CONSTANT_MEMORY: Memory available on device * for __constant__ variables in a CUDA C kernel in bytes; *

    *
  • *
  • *

    CU_DEVICE_ATTRIBUTE_WARP_SIZE: * Warp size in threads; *

    *
  • *
  • *

    CU_DEVICE_ATTRIBUTE_MAX_PITCH: * Maximum pitch in bytes allowed by the memory copy functions that * involve memory regions allocated through cuMemAllocPitch(); *

    *
  • *
  • *

    CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE1D_WIDTH: Maximum 1D texture * width; *

    *
  • *
  • *

    CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE1D_LINEAR_WIDTH: Maximum width for * a 1D texture bound to linear memory; *

    *
  • *
  • *

    CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE1D_MIPMAPPED_WIDTH: Maximum * mipmapped 1D texture width; *

    *
  • *
  • *

    CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE2D_WIDTH: Maximum 2D texture * width; *

    *
  • *
  • *

    CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE2D_HEIGHT: Maximum 2D texture * height; *

    *
  • *
  • *

    CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE2D_LINEAR_WIDTH: Maximum width for * a 2D texture bound to linear memory; *

    *
  • *
  • *

    CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE2D_LINEAR_HEIGHT: Maximum height * for a 2D texture bound to linear memory; *

    *
  • *
  • *

    CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE2D_LINEAR_PITCH: Maximum pitch in * bytes for a 2D texture bound to linear memory; *

    *
  • *
  • *

    CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE2D_MIPMAPPED_WIDTH: Maximum * mipmapped 2D texture width; *

    *
  • *
  • *

    CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE2D_MIPMAPPED_HEIGHT: Maximum * mipmapped 2D texture height; *

    *
  • *
  • *

    CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE3D_WIDTH: Maximum 3D texture * width; *

    *
  • *
  • *

    CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE3D_HEIGHT: Maximum 3D texture * height; *

    *
  • *
  • *

    CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE3D_DEPTH: Maximum 3D texture * depth; *

    *
  • *
  • *

    CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE3D_WIDTH_ALTERNATE: Alternate * maximum 3D texture width, 0 if no alternate maximum 3D texture size is * supported; *

    *
  • *
  • *

    CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE3D_HEIGHT_ALTERNATE: Alternate * maximum 3D texture height, 0 if no alternate maximum 3D texture size * is supported; *

    *
  • *
  • *

    CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE3D_DEPTH_ALTERNATE: Alternate * maximum 3D texture depth, 0 if no alternate maximum 3D texture size is * supported; *

    *
  • *
  • *

    CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURECUBEMAP_WIDTH: Maximum cubemap * texture width or height; *

    *
  • *
  • *

    CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE1D_LAYERED_WIDTH: Maximum 1D * layered texture width; *

    *
  • *
  • *

    CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE1D_LAYERED_LAYERS: Maximum layers * in a 1D layered texture; *

    *
  • *
  • *

    CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE2D_LAYERED_WIDTH: Maximum 2D * layered texture width; *

    *
  • *
  • *

    CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE2D_LAYERED_HEIGHT: Maximum 2D * layered texture height; *

    *
  • *
  • *

    CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE2D_LAYERED_LAYERS: Maximum layers * in a 2D layered texture; *

    *
  • *
  • *

    CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURECUBEMAP_LAYERED_WIDTH: Maximum * cubemap layered texture width or height; *

    *
  • *
  • *

    CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURECUBEMAP_LAYERED_LAYERS: Maximum * layers in a cubemap layered texture; *

    *
  • *
  • *

    CU_DEVICE_ATTRIBUTE_MAXIMUM_SURFACE1D_WIDTH: Maximum 1D surface * width; *

    *
  • *
  • *

    CU_DEVICE_ATTRIBUTE_MAXIMUM_SURFACE2D_WIDTH: Maximum 2D surface * width; *

    *
  • *
  • *

    CU_DEVICE_ATTRIBUTE_MAXIMUM_SURFACE2D_HEIGHT: Maximum 2D surface * height; *

    *
  • *
  • *

    CU_DEVICE_ATTRIBUTE_MAXIMUM_SURFACE3D_WIDTH: Maximum 3D surface * width; *

    *
  • *
  • *

    CU_DEVICE_ATTRIBUTE_MAXIMUM_SURFACE3D_HEIGHT: Maximum 3D surface * height; *

    *
  • *
  • *

    CU_DEVICE_ATTRIBUTE_MAXIMUM_SURFACE3D_DEPTH: Maximum 3D surface * depth; *

    *
  • *
  • *

    CU_DEVICE_ATTRIBUTE_MAXIMUM_SURFACE1D_LAYERED_WIDTH: Maximum 1D * layered surface width; *

    *
  • *
  • *

    CU_DEVICE_ATTRIBUTE_MAXIMUM_SURFACE1D_LAYERED_LAYERS: Maximum layers * in a 1D layered surface; *

    *
  • *
  • *

    CU_DEVICE_ATTRIBUTE_MAXIMUM_SURFACE2D_LAYERED_WIDTH: Maximum 2D * layered surface width; *

    *
  • *
  • *

    CU_DEVICE_ATTRIBUTE_MAXIMUM_SURFACE2D_LAYERED_HEIGHT: Maximum 2D * layered surface height; *

    *
  • *
  • *

    CU_DEVICE_ATTRIBUTE_MAXIMUM_SURFACE2D_LAYERED_LAYERS: Maximum layers * in a 2D layered surface; *

    *
  • *
  • *

    CU_DEVICE_ATTRIBUTE_MAXIMUM_SURFACECUBEMAP_WIDTH: Maximum cubemap * surface width; *

    *
  • *
  • *

    CU_DEVICE_ATTRIBUTE_MAXIMUM_SURFACECUBEMAP_LAYERED_WIDTH: Maximum * cubemap layered surface width; *

    *
  • *
  • *

    CU_DEVICE_ATTRIBUTE_MAXIMUM_SURFACECUBEMAP_LAYERED_LAYERS: Maximum * layers in a cubemap layered surface; *

    *
  • *
  • *

    CU_DEVICE_ATTRIBUTE_MAX_REGISTERS_PER_BLOCK: Maximum number of 32-bit * registers available to a thread block; this number is shared by all * thread blocks simultaneously * resident on a multiprocessor; *

    *
  • *
  • *

    CU_DEVICE_ATTRIBUTE_CLOCK_RATE: * Typical clock frequency in kilohertz; *

    *
  • *
  • *

    CU_DEVICE_ATTRIBUTE_TEXTURE_ALIGNMENT: * Alignment requirement; texture base addresses aligned to textureAlign * bytes do not need an offset applied to texture fetches; *

    *
  • *
  • *

    CU_DEVICE_ATTRIBUTE_TEXTURE_PITCH_ALIGNMENT: Pitch alignment * requirement for 2D texture references bound to pitched memory; *

    *
  • *
  • *

    CU_DEVICE_ATTRIBUTE_GPU_OVERLAP: * 1 if the device can concurrently copy memory between host and device * while executing a kernel, or 0 if not; *

    *
  • *
  • *

    CU_DEVICE_ATTRIBUTE_MULTIPROCESSOR_COUNT: Number of multiprocessors * on the device; *

    *
  • *
  • *

    CU_DEVICE_ATTRIBUTE_KERNEL_EXEC_TIMEOUT: * 1 if there is a run time limit for kernels executed on the device, or * 0 if not; *

    *
  • *
  • *

    CU_DEVICE_ATTRIBUTE_INTEGRATED: * 1 if the device is integrated with the memory subsystem, or 0 if not; *

    *
  • *
  • *

    CU_DEVICE_ATTRIBUTE_CAN_MAP_HOST_MEMORY: * 1 if the device can map host memory into the CUDA address space, or 0 * if not; *

    *
  • *
  • *
    * CU_DEVICE_ATTRIBUTE_COMPUTE_MODE: * Compute mode that device is currently in. Available modes are as * follows: *
      *
    • *

      CU_COMPUTEMODE_DEFAULT: * Default mode - Device is not restricted and can have multiple CUDA * contexts present at a single time. *

      *
    • *
    • *

      CU_COMPUTEMODE_EXCLUSIVE: * Compute-exclusive mode - Device can have only one CUDA context present * on it at a time. *

      *
    • *
    • *

      CU_COMPUTEMODE_PROHIBITED: * Compute-prohibited mode - Device is prohibited from creating new CUDA * contexts. *

      *
    • *
    • *

      CU_COMPUTEMODE_EXCLUSIVE_PROCESS: Compute-exclusive-process mode - * Device can have only one context used by a single process at a time. *

      *
    • *
    *
    *
  • *
  • *

    CU_DEVICE_ATTRIBUTE_CONCURRENT_KERNELS: * 1 if the device supports executing multiple kernels within the same * context simultaneously, or 0 if not. It is not guaranteed * that multiple kernels will be * resident on the device concurrently so this feature should not be * relied upon for correctness; *

    *
  • *
  • *

    CU_DEVICE_ATTRIBUTE_ECC_ENABLED: * 1 if error correction is enabled on the device, 0 if error correction * is disabled or not supported by the device; *

    *
  • *
  • *

    CU_DEVICE_ATTRIBUTE_PCI_BUS_ID: * PCI bus identifier of the device; *

    *
  • *
  • *

    CU_DEVICE_ATTRIBUTE_PCI_DEVICE_ID: * PCI device (also known as slot) identifier of the device; *

    *
  • *
  • *

    CU_DEVICE_ATTRIBUTE_TCC_DRIVER: * 1 if the device is using a TCC driver. TCC is only available on Tesla * hardware running Windows Vista or later; *

    *
  • *
  • *

    CU_DEVICE_ATTRIBUTE_MEMORY_CLOCK_RATE: * Peak memory clock frequency in kilohertz; *

    *
  • *
  • *

    CU_DEVICE_ATTRIBUTE_GLOBAL_MEMORY_BUS_WIDTH: Global memory bus width * in bits; *

    *
  • *
  • *

    CU_DEVICE_ATTRIBUTE_L2_CACHE_SIZE: * Size of L2 cache in bytes. 0 if the device doesn't have L2 cache; *

    *
  • *
  • *

    CU_DEVICE_ATTRIBUTE_MAX_THREADS_PER_MULTIPROCESSOR: Maximum resident * threads per multiprocessor; *

    *
  • *
  • *

    CU_DEVICE_ATTRIBUTE_UNIFIED_ADDRESSING: * 1 if the device shares a unified address space with the host, or 0 if * not; *

    *
  • *
  • *

    CU_DEVICE_ATTRIBUTE_COMPUTE_CAPABILITY_MAJOR: Major compute capability * version number; *

    *
  • *
  • *

    CU_DEVICE_ATTRIBUTE_COMPUTE_CAPABILITY_MINOR: Minor compute capability * version number; *

    *
  • *
*

*
* Note: *

Note that this * function may also return error codes from previous, asynchronous * launches. *

*
*

*
* * @param pi Returned device attribute value * @param attrib Device attribute to query * @param dev Device handle * * @return CUDA_SUCCESS, CUDA_ERROR_DEINITIALIZED, CUDA_ERROR_NOT_INITIALIZED, * CUDA_ERROR_INVALID_CONTEXT, CUDA_ERROR_INVALID_VALUE, * CUDA_ERROR_INVALID_DEVICE * * @see JCudaDriver#cuDeviceGetCount * @see JCudaDriver#cuDeviceGetName * @see JCudaDriver#cuDeviceGet * @see JCudaDriver#cuDeviceTotalMem */ public static int cuDeviceGetAttribute(int pi[], int attrib, CUdevice dev) { return checkResult(cuDeviceGetAttributeNative(pi, attrib, dev)); } private static native int cuDeviceGetAttributeNative(int pi[], int attrib, CUdevice dev); /** * Returns the CUDA driver version. * *
     * CUresult cuDriverGetVersion (
     *      int* driverVersion )
     * 
*
*

Returns the CUDA driver version. Returns * in *driverVersion the version number of the installed CUDA * driver. This function automatically returns CUDA_ERROR_INVALID_VALUE * if the driverVersion argument is NULL. *

*
* Note: *

Note that this * function may also return error codes from previous, asynchronous * launches. *

*
*

*
*
* @param driverVersion Returns the CUDA driver version
*
* @return CUDA_SUCCESS, CUDA_ERROR_INVALID_VALUE
*
*/
public static int cuDriverGetVersion(int driverVersion[]) {
    // The native status code is routed through checkResult before being returned.
    final int status = cuDriverGetVersionNative(driverVersion);
    return checkResult(status);
}

private static native int cuDriverGetVersionNative(int driverVersion[]);

/**
 * Create a CUDA context.
 *
 *
     * CUresult cuCtxCreate (
     *      CUcontext* pctx,
     *      unsigned int  flags,
     *      CUdevice dev )
     * 
*
*

Create a CUDA context. Creates a new * CUDA context and associates it with the calling thread. The flags parameter is described below. The context is created with * a usage count of 1 and the caller of cuCtxCreate() must call * cuCtxDestroy() when done using the context. If a context is already * current to the thread, it is supplanted by the newly created context * and may be restored by a subsequent call * to cuCtxPopCurrent(). *

*

The three LSBs of the flags * parameter can be used to control how the OS thread, which owns the CUDA * context at the time of an API call, interacts with * the OS scheduler when waiting for results * from the GPU. Only one of the scheduling flags can be set when creating * a context. *

*
    *
  • *

    CU_CTX_SCHED_AUTO: The default * value if the flags parameter is zero, uses a heuristic based * on the number of active CUDA contexts in the process C and the number * of logical * processors in the system P. If * C > P, then CUDA will yield to other OS threads when waiting for * the GPU, otherwise CUDA will * not yield while waiting for * results and actively spin on the processor. *

    *
  • *
*

*
    *
  • *

    CU_CTX_SCHED_SPIN: Instruct * CUDA to actively spin when waiting for results from the GPU. This can * decrease latency when waiting for the GPU, * but may lower the performance * of CPU threads if they are performing work in parallel with the CUDA * thread. *

    *
  • *
*

*
    *
  • *

    CU_CTX_SCHED_YIELD: Instruct * CUDA to yield its thread when waiting for results from the GPU. This * can increase latency when waiting for the * GPU, but can increase the * performance of CPU threads performing work in parallel with the GPU. *

    *
  • *
*

*
    *
  • *

    CU_CTX_SCHED_BLOCKING_SYNC: * Instruct CUDA to block the CPU thread on a synchronization primitive * when waiting for the GPU to finish work. *

    *
  • *
*

*
    *
  • *

    CU_CTX_BLOCKING_SYNC: Instruct * CUDA to block the CPU thread on a synchronization primitive when * waiting for the GPU to finish work. *

    *

    Deprecated: * This flag was deprecated as of CUDA 4.0 and was replaced with * CU_CTX_SCHED_BLOCKING_SYNC. *

    *
  • *
*

*
    *
  • *

    CU_CTX_MAP_HOST: Instruct CUDA * to support mapped pinned allocations. This flag must be set in order * to allocate pinned host memory that is * accessible to the GPU. *

    *
  • *
*

*
    *
  • *

    CU_CTX_LMEM_RESIZE_TO_MAX: * Instruct CUDA to not reduce local memory after resizing local memory * for a kernel. This can prevent thrashing by local memory * allocations when launching many * kernels with high local memory usage at the cost of potentially * increased memory usage. *

    *
  • *
*

*

Context creation will fail with * CUDA_ERROR_UNKNOWN if the compute mode of the device is * CU_COMPUTEMODE_PROHIBITED. Similarly, context creation will also fail * with CUDA_ERROR_UNKNOWN if the compute mode for the device is set to * CU_COMPUTEMODE_EXCLUSIVE and there is already an active context on the * device. The function cuDeviceGetAttribute() can be used with * CU_DEVICE_ATTRIBUTE_COMPUTE_MODE to determine the compute mode of the * device. The nvidia-smi tool can be used to set the compute mode for * devices. Documentation * for nvidia-smi can be obtained by passing * a -h option to it. *

*
* Note: *

Note that this * function may also return error codes from previous, asynchronous * launches. *

*
*

*
* * @param pctx Returned context handle of the new context * @param flags Context creation flags * @param dev Device to create context on * * @return CUDA_SUCCESS, CUDA_ERROR_DEINITIALIZED, CUDA_ERROR_NOT_INITIALIZED, * CUDA_ERROR_INVALID_CONTEXT, CUDA_ERROR_INVALID_DEVICE, * CUDA_ERROR_INVALID_VALUE, CUDA_ERROR_OUT_OF_MEMORY, CUDA_ERROR_UNKNOWN * * @see JCudaDriver#cuCtxDestroy * @see JCudaDriver#cuCtxGetApiVersion * @see JCudaDriver#cuCtxGetCacheConfig * @see JCudaDriver#cuCtxGetDevice * @see JCudaDriver#cuCtxGetLimit * @see JCudaDriver#cuCtxPopCurrent * @see JCudaDriver#cuCtxPushCurrent * @see JCudaDriver#cuCtxSetCacheConfig * @see JCudaDriver#cuCtxSetLimit * @see JCudaDriver#cuCtxSynchronize */ public static int cuCtxCreate(CUcontext pctx, int flags, CUdevice dev) { return checkResult(cuCtxCreateNative(pctx, flags, dev)); } private static native int cuCtxCreateNative(CUcontext pctx, int flags, CUdevice dev); /** * Destroy a CUDA context. * *
     * CUresult cuCtxDestroy (
     *      CUcontext ctx )
     * 
*
*

Destroy a CUDA context. Destroys the * CUDA context specified by ctx. The context ctx will * be destroyed regardless of how many threads it is current to. It is * the responsibility of the calling function to ensure * that no API call issues using ctx while cuCtxDestroy() is executing. *

*

If ctx is current to the * calling thread then ctx will also be popped from the current * thread's context stack (as though cuCtxPopCurrent() were called). If * ctx is current to other threads, then ctx will * remain current to those threads, and attempting to access ctx * from those threads will result in the error * CUDA_ERROR_CONTEXT_IS_DESTROYED. *

*
* Note: *

Note that this * function may also return error codes from previous, asynchronous * launches. *

*
*

*
* * @param ctx Context to destroy * * @return CUDA_SUCCESS, CUDA_ERROR_DEINITIALIZED, CUDA_ERROR_NOT_INITIALIZED, * CUDA_ERROR_INVALID_CONTEXT, CUDA_ERROR_INVALID_VALUE * * @see JCudaDriver#cuCtxCreate * @see JCudaDriver#cuCtxGetApiVersion * @see JCudaDriver#cuCtxGetCacheConfig * @see JCudaDriver#cuCtxGetDevice * @see JCudaDriver#cuCtxGetLimit * @see JCudaDriver#cuCtxPopCurrent * @see JCudaDriver#cuCtxPushCurrent * @see JCudaDriver#cuCtxSetCacheConfig * @see JCudaDriver#cuCtxSetLimit * @see JCudaDriver#cuCtxSynchronize */ public static int cuCtxDestroy(CUcontext ctx) { return checkResult(cuCtxDestroyNative(ctx)); } private static native int cuCtxDestroyNative(CUcontext ctx); /** * Increment a context's usage-count. * *
     * CUresult cuCtxAttach (
     *      CUcontext* pctx,
     *      unsigned int  flags )
     * 
*
*

Increment a context's usage-count. * Deprecated: Note that this function is * deprecated and should not be used. *

*

Increments the usage count of the * context and passes back a context handle in *pctx that must * be passed to cuCtxDetach() when the application is done with the * context. cuCtxAttach() fails if there is no context current to the * thread. *

*

Currently, the flags parameter * must be 0. *

*
* Note: *

Note that this * function may also return error codes from previous, asynchronous * launches. *

*
*

*
* * @param pctx Returned context handle of the current context * @param flags Context attach flags (must be 0) * * @return CUDA_SUCCESS, CUDA_ERROR_DEINITIALIZED, CUDA_ERROR_NOT_INITIALIZED, * CUDA_ERROR_INVALID_CONTEXT, CUDA_ERROR_INVALID_VALUE * * @see JCudaDriver#cuCtxCreate * @see JCudaDriver#cuCtxDestroy * @see JCudaDriver#cuCtxDetach * @see JCudaDriver#cuCtxGetApiVersion * @see JCudaDriver#cuCtxGetCacheConfig * @see JCudaDriver#cuCtxGetDevice * @see JCudaDriver#cuCtxGetLimit * @see JCudaDriver#cuCtxPopCurrent * @see JCudaDriver#cuCtxPushCurrent * @see JCudaDriver#cuCtxSetCacheConfig * @see JCudaDriver#cuCtxSetLimit * @see JCudaDriver#cuCtxSynchronize */ public static int cuCtxAttach(CUcontext pctx, int flags) { return checkResult(cuCtxAttachNative(pctx, flags)); } private static native int cuCtxAttachNative(CUcontext pctx, int flags); /** * Decrement a context's usage-count. * *
     * CUresult cuCtxDetach (
     *      CUcontext ctx )
     * 
*
*

Decrement a context's usage-count. * Deprecated: Note that this function is * deprecated and should not be used. *

*

Decrements the usage count of the * context ctx, and destroys the context if the usage count goes * to 0. The context must be a handle that was passed back by cuCtxCreate() * or cuCtxAttach(), and must be current to the calling thread. *

*
* Note: *

Note that this * function may also return error codes from previous, asynchronous * launches. *

*
*

*
* * @param ctx Context to destroy * * @return CUDA_SUCCESS, CUDA_ERROR_DEINITIALIZED, CUDA_ERROR_NOT_INITIALIZED, * CUDA_ERROR_INVALID_CONTEXT * * @see JCudaDriver#cuCtxCreate * @see JCudaDriver#cuCtxDestroy * @see JCudaDriver#cuCtxGetApiVersion * @see JCudaDriver#cuCtxGetCacheConfig * @see JCudaDriver#cuCtxGetDevice * @see JCudaDriver#cuCtxGetLimit * @see JCudaDriver#cuCtxPopCurrent * @see JCudaDriver#cuCtxPushCurrent * @see JCudaDriver#cuCtxSetCacheConfig * @see JCudaDriver#cuCtxSetLimit * @see JCudaDriver#cuCtxSynchronize */ public static int cuCtxDetach(CUcontext ctx) { return checkResult(cuCtxDetachNative(ctx)); } private static native int cuCtxDetachNative(CUcontext ctx); /** * Pushes a context on the current CPU thread. * *
     * CUresult cuCtxPushCurrent (
     *      CUcontext ctx )
     * 
*
*

Pushes a context on the current CPU * thread. Pushes the given context ctx onto the CPU thread's * stack of current contexts. The specified context becomes the CPU * thread's current context, so all CUDA * functions that operate on the current * context are affected. *

*

The previous current context may be made * current again by calling cuCtxDestroy() or cuCtxPopCurrent(). *

*
* Note: *

Note that this * function may also return error codes from previous, asynchronous * launches. *

*
*

*
* * @param ctx Context to push * * @return CUDA_SUCCESS, CUDA_ERROR_DEINITIALIZED, CUDA_ERROR_NOT_INITIALIZED, * CUDA_ERROR_INVALID_CONTEXT, CUDA_ERROR_INVALID_VALUE * * @see JCudaDriver#cuCtxCreate * @see JCudaDriver#cuCtxDestroy * @see JCudaDriver#cuCtxGetApiVersion * @see JCudaDriver#cuCtxGetCacheConfig * @see JCudaDriver#cuCtxGetDevice * @see JCudaDriver#cuCtxGetLimit * @see JCudaDriver#cuCtxPopCurrent * @see JCudaDriver#cuCtxSetCacheConfig * @see JCudaDriver#cuCtxSetLimit * @see JCudaDriver#cuCtxSynchronize */ public static int cuCtxPushCurrent(CUcontext ctx) { return checkResult(cuCtxPushCurrentNative(ctx)); } private static native int cuCtxPushCurrentNative(CUcontext ctx); /** * Pops the current CUDA context from the current CPU thread. * *
     * CUresult cuCtxPopCurrent (
     *      CUcontext* pctx )
     * 
*
*

Pops the current CUDA context from the * current CPU thread. Pops the current CUDA context from the CPU thread * and passes back * the old context handle in *pctx. * That context may then be made current to a different CPU thread by * calling cuCtxPushCurrent(). *

*

If a context was current to the CPU * thread before cuCtxCreate() or cuCtxPushCurrent() was called, this * function makes that context current to the CPU thread again. *

*
* Note: *

Note that this * function may also return error codes from previous, asynchronous * launches. *

*
*

*
* * @param pctx Returned new context handle * * @return CUDA_SUCCESS, CUDA_ERROR_DEINITIALIZED, CUDA_ERROR_NOT_INITIALIZED, * CUDA_ERROR_INVALID_CONTEXT * * @see JCudaDriver#cuCtxCreate * @see JCudaDriver#cuCtxDestroy * @see JCudaDriver#cuCtxGetApiVersion * @see JCudaDriver#cuCtxGetCacheConfig * @see JCudaDriver#cuCtxGetDevice * @see JCudaDriver#cuCtxGetLimit * @see JCudaDriver#cuCtxPushCurrent * @see JCudaDriver#cuCtxSetCacheConfig * @see JCudaDriver#cuCtxSetLimit * @see JCudaDriver#cuCtxSynchronize */ public static int cuCtxPopCurrent(CUcontext pctx) { return checkResult(cuCtxPopCurrentNative(pctx)); } private static native int cuCtxPopCurrentNative(CUcontext pctx); /** * Binds the specified CUDA context to the calling CPU thread. * *
     * CUresult cuCtxSetCurrent (
     *      CUcontext ctx )
     * 
*
*

Binds the specified CUDA context to the * calling CPU thread. Binds the specified CUDA context to the calling * CPU thread. If * ctx is NULL then the CUDA * context previously bound to the calling CPU thread is unbound and * CUDA_SUCCESS is returned. *

*

If there exists a CUDA context stack on * the calling CPU thread, this will replace the top of that stack with * ctx. If ctx is NULL then this will be equivalent * to popping the top of the calling CPU thread's CUDA context stack (or * a no-op if the * calling CPU thread's CUDA context stack * is empty). *

*
* Note: *

Note that this * function may also return error codes from previous, asynchronous * launches. *

*
*

*
*
* @param ctx Context to bind to the calling CPU thread
*
* @return CUDA_SUCCESS, CUDA_ERROR_DEINITIALIZED, CUDA_ERROR_NOT_INITIALIZED,
* CUDA_ERROR_INVALID_CONTEXT
*
* @see JCudaDriver#cuCtxGetCurrent
* @see JCudaDriver#cuCtxCreate
* @see JCudaDriver#cuCtxDestroy
*/
public static int cuCtxSetCurrent(CUcontext ctx) {
    // The native status code is routed through checkResult before being returned.
    final int status = cuCtxSetCurrentNative(ctx);
    return checkResult(status);
}

private static native int cuCtxSetCurrentNative(CUcontext ctx);

/**
 * Returns the CUDA context bound to the calling CPU thread.
 *
 *
     * CUresult cuCtxGetCurrent (
     *      CUcontext* pctx )
     * 
*
*

Returns the CUDA context bound to the * calling CPU thread. Returns in *pctx the CUDA context bound * to the calling CPU thread. If no context is bound to the calling CPU * thread then *pctx is set to NULL and CUDA_SUCCESS is * returned. *

*
* Note: *

Note that this * function may also return error codes from previous, asynchronous * launches. *

*
*

*
*
* @param pctx Returned context handle
*
* @return CUDA_SUCCESS, CUDA_ERROR_DEINITIALIZED,
* CUDA_ERROR_NOT_INITIALIZED
*
* @see JCudaDriver#cuCtxSetCurrent
* @see JCudaDriver#cuCtxCreate
* @see JCudaDriver#cuCtxDestroy
*/
public static int cuCtxGetCurrent(CUcontext pctx) {
    // The native status code is routed through checkResult before being returned.
    final int status = cuCtxGetCurrentNative(pctx);
    return checkResult(status);
}

private static native int cuCtxGetCurrentNative(CUcontext pctx);

/**
 * Returns the device ID for the current context.
 *
 *
     * CUresult cuCtxGetDevice (
     *      CUdevice* device )
     * 
*
*

Returns the device ID for the current * context. Returns in *device the ordinal of the current * context's device. *

*
* Note: *

Note that this * function may also return error codes from previous, asynchronous * launches. *

*
*

*
* * @param device Returned device ID for the current context * * @return CUDA_SUCCESS, CUDA_ERROR_DEINITIALIZED, CUDA_ERROR_NOT_INITIALIZED, * CUDA_ERROR_INVALID_CONTEXT, CUDA_ERROR_INVALID_VALUE, * * @see JCudaDriver#cuCtxCreate * @see JCudaDriver#cuCtxDestroy * @see JCudaDriver#cuCtxGetApiVersion * @see JCudaDriver#cuCtxGetCacheConfig * @see JCudaDriver#cuCtxGetLimit * @see JCudaDriver#cuCtxPopCurrent * @see JCudaDriver#cuCtxPushCurrent * @see JCudaDriver#cuCtxSetCacheConfig * @see JCudaDriver#cuCtxSetLimit * @see JCudaDriver#cuCtxSynchronize */ public static int cuCtxGetDevice(CUdevice device) { return checkResult(cuCtxGetDeviceNative(device)); } private static native int cuCtxGetDeviceNative(CUdevice device); /** * Block for a context's tasks to complete. * *
     * CUresult cuCtxSynchronize (
     *      void )
     * 
*
*

Block for a context's tasks to complete. * Blocks until the device has completed all preceding requested tasks. * cuCtxSynchronize() returns an error if one of the preceding tasks * failed. If the context was created with the CU_CTX_SCHED_BLOCKING_SYNC * flag, the CPU thread will block until the GPU context has finished its * work. *

*
* Note: *

Note that this * function may also return error codes from previous, asynchronous * launches. *

*
*

*
* * * @return CUDA_SUCCESS, CUDA_ERROR_DEINITIALIZED, CUDA_ERROR_NOT_INITIALIZED, * CUDA_ERROR_INVALID_CONTEXT * * @see JCudaDriver#cuCtxCreate * @see JCudaDriver#cuCtxDestroy * @see JCudaDriver#cuCtxGetApiVersion * @see JCudaDriver#cuCtxGetCacheConfig * @see JCudaDriver#cuCtxGetDevice * @see JCudaDriver#cuCtxGetLimit * @see JCudaDriver#cuCtxPopCurrent * @see JCudaDriver#cuCtxPushCurrent * @see JCudaDriver#cuCtxSetCacheConfig * @see JCudaDriver#cuCtxSetLimit */ public static int cuCtxSynchronize() { return checkResult(cuCtxSynchronizeNative()); } private static native int cuCtxSynchronizeNative(); /** * Loads a compute module. * *
     * CUresult cuModuleLoad (
     *      CUmodule* module,
     *      const char* fname )
     * 
*
*

Loads a compute module. Takes a filename * fname and loads the corresponding module module * into the current context. The CUDA driver API does not attempt to * lazily allocate the resources needed by a module; if the * memory for functions and data (constant * and global) needed by the module cannot be allocated, cuModuleLoad() * fails. The file should be a cubin file as output by nvcc, or a PTX file either as output by nvcc * or handwritten, or a fatbin file as output by nvcc * from toolchain 4.0 or later. *

*
* Note: *

Note that this * function may also return error codes from previous, asynchronous * launches. *

*
*

*
* * @param module Returned module * @param fname Filename of module to load * * @return CUDA_SUCCESS, CUDA_ERROR_DEINITIALIZED, CUDA_ERROR_NOT_INITIALIZED, * CUDA_ERROR_INVALID_CONTEXT, CUDA_ERROR_INVALID_VALUE, CUDA_ERROR_NOT_FOUND, * CUDA_ERROR_OUT_OF_MEMORY, CUDA_ERROR_FILE_NOT_FOUND, * CUDA_ERROR_SHARED_OBJECT_SYMBOL_NOT_FOUND, * CUDA_ERROR_SHARED_OBJECT_INIT_FAILED * * @see JCudaDriver#cuModuleGetFunction * @see JCudaDriver#cuModuleGetGlobal * @see JCudaDriver#cuModuleGetTexRef * @see JCudaDriver#cuModuleLoadData * @see JCudaDriver#cuModuleLoadDataEx * @see JCudaDriver#cuModuleLoadFatBinary * @see JCudaDriver#cuModuleUnload */ public static int cuModuleLoad(CUmodule module, String fname) { return checkResult(cuModuleLoadNative(module, fname)); } private static native int cuModuleLoadNative(CUmodule module, String fname); /** * Load a module's data. * *
     * CUresult cuModuleLoadData (
     *      CUmodule* module,
     *      const void* image )
     * 
*
*

Load a module's data. Takes a pointer * image and loads the corresponding module module * into the current context. The pointer may be obtained by mapping a * cubin or PTX or fatbin file, passing a cubin or PTX or * fatbin file as a NULL-terminated text * string, or incorporating a cubin or fatbin object into the executable * resources and * using operating system calls such as * Windows FindResource() to obtain the pointer. *

*
* Note: *

Note that this * function may also return error codes from previous, asynchronous * launches. *

*
*

*
* * @param module Returned module * @param image Module data to load * * @return CUDA_SUCCESS, CUDA_ERROR_DEINITIALIZED, CUDA_ERROR_NOT_INITIALIZED, * CUDA_ERROR_INVALID_CONTEXT, CUDA_ERROR_INVALID_VALUE, * CUDA_ERROR_OUT_OF_MEMORY, CUDA_ERROR_SHARED_OBJECT_SYMBOL_NOT_FOUND, * CUDA_ERROR_SHARED_OBJECT_INIT_FAILED * * @see JCudaDriver#cuModuleGetFunction * @see JCudaDriver#cuModuleGetGlobal * @see JCudaDriver#cuModuleGetTexRef * @see JCudaDriver#cuModuleLoad * @see JCudaDriver#cuModuleLoadDataEx * @see JCudaDriver#cuModuleLoadFatBinary * @see JCudaDriver#cuModuleUnload */ public static int cuModuleLoadData(CUmodule module, byte image[]) { return checkResult(cuModuleLoadDataNative(module, image)); } private static native int cuModuleLoadDataNative(CUmodule module, byte image[]); /** * Load a module's data with options.
*
* Note: It is hardly possible to properly pass in the required * option values for this method. Thus, the arguments here must be
* numOptions=0
* options=new int[0]
* optionValues=Pointer.to(new int[0])
* For passing in real options, use * {@link #cuModuleLoadDataJIT(CUmodule, Pointer, JITOptions)} instead * *
     * CUresult cuModuleLoadDataEx (
     *      CUmodule* module,
     *      const void* image,
     *      unsigned int  numOptions,
     *      CUjit_option* options,
     *      void** optionValues )
     * 
*
*

Load a module's data with options. Takes * a pointer image and loads the corresponding module module into the current context. The pointer may be obtained by * mapping a cubin or PTX or fatbin file, passing a cubin or PTX or * fatbin file as a NULL-terminated text * string, or incorporating a cubin or fatbin object into the executable * resources and * using operating system calls such as * Windows FindResource() to obtain the pointer. Options are * passed as an array via options and any corresponding * parameters are passed in optionValues. The number of total * options is supplied via numOptions. Any outputs will be * returned via optionValues. Supported options are (types for * the option values are specified in parentheses after the option name): *

*
    *
  • *

    CU_JIT_MAX_REGISTERS: (unsigned * int) input specifies the maximum number of registers per thread; *

    *
  • *
  • *

    CU_JIT_THREADS_PER_BLOCK: * (unsigned int) input specifies number of threads per block to target * compilation for; output returns the number of threads * the compiler actually targeted; *

    *
  • *
  • *

    CU_JIT_WALL_TIME: (float) * output returns the float value of wall clock time, in milliseconds, * spent compiling the PTX code; *

    *
  • *
  • *

    CU_JIT_INFO_LOG_BUFFER: (char*) * input is a pointer to a buffer in which to print any informational log * messages from PTX assembly (the buffer size * is specified via option * CU_JIT_INFO_LOG_BUFFER_SIZE_BYTES); *

    *
  • *
  • *

    CU_JIT_INFO_LOG_BUFFER_SIZE_BYTES: * (unsigned int) input is the size in bytes of the buffer; output is the * number of bytes filled with messages; *

    *
  • *
  • *

    CU_JIT_ERROR_LOG_BUFFER: * (char*) input is a pointer to a buffer in which to print any error log * messages from PTX assembly (the buffer size is specified * via option * CU_JIT_ERROR_LOG_BUFFER_SIZE_BYTES); *

    *
  • *
  • *

    CU_JIT_ERROR_LOG_BUFFER_SIZE_BYTES: * (unsigned int) input is the size in bytes of the buffer; output is the * number of bytes filled with messages; *

    *
  • *
  • *

    CU_JIT_OPTIMIZATION_LEVEL: * (unsigned int) input is the level of optimization to apply to generated * code (0 - 4), with 4 being the default and highest * level; *

    *
  • *
  • *

    CU_JIT_TARGET_FROM_CUCONTEXT: * (No option value) causes compilation target to be determined based on * current attached context (default); *

    *
  • *
  • *
    * CU_JIT_TARGET: (unsigned int * for enumerated type CUjit_target_enum) input is the compilation target * based on supplied CUjit_target_enum; * possible values are: *
      *
    • *

      CU_TARGET_COMPUTE_10

      *
    • *
    • *

      CU_TARGET_COMPUTE_11

      *
    • *
    • *

      CU_TARGET_COMPUTE_12

      *
    • *
    • *

      CU_TARGET_COMPUTE_13

      *
    • *
    • *

      CU_TARGET_COMPUTE_20

      *
    • *
    *
    *
  • *
  • *
    * CU_JIT_FALLBACK_STRATEGY: * (unsigned int for enumerated type CUjit_fallback_enum) chooses fallback * strategy if matching cubin is not found; possible * values are: *
      *
    • *

      CU_PREFER_PTX

      *
    • *
    • *

      CU_PREFER_BINARY

      *
    • *
    *
    *
  • *
*

*
* Note: *

Note that this * function may also return error codes from previous, asynchronous * launches. *

*
*

*
* * @param module Returned module * @param image Module data to load * @param numOptions Number of options * @param options Options for JIT * @param optionValues Option values for JIT * * @return CUDA_SUCCESS, CUDA_ERROR_DEINITIALIZED, CUDA_ERROR_NOT_INITIALIZED, * CUDA_ERROR_INVALID_CONTEXT, CUDA_ERROR_INVALID_VALUE, * CUDA_ERROR_OUT_OF_MEMORY, CUDA_ERROR_NO_BINARY_FOR_GPU, * CUDA_ERROR_SHARED_OBJECT_SYMBOL_NOT_FOUND, * CUDA_ERROR_SHARED_OBJECT_INIT_FAILED * * @see JCudaDriver#cuModuleGetFunction * @see JCudaDriver#cuModuleGetGlobal * @see JCudaDriver#cuModuleGetTexRef * @see JCudaDriver#cuModuleLoad * @see JCudaDriver#cuModuleLoadData * @see JCudaDriver#cuModuleLoadFatBinary * @see JCudaDriver#cuModuleUnload */ public static int cuModuleLoadDataEx (CUmodule phMod, Pointer p, int numOptions, int options[], Pointer optionValues) { return checkResult(cuModuleLoadDataExNative(phMod, p, numOptions, options, optionValues)); } private static native int cuModuleLoadDataExNative(CUmodule phMod, Pointer p, int numOptions, int options[], Pointer optionValues); /** * Load a module's data. * *
     * CUresult cuModuleLoadFatBinary (
     *      CUmodule* module,
     *      const void* fatCubin )
     * 
*
*

Load a module's data. Takes a pointer * fatCubin and loads the corresponding module module * into the current context. The pointer represents a fat binary object, * which is a collection of different cubin and/or PTX * files, all representing the same device * code, but compiled and optimized for different architectures. *

*

Prior to CUDA 4.0, there was no * documented API for constructing and using fat binary objects by * programmers. Starting with * CUDA 4.0, fat binary objects can be * constructed by providing the -fatbin option to nvcc. * More information can be found in the nvcc document. *

*
* Note: *

Note that this * function may also return error codes from previous, asynchronous * launches. *

*
*

*
* * @param module Returned module * @param fatCubin Fat binary to load * * @return CUDA_SUCCESS, CUDA_ERROR_DEINITIALIZED, CUDA_ERROR_NOT_INITIALIZED, * CUDA_ERROR_INVALID_CONTEXT, CUDA_ERROR_INVALID_VALUE, CUDA_ERROR_NOT_FOUND, * CUDA_ERROR_OUT_OF_MEMORY, CUDA_ERROR_NO_BINARY_FOR_GPU, * CUDA_ERROR_SHARED_OBJECT_SYMBOL_NOT_FOUND, * CUDA_ERROR_SHARED_OBJECT_INIT_FAILED * * @see JCudaDriver#cuModuleGetFunction * @see JCudaDriver#cuModuleGetGlobal * @see JCudaDriver#cuModuleGetTexRef * @see JCudaDriver#cuModuleLoad * @see JCudaDriver#cuModuleLoadData * @see JCudaDriver#cuModuleLoadDataEx * @see JCudaDriver#cuModuleUnload */ public static int cuModuleLoadFatBinary(CUmodule module, byte fatCubin[]) { return checkResult(cuModuleLoadFatBinaryNative(module, fatCubin)); } private static native int cuModuleLoadFatBinaryNative(CUmodule module, byte fatCubin[]); /** * Unloads a module. * *
     * CUresult cuModuleUnload (
     *      CUmodule hmod )
     * 
*
*

Unloads a module. Unloads a module hmod from the current context. *

*
* Note: *

Note that this * function may also return error codes from previous, asynchronous * launches. *

*
*

*
* * @param hmod Module to unload * * @return CUDA_SUCCESS, CUDA_ERROR_DEINITIALIZED, CUDA_ERROR_NOT_INITIALIZED, * CUDA_ERROR_INVALID_CONTEXT, CUDA_ERROR_INVALID_VALUE * * @see JCudaDriver#cuModuleGetFunction * @see JCudaDriver#cuModuleGetGlobal * @see JCudaDriver#cuModuleGetTexRef * @see JCudaDriver#cuModuleLoad * @see JCudaDriver#cuModuleLoadData * @see JCudaDriver#cuModuleLoadDataEx * @see JCudaDriver#cuModuleLoadFatBinary */ public static int cuModuleUnload(CUmodule hmod) { return checkResult(cuModuleUnloadNative(hmod)); } private static native int cuModuleUnloadNative(CUmodule hmod); /** * Returns a function handle. * *
     * CUresult cuModuleGetFunction (
     *      CUfunction* hfunc,
     *      CUmodule hmod,
     *      const char* name )
     * 
*
*

Returns a function handle. Returns in * *hfunc the handle of the function of name name * located in module hmod. If no function of that name exists, * cuModuleGetFunction() returns CUDA_ERROR_NOT_FOUND. *

*
* Note: *

Note that this * function may also return error codes from previous, asynchronous * launches. *

*
*

*
* * @param hfunc Returned function handle * @param hmod Module to retrieve function from * @param name Name of function to retrieve * * @return CUDA_SUCCESS, CUDA_ERROR_DEINITIALIZED, CUDA_ERROR_NOT_INITIALIZED, * CUDA_ERROR_INVALID_CONTEXT, CUDA_ERROR_INVALID_VALUE, * CUDA_ERROR_NOT_FOUND * * @see JCudaDriver#cuModuleGetGlobal * @see JCudaDriver#cuModuleGetTexRef * @see JCudaDriver#cuModuleLoad * @see JCudaDriver#cuModuleLoadData * @see JCudaDriver#cuModuleLoadDataEx * @see JCudaDriver#cuModuleLoadFatBinary * @see JCudaDriver#cuModuleUnload */ public static int cuModuleGetFunction(CUfunction hfunc, CUmodule hmod, String name) { return checkResult(cuModuleGetFunctionNative(hfunc, hmod, name)); } private static native int cuModuleGetFunctionNative(CUfunction hfunc, CUmodule hmod, String name); /** * Returns a global pointer from a module. * *
     * CUresult cuModuleGetGlobal (
     *      CUdeviceptr* dptr,
     *      size_t* bytes,
     *      CUmodule hmod,
     *      const char* name )
     * 
*
*

Returns a global pointer from a module. * Returns in *dptr and *bytes the base pointer and * size of the global of name name located in module hmod. If no variable of that name exists, cuModuleGetGlobal() * returns CUDA_ERROR_NOT_FOUND. Both parameters dptr and bytes are optional. If one of them is NULL, it is ignored. *

*
* Note: *

Note that this * function may also return error codes from previous, asynchronous * launches. *

*
*

*
* * @param dptr Returned global device pointer * @param bytes Returned global size in bytes * @param hmod Module to retrieve global from * @param name Name of global to retrieve * * @return CUDA_SUCCESS, CUDA_ERROR_DEINITIALIZED, CUDA_ERROR_NOT_INITIALIZED, * CUDA_ERROR_INVALID_CONTEXT, CUDA_ERROR_INVALID_VALUE, * CUDA_ERROR_NOT_FOUND * * @see JCudaDriver#cuModuleGetFunction * @see JCudaDriver#cuModuleGetTexRef * @see JCudaDriver#cuModuleLoad * @see JCudaDriver#cuModuleLoadData * @see JCudaDriver#cuModuleLoadDataEx * @see JCudaDriver#cuModuleLoadFatBinary * @see JCudaDriver#cuModuleUnload */ public static int cuModuleGetGlobal(CUdeviceptr dptr, long bytes[], CUmodule hmod, String name) { return checkResult(cuModuleGetGlobalNative(dptr, bytes, hmod, name)); } private static native int cuModuleGetGlobalNative(CUdeviceptr dptr, long bytes[], CUmodule hmod, String name); /** * Returns a handle to a texture reference. * *
     * CUresult cuModuleGetTexRef (
     *      CUtexref* pTexRef,
     *      CUmodule hmod,
     *      const char* name )
     * 
*
*

Returns a handle to a texture reference. * Returns in *pTexRef the handle of the texture reference of * name name in the module hmod. If no texture * reference of that name exists, cuModuleGetTexRef() returns * CUDA_ERROR_NOT_FOUND. This texture reference handle should not be * destroyed, since it will be destroyed when the module is unloaded. *

*
* Note: *

Note that this * function may also return error codes from previous, asynchronous * launches. *

*
*

*
* * @param pTexRef Returned texture reference * @param hmod Module to retrieve texture reference from * @param name Name of texture reference to retrieve * * @return CUDA_SUCCESS, CUDA_ERROR_DEINITIALIZED, CUDA_ERROR_NOT_INITIALIZED, * CUDA_ERROR_INVALID_CONTEXT, CUDA_ERROR_INVALID_VALUE, * CUDA_ERROR_NOT_FOUND * * @see JCudaDriver#cuModuleGetFunction * @see JCudaDriver#cuModuleGetGlobal * @see JCudaDriver#cuModuleGetSurfRef * @see JCudaDriver#cuModuleLoad * @see JCudaDriver#cuModuleLoadData * @see JCudaDriver#cuModuleLoadDataEx * @see JCudaDriver#cuModuleLoadFatBinary * @see JCudaDriver#cuModuleUnload */ public static int cuModuleGetTexRef(CUtexref pTexRef, CUmodule hmod, String name) { return checkResult(cuModuleGetTexRefNative(pTexRef, hmod, name)); } private static native int cuModuleGetTexRefNative(CUtexref pTexRef, CUmodule hmod, String name); /** * Returns a handle to a surface reference. * *
     * CUresult cuModuleGetSurfRef (
     *      CUsurfref* pSurfRef,
     *      CUmodule hmod,
     *      const char* name )
     * 
*
*

Returns a handle to a surface reference. * Returns in *pSurfRef the handle of the surface reference of * name name in the module hmod. If no surface * reference of that name exists, cuModuleGetSurfRef() returns * CUDA_ERROR_NOT_FOUND. *

*
* Note: *

Note that this * function may also return error codes from previous, asynchronous * launches. *

*
*

*
* * @param pSurfRef Returned surface reference * @param hmod Module to retrieve surface reference from * @param name Name of surface reference to retrieve * * @return CUDA_SUCCESS, CUDA_ERROR_DEINITIALIZED, CUDA_ERROR_NOT_INITIALIZED, * CUDA_ERROR_INVALID_CONTEXT, CUDA_ERROR_INVALID_VALUE, * CUDA_ERROR_NOT_FOUND * * @see JCudaDriver#cuModuleGetFunction * @see JCudaDriver#cuModuleGetGlobal * @see JCudaDriver#cuModuleGetTexRef * @see JCudaDriver#cuModuleLoad * @see JCudaDriver#cuModuleLoadData * @see JCudaDriver#cuModuleLoadDataEx * @see JCudaDriver#cuModuleLoadFatBinary * @see JCudaDriver#cuModuleUnload */ public static int cuModuleGetSurfRef(CUsurfref pSurfRef, CUmodule hmod, String name) { return checkResult(cuModuleGetSurfRefNative(pSurfRef, hmod, name)); } private static native int cuModuleGetSurfRefNative(CUsurfref pSurfRef, CUmodule hmod, String name); public static int cuLinkCreate(JITOptions jitOptions, CUlinkState stateOut) { return checkResult(cuLinkCreateNative(jitOptions, stateOut)); } private static native int cuLinkCreateNative(JITOptions jitOptions, CUlinkState stateOut); public static int cuLinkAddData(CUlinkState state, int type, Pointer data, long size, String name, JITOptions jitOptions) { return checkResult(cuLinkAddDataNative(state, type, data, size, name, jitOptions)); } private static native int cuLinkAddDataNative(CUlinkState state, int type, Pointer data, long size, String name, JITOptions jitOptions); public static int cuLinkAddFile(CUlinkState state, int type, String path, JITOptions jitOptions) { return checkResult(cuLinkAddFileNative(state, type, path, jitOptions)); } private static native int cuLinkAddFileNative(CUlinkState state, int type, String path, JITOptions jitOptions); public static int cuLinkComplete(CUlinkState state, Pointer cubinOut, long sizeOut[]) { return checkResult(cuLinkCompleteNative(state, cubinOut, sizeOut)); } private static native int cuLinkCompleteNative(CUlinkState state, Pointer cubinOut, long sizeOut[]); 
/**
     * Java-side entry point for cuLinkDestroy. Forwards the link state to the
     * native binding and hands the returned status to checkResult.
     *
     * @param state Link state passed to the native call
     *
     * @return The status code, as passed through checkResult
     */
    public static int cuLinkDestroy(CUlinkState state) {
        final int status = cuLinkDestroyNative(state);
        return checkResult(status);
    }

    private static native int cuLinkDestroyNative(CUlinkState state);

    /**
     * Gets free and total memory.
     *
     *
     * CUresult cuMemGetInfo (
     *      size_t* free,
     *      size_t* total )
     * 
*
*

Gets free and total memory. Returns in * *free and *total respectively, the free and total * amount of memory available for allocation by the CUDA context, in * bytes. *

*
* Note: *

Note that this * function may also return error codes from previous, asynchronous * launches. *

*
*

*
* * @param free Returned free memory in bytes * @param total Returned total memory in bytes * * @return CUDA_SUCCESS, CUDA_ERROR_DEINITIALIZED, CUDA_ERROR_NOT_INITIALIZED, * CUDA_ERROR_INVALID_CONTEXT, CUDA_ERROR_INVALID_VALUE * * @see JCudaDriver#cuArray3DCreate * @see JCudaDriver#cuArray3DGetDescriptor * @see JCudaDriver#cuArrayCreate * @see JCudaDriver#cuArrayDestroy * @see JCudaDriver#cuArrayGetDescriptor * @see JCudaDriver#cuMemAlloc * @see JCudaDriver#cuMemAllocHost * @see JCudaDriver#cuMemAllocPitch * @see JCudaDriver#cuMemcpy2D * @see JCudaDriver#cuMemcpy2DAsync * @see JCudaDriver#cuMemcpy2DUnaligned * @see JCudaDriver#cuMemcpy3D * @see JCudaDriver#cuMemcpy3DAsync * @see JCudaDriver#cuMemcpyAtoA * @see JCudaDriver#cuMemcpyAtoD * @see JCudaDriver#cuMemcpyAtoH * @see JCudaDriver#cuMemcpyAtoHAsync * @see JCudaDriver#cuMemcpyDtoA * @see JCudaDriver#cuMemcpyDtoD * @see JCudaDriver#cuMemcpyDtoDAsync * @see JCudaDriver#cuMemcpyDtoH * @see JCudaDriver#cuMemcpyDtoHAsync * @see JCudaDriver#cuMemcpyHtoA * @see JCudaDriver#cuMemcpyHtoAAsync * @see JCudaDriver#cuMemcpyHtoD * @see JCudaDriver#cuMemcpyHtoDAsync * @see JCudaDriver#cuMemFree * @see JCudaDriver#cuMemFreeHost * @see JCudaDriver#cuMemGetAddressRange * @see JCudaDriver#cuMemHostAlloc * @see JCudaDriver#cuMemHostGetDevicePointer * @see JCudaDriver#cuMemsetD2D8 * @see JCudaDriver#cuMemsetD2D16 * @see JCudaDriver#cuMemsetD2D32 * @see JCudaDriver#cuMemsetD8 * @see JCudaDriver#cuMemsetD16 * @see JCudaDriver#cuMemsetD32 */ public static int cuMemGetInfo(long free[], long total[]) { return checkResult(cuMemGetInfoNative(free, total)); } private static native int cuMemGetInfoNative(long free[], long total[]); /** * Allocates page-locked host memory. * *
     * CUresult cuMemHostAlloc (
     *      void** pp,
     *      size_t bytesize,
     *      unsigned int  Flags )
     * 
*
*

Allocates page-locked host memory. * Allocates bytesize bytes of host memory that is page-locked * and accessible to the device. The driver tracks the virtual memory * ranges allocated * with this function and automatically * accelerates calls to functions such as cuMemcpyHtoD(). Since the memory * can be accessed directly by the device, it can be read or written with * much higher bandwidth than pageable * memory obtained with functions such as * malloc(). Allocating excessive amounts of pinned memory may degrade * system performance, * since it reduces the amount of memory * available to the system for paging. As a result, this function is best * used sparingly * to allocate staging areas for data * exchange between host and device. *

*

The Flags parameter enables * different options to be specified that affect the allocation, as * follows. *

*
    *
  • *

    CU_MEMHOSTALLOC_PORTABLE: The * memory returned by this call will be considered as pinned memory by * all CUDA contexts, not just the one that performed * the allocation. *

    *
  • *
*

*
    *
  • *

    CU_MEMHOSTALLOC_DEVICEMAP: Maps * the allocation into the CUDA address space. The device pointer to the * memory may be obtained by calling cuMemHostGetDevicePointer(). This * feature is available only on GPUs with compute capability greater than * or equal to 1.1. *

    *
  • *
*

*
    *
  • *

    CU_MEMHOSTALLOC_WRITECOMBINED: * Allocates the memory as write-combined (WC). WC memory can be * transferred across the PCI Express bus more quickly on some * system configurations, but * cannot be read efficiently by most CPUs. WC memory is a good option * for buffers that will be written * by the CPU and read by the GPU * via mapped pinned memory or host->device transfers. *

    *
  • *
*

*

All of these flags are orthogonal to * one another: a developer may allocate memory that is portable, mapped * and/or write-combined * with no restrictions. *

*

The CUDA context must have been created * with the CU_CTX_MAP_HOST flag in order for the CU_MEMHOSTALLOC_DEVICEMAP * flag to have any effect. *

*

The CU_MEMHOSTALLOC_DEVICEMAP flag may * be specified on CUDA contexts for devices that do not support mapped * pinned memory. The failure is deferred to cuMemHostGetDevicePointer() * because the memory may be mapped into other CUDA contexts via the * CU_MEMHOSTALLOC_PORTABLE flag. *

*

The memory allocated by this function * must be freed with cuMemFreeHost(). *

*

Note all host memory allocated using * cuMemHostAlloc() will automatically be immediately accessible to all * contexts on all devices which support unified addressing (as may be * queried * using CU_DEVICE_ATTRIBUTE_UNIFIED_ADDRESSING). * Unless the flag CU_MEMHOSTALLOC_WRITECOMBINED is specified, the device * pointer that may be used to access this host memory from those contexts * is always equal to the returned * host pointer *pp. If the flag * CU_MEMHOSTALLOC_WRITECOMBINED is specified, then the function * cuMemHostGetDevicePointer() must be used to query the device pointer, * even if the context supports unified addressing. See Unified Addressing * for additional details. *

*
* Note: *

Note that this * function may also return error codes from previous, asynchronous * launches. *

*
*

*
* * @param pp Returned host pointer to page-locked memory * @param bytesize Requested allocation size in bytes * @param Flags Flags for allocation request * * @return CUDA_SUCCESS, CUDA_ERROR_DEINITIALIZED, CUDA_ERROR_NOT_INITIALIZED, * CUDA_ERROR_INVALID_CONTEXT, CUDA_ERROR_INVALID_VALUE, * CUDA_ERROR_OUT_OF_MEMORY * * @see JCudaDriver#cuArray3DCreate * @see JCudaDriver#cuArray3DGetDescriptor * @see JCudaDriver#cuArrayCreate * @see JCudaDriver#cuArrayDestroy * @see JCudaDriver#cuArrayGetDescriptor * @see JCudaDriver#cuMemAlloc * @see JCudaDriver#cuMemAllocHost * @see JCudaDriver#cuMemAllocPitch * @see JCudaDriver#cuMemcpy2D * @see JCudaDriver#cuMemcpy2DAsync * @see JCudaDriver#cuMemcpy2DUnaligned * @see JCudaDriver#cuMemcpy3D * @see JCudaDriver#cuMemcpy3DAsync * @see JCudaDriver#cuMemcpyAtoA * @see JCudaDriver#cuMemcpyAtoD * @see JCudaDriver#cuMemcpyAtoH * @see JCudaDriver#cuMemcpyAtoHAsync * @see JCudaDriver#cuMemcpyDtoA * @see JCudaDriver#cuMemcpyDtoD * @see JCudaDriver#cuMemcpyDtoDAsync * @see JCudaDriver#cuMemcpyDtoH * @see JCudaDriver#cuMemcpyDtoHAsync * @see JCudaDriver#cuMemcpyHtoA * @see JCudaDriver#cuMemcpyHtoAAsync * @see JCudaDriver#cuMemcpyHtoD * @see JCudaDriver#cuMemcpyHtoDAsync * @see JCudaDriver#cuMemFree * @see JCudaDriver#cuMemFreeHost * @see JCudaDriver#cuMemGetAddressRange * @see JCudaDriver#cuMemGetInfo * @see JCudaDriver#cuMemHostGetDevicePointer * @see JCudaDriver#cuMemsetD2D8 * @see JCudaDriver#cuMemsetD2D16 * @see JCudaDriver#cuMemsetD2D32 * @see JCudaDriver#cuMemsetD8 * @see JCudaDriver#cuMemsetD16 * @see JCudaDriver#cuMemsetD32 */ public static int cuMemHostAlloc(Pointer pp, long bytes, int Flags) { return checkResult(cuMemHostAllocNative(pp, bytes, Flags)); } private static native int cuMemHostAllocNative(Pointer pp, long bytes, int Flags); /** * Passes back device pointer of mapped pinned memory. * *
     * CUresult cuMemHostGetDevicePointer (
     *      CUdeviceptr* pdptr,
     *      void* p,
     *      unsigned int  Flags )
     * 
*
*

Passes back device pointer of mapped * pinned memory. Passes back the device pointer pdptr * corresponding to the mapped, pinned host buffer p allocated * by cuMemHostAlloc. *

*

cuMemHostGetDevicePointer() will fail * if the CU_MEMHOSTALLOC_DEVICEMAP flag was not specified at the time * the memory was allocated, or if the function is called on a GPU that * does not support * mapped pinned memory. *

*

Flags is reserved for future * releases. For now, it must be set to 0. *

*
* Note: *

Note that this * function may also return error codes from previous, asynchronous * launches. *

*
*

*
* * @param pdptr Returned device pointer * @param p Host pointer * @param Flags Options (must be 0) * * @return CUDA_SUCCESS, CUDA_ERROR_DEINITIALIZED, CUDA_ERROR_NOT_INITIALIZED, * CUDA_ERROR_INVALID_CONTEXT, CUDA_ERROR_INVALID_VALUE * * @see JCudaDriver#cuArray3DCreate * @see JCudaDriver#cuArray3DGetDescriptor * @see JCudaDriver#cuArrayCreate * @see JCudaDriver#cuArrayDestroy * @see JCudaDriver#cuArrayGetDescriptor * @see JCudaDriver#cuMemAlloc * @see JCudaDriver#cuMemAllocHost * @see JCudaDriver#cuMemAllocPitch * @see JCudaDriver#cuMemcpy2D * @see JCudaDriver#cuMemcpy2DAsync * @see JCudaDriver#cuMemcpy2DUnaligned * @see JCudaDriver#cuMemcpy3D * @see JCudaDriver#cuMemcpy3DAsync * @see JCudaDriver#cuMemcpyAtoA * @see JCudaDriver#cuMemcpyAtoD * @see JCudaDriver#cuMemcpyAtoH * @see JCudaDriver#cuMemcpyAtoHAsync * @see JCudaDriver#cuMemcpyDtoA * @see JCudaDriver#cuMemcpyDtoD * @see JCudaDriver#cuMemcpyDtoDAsync * @see JCudaDriver#cuMemcpyDtoH * @see JCudaDriver#cuMemcpyDtoHAsync * @see JCudaDriver#cuMemcpyHtoA * @see JCudaDriver#cuMemcpyHtoAAsync * @see JCudaDriver#cuMemcpyHtoD * @see JCudaDriver#cuMemcpyHtoDAsync * @see JCudaDriver#cuMemFree * @see JCudaDriver#cuMemFreeHost * @see JCudaDriver#cuMemGetAddressRange * @see JCudaDriver#cuMemGetInfo * @see JCudaDriver#cuMemHostAlloc * @see JCudaDriver#cuMemsetD2D8 * @see JCudaDriver#cuMemsetD2D16 * @see JCudaDriver#cuMemsetD2D32 * @see JCudaDriver#cuMemsetD8 * @see JCudaDriver#cuMemsetD16 * @see JCudaDriver#cuMemsetD32 */ public static int cuMemHostGetDevicePointer(CUdeviceptr ret, Pointer p, int Flags) { return checkResult(cuMemHostGetDevicePointerNative(ret, p, Flags)); } private static native int cuMemHostGetDevicePointerNative(CUdeviceptr ret, Pointer p, int Flags); /** * Passes back flags that were used for a pinned allocation. * *
     * CUresult cuMemHostGetFlags (
     *      unsigned int* pFlags,
     *      void* p )
     * 
*
*

Passes back flags that were used for a * pinned allocation. Passes back the flags pFlags that were * specified when allocating the pinned host buffer p allocated * by cuMemHostAlloc. *

*

cuMemHostGetFlags() will fail if the * pointer does not reside in an allocation performed by cuMemAllocHost() * or cuMemHostAlloc(). *

*
* Note: *

Note that this * function may also return error codes from previous, asynchronous * launches. *

*
*

*
*
     * @param pFlags Returned flags word
     * @param p Host pointer
     *
     * @return CUDA_SUCCESS, CUDA_ERROR_DEINITIALIZED, CUDA_ERROR_NOT_INITIALIZED,
     * CUDA_ERROR_INVALID_CONTEXT, CUDA_ERROR_INVALID_VALUE
     *
     * @see JCudaDriver#cuMemAllocHost
     * @see JCudaDriver#cuMemHostAlloc
     */
    public static int cuMemHostGetFlags(int pFlags[], Pointer p) {
        final int status = cuMemHostGetFlagsNative(pFlags, p);
        return checkResult(status);
    }

    private static native int cuMemHostGetFlagsNative(int pFlags[], Pointer p);

    /**
     * Returns a handle to a compute device.
     *
     *
     * CUresult cuDeviceGetByPCIBusId (
     *      CUdevice* dev,
     *      char* pciBusId )
     * 
*
*

Returns a handle to a compute device. * Returns in *dev a device handle given a PCI bus ID * string. *

*
* Note: *

Note that this * function may also return error codes from previous, asynchronous * launches. *

*
*

*
* * @param dev Returned device handle * @param pciBusId String in one of the following forms: [domain]:[bus]:[device].[function] [domain]:[bus]:[device] [bus]:[device].[function] where domain, bus, device, and function are all hexadecimal values * * @return CUDA_SUCCESS, CUDA_ERROR_DEINITIALIZED, CUDA_ERROR_NOT_INITIALIZED, * CUDA_ERROR_INVALID_VALUE, CUDA_ERROR_INVALID_DEVICE * * @see JCudaDriver#cuDeviceGet * @see JCudaDriver#cuDeviceGetAttribute * @see JCudaDriver#cuDeviceGetPCIBusId */ public static int cuDeviceGetByPCIBusId(CUdevice dev, String pciBusId) { return checkResult(cuDeviceGetByPCIBusIdNative(dev, pciBusId)); } private static native int cuDeviceGetByPCIBusIdNative(CUdevice dev, String pciBusId); public static int cuMemAllocManaged(CUdeviceptr dptr, long bytesize, int flags) { return checkResult(cuMemAllocManagedNative(dptr, bytesize, flags)); } private static native int cuMemAllocManagedNative(CUdeviceptr dptr, long bytesize, int flags); /** * Returns a PCI Bus Id string for the device. * *
     * CUresult cuDeviceGetPCIBusId (
     *      char* pciBusId,
     *      int  len,
     *      CUdevice dev )
     * 
*
*

Returns a PCI Bus Id string for the * device. Returns an ASCII string identifying the device dev * in the NULL-terminated string pointed to by pciBusId. len specifies the maximum length of the string that may be * returned. *

*
* Note: *

Note that this * function may also return error codes from previous, asynchronous * launches. *

*
*

*
* * @param pciBusId Returned identifier string for the device in the following format [domain]:[bus]:[device].[function] where domain, bus, device, and function are all hexadecimal values. pciBusId should be large enough to store 13 characters including the NULL-terminator. * @param len Maximum length of string to store in name * @param dev Device to get identifier string for * * @return CUDA_SUCCESS, CUDA_ERROR_DEINITIALIZED, CUDA_ERROR_NOT_INITIALIZED, * CUDA_ERROR_INVALID_VALUE, CUDA_ERROR_INVALID_DEVICE * * @see JCudaDriver#cuDeviceGet * @see JCudaDriver#cuDeviceGetAttribute * @see JCudaDriver#cuDeviceGetByPCIBusId */ public static int cuDeviceGetPCIBusId(String pciBusId[], int len, CUdevice dev) { return checkResult(cuDeviceGetPCIBusIdNative(pciBusId, len, dev)); } private static native int cuDeviceGetPCIBusIdNative(String pciBusId[], int len, CUdevice dev); /** * Gets an interprocess handle for a previously allocated event. * *
     * CUresult cuIpcGetEventHandle (
     *      CUipcEventHandle* pHandle,
     *      CUevent event )
     * 
*
*

Gets an interprocess handle for a * previously allocated event. Takes as input a previously allocated * event. This event must * have been created with the * CU_EVENT_INTERPROCESS and CU_EVENT_DISABLE_TIMING flags set. This * opaque handle may be copied into other processes and opened with * cuIpcOpenEventHandle to allow efficient hardware synchronization * between GPU work in different processes. *

*

After the event has been opened in * the importing process, cuEventRecord, cuEventSynchronize, * cuStreamWaitEvent and cuEventQuery may be used in either process. * Performing operations on the imported event after the exported event * has been freed with cuEventDestroy will result in undefined behavior. *

*

IPC functionality is restricted to * devices with support for unified addressing on Linux operating * systems. *

*
* * @param pHandle Pointer to a user allocated CUipcEventHandle in which to return the opaque event handle * @param event Event allocated with CU_EVENT_INTERPROCESS and CU_EVENT_DISABLE_TIMING flags. * * @return CUDA_SUCCESS, CUDA_ERROR_INVALID_HANDLE, CUDA_ERROR_OUT_OF_MEMORY, * CUDA_ERROR_MAP_FAILED * * @see JCudaDriver#cuEventCreate * @see JCudaDriver#cuEventDestroy * @see JCudaDriver#cuEventSynchronize * @see JCudaDriver#cuEventQuery * @see JCudaDriver#cuStreamWaitEvent * @see JCudaDriver#cuIpcOpenEventHandle * @see JCudaDriver#cuIpcGetMemHandle * @see JCudaDriver#cuIpcOpenMemHandle * @see JCudaDriver#cuIpcCloseMemHandle */ public static int cuIpcGetEventHandle(CUipcEventHandle pHandle, CUevent event) { return checkResult(cuIpcGetEventHandleNative(pHandle, event)); } private static native int cuIpcGetEventHandleNative(CUipcEventHandle pHandle, CUevent event); /** * Opens an interprocess event handle for use in the current process. * *
     * CUresult cuIpcOpenEventHandle (
     *      CUevent* phEvent,
     *      CUipcEventHandle handle )
     * 
*
*

Opens an interprocess event handle for * use in the current process. Opens an interprocess event handle exported * from another * process with cuIpcGetEventHandle. This * function returns a CUevent that behaves like a locally created event * with the CU_EVENT_DISABLE_TIMING flag specified. This event must be * freed with cuEventDestroy. *

*

Performing operations on the imported * event after the exported event has been freed with cuEventDestroy will * result in undefined behavior. *

*

IPC functionality is restricted to * devices with support for unified addressing on Linux operating * systems. *

*
* * @param phEvent Returns the imported event * @param handle Interprocess handle to open * * @return CUDA_SUCCESS, CUDA_ERROR_INVALID_CONTEXT, CUDA_ERROR_MAP_FAILED, * CUDA_ERROR_PEER_ACCESS_UNSUPPORTED, CUDA_ERROR_INVALID_HANDLE * * @see JCudaDriver#cuEventCreate * @see JCudaDriver#cuEventDestroy * @see JCudaDriver#cuEventSynchronize * @see JCudaDriver#cuEventQuery * @see JCudaDriver#cuStreamWaitEvent * @see JCudaDriver#cuIpcGetEventHandle * @see JCudaDriver#cuIpcGetMemHandle * @see JCudaDriver#cuIpcOpenMemHandle * @see JCudaDriver#cuIpcCloseMemHandle */ public static int cuIpcOpenEventHandle(CUevent phEvent, CUipcEventHandle handle) { return checkResult(cuIpcOpenEventHandleNative(phEvent, handle)); } private static native int cuIpcOpenEventHandleNative(CUevent phEvent, CUipcEventHandle handle); /** * Gets an interprocess memory handle for an existing device memory * allocation. * *
     * CUresult cuIpcGetMemHandle (
     *      CUipcMemHandle* pHandle,
     *      CUdeviceptr dptr )
     * 
*
*

Gets an interprocess memory * handle for an existing device memory allocation. *

*

Takes a pointer to the base of an * existing device memory allocation created with cuMemAlloc and exports * it for use in another process. This is a lightweight operation and may * be called multiple times on an allocation * without adverse effects. *

*

If a region of memory is freed with * cuMemFree and a subsequent call to cuMemAlloc returns memory with the * same device address, cuIpcGetMemHandle will return a unique handle for * the new memory. *

*

IPC functionality is restricted to * devices with support for unified addressing on Linux operating * systems. *

*
* * @param pHandle Pointer to user allocated CUipcMemHandle to return the handle in. * @param dptr Base pointer to previously allocated device memory * * @return CUDA_SUCCESS, CUDA_ERROR_INVALID_HANDLE, CUDA_ERROR_OUT_OF_MEMORY, * CUDA_ERROR_MAP_FAILED, * * @see JCudaDriver#cuMemAlloc * @see JCudaDriver#cuMemFree * @see JCudaDriver#cuIpcGetEventHandle * @see JCudaDriver#cuIpcOpenEventHandle * @see JCudaDriver#cuIpcOpenMemHandle * @see JCudaDriver#cuIpcCloseMemHandle */ public static int cuIpcGetMemHandle(CUipcMemHandle pHandle, CUdeviceptr dptr) { return checkResult(cuIpcGetMemHandleNative(pHandle, dptr)); } private static native int cuIpcGetMemHandleNative(CUipcMemHandle pHandle, CUdeviceptr dptr); /** * *
     * CUresult cuIpcOpenMemHandle (
     *      CUdeviceptr* pdptr,
     *      CUipcMemHandle handle,
     *      unsigned int  Flags )
     * 
*
*

Opens an interprocess memory * handle exported from another process and returns a device pointer * usable in the local * process. *

*

Maps memory exported from another * process with cuIpcGetMemHandle into the current device address space. * For contexts on different devices cuIpcOpenMemHandle can attempt to * enable peer access between the devices as if the user called * cuCtxEnablePeerAccess. This behavior is controlled by the * CU_IPC_MEM_LAZY_ENABLE_PEER_ACCESS flag. cuDeviceCanAccessPeer can * determine if a mapping is possible. *

*

Contexts that may open CUipcMemHandles * are restricted in the following way. CUipcMemHandles from each CUdevice * in a given process may only be opened by one CUcontext per CUdevice * per other process. *

*

Memory returned from cuIpcOpenMemHandle * must be freed with cuIpcCloseMemHandle. *

*

Calling cuMemFree on an exported memory * region before calling cuIpcCloseMemHandle in the importing context will * result in undefined behavior. *

*

IPC functionality is restricted to * devices with support for unified addressing on Linux operating * systems. *

*
* * @param pdptr Returned device pointer * @param handle CUipcMemHandle to open * @param Flags Flags for this operation. Must be specified as CU_IPC_MEM_LAZY_ENABLE_PEER_ACCESS * * @return CUDA_SUCCESS, CUDA_ERROR_INVALID_CONTEXT, CUDA_ERROR_MAP_FAILED, * CUDA_ERROR_INVALID_HANDLE, CUDA_ERROR_TOO_MANY_PEERS * * @see JCudaDriver#cuMemAlloc * @see JCudaDriver#cuMemFree * @see JCudaDriver#cuIpcGetEventHandle * @see JCudaDriver#cuIpcOpenEventHandle * @see JCudaDriver#cuIpcGetMemHandle * @see JCudaDriver#cuIpcCloseMemHandle * @see JCudaDriver#cuCtxEnablePeerAccess * @see JCudaDriver#cuDeviceCanAccessPeer */ public static int cuIpcOpenMemHandle(CUdeviceptr pdptr, CUipcMemHandle handle, int Flags) { return checkResult(cuIpcOpenMemHandleNative(pdptr, handle, Flags)); } private static native int cuIpcOpenMemHandleNative(CUdeviceptr pdptr, CUipcMemHandle handle, int Flags); /** * Close memory mapped with cuIpcOpenMemHandle. * *
     * CUresult cuIpcCloseMemHandle (
     *      CUdeviceptr dptr )
     * 
*
*

Close memory mapped with cuIpcOpenMemHandle. * Unmaps memory returned by cuIpcOpenMemHandle. The original allocation * in the exporting process as well as imported mappings in other processes * will be unaffected. *

*

Any resources used to enable peer access * will be freed if this is the last mapping using them. *

*

IPC functionality is restricted to * devices with support for unified addressing on Linux operating * systems. *

*
* * @param dptr Device pointer returned by cuIpcOpenMemHandle * * @return CUDA_SUCCESS, CUDA_ERROR_INVALID_CONTEXT, CUDA_ERROR_MAP_FAILED, * CUDA_ERROR_INVALID_HANDLE, * * @see JCudaDriver#cuMemAlloc * @see JCudaDriver#cuMemFree * @see JCudaDriver#cuIpcGetEventHandle * @see JCudaDriver#cuIpcOpenEventHandle * @see JCudaDriver#cuIpcGetMemHandle * @see JCudaDriver#cuIpcOpenMemHandle */ public static int cuIpcCloseMemHandle(CUdeviceptr dptr) { return checkResult(cuIpcCloseMemHandleNative(dptr)); } private static native int cuIpcCloseMemHandleNative(CUdeviceptr dptr); /** * Registers an existing host memory range for use by CUDA. * *
     * CUresult cuMemHostRegister (
     *      void* p,
     *      size_t bytesize,
     *      unsigned int  Flags )
     * 
*
*

Registers an existing host memory range * for use by CUDA. Page-locks the memory range specified by p * and bytesize and maps it for the device(s) as specified by * Flags. This memory range also is added to the same tracking * mechanism as cuMemHostAlloc to automatically accelerate calls to * functions such as cuMemcpyHtoD(). Since the memory can be accessed * directly by the device, it can be read or written with much higher * bandwidth than pageable * memory that has not been registered. * Page-locking excessive amounts of memory may degrade system performance, * since it reduces * the amount of memory available to the * system for paging. As a result, this function is best used sparingly * to register staging * areas for data exchange between host and * device. *

*

This function has limited support on * Mac OS X. OS 10.7 or higher is required. *

*

The Flags parameter enables * different options to be specified that affect the allocation, as * follows. *

*
    *
  • *

    CU_MEMHOSTREGISTER_PORTABLE: * The memory returned by this call will be considered as pinned memory * by all CUDA contexts, not just the one that performed * the allocation. *

    *
  • *
*

*
    *
  • *

    CU_MEMHOSTREGISTER_DEVICEMAP: * Maps the allocation into the CUDA address space. The device pointer to * the memory may be obtained by calling cuMemHostGetDevicePointer(). This * feature is available only on GPUs with compute capability greater than * or equal to 1.1. *

    *
  • *
*

*

All of these flags are orthogonal to * one another: a developer may page-lock memory that is portable or * mapped with no restrictions. *

*

The CUDA context must have been created * with the CU_CTX_MAP_HOST flag in order for the CU_MEMHOSTREGISTER_DEVICEMAP * flag to have any effect. *

*

The CU_MEMHOSTREGISTER_DEVICEMAP flag * may be specified on CUDA contexts for devices that do not support * mapped pinned memory. The failure is deferred to cuMemHostGetDevicePointer() * because the memory may be mapped into other CUDA contexts via the * CU_MEMHOSTREGISTER_PORTABLE flag. *

*

The memory page-locked by this function * must be unregistered with cuMemHostUnregister(). *

*
* Note: *

Note that this * function may also return error codes from previous, asynchronous * launches. *

*
*

*
* * @param p Host pointer to memory to page-lock * @param bytesize Size in bytes of the address range to page-lock * @param Flags Flags for allocation request * * @return CUDA_SUCCESS, CUDA_ERROR_DEINITIALIZED, CUDA_ERROR_NOT_INITIALIZED, * CUDA_ERROR_INVALID_CONTEXT, CUDA_ERROR_INVALID_VALUE, * CUDA_ERROR_OUT_OF_MEMORY, CUDA_ERROR_HOST_MEMORY_ALREADY_REGISTERED * * @see JCudaDriver#cuMemHostUnregister * @see JCudaDriver#cuMemHostGetFlags * @see JCudaDriver#cuMemHostGetDevicePointer */ public static int cuMemHostRegister(Pointer p, long bytesize, int Flags) { return checkResult(cuMemHostRegisterNative(p, bytesize, Flags)); } private static native int cuMemHostRegisterNative(Pointer p, long bytesize, int Flags); /** * Unregisters a memory range that was registered with cuMemHostRegister. * *
     * CUresult cuMemHostUnregister (
     *      void* p )
     * 
*
*

Unregisters a memory range that was * registered with cuMemHostRegister. Unmaps the memory range whose base * address is specified * by p, and makes it pageable * again. *

*

The base address must be the same one * specified to cuMemHostRegister(). *

*
* Note: *

Note that this * function may also return error codes from previous, asynchronous * launches. *

*
*

*
*
     * @param p Host pointer to memory to unregister
     *
     * @return CUDA_SUCCESS, CUDA_ERROR_DEINITIALIZED, CUDA_ERROR_NOT_INITIALIZED,
     * CUDA_ERROR_INVALID_CONTEXT, CUDA_ERROR_INVALID_VALUE,
     * CUDA_ERROR_OUT_OF_MEMORY, CUDA_ERROR_HOST_MEMORY_NOT_REGISTERED
     *
     * @see JCudaDriver#cuMemHostRegister
     */
    public static int cuMemHostUnregister(Pointer p) {
        final int status = cuMemHostUnregisterNative(p);
        return checkResult(status);
    }

    private static native int cuMemHostUnregisterNative(Pointer p);

    /**
     * Copies memory.
     *
     *
     * CUresult cuMemcpy (
     *      CUdeviceptr dst,
     *      CUdeviceptr src,
     *      size_t ByteCount )
     * 
*
*

Copies memory. Copies data between two * pointers. dst and src are base pointers of the * destination and source, respectively. ByteCount specifies * the number of bytes to copy. Note that this function infers the type * of the transfer (host to host, host to device, * device to device, or device to host) from * the pointer values. This function is only allowed in contexts which * support unified * addressing. Note that this function is * synchronous. *

*
* Note: *

Note that this * function may also return error codes from previous, asynchronous * launches. *

*
*

*
* * @param dst Destination unified virtual address space pointer * @param src Source unified virtual address space pointer * @param ByteCount Size of memory copy in bytes * * @return CUDA_SUCCESS, CUDA_ERROR_DEINITIALIZED, CUDA_ERROR_NOT_INITIALIZED, * CUDA_ERROR_INVALID_CONTEXT, CUDA_ERROR_INVALID_VALUE * * @see JCudaDriver#cuArray3DCreate * @see JCudaDriver#cuArray3DGetDescriptor * @see JCudaDriver#cuArrayCreate * @see JCudaDriver#cuArrayDestroy * @see JCudaDriver#cuArrayGetDescriptor * @see JCudaDriver#cuMemAlloc * @see JCudaDriver#cuMemAllocHost * @see JCudaDriver#cuMemAllocPitch * @see JCudaDriver#cuMemcpy2D * @see JCudaDriver#cuMemcpy2DAsync * @see JCudaDriver#cuMemcpy2DUnaligned * @see JCudaDriver#cuMemcpy3D * @see JCudaDriver#cuMemcpy3DAsync * @see JCudaDriver#cuMemcpyAtoA * @see JCudaDriver#cuMemcpyAtoD * @see JCudaDriver#cuMemcpyAtoH * @see JCudaDriver#cuMemcpyAtoHAsync * @see JCudaDriver#cuMemcpyDtoA * @see JCudaDriver#cuMemcpyDtoH * @see JCudaDriver#cuMemcpyDtoHAsync * @see JCudaDriver#cuMemcpyHtoA * @see JCudaDriver#cuMemcpyHtoAAsync * @see JCudaDriver#cuMemcpyHtoD * @see JCudaDriver#cuMemcpyHtoDAsync * @see JCudaDriver#cuMemFree * @see JCudaDriver#cuMemFreeHost * @see JCudaDriver#cuMemGetAddressRange * @see JCudaDriver#cuMemGetInfo * @see JCudaDriver#cuMemHostAlloc * @see JCudaDriver#cuMemHostGetDevicePointer * @see JCudaDriver#cuMemsetD2D8 * @see JCudaDriver#cuMemsetD2D16 * @see JCudaDriver#cuMemsetD2D32 * @see JCudaDriver#cuMemsetD8 * @see JCudaDriver#cuMemsetD16 * @see JCudaDriver#cuMemsetD32 */ public static int cuMemcpy(CUdeviceptr dst, CUdeviceptr src, long ByteCount) { return checkResult(cuMemcpyNative(dst, src, ByteCount)); } private static native int cuMemcpyNative(CUdeviceptr dst, CUdeviceptr src, long ByteCount); /** * Copies device memory between two contexts. * *
     * CUresult cuMemcpyPeer (
     *      CUdeviceptr dstDevice,
     *      CUcontext dstContext,
     *      CUdeviceptr srcDevice,
     *      CUcontext srcContext,
     *      size_t ByteCount )
     * 
*
*

Copies device memory between two contexts. * Copies from device memory in one context to device memory in another * context. * dstDevice is the base device * pointer of the destination memory and dstContext is the * destination context. srcDevice is the base device pointer of * the source memory and srcContext is the source context. ByteCount specifies the number of bytes to copy. *

*

Note that this function is asynchronous * with respect to the host, but serialized with respect to all pending and * future asynchronous * work in the current context, srcContext, and dstContext (use cuMemcpyPeerAsync to * avoid this synchronization). *

*
* Note: *

Note that this * function may also return error codes from previous, asynchronous * launches. *

*
*

*
* * @param dstDevice Destination device pointer * @param dstContext Destination context * @param srcDevice Source device pointer * @param srcContext Source context * @param ByteCount Size of memory copy in bytes * * @return CUDA_SUCCESS, CUDA_ERROR_DEINITIALIZED, CUDA_ERROR_NOT_INITIALIZED, * CUDA_ERROR_INVALID_CONTEXT, CUDA_ERROR_INVALID_VALUE * * @see JCudaDriver#cuMemcpyDtoD * @see JCudaDriver#cuMemcpy3DPeer * @see JCudaDriver#cuMemcpyDtoDAsync * @see JCudaDriver#cuMemcpyPeerAsync * @see JCudaDriver#cuMemcpy3DPeerAsync */ public static int cuMemcpyPeer(CUdeviceptr dstDevice, CUcontext dstContext, CUdeviceptr srcDevice, CUcontext srcContext, long ByteCount) { return cuMemcpyPeerNative(dstDevice, dstContext, srcDevice, srcContext, ByteCount); } private static native int cuMemcpyPeerNative(CUdeviceptr dstDevice, CUcontext dstContext, CUdeviceptr srcDevice, CUcontext srcContext, long ByteCount); /** * Allocates device memory. * *
     * CUresult cuMemAlloc (
     *      CUdeviceptr* dptr,
     *      size_t bytesize )
     * 
*
*

Allocates device memory. Allocates bytesize bytes of linear memory on the device and returns in *dptr a pointer to the allocated memory. The allocated memory is * suitably aligned for any kind of variable. The memory is not cleared. * If bytesize is 0, cuMemAlloc() * returns CUDA_ERROR_INVALID_VALUE. *

*
* Note: *

Note that this * function may also return error codes from previous, asynchronous * launches. *

*
*

*
* * @param dptr Returned device pointer * @param bytesize Requested allocation size in bytes * * @return CUDA_SUCCESS, CUDA_ERROR_DEINITIALIZED, CUDA_ERROR_NOT_INITIALIZED, * CUDA_ERROR_INVALID_CONTEXT, CUDA_ERROR_INVALID_VALUE, * CUDA_ERROR_OUT_OF_MEMORY * * @see JCudaDriver#cuArray3DCreate * @see JCudaDriver#cuArray3DGetDescriptor * @see JCudaDriver#cuArrayCreate * @see JCudaDriver#cuArrayDestroy * @see JCudaDriver#cuArrayGetDescriptor * @see JCudaDriver#cuMemAllocHost * @see JCudaDriver#cuMemAllocPitch * @see JCudaDriver#cuMemcpy2D * @see JCudaDriver#cuMemcpy2DAsync * @see JCudaDriver#cuMemcpy2DUnaligned * @see JCudaDriver#cuMemcpy3D * @see JCudaDriver#cuMemcpy3DAsync * @see JCudaDriver#cuMemcpyAtoA * @see JCudaDriver#cuMemcpyAtoD * @see JCudaDriver#cuMemcpyAtoH * @see JCudaDriver#cuMemcpyAtoHAsync * @see JCudaDriver#cuMemcpyDtoA * @see JCudaDriver#cuMemcpyDtoD * @see JCudaDriver#cuMemcpyDtoDAsync * @see JCudaDriver#cuMemcpyDtoH * @see JCudaDriver#cuMemcpyDtoHAsync * @see JCudaDriver#cuMemcpyHtoA * @see JCudaDriver#cuMemcpyHtoAAsync * @see JCudaDriver#cuMemcpyHtoD * @see JCudaDriver#cuMemcpyHtoDAsync * @see JCudaDriver#cuMemFree * @see JCudaDriver#cuMemFreeHost * @see JCudaDriver#cuMemGetAddressRange * @see JCudaDriver#cuMemGetInfo * @see JCudaDriver#cuMemHostAlloc * @see JCudaDriver#cuMemHostGetDevicePointer * @see JCudaDriver#cuMemsetD2D8 * @see JCudaDriver#cuMemsetD2D16 * @see JCudaDriver#cuMemsetD2D32 * @see JCudaDriver#cuMemsetD8 * @see JCudaDriver#cuMemsetD16 * @see JCudaDriver#cuMemsetD32 */ public static int cuMemAlloc(CUdeviceptr dptr, long bytesize) { return checkResult(cuMemAllocNative(dptr, bytesize)); } private static native int cuMemAllocNative(CUdeviceptr dptr, long bytesize); /** * Allocates pitched device memory. * *
     * CUresult cuMemAllocPitch (
     *      CUdeviceptr* dptr,
     *      size_t* pPitch,
     *      size_t WidthInBytes,
     *      size_t Height,
     *      unsigned int  ElementSizeBytes )
     * 
*
*

Allocates pitched device memory. Allocates at least WidthInBytes * Height bytes of linear memory on the device and returns in *dptr a pointer to the allocated memory. The function may pad the allocation to ensure that corresponding pointers in any given row will continue to meet the alignment requirements for coalescing as the address is updated from row to row. ElementSizeBytes specifies the size of the largest reads and writes that will be performed on the memory range. ElementSizeBytes may be 4, 8 or 16 (since coalesced memory transactions are not possible on other data sizes). If ElementSizeBytes is smaller than the actual read/write size of a kernel, the kernel will run correctly, but possibly at reduced speed. The pitch returned in *pPitch by cuMemAllocPitch() is the width in bytes of the allocation. The intended usage of pitch is as a separate parameter of the allocation, used to compute addresses within the 2D array. Given the row and column of an array element of type T, the address is computed as:

   T* pElement = (T*)((char*)BaseAddress
     * + Row * Pitch) + Column;
*

*

The pitch returned by cuMemAllocPitch() is guaranteed to work with cuMemcpy2D() under all circumstances. For allocations of 2D arrays, it is recommended that programmers consider performing pitch allocations using cuMemAllocPitch(). Due to alignment restrictions in the hardware, this is especially true if the application will be performing 2D memory copies between different regions of device memory (whether linear memory or CUDA arrays).

*

The byte alignment of the pitch returned by cuMemAllocPitch() is guaranteed to match or exceed the alignment requirement for texture binding with cuTexRefSetAddress2D().

*
* Note: *

Note that this * function may also return error codes from previous, asynchronous * launches. *

*
*

*
* * @param dptr Returned device pointer * @param pPitch Returned pitch of allocation in bytes * @param WidthInBytes Requested allocation width in bytes * @param Height Requested allocation height in rows * @param ElementSizeBytes Size of largest reads/writes for range * * @return CUDA_SUCCESS, CUDA_ERROR_DEINITIALIZED, CUDA_ERROR_NOT_INITIALIZED, * CUDA_ERROR_INVALID_CONTEXT, CUDA_ERROR_INVALID_VALUE, * CUDA_ERROR_OUT_OF_MEMORY * * @see JCudaDriver#cuArray3DCreate * @see JCudaDriver#cuArray3DGetDescriptor * @see JCudaDriver#cuArrayCreate * @see JCudaDriver#cuArrayDestroy * @see JCudaDriver#cuArrayGetDescriptor * @see JCudaDriver#cuMemAlloc * @see JCudaDriver#cuMemAllocHost * @see JCudaDriver#cuMemcpy2D * @see JCudaDriver#cuMemcpy2DAsync * @see JCudaDriver#cuMemcpy2DUnaligned * @see JCudaDriver#cuMemcpy3D * @see JCudaDriver#cuMemcpy3DAsync * @see JCudaDriver#cuMemcpyAtoA * @see JCudaDriver#cuMemcpyAtoD * @see JCudaDriver#cuMemcpyAtoH * @see JCudaDriver#cuMemcpyAtoHAsync * @see JCudaDriver#cuMemcpyDtoA * @see JCudaDriver#cuMemcpyDtoD * @see JCudaDriver#cuMemcpyDtoDAsync * @see JCudaDriver#cuMemcpyDtoH * @see JCudaDriver#cuMemcpyDtoHAsync * @see JCudaDriver#cuMemcpyHtoA * @see JCudaDriver#cuMemcpyHtoAAsync * @see JCudaDriver#cuMemcpyHtoD * @see JCudaDriver#cuMemcpyHtoDAsync * @see JCudaDriver#cuMemFree * @see JCudaDriver#cuMemFreeHost * @see JCudaDriver#cuMemGetAddressRange * @see JCudaDriver#cuMemGetInfo * @see JCudaDriver#cuMemHostAlloc * @see JCudaDriver#cuMemHostGetDevicePointer * @see JCudaDriver#cuMemsetD2D8 * @see JCudaDriver#cuMemsetD2D16 * @see JCudaDriver#cuMemsetD2D32 * @see JCudaDriver#cuMemsetD8 * @see JCudaDriver#cuMemsetD16 * @see JCudaDriver#cuMemsetD32 */ public static int cuMemAllocPitch(CUdeviceptr dptr, long pPitch[], long WidthInBytes, long Height, int ElementSizeBytes) { return checkResult(cuMemAllocPitchNative(dptr, pPitch, WidthInBytes, Height, ElementSizeBytes)); } private static native int cuMemAllocPitchNative(CUdeviceptr dptr, long 
pPitch[], long WidthInBytes, long Height, int ElementSizeBytes); /** * Frees device memory. * *
     * CUresult cuMemFree (
     *      CUdeviceptr dptr )
     * 
*
*

Frees device memory. Frees the memory space pointed to by dptr, which must have been returned by a previous call to cuMemAlloc() or cuMemAllocPitch().

*
* Note: *

Note that this * function may also return error codes from previous, asynchronous * launches. *

*
*

*
* * @param dptr Pointer to memory to free * * @return CUDA_SUCCESS, CUDA_ERROR_DEINITIALIZED, CUDA_ERROR_NOT_INITIALIZED, * CUDA_ERROR_INVALID_CONTEXT, CUDA_ERROR_INVALID_VALUE * * @see JCudaDriver#cuArray3DCreate * @see JCudaDriver#cuArray3DGetDescriptor * @see JCudaDriver#cuArrayCreate * @see JCudaDriver#cuArrayDestroy * @see JCudaDriver#cuArrayGetDescriptor * @see JCudaDriver#cuMemAlloc * @see JCudaDriver#cuMemAllocHost * @see JCudaDriver#cuMemAllocPitch * @see JCudaDriver#cuMemcpy2D * @see JCudaDriver#cuMemcpy2DAsync * @see JCudaDriver#cuMemcpy2DUnaligned * @see JCudaDriver#cuMemcpy3D * @see JCudaDriver#cuMemcpy3DAsync * @see JCudaDriver#cuMemcpyAtoA * @see JCudaDriver#cuMemcpyAtoD * @see JCudaDriver#cuMemcpyAtoH * @see JCudaDriver#cuMemcpyAtoHAsync * @see JCudaDriver#cuMemcpyDtoA * @see JCudaDriver#cuMemcpyDtoD * @see JCudaDriver#cuMemcpyDtoDAsync * @see JCudaDriver#cuMemcpyDtoH * @see JCudaDriver#cuMemcpyDtoHAsync * @see JCudaDriver#cuMemcpyHtoA * @see JCudaDriver#cuMemcpyHtoAAsync * @see JCudaDriver#cuMemcpyHtoD * @see JCudaDriver#cuMemcpyHtoDAsync * @see JCudaDriver#cuMemFreeHost * @see JCudaDriver#cuMemGetAddressRange * @see JCudaDriver#cuMemGetInfo * @see JCudaDriver#cuMemHostAlloc * @see JCudaDriver#cuMemHostGetDevicePointer * @see JCudaDriver#cuMemsetD2D8 * @see JCudaDriver#cuMemsetD2D16 * @see JCudaDriver#cuMemsetD2D32 * @see JCudaDriver#cuMemsetD8 * @see JCudaDriver#cuMemsetD16 * @see JCudaDriver#cuMemsetD32 */ public static int cuMemFree(CUdeviceptr dptr) { return checkResult(cuMemFreeNative(dptr)); } private static native int cuMemFreeNative(CUdeviceptr dptr); /** * Get information on memory allocations. * *
     * CUresult cuMemGetAddressRange (
     *      CUdeviceptr* pbase,
     *      size_t* psize,
     *      CUdeviceptr dptr )
     * 
*
*

Get information on memory allocations. Returns the base address in *pbase and size in *psize of the allocation by cuMemAlloc() or cuMemAllocPitch() that contains the input pointer dptr. Both parameters pbase and psize are optional. If one of them is NULL, it is ignored.

*
* Note: *

Note that this * function may also return error codes from previous, asynchronous * launches. *

*
*

*
* * @param pbase Returned base address * @param psize Returned size of device memory allocation * @param dptr Device pointer to query * * @return CUDA_SUCCESS, CUDA_ERROR_DEINITIALIZED, CUDA_ERROR_NOT_INITIALIZED, * CUDA_ERROR_INVALID_CONTEXT, CUDA_ERROR_INVALID_VALUE * * @see JCudaDriver#cuArray3DCreate * @see JCudaDriver#cuArray3DGetDescriptor * @see JCudaDriver#cuArrayCreate * @see JCudaDriver#cuArrayDestroy * @see JCudaDriver#cuArrayGetDescriptor * @see JCudaDriver#cuMemAlloc * @see JCudaDriver#cuMemAllocHost * @see JCudaDriver#cuMemAllocPitch * @see JCudaDriver#cuMemcpy2D * @see JCudaDriver#cuMemcpy2DAsync * @see JCudaDriver#cuMemcpy2DUnaligned * @see JCudaDriver#cuMemcpy3D * @see JCudaDriver#cuMemcpy3DAsync * @see JCudaDriver#cuMemcpyAtoA * @see JCudaDriver#cuMemcpyAtoD * @see JCudaDriver#cuMemcpyAtoH * @see JCudaDriver#cuMemcpyAtoHAsync * @see JCudaDriver#cuMemcpyDtoA * @see JCudaDriver#cuMemcpyDtoD * @see JCudaDriver#cuMemcpyDtoDAsync * @see JCudaDriver#cuMemcpyDtoH * @see JCudaDriver#cuMemcpyDtoHAsync * @see JCudaDriver#cuMemcpyHtoA * @see JCudaDriver#cuMemcpyHtoAAsync * @see JCudaDriver#cuMemcpyHtoD * @see JCudaDriver#cuMemcpyHtoDAsync * @see JCudaDriver#cuMemFree * @see JCudaDriver#cuMemFreeHost * @see JCudaDriver#cuMemGetInfo * @see JCudaDriver#cuMemHostAlloc * @see JCudaDriver#cuMemHostGetDevicePointer * @see JCudaDriver#cuMemsetD2D8 * @see JCudaDriver#cuMemsetD2D16 * @see JCudaDriver#cuMemsetD2D32 * @see JCudaDriver#cuMemsetD8 * @see JCudaDriver#cuMemsetD16 * @see JCudaDriver#cuMemsetD32 */ public static int cuMemGetAddressRange(CUdeviceptr pbase, long psize[], CUdeviceptr dptr) { return checkResult(cuMemGetAddressRangeNative(pbase, psize, dptr)); } private static native int cuMemGetAddressRangeNative(CUdeviceptr pbase, long psize[], CUdeviceptr dptr); /** * Allocates page-locked host memory. * *
     * CUresult cuMemAllocHost (
     *      void** pp,
     *      size_t bytesize )
     * 
*
*

Allocates page-locked host memory. Allocates bytesize bytes of host memory that is page-locked and accessible to the device. The driver tracks the virtual memory ranges allocated with this function and automatically accelerates calls to functions such as cuMemcpy(). Since the memory can be accessed directly by the device, it can be read or written with much higher bandwidth than pageable memory obtained with functions such as malloc(). Allocating excessive amounts of memory with cuMemAllocHost() may degrade system performance, since it reduces the amount of memory available to the system for paging. As a result, this function is best used sparingly to allocate staging areas for data exchange between host and device.

*

Note all host memory allocated using cuMemHostAlloc() will automatically be immediately accessible to all contexts on all devices which support unified addressing (as may be queried using CU_DEVICE_ATTRIBUTE_UNIFIED_ADDRESSING). The device pointer that may be used to access this host memory from those contexts is always equal to the returned host pointer *pp. See Unified Addressing for additional details.

*
* Note: *

Note that this * function may also return error codes from previous, asynchronous * launches. *

*
*

*
* * @param pp Returned host pointer to page-locked memory * @param bytesize Requested allocation size in bytes * * @return CUDA_SUCCESS, CUDA_ERROR_DEINITIALIZED, CUDA_ERROR_NOT_INITIALIZED, * CUDA_ERROR_INVALID_CONTEXT, CUDA_ERROR_INVALID_VALUE, * CUDA_ERROR_OUT_OF_MEMORY * * @see JCudaDriver#cuArray3DCreate * @see JCudaDriver#cuArray3DGetDescriptor * @see JCudaDriver#cuArrayCreate * @see JCudaDriver#cuArrayDestroy * @see JCudaDriver#cuArrayGetDescriptor * @see JCudaDriver#cuMemAlloc * @see JCudaDriver#cuMemAllocPitch * @see JCudaDriver#cuMemcpy2D * @see JCudaDriver#cuMemcpy2DAsync * @see JCudaDriver#cuMemcpy2DUnaligned * @see JCudaDriver#cuMemcpy3D * @see JCudaDriver#cuMemcpy3DAsync * @see JCudaDriver#cuMemcpyAtoA * @see JCudaDriver#cuMemcpyAtoD * @see JCudaDriver#cuMemcpyAtoH * @see JCudaDriver#cuMemcpyAtoHAsync * @see JCudaDriver#cuMemcpyDtoA * @see JCudaDriver#cuMemcpyDtoD * @see JCudaDriver#cuMemcpyDtoDAsync * @see JCudaDriver#cuMemcpyDtoH * @see JCudaDriver#cuMemcpyDtoHAsync * @see JCudaDriver#cuMemcpyHtoA * @see JCudaDriver#cuMemcpyHtoAAsync * @see JCudaDriver#cuMemcpyHtoD * @see JCudaDriver#cuMemcpyHtoDAsync * @see JCudaDriver#cuMemFree * @see JCudaDriver#cuMemFreeHost * @see JCudaDriver#cuMemGetAddressRange * @see JCudaDriver#cuMemGetInfo * @see JCudaDriver#cuMemHostAlloc * @see JCudaDriver#cuMemHostGetDevicePointer * @see JCudaDriver#cuMemsetD2D8 * @see JCudaDriver#cuMemsetD2D16 * @see JCudaDriver#cuMemsetD2D32 * @see JCudaDriver#cuMemsetD8 * @see JCudaDriver#cuMemsetD16 * @see JCudaDriver#cuMemsetD32 */ public static int cuMemAllocHost(Pointer pointer, long bytesize) { return checkResult(cuMemAllocHostNative(pointer, bytesize)); } private static native int cuMemAllocHostNative(Pointer pp, long bytesize); /** * Frees page-locked host memory. * *
     * CUresult cuMemFreeHost (
     *      void* p )
     * 
*
*

Frees page-locked host memory. Frees the memory space pointed to by p, which must have been returned by a previous call to cuMemAllocHost().

*
* Note: *

Note that this * function may also return error codes from previous, asynchronous * launches. *

*
*

*
* * @param p Pointer to memory to free * * @return CUDA_SUCCESS, CUDA_ERROR_DEINITIALIZED, CUDA_ERROR_NOT_INITIALIZED, * CUDA_ERROR_INVALID_CONTEXT, CUDA_ERROR_INVALID_VALUE * * @see JCudaDriver#cuArray3DCreate * @see JCudaDriver#cuArray3DGetDescriptor * @see JCudaDriver#cuArrayCreate * @see JCudaDriver#cuArrayDestroy * @see JCudaDriver#cuArrayGetDescriptor * @see JCudaDriver#cuMemAlloc * @see JCudaDriver#cuMemAllocHost * @see JCudaDriver#cuMemAllocPitch * @see JCudaDriver#cuMemcpy2D * @see JCudaDriver#cuMemcpy2DAsync * @see JCudaDriver#cuMemcpy2DUnaligned * @see JCudaDriver#cuMemcpy3D * @see JCudaDriver#cuMemcpy3DAsync * @see JCudaDriver#cuMemcpyAtoA * @see JCudaDriver#cuMemcpyAtoD * @see JCudaDriver#cuMemcpyAtoH * @see JCudaDriver#cuMemcpyAtoHAsync * @see JCudaDriver#cuMemcpyDtoA * @see JCudaDriver#cuMemcpyDtoD * @see JCudaDriver#cuMemcpyDtoDAsync * @see JCudaDriver#cuMemcpyDtoH * @see JCudaDriver#cuMemcpyDtoHAsync * @see JCudaDriver#cuMemcpyHtoA * @see JCudaDriver#cuMemcpyHtoAAsync * @see JCudaDriver#cuMemcpyHtoD * @see JCudaDriver#cuMemcpyHtoDAsync * @see JCudaDriver#cuMemFree * @see JCudaDriver#cuMemGetAddressRange * @see JCudaDriver#cuMemGetInfo * @see JCudaDriver#cuMemHostAlloc * @see JCudaDriver#cuMemHostGetDevicePointer * @see JCudaDriver#cuMemsetD2D8 * @see JCudaDriver#cuMemsetD2D16 * @see JCudaDriver#cuMemsetD2D32 * @see JCudaDriver#cuMemsetD8 * @see JCudaDriver#cuMemsetD16 * @see JCudaDriver#cuMemsetD32 */ public static int cuMemFreeHost(Pointer p) { return checkResult(cuMemFreeHostNative(p)); } private static native int cuMemFreeHostNative(Pointer p); /** * Copies memory from Host to Device. * *
     * CUresult cuMemcpyHtoD (
     *      CUdeviceptr dstDevice,
     *      const void* srcHost,
     *      size_t ByteCount )
     * 
*
*

Copies memory from Host to Device. Copies from host memory to device memory. dstDevice and srcHost are the base addresses of the destination and source, respectively. ByteCount specifies the number of bytes to copy. Note that this function is synchronous.

*
* Note: *

Note that this * function may also return error codes from previous, asynchronous * launches. *

*
*

*
* * @param dstDevice Destination device pointer * @param srcHost Source host pointer * @param ByteCount Size of memory copy in bytes * * @return CUDA_SUCCESS, CUDA_ERROR_DEINITIALIZED, CUDA_ERROR_NOT_INITIALIZED, * CUDA_ERROR_INVALID_CONTEXT, CUDA_ERROR_INVALID_VALUE * * @see JCudaDriver#cuArray3DCreate * @see JCudaDriver#cuArray3DGetDescriptor * @see JCudaDriver#cuArrayCreate * @see JCudaDriver#cuArrayDestroy * @see JCudaDriver#cuArrayGetDescriptor * @see JCudaDriver#cuMemAlloc * @see JCudaDriver#cuMemAllocHost * @see JCudaDriver#cuMemAllocPitch * @see JCudaDriver#cuMemcpy2D * @see JCudaDriver#cuMemcpy2DAsync * @see JCudaDriver#cuMemcpy2DUnaligned * @see JCudaDriver#cuMemcpy3D * @see JCudaDriver#cuMemcpy3DAsync * @see JCudaDriver#cuMemcpyAtoA * @see JCudaDriver#cuMemcpyAtoD * @see JCudaDriver#cuMemcpyAtoH * @see JCudaDriver#cuMemcpyAtoHAsync * @see JCudaDriver#cuMemcpyDtoA * @see JCudaDriver#cuMemcpyDtoD * @see JCudaDriver#cuMemcpyDtoDAsync * @see JCudaDriver#cuMemcpyDtoH * @see JCudaDriver#cuMemcpyDtoHAsync * @see JCudaDriver#cuMemcpyHtoA * @see JCudaDriver#cuMemcpyHtoAAsync * @see JCudaDriver#cuMemcpyHtoDAsync * @see JCudaDriver#cuMemFree * @see JCudaDriver#cuMemFreeHost * @see JCudaDriver#cuMemGetAddressRange * @see JCudaDriver#cuMemGetInfo * @see JCudaDriver#cuMemHostAlloc * @see JCudaDriver#cuMemHostGetDevicePointer * @see JCudaDriver#cuMemsetD2D8 * @see JCudaDriver#cuMemsetD2D16 * @see JCudaDriver#cuMemsetD2D32 * @see JCudaDriver#cuMemsetD8 * @see JCudaDriver#cuMemsetD16 * @see JCudaDriver#cuMemsetD32 */ public static int cuMemcpyHtoD(CUdeviceptr dstDevice, Pointer srcHost, long ByteCount) { return checkResult(cuMemcpyHtoDNative(dstDevice, srcHost, ByteCount)); } private static native int cuMemcpyHtoDNative(CUdeviceptr dstDevice, Pointer srcHost, long ByteCount); /** * Copies memory from Device to Host. * *
     * CUresult cuMemcpyDtoH (
     *      void* dstHost,
     *      CUdeviceptr srcDevice,
     *      size_t ByteCount )
     * 
*
*

Copies memory from Device to Host. Copies from device to host memory. dstHost and srcDevice specify the base pointers of the destination and source, respectively. ByteCount specifies the number of bytes to copy. Note that this function is synchronous.

*
* Note: *

Note that this * function may also return error codes from previous, asynchronous * launches. *

*
*

*
* * @param dstHost Destination host pointer * @param srcDevice Source device pointer * @param ByteCount Size of memory copy in bytes * * @return CUDA_SUCCESS, CUDA_ERROR_DEINITIALIZED, CUDA_ERROR_NOT_INITIALIZED, * CUDA_ERROR_INVALID_CONTEXT, CUDA_ERROR_INVALID_VALUE * * @see JCudaDriver#cuArray3DCreate * @see JCudaDriver#cuArray3DGetDescriptor * @see JCudaDriver#cuArrayCreate * @see JCudaDriver#cuArrayDestroy * @see JCudaDriver#cuArrayGetDescriptor * @see JCudaDriver#cuMemAlloc * @see JCudaDriver#cuMemAllocHost * @see JCudaDriver#cuMemAllocPitch * @see JCudaDriver#cuMemcpy2D * @see JCudaDriver#cuMemcpy2DAsync * @see JCudaDriver#cuMemcpy2DUnaligned * @see JCudaDriver#cuMemcpy3D * @see JCudaDriver#cuMemcpy3DAsync * @see JCudaDriver#cuMemcpyAtoA * @see JCudaDriver#cuMemcpyAtoD * @see JCudaDriver#cuMemcpyAtoH * @see JCudaDriver#cuMemcpyAtoHAsync * @see JCudaDriver#cuMemcpyDtoA * @see JCudaDriver#cuMemcpyDtoD * @see JCudaDriver#cuMemcpyDtoDAsync * @see JCudaDriver#cuMemcpyDtoHAsync * @see JCudaDriver#cuMemcpyHtoA * @see JCudaDriver#cuMemcpyHtoAAsync * @see JCudaDriver#cuMemcpyHtoD * @see JCudaDriver#cuMemcpyHtoDAsync * @see JCudaDriver#cuMemFree * @see JCudaDriver#cuMemFreeHost * @see JCudaDriver#cuMemGetAddressRange * @see JCudaDriver#cuMemGetInfo * @see JCudaDriver#cuMemHostAlloc * @see JCudaDriver#cuMemHostGetDevicePointer * @see JCudaDriver#cuMemsetD2D8 * @see JCudaDriver#cuMemsetD2D16 * @see JCudaDriver#cuMemsetD2D32 * @see JCudaDriver#cuMemsetD8 * @see JCudaDriver#cuMemsetD16 * @see JCudaDriver#cuMemsetD32 */ public static int cuMemcpyDtoH(Pointer dstHost, CUdeviceptr srcDevice, long ByteCount) { return checkResult(cuMemcpyDtoHNative(dstHost, srcDevice, ByteCount)); } private static native int cuMemcpyDtoHNative(Pointer dstHost, CUdeviceptr srcDevice, long ByteCount); /** * Copies memory from Device to Device. * *
     * CUresult cuMemcpyDtoD (
     *      CUdeviceptr dstDevice,
     *      CUdeviceptr srcDevice,
     *      size_t ByteCount )
     * 
*
*

Copies memory from Device to Device. Copies from device memory to device memory. dstDevice and srcDevice are the base pointers of the destination and source, respectively. ByteCount specifies the number of bytes to copy. Note that this function is asynchronous.

*
* Note: *

Note that this * function may also return error codes from previous, asynchronous * launches. *

*
*

*
* * @param dstDevice Destination device pointer * @param srcDevice Source device pointer * @param ByteCount Size of memory copy in bytes * * @return CUDA_SUCCESS, CUDA_ERROR_DEINITIALIZED, CUDA_ERROR_NOT_INITIALIZED, * CUDA_ERROR_INVALID_CONTEXT, CUDA_ERROR_INVALID_VALUE * * @see JCudaDriver#cuArray3DCreate * @see JCudaDriver#cuArray3DGetDescriptor * @see JCudaDriver#cuArrayCreate * @see JCudaDriver#cuArrayDestroy * @see JCudaDriver#cuArrayGetDescriptor * @see JCudaDriver#cuMemAlloc * @see JCudaDriver#cuMemAllocHost * @see JCudaDriver#cuMemAllocPitch * @see JCudaDriver#cuMemcpy2D * @see JCudaDriver#cuMemcpy2DAsync * @see JCudaDriver#cuMemcpy2DUnaligned * @see JCudaDriver#cuMemcpy3D * @see JCudaDriver#cuMemcpy3DAsync * @see JCudaDriver#cuMemcpyAtoA * @see JCudaDriver#cuMemcpyAtoD * @see JCudaDriver#cuMemcpyAtoH * @see JCudaDriver#cuMemcpyAtoHAsync * @see JCudaDriver#cuMemcpyDtoA * @see JCudaDriver#cuMemcpyDtoH * @see JCudaDriver#cuMemcpyDtoHAsync * @see JCudaDriver#cuMemcpyHtoA * @see JCudaDriver#cuMemcpyHtoAAsync * @see JCudaDriver#cuMemcpyHtoD * @see JCudaDriver#cuMemcpyHtoDAsync * @see JCudaDriver#cuMemFree * @see JCudaDriver#cuMemFreeHost * @see JCudaDriver#cuMemGetAddressRange * @see JCudaDriver#cuMemGetInfo * @see JCudaDriver#cuMemHostAlloc * @see JCudaDriver#cuMemHostGetDevicePointer * @see JCudaDriver#cuMemsetD2D8 * @see JCudaDriver#cuMemsetD2D16 * @see JCudaDriver#cuMemsetD2D32 * @see JCudaDriver#cuMemsetD8 * @see JCudaDriver#cuMemsetD16 * @see JCudaDriver#cuMemsetD32 */ public static int cuMemcpyDtoD(CUdeviceptr dstDevice, CUdeviceptr srcDevice, long ByteCount) { return checkResult(cuMemcpyDtoDNative(dstDevice, srcDevice, ByteCount)); } private static native int cuMemcpyDtoDNative(CUdeviceptr dstDevice, CUdeviceptr srcDevice, long ByteCount); /** * Copies memory from Device to Array. * *
     * CUresult cuMemcpyDtoA (
     *      CUarray dstArray,
     *      size_t dstOffset,
     *      CUdeviceptr srcDevice,
     *      size_t ByteCount )
     * 
*
*

Copies memory from Device to Array. Copies from device memory to a 1D CUDA array. dstArray and dstOffset specify the CUDA array handle and starting index of the destination data. srcDevice specifies the base pointer of the source. ByteCount specifies the number of bytes to copy.

*
* Note: *

Note that this * function may also return error codes from previous, asynchronous * launches. *

*
*

*
* * @param dstArray Destination array * @param dstOffset Offset in bytes of destination array * @param srcDevice Source device pointer * @param ByteCount Size of memory copy in bytes * * @return CUDA_SUCCESS, CUDA_ERROR_DEINITIALIZED, CUDA_ERROR_NOT_INITIALIZED, * CUDA_ERROR_INVALID_CONTEXT, CUDA_ERROR_INVALID_VALUE * * @see JCudaDriver#cuArray3DCreate * @see JCudaDriver#cuArray3DGetDescriptor * @see JCudaDriver#cuArrayCreate * @see JCudaDriver#cuArrayDestroy * @see JCudaDriver#cuArrayGetDescriptor * @see JCudaDriver#cuMemAlloc * @see JCudaDriver#cuMemAllocHost * @see JCudaDriver#cuMemAllocPitch * @see JCudaDriver#cuMemcpy2D * @see JCudaDriver#cuMemcpy2DAsync * @see JCudaDriver#cuMemcpy2DUnaligned * @see JCudaDriver#cuMemcpy3D * @see JCudaDriver#cuMemcpy3DAsync * @see JCudaDriver#cuMemcpyAtoA * @see JCudaDriver#cuMemcpyAtoD * @see JCudaDriver#cuMemcpyAtoH * @see JCudaDriver#cuMemcpyAtoHAsync * @see JCudaDriver#cuMemcpyDtoD * @see JCudaDriver#cuMemcpyDtoDAsync * @see JCudaDriver#cuMemcpyDtoH * @see JCudaDriver#cuMemcpyDtoHAsync * @see JCudaDriver#cuMemcpyHtoA * @see JCudaDriver#cuMemcpyHtoAAsync * @see JCudaDriver#cuMemcpyHtoD * @see JCudaDriver#cuMemcpyHtoDAsync * @see JCudaDriver#cuMemFree * @see JCudaDriver#cuMemFreeHost * @see JCudaDriver#cuMemGetAddressRange * @see JCudaDriver#cuMemGetInfo * @see JCudaDriver#cuMemHostAlloc * @see JCudaDriver#cuMemHostGetDevicePointer * @see JCudaDriver#cuMemsetD2D8 * @see JCudaDriver#cuMemsetD2D16 * @see JCudaDriver#cuMemsetD2D32 * @see JCudaDriver#cuMemsetD8 * @see JCudaDriver#cuMemsetD16 * @see JCudaDriver#cuMemsetD32 */ public static int cuMemcpyDtoA(CUarray dstArray, long dstIndex, CUdeviceptr srcDevice, long ByteCount) { return checkResult(cuMemcpyDtoANative(dstArray, dstIndex, srcDevice, ByteCount)); } private static native int cuMemcpyDtoANative(CUarray dstArray, long dstIndex, CUdeviceptr srcDevice, long ByteCount); /** * Copies memory from Array to Device. * *
     * CUresult cuMemcpyAtoD (
     *      CUdeviceptr dstDevice,
     *      CUarray srcArray,
     *      size_t srcOffset,
     *      size_t ByteCount )
     * 
*
*

Copies memory from Array to Device. Copies from one 1D CUDA array to device memory. dstDevice specifies the base pointer of the destination and must be naturally aligned with the CUDA array elements. srcArray and srcOffset specify the CUDA array handle and the offset in bytes into the array where the copy is to begin. ByteCount specifies the number of bytes to copy and must be evenly divisible by the array element size.

*
* Note: *

Note that this * function may also return error codes from previous, asynchronous * launches. *

*
*

*
* * @param dstDevice Destination device pointer * @param srcArray Source array * @param srcOffset Offset in bytes of source array * @param ByteCount Size of memory copy in bytes * * @return CUDA_SUCCESS, CUDA_ERROR_DEINITIALIZED, CUDA_ERROR_NOT_INITIALIZED, * CUDA_ERROR_INVALID_CONTEXT, CUDA_ERROR_INVALID_VALUE * * @see JCudaDriver#cuArray3DCreate * @see JCudaDriver#cuArray3DGetDescriptor * @see JCudaDriver#cuArrayCreate * @see JCudaDriver#cuArrayDestroy * @see JCudaDriver#cuArrayGetDescriptor * @see JCudaDriver#cuMemAlloc * @see JCudaDriver#cuMemAllocHost * @see JCudaDriver#cuMemAllocPitch * @see JCudaDriver#cuMemcpy2D * @see JCudaDriver#cuMemcpy2DAsync * @see JCudaDriver#cuMemcpy2DUnaligned * @see JCudaDriver#cuMemcpy3D * @see JCudaDriver#cuMemcpy3DAsync * @see JCudaDriver#cuMemcpyAtoA * @see JCudaDriver#cuMemcpyAtoH * @see JCudaDriver#cuMemcpyAtoHAsync * @see JCudaDriver#cuMemcpyDtoA * @see JCudaDriver#cuMemcpyDtoD * @see JCudaDriver#cuMemcpyDtoDAsync * @see JCudaDriver#cuMemcpyDtoH * @see JCudaDriver#cuMemcpyDtoHAsync * @see JCudaDriver#cuMemcpyHtoA * @see JCudaDriver#cuMemcpyHtoAAsync * @see JCudaDriver#cuMemcpyHtoD * @see JCudaDriver#cuMemcpyHtoDAsync * @see JCudaDriver#cuMemFree * @see JCudaDriver#cuMemFreeHost * @see JCudaDriver#cuMemGetAddressRange * @see JCudaDriver#cuMemGetInfo * @see JCudaDriver#cuMemHostAlloc * @see JCudaDriver#cuMemHostGetDevicePointer * @see JCudaDriver#cuMemsetD2D8 * @see JCudaDriver#cuMemsetD2D16 * @see JCudaDriver#cuMemsetD2D32 * @see JCudaDriver#cuMemsetD8 * @see JCudaDriver#cuMemsetD16 * @see JCudaDriver#cuMemsetD32 */ public static int cuMemcpyAtoD(CUdeviceptr dstDevice, CUarray hSrc, long SrcIndex, long ByteCount) { return checkResult(cuMemcpyAtoDNative(dstDevice, hSrc, SrcIndex, ByteCount)); } private static native int cuMemcpyAtoDNative(CUdeviceptr dstDevice, CUarray hSrc, long SrcIndex, long ByteCount); /** * Copies memory from Host to Array. * *
     * CUresult cuMemcpyHtoA (
     *      CUarray dstArray,
     *      size_t dstOffset,
     *      const void* srcHost,
     *      size_t ByteCount )
     * 
*
*

Copies memory from Host to Array. Copies from host memory to a 1D CUDA array. dstArray and dstOffset specify the CUDA array handle and starting offset in bytes of the destination data. pSrc specifies the base address of the source. ByteCount specifies the number of bytes to copy.

*
* Note: *

Note that this * function may also return error codes from previous, asynchronous * launches. *

*
*

*
* * @param dstArray Destination array * @param dstOffset Offset in bytes of destination array * @param srcHost Source host pointer * @param ByteCount Size of memory copy in bytes * * @return CUDA_SUCCESS, CUDA_ERROR_DEINITIALIZED, CUDA_ERROR_NOT_INITIALIZED, * CUDA_ERROR_INVALID_CONTEXT, CUDA_ERROR_INVALID_VALUE * * @see JCudaDriver#cuArray3DCreate * @see JCudaDriver#cuArray3DGetDescriptor * @see JCudaDriver#cuArrayCreate * @see JCudaDriver#cuArrayDestroy * @see JCudaDriver#cuArrayGetDescriptor * @see JCudaDriver#cuMemAlloc * @see JCudaDriver#cuMemAllocHost * @see JCudaDriver#cuMemAllocPitch * @see JCudaDriver#cuMemcpy2D * @see JCudaDriver#cuMemcpy2DAsync * @see JCudaDriver#cuMemcpy2DUnaligned * @see JCudaDriver#cuMemcpy3D * @see JCudaDriver#cuMemcpy3DAsync * @see JCudaDriver#cuMemcpyAtoA * @see JCudaDriver#cuMemcpyAtoD * @see JCudaDriver#cuMemcpyAtoH * @see JCudaDriver#cuMemcpyAtoHAsync * @see JCudaDriver#cuMemcpyDtoA * @see JCudaDriver#cuMemcpyDtoD * @see JCudaDriver#cuMemcpyDtoDAsync * @see JCudaDriver#cuMemcpyDtoH * @see JCudaDriver#cuMemcpyDtoHAsync * @see JCudaDriver#cuMemcpyHtoAAsync * @see JCudaDriver#cuMemcpyHtoD * @see JCudaDriver#cuMemcpyHtoDAsync * @see JCudaDriver#cuMemFree * @see JCudaDriver#cuMemFreeHost * @see JCudaDriver#cuMemGetAddressRange * @see JCudaDriver#cuMemGetInfo * @see JCudaDriver#cuMemHostAlloc * @see JCudaDriver#cuMemHostGetDevicePointer * @see JCudaDriver#cuMemsetD2D8 * @see JCudaDriver#cuMemsetD2D16 * @see JCudaDriver#cuMemsetD2D32 * @see JCudaDriver#cuMemsetD8 * @see JCudaDriver#cuMemsetD16 * @see JCudaDriver#cuMemsetD32 */ public static int cuMemcpyHtoA(CUarray dstArray, long dstIndex, Pointer pSrc, long ByteCount) { return checkResult(cuMemcpyHtoANative(dstArray, dstIndex, pSrc, ByteCount)); } private static native int cuMemcpyHtoANative(CUarray dstArray, long dstIndex, Pointer pSrc, long ByteCount); /** * Copies memory from Array to Host. * *
     * CUresult cuMemcpyAtoH (
     *      void* dstHost,
     *      CUarray srcArray,
     *      size_t srcOffset,
     *      size_t ByteCount )
     * 
*
*

Copies memory from Array to Host. Copies from one 1D CUDA array to host memory. dstHost specifies the base pointer of the destination. srcArray and srcOffset specify the CUDA array handle and starting offset in bytes of the source data. ByteCount specifies the number of bytes to copy.

*
* Note: *

Note that this * function may also return error codes from previous, asynchronous * launches. *

*
*

*
* * @param dstHost Destination device pointer * @param srcArray Source array * @param srcOffset Offset in bytes of source array * @param ByteCount Size of memory copy in bytes * * @return CUDA_SUCCESS, CUDA_ERROR_DEINITIALIZED, CUDA_ERROR_NOT_INITIALIZED, * CUDA_ERROR_INVALID_CONTEXT, CUDA_ERROR_INVALID_VALUE * * @see JCudaDriver#cuArray3DCreate * @see JCudaDriver#cuArray3DGetDescriptor * @see JCudaDriver#cuArrayCreate * @see JCudaDriver#cuArrayDestroy * @see JCudaDriver#cuArrayGetDescriptor * @see JCudaDriver#cuMemAlloc * @see JCudaDriver#cuMemAllocHost * @see JCudaDriver#cuMemAllocPitch * @see JCudaDriver#cuMemcpy2D * @see JCudaDriver#cuMemcpy2DAsync * @see JCudaDriver#cuMemcpy2DUnaligned * @see JCudaDriver#cuMemcpy3D * @see JCudaDriver#cuMemcpy3DAsync * @see JCudaDriver#cuMemcpyAtoA * @see JCudaDriver#cuMemcpyAtoD * @see JCudaDriver#cuMemcpyAtoHAsync * @see JCudaDriver#cuMemcpyDtoA * @see JCudaDriver#cuMemcpyDtoD * @see JCudaDriver#cuMemcpyDtoDAsync * @see JCudaDriver#cuMemcpyDtoH * @see JCudaDriver#cuMemcpyDtoHAsync * @see JCudaDriver#cuMemcpyHtoA * @see JCudaDriver#cuMemcpyHtoAAsync * @see JCudaDriver#cuMemcpyHtoD * @see JCudaDriver#cuMemcpyHtoDAsync * @see JCudaDriver#cuMemFree * @see JCudaDriver#cuMemFreeHost * @see JCudaDriver#cuMemGetAddressRange * @see JCudaDriver#cuMemGetInfo * @see JCudaDriver#cuMemHostAlloc * @see JCudaDriver#cuMemHostGetDevicePointer * @see JCudaDriver#cuMemsetD2D8 * @see JCudaDriver#cuMemsetD2D16 * @see JCudaDriver#cuMemsetD2D32 * @see JCudaDriver#cuMemsetD8 * @see JCudaDriver#cuMemsetD16 * @see JCudaDriver#cuMemsetD32 */ public static int cuMemcpyAtoH(Pointer dstHost, CUarray srcArray, long srcIndex, long ByteCount) { return checkResult(cuMemcpyAtoHNative(dstHost, srcArray, srcIndex, ByteCount)); } private static native int cuMemcpyAtoHNative(Pointer dstHost, CUarray srcArray, long srcIndex, long ByteCount); /** * Copies memory from Array to Array. * *
     * CUresult cuMemcpyAtoA (
     *      CUarray dstArray,
     *      size_t dstOffset,
     *      CUarray srcArray,
     *      size_t srcOffset,
     *      size_t ByteCount )
     * 
*
*

Copies memory from Array to Array. Copies from one 1D CUDA array to another. dstArray and srcArray specify the handles of the destination and source CUDA arrays for the copy, respectively. dstOffset and srcOffset specify the destination and source offsets in bytes into the CUDA arrays. ByteCount is the number of bytes to be copied. The size of the elements in the CUDA arrays need not be the same format, but the elements must be the same size; and ByteCount must be evenly divisible by that size.

*
* Note: *

Note that this * function may also return error codes from previous, asynchronous * launches. *

*
*

*
* * @param dstArray Destination array * @param dstOffset Offset in bytes of destination array * @param srcArray Source array * @param srcOffset Offset in bytes of source array * @param ByteCount Size of memory copy in bytes * * @return CUDA_SUCCESS, CUDA_ERROR_DEINITIALIZED, CUDA_ERROR_NOT_INITIALIZED, * CUDA_ERROR_INVALID_CONTEXT, CUDA_ERROR_INVALID_VALUE * * @see JCudaDriver#cuArray3DCreate * @see JCudaDriver#cuArray3DGetDescriptor * @see JCudaDriver#cuArrayCreate * @see JCudaDriver#cuArrayDestroy * @see JCudaDriver#cuArrayGetDescriptor * @see JCudaDriver#cuMemAlloc * @see JCudaDriver#cuMemAllocHost * @see JCudaDriver#cuMemAllocPitch * @see JCudaDriver#cuMemcpy2D * @see JCudaDriver#cuMemcpy2DAsync * @see JCudaDriver#cuMemcpy2DUnaligned * @see JCudaDriver#cuMemcpy3D * @see JCudaDriver#cuMemcpy3DAsync * @see JCudaDriver#cuMemcpyAtoD * @see JCudaDriver#cuMemcpyAtoH * @see JCudaDriver#cuMemcpyAtoHAsync * @see JCudaDriver#cuMemcpyDtoA * @see JCudaDriver#cuMemcpyDtoD * @see JCudaDriver#cuMemcpyDtoDAsync * @see JCudaDriver#cuMemcpyDtoH * @see JCudaDriver#cuMemcpyDtoHAsync * @see JCudaDriver#cuMemcpyHtoA * @see JCudaDriver#cuMemcpyHtoAAsync * @see JCudaDriver#cuMemcpyHtoD * @see JCudaDriver#cuMemcpyHtoDAsync * @see JCudaDriver#cuMemFree * @see JCudaDriver#cuMemFreeHost * @see JCudaDriver#cuMemGetAddressRange * @see JCudaDriver#cuMemGetInfo * @see JCudaDriver#cuMemHostAlloc * @see JCudaDriver#cuMemHostGetDevicePointer * @see JCudaDriver#cuMemsetD2D8 * @see JCudaDriver#cuMemsetD2D16 * @see JCudaDriver#cuMemsetD2D32 * @see JCudaDriver#cuMemsetD8 * @see JCudaDriver#cuMemsetD16 * @see JCudaDriver#cuMemsetD32 */ public static int cuMemcpyAtoA(CUarray dstArray, long dstIndex, CUarray srcArray, long srcIndex, long ByteCount) { return checkResult(cuMemcpyAtoANative(dstArray, dstIndex, srcArray, srcIndex, ByteCount)); } private static native int cuMemcpyAtoANative(CUarray dstArray, long dstIndex, CUarray srcArray, long srcIndex, long ByteCount); /** * Copies memory for 2D arrays. 
* *
     * CUresult cuMemcpy2D (
     *      const CUDA_MEMCPY2D* pCopy )
     * 
*
*

Copies memory for 2D arrays. Perform a * 2D memory copy according to the parameters specified in pCopy. * The CUDA_MEMCPY2D structure is defined as: *

*
   typedef struct CUDA_MEMCPY2D_st {
     *       unsigned int srcXInBytes, srcY;
     *       CUmemorytype srcMemoryType;
     *           const void *srcHost;
     *           CUdeviceptr srcDevice;
     *           CUarray srcArray;
     *           unsigned int srcPitch;
     * 
     *       unsigned int dstXInBytes, dstY;
     *       CUmemorytype dstMemoryType;
     *           void *dstHost;
     *           CUdeviceptr dstDevice;
     *           CUarray dstArray;
     *           unsigned int dstPitch;
     * 
     *       unsigned int WidthInBytes;
     *       unsigned int Height;
     *    } CUDA_MEMCPY2D;
* where: *
    *
  • *

    srcMemoryType and dstMemoryType * specify the type of memory of the source and destination, respectively; * CUmemorytype_enum * is defined as: *

    *
  • *
*

*
   typedef enum CUmemorytype_enum {
     *       CU_MEMORYTYPE_HOST = 0x01,
     *       CU_MEMORYTYPE_DEVICE = 0x02,
     *       CU_MEMORYTYPE_ARRAY = 0x03,
     *       CU_MEMORYTYPE_UNIFIED = 0x04
     *    } CUmemorytype;
*

*

If srcMemoryType is CU_MEMORYTYPE_UNIFIED, * srcDevice and srcPitch specify the (unified virtual address space) base * address of the source data and the bytes per row * to apply. srcArray is ignored. This value * may be used only if unified addressing is supported in the calling * context. *

*

If srcMemoryType is CU_MEMORYTYPE_HOST, * srcHost and srcPitch specify the (host) base address of the source data * and the bytes per row to apply. srcArray is ignored. *

*

If srcMemoryType is CU_MEMORYTYPE_DEVICE, * srcDevice and srcPitch specify the (device) base address of the source * data and the bytes per row to apply. srcArray is * ignored. *

*

If srcMemoryType is CU_MEMORYTYPE_ARRAY, * srcArray specifies the handle of the source data. srcHost, srcDevice * and srcPitch are ignored. *

*

If dstMemoryType is CU_MEMORYTYPE_HOST, * dstHost and dstPitch specify the (host) base address of the destination * data and the bytes per row to apply. dstArray is * ignored. *

*

If dstMemoryType is CU_MEMORYTYPE_UNIFIED, * dstDevice and dstPitch specify the (unified virtual address space) base * address of the destination data and the bytes per row * to apply. dstArray is ignored. This value * may be used only if unified addressing is supported in the calling * context. *

*

If dstMemoryType is CU_MEMORYTYPE_DEVICE, * dstDevice and dstPitch specify the (device) base address of the * destination data and the bytes per row to apply. dstArray * is ignored. *

*

If dstMemoryType is CU_MEMORYTYPE_ARRAY, * dstArray specifies the handle of the destination data. dstHost, * dstDevice and dstPitch are ignored. *

*
    *
  • *

    srcXInBytes and srcY specify * the base address of the source data for the copy. *

    *
  • *
*

*

For host pointers, the starting address * is *

  void* Start = (void*)((char*)srcHost+srcY*srcPitch +
     * srcXInBytes);
*

*

For device pointers, the starting * address is *

  CUdeviceptr Start =
     * srcDevice+srcY*srcPitch+srcXInBytes;
*

*

For CUDA arrays, srcXInBytes must be * evenly divisible by the array element size. *

*
    *
  • *

    dstXInBytes and dstY specify * the base address of the destination data for the copy. *

    *
  • *
*

*

For host pointers, the base address is *

  void* dstStart = (void*)((char*)dstHost+dstY*dstPitch +
     * dstXInBytes);
*

*

For device pointers, the starting * address is *

  CUdeviceptr dstStart =
     * dstDevice+dstY*dstPitch+dstXInBytes;
*

*

For CUDA arrays, dstXInBytes must be * evenly divisible by the array element size. *

*
    *
  • *

    WidthInBytes and Height specify * the width (in bytes) and height of the 2D copy being performed. *

    *
  • *
  • *

    If specified, srcPitch must be * greater than or equal to WidthInBytes + srcXInBytes, and dstPitch must * be greater than or equal * to WidthInBytes + dstXInBytes. *

    *
  • *
*

*

cuMemcpy2D() returns an error if any * pitch is greater than the maximum allowed (CU_DEVICE_ATTRIBUTE_MAX_PITCH). * cuMemAllocPitch() passes back pitches that always work with cuMemcpy2D(). * On intra-device memory copies (device to device, CUDA array to device, * CUDA array to CUDA array), cuMemcpy2D() may fail for pitches not * computed by cuMemAllocPitch(). cuMemcpy2DUnaligned() does not have this * restriction, but may run significantly slower in the cases where * cuMemcpy2D() would have returned an error code. *

*
* Note: *

Note that this * function may also return error codes from previous, asynchronous * launches. *

*
*

*
* * @param pCopy Parameters for the memory copy * * @return CUDA_SUCCESS, CUDA_ERROR_DEINITIALIZED, CUDA_ERROR_NOT_INITIALIZED, * CUDA_ERROR_INVALID_CONTEXT, CUDA_ERROR_INVALID_VALUE * * @see JCudaDriver#cuArray3DCreate * @see JCudaDriver#cuArray3DGetDescriptor * @see JCudaDriver#cuArrayCreate * @see JCudaDriver#cuArrayDestroy * @see JCudaDriver#cuArrayGetDescriptor * @see JCudaDriver#cuMemAlloc * @see JCudaDriver#cuMemAllocHost * @see JCudaDriver#cuMemAllocPitch * @see JCudaDriver#cuMemcpy2DAsync * @see JCudaDriver#cuMemcpy2DUnaligned * @see JCudaDriver#cuMemcpy3D * @see JCudaDriver#cuMemcpy3DAsync * @see JCudaDriver#cuMemcpyAtoA * @see JCudaDriver#cuMemcpyAtoD * @see JCudaDriver#cuMemcpyAtoH * @see JCudaDriver#cuMemcpyAtoHAsync * @see JCudaDriver#cuMemcpyDtoA * @see JCudaDriver#cuMemcpyDtoD * @see JCudaDriver#cuMemcpyDtoDAsync * @see JCudaDriver#cuMemcpyDtoH * @see JCudaDriver#cuMemcpyDtoHAsync * @see JCudaDriver#cuMemcpyHtoA * @see JCudaDriver#cuMemcpyHtoAAsync * @see JCudaDriver#cuMemcpyHtoD * @see JCudaDriver#cuMemcpyHtoDAsync * @see JCudaDriver#cuMemFree * @see JCudaDriver#cuMemFreeHost * @see JCudaDriver#cuMemGetAddressRange * @see JCudaDriver#cuMemGetInfo * @see JCudaDriver#cuMemHostAlloc * @see JCudaDriver#cuMemHostGetDevicePointer * @see JCudaDriver#cuMemsetD2D8 * @see JCudaDriver#cuMemsetD2D16 * @see JCudaDriver#cuMemsetD2D32 * @see JCudaDriver#cuMemsetD8 * @see JCudaDriver#cuMemsetD16 * @see JCudaDriver#cuMemsetD32 */ public static int cuMemcpy2D(CUDA_MEMCPY2D pCopy) { return checkResult(cuMemcpy2DNative(pCopy)); } private static native int cuMemcpy2DNative(CUDA_MEMCPY2D pCopy); /** * Copies memory for 2D arrays. * *
     * CUresult cuMemcpy2DUnaligned (
     *      const CUDA_MEMCPY2D* pCopy )
     * 
*
*

Copies memory for 2D arrays. Perform a * 2D memory copy according to the parameters specified in pCopy. * The CUDA_MEMCPY2D structure is defined as: *

*
   typedef struct CUDA_MEMCPY2D_st {
     *       unsigned int srcXInBytes, srcY;
     *       CUmemorytype srcMemoryType;
     *       const void *srcHost;
     *       CUdeviceptr srcDevice;
     *       CUarray srcArray;
     *       unsigned int srcPitch;
     *       unsigned int dstXInBytes, dstY;
     *       CUmemorytype dstMemoryType;
     *       void *dstHost;
     *       CUdeviceptr dstDevice;
     *       CUarray dstArray;
     *       unsigned int dstPitch;
     *       unsigned int WidthInBytes;
     *       unsigned int Height;
     *    } CUDA_MEMCPY2D;
* where: *
    *
  • *

    srcMemoryType and dstMemoryType * specify the type of memory of the source and destination, respectively; * CUmemorytype_enum * is defined as: *

    *
  • *
*

*
   typedef enum CUmemorytype_enum {
     *       CU_MEMORYTYPE_HOST = 0x01,
     *       CU_MEMORYTYPE_DEVICE = 0x02,
     *       CU_MEMORYTYPE_ARRAY = 0x03,
     *       CU_MEMORYTYPE_UNIFIED = 0x04
     *    } CUmemorytype;
*

*

If srcMemoryType is CU_MEMORYTYPE_UNIFIED, * srcDevice and srcPitch specify the (unified virtual address space) base * address of the source data and the bytes per row * to apply. srcArray is ignored. This value * may be used only if unified addressing is supported in the calling * context. *

*

If srcMemoryType is CU_MEMORYTYPE_HOST, * srcHost and srcPitch specify the (host) base address of the source data * and the bytes per row to apply. srcArray is ignored. *

*

If srcMemoryType is CU_MEMORYTYPE_DEVICE, * srcDevice and srcPitch specify the (device) base address of the source * data and the bytes per row to apply. srcArray is * ignored. *

*

If srcMemoryType is CU_MEMORYTYPE_ARRAY, * srcArray specifies the handle of the source data. srcHost, srcDevice * and srcPitch are ignored. *

*

If dstMemoryType is CU_MEMORYTYPE_UNIFIED, * dstDevice and dstPitch specify the (unified virtual address space) base * address of the destination data and the bytes per row * to apply. dstArray is ignored. This value * may be used only if unified addressing is supported in the calling * context. *

*

If dstMemoryType is CU_MEMORYTYPE_HOST, * dstHost and dstPitch specify the (host) base address of the destination * data and the bytes per row to apply. dstArray is * ignored. *

*

If dstMemoryType is CU_MEMORYTYPE_DEVICE, * dstDevice and dstPitch specify the (device) base address of the * destination data and the bytes per row to apply. dstArray * is ignored. *

*

If dstMemoryType is CU_MEMORYTYPE_ARRAY, * dstArray specifies the handle of the destination data. dstHost, * dstDevice and dstPitch are ignored. *

*
    *
  • *

    srcXInBytes and srcY specify * the base address of the source data for the copy. *

    *
  • *
*

*

For host pointers, the starting address * is *

  void* Start = (void*)((char*)srcHost+srcY*srcPitch +
     * srcXInBytes);
*

*

For device pointers, the starting * address is *

  CUdeviceptr Start =
     * srcDevice+srcY*srcPitch+srcXInBytes;
*

*

For CUDA arrays, srcXInBytes must be * evenly divisible by the array element size. *

*
    *
  • *

    dstXInBytes and dstY specify * the base address of the destination data for the copy. *

    *
  • *
*

*

For host pointers, the base address is *

  void* dstStart = (void*)((char*)dstHost+dstY*dstPitch +
     * dstXInBytes);
*

*

For device pointers, the starting * address is *

  CUdeviceptr dstStart =
     * dstDevice+dstY*dstPitch+dstXInBytes;
*

*

For CUDA arrays, dstXInBytes must be * evenly divisible by the array element size. *

*
    *
  • *

    WidthInBytes and Height specify * the width (in bytes) and height of the 2D copy being performed. *

    *
  • *
  • *

    If specified, srcPitch must be * greater than or equal to WidthInBytes + srcXInBytes, and dstPitch must * be greater than or equal * to WidthInBytes + dstXInBytes. *

    *
  • *
*

*

cuMemcpy2D() returns an error if any * pitch is greater than the maximum allowed (CU_DEVICE_ATTRIBUTE_MAX_PITCH). * cuMemAllocPitch() passes back pitches that always work with cuMemcpy2D(). * On intra-device memory copies (device to device, CUDA array to device, * CUDA array to CUDA array), cuMemcpy2D() may fail for pitches not * computed by cuMemAllocPitch(). cuMemcpy2DUnaligned() does not have this * restriction, but may run significantly slower in the cases where * cuMemcpy2D() would have returned an error code. *

*
* Note: *

Note that this * function may also return error codes from previous, asynchronous * launches. *

*
*

*
* * @param pCopy Parameters for the memory copy * * @return CUDA_SUCCESS, CUDA_ERROR_DEINITIALIZED, CUDA_ERROR_NOT_INITIALIZED, * CUDA_ERROR_INVALID_CONTEXT, CUDA_ERROR_INVALID_VALUE * * @see JCudaDriver#cuArray3DCreate * @see JCudaDriver#cuArray3DGetDescriptor * @see JCudaDriver#cuArrayCreate * @see JCudaDriver#cuArrayDestroy * @see JCudaDriver#cuArrayGetDescriptor * @see JCudaDriver#cuMemAlloc * @see JCudaDriver#cuMemAllocHost * @see JCudaDriver#cuMemAllocPitch * @see JCudaDriver#cuMemcpy2D * @see JCudaDriver#cuMemcpy2DAsync * @see JCudaDriver#cuMemcpy3D * @see JCudaDriver#cuMemcpy3DAsync * @see JCudaDriver#cuMemcpyAtoA * @see JCudaDriver#cuMemcpyAtoD * @see JCudaDriver#cuMemcpyAtoH * @see JCudaDriver#cuMemcpyAtoHAsync * @see JCudaDriver#cuMemcpyDtoA * @see JCudaDriver#cuMemcpyDtoD * @see JCudaDriver#cuMemcpyDtoDAsync * @see JCudaDriver#cuMemcpyDtoH * @see JCudaDriver#cuMemcpyDtoHAsync * @see JCudaDriver#cuMemcpyHtoA * @see JCudaDriver#cuMemcpyHtoAAsync * @see JCudaDriver#cuMemcpyHtoD * @see JCudaDriver#cuMemcpyHtoDAsync * @see JCudaDriver#cuMemFree * @see JCudaDriver#cuMemFreeHost * @see JCudaDriver#cuMemGetAddressRange * @see JCudaDriver#cuMemGetInfo * @see JCudaDriver#cuMemHostAlloc * @see JCudaDriver#cuMemHostGetDevicePointer * @see JCudaDriver#cuMemsetD2D8 * @see JCudaDriver#cuMemsetD2D16 * @see JCudaDriver#cuMemsetD2D32 * @see JCudaDriver#cuMemsetD8 * @see JCudaDriver#cuMemsetD16 * @see JCudaDriver#cuMemsetD32 */ public static int cuMemcpy2DUnaligned(CUDA_MEMCPY2D pCopy) { return checkResult(cuMemcpy2DUnalignedNative(pCopy)); } private static native int cuMemcpy2DUnalignedNative(CUDA_MEMCPY2D pCopy); /** * Copies memory for 3D arrays. * *
     * CUresult cuMemcpy3D (
     *      const CUDA_MEMCPY3D* pCopy )
     * 
*
*

Copies memory for 3D arrays. Perform a * 3D memory copy according to the parameters specified in pCopy. * The CUDA_MEMCPY3D structure is defined as: *

*
        typedef struct CUDA_MEMCPY3D_st
     * {
     * 
     *             unsigned int srcXInBytes, srcY, srcZ;
     *             unsigned int srcLOD;
     *             CUmemorytype srcMemoryType;
     *                 const void *srcHost;
     *                 CUdeviceptr srcDevice;
     *                 CUarray srcArray;
     *                 unsigned int srcPitch;  // ignored when src is array
     *                 unsigned int srcHeight; // ignored when src is array;
     * may be 0 if Depth==1
     * 
     *             unsigned int dstXInBytes, dstY, dstZ;
     *             unsigned int dstLOD;
     *             CUmemorytype dstMemoryType;
     *                 void *dstHost;
     *                 CUdeviceptr dstDevice;
     *                 CUarray dstArray;
     *                 unsigned int dstPitch;  // ignored when dst is array
     *                 unsigned int dstHeight; // ignored when dst is array;
     * may be 0 if Depth==1
     * 
     *             unsigned int WidthInBytes;
     *             unsigned int Height;
     *             unsigned int Depth;
     *         } CUDA_MEMCPY3D;
* where: *
    *
  • *

    srcMemoryType and dstMemoryType * specify the type of memory of the source and destination, respectively; * CUmemorytype_enum * is defined as: *

    *
  • *
*

*
   typedef enum CUmemorytype_enum {
     *       CU_MEMORYTYPE_HOST = 0x01,
     *       CU_MEMORYTYPE_DEVICE = 0x02,
     *       CU_MEMORYTYPE_ARRAY = 0x03,
     *       CU_MEMORYTYPE_UNIFIED = 0x04
     *    } CUmemorytype;
*

*

If srcMemoryType is CU_MEMORYTYPE_UNIFIED, * srcDevice and srcPitch specify the (unified virtual address space) base * address of the source data and the bytes per row * to apply. srcArray is ignored. This value * may be used only if unified addressing is supported in the calling * context. *

*

If srcMemoryType is CU_MEMORYTYPE_HOST, * srcHost, srcPitch and srcHeight specify the (host) base address of the * source data, the bytes per row, and the height of * each 2D slice of the 3D array. srcArray * is ignored. *

*

If srcMemoryType is CU_MEMORYTYPE_DEVICE, * srcDevice, srcPitch and srcHeight specify the (device) base address of * the source data, the bytes per row, and the height * of each 2D slice of the 3D array. srcArray * is ignored. *

*

If srcMemoryType is CU_MEMORYTYPE_ARRAY, * srcArray specifies the handle of the source data. srcHost, srcDevice, * srcPitch and srcHeight are ignored. *

*

If dstMemoryType is CU_MEMORYTYPE_UNIFIED, * dstDevice and dstPitch specify the (unified virtual address space) base * address of the destination data and the bytes per row * to apply. dstArray is ignored. This value * may be used only if unified addressing is supported in the calling * context. *

*

If dstMemoryType is CU_MEMORYTYPE_HOST, * dstHost, dstPitch and dstHeight specify the (host) base address of the destination * data, the bytes per row, and the height of each * 2D slice of the 3D array. dstArray is * ignored. *

*

If dstMemoryType is CU_MEMORYTYPE_DEVICE, * dstDevice, dstPitch and dstHeight specify the (device) base address of the * destination data, the bytes per row, and the height of each * 2D slice of the 3D array. dstArray is * ignored. *

*

If dstMemoryType is CU_MEMORYTYPE_ARRAY, * dstArray specifies the handle of the destination data. dstHost, * dstDevice, dstPitch and dstHeight are ignored. *

*
    *
  • *

    srcXInBytes, srcY and srcZ * specify the base address of the source data for the copy. *

    *
  • *
*

*

For host pointers, the starting address * is *

  void* Start = (void*)((char*)srcHost+(srcZ*srcHeight+srcY)*srcPitch
     * + srcXInBytes);
*

*

For device pointers, the starting * address is *

  CUdeviceptr Start =
     * srcDevice+(srcZ*srcHeight+srcY)*srcPitch+srcXInBytes;
*

*

For CUDA arrays, srcXInBytes must be * evenly divisible by the array element size. *

*
    *
  • *

    dstXInBytes, dstY and dstZ * specify the base address of the destination data for the copy. *

    *
  • *
*

*

For host pointers, the base address is *

  void* dstStart = (void*)((char*)dstHost+(dstZ*dstHeight+dstY)*dstPitch
     * + dstXInBytes);
*

*

For device pointers, the starting * address is *

  CUdeviceptr dstStart =
     * dstDevice+(dstZ*dstHeight+dstY)*dstPitch+dstXInBytes;
*

*

For CUDA arrays, dstXInBytes must be * evenly divisible by the array element size. *

*
    *
  • *

    WidthInBytes, Height and Depth * specify the width (in bytes), height and depth of the 3D copy being * performed. *

    *
  • *
  • *

    If specified, srcPitch must be * greater than or equal to WidthInBytes + srcXInBytes, and dstPitch must * be greater than or equal * to WidthInBytes + dstXInBytes. *

    *
  • *
  • *

    If specified, srcHeight must * be greater than or equal to Height + srcY, and dstHeight must be * greater than or equal to Height * + dstY. *

    *
  • *
*

*

cuMemcpy3D() returns an error if any * pitch is greater than the maximum allowed * (CU_DEVICE_ATTRIBUTE_MAX_PITCH). *

*

* The srcLOD and dstLOD members of the * CUDA_MEMCPY3D structure must be set to 0. *

*
* Note: *

Note that this * function may also return error codes from previous, asynchronous * launches. *

*
*

*
* * @param pCopy Parameters for the memory copy * * @return CUDA_SUCCESS, CUDA_ERROR_DEINITIALIZED, CUDA_ERROR_NOT_INITIALIZED, * CUDA_ERROR_INVALID_CONTEXT, CUDA_ERROR_INVALID_VALUE * * @see JCudaDriver#cuArray3DCreate * @see JCudaDriver#cuArray3DGetDescriptor * @see JCudaDriver#cuArrayCreate * @see JCudaDriver#cuArrayDestroy * @see JCudaDriver#cuArrayGetDescriptor * @see JCudaDriver#cuMemAlloc * @see JCudaDriver#cuMemAllocHost * @see JCudaDriver#cuMemAllocPitch * @see JCudaDriver#cuMemcpy2D * @see JCudaDriver#cuMemcpy2DAsync * @see JCudaDriver#cuMemcpy2DUnaligned * @see JCudaDriver#cuMemcpy3DAsync * @see JCudaDriver#cuMemcpyAtoA * @see JCudaDriver#cuMemcpyAtoD * @see JCudaDriver#cuMemcpyAtoH * @see JCudaDriver#cuMemcpyAtoHAsync * @see JCudaDriver#cuMemcpyDtoA * @see JCudaDriver#cuMemcpyDtoD * @see JCudaDriver#cuMemcpyDtoDAsync * @see JCudaDriver#cuMemcpyDtoH * @see JCudaDriver#cuMemcpyDtoHAsync * @see JCudaDriver#cuMemcpyHtoA * @see JCudaDriver#cuMemcpyHtoAAsync * @see JCudaDriver#cuMemcpyHtoD * @see JCudaDriver#cuMemcpyHtoDAsync * @see JCudaDriver#cuMemFree * @see JCudaDriver#cuMemFreeHost * @see JCudaDriver#cuMemGetAddressRange * @see JCudaDriver#cuMemGetInfo * @see JCudaDriver#cuMemHostAlloc * @see JCudaDriver#cuMemHostGetDevicePointer * @see JCudaDriver#cuMemsetD2D8 * @see JCudaDriver#cuMemsetD2D16 * @see JCudaDriver#cuMemsetD2D32 * @see JCudaDriver#cuMemsetD8 * @see JCudaDriver#cuMemsetD16 * @see JCudaDriver#cuMemsetD32 */ public static int cuMemcpy3D(CUDA_MEMCPY3D pCopy) { return checkResult(cuMemcpy3DNative(pCopy)); } private static native int cuMemcpy3DNative(CUDA_MEMCPY3D pCopy); /** * Copies memory between contexts. * *
     * CUresult cuMemcpy3DPeer (
     *      const CUDA_MEMCPY3D_PEER* pCopy )
     * 
*
*

Copies memory between contexts. Perform * a 3D memory copy according to the parameters specified in pCopy. See the definition of the CUDA_MEMCPY3D_PEER structure * for documentation of its parameters. *

*

Note that this function is synchronous * with respect to the host only if the source or destination memory is * of type CU_MEMORYTYPE_HOST. Note also that this copy is serialized with * respect to all pending and future asynchronous work in the current * context, * the copy's source context, and the copy's * destination context (use cuMemcpy3DPeerAsync to avoid this * synchronization). *

*
* Note: *

Note that this * function may also return error codes from previous, asynchronous * launches. *

*
*

*
* * @param pCopy Parameters for the memory copy * * @return CUDA_SUCCESS, CUDA_ERROR_DEINITIALIZED, CUDA_ERROR_NOT_INITIALIZED, * CUDA_ERROR_INVALID_CONTEXT, CUDA_ERROR_INVALID_VALUE * * @see JCudaDriver#cuMemcpyDtoD * @see JCudaDriver#cuMemcpyPeer * @see JCudaDriver#cuMemcpyDtoDAsync * @see JCudaDriver#cuMemcpyPeerAsync * @see JCudaDriver#cuMemcpy3DPeerAsync */ public static int cuMemcpy3DPeer(CUDA_MEMCPY3D_PEER pCopy) { return checkResult(cuMemcpy3DPeerNative(pCopy)); } private static native int cuMemcpy3DPeerNative(CUDA_MEMCPY3D_PEER pCopy); /** * Copies memory asynchronously. * *
     * CUresult cuMemcpyAsync (
     *      CUdeviceptr dst,
     *      CUdeviceptr src,
     *      size_t ByteCount,
     *      CUstream hStream )
     * 
*
*

Copies memory asynchronously. Copies * data between two pointers. dst and src are base * pointers of the destination and source, respectively. ByteCount * specifies the number of bytes to copy. Note that this function infers * the type of the transfer (host to host, host to device, * device to device, or device to host) from * the pointer values. This function is only allowed in contexts which * support unified * addressing. Note that this function is * asynchronous and can optionally be associated to a stream by passing a * non-zero hStream argument *

*
* Note: *

Note that this * function may also return error codes from previous, asynchronous * launches. *

*
*

*
* * @param dst Destination unified virtual address space pointer * @param src Source unified virtual address space pointer * @param ByteCount Size of memory copy in bytes * @param hStream Stream identifier * * @return CUDA_SUCCESS, CUDA_ERROR_DEINITIALIZED, CUDA_ERROR_NOT_INITIALIZED, * CUDA_ERROR_INVALID_CONTEXT, CUDA_ERROR_INVALID_VALUE * * @see JCudaDriver#cuArray3DCreate * @see JCudaDriver#cuArray3DGetDescriptor * @see JCudaDriver#cuArrayCreate * @see JCudaDriver#cuArrayDestroy * @see JCudaDriver#cuArrayGetDescriptor * @see JCudaDriver#cuMemAlloc * @see JCudaDriver#cuMemAllocHost * @see JCudaDriver#cuMemAllocPitch * @see JCudaDriver#cuMemcpy2D * @see JCudaDriver#cuMemcpy2DAsync * @see JCudaDriver#cuMemcpy2DUnaligned * @see JCudaDriver#cuMemcpy3D * @see JCudaDriver#cuMemcpy3DAsync * @see JCudaDriver#cuMemcpyAtoA * @see JCudaDriver#cuMemcpyAtoD * @see JCudaDriver#cuMemcpyAtoH * @see JCudaDriver#cuMemcpyAtoHAsync * @see JCudaDriver#cuMemcpyDtoA * @see JCudaDriver#cuMemcpyDtoD * @see JCudaDriver#cuMemcpyDtoH * @see JCudaDriver#cuMemcpyDtoHAsync * @see JCudaDriver#cuMemcpyHtoA * @see JCudaDriver#cuMemcpyHtoAAsync * @see JCudaDriver#cuMemcpyHtoD * @see JCudaDriver#cuMemcpyHtoDAsync * @see JCudaDriver#cuMemFree * @see JCudaDriver#cuMemFreeHost * @see JCudaDriver#cuMemGetAddressRange * @see JCudaDriver#cuMemGetInfo * @see JCudaDriver#cuMemHostAlloc * @see JCudaDriver#cuMemHostGetDevicePointer * @see JCudaDriver#cuMemsetD2D8 * @see JCudaDriver#cuMemsetD2D8Async * @see JCudaDriver#cuMemsetD2D16 * @see JCudaDriver#cuMemsetD2D16Async * @see JCudaDriver#cuMemsetD2D32 * @see JCudaDriver#cuMemsetD2D32Async * @see JCudaDriver#cuMemsetD8 * @see JCudaDriver#cuMemsetD8Async * @see JCudaDriver#cuMemsetD16 * @see JCudaDriver#cuMemsetD16Async * @see JCudaDriver#cuMemsetD32 * @see JCudaDriver#cuMemsetD32Async */ public static int cuMemcpyAsync(CUdeviceptr dst, CUdeviceptr src, long ByteCount, CUstream hStream) { return checkResult(cuMemcpyAsyncNative(dst, src, ByteCount, hStream)); } 
private static native int cuMemcpyAsyncNative(CUdeviceptr dst, CUdeviceptr src, long ByteCount, CUstream hStream); /** * Copies device memory between two contexts asynchronously. * *
     * CUresult cuMemcpyPeerAsync (
     *      CUdeviceptr dstDevice,
     *      CUcontext dstContext,
     *      CUdeviceptr srcDevice,
     *      CUcontext srcContext,
     *      size_t ByteCount,
     *      CUstream hStream )
     * 
*
*

Copies device memory between two contexts * asynchronously. Copies from device memory in one context to device * memory in another * context. dstDevice is the base * device pointer of the destination memory and dstContext is * the destination context. srcDevice is the base device pointer * of the source memory and srcContext is the source context. * ByteCount specifies the number of bytes to copy. Note that * this function is asynchronous with respect to the host and all work in * other * streams in other devices. *

*
* Note: *

Note that this * function may also return error codes from previous, asynchronous * launches. *

*
*

*
* * @param dstDevice Destination device pointer * @param dstContext Destination context * @param srcDevice Source device pointer * @param srcContext Source context * @param ByteCount Size of memory copy in bytes * @param hStream Stream identifier * * @return CUDA_SUCCESS, CUDA_ERROR_DEINITIALIZED, CUDA_ERROR_NOT_INITIALIZED, * CUDA_ERROR_INVALID_CONTEXT, CUDA_ERROR_INVALID_VALUE * * @see JCudaDriver#cuMemcpyDtoD * @see JCudaDriver#cuMemcpyPeer * @see JCudaDriver#cuMemcpy3DPeer * @see JCudaDriver#cuMemcpyDtoDAsync * @see JCudaDriver#cuMemcpy3DPeerAsync */ public static int cuMemcpyPeerAsync(CUdeviceptr dstDevice, CUcontext dstContext, CUdeviceptr srcDevice, CUcontext srcContext, long ByteCount, CUstream hStream) { return checkResult(cuMemcpyPeerAsyncNative(dstDevice, dstContext, srcDevice, srcContext, ByteCount, hStream)); } private static native int cuMemcpyPeerAsyncNative(CUdeviceptr dstDevice, CUcontext dstContext, CUdeviceptr srcDevice, CUcontext srcContext, long ByteCount, CUstream hStream); /** * Copies memory from Host to Device. * *
     * CUresult cuMemcpyHtoDAsync (
     *      CUdeviceptr dstDevice,
     *      const void* srcHost,
     *      size_t ByteCount,
     *      CUstream hStream )
     * 
*
*

Copies memory from Host to Device. * Copies from host memory to device memory. dstDevice and srcHost are the base addresses of the destination and source, * respectively. ByteCount specifies the number of bytes to * copy. *

*

cuMemcpyHtoDAsync() is asynchronous and * can optionally be associated to a stream by passing a non-zero hStream argument. It only works on page-locked memory and returns * an error if a pointer to pageable memory is passed as input. *

*
* Note: *

Note that this * function may also return error codes from previous, asynchronous * launches. *

*
*

*
* * @param dstDevice Destination device pointer * @param srcHost Source host pointer * @param ByteCount Size of memory copy in bytes * @param hStream Stream identifier * * @return CUDA_SUCCESS, CUDA_ERROR_DEINITIALIZED, CUDA_ERROR_NOT_INITIALIZED, * CUDA_ERROR_INVALID_CONTEXT, CUDA_ERROR_INVALID_VALUE * * @see JCudaDriver#cuArray3DCreate * @see JCudaDriver#cuArray3DGetDescriptor * @see JCudaDriver#cuArrayCreate * @see JCudaDriver#cuArrayDestroy * @see JCudaDriver#cuArrayGetDescriptor * @see JCudaDriver#cuMemAlloc * @see JCudaDriver#cuMemAllocHost * @see JCudaDriver#cuMemAllocPitch * @see JCudaDriver#cuMemcpy2D * @see JCudaDriver#cuMemcpy2DAsync * @see JCudaDriver#cuMemcpy2DUnaligned * @see JCudaDriver#cuMemcpy3D * @see JCudaDriver#cuMemcpy3DAsync * @see JCudaDriver#cuMemcpyAtoA * @see JCudaDriver#cuMemcpyAtoD * @see JCudaDriver#cuMemcpyAtoH * @see JCudaDriver#cuMemcpyAtoHAsync * @see JCudaDriver#cuMemcpyDtoA * @see JCudaDriver#cuMemcpyDtoD * @see JCudaDriver#cuMemcpyDtoDAsync * @see JCudaDriver#cuMemcpyDtoH * @see JCudaDriver#cuMemcpyDtoHAsync * @see JCudaDriver#cuMemcpyHtoA * @see JCudaDriver#cuMemcpyHtoAAsync * @see JCudaDriver#cuMemcpyHtoD * @see JCudaDriver#cuMemFree * @see JCudaDriver#cuMemFreeHost * @see JCudaDriver#cuMemGetAddressRange * @see JCudaDriver#cuMemGetInfo * @see JCudaDriver#cuMemHostAlloc * @see JCudaDriver#cuMemHostGetDevicePointer * @see JCudaDriver#cuMemsetD2D8 * @see JCudaDriver#cuMemsetD2D8Async * @see JCudaDriver#cuMemsetD2D16 * @see JCudaDriver#cuMemsetD2D16Async * @see JCudaDriver#cuMemsetD2D32 * @see JCudaDriver#cuMemsetD2D32Async * @see JCudaDriver#cuMemsetD8 * @see JCudaDriver#cuMemsetD8Async * @see JCudaDriver#cuMemsetD16 * @see JCudaDriver#cuMemsetD16Async * @see JCudaDriver#cuMemsetD32 * @see JCudaDriver#cuMemsetD32Async */ public static int cuMemcpyHtoDAsync(CUdeviceptr dstDevice, Pointer srcHost, long ByteCount, CUstream hStream) { return checkResult(cuMemcpyHtoDAsyncNative(dstDevice, srcHost, ByteCount, hStream)); } private 
static native int cuMemcpyHtoDAsyncNative(CUdeviceptr dstDevice, Pointer srcHost, long ByteCount, CUstream hStream); /** * Copies memory from Device to Host. * *
     * CUresult cuMemcpyDtoHAsync (
     *      void* dstHost,
     *      CUdeviceptr srcDevice,
     *      size_t ByteCount,
     *      CUstream hStream )
     * 
*
*

Copies memory from Device to Host. * Copies from device to host memory. dstHost and srcDevice specify the base pointers of the destination and * source, respectively. ByteCount specifies the number of bytes * to copy. *

*

cuMemcpyDtoHAsync() is asynchronous and * can optionally be associated to a stream by passing a non-zero hStream argument. It only works on page-locked memory and returns * an error if a pointer to pageable memory is passed as input. *

*
* Note: *

Note that this * function may also return error codes from previous, asynchronous * launches. *

*
*

*
* * @param dstHost Destination host pointer * @param srcDevice Source device pointer * @param ByteCount Size of memory copy in bytes * @param hStream Stream identifier * * @return CUDA_SUCCESS, CUDA_ERROR_DEINITIALIZED, CUDA_ERROR_NOT_INITIALIZED, * CUDA_ERROR_INVALID_CONTEXT, CUDA_ERROR_INVALID_VALUE * * @see JCudaDriver#cuArray3DCreate * @see JCudaDriver#cuArray3DGetDescriptor * @see JCudaDriver#cuArrayCreate * @see JCudaDriver#cuArrayDestroy * @see JCudaDriver#cuArrayGetDescriptor * @see JCudaDriver#cuMemAlloc * @see JCudaDriver#cuMemAllocHost * @see JCudaDriver#cuMemAllocPitch * @see JCudaDriver#cuMemcpy2D * @see JCudaDriver#cuMemcpy2DAsync * @see JCudaDriver#cuMemcpy2DUnaligned * @see JCudaDriver#cuMemcpy3D * @see JCudaDriver#cuMemcpy3DAsync * @see JCudaDriver#cuMemcpyAtoA * @see JCudaDriver#cuMemcpyAtoD * @see JCudaDriver#cuMemcpyAtoH * @see JCudaDriver#cuMemcpyAtoHAsync * @see JCudaDriver#cuMemcpyDtoA * @see JCudaDriver#cuMemcpyDtoD * @see JCudaDriver#cuMemcpyDtoDAsync * @see JCudaDriver#cuMemcpyDtoH * @see JCudaDriver#cuMemcpyHtoA * @see JCudaDriver#cuMemcpyHtoAAsync * @see JCudaDriver#cuMemcpyHtoD * @see JCudaDriver#cuMemcpyHtoDAsync * @see JCudaDriver#cuMemFree * @see JCudaDriver#cuMemFreeHost * @see JCudaDriver#cuMemGetAddressRange * @see JCudaDriver#cuMemGetInfo * @see JCudaDriver#cuMemHostAlloc * @see JCudaDriver#cuMemHostGetDevicePointer * @see JCudaDriver#cuMemsetD2D8 * @see JCudaDriver#cuMemsetD2D8Async * @see JCudaDriver#cuMemsetD2D16 * @see JCudaDriver#cuMemsetD2D16Async * @see JCudaDriver#cuMemsetD2D32 * @see JCudaDriver#cuMemsetD2D32Async * @see JCudaDriver#cuMemsetD8 * @see JCudaDriver#cuMemsetD8Async * @see JCudaDriver#cuMemsetD16 * @see JCudaDriver#cuMemsetD16Async * @see JCudaDriver#cuMemsetD32 * @see JCudaDriver#cuMemsetD32Async */ public static int cuMemcpyDtoHAsync(Pointer dstHost,CUdeviceptr srcDevice, long ByteCount, CUstream hStream) { return checkResult(cuMemcpyDtoHAsyncNative(dstHost, srcDevice, ByteCount, hStream)); } private static 
native int cuMemcpyDtoHAsyncNative(Pointer dstHost,CUdeviceptr srcDevice, long ByteCount, CUstream hStream); /** * Copies memory from Device to Device. * *
     * CUresult cuMemcpyDtoDAsync (
     *      CUdeviceptr dstDevice,
     *      CUdeviceptr srcDevice,
     *      size_t ByteCount,
     *      CUstream hStream )
     * 
*
*

Copies memory from Device to Device. * Copies from device memory to device memory. dstDevice and * srcDevice are the base pointers of the destination and * source, respectively. ByteCount specifies the number of bytes * to copy. Note that this function is asynchronous and can optionally be * associated to a stream * by passing a non-zero hStream * argument *

*
* Note: *

Note that this * function may also return error codes from previous, asynchronous * launches. *

*
*

*
* * @param dstDevice Destination device pointer * @param srcDevice Source device pointer * @param ByteCount Size of memory copy in bytes * @param hStream Stream identifier * * @return CUDA_SUCCESS, CUDA_ERROR_DEINITIALIZED, CUDA_ERROR_NOT_INITIALIZED, * CUDA_ERROR_INVALID_CONTEXT, CUDA_ERROR_INVALID_VALUE * * @see JCudaDriver#cuArray3DCreate * @see JCudaDriver#cuArray3DGetDescriptor * @see JCudaDriver#cuArrayCreate * @see JCudaDriver#cuArrayDestroy * @see JCudaDriver#cuArrayGetDescriptor * @see JCudaDriver#cuMemAlloc * @see JCudaDriver#cuMemAllocHost * @see JCudaDriver#cuMemAllocPitch * @see JCudaDriver#cuMemcpy2D * @see JCudaDriver#cuMemcpy2DAsync * @see JCudaDriver#cuMemcpy2DUnaligned * @see JCudaDriver#cuMemcpy3D * @see JCudaDriver#cuMemcpy3DAsync * @see JCudaDriver#cuMemcpyAtoA * @see JCudaDriver#cuMemcpyAtoD * @see JCudaDriver#cuMemcpyAtoH * @see JCudaDriver#cuMemcpyAtoHAsync * @see JCudaDriver#cuMemcpyDtoA * @see JCudaDriver#cuMemcpyDtoD * @see JCudaDriver#cuMemcpyDtoH * @see JCudaDriver#cuMemcpyDtoHAsync * @see JCudaDriver#cuMemcpyHtoA * @see JCudaDriver#cuMemcpyHtoAAsync * @see JCudaDriver#cuMemcpyHtoD * @see JCudaDriver#cuMemcpyHtoDAsync * @see JCudaDriver#cuMemFree * @see JCudaDriver#cuMemFreeHost * @see JCudaDriver#cuMemGetAddressRange * @see JCudaDriver#cuMemGetInfo * @see JCudaDriver#cuMemHostAlloc * @see JCudaDriver#cuMemHostGetDevicePointer * @see JCudaDriver#cuMemsetD2D8 * @see JCudaDriver#cuMemsetD2D8Async * @see JCudaDriver#cuMemsetD2D16 * @see JCudaDriver#cuMemsetD2D16Async * @see JCudaDriver#cuMemsetD2D32 * @see JCudaDriver#cuMemsetD2D32Async * @see JCudaDriver#cuMemsetD8 * @see JCudaDriver#cuMemsetD8Async * @see JCudaDriver#cuMemsetD16 * @see JCudaDriver#cuMemsetD16Async * @see JCudaDriver#cuMemsetD32 * @see JCudaDriver#cuMemsetD32Async */ public static int cuMemcpyDtoDAsync(CUdeviceptr dstDevice,CUdeviceptr srcDevice, long ByteCount, CUstream hStream) { return checkResult(cuMemcpyDtoDAsyncNative(dstDevice, srcDevice, ByteCount, hStream)); } 
private static native int cuMemcpyDtoDAsyncNative(CUdeviceptr dstDevice,CUdeviceptr srcDevice, long ByteCount, CUstream hStream); /** * Copies memory from Host to Array. * *
     * CUresult cuMemcpyHtoAAsync (
     *      CUarray dstArray,
     *      size_t dstOffset,
     *      const void* srcHost,
     *      size_t ByteCount,
     *      CUstream hStream )
     * 
*
*

Copies memory from Host to Array. Copies * from host memory to a 1D CUDA array. dstArray and dstOffset specify the CUDA array handle and starting offset in * bytes of the destination data. srcHost specifies the base * address of the source. ByteCount specifies the number of * bytes to copy. *

*

cuMemcpyHtoAAsync() is asynchronous and * can optionally be associated to a stream by passing a non-zero hStream argument. It only works on page-locked memory and returns * an error if a pointer to pageable memory is passed as input. *

*
* Note: *

Note that this * function may also return error codes from previous, asynchronous * launches. *

*
*

*
* * @param dstArray Destination array * @param dstOffset Offset in bytes of destination array * @param srcHost Source host pointer * @param ByteCount Size of memory copy in bytes * @param hStream Stream identifier * * @return CUDA_SUCCESS, CUDA_ERROR_DEINITIALIZED, CUDA_ERROR_NOT_INITIALIZED, * CUDA_ERROR_INVALID_CONTEXT, CUDA_ERROR_INVALID_VALUE * * @see JCudaDriver#cuArray3DCreate * @see JCudaDriver#cuArray3DGetDescriptor * @see JCudaDriver#cuArrayCreate * @see JCudaDriver#cuArrayDestroy * @see JCudaDriver#cuArrayGetDescriptor * @see JCudaDriver#cuMemAlloc * @see JCudaDriver#cuMemAllocHost * @see JCudaDriver#cuMemAllocPitch * @see JCudaDriver#cuMemcpy2D * @see JCudaDriver#cuMemcpy2DAsync * @see JCudaDriver#cuMemcpy2DUnaligned * @see JCudaDriver#cuMemcpy3D * @see JCudaDriver#cuMemcpy3DAsync * @see JCudaDriver#cuMemcpyAtoA * @see JCudaDriver#cuMemcpyAtoD * @see JCudaDriver#cuMemcpyAtoH * @see JCudaDriver#cuMemcpyAtoHAsync * @see JCudaDriver#cuMemcpyDtoA * @see JCudaDriver#cuMemcpyDtoD * @see JCudaDriver#cuMemcpyDtoDAsync * @see JCudaDriver#cuMemcpyDtoH * @see JCudaDriver#cuMemcpyDtoHAsync * @see JCudaDriver#cuMemcpyHtoA * @see JCudaDriver#cuMemcpyHtoD * @see JCudaDriver#cuMemcpyHtoDAsync * @see JCudaDriver#cuMemFree * @see JCudaDriver#cuMemFreeHost * @see JCudaDriver#cuMemGetAddressRange * @see JCudaDriver#cuMemGetInfo * @see JCudaDriver#cuMemHostAlloc * @see JCudaDriver#cuMemHostGetDevicePointer * @see JCudaDriver#cuMemsetD2D8 * @see JCudaDriver#cuMemsetD2D8Async * @see JCudaDriver#cuMemsetD2D16 * @see JCudaDriver#cuMemsetD2D16Async * @see JCudaDriver#cuMemsetD2D32 * @see JCudaDriver#cuMemsetD2D32Async * @see JCudaDriver#cuMemsetD8 * @see JCudaDriver#cuMemsetD8Async * @see JCudaDriver#cuMemsetD16 * @see JCudaDriver#cuMemsetD16Async * @see JCudaDriver#cuMemsetD32 * @see JCudaDriver#cuMemsetD32Async */ public static int cuMemcpyHtoAAsync(CUarray dstArray, long dstIndex, Pointer pSrc, long ByteCount, CUstream hStream) { return 
checkResult(cuMemcpyHtoAAsyncNative(dstArray, dstIndex, pSrc, ByteCount, hStream)); } private static native int cuMemcpyHtoAAsyncNative(CUarray dstArray, long dstIndex, Pointer pSrc, long ByteCount, CUstream hStream); /** * Copies memory from Array to Host. * *
     * CUresult cuMemcpyAtoHAsync (
     *      void* dstHost,
     *      CUarray srcArray,
     *      size_t srcOffset,
     *      size_t ByteCount,
     *      CUstream hStream )
     * 
*
*

Copies memory from Array to Host. Copies * from one 1D CUDA array to host memory. dstHost specifies the * base pointer of the destination. srcArray and srcOffset specify the CUDA array handle and starting offset in * bytes of the source data. ByteCount specifies the number of * bytes to copy. *

*

     * cuMemcpyAtoHAsync() is asynchronous and
     * can optionally be associated to a stream by passing a non-zero
     * hStream argument. It only works on page-locked host memory and
     * returns an error if a pointer to pageable memory is passed as input.
     *

*
* Note: *

Note that this * function may also return error codes from previous, asynchronous * launches. *

*
*

*
* * @param dstHost Destination pointer * @param srcArray Source array * @param srcOffset Offset in bytes of source array * @param ByteCount Size of memory copy in bytes * @param hStream Stream identifier * * @return CUDA_SUCCESS, CUDA_ERROR_DEINITIALIZED, CUDA_ERROR_NOT_INITIALIZED, * CUDA_ERROR_INVALID_CONTEXT, CUDA_ERROR_INVALID_VALUE * * @see JCudaDriver#cuArray3DCreate * @see JCudaDriver#cuArray3DGetDescriptor * @see JCudaDriver#cuArrayCreate * @see JCudaDriver#cuArrayDestroy * @see JCudaDriver#cuArrayGetDescriptor * @see JCudaDriver#cuMemAlloc * @see JCudaDriver#cuMemAllocHost * @see JCudaDriver#cuMemAllocPitch * @see JCudaDriver#cuMemcpy2D * @see JCudaDriver#cuMemcpy2DAsync * @see JCudaDriver#cuMemcpy2DUnaligned * @see JCudaDriver#cuMemcpy3D * @see JCudaDriver#cuMemcpy3DAsync * @see JCudaDriver#cuMemcpyAtoA * @see JCudaDriver#cuMemcpyAtoD * @see JCudaDriver#cuMemcpyAtoH * @see JCudaDriver#cuMemcpyDtoA * @see JCudaDriver#cuMemcpyDtoD * @see JCudaDriver#cuMemcpyDtoDAsync * @see JCudaDriver#cuMemcpyDtoH * @see JCudaDriver#cuMemcpyDtoHAsync * @see JCudaDriver#cuMemcpyHtoA * @see JCudaDriver#cuMemcpyHtoAAsync * @see JCudaDriver#cuMemcpyHtoD * @see JCudaDriver#cuMemcpyHtoDAsync * @see JCudaDriver#cuMemFree * @see JCudaDriver#cuMemFreeHost * @see JCudaDriver#cuMemGetAddressRange * @see JCudaDriver#cuMemGetInfo * @see JCudaDriver#cuMemHostAlloc * @see JCudaDriver#cuMemHostGetDevicePointer * @see JCudaDriver#cuMemsetD2D8 * @see JCudaDriver#cuMemsetD2D8Async * @see JCudaDriver#cuMemsetD2D16 * @see JCudaDriver#cuMemsetD2D16Async * @see JCudaDriver#cuMemsetD2D32 * @see JCudaDriver#cuMemsetD2D32Async * @see JCudaDriver#cuMemsetD8 * @see JCudaDriver#cuMemsetD8Async * @see JCudaDriver#cuMemsetD16 * @see JCudaDriver#cuMemsetD16Async * @see JCudaDriver#cuMemsetD32 * @see JCudaDriver#cuMemsetD32Async */ public static int cuMemcpyAtoHAsync(Pointer dstHost, CUarray srcArray, long srcIndex, long ByteCount, CUstream hStream) { return checkResult(cuMemcpyAtoHAsyncNative(dstHost, 
srcArray, srcIndex, ByteCount, hStream)); } private static native int cuMemcpyAtoHAsyncNative(Pointer dstHost, CUarray srcArray, long srcIndex, long ByteCount, CUstream hStream); /** * Copies memory for 2D arrays. * *
     * CUresult cuMemcpy2DAsync (
     *      const CUDA_MEMCPY2D* pCopy,
     *      CUstream hStream )
     * 
*
*

Copies memory for 2D arrays. Perform a * 2D memory copy according to the parameters specified in pCopy. * The CUDA_MEMCPY2D structure is defined as: *

*
   typedef struct CUDA_MEMCPY2D_st {
     *       unsigned int srcXInBytes, srcY;
     *       CUmemorytype srcMemoryType;
     *       const void *srcHost;
     *       CUdeviceptr srcDevice;
     *       CUarray srcArray;
     *       unsigned int srcPitch;
     *       unsigned int dstXInBytes, dstY;
     *       CUmemorytype dstMemoryType;
     *       void *dstHost;
     *       CUdeviceptr dstDevice;
     *       CUarray dstArray;
     *       unsigned int dstPitch;
     *       unsigned int WidthInBytes;
     *       unsigned int Height;
     *    } CUDA_MEMCPY2D;
* where: *
    *
  • *

    srcMemoryType and dstMemoryType * specify the type of memory of the source and destination, respectively; * CUmemorytype_enum * is defined as: *

    *
  • *
*

*
   typedef enum CUmemorytype_enum {
     *       CU_MEMORYTYPE_HOST = 0x01,
     *       CU_MEMORYTYPE_DEVICE = 0x02,
     *       CU_MEMORYTYPE_ARRAY = 0x03,
     *       CU_MEMORYTYPE_UNIFIED = 0x04
     *    } CUmemorytype;
*

*

If srcMemoryType is CU_MEMORYTYPE_HOST, * srcHost and srcPitch specify the (host) base address of the source data * and the bytes per row to apply. srcArray is ignored. *

*

If srcMemoryType is CU_MEMORYTYPE_UNIFIED, * srcDevice and srcPitch specify the (unified virtual address space) base * address of the source data and the bytes per row * to apply. srcArray is ignored. This value * may be used only if unified addressing is supported in the calling * context. *

*

If srcMemoryType is CU_MEMORYTYPE_DEVICE, * srcDevice and srcPitch specify the (device) base address of the source * data and the bytes per row to apply. srcArray is * ignored. *

*

If srcMemoryType is CU_MEMORYTYPE_ARRAY, * srcArray specifies the handle of the source data. srcHost, srcDevice * and srcPitch are ignored. *

*

     * If dstMemoryType is CU_MEMORYTYPE_UNIFIED,
     * dstDevice and dstPitch specify the (unified virtual address space) base
     * address of the destination data and the bytes per row
     * to apply. dstArray is ignored. This value
     * may be used only if unified addressing is supported in the calling
     * context.
     *

*

If dstMemoryType is CU_MEMORYTYPE_HOST, * dstHost and dstPitch specify the (host) base address of the destination * data and the bytes per row to apply. dstArray is * ignored. *

*

If dstMemoryType is CU_MEMORYTYPE_DEVICE, * dstDevice and dstPitch specify the (device) base address of the * destination data and the bytes per row to apply. dstArray * is ignored. *

*

If dstMemoryType is CU_MEMORYTYPE_ARRAY, * dstArray specifies the handle of the destination data. dstHost, * dstDevice and dstPitch are ignored. *

*
    *
  • *

    srcXInBytes and srcY specify * the base address of the source data for the copy. *

    *
  • *
*

*

For host pointers, the starting address * is *

  void* Start = (void*)((char*)srcHost+srcY*srcPitch +
     * srcXInBytes);
*

*

For device pointers, the starting * address is *

  CUdeviceptr Start =
     * srcDevice+srcY*srcPitch+srcXInBytes;
*

*

For CUDA arrays, srcXInBytes must be * evenly divisible by the array element size. *

*
    *
  • *

    dstXInBytes and dstY specify * the base address of the destination data for the copy. *

    *
  • *
*

*

For host pointers, the base address is *

  void* dstStart = (void*)((char*)dstHost+dstY*dstPitch +
     * dstXInBytes);
*

*

For device pointers, the starting * address is *

  CUdeviceptr dstStart =
     * dstDevice+dstY*dstPitch+dstXInBytes;
*

*

For CUDA arrays, dstXInBytes must be * evenly divisible by the array element size. *

*
    *
  • *

    WidthInBytes and Height specify * the width (in bytes) and height of the 2D copy being performed. *

    *
  • *
  • *

    If specified, srcPitch must be * greater than or equal to WidthInBytes + srcXInBytes, and dstPitch must * be greater than or equal * to WidthInBytes + dstXInBytes. *

    *
  • *
  • *

    If specified, srcPitch must be * greater than or equal to WidthInBytes + srcXInBytes, and dstPitch must * be greater than or equal * to WidthInBytes + dstXInBytes. *

    *
  • *
  • *

    If specified, srcHeight must * be greater than or equal to Height + srcY, and dstHeight must be * greater than or equal to Height * + dstY. *

    *
  • *
*

*

cuMemcpy2D() returns an error if any * pitch is greater than the maximum allowed (CU_DEVICE_ATTRIBUTE_MAX_PITCH). * cuMemAllocPitch() passes back pitches that always work with cuMemcpy2D(). * On intra-device memory copies (device to device, CUDA array to device, * CUDA array to CUDA array), cuMemcpy2D() may fail for pitches not * computed by cuMemAllocPitch(). cuMemcpy2DUnaligned() does not have this * restriction, but may run significantly slower in the cases where * cuMemcpy2D() would have returned an error code. *

*

cuMemcpy2DAsync() is asynchronous and * can optionally be associated to a stream by passing a non-zero hStream argument. It only works on page-locked host memory and * returns an error if a pointer to pageable memory is passed as input. *

*
* Note: *

Note that this * function may also return error codes from previous, asynchronous * launches. *

*
*

*
* * @param pCopy Parameters for the memory copy * @param hStream Stream identifier * * @return CUDA_SUCCESS, CUDA_ERROR_DEINITIALIZED, CUDA_ERROR_NOT_INITIALIZED, * CUDA_ERROR_INVALID_CONTEXT, CUDA_ERROR_INVALID_VALUE * * @see JCudaDriver#cuArray3DCreate * @see JCudaDriver#cuArray3DGetDescriptor * @see JCudaDriver#cuArrayCreate * @see JCudaDriver#cuArrayDestroy * @see JCudaDriver#cuArrayGetDescriptor * @see JCudaDriver#cuMemAlloc * @see JCudaDriver#cuMemAllocHost * @see JCudaDriver#cuMemAllocPitch * @see JCudaDriver#cuMemcpy2D * @see JCudaDriver#cuMemcpy2DUnaligned * @see JCudaDriver#cuMemcpy3D * @see JCudaDriver#cuMemcpy3DAsync * @see JCudaDriver#cuMemcpyAtoA * @see JCudaDriver#cuMemcpyAtoD * @see JCudaDriver#cuMemcpyAtoH * @see JCudaDriver#cuMemcpyAtoHAsync * @see JCudaDriver#cuMemcpyDtoA * @see JCudaDriver#cuMemcpyDtoD * @see JCudaDriver#cuMemcpyDtoDAsync * @see JCudaDriver#cuMemcpyDtoH * @see JCudaDriver#cuMemcpyDtoHAsync * @see JCudaDriver#cuMemcpyHtoA * @see JCudaDriver#cuMemcpyHtoAAsync * @see JCudaDriver#cuMemcpyHtoD * @see JCudaDriver#cuMemcpyHtoDAsync * @see JCudaDriver#cuMemFree * @see JCudaDriver#cuMemFreeHost * @see JCudaDriver#cuMemGetAddressRange * @see JCudaDriver#cuMemGetInfo * @see JCudaDriver#cuMemHostAlloc * @see JCudaDriver#cuMemHostGetDevicePointer * @see JCudaDriver#cuMemsetD2D8 * @see JCudaDriver#cuMemsetD2D8Async * @see JCudaDriver#cuMemsetD2D16 * @see JCudaDriver#cuMemsetD2D16Async * @see JCudaDriver#cuMemsetD2D32 * @see JCudaDriver#cuMemsetD2D32Async * @see JCudaDriver#cuMemsetD8 * @see JCudaDriver#cuMemsetD8Async * @see JCudaDriver#cuMemsetD16 * @see JCudaDriver#cuMemsetD16Async * @see JCudaDriver#cuMemsetD32 * @see JCudaDriver#cuMemsetD32Async */ public static int cuMemcpy2DAsync(CUDA_MEMCPY2D pCopy, CUstream hStream) { return checkResult(cuMemcpy2DAsyncNative(pCopy, hStream)); } private static native int cuMemcpy2DAsyncNative(CUDA_MEMCPY2D pCopy, CUstream hStream); /** * Copies memory for 3D arrays. * *
     * CUresult cuMemcpy3DAsync (
     *      const CUDA_MEMCPY3D* pCopy,
     *      CUstream hStream )
     * 
*
*

Copies memory for 3D arrays. Perform a * 3D memory copy according to the parameters specified in pCopy. * The CUDA_MEMCPY3D structure is defined as: *

*
        typedef struct CUDA_MEMCPY3D_st
     * {
     * 
     *             unsigned int srcXInBytes, srcY, srcZ;
     *             unsigned int srcLOD;
     *             CUmemorytype srcMemoryType;
     *                 const void *srcHost;
     *                 CUdeviceptr srcDevice;
     *                 CUarray srcArray;
     *                 unsigned int srcPitch;  // ignored when src is array
     *                 unsigned int srcHeight; // ignored when src is array;
     * may be 0 if Depth==1
     * 
     *             unsigned int dstXInBytes, dstY, dstZ;
     *             unsigned int dstLOD;
     *             CUmemorytype dstMemoryType;
     *                 void *dstHost;
     *                 CUdeviceptr dstDevice;
     *                 CUarray dstArray;
     *                 unsigned int dstPitch;  // ignored when dst is array
     *                 unsigned int dstHeight; // ignored when dst is array;
     * may be 0 if Depth==1
     * 
     *             unsigned int WidthInBytes;
     *             unsigned int Height;
     *             unsigned int Depth;
     *         } CUDA_MEMCPY3D;
* where: *
    *
  • *

    srcMemoryType and dstMemoryType * specify the type of memory of the source and destination, respectively; * CUmemorytype_enum * is defined as: *

    *
  • *
*

*
   typedef enum CUmemorytype_enum {
     *       CU_MEMORYTYPE_HOST = 0x01,
     *       CU_MEMORYTYPE_DEVICE = 0x02,
     *       CU_MEMORYTYPE_ARRAY = 0x03,
     *       CU_MEMORYTYPE_UNIFIED = 0x04
     *    } CUmemorytype;
*

*

If srcMemoryType is CU_MEMORYTYPE_UNIFIED, * srcDevice and srcPitch specify the (unified virtual address space) base * address of the source data and the bytes per row * to apply. srcArray is ignored. This value * may be used only if unified addressing is supported in the calling * context. *

*

If srcMemoryType is CU_MEMORYTYPE_HOST, * srcHost, srcPitch and srcHeight specify the (host) base address of the * source data, the bytes per row, and the height of * each 2D slice of the 3D array. srcArray * is ignored. *

*

If srcMemoryType is CU_MEMORYTYPE_DEVICE, * srcDevice, srcPitch and srcHeight specify the (device) base address of * the source data, the bytes per row, and the height * of each 2D slice of the 3D array. srcArray * is ignored. *

*

If srcMemoryType is CU_MEMORYTYPE_ARRAY, * srcArray specifies the handle of the source data. srcHost, srcDevice, * srcPitch and srcHeight are ignored. *

*

     * If dstMemoryType is CU_MEMORYTYPE_UNIFIED,
     * dstDevice and dstPitch specify the (unified virtual address space) base
     * address of the destination data and the bytes per row
     * to apply. dstArray is ignored. This value
     * may be used only if unified addressing is supported in the calling
     * context.
     *

*

     * If dstMemoryType is CU_MEMORYTYPE_HOST,
     * dstHost, dstPitch and dstHeight specify the (host) base address of the
     * destination data, the bytes per row, and the height of each
     * 2D slice of the 3D array. dstArray is
     * ignored.
     *

*

     * If dstMemoryType is CU_MEMORYTYPE_DEVICE,
     * dstDevice, dstPitch and dstHeight specify the (device) base address of the
     * destination data, the bytes per row, and the height of each
     * 2D slice of the 3D array. dstArray is
     * ignored.
     *

*

If dstMemoryType is CU_MEMORYTYPE_ARRAY, * dstArray specifies the handle of the destination data. dstHost, * dstDevice, dstPitch and dstHeight are ignored. *

*
    *
  • *

    srcXInBytes, srcY and srcZ * specify the base address of the source data for the copy. *

    *
  • *
*

*

For host pointers, the starting address * is *

  void* Start = (void*)((char*)srcHost+(srcZ*srcHeight+srcY)*srcPitch
     * + srcXInBytes);
*

*

For device pointers, the starting * address is *

  CUdeviceptr Start =
     * srcDevice+(srcZ*srcHeight+srcY)*srcPitch+srcXInBytes;
*

*

For CUDA arrays, srcXInBytes must be * evenly divisible by the array element size. *

*
    *
  • *

    dstXInBytes, dstY and dstZ * specify the base address of the destination data for the copy. *

    *
  • *
*

*

For host pointers, the base address is *

  void* dstStart = (void*)((char*)dstHost+(dstZ*dstHeight+dstY)*dstPitch
     * + dstXInBytes);
*

*

For device pointers, the starting * address is *

  CUdeviceptr dstStart =
     * dstDevice+(dstZ*dstHeight+dstY)*dstPitch+dstXInBytes;
*

*

For CUDA arrays, dstXInBytes must be * evenly divisible by the array element size. *

*
    *
  • *

    WidthInBytes, Height and Depth * specify the width (in bytes), height and depth of the 3D copy being * performed. *

    *
  • *
  • *

    If specified, srcPitch must be * greater than or equal to WidthInBytes + srcXInBytes, and dstPitch must * be greater than or equal * to WidthInBytes + dstXInBytes. *

    *
  • *
  • *

    If specified, srcHeight must * be greater than or equal to Height + srcY, and dstHeight must be * greater than or equal to Height * + dstY. *

    *
  • *
*

*

cuMemcpy3D() returns an error if any * pitch is greater than the maximum allowed * (CU_DEVICE_ATTRIBUTE_MAX_PITCH). *

*

cuMemcpy3DAsync() is asynchronous and * can optionally be associated to a stream by passing a non-zero hStream argument. It only works on page-locked host memory and * returns an error if a pointer to pageable memory is passed as input. *

*

The srcLOD and dstLOD members of the * CUDA_MEMCPY3D structure must be set to 0. *

*
* Note: *

Note that this * function may also return error codes from previous, asynchronous * launches. *

*
*

*
* * @param pCopy Parameters for the memory copy * @param hStream Stream identifier * * @return CUDA_SUCCESS, CUDA_ERROR_DEINITIALIZED, CUDA_ERROR_NOT_INITIALIZED, * CUDA_ERROR_INVALID_CONTEXT, CUDA_ERROR_INVALID_VALUE * * @see JCudaDriver#cuArray3DCreate * @see JCudaDriver#cuArray3DGetDescriptor * @see JCudaDriver#cuArrayCreate * @see JCudaDriver#cuArrayDestroy * @see JCudaDriver#cuArrayGetDescriptor * @see JCudaDriver#cuMemAlloc * @see JCudaDriver#cuMemAllocHost * @see JCudaDriver#cuMemAllocPitch * @see JCudaDriver#cuMemcpy2D * @see JCudaDriver#cuMemcpy2DAsync * @see JCudaDriver#cuMemcpy2DUnaligned * @see JCudaDriver#cuMemcpy3D * @see JCudaDriver#cuMemcpyAtoA * @see JCudaDriver#cuMemcpyAtoD * @see JCudaDriver#cuMemcpyAtoH * @see JCudaDriver#cuMemcpyAtoHAsync * @see JCudaDriver#cuMemcpyDtoA * @see JCudaDriver#cuMemcpyDtoD * @see JCudaDriver#cuMemcpyDtoDAsync * @see JCudaDriver#cuMemcpyDtoH * @see JCudaDriver#cuMemcpyDtoHAsync * @see JCudaDriver#cuMemcpyHtoA * @see JCudaDriver#cuMemcpyHtoAAsync * @see JCudaDriver#cuMemcpyHtoD * @see JCudaDriver#cuMemcpyHtoDAsync * @see JCudaDriver#cuMemFree * @see JCudaDriver#cuMemFreeHost * @see JCudaDriver#cuMemGetAddressRange * @see JCudaDriver#cuMemGetInfo * @see JCudaDriver#cuMemHostAlloc * @see JCudaDriver#cuMemHostGetDevicePointer * @see JCudaDriver#cuMemsetD2D8 * @see JCudaDriver#cuMemsetD2D8Async * @see JCudaDriver#cuMemsetD2D16 * @see JCudaDriver#cuMemsetD2D16Async * @see JCudaDriver#cuMemsetD2D32 * @see JCudaDriver#cuMemsetD2D32Async * @see JCudaDriver#cuMemsetD8 * @see JCudaDriver#cuMemsetD8Async * @see JCudaDriver#cuMemsetD16 * @see JCudaDriver#cuMemsetD16Async * @see JCudaDriver#cuMemsetD32 * @see JCudaDriver#cuMemsetD32Async */ public static int cuMemcpy3DAsync(CUDA_MEMCPY3D pCopy, CUstream hStream) { return checkResult(cuMemcpy3DAsyncNative(pCopy, hStream)); } private static native int cuMemcpy3DAsyncNative(CUDA_MEMCPY3D pCopy, CUstream hStream); /** * Copies memory between contexts asynchronously. * *
     * CUresult cuMemcpy3DPeerAsync (
     *      const CUDA_MEMCPY3D_PEER* pCopy,
     *      CUstream hStream )
     * 
*
*

Copies memory between contexts * asynchronously. Perform a 3D memory copy according to the parameters * specified in pCopy. See the definition of the CUDA_MEMCPY3D_PEER * structure for documentation of its parameters. *

*
* Note: *

Note that this * function may also return error codes from previous, asynchronous * launches. *

*
*

*
* * @param pCopy Parameters for the memory copy * @param hStream Stream identifier * * @return CUDA_SUCCESS, CUDA_ERROR_DEINITIALIZED, CUDA_ERROR_NOT_INITIALIZED, * CUDA_ERROR_INVALID_CONTEXT, CUDA_ERROR_INVALID_VALUE * * @see JCudaDriver#cuMemcpyDtoD * @see JCudaDriver#cuMemcpyPeer * @see JCudaDriver#cuMemcpyDtoDAsync * @see JCudaDriver#cuMemcpyPeerAsync * @see JCudaDriver#cuMemcpy3DPeerAsync */ public static int cuMemcpy3DPeerAsync(CUDA_MEMCPY3D_PEER pCopy, CUstream hStream) { return checkResult(cuMemcpy3DPeerAsyncNative(pCopy, hStream)); } private static native int cuMemcpy3DPeerAsyncNative(CUDA_MEMCPY3D_PEER pCopy, CUstream hStream); /** * Initializes device memory. * *
     * CUresult cuMemsetD8 (
     *      CUdeviceptr dstDevice,
     *      unsigned char  uc,
     *      size_t N )
     * 
*
*

Initializes device memory. Sets the * memory range of N 8-bit values to the specified value uc. *

*

Note that this function is asynchronous * with respect to the host unless dstDevice refers to pinned * host memory. *

*
* Note: *

Note that this * function may also return error codes from previous, asynchronous * launches. *

*
*

*
* * @param dstDevice Destination device pointer * @param uc Value to set * @param N Number of elements * * @return CUDA_SUCCESS, CUDA_ERROR_DEINITIALIZED, CUDA_ERROR_NOT_INITIALIZED, * CUDA_ERROR_INVALID_CONTEXT, CUDA_ERROR_INVALID_VALUE * * @see JCudaDriver#cuArray3DCreate * @see JCudaDriver#cuArray3DGetDescriptor * @see JCudaDriver#cuArrayCreate * @see JCudaDriver#cuArrayDestroy * @see JCudaDriver#cuArrayGetDescriptor * @see JCudaDriver#cuMemAlloc * @see JCudaDriver#cuMemAllocHost * @see JCudaDriver#cuMemAllocPitch * @see JCudaDriver#cuMemcpy2D * @see JCudaDriver#cuMemcpy2DAsync * @see JCudaDriver#cuMemcpy2DUnaligned * @see JCudaDriver#cuMemcpy3D * @see JCudaDriver#cuMemcpy3DAsync * @see JCudaDriver#cuMemcpyAtoA * @see JCudaDriver#cuMemcpyAtoD * @see JCudaDriver#cuMemcpyAtoH * @see JCudaDriver#cuMemcpyAtoHAsync * @see JCudaDriver#cuMemcpyDtoA * @see JCudaDriver#cuMemcpyDtoD * @see JCudaDriver#cuMemcpyDtoDAsync * @see JCudaDriver#cuMemcpyDtoH * @see JCudaDriver#cuMemcpyDtoHAsync * @see JCudaDriver#cuMemcpyHtoA * @see JCudaDriver#cuMemcpyHtoAAsync * @see JCudaDriver#cuMemcpyHtoD * @see JCudaDriver#cuMemcpyHtoDAsync * @see JCudaDriver#cuMemFree * @see JCudaDriver#cuMemFreeHost * @see JCudaDriver#cuMemGetAddressRange * @see JCudaDriver#cuMemGetInfo * @see JCudaDriver#cuMemHostAlloc * @see JCudaDriver#cuMemHostGetDevicePointer * @see JCudaDriver#cuMemsetD2D8 * @see JCudaDriver#cuMemsetD2D8Async * @see JCudaDriver#cuMemsetD2D16 * @see JCudaDriver#cuMemsetD2D16Async * @see JCudaDriver#cuMemsetD2D32 * @see JCudaDriver#cuMemsetD2D32Async * @see JCudaDriver#cuMemsetD8Async * @see JCudaDriver#cuMemsetD16 * @see JCudaDriver#cuMemsetD16Async * @see JCudaDriver#cuMemsetD32 * @see JCudaDriver#cuMemsetD32Async */ public static int cuMemsetD8(CUdeviceptr dstDevice, byte uc, long N) { return checkResult(cuMemsetD8Native(dstDevice, uc, N)); } private static native int cuMemsetD8Native(CUdeviceptr dstDevice, byte uc, long N); /** * Initializes device memory. * *
     * CUresult cuMemsetD16 (
     *      CUdeviceptr dstDevice,
     *      unsigned short us,
     *      size_t N )
     * 
*
*

Initializes device memory. Sets the * memory range of N 16-bit values to the specified value us. The dstDevice pointer must be two byte aligned. *

*

Note that this function is asynchronous * with respect to the host unless dstDevice refers to pinned * host memory. *

*
* Note: *

Note that this * function may also return error codes from previous, asynchronous * launches. *

*
*

*
* * @param dstDevice Destination device pointer * @param us Value to set * @param N Number of elements * * @return CUDA_SUCCESS, CUDA_ERROR_DEINITIALIZED, CUDA_ERROR_NOT_INITIALIZED, * CUDA_ERROR_INVALID_CONTEXT, CUDA_ERROR_INVALID_VALUE * * @see JCudaDriver#cuArray3DCreate * @see JCudaDriver#cuArray3DGetDescriptor * @see JCudaDriver#cuArrayCreate * @see JCudaDriver#cuArrayDestroy * @see JCudaDriver#cuArrayGetDescriptor * @see JCudaDriver#cuMemAlloc * @see JCudaDriver#cuMemAllocHost * @see JCudaDriver#cuMemAllocPitch * @see JCudaDriver#cuMemcpy2D * @see JCudaDriver#cuMemcpy2DAsync * @see JCudaDriver#cuMemcpy2DUnaligned * @see JCudaDriver#cuMemcpy3D * @see JCudaDriver#cuMemcpy3DAsync * @see JCudaDriver#cuMemcpyAtoA * @see JCudaDriver#cuMemcpyAtoD * @see JCudaDriver#cuMemcpyAtoH * @see JCudaDriver#cuMemcpyAtoHAsync * @see JCudaDriver#cuMemcpyDtoA * @see JCudaDriver#cuMemcpyDtoD * @see JCudaDriver#cuMemcpyDtoDAsync * @see JCudaDriver#cuMemcpyDtoH * @see JCudaDriver#cuMemcpyDtoHAsync * @see JCudaDriver#cuMemcpyHtoA * @see JCudaDriver#cuMemcpyHtoAAsync * @see JCudaDriver#cuMemcpyHtoD * @see JCudaDriver#cuMemcpyHtoDAsync * @see JCudaDriver#cuMemFree * @see JCudaDriver#cuMemFreeHost * @see JCudaDriver#cuMemGetAddressRange * @see JCudaDriver#cuMemGetInfo * @see JCudaDriver#cuMemHostAlloc * @see JCudaDriver#cuMemHostGetDevicePointer * @see JCudaDriver#cuMemsetD2D8 * @see JCudaDriver#cuMemsetD2D8Async * @see JCudaDriver#cuMemsetD2D16 * @see JCudaDriver#cuMemsetD2D16Async * @see JCudaDriver#cuMemsetD2D32 * @see JCudaDriver#cuMemsetD2D32Async * @see JCudaDriver#cuMemsetD8 * @see JCudaDriver#cuMemsetD8Async * @see JCudaDriver#cuMemsetD16Async * @see JCudaDriver#cuMemsetD32 * @see JCudaDriver#cuMemsetD32Async */ public static int cuMemsetD16(CUdeviceptr dstDevice, short us, long N) { return checkResult(cuMemsetD16Native(dstDevice, us, N)); } private static native int cuMemsetD16Native(CUdeviceptr dstDevice, short us, long N); /** * Initializes device memory. * *
     * CUresult cuMemsetD32 (
     *      CUdeviceptr dstDevice,
     *      unsigned int  ui,
     *      size_t N )
     * 
*
*

Initializes device memory. Sets the * memory range of N 32-bit values to the specified value ui. The dstDevice pointer must be four byte aligned. *

*

Note that this function is asynchronous * with respect to the host unless dstDevice refers to pinned * host memory. *

*
* Note: *

Note that this * function may also return error codes from previous, asynchronous * launches. *

*
*

*
* * @param dstDevice Destination device pointer * @param ui Value to set * @param N Number of elements * * @return CUDA_SUCCESS, CUDA_ERROR_DEINITIALIZED, CUDA_ERROR_NOT_INITIALIZED, * CUDA_ERROR_INVALID_CONTEXT, CUDA_ERROR_INVALID_VALUE * * @see JCudaDriver#cuArray3DCreate * @see JCudaDriver#cuArray3DGetDescriptor * @see JCudaDriver#cuArrayCreate * @see JCudaDriver#cuArrayDestroy * @see JCudaDriver#cuArrayGetDescriptor * @see JCudaDriver#cuMemAlloc * @see JCudaDriver#cuMemAllocHost * @see JCudaDriver#cuMemAllocPitch * @see JCudaDriver#cuMemcpy2D * @see JCudaDriver#cuMemcpy2DAsync * @see JCudaDriver#cuMemcpy2DUnaligned * @see JCudaDriver#cuMemcpy3D * @see JCudaDriver#cuMemcpy3DAsync * @see JCudaDriver#cuMemcpyAtoA * @see JCudaDriver#cuMemcpyAtoD * @see JCudaDriver#cuMemcpyAtoH * @see JCudaDriver#cuMemcpyAtoHAsync * @see JCudaDriver#cuMemcpyDtoA * @see JCudaDriver#cuMemcpyDtoD * @see JCudaDriver#cuMemcpyDtoDAsync * @see JCudaDriver#cuMemcpyDtoH * @see JCudaDriver#cuMemcpyDtoHAsync * @see JCudaDriver#cuMemcpyHtoA * @see JCudaDriver#cuMemcpyHtoAAsync * @see JCudaDriver#cuMemcpyHtoD * @see JCudaDriver#cuMemcpyHtoDAsync * @see JCudaDriver#cuMemFree * @see JCudaDriver#cuMemFreeHost * @see JCudaDriver#cuMemGetAddressRange * @see JCudaDriver#cuMemGetInfo * @see JCudaDriver#cuMemHostAlloc * @see JCudaDriver#cuMemHostGetDevicePointer * @see JCudaDriver#cuMemsetD2D8 * @see JCudaDriver#cuMemsetD2D8Async * @see JCudaDriver#cuMemsetD2D16 * @see JCudaDriver#cuMemsetD2D16Async * @see JCudaDriver#cuMemsetD2D32 * @see JCudaDriver#cuMemsetD2D32Async * @see JCudaDriver#cuMemsetD8 * @see JCudaDriver#cuMemsetD8Async * @see JCudaDriver#cuMemsetD16 * @see JCudaDriver#cuMemsetD16Async * @see JCudaDriver#cuMemsetD32Async */ public static int cuMemsetD32(CUdeviceptr dstDevice, int ui, long N) { return checkResult(cuMemsetD32Native(dstDevice, ui, N)); } private static native int cuMemsetD32Native(CUdeviceptr dstDevice, int ui, long N); /** * Initializes device memory. * *
     * CUresult cuMemsetD2D8 (
     *      CUdeviceptr dstDevice,
     *      size_t dstPitch,
     *      unsigned char  uc,
     *      size_t Width,
     *      size_t Height )
     * 
*
*

Initializes device memory. Sets the 2D * memory range of Width 8-bit values to the specified value * uc. Height specifies the number of rows to set, * and dstPitch specifies the number of bytes between each row. * This function performs fastest when the pitch is one that has been * passed * back by cuMemAllocPitch(). *

*

Note that this function is asynchronous * with respect to the host unless dstDevice refers to pinned * host memory. *

*
* Note: *

Note that this * function may also return error codes from previous, asynchronous * launches. *

*
*

*
* * @param dstDevice Destination device pointer * @param dstPitch Pitch of destination device pointer * @param uc Value to set * @param Width Width of row * @param Height Number of rows * * @return CUDA_SUCCESS, CUDA_ERROR_DEINITIALIZED, CUDA_ERROR_NOT_INITIALIZED, * CUDA_ERROR_INVALID_CONTEXT, CUDA_ERROR_INVALID_VALUE * * @see JCudaDriver#cuArray3DCreate * @see JCudaDriver#cuArray3DGetDescriptor * @see JCudaDriver#cuArrayCreate * @see JCudaDriver#cuArrayDestroy * @see JCudaDriver#cuArrayGetDescriptor * @see JCudaDriver#cuMemAlloc * @see JCudaDriver#cuMemAllocHost * @see JCudaDriver#cuMemAllocPitch * @see JCudaDriver#cuMemcpy2D * @see JCudaDriver#cuMemcpy2DAsync * @see JCudaDriver#cuMemcpy2DUnaligned * @see JCudaDriver#cuMemcpy3D * @see JCudaDriver#cuMemcpy3DAsync * @see JCudaDriver#cuMemcpyAtoA * @see JCudaDriver#cuMemcpyAtoD * @see JCudaDriver#cuMemcpyAtoH * @see JCudaDriver#cuMemcpyAtoHAsync * @see JCudaDriver#cuMemcpyDtoA * @see JCudaDriver#cuMemcpyDtoD * @see JCudaDriver#cuMemcpyDtoDAsync * @see JCudaDriver#cuMemcpyDtoH * @see JCudaDriver#cuMemcpyDtoHAsync * @see JCudaDriver#cuMemcpyHtoA * @see JCudaDriver#cuMemcpyHtoAAsync * @see JCudaDriver#cuMemcpyHtoD * @see JCudaDriver#cuMemcpyHtoDAsync * @see JCudaDriver#cuMemFree * @see JCudaDriver#cuMemFreeHost * @see JCudaDriver#cuMemGetAddressRange * @see JCudaDriver#cuMemGetInfo * @see JCudaDriver#cuMemHostAlloc * @see JCudaDriver#cuMemHostGetDevicePointer * @see JCudaDriver#cuMemsetD2D8Async * @see JCudaDriver#cuMemsetD2D16 * @see JCudaDriver#cuMemsetD2D16Async * @see JCudaDriver#cuMemsetD2D32 * @see JCudaDriver#cuMemsetD2D32Async * @see JCudaDriver#cuMemsetD8 * @see JCudaDriver#cuMemsetD8Async * @see JCudaDriver#cuMemsetD16 * @see JCudaDriver#cuMemsetD16Async * @see JCudaDriver#cuMemsetD32 * @see JCudaDriver#cuMemsetD32Async */ public static int cuMemsetD2D8(CUdeviceptr dstDevice, long dstPitch, byte uc, long Width, long Height) { return checkResult(cuMemsetD2D8Native(dstDevice, dstPitch, uc, Width, Height)); } 
private static native int cuMemsetD2D8Native(CUdeviceptr dstDevice, long dstPitch, byte uc, long Width, long Height); /** * Initializes device memory. * *
     * CUresult cuMemsetD2D16 (
     *      CUdeviceptr dstDevice,
     *      size_t dstPitch,
     *      unsigned short us,
     *      size_t Width,
     *      size_t Height )
     * 
*
*

Initializes device memory. Sets the 2D * memory range of Width 16-bit values to the specified value * us. Height specifies the number of rows to set, * and dstPitch specifies the number of bytes between each row. * The dstDevice pointer and dstPitch offset must be * two byte aligned. This function performs fastest when the pitch is one * that has been passed back by cuMemAllocPitch(). *

*

Note that this function is asynchronous * with respect to the host unless dstDevice refers to pinned * host memory. *

*
* Note: *

Note that this * function may also return error codes from previous, asynchronous * launches. *

*
*

*
* * @param dstDevice Destination device pointer * @param dstPitch Pitch of destination device pointer * @param us Value to set * @param Width Width of row * @param Height Number of rows * * @return CUDA_SUCCESS, CUDA_ERROR_DEINITIALIZED, CUDA_ERROR_NOT_INITIALIZED, * CUDA_ERROR_INVALID_CONTEXT, CUDA_ERROR_INVALID_VALUE * * @see JCudaDriver#cuArray3DCreate * @see JCudaDriver#cuArray3DGetDescriptor * @see JCudaDriver#cuArrayCreate * @see JCudaDriver#cuArrayDestroy * @see JCudaDriver#cuArrayGetDescriptor * @see JCudaDriver#cuMemAlloc * @see JCudaDriver#cuMemAllocHost * @see JCudaDriver#cuMemAllocPitch * @see JCudaDriver#cuMemcpy2D * @see JCudaDriver#cuMemcpy2DAsync * @see JCudaDriver#cuMemcpy2DUnaligned * @see JCudaDriver#cuMemcpy3D * @see JCudaDriver#cuMemcpy3DAsync * @see JCudaDriver#cuMemcpyAtoA * @see JCudaDriver#cuMemcpyAtoD * @see JCudaDriver#cuMemcpyAtoH * @see JCudaDriver#cuMemcpyAtoHAsync * @see JCudaDriver#cuMemcpyDtoA * @see JCudaDriver#cuMemcpyDtoD * @see JCudaDriver#cuMemcpyDtoDAsync * @see JCudaDriver#cuMemcpyDtoH * @see JCudaDriver#cuMemcpyDtoHAsync * @see JCudaDriver#cuMemcpyHtoA * @see JCudaDriver#cuMemcpyHtoAAsync * @see JCudaDriver#cuMemcpyHtoD * @see JCudaDriver#cuMemcpyHtoDAsync * @see JCudaDriver#cuMemFree * @see JCudaDriver#cuMemFreeHost * @see JCudaDriver#cuMemGetAddressRange * @see JCudaDriver#cuMemGetInfo * @see JCudaDriver#cuMemHostAlloc * @see JCudaDriver#cuMemHostGetDevicePointer * @see JCudaDriver#cuMemsetD2D8 * @see JCudaDriver#cuMemsetD2D8Async * @see JCudaDriver#cuMemsetD2D16Async * @see JCudaDriver#cuMemsetD2D32 * @see JCudaDriver#cuMemsetD2D32Async * @see JCudaDriver#cuMemsetD8 * @see JCudaDriver#cuMemsetD8Async * @see JCudaDriver#cuMemsetD16 * @see JCudaDriver#cuMemsetD16Async * @see JCudaDriver#cuMemsetD32 * @see JCudaDriver#cuMemsetD32Async */ public static int cuMemsetD2D16(CUdeviceptr dstDevice, long dstPitch, short us, long Width, long Height) { return checkResult(cuMemsetD2D16Native(dstDevice, dstPitch, us, Width, Height)); } 
private static native int cuMemsetD2D16Native(CUdeviceptr dstDevice, long dstPitch, short us, long Width, long Height); /** * Initializes device memory. * *
     * CUresult cuMemsetD2D32 (
     *      CUdeviceptr dstDevice,
     *      size_t dstPitch,
     *      unsigned int  ui,
     *      size_t Width,
     *      size_t Height )
     * 
*
*

Initializes device memory. Sets the 2D * memory range of Width 32-bit values to the specified value * ui. Height specifies the number of rows to set, * and dstPitch specifies the number of bytes between each row. * The dstDevice pointer and dstPitch offset must be * four byte aligned. This function performs fastest when the pitch is * one that has been passed back by cuMemAllocPitch(). *

*

Note that this function is asynchronous * with respect to the host unless dstDevice refers to pinned * host memory. *

*
* Note: *

Note that this * function may also return error codes from previous, asynchronous * launches. *

*
*

*
* * @param dstDevice Destination device pointer * @param dstPitch Pitch of destination device pointer * @param ui Value to set * @param Width Width of row * @param Height Number of rows * * @return CUDA_SUCCESS, CUDA_ERROR_DEINITIALIZED, CUDA_ERROR_NOT_INITIALIZED, * CUDA_ERROR_INVALID_CONTEXT, CUDA_ERROR_INVALID_VALUE * * @see JCudaDriver#cuArray3DCreate * @see JCudaDriver#cuArray3DGetDescriptor * @see JCudaDriver#cuArrayCreate * @see JCudaDriver#cuArrayDestroy * @see JCudaDriver#cuArrayGetDescriptor * @see JCudaDriver#cuMemAlloc * @see JCudaDriver#cuMemAllocHost * @see JCudaDriver#cuMemAllocPitch * @see JCudaDriver#cuMemcpy2D * @see JCudaDriver#cuMemcpy2DAsync * @see JCudaDriver#cuMemcpy2DUnaligned * @see JCudaDriver#cuMemcpy3D * @see JCudaDriver#cuMemcpy3DAsync * @see JCudaDriver#cuMemcpyAtoA * @see JCudaDriver#cuMemcpyAtoD * @see JCudaDriver#cuMemcpyAtoH * @see JCudaDriver#cuMemcpyAtoHAsync * @see JCudaDriver#cuMemcpyDtoA * @see JCudaDriver#cuMemcpyDtoD * @see JCudaDriver#cuMemcpyDtoDAsync * @see JCudaDriver#cuMemcpyDtoH * @see JCudaDriver#cuMemcpyDtoHAsync * @see JCudaDriver#cuMemcpyHtoA * @see JCudaDriver#cuMemcpyHtoAAsync * @see JCudaDriver#cuMemcpyHtoD * @see JCudaDriver#cuMemcpyHtoDAsync * @see JCudaDriver#cuMemFree * @see JCudaDriver#cuMemFreeHost * @see JCudaDriver#cuMemGetAddressRange * @see JCudaDriver#cuMemGetInfo * @see JCudaDriver#cuMemHostAlloc * @see JCudaDriver#cuMemHostGetDevicePointer * @see JCudaDriver#cuMemsetD2D8 * @see JCudaDriver#cuMemsetD2D8Async * @see JCudaDriver#cuMemsetD2D16 * @see JCudaDriver#cuMemsetD2D16Async * @see JCudaDriver#cuMemsetD2D32Async * @see JCudaDriver#cuMemsetD8 * @see JCudaDriver#cuMemsetD8Async * @see JCudaDriver#cuMemsetD16 * @see JCudaDriver#cuMemsetD16Async * @see JCudaDriver#cuMemsetD32 * @see JCudaDriver#cuMemsetD32Async */ public static int cuMemsetD2D32(CUdeviceptr dstDevice, long dstPitch, int ui, long Width, long Height) { return checkResult(cuMemsetD2D32Native(dstDevice, dstPitch, ui, Width, Height)); } 
private static native int cuMemsetD2D32Native(CUdeviceptr dstDevice, long dstPitch, int ui, long Width, long Height); /** * Sets device memory. * *
     * CUresult cuMemsetD8Async (
     *      CUdeviceptr dstDevice,
     *      unsigned char  uc,
     *      size_t N,
     *      CUstream hStream )
     * 
*
*

Sets device memory. Sets the memory * range of N 8-bit values to the specified value uc. *

*

cuMemsetD8Async() is asynchronous and * can optionally be associated to a stream by passing a non-zero stream argument. *

*
* Note: *

Note that this * function may also return error codes from previous, asynchronous * launches. *

*
*

*
* * @param dstDevice Destination device pointer * @param uc Value to set * @param N Number of elements * @param hStream Stream identifier * * @return CUDA_SUCCESS, CUDA_ERROR_DEINITIALIZED, CUDA_ERROR_NOT_INITIALIZED, * CUDA_ERROR_INVALID_CONTEXT, CUDA_ERROR_INVALID_VALUE * * @see JCudaDriver#cuArray3DCreate * @see JCudaDriver#cuArray3DGetDescriptor * @see JCudaDriver#cuArrayCreate * @see JCudaDriver#cuArrayDestroy * @see JCudaDriver#cuArrayGetDescriptor * @see JCudaDriver#cuMemAlloc * @see JCudaDriver#cuMemAllocHost * @see JCudaDriver#cuMemAllocPitch * @see JCudaDriver#cuMemcpy2D * @see JCudaDriver#cuMemcpy2DAsync * @see JCudaDriver#cuMemcpy2DUnaligned * @see JCudaDriver#cuMemcpy3D * @see JCudaDriver#cuMemcpy3DAsync * @see JCudaDriver#cuMemcpyAtoA * @see JCudaDriver#cuMemcpyAtoD * @see JCudaDriver#cuMemcpyAtoH * @see JCudaDriver#cuMemcpyAtoHAsync * @see JCudaDriver#cuMemcpyDtoA * @see JCudaDriver#cuMemcpyDtoD * @see JCudaDriver#cuMemcpyDtoDAsync * @see JCudaDriver#cuMemcpyDtoH * @see JCudaDriver#cuMemcpyDtoHAsync * @see JCudaDriver#cuMemcpyHtoA * @see JCudaDriver#cuMemcpyHtoAAsync * @see JCudaDriver#cuMemcpyHtoD * @see JCudaDriver#cuMemcpyHtoDAsync * @see JCudaDriver#cuMemFree * @see JCudaDriver#cuMemFreeHost * @see JCudaDriver#cuMemGetAddressRange * @see JCudaDriver#cuMemGetInfo * @see JCudaDriver#cuMemHostAlloc * @see JCudaDriver#cuMemHostGetDevicePointer * @see JCudaDriver#cuMemsetD2D8 * @see JCudaDriver#cuMemsetD2D8Async * @see JCudaDriver#cuMemsetD2D16 * @see JCudaDriver#cuMemsetD2D16Async * @see JCudaDriver#cuMemsetD2D32 * @see JCudaDriver#cuMemsetD2D32Async * @see JCudaDriver#cuMemsetD8 * @see JCudaDriver#cuMemsetD16 * @see JCudaDriver#cuMemsetD16Async * @see JCudaDriver#cuMemsetD32 * @see JCudaDriver#cuMemsetD32Async */ public static int cuMemsetD8Async(CUdeviceptr dstDevice, byte uc, long N, CUstream hStream) { return checkResult(cuMemsetD8AsyncNative(dstDevice, uc, N, hStream)); } private static native int cuMemsetD8AsyncNative(CUdeviceptr dstDevice, 
byte uc, long N, CUstream hStream); /** * Sets device memory. * *
     * CUresult cuMemsetD16Async (
     *      CUdeviceptr dstDevice,
     *      unsigned short us,
     *      size_t N,
     *      CUstream hStream )
     * 
*
*

Sets device memory. Sets the memory * range of N 16-bit values to the specified value us. * The dstDevice pointer must be two byte aligned. *

*

cuMemsetD16Async() is asynchronous and * can optionally be associated to a stream by passing a non-zero stream argument. *

*
* Note: *

Note that this * function may also return error codes from previous, asynchronous * launches. *

*
*

*
* * @param dstDevice Destination device pointer * @param us Value to set * @param N Number of elements * @param hStream Stream identifier * * @return CUDA_SUCCESS, CUDA_ERROR_DEINITIALIZED, CUDA_ERROR_NOT_INITIALIZED, * CUDA_ERROR_INVALID_CONTEXT, CUDA_ERROR_INVALID_VALUE * * @see JCudaDriver#cuArray3DCreate * @see JCudaDriver#cuArray3DGetDescriptor * @see JCudaDriver#cuArrayCreate * @see JCudaDriver#cuArrayDestroy * @see JCudaDriver#cuArrayGetDescriptor * @see JCudaDriver#cuMemAlloc * @see JCudaDriver#cuMemAllocHost * @see JCudaDriver#cuMemAllocPitch * @see JCudaDriver#cuMemcpy2D * @see JCudaDriver#cuMemcpy2DAsync * @see JCudaDriver#cuMemcpy2DUnaligned * @see JCudaDriver#cuMemcpy3D * @see JCudaDriver#cuMemcpy3DAsync * @see JCudaDriver#cuMemcpyAtoA * @see JCudaDriver#cuMemcpyAtoD * @see JCudaDriver#cuMemcpyAtoH * @see JCudaDriver#cuMemcpyAtoHAsync * @see JCudaDriver#cuMemcpyDtoA * @see JCudaDriver#cuMemcpyDtoD * @see JCudaDriver#cuMemcpyDtoDAsync * @see JCudaDriver#cuMemcpyDtoH * @see JCudaDriver#cuMemcpyDtoHAsync * @see JCudaDriver#cuMemcpyHtoA * @see JCudaDriver#cuMemcpyHtoAAsync * @see JCudaDriver#cuMemcpyHtoD * @see JCudaDriver#cuMemcpyHtoDAsync * @see JCudaDriver#cuMemFree * @see JCudaDriver#cuMemFreeHost * @see JCudaDriver#cuMemGetAddressRange * @see JCudaDriver#cuMemGetInfo * @see JCudaDriver#cuMemHostAlloc * @see JCudaDriver#cuMemHostGetDevicePointer * @see JCudaDriver#cuMemsetD2D8 * @see JCudaDriver#cuMemsetD2D8Async * @see JCudaDriver#cuMemsetD2D16 * @see JCudaDriver#cuMemsetD2D16Async * @see JCudaDriver#cuMemsetD2D32 * @see JCudaDriver#cuMemsetD2D32Async * @see JCudaDriver#cuMemsetD8 * @see JCudaDriver#cuMemsetD8Async * @see JCudaDriver#cuMemsetD16 * @see JCudaDriver#cuMemsetD32 * @see JCudaDriver#cuMemsetD32Async */ public static int cuMemsetD16Async(CUdeviceptr dstDevice, short us, long N, CUstream hStream) { return checkResult(cuMemsetD16AsyncNative(dstDevice, us, N, hStream)); } private static native int cuMemsetD16AsyncNative(CUdeviceptr dstDevice, 
short us, long N, CUstream hStream); /** * Sets device memory. * *
     * CUresult cuMemsetD32Async (
     *      CUdeviceptr dstDevice,
     *      unsigned int  ui,
     *      size_t N,
     *      CUstream hStream )
     * 
*
*

Sets device memory. Sets the memory * range of N 32-bit values to the specified value ui. * The dstDevice pointer must be four byte aligned. *

*

cuMemsetD32Async() is asynchronous and * can optionally be associated to a stream by passing a non-zero stream argument. *

*
* Note: *

Note that this * function may also return error codes from previous, asynchronous * launches. *

*
*

*
* * @param dstDevice Destination device pointer * @param ui Value to set * @param N Number of elements * @param hStream Stream identifier * * @return CUDA_SUCCESS, CUDA_ERROR_DEINITIALIZED, CUDA_ERROR_NOT_INITIALIZED, * CUDA_ERROR_INVALID_CONTEXT, CUDA_ERROR_INVALID_VALUE * * @see JCudaDriver#cuArray3DCreate * @see JCudaDriver#cuArray3DGetDescriptor * @see JCudaDriver#cuArrayCreate * @see JCudaDriver#cuArrayDestroy * @see JCudaDriver#cuArrayGetDescriptor * @see JCudaDriver#cuMemAlloc * @see JCudaDriver#cuMemAllocHost * @see JCudaDriver#cuMemAllocPitch * @see JCudaDriver#cuMemcpy2D * @see JCudaDriver#cuMemcpy2DAsync * @see JCudaDriver#cuMemcpy2DUnaligned * @see JCudaDriver#cuMemcpy3D * @see JCudaDriver#cuMemcpy3DAsync * @see JCudaDriver#cuMemcpyAtoA * @see JCudaDriver#cuMemcpyAtoD * @see JCudaDriver#cuMemcpyAtoH * @see JCudaDriver#cuMemcpyAtoHAsync * @see JCudaDriver#cuMemcpyDtoA * @see JCudaDriver#cuMemcpyDtoD * @see JCudaDriver#cuMemcpyDtoDAsync * @see JCudaDriver#cuMemcpyDtoH * @see JCudaDriver#cuMemcpyDtoHAsync * @see JCudaDriver#cuMemcpyHtoA * @see JCudaDriver#cuMemcpyHtoAAsync * @see JCudaDriver#cuMemcpyHtoD * @see JCudaDriver#cuMemcpyHtoDAsync * @see JCudaDriver#cuMemFree * @see JCudaDriver#cuMemFreeHost * @see JCudaDriver#cuMemGetAddressRange * @see JCudaDriver#cuMemGetInfo * @see JCudaDriver#cuMemHostAlloc * @see JCudaDriver#cuMemHostGetDevicePointer * @see JCudaDriver#cuMemsetD2D8 * @see JCudaDriver#cuMemsetD2D8Async * @see JCudaDriver#cuMemsetD2D16 * @see JCudaDriver#cuMemsetD2D16Async * @see JCudaDriver#cuMemsetD2D32 * @see JCudaDriver#cuMemsetD2D32Async * @see JCudaDriver#cuMemsetD8 * @see JCudaDriver#cuMemsetD8Async * @see JCudaDriver#cuMemsetD16 * @see JCudaDriver#cuMemsetD16Async * @see JCudaDriver#cuMemsetD32 */ public static int cuMemsetD32Async(CUdeviceptr dstDevice, int ui, long N, CUstream hStream) { return checkResult(cuMemsetD32AsyncNative(dstDevice, ui, N, hStream)); } private static native int cuMemsetD32AsyncNative(CUdeviceptr dstDevice, 
int ui, long N, CUstream hStream); /** * Sets device memory. * *
     * CUresult cuMemsetD2D8Async (
     *      CUdeviceptr dstDevice,
     *      size_t dstPitch,
     *      unsigned char  uc,
     *      size_t Width,
     *      size_t Height,
     *      CUstream hStream )
     * 
*
*

Sets device memory. Sets the 2D memory * range of Width 8-bit values to the specified value uc. Height specifies the number of rows to set, and * dstPitch specifies the number of bytes between each row. This * function performs fastest when the pitch is one that has been passed * back by cuMemAllocPitch(). *

*

cuMemsetD2D8Async() is asynchronous and * can optionally be associated to a stream by passing a non-zero stream argument. *

*
* Note: *

Note that this * function may also return error codes from previous, asynchronous * launches. *

*
*

*
* * @param dstDevice Destination device pointer * @param dstPitch Pitch of destination device pointer * @param uc Value to set * @param Width Width of row * @param Height Number of rows * @param hStream Stream identifier * * @return CUDA_SUCCESS, CUDA_ERROR_DEINITIALIZED, CUDA_ERROR_NOT_INITIALIZED, * CUDA_ERROR_INVALID_CONTEXT, CUDA_ERROR_INVALID_VALUE * * @see JCudaDriver#cuArray3DCreate * @see JCudaDriver#cuArray3DGetDescriptor * @see JCudaDriver#cuArrayCreate * @see JCudaDriver#cuArrayDestroy * @see JCudaDriver#cuArrayGetDescriptor * @see JCudaDriver#cuMemAlloc * @see JCudaDriver#cuMemAllocHost * @see JCudaDriver#cuMemAllocPitch * @see JCudaDriver#cuMemcpy2D * @see JCudaDriver#cuMemcpy2DAsync * @see JCudaDriver#cuMemcpy2DUnaligned * @see JCudaDriver#cuMemcpy3D * @see JCudaDriver#cuMemcpy3DAsync * @see JCudaDriver#cuMemcpyAtoA * @see JCudaDriver#cuMemcpyAtoD * @see JCudaDriver#cuMemcpyAtoH * @see JCudaDriver#cuMemcpyAtoHAsync * @see JCudaDriver#cuMemcpyDtoA * @see JCudaDriver#cuMemcpyDtoD * @see JCudaDriver#cuMemcpyDtoDAsync * @see JCudaDriver#cuMemcpyDtoH * @see JCudaDriver#cuMemcpyDtoHAsync * @see JCudaDriver#cuMemcpyHtoA * @see JCudaDriver#cuMemcpyHtoAAsync * @see JCudaDriver#cuMemcpyHtoD * @see JCudaDriver#cuMemcpyHtoDAsync * @see JCudaDriver#cuMemFree * @see JCudaDriver#cuMemFreeHost * @see JCudaDriver#cuMemGetAddressRange * @see JCudaDriver#cuMemGetInfo * @see JCudaDriver#cuMemHostAlloc * @see JCudaDriver#cuMemHostGetDevicePointer * @see JCudaDriver#cuMemsetD2D8 * @see JCudaDriver#cuMemsetD2D16 * @see JCudaDriver#cuMemsetD2D16Async * @see JCudaDriver#cuMemsetD2D32 * @see JCudaDriver#cuMemsetD2D32Async * @see JCudaDriver#cuMemsetD8 * @see JCudaDriver#cuMemsetD8Async * @see JCudaDriver#cuMemsetD16 * @see JCudaDriver#cuMemsetD16Async * @see JCudaDriver#cuMemsetD32 * @see JCudaDriver#cuMemsetD32Async */ public static int cuMemsetD2D8Async(CUdeviceptr dstDevice, long dstPitch, byte uc, long Width, long Height, CUstream hStream) { return 
checkResult(cuMemsetD2D8AsyncNative(dstDevice, dstPitch, uc, Width, Height, hStream)); } private static native int cuMemsetD2D8AsyncNative(CUdeviceptr dstDevice, long dstPitch, byte uc, long Width, long Height, CUstream hStream); /** * Sets device memory. * *
     * CUresult cuMemsetD2D16Async (
     *      CUdeviceptr dstDevice,
     *      size_t dstPitch,
     *      unsigned short us,
     *      size_t Width,
     *      size_t Height,
     *      CUstream hStream )
     * 
*
*

Sets device memory. Sets the 2D memory * range of Width 16-bit values to the specified value us. Height specifies the number of rows to set, and * dstPitch specifies the number of bytes between each row. The * dstDevice pointer and dstPitch offset must be two * byte aligned. This function performs fastest when the pitch is one that * has been passed back by cuMemAllocPitch(). *

*

cuMemsetD2D16Async() is asynchronous * and can optionally be associated to a stream by passing a non-zero stream argument. *

*
* Note: *

Note that this * function may also return error codes from previous, asynchronous * launches. *

*
*

*
* * @param dstDevice Destination device pointer * @param dstPitch Pitch of destination device pointer * @param us Value to set * @param Width Width of row * @param Height Number of rows * @param hStream Stream identifier * * @return CUDA_SUCCESS, CUDA_ERROR_DEINITIALIZED, CUDA_ERROR_NOT_INITIALIZED, * CUDA_ERROR_INVALID_CONTEXT, CUDA_ERROR_INVALID_VALUE * * @see JCudaDriver#cuArray3DCreate * @see JCudaDriver#cuArray3DGetDescriptor * @see JCudaDriver#cuArrayCreate * @see JCudaDriver#cuArrayDestroy * @see JCudaDriver#cuArrayGetDescriptor * @see JCudaDriver#cuMemAlloc * @see JCudaDriver#cuMemAllocHost * @see JCudaDriver#cuMemAllocPitch * @see JCudaDriver#cuMemcpy2D * @see JCudaDriver#cuMemcpy2DAsync * @see JCudaDriver#cuMemcpy2DUnaligned * @see JCudaDriver#cuMemcpy3D * @see JCudaDriver#cuMemcpy3DAsync * @see JCudaDriver#cuMemcpyAtoA * @see JCudaDriver#cuMemcpyAtoD * @see JCudaDriver#cuMemcpyAtoH * @see JCudaDriver#cuMemcpyAtoHAsync * @see JCudaDriver#cuMemcpyDtoA * @see JCudaDriver#cuMemcpyDtoD * @see JCudaDriver#cuMemcpyDtoDAsync * @see JCudaDriver#cuMemcpyDtoH * @see JCudaDriver#cuMemcpyDtoHAsync * @see JCudaDriver#cuMemcpyHtoA * @see JCudaDriver#cuMemcpyHtoAAsync * @see JCudaDriver#cuMemcpyHtoD * @see JCudaDriver#cuMemcpyHtoDAsync * @see JCudaDriver#cuMemFree * @see JCudaDriver#cuMemFreeHost * @see JCudaDriver#cuMemGetAddressRange * @see JCudaDriver#cuMemGetInfo * @see JCudaDriver#cuMemHostAlloc * @see JCudaDriver#cuMemHostGetDevicePointer * @see JCudaDriver#cuMemsetD2D8 * @see JCudaDriver#cuMemsetD2D8Async * @see JCudaDriver#cuMemsetD2D16 * @see JCudaDriver#cuMemsetD2D32 * @see JCudaDriver#cuMemsetD2D32Async * @see JCudaDriver#cuMemsetD8 * @see JCudaDriver#cuMemsetD8Async * @see JCudaDriver#cuMemsetD16 * @see JCudaDriver#cuMemsetD16Async * @see JCudaDriver#cuMemsetD32 * @see JCudaDriver#cuMemsetD32Async */ public static int cuMemsetD2D16Async(CUdeviceptr dstDevice, long dstPitch, short us, long Width, long Height, CUstream hStream) { return 
checkResult(cuMemsetD2D16AsyncNative(dstDevice, dstPitch, us, Width, Height, hStream)); } private static native int cuMemsetD2D16AsyncNative(CUdeviceptr dstDevice, long dstPitch, short us, long Width, long Height, CUstream hStream); /** * Sets device memory. * *
     * CUresult cuMemsetD2D32Async (
     *      CUdeviceptr dstDevice,
     *      size_t dstPitch,
     *      unsigned int  ui,
     *      size_t Width,
     *      size_t Height,
     *      CUstream hStream )
     * 
*
*

Sets device memory. Sets the 2D memory * range of Width 32-bit values to the specified value ui. Height specifies the number of rows to set, and * dstPitch specifies the number of bytes between each row. The * dstDevice pointer and dstPitch offset must be four * byte aligned. This function performs fastest when the pitch is one that * has been passed back by cuMemAllocPitch(). *

*

cuMemsetD2D32Async() is asynchronous * and can optionally be associated to a stream by passing a non-zero stream argument. *

*
* Note: *

Note that this * function may also return error codes from previous, asynchronous * launches. *

*
*

*
* * @param dstDevice Destination device pointer * @param dstPitch Pitch of destination device pointer * @param ui Value to set * @param Width Width of row * @param Height Number of rows * @param hStream Stream identifier * * @return CUDA_SUCCESS, CUDA_ERROR_DEINITIALIZED, CUDA_ERROR_NOT_INITIALIZED, * CUDA_ERROR_INVALID_CONTEXT, CUDA_ERROR_INVALID_VALUE * * @see JCudaDriver#cuArray3DCreate * @see JCudaDriver#cuArray3DGetDescriptor * @see JCudaDriver#cuArrayCreate * @see JCudaDriver#cuArrayDestroy * @see JCudaDriver#cuArrayGetDescriptor * @see JCudaDriver#cuMemAlloc * @see JCudaDriver#cuMemAllocHost * @see JCudaDriver#cuMemAllocPitch * @see JCudaDriver#cuMemcpy2D * @see JCudaDriver#cuMemcpy2DAsync * @see JCudaDriver#cuMemcpy2DUnaligned * @see JCudaDriver#cuMemcpy3D * @see JCudaDriver#cuMemcpy3DAsync * @see JCudaDriver#cuMemcpyAtoA * @see JCudaDriver#cuMemcpyAtoD * @see JCudaDriver#cuMemcpyAtoH * @see JCudaDriver#cuMemcpyAtoHAsync * @see JCudaDriver#cuMemcpyDtoA * @see JCudaDriver#cuMemcpyDtoD * @see JCudaDriver#cuMemcpyDtoDAsync * @see JCudaDriver#cuMemcpyDtoH * @see JCudaDriver#cuMemcpyDtoHAsync * @see JCudaDriver#cuMemcpyHtoA * @see JCudaDriver#cuMemcpyHtoAAsync * @see JCudaDriver#cuMemcpyHtoD * @see JCudaDriver#cuMemcpyHtoDAsync * @see JCudaDriver#cuMemFree * @see JCudaDriver#cuMemFreeHost * @see JCudaDriver#cuMemGetAddressRange * @see JCudaDriver#cuMemGetInfo * @see JCudaDriver#cuMemHostAlloc * @see JCudaDriver#cuMemHostGetDevicePointer * @see JCudaDriver#cuMemsetD2D8 * @see JCudaDriver#cuMemsetD2D8Async * @see JCudaDriver#cuMemsetD2D16 * @see JCudaDriver#cuMemsetD2D16Async * @see JCudaDriver#cuMemsetD2D32 * @see JCudaDriver#cuMemsetD8 * @see JCudaDriver#cuMemsetD8Async * @see JCudaDriver#cuMemsetD16 * @see JCudaDriver#cuMemsetD16Async * @see JCudaDriver#cuMemsetD32 * @see JCudaDriver#cuMemsetD32Async */ public static int cuMemsetD2D32Async(CUdeviceptr dstDevice, long dstPitch, int ui, long Width, long Height, CUstream hStream) { return 
checkResult(cuMemsetD2D32AsyncNative(dstDevice, dstPitch, ui, Width, Height, hStream)); } private static native int cuMemsetD2D32AsyncNative(CUdeviceptr dstDevice, long dstPitch, int ui, long Width, long Height, CUstream hStream); /** * Returns information about a function. * *
     * CUresult cuFuncGetAttribute (
     *      int* pi,
     *      CUfunction_attribute attrib,
     *      CUfunction hfunc )
     * 
*
*

Returns information about a function. * Returns in *pi the integer value of the attribute attrib on the kernel given by hfunc. The supported * attributes are: *

    *
  • *

    CU_FUNC_ATTRIBUTE_MAX_THREADS_PER_BLOCK: * The maximum number of threads per block, beyond which a launch of the * function would fail. This number depends on both the * function and the device on which * the function is currently loaded. *

    *
  • *
  • *

    CU_FUNC_ATTRIBUTE_SHARED_SIZE_BYTES: * The size in bytes of statically-allocated shared memory per block * required by this function. This does not include dynamically-allocated * shared memory requested by the * user at runtime. *

    *
  • *
  • *

    CU_FUNC_ATTRIBUTE_CONST_SIZE_BYTES: * The size in bytes of user-allocated constant memory required by this * function. *

    *
  • *
  • *

    CU_FUNC_ATTRIBUTE_LOCAL_SIZE_BYTES: * The size in bytes of local memory used by each thread of this * function. *

    *
  • *
  • *

    CU_FUNC_ATTRIBUTE_NUM_REGS: * The number of registers used by each thread of this function. *

    *
  • *
  • *

    CU_FUNC_ATTRIBUTE_PTX_VERSION: * The PTX virtual architecture version for which the function was * compiled. This value is the major PTX version * 10 + the * minor PTX version, so a PTX * version 1.3 function would return the value 13. Note that this may * return the undefined value * of 0 for cubins compiled prior * to CUDA 3.0. *

    *
  • *
  • *

    CU_FUNC_ATTRIBUTE_BINARY_VERSION: * The binary architecture version for which the function was compiled. * This value is the major binary version * 10 + the minor * binary version, so a binary * version 1.3 function would return the value 13. Note that this will * return a value of 10 for legacy * cubins that do not have a * properly-encoded binary architecture version. *

    *
  • *
*

*
* Note: *

Note that this * function may also return error codes from previous, asynchronous * launches. *

*
*

*
* * @param pi Returned attribute value * @param attrib Attribute requested * @param hfunc Function to query attribute of * * @return CUDA_SUCCESS, CUDA_ERROR_DEINITIALIZED, CUDA_ERROR_NOT_INITIALIZED, * CUDA_ERROR_INVALID_CONTEXT, CUDA_ERROR_INVALID_HANDLE, * CUDA_ERROR_INVALID_VALUE * * @see JCudaDriver#cuCtxGetCacheConfig * @see JCudaDriver#cuCtxSetCacheConfig * @see JCudaDriver#cuFuncSetCacheConfig * @see JCudaDriver#cuLaunchKernel */ public static int cuFuncGetAttribute (int pi[], int attrib, CUfunction func) { return checkResult(cuFuncGetAttributeNative(pi, attrib, func)); } private static native int cuFuncGetAttributeNative(int pi[], int attrib, CUfunction func); /** * Sets the block-dimensions for the function. * *
     * CUresult cuFuncSetBlockShape (
     *      CUfunction hfunc,
     *      int  x,
     *      int  y,
     *      int  z )
     * 
*
*

Sets the block-dimensions for the * function. * DeprecatedSpecifies the x, y, and z dimensions of the thread blocks that are * created when the kernel given by hfunc is launched. *

*
* Note: *

Note that this * function may also return error codes from previous, asynchronous * launches. *

*
*

*
* * @param hfunc Kernel to specify dimensions of * @param x X dimension * @param y Y dimension * @param z Z dimension * * @return CUDA_SUCCESS, CUDA_ERROR_DEINITIALIZED, CUDA_ERROR_NOT_INITIALIZED, * CUDA_ERROR_INVALID_CONTEXT, CUDA_ERROR_INVALID_HANDLE, * CUDA_ERROR_INVALID_VALUE * * @see JCudaDriver#cuFuncSetSharedSize * @see JCudaDriver#cuFuncSetCacheConfig * @see JCudaDriver#cuFuncGetAttribute * @see JCudaDriver#cuParamSetSize * @see JCudaDriver#cuParamSeti * @see JCudaDriver#cuParamSetf * @see JCudaDriver#cuParamSetv * @see JCudaDriver#cuLaunch * @see JCudaDriver#cuLaunchGrid * @see JCudaDriver#cuLaunchGridAsync * @see JCudaDriver#cuLaunchKernel */ public static int cuFuncSetBlockShape(CUfunction hfunc, int x, int y, int z) { return checkResult(cuFuncSetBlockShapeNative(hfunc, x, y, z)); } private static native int cuFuncSetBlockShapeNative(CUfunction hfunc, int x, int y, int z); /** * Sets the dynamic shared-memory size for the function. * *
     * CUresult cuFuncSetSharedSize (
     *      CUfunction hfunc,
     *      unsigned int  bytes )
     * 
*
*

Sets the dynamic shared-memory size for * the function. * DeprecatedSets through bytes * the amount of dynamic shared memory that will be available to each * thread block when the kernel given by hfunc is launched. *

*
* Note: *

Note that this * function may also return error codes from previous, asynchronous * launches. *

*
*

*
* * @param hfunc Kernel to specify dynamic shared-memory size for * @param bytes Dynamic shared-memory size per thread in bytes * * @return CUDA_SUCCESS, CUDA_ERROR_DEINITIALIZED, CUDA_ERROR_NOT_INITIALIZED, * CUDA_ERROR_INVALID_CONTEXT, CUDA_ERROR_INVALID_HANDLE, * CUDA_ERROR_INVALID_VALUE * * @see JCudaDriver#cuFuncSetBlockShape * @see JCudaDriver#cuFuncSetCacheConfig * @see JCudaDriver#cuFuncGetAttribute * @see JCudaDriver#cuParamSetSize * @see JCudaDriver#cuParamSeti * @see JCudaDriver#cuParamSetf * @see JCudaDriver#cuParamSetv * @see JCudaDriver#cuLaunch * @see JCudaDriver#cuLaunchGrid * @see JCudaDriver#cuLaunchGridAsync * @see JCudaDriver#cuLaunchKernel */ public static int cuFuncSetSharedSize(CUfunction hfunc, int bytes) { return checkResult(cuFuncSetSharedSizeNative(hfunc, bytes)); } private static native int cuFuncSetSharedSizeNative(CUfunction hfunc, int bytes); /** * Sets the preferred cache configuration for a device function. * *
     * CUresult cuFuncSetCacheConfig (
     *      CUfunction hfunc,
     *      CUfunc_cache config )
     * 
*
*

Sets the preferred cache configuration * for a device function. On devices where the L1 cache and shared memory * use the same * hardware resources, this sets through * config the preferred cache configuration for the device * function hfunc. This is only a preference. The driver will * use the requested configuration if possible, but it is free to choose * a different * configuration if required to execute hfunc. Any context-wide preference set via cuCtxSetCacheConfig() * will be overridden by this per-function setting unless the per-function * setting is CU_FUNC_CACHE_PREFER_NONE. In that case, the current * context-wide setting will be used. *

*

This setting does nothing on devices * where the size of the L1 cache and shared memory are fixed. *

*

Launching a kernel with a different * preference than the most recent preference setting may insert a * device-side synchronization * point. *

*

The supported cache configurations are: *

    *
  • *

    CU_FUNC_CACHE_PREFER_NONE: no * preference for shared memory or L1 (default) *

    *
  • *
  • *

    CU_FUNC_CACHE_PREFER_SHARED: * prefer larger shared memory and smaller L1 cache *

    *
  • *
  • *

    CU_FUNC_CACHE_PREFER_L1: prefer * larger L1 cache and smaller shared memory *

    *
  • *
  • *

    CU_FUNC_CACHE_PREFER_EQUAL: * prefer equal sized L1 cache and shared memory *

    *
  • *
*

*
* Note: *

Note that this * function may also return error codes from previous, asynchronous * launches. *

*
*

*
* * @param hfunc Kernel to configure cache for * @param config Requested cache configuration * * @return CUDA_SUCCESS, CUDA_ERROR_INVALID_VALUE, CUDA_ERROR_DEINITIALIZED, * CUDA_ERROR_NOT_INITIALIZED, CUDA_ERROR_INVALID_CONTEXT * * @see JCudaDriver#cuCtxGetCacheConfig * @see JCudaDriver#cuCtxSetCacheConfig * @see JCudaDriver#cuFuncGetAttribute * @see JCudaDriver#cuLaunchKernel */ public static int cuFuncSetCacheConfig(CUfunction hfunc, int config) { return checkResult(cuFuncSetCacheConfigNative(hfunc, config)); } private static native int cuFuncSetCacheConfigNative(CUfunction hfunc, int config); /** * Sets the shared memory configuration for a device function. * *
     * CUresult cuFuncSetSharedMemConfig (
     *      CUfunction hfunc,
     *      CUsharedconfig config )
     * 
*
*

Sets the shared memory configuration for * a device function. On devices with configurable shared memory banks, * this function * will force all subsequent launches of * the specified device function to have the given shared memory bank size * configuration. * On any given launch of the function, the * shared memory configuration of the device will be temporarily changed * if needed to * suit the function's preferred * configuration. Changes in shared memory configuration between subsequent * launches of functions, * may introduce a device side synchronization * point. *

*

Any per-function setting of shared * memory bank size set via cuFuncSetSharedMemConfig will override the * context wide setting set with cuCtxSetSharedMemConfig. *

*

Changing the shared memory bank size * will not increase shared memory usage or affect occupancy of kernels, * but may have major * effects on performance. Larger bank sizes * will allow for greater potential bandwidth to shared memory, but will * change what * kinds of accesses to shared memory will * result in bank conflicts. *

*

This function will do nothing on devices * with fixed shared memory bank size. *

*

The supported bank configurations are: *

    *
  • *

    CU_SHARED_MEM_CONFIG_DEFAULT_BANK_SIZE: * use the context's shared memory configuration when launching this * function. *

    *
  • *
  • *

    CU_SHARED_MEM_CONFIG_FOUR_BYTE_BANK_SIZE: set shared memory bank width * to be natively four bytes when launching this function. *

    *
  • *
  • *

    CU_SHARED_MEM_CONFIG_EIGHT_BYTE_BANK_SIZE: set shared memory bank * width to be natively eight bytes when launching this function. *

    *
  • *
*

*
* Note: *

Note that this * function may also return error codes from previous, asynchronous * launches. *

*
*

*
* * @param hfunc kernel to be given a shared memory config * @param config requested shared memory configuration * * @return CUDA_SUCCESS, CUDA_ERROR_INVALID_VALUE, CUDA_ERROR_DEINITIALIZED, * CUDA_ERROR_NOT_INITIALIZED, CUDA_ERROR_INVALID_CONTEXT * * @see JCudaDriver#cuCtxGetCacheConfig * @see JCudaDriver#cuCtxSetCacheConfig * @see JCudaDriver#cuCtxGetSharedMemConfig * @see JCudaDriver#cuCtxSetSharedMemConfigcuFuncGetAttribute * @see JCudaDriver#cuLaunchKernel */ public static int cuFuncSetSharedMemConfig(CUfunction hfunc, int config) { return checkResult(cuFuncSetSharedMemConfigNative(hfunc, config)); } private static native int cuFuncSetSharedMemConfigNative(CUfunction hfunc, int config); /** * Creates a 1D or 2D CUDA array. * *
     * CUresult cuArrayCreate (
     *      CUarray* pHandle,
     *      const CUDA_ARRAY_DESCRIPTOR* pAllocateArray )
     * 
*
*

Creates a 1D or 2D CUDA array. Creates * a CUDA array according to the CUDA_ARRAY_DESCRIPTOR structure pAllocateArray and returns a handle to the new CUDA array in *pHandle. The CUDA_ARRAY_DESCRIPTOR is defined as: *

*
    typedef struct {
     *         unsigned int Width;
     *         unsigned int Height;
     *         CUarray_format Format;
     *         unsigned int NumChannels;
     *     } CUDA_ARRAY_DESCRIPTOR;
* where:

*
    *
  • *

    Width and Height are the width and height of the CUDA array (in elements); * the CUDA array is one-dimensional if height is 0, two-dimensional * otherwise; *

    *
  • *
  • *
    * Format specifies the format * of the elements; CUarray_format is defined as: *
        typedef enum
         * CUarray_format_enum {
         *         CU_AD_FORMAT_UNSIGNED_INT8 = 0x01,
         *         CU_AD_FORMAT_UNSIGNED_INT16 = 0x02,
         *         CU_AD_FORMAT_UNSIGNED_INT32 = 0x03,
         *         CU_AD_FORMAT_SIGNED_INT8 = 0x08,
         *         CU_AD_FORMAT_SIGNED_INT16 = 0x09,
         *         CU_AD_FORMAT_SIGNED_INT32 = 0x0a,
         *         CU_AD_FORMAT_HALF = 0x10,
         *         CU_AD_FORMAT_FLOAT = 0x20
         *     } CUarray_format;
    *
    *
  • *
  • *

    NumChannels specifies * the number of packed components per CUDA array element; it may be 1, * 2, or 4; *

    *
  • *
*

*

Here are examples of CUDA array * descriptions: *

*

Description for a CUDA array of 2048 * floats: *

    CUDA_ARRAY_DESCRIPTOR desc;
     *     desc.Format = CU_AD_FORMAT_FLOAT;
     *     desc.NumChannels = 1;
     *     desc.Width = 2048;
     *     desc.Height = 1;
*

*

Description for a 64 x 64 CUDA array of * floats: *

    CUDA_ARRAY_DESCRIPTOR desc;
     *     desc.Format = CU_AD_FORMAT_FLOAT;
     *     desc.NumChannels = 1;
     *     desc.Width = 64;
     *     desc.Height = 64;
*

*

Description for a width x height CUDA array of 64-bit, 4x16-bit float16's: *

   
     * CUDA_ARRAY_DESCRIPTOR desc;
     *     desc.Format = CU_AD_FORMAT_HALF;
     *     desc.NumChannels = 4;
     *     desc.Width = width;
     *     desc.Height = height;
*

*

Description for a width x height CUDA array of 16-bit elements, each of which is two 8-bit * unsigned chars: *

    CUDA_ARRAY_DESCRIPTOR desc;
     *     desc.Format = CU_AD_FORMAT_UNSIGNED_INT8;
     *     desc.NumChannels = 2;
     *     desc.Width = width;
     *     desc.Height = height;
*

*
* Note: *

Note that this * function may also return error codes from previous, asynchronous * launches. *

*
*

*
* * @param pHandle Returned array * @param pAllocateArray Array descriptor * * @return CUDA_SUCCESS, CUDA_ERROR_DEINITIALIZED, CUDA_ERROR_NOT_INITIALIZED, * CUDA_ERROR_INVALID_CONTEXT, CUDA_ERROR_INVALID_VALUE, * CUDA_ERROR_OUT_OF_MEMORY, CUDA_ERROR_UNKNOWN * * @see JCudaDriver#cuArray3DCreate * @see JCudaDriver#cuArray3DGetDescriptor * @see JCudaDriver#cuArrayDestroy * @see JCudaDriver#cuArrayGetDescriptor * @see JCudaDriver#cuMemAlloc * @see JCudaDriver#cuMemAllocHost * @see JCudaDriver#cuMemAllocPitch * @see JCudaDriver#cuMemcpy2D * @see JCudaDriver#cuMemcpy2DAsync * @see JCudaDriver#cuMemcpy2DUnaligned * @see JCudaDriver#cuMemcpy3D * @see JCudaDriver#cuMemcpy3DAsync * @see JCudaDriver#cuMemcpyAtoA * @see JCudaDriver#cuMemcpyAtoD * @see JCudaDriver#cuMemcpyAtoH * @see JCudaDriver#cuMemcpyAtoHAsync * @see JCudaDriver#cuMemcpyDtoA * @see JCudaDriver#cuMemcpyDtoD * @see JCudaDriver#cuMemcpyDtoDAsync * @see JCudaDriver#cuMemcpyDtoH * @see JCudaDriver#cuMemcpyDtoHAsync * @see JCudaDriver#cuMemcpyHtoA * @see JCudaDriver#cuMemcpyHtoAAsync * @see JCudaDriver#cuMemcpyHtoD * @see JCudaDriver#cuMemcpyHtoDAsync * @see JCudaDriver#cuMemFree * @see JCudaDriver#cuMemFreeHost * @see JCudaDriver#cuMemGetAddressRange * @see JCudaDriver#cuMemGetInfo * @see JCudaDriver#cuMemHostAlloc * @see JCudaDriver#cuMemHostGetDevicePointer * @see JCudaDriver#cuMemsetD2D8 * @see JCudaDriver#cuMemsetD2D16 * @see JCudaDriver#cuMemsetD2D32 * @see JCudaDriver#cuMemsetD8 * @see JCudaDriver#cuMemsetD16 * @see JCudaDriver#cuMemsetD32 */ public static int cuArrayCreate(CUarray pHandle, CUDA_ARRAY_DESCRIPTOR pAllocateArray) { return checkResult(cuArrayCreateNative(pHandle, pAllocateArray)); } private static native int cuArrayCreateNative(CUarray pHandle, CUDA_ARRAY_DESCRIPTOR pAllocateArray); /** * Get a 1D or 2D CUDA array descriptor. * *
     * CUresult cuArrayGetDescriptor (
     *      CUDA_ARRAY_DESCRIPTOR* pArrayDescriptor,
     *      CUarray hArray )
     * 
*
*

Get a 1D or 2D CUDA array descriptor. * Returns in *pArrayDescriptor a descriptor containing * information on the format and dimensions of the CUDA array hArray. It is useful for subroutines that have been passed a CUDA * array, but need to know the CUDA array parameters for validation * or other purposes. *

*
* Note: *

Note that this * function may also return error codes from previous, asynchronous * launches. *

*
*

*
* * @param pArrayDescriptor Returned array descriptor * @param hArray Array to get descriptor of * * @return CUDA_SUCCESS, CUDA_ERROR_DEINITIALIZED, CUDA_ERROR_NOT_INITIALIZED, * CUDA_ERROR_INVALID_CONTEXT, CUDA_ERROR_INVALID_VALUE, * CUDA_ERROR_INVALID_HANDLE * * @see JCudaDriver#cuArray3DCreate * @see JCudaDriver#cuArray3DGetDescriptor * @see JCudaDriver#cuArrayCreate * @see JCudaDriver#cuArrayDestroy * @see JCudaDriver#cuMemAlloc * @see JCudaDriver#cuMemAllocHost * @see JCudaDriver#cuMemAllocPitch * @see JCudaDriver#cuMemcpy2D * @see JCudaDriver#cuMemcpy2DAsync * @see JCudaDriver#cuMemcpy2DUnaligned * @see JCudaDriver#cuMemcpy3D * @see JCudaDriver#cuMemcpy3DAsync * @see JCudaDriver#cuMemcpyAtoA * @see JCudaDriver#cuMemcpyAtoD * @see JCudaDriver#cuMemcpyAtoH * @see JCudaDriver#cuMemcpyAtoHAsync * @see JCudaDriver#cuMemcpyDtoA * @see JCudaDriver#cuMemcpyDtoD * @see JCudaDriver#cuMemcpyDtoDAsync * @see JCudaDriver#cuMemcpyDtoH * @see JCudaDriver#cuMemcpyDtoHAsync * @see JCudaDriver#cuMemcpyHtoA * @see JCudaDriver#cuMemcpyHtoAAsync * @see JCudaDriver#cuMemcpyHtoD * @see JCudaDriver#cuMemcpyHtoDAsync * @see JCudaDriver#cuMemFree * @see JCudaDriver#cuMemFreeHost * @see JCudaDriver#cuMemGetAddressRange * @see JCudaDriver#cuMemGetInfo * @see JCudaDriver#cuMemHostAlloc * @see JCudaDriver#cuMemHostGetDevicePointer * @see JCudaDriver#cuMemsetD2D8 * @see JCudaDriver#cuMemsetD2D16 * @see JCudaDriver#cuMemsetD2D32 * @see JCudaDriver#cuMemsetD8 * @see JCudaDriver#cuMemsetD16 * @see JCudaDriver#cuMemsetD32 */ public static int cuArrayGetDescriptor(CUDA_ARRAY_DESCRIPTOR pArrayDescriptor, CUarray hArray) { return checkResult(cuArrayGetDescriptorNative(pArrayDescriptor, hArray)); } private static native int cuArrayGetDescriptorNative(CUDA_ARRAY_DESCRIPTOR pArrayDescriptor, CUarray hArray); /** * Destroys a CUDA array. * *
     * CUresult cuArrayDestroy (
     *      CUarray hArray )
     * 
*
*

Destroys a CUDA array. Destroys the CUDA * array hArray. *

*
* Note: *

Note that this * function may also return error codes from previous, asynchronous * launches. *

*
*

*
* * @param hArray Array to destroy * * @return CUDA_SUCCESS, CUDA_ERROR_DEINITIALIZED, CUDA_ERROR_NOT_INITIALIZED, * CUDA_ERROR_INVALID_CONTEXT, CUDA_ERROR_INVALID_HANDLE, * CUDA_ERROR_ARRAY_IS_MAPPED * * @see JCudaDriver#cuArray3DCreate * @see JCudaDriver#cuArray3DGetDescriptor * @see JCudaDriver#cuArrayCreate * @see JCudaDriver#cuArrayGetDescriptor * @see JCudaDriver#cuMemAlloc * @see JCudaDriver#cuMemAllocHost * @see JCudaDriver#cuMemAllocPitch * @see JCudaDriver#cuMemcpy2D * @see JCudaDriver#cuMemcpy2DAsync * @see JCudaDriver#cuMemcpy2DUnaligned * @see JCudaDriver#cuMemcpy3D * @see JCudaDriver#cuMemcpy3DAsync * @see JCudaDriver#cuMemcpyAtoA * @see JCudaDriver#cuMemcpyAtoD * @see JCudaDriver#cuMemcpyAtoH * @see JCudaDriver#cuMemcpyAtoHAsync * @see JCudaDriver#cuMemcpyDtoA * @see JCudaDriver#cuMemcpyDtoD * @see JCudaDriver#cuMemcpyDtoDAsync * @see JCudaDriver#cuMemcpyDtoH * @see JCudaDriver#cuMemcpyDtoHAsync * @see JCudaDriver#cuMemcpyHtoA * @see JCudaDriver#cuMemcpyHtoAAsync * @see JCudaDriver#cuMemcpyHtoD * @see JCudaDriver#cuMemcpyHtoDAsync * @see JCudaDriver#cuMemFree * @see JCudaDriver#cuMemFreeHost * @see JCudaDriver#cuMemGetAddressRange * @see JCudaDriver#cuMemGetInfo * @see JCudaDriver#cuMemHostAlloc * @see JCudaDriver#cuMemHostGetDevicePointer * @see JCudaDriver#cuMemsetD2D8 * @see JCudaDriver#cuMemsetD2D16 * @see JCudaDriver#cuMemsetD2D32 * @see JCudaDriver#cuMemsetD8 * @see JCudaDriver#cuMemsetD16 * @see JCudaDriver#cuMemsetD32 */ public static int cuArrayDestroy(CUarray hArray) { return checkResult(cuArrayDestroyNative(hArray)); } private static native int cuArrayDestroyNative(CUarray hArray); /** * Creates a 3D CUDA array. * *
     * CUresult cuArray3DCreate (
     *      CUarray* pHandle,
     *      const CUDA_ARRAY3D_DESCRIPTOR* pAllocateArray )
     * 
*
*

Creates a 3D CUDA array. Creates a CUDA * array according to the CUDA_ARRAY3D_DESCRIPTOR structure pAllocateArray and returns a handle to the new CUDA array in *pHandle. The CUDA_ARRAY3D_DESCRIPTOR is defined as: *

*
    typedef struct {
     *         unsigned int Width;
     *         unsigned int Height;
     *         unsigned int Depth;
     *         CUarray_format Format;
     *         unsigned int NumChannels;
     *         unsigned int Flags;
     *     } CUDA_ARRAY3D_DESCRIPTOR;
* where:

*
    *
  • *
    * Width, Height, and Depth are the width, height, and depth of * the CUDA array (in elements); the following types of CUDA arrays can * be allocated: *
      *
    • *

      A 1D array is allocated * if Height and Depth extents are both zero. *

      *
    • *
    • *

      A 2D array is allocated * if only Depth extent is zero. *

      *
    • *
    • *

      A 3D array is allocated * if all three extents are non-zero. *

      *
    • *
    • *

      A 1D layered CUDA * array is allocated if only Height is zero and the * CUDA_ARRAY3D_LAYERED flag is set. Each layer is a 1D array. The number * of layers is determined by the depth extent. *

      *
    • *
    • *

      A 2D layered CUDA * array is allocated if all three extents are non-zero and the * CUDA_ARRAY3D_LAYERED flag is set. Each layer is a 2D array. The number * of layers is determined by the depth extent. *

      *
    • *
    • *

      A cubemap CUDA array * is allocated if all three extents are non-zero and the CUDA_ARRAY3D_CUBEMAP * flag is set. Width must be equal to Height, and * Depth must be six. A cubemap is a special type of 2D layered * CUDA array, where the six layers represent the six faces of a cube. * The order of the six * layers in memory is the same as that listed in CUarray_cubemap_face. *

      *
    • *
    • *

      A cubemap layered CUDA * array is allocated if all three extents are non-zero, and both, * CUDA_ARRAY3D_CUBEMAP and CUDA_ARRAY3D_LAYERED flags are set. Width must be equal to Height, and Depth must * be a multiple of six. A cubemap layered CUDA array is a special type * of 2D layered CUDA array that consists of a collection * of cubemaps. The first * six layers represent the first cubemap, the next six layers form the * second cubemap, and so on. *

      *
    • *
    *
    *
  • *
*

*
    *
  • *
    * Format specifies the format * of the elements; CUarray_format is defined as: *
        typedef enum
         * CUarray_format_enum {
         *         CU_AD_FORMAT_UNSIGNED_INT8 = 0x01,
         *         CU_AD_FORMAT_UNSIGNED_INT16 = 0x02,
         *         CU_AD_FORMAT_UNSIGNED_INT32 = 0x03,
         *         CU_AD_FORMAT_SIGNED_INT8 = 0x08,
         *         CU_AD_FORMAT_SIGNED_INT16 = 0x09,
         *         CU_AD_FORMAT_SIGNED_INT32 = 0x0a,
         *         CU_AD_FORMAT_HALF = 0x10,
         *         CU_AD_FORMAT_FLOAT = 0x20
         *     } CUarray_format;
    *
    *
  • *
*

*
    *
  • *

    NumChannels specifies * the number of packed components per CUDA array element; it may be 1, * 2, or 4; *

    *
  • *
*

*
    *
  • *
    * Flags may be set to *
      *
    • *

      CUDA_ARRAY3D_LAYERED * to enable creation of layered CUDA arrays. If this flag is set, Depth specifies the number of layers, not the depth of a 3D * array. *

      *
    • *
    • *

      CUDA_ARRAY3D_SURFACE_LDST * to enable surface references to be bound to the CUDA array. If this * flag is not set, cuSurfRefSetArray will fail when attempting to bind * the CUDA array to a surface reference. *

      *
    • *
    • *

      CUDA_ARRAY3D_CUBEMAP * to enable creation of cubemaps. If this flag is set, Width * must be equal to Height, and Depth must be six. If * the CUDA_ARRAY3D_LAYERED flag is also set, then Depth must * be a multiple of six. *

      *
    • *
    • *

      CUDA_ARRAY3D_TEXTURE_GATHER * to indicate that the CUDA array will be used for texture gather. * Texture gather can only be performed on 2D CUDA arrays. *

      *
    • *
    *
    *
  • *
*

*

Width, Height and * Depth must meet certain size requirements as listed in the * following table. All values are specified in elements. Note that for * brevity's sake, the full name of the * device attribute is not specified. For ex., TEXTURE1D_WIDTH refers to * the device attribute * CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE1D_WIDTH. *

*

Note that 2D CUDA arrays have different * size requirements if the CUDA_ARRAY3D_TEXTURE_GATHER flag is set. Width and Height must not be greater than * CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE2D_GATHER_WIDTH and * CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE2D_GATHER_HEIGHT respectively, in * that case. *

*
* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * *
*

CUDA array * type *

*
*

Valid extents * that must always be met * {(width range in * elements), (height range), (depth range)} *

*
*

Valid extents * with CUDA_ARRAY3D_SURFACE_LDST set * {(width range in * elements), (height range), (depth range)} *

*
*

1D

*
*

{ (1,TEXTURE1D_WIDTH), * 0, 0 } *

*
*

{ (1,SURFACE1D_WIDTH), * 0, 0 } *

*
*

2D

*
*

{ (1,TEXTURE2D_WIDTH), * (1,TEXTURE2D_HEIGHT), 0 } *

*
*

{ (1,SURFACE2D_WIDTH), * (1,SURFACE2D_HEIGHT), 0 } *

*
*

3D

*
*

{ (1,TEXTURE3D_WIDTH), * (1,TEXTURE3D_HEIGHT), (1,TEXTURE3D_DEPTH) } * OR * { * (1,TEXTURE3D_WIDTH_ALTERNATE), (1,TEXTURE3D_HEIGHT_ALTERNATE), * (1,TEXTURE3D_DEPTH_ALTERNATE) } *

*
*

{ (1,SURFACE3D_WIDTH), * (1,SURFACE3D_HEIGHT), (1,SURFACE3D_DEPTH) } *

*
*

1D Layered

*
*

{ * (1,TEXTURE1D_LAYERED_WIDTH), 0, (1,TEXTURE1D_LAYERED_LAYERS) } *

*
*

{ * (1,SURFACE1D_LAYERED_WIDTH), 0, (1,SURFACE1D_LAYERED_LAYERS) } *

*
*

2D Layered

*
*

{ * (1,TEXTURE2D_LAYERED_WIDTH), (1,TEXTURE2D_LAYERED_HEIGHT), * (1,TEXTURE2D_LAYERED_LAYERS) } *

*
*

{ * (1,SURFACE2D_LAYERED_WIDTH), (1,SURFACE2D_LAYERED_HEIGHT), * (1,SURFACE2D_LAYERED_LAYERS) } *

*
*

Cubemap

*
*

{ (1,TEXTURECUBEMAP_WIDTH), * (1,TEXTURECUBEMAP_WIDTH), 6 } *

*
*

{ (1,SURFACECUBEMAP_WIDTH), * (1,SURFACECUBEMAP_WIDTH), 6 } *

*
*

Cubemap Layered

*
*

{ * (1,TEXTURECUBEMAP_LAYERED_WIDTH), (1,TEXTURECUBEMAP_LAYERED_WIDTH), * (1,TEXTURECUBEMAP_LAYERED_LAYERS) } *

*
*

{ * (1,SURFACECUBEMAP_LAYERED_WIDTH), (1,SURFACECUBEMAP_LAYERED_WIDTH), * (1,SURFACECUBEMAP_LAYERED_LAYERS) } *

*
*
*

*

Here are examples of CUDA array * descriptions: *

*

Description for a CUDA array of 2048 * floats: *

    CUDA_ARRAY3D_DESCRIPTOR desc;
     *     desc.Format = CU_AD_FORMAT_FLOAT;
     *     desc.NumChannels = 1;
     *     desc.Width = 2048;
     *     desc.Height = 0;
     *     desc.Depth = 0;
*

*

Description for a 64 x 64 CUDA array of * floats: *

    CUDA_ARRAY3D_DESCRIPTOR desc;
     *     desc.Format = CU_AD_FORMAT_FLOAT;
     *     desc.NumChannels = 1;
     *     desc.Width = 64;
     *     desc.Height = 64;
     *     desc.Depth = 0;
*

*

Description for a width x height x depth CUDA array of 64-bit, 4x16-bit float16's: *

    CUDA_ARRAY3D_DESCRIPTOR desc;
     *     desc.Format = CU_AD_FORMAT_HALF;
     *     desc.NumChannels = 4;
     *     desc.Width = width;
     *     desc.Height = height;
     *     desc.Depth = depth;
*

*
* Note: *

Note that this * function may also return error codes from previous, asynchronous * launches. *

*
*

*
* * @param pHandle Returned array * @param pAllocateArray 3D array descriptor * * @return CUDA_SUCCESS, CUDA_ERROR_DEINITIALIZED, CUDA_ERROR_NOT_INITIALIZED, * CUDA_ERROR_INVALID_CONTEXT, CUDA_ERROR_INVALID_VALUE, * CUDA_ERROR_OUT_OF_MEMORY, CUDA_ERROR_UNKNOWN * * @see JCudaDriver#cuArray3DGetDescriptor * @see JCudaDriver#cuArrayCreate * @see JCudaDriver#cuArrayDestroy * @see JCudaDriver#cuArrayGetDescriptor * @see JCudaDriver#cuMemAlloc * @see JCudaDriver#cuMemAllocHost * @see JCudaDriver#cuMemAllocPitch * @see JCudaDriver#cuMemcpy2D * @see JCudaDriver#cuMemcpy2DAsync * @see JCudaDriver#cuMemcpy2DUnaligned * @see JCudaDriver#cuMemcpy3D * @see JCudaDriver#cuMemcpy3DAsync * @see JCudaDriver#cuMemcpyAtoA * @see JCudaDriver#cuMemcpyAtoD * @see JCudaDriver#cuMemcpyAtoH * @see JCudaDriver#cuMemcpyAtoHAsync * @see JCudaDriver#cuMemcpyDtoA * @see JCudaDriver#cuMemcpyDtoD * @see JCudaDriver#cuMemcpyDtoDAsync * @see JCudaDriver#cuMemcpyDtoH * @see JCudaDriver#cuMemcpyDtoHAsync * @see JCudaDriver#cuMemcpyHtoA * @see JCudaDriver#cuMemcpyHtoAAsync * @see JCudaDriver#cuMemcpyHtoD * @see JCudaDriver#cuMemcpyHtoDAsync * @see JCudaDriver#cuMemFree * @see JCudaDriver#cuMemFreeHost * @see JCudaDriver#cuMemGetAddressRange * @see JCudaDriver#cuMemGetInfo * @see JCudaDriver#cuMemHostAlloc * @see JCudaDriver#cuMemHostGetDevicePointer * @see JCudaDriver#cuMemsetD2D8 * @see JCudaDriver#cuMemsetD2D16 * @see JCudaDriver#cuMemsetD2D32 * @see JCudaDriver#cuMemsetD8 * @see JCudaDriver#cuMemsetD16 * @see JCudaDriver#cuMemsetD32 */ public static int cuArray3DCreate(CUarray pHandle, CUDA_ARRAY3D_DESCRIPTOR pAllocateArray) { return checkResult(cuArray3DCreateNative(pHandle, pAllocateArray)); } private static native int cuArray3DCreateNative(CUarray pHandle, CUDA_ARRAY3D_DESCRIPTOR pAllocateArray); /** * Get a 3D CUDA array descriptor. * *
     * CUresult cuArray3DGetDescriptor (
     *      CUDA_ARRAY3D_DESCRIPTOR* pArrayDescriptor,
     *      CUarray hArray )
     * 
*
*

Get a 3D CUDA array descriptor. Returns * in *pArrayDescriptor a descriptor containing information on * the format and dimensions of the CUDA array hArray. It is * useful for subroutines that have been passed a CUDA array, but need to * know the CUDA array parameters for validation * or other purposes. *

*

This function may be called on 1D and * 2D arrays, in which case the Height and/or Depth * members of the descriptor struct will be set to 0. *

*
* Note: *

Note that this * function may also return error codes from previous, asynchronous * launches. *

*
*

*
* * @param pArrayDescriptor Returned 3D array descriptor * @param hArray 3D array to get descriptor of * * @return CUDA_SUCCESS, CUDA_ERROR_DEINITIALIZED, CUDA_ERROR_NOT_INITIALIZED, * CUDA_ERROR_INVALID_CONTEXT, CUDA_ERROR_INVALID_VALUE, * CUDA_ERROR_INVALID_HANDLE * * @see JCudaDriver#cuArray3DCreate * @see JCudaDriver#cuArrayCreate * @see JCudaDriver#cuArrayDestroy * @see JCudaDriver#cuArrayGetDescriptor * @see JCudaDriver#cuMemAlloc * @see JCudaDriver#cuMemAllocHost * @see JCudaDriver#cuMemAllocPitch * @see JCudaDriver#cuMemcpy2D * @see JCudaDriver#cuMemcpy2DAsync * @see JCudaDriver#cuMemcpy2DUnaligned * @see JCudaDriver#cuMemcpy3D * @see JCudaDriver#cuMemcpy3DAsync * @see JCudaDriver#cuMemcpyAtoA * @see JCudaDriver#cuMemcpyAtoD * @see JCudaDriver#cuMemcpyAtoH * @see JCudaDriver#cuMemcpyAtoHAsync * @see JCudaDriver#cuMemcpyDtoA * @see JCudaDriver#cuMemcpyDtoD * @see JCudaDriver#cuMemcpyDtoDAsync * @see JCudaDriver#cuMemcpyDtoH * @see JCudaDriver#cuMemcpyDtoHAsync * @see JCudaDriver#cuMemcpyHtoA * @see JCudaDriver#cuMemcpyHtoAAsync * @see JCudaDriver#cuMemcpyHtoD * @see JCudaDriver#cuMemcpyHtoDAsync * @see JCudaDriver#cuMemFree * @see JCudaDriver#cuMemFreeHost * @see JCudaDriver#cuMemGetAddressRange * @see JCudaDriver#cuMemGetInfo * @see JCudaDriver#cuMemHostAlloc * @see JCudaDriver#cuMemHostGetDevicePointer * @see JCudaDriver#cuMemsetD2D8 * @see JCudaDriver#cuMemsetD2D16 * @see JCudaDriver#cuMemsetD2D32 * @see JCudaDriver#cuMemsetD8 * @see JCudaDriver#cuMemsetD16 * @see JCudaDriver#cuMemsetD32 */ public static int cuArray3DGetDescriptor(CUDA_ARRAY3D_DESCRIPTOR pArrayDescriptor, CUarray hArray) { return checkResult(cuArray3DGetDescriptorNative(pArrayDescriptor, hArray)); } private static native int cuArray3DGetDescriptorNative(CUDA_ARRAY3D_DESCRIPTOR pArrayDescriptor, CUarray hArray); /** * Creates a CUDA mipmapped array. * *
     * CUresult cuMipmappedArrayCreate (
     *      CUmipmappedArray* pHandle,
     *      const CUDA_ARRAY3D_DESCRIPTOR* pMipmappedArrayDesc,
     *      unsigned int  numMipmapLevels )
     * 
*
*

Creates a CUDA mipmapped array. Creates * a CUDA mipmapped array according to the CUDA_ARRAY3D_DESCRIPTOR * structure pMipmappedArrayDesc and returns a handle to the * new CUDA mipmapped array in *pHandle. numMipmapLevels * specifies the number of mipmap levels to be allocated. This value is * clamped to the range [1, 1 + floor(log2(max(width, height, * depth)))]. *

*

The CUDA_ARRAY3D_DESCRIPTOR is defined * as: *

*
    typedef struct {
     *         unsigned int Width;
     *         unsigned int Height;
     *         unsigned int Depth;
     *         CUarray_format Format;
     *         unsigned int NumChannels;
     *         unsigned int Flags;
     *     } CUDA_ARRAY3D_DESCRIPTOR;
* where:

*
    *
  • *
    * Width, Height, and Depth are the width, height, and depth of * the CUDA array (in elements); the following types of CUDA arrays can * be allocated: *
      *
    • *

      A 1D mipmapped array * is allocated if Height and Depth extents are both * zero. *

      *
    • *
    • *

      A 2D mipmapped array * is allocated if only Depth extent is zero. *

      *
    • *
    • *

      A 3D mipmapped array * is allocated if all three extents are non-zero. *

      *
    • *
    • *

      A 1D layered CUDA * mipmapped array is allocated if only Height is zero and the * CUDA_ARRAY3D_LAYERED flag is set. Each layer is a 1D array. The number * of layers is determined by the depth extent. *

      *
    • *
    • *

      A 2D layered CUDA * mipmapped array is allocated if all three extents are non-zero and the * CUDA_ARRAY3D_LAYERED flag is set. Each layer is a 2D array. The number * of layers is determined by the depth extent. *

      *
    • *
    • *

      A cubemap CUDA * mipmapped array is allocated if all three extents are non-zero and the * CUDA_ARRAY3D_CUBEMAP flag is set. Width must be equal to Height, and Depth must be six. A cubemap is a special * type of 2D layered CUDA array, where the six layers represent the six * faces of a cube. * The order of the six * layers in memory is the same as that listed in CUarray_cubemap_face. *

      *
    • *
    • *

      A cubemap layered CUDA * mipmapped array is allocated if all three extents are non-zero, and * both, CUDA_ARRAY3D_CUBEMAP and CUDA_ARRAY3D_LAYERED flags are set. Width must be equal to Height, and Depth must * be a multiple of six. A cubemap layered CUDA array is a special type * of 2D layered CUDA array that consists of a collection * of cubemaps. The first * six layers represent the first cubemap, the next six layers form the * second cubemap, and so on. *

      *
    • *
    *
    *
  • *
*

*
    *
  • *
    * Format specifies the format * of the elements; CUarray_format is defined as: *
        typedef enum
         * CUarray_format_enum {
         *         CU_AD_FORMAT_UNSIGNED_INT8 = 0x01,
         *         CU_AD_FORMAT_UNSIGNED_INT16 = 0x02,
         *         CU_AD_FORMAT_UNSIGNED_INT32 = 0x03,
         *         CU_AD_FORMAT_SIGNED_INT8 = 0x08,
         *         CU_AD_FORMAT_SIGNED_INT16 = 0x09,
         *         CU_AD_FORMAT_SIGNED_INT32 = 0x0a,
         *         CU_AD_FORMAT_HALF = 0x10,
         *         CU_AD_FORMAT_FLOAT = 0x20
         *     } CUarray_format;
    *
    *
  • *
*

*
    *
  • *

    NumChannels specifies * the number of packed components per CUDA array element; it may be 1, * 2, or 4; *

    *
  • *
*

*
    *
  • *
    * Flags may be set to *
      *
    • *

      CUDA_ARRAY3D_LAYERED * to enable creation of layered CUDA mipmapped arrays. If this flag is * set, Depth specifies the number of layers, not the depth of * a 3D array. *

      *
    • *
    • *

      CUDA_ARRAY3D_SURFACE_LDST * to enable surface references to be bound to individual mipmap levels * of the CUDA mipmapped array. If this flag is not set, * cuSurfRefSetArray will * fail when attempting to bind a mipmap level of the CUDA mipmapped array * to a surface reference. *

      *
    • *
    • *

      CUDA_ARRAY3D_CUBEMAP * to enable creation of mipmapped cubemaps. If this flag is set, Width must be equal to Height, and Depth must * be six. If the CUDA_ARRAY3D_LAYERED flag is also set, then Depth must be a multiple of six. *

      *
    • *
    • *

      CUDA_ARRAY3D_TEXTURE_GATHER * to indicate that the CUDA mipmapped array will be used for texture * gather. Texture gather can only be performed on 2D CUDA * mipmapped arrays. *

      *
    • *
    *
    *
  • *
*

*

Width, Height and * Depth must meet certain size requirements as listed in the * following table. All values are specified in elements. Note that for * brevity's sake, the full name of the * device attribute is not specified. For ex., TEXTURE1D_MIPMAPPED_WIDTH * refers to the device * attribute * CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE1D_MIPMAPPED_WIDTH. *

*
* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * *
*

CUDA array * type *

*
*

Valid extents * that must always be met * {(width range in * elements), (height range), (depth range)} *

*
*

1D

*
*

{ * (1,TEXTURE1D_MIPMAPPED_WIDTH), 0, 0 } *

*
*

2D

*
*

{ * (1,TEXTURE2D_MIPMAPPED_WIDTH), (1,TEXTURE2D_MIPMAPPED_HEIGHT), 0 } *

*
*

3D

*
*

{ (1,TEXTURE3D_WIDTH), * (1,TEXTURE3D_HEIGHT), (1,TEXTURE3D_DEPTH) } * OR * { * (1,TEXTURE3D_WIDTH_ALTERNATE), (1,TEXTURE3D_HEIGHT_ALTERNATE), * (1,TEXTURE3D_DEPTH_ALTERNATE) } *

*
*

1D Layered

*
*

{ * (1,TEXTURE1D_LAYERED_WIDTH), 0, (1,TEXTURE1D_LAYERED_LAYERS) } *

*
*

2D Layered

*
*

{ * (1,TEXTURE2D_LAYERED_WIDTH), (1,TEXTURE2D_LAYERED_HEIGHT), * (1,TEXTURE2D_LAYERED_LAYERS) } *

*
*

Cubemap

*
*

{ (1,TEXTURECUBEMAP_WIDTH), * (1,TEXTURECUBEMAP_WIDTH), 6 } *

*
*

Cubemap Layered

*
*

{ * (1,TEXTURECUBEMAP_LAYERED_WIDTH), (1,TEXTURECUBEMAP_LAYERED_WIDTH), * (1,TEXTURECUBEMAP_LAYERED_LAYERS) } *

*
*
*

*
* Note: *

Note that this * function may also return error codes from previous, asynchronous * launches. *

*
*

*
* * @param pHandle Returned mipmapped array * @param pMipmappedArrayDesc mipmapped array descriptor * @param numMipmapLevels Number of mipmap levels * * @return CUDA_SUCCESS, CUDA_ERROR_DEINITIALIZED, CUDA_ERROR_NOT_INITIALIZED, * CUDA_ERROR_INVALID_CONTEXT, CUDA_ERROR_INVALID_VALUE, * CUDA_ERROR_OUT_OF_MEMORY, CUDA_ERROR_UNKNOWN * * @see JCudaDriver#cuMipmappedArrayDestroy * @see JCudaDriver#cuMipmappedArrayGetLevel * @see JCudaDriver#cuArrayCreate */ public static int cuMipmappedArrayCreate(CUmipmappedArray pHandle, CUDA_ARRAY3D_DESCRIPTOR pMipmappedArrayDesc, int numMipmapLevels) { return checkResult(cuMipmappedArrayCreateNative(pHandle, pMipmappedArrayDesc, numMipmapLevels)); } private static native int cuMipmappedArrayCreateNative(CUmipmappedArray pHandle, CUDA_ARRAY3D_DESCRIPTOR pMipmappedArrayDesc, int numMipmapLevels); /** * Gets a mipmap level of a CUDA mipmapped array. * *
     * CUresult cuMipmappedArrayGetLevel (
     *      CUarray* pLevelArray,
     *      CUmipmappedArray hMipmappedArray,
     *      unsigned int  level )
     * 
*
*

Gets a mipmap level of a CUDA mipmapped * array. Returns in *pLevelArray a CUDA array that represents * a single mipmap level of the CUDA mipmapped array hMipmappedArray. *

*

If level is greater than the * maximum number of levels in this mipmapped array, CUDA_ERROR_INVALID_VALUE * is returned. *

*
* Note: *

Note that this * function may also return error codes from previous, asynchronous * launches. *

*
*

*
* * @param pLevelArray Returned mipmap level CUDA array * @param hMipmappedArray CUDA mipmapped array * @param level Mipmap level * * @return CUDA_SUCCESS, CUDA_ERROR_DEINITIALIZED, CUDA_ERROR_NOT_INITIALIZED, * CUDA_ERROR_INVALID_CONTEXT, CUDA_ERROR_INVALID_VALUE, * CUDA_ERROR_INVALID_HANDLE * * @see JCudaDriver#cuMipmappedArrayCreate * @see JCudaDriver#cuMipmappedArrayDestroy * @see JCudaDriver#cuArrayCreate */ public static int cuMipmappedArrayGetLevel(CUarray pLevelArray, CUmipmappedArray hMipmappedArray, int level) { return checkResult(cuMipmappedArrayGetLevelNative(pLevelArray, hMipmappedArray, level)); } private static native int cuMipmappedArrayGetLevelNative(CUarray pLevelArray, CUmipmappedArray hMipmappedArray, int level); /** * Destroys a CUDA mipmapped array. * *
     * CUresult cuMipmappedArrayDestroy (
     *      CUmipmappedArray hMipmappedArray )
     * 
*
*

Destroys a CUDA mipmapped array. Destroys * the CUDA mipmapped array hMipmappedArray. *

*
* Note: *

Note that this * function may also return error codes from previous, asynchronous * launches. *

*
*

*
*
     *
     * @param hMipmappedArray Mipmapped array to destroy
     *
     * @return CUDA_SUCCESS, CUDA_ERROR_DEINITIALIZED, CUDA_ERROR_NOT_INITIALIZED,
     * CUDA_ERROR_INVALID_CONTEXT, CUDA_ERROR_INVALID_HANDLE,
     * CUDA_ERROR_ARRAY_IS_MAPPED
     *
     * @see JCudaDriver#cuMipmappedArrayCreate
     * @see JCudaDriver#cuMipmappedArrayGetLevel
     * @see JCudaDriver#cuArrayCreate
     */
    public static int cuMipmappedArrayDestroy(CUmipmappedArray hMipmappedArray)
    {
        // Call the native method first, then hand its int result to checkResult.
        final int status = cuMipmappedArrayDestroyNative(hMipmappedArray);
        return checkResult(status);
    }

    // Native method backing cuMipmappedArrayDestroy; takes the same argument.
    private static native int cuMipmappedArrayDestroyNative(CUmipmappedArray hMipmappedArray);

    /**
     * Creates a texture reference.
     *
     *
     * CUresult cuTexRefCreate (
     *      CUtexref* pTexRef )
     * 
*
*

Creates a texture reference. * Deprecated. Creates a texture reference * and returns its handle in *pTexRef. Once created, the * application must call cuTexRefSetArray() or cuTexRefSetAddress() to * associate the reference with allocated memory. Other texture reference * functions are used to specify the format and interpretation * (addressing, filtering, etc.) to be used * when the memory is read through this texture reference. *

*
*
     *
     * @param pTexRef Returned texture reference
     *
     * @return CUDA_SUCCESS, CUDA_ERROR_DEINITIALIZED, CUDA_ERROR_NOT_INITIALIZED,
     * CUDA_ERROR_INVALID_CONTEXT, CUDA_ERROR_INVALID_VALUE
     *
     * @see JCudaDriver#cuTexRefDestroy
     */
    public static int cuTexRefCreate(CUtexref pTexRef)
    {
        // Call the native method first, then hand its int result to checkResult.
        final int status = cuTexRefCreateNative(pTexRef);
        return checkResult(status);
    }

    // Native method backing cuTexRefCreate; takes the same argument.
    private static native int cuTexRefCreateNative(CUtexref pTexRef);

    /**
     * Destroys a texture reference.
     *
     *
     * CUresult cuTexRefDestroy (
     *      CUtexref hTexRef )
     * 
*
*

Destroys a texture reference. * Deprecated. Destroys the texture reference * specified by hTexRef. *

*
*
     * @param hTexRef Texture reference to destroy
     *
     * @return CUDA_SUCCESS, CUDA_ERROR_DEINITIALIZED, CUDA_ERROR_NOT_INITIALIZED,
     * CUDA_ERROR_INVALID_CONTEXT, CUDA_ERROR_INVALID_VALUE
     *
     * @see JCudaDriver#cuTexRefCreate
     */
    public static int cuTexRefDestroy(CUtexref hTexRef)
    {
        // Call the native binding, then pass its int result through checkResult.
        int status = cuTexRefDestroyNative(hTexRef);
        return checkResult(status);
    }

    /** Native method invoked by {@link #cuTexRefDestroy}. */
    private static native int cuTexRefDestroyNative(CUtexref hTexRef);

    /**
     * Binds an array as a texture reference.
     *
     *
     * CUresult cuTexRefSetArray (
     *      CUtexref hTexRef,
     *      CUarray hArray,
     *      unsigned int  Flags )
     * 
*
*

Binds an array as a texture reference. * Binds the CUDA array hArray to the texture reference hTexRef. Any previous address or CUDA array state associated with * the texture reference is superseded by this function. Flags * must be set to CU_TRSA_OVERRIDE_FORMAT. Any CUDA array previously bound * to hTexRef is unbound. *

*
* * @param hTexRef Texture reference to bind * @param hArray Array to bind * @param Flags Options (must be CU_TRSA_OVERRIDE_FORMAT) * * @return CUDA_SUCCESS, CUDA_ERROR_DEINITIALIZED, CUDA_ERROR_NOT_INITIALIZED, * CUDA_ERROR_INVALID_CONTEXT, CUDA_ERROR_INVALID_VALUE * * @see JCudaDriver#cuTexRefSetAddress * @see JCudaDriver#cuTexRefSetAddress2D * @see JCudaDriver#cuTexRefSetAddressMode * @see JCudaDriver#cuTexRefSetFilterMode * @see JCudaDriver#cuTexRefSetFlags * @see JCudaDriver#cuTexRefSetFormat * @see JCudaDriver#cuTexRefGetAddress * @see JCudaDriver#cuTexRefGetAddressMode * @see JCudaDriver#cuTexRefGetArray * @see JCudaDriver#cuTexRefGetFilterMode * @see JCudaDriver#cuTexRefGetFlags * @see JCudaDriver#cuTexRefGetFormat */ public static int cuTexRefSetArray(CUtexref hTexRef, CUarray hArray, int Flags) { return checkResult(cuTexRefSetArrayNative(hTexRef, hArray, Flags)); } private static native int cuTexRefSetArrayNative(CUtexref hTexRef, CUarray hArray, int Flags); /** * Binds a mipmapped array to a texture reference. * *
     * CUresult cuTexRefSetMipmappedArray (
     *      CUtexref hTexRef,
     *      CUmipmappedArray hMipmappedArray,
     *      unsigned int  Flags )
     * 
*
*

Binds a mipmapped array to a texture * reference. Binds the CUDA mipmapped array hMipmappedArray * to the texture reference hTexRef. Any previous address or * CUDA array state associated with the texture reference is superseded * by this function. Flags must be set to CU_TRSA_OVERRIDE_FORMAT. * Any CUDA array previously bound to hTexRef is unbound. *

*
* * @param hTexRef Texture reference to bind * @param hMipmappedArray Mipmapped array to bind * @param Flags Options (must be CU_TRSA_OVERRIDE_FORMAT) * * @return CUDA_SUCCESS, CUDA_ERROR_DEINITIALIZED, CUDA_ERROR_NOT_INITIALIZED, * CUDA_ERROR_INVALID_CONTEXT, CUDA_ERROR_INVALID_VALUE * * @see JCudaDriver#cuTexRefSetAddress * @see JCudaDriver#cuTexRefSetAddress2D * @see JCudaDriver#cuTexRefSetAddressMode * @see JCudaDriver#cuTexRefSetFilterMode * @see JCudaDriver#cuTexRefSetFlags * @see JCudaDriver#cuTexRefSetFormat * @see JCudaDriver#cuTexRefGetAddress * @see JCudaDriver#cuTexRefGetAddressMode * @see JCudaDriver#cuTexRefGetArray * @see JCudaDriver#cuTexRefGetFilterMode * @see JCudaDriver#cuTexRefGetFlags * @see JCudaDriver#cuTexRefGetFormat */ public static int cuTexRefSetMipmappedArray(CUtexref hTexRef, CUmipmappedArray hMipmappedArray, int Flags) { return checkResult(cuTexRefSetMipmappedArrayNative(hTexRef, hMipmappedArray, Flags)); } private static native int cuTexRefSetMipmappedArrayNative(CUtexref hTexRef, CUmipmappedArray hMipmappedArray, int Flags); /** * Binds an address as a texture reference. * *
     * CUresult cuTexRefSetAddress (
     *      size_t* ByteOffset,
     *      CUtexref hTexRef,
     *      CUdeviceptr dptr,
     *      size_t bytes )
     * 
*
*

Binds an address as a texture reference. * Binds a linear address range to the texture reference hTexRef. * Any previous address or CUDA array state associated with the texture * reference is superseded by this function. Any memory * previously bound to hTexRef is * unbound. *

*

Since the hardware enforces an alignment * requirement on texture base addresses, cuTexRefSetAddress() passes back * a byte offset in *ByteOffset that must be applied to texture * fetches in order to read from the desired memory. This offset must be * divided by the texel * size and passed to kernels that read from * the texture so they can be applied to the tex1Dfetch() function. *

*

If the device memory pointer was returned * from cuMemAlloc(), the offset is guaranteed to be 0 and NULL may be * passed as the ByteOffset parameter. *

*

The total number of elements (or texels) * in the linear address range cannot exceed * CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE1D_LINEAR_WIDTH. The number of * elements is computed as (bytes / bytesPerElement), where * bytesPerElement is determined from the data format and number of * components set using cuTexRefSetFormat(). *

*
* * @param ByteOffset Returned byte offset * @param hTexRef Texture reference to bind * @param dptr Device pointer to bind * @param bytes Size of memory to bind in bytes * * @return CUDA_SUCCESS, CUDA_ERROR_DEINITIALIZED, CUDA_ERROR_NOT_INITIALIZED, * CUDA_ERROR_INVALID_CONTEXT, CUDA_ERROR_INVALID_VALUE * * @see JCudaDriver#cuTexRefSetAddress2D * @see JCudaDriver#cuTexRefSetAddressMode * @see JCudaDriver#cuTexRefSetArray * @see JCudaDriver#cuTexRefSetFilterMode * @see JCudaDriver#cuTexRefSetFlags * @see JCudaDriver#cuTexRefSetFormat * @see JCudaDriver#cuTexRefGetAddress * @see JCudaDriver#cuTexRefGetAddressMode * @see JCudaDriver#cuTexRefGetArray * @see JCudaDriver#cuTexRefGetFilterMode * @see JCudaDriver#cuTexRefGetFlags * @see JCudaDriver#cuTexRefGetFormat */ public static int cuTexRefSetAddress(long ByteOffset[], CUtexref hTexRef, CUdeviceptr dptr, long bytes) { return checkResult(cuTexRefSetAddressNative(ByteOffset, hTexRef, dptr, bytes)); } private static native int cuTexRefSetAddressNative(long ByteOffset[], CUtexref hTexRef, CUdeviceptr dptr, long bytes); /** * Sets the format for a texture reference. * *
     * CUresult cuTexRefSetFormat (
     *      CUtexref hTexRef,
     *      CUarray_format fmt,
     *      int  NumPackedComponents )
     * 
*
*

Sets the format for a texture reference. * Specifies the format of the data to be read by the texture reference * hTexRef. fmt and NumPackedComponents are * exactly analogous to the Format and NumChannels members of the * CUDA_ARRAY_DESCRIPTOR structure: They specify the format of each * component and the number of components per array element. *

*
* * @param hTexRef Texture reference * @param fmt Format to set * @param NumPackedComponents Number of components per array element * * @return CUDA_SUCCESS, CUDA_ERROR_DEINITIALIZED, CUDA_ERROR_NOT_INITIALIZED, * CUDA_ERROR_INVALID_CONTEXT, CUDA_ERROR_INVALID_VALUE * * @see JCudaDriver#cuTexRefSetAddress * @see JCudaDriver#cuTexRefSetAddress2D * @see JCudaDriver#cuTexRefSetAddressMode * @see JCudaDriver#cuTexRefSetArray * @see JCudaDriver#cuTexRefSetFilterMode * @see JCudaDriver#cuTexRefSetFlags * @see JCudaDriver#cuTexRefGetAddress * @see JCudaDriver#cuTexRefGetAddressMode * @see JCudaDriver#cuTexRefGetArray * @see JCudaDriver#cuTexRefGetFilterMode * @see JCudaDriver#cuTexRefGetFlags * @see JCudaDriver#cuTexRefGetFormat */ public static int cuTexRefSetFormat(CUtexref hTexRef, int fmt, int NumPackedComponents) { return checkResult(cuTexRefSetFormatNative(hTexRef, fmt, NumPackedComponents)); } private static native int cuTexRefSetFormatNative(CUtexref hTexRef, int fmt, int NumPackedComponents); /** * Binds an address as a 2D texture reference. * *
     * CUresult cuTexRefSetAddress2D (
     *      CUtexref hTexRef,
     *      const CUDA_ARRAY_DESCRIPTOR* desc,
     *      CUdeviceptr dptr,
     *      size_t Pitch )
     * 
*
*

Binds an address as a 2D texture * reference. Binds a linear address range to the texture reference hTexRef. Any previous address or CUDA array state associated with * the texture reference is superseded by this function. Any memory * previously bound to hTexRef is * unbound. *

*

Using a tex2D() function inside a kernel * requires a call to either cuTexRefSetArray() to bind the corresponding * texture reference to an array, or cuTexRefSetAddress2D() to bind the * texture reference to linear memory. *

*

Function calls to cuTexRefSetFormat() * cannot follow calls to cuTexRefSetAddress2D() for the same texture * reference. *

*

It is required that dptr be * aligned to the appropriate hardware-specific texture alignment. You * can query this value using the device attribute * CU_DEVICE_ATTRIBUTE_TEXTURE_ALIGNMENT. If an unaligned dptr * is supplied, CUDA_ERROR_INVALID_VALUE is returned. *

*

Pitch has to be aligned to * the hardware-specific texture pitch alignment. This value can be * queried using the device attribute * CU_DEVICE_ATTRIBUTE_TEXTURE_PITCH_ALIGNMENT. * If an unaligned Pitch is supplied, CUDA_ERROR_INVALID_VALUE * is returned. *

*

Width and Height, which are specified * in elements (or texels), cannot exceed * CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE2D_LINEAR_WIDTH and * CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE2D_LINEAR_HEIGHT respectively. Pitch, which is specified in bytes, cannot exceed * CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE2D_LINEAR_PITCH. *

*
* * @param hTexRef Texture reference to bind * @param desc Descriptor of CUDA array * @param dptr Device pointer to bind * @param Pitch Line pitch in bytes * * @return CUDA_SUCCESS, CUDA_ERROR_DEINITIALIZED, CUDA_ERROR_NOT_INITIALIZED, * CUDA_ERROR_INVALID_CONTEXT, CUDA_ERROR_INVALID_VALUE * * @see JCudaDriver#cuTexRefSetAddress * @see JCudaDriver#cuTexRefSetAddressMode * @see JCudaDriver#cuTexRefSetArray * @see JCudaDriver#cuTexRefSetFilterMode * @see JCudaDriver#cuTexRefSetFlags * @see JCudaDriver#cuTexRefSetFormat * @see JCudaDriver#cuTexRefGetAddress * @see JCudaDriver#cuTexRefGetAddressMode * @see JCudaDriver#cuTexRefGetArray * @see JCudaDriver#cuTexRefGetFilterMode * @see JCudaDriver#cuTexRefGetFlags * @see JCudaDriver#cuTexRefGetFormat */ public static int cuTexRefSetAddress2D(CUtexref hTexRef, CUDA_ARRAY_DESCRIPTOR desc, CUdeviceptr dptr, long PitchInBytes) { return checkResult(cuTexRefSetAddress2DNative(hTexRef, desc, dptr, PitchInBytes)); } private static native int cuTexRefSetAddress2DNative(CUtexref hTexRef, CUDA_ARRAY_DESCRIPTOR desc, CUdeviceptr dptr, long PitchInBytes); /** * Sets the addressing mode for a texture reference. * *
     * CUresult cuTexRefSetAddressMode (
     *      CUtexref hTexRef,
     *      int  dim,
     *      CUaddress_mode am )
     * 
*
*

Sets the addressing mode for a texture * reference. Specifies the addressing mode am for the given * dimension dim of the texture reference hTexRef. If * dim is zero, the addressing mode is applied to the first * parameter of the functions used to fetch from the texture; if dim is 1, the second, and so on. CUaddress_mode is defined as: *

   typedef enum CUaddress_mode_enum {
     *       CU_TR_ADDRESS_MODE_WRAP = 0,
     *       CU_TR_ADDRESS_MODE_CLAMP = 1,
     *       CU_TR_ADDRESS_MODE_MIRROR = 2,
     *       CU_TR_ADDRESS_MODE_BORDER = 3
     *    } CUaddress_mode;
*

*

Note that this call has no effect if * hTexRef is bound to linear memory. Also, if the flag, * CU_TRSF_NORMALIZED_COORDINATES, is not set, the only supported address * mode is CU_TR_ADDRESS_MODE_CLAMP. *

*
* * @param hTexRef Texture reference * @param dim Dimension * @param am Addressing mode to set * * @return CUDA_SUCCESS, CUDA_ERROR_DEINITIALIZED, CUDA_ERROR_NOT_INITIALIZED, * CUDA_ERROR_INVALID_CONTEXT, CUDA_ERROR_INVALID_VALUE * * @see JCudaDriver#cuTexRefSetAddress * @see JCudaDriver#cuTexRefSetAddress2D * @see JCudaDriver#cuTexRefSetArray * @see JCudaDriver#cuTexRefSetFilterMode * @see JCudaDriver#cuTexRefSetFlags * @see JCudaDriver#cuTexRefSetFormat * @see JCudaDriver#cuTexRefGetAddress * @see JCudaDriver#cuTexRefGetAddressMode * @see JCudaDriver#cuTexRefGetArray * @see JCudaDriver#cuTexRefGetFilterMode * @see JCudaDriver#cuTexRefGetFlags * @see JCudaDriver#cuTexRefGetFormat */ public static int cuTexRefSetAddressMode(CUtexref hTexRef, int dim, int am) { return checkResult(cuTexRefSetAddressModeNative(hTexRef, dim, am)); } private static native int cuTexRefSetAddressModeNative(CUtexref hTexRef, int dim, int am); /** * Sets the filtering mode for a texture reference. * *
     * CUresult cuTexRefSetFilterMode (
     *      CUtexref hTexRef,
     *      CUfilter_mode fm )
     * 
*
*

Sets the filtering mode for a texture * reference. Specifies the filtering mode fm to be used when * reading memory through the texture reference hTexRef. * CUfilter_mode_enum is defined as: *

*
   typedef enum CUfilter_mode_enum {
     *       CU_TR_FILTER_MODE_POINT = 0,
     *       CU_TR_FILTER_MODE_LINEAR = 1
     *    } CUfilter_mode;
*

*

Note that this call has no effect if * hTexRef is bound to linear memory. *

*
* * @param hTexRef Texture reference * @param fm Filtering mode to set * * @return CUDA_SUCCESS, CUDA_ERROR_DEINITIALIZED, CUDA_ERROR_NOT_INITIALIZED, * CUDA_ERROR_INVALID_CONTEXT, CUDA_ERROR_INVALID_VALUE * * @see JCudaDriver#cuTexRefSetAddress * @see JCudaDriver#cuTexRefSetAddress2D * @see JCudaDriver#cuTexRefSetAddressMode * @see JCudaDriver#cuTexRefSetArray * @see JCudaDriver#cuTexRefSetFlags * @see JCudaDriver#cuTexRefSetFormat * @see JCudaDriver#cuTexRefGetAddress * @see JCudaDriver#cuTexRefGetAddressMode * @see JCudaDriver#cuTexRefGetArray * @see JCudaDriver#cuTexRefGetFilterMode * @see JCudaDriver#cuTexRefGetFlags * @see JCudaDriver#cuTexRefGetFormat */ public static int cuTexRefSetFilterMode(CUtexref hTexRef, int fm) { return checkResult(cuTexRefSetFilterModeNative(hTexRef, fm)); } private static native int cuTexRefSetFilterModeNative(CUtexref hTexRef, int fm); /** * Sets the mipmap filtering mode for a texture reference. * *
     * CUresult cuTexRefSetMipmapFilterMode (
     *      CUtexref hTexRef,
     *      CUfilter_mode fm )
     * 
*
*

Sets the mipmap filtering mode for a * texture reference. Specifies the mipmap filtering mode fm * to be used when reading memory through the texture reference hTexRef. CUfilter_mode_enum is defined as: *

*
   typedef enum CUfilter_mode_enum {
     *       CU_TR_FILTER_MODE_POINT = 0,
     *       CU_TR_FILTER_MODE_LINEAR = 1
     *    } CUfilter_mode;
*

*

Note that this call has no effect if * hTexRef is not bound to a mipmapped array. *

*
* * @param hTexRef Texture reference * @param fm Filtering mode to set * * @return CUDA_SUCCESS, CUDA_ERROR_DEINITIALIZED, CUDA_ERROR_NOT_INITIALIZED, * CUDA_ERROR_INVALID_CONTEXT, CUDA_ERROR_INVALID_VALUE * * @see JCudaDriver#cuTexRefSetAddress * @see JCudaDriver#cuTexRefSetAddress2D * @see JCudaDriver#cuTexRefSetAddressMode * @see JCudaDriver#cuTexRefSetArray * @see JCudaDriver#cuTexRefSetFlags * @see JCudaDriver#cuTexRefSetFormat * @see JCudaDriver#cuTexRefGetAddress * @see JCudaDriver#cuTexRefGetAddressMode * @see JCudaDriver#cuTexRefGetArray * @see JCudaDriver#cuTexRefGetFilterMode * @see JCudaDriver#cuTexRefGetFlags * @see JCudaDriver#cuTexRefGetFormat */ public static int cuTexRefSetMipmapFilterMode(CUtexref hTexRef, int fm) { return checkResult(cuTexRefSetMipmapFilterModeNative(hTexRef, fm)); } private static native int cuTexRefSetMipmapFilterModeNative(CUtexref hTexRef, int fm); /** * Sets the mipmap level bias for a texture reference. * *
     * CUresult cuTexRefSetMipmapLevelBias (
     *      CUtexref hTexRef,
     *      float  bias )
     * 
*
*

Sets the mipmap level bias for a texture * reference. Specifies the mipmap level bias bias to be added * to the specified mipmap level when reading memory through the texture * reference hTexRef. *

*

Note that this call has no effect if * hTexRef is not bound to a mipmapped array. *

*
* * @param hTexRef Texture reference * @param bias Mipmap level bias * * @return CUDA_SUCCESS, CUDA_ERROR_DEINITIALIZED, CUDA_ERROR_NOT_INITIALIZED, * CUDA_ERROR_INVALID_CONTEXT, CUDA_ERROR_INVALID_VALUE * * @see JCudaDriver#cuTexRefSetAddress * @see JCudaDriver#cuTexRefSetAddress2D * @see JCudaDriver#cuTexRefSetAddressMode * @see JCudaDriver#cuTexRefSetArray * @see JCudaDriver#cuTexRefSetFlags * @see JCudaDriver#cuTexRefSetFormat * @see JCudaDriver#cuTexRefGetAddress * @see JCudaDriver#cuTexRefGetAddressMode * @see JCudaDriver#cuTexRefGetArray * @see JCudaDriver#cuTexRefGetFilterMode * @see JCudaDriver#cuTexRefGetFlags * @see JCudaDriver#cuTexRefGetFormat */ public static int cuTexRefSetMipmapLevelBias(CUtexref hTexRef, float bias) { return checkResult(cuTexRefSetMipmapLevelBiasNative(hTexRef, bias)); } private static native int cuTexRefSetMipmapLevelBiasNative(CUtexref hTexRef, float bias); /** * Sets the mipmap min/max mipmap level clamps for a texture reference. * *
     * CUresult cuTexRefSetMipmapLevelClamp (
     *      CUtexref hTexRef,
     *      float  minMipmapLevelClamp,
     *      float  maxMipmapLevelClamp )
     * 
*
*

Sets the mipmap min/max mipmap level * clamps for a texture reference. Specifies the min/max mipmap level * clamps, minMipmapLevelClamp and maxMipmapLevelClamp * respectively, to be used when reading memory through the texture * reference hTexRef. *

*

Note that this call has no effect if * hTexRef is not bound to a mipmapped array. *

*
* * @param hTexRef Texture reference * @param minMipmapLevelClamp Mipmap min level clamp * @param maxMipmapLevelClamp Mipmap max level clamp * * @return CUDA_SUCCESS, CUDA_ERROR_DEINITIALIZED, CUDA_ERROR_NOT_INITIALIZED, * CUDA_ERROR_INVALID_CONTEXT, CUDA_ERROR_INVALID_VALUE * * @see JCudaDriver#cuTexRefSetAddress * @see JCudaDriver#cuTexRefSetAddress2D * @see JCudaDriver#cuTexRefSetAddressMode * @see JCudaDriver#cuTexRefSetArray * @see JCudaDriver#cuTexRefSetFlags * @see JCudaDriver#cuTexRefSetFormat * @see JCudaDriver#cuTexRefGetAddress * @see JCudaDriver#cuTexRefGetAddressMode * @see JCudaDriver#cuTexRefGetArray * @see JCudaDriver#cuTexRefGetFilterMode * @see JCudaDriver#cuTexRefGetFlags * @see JCudaDriver#cuTexRefGetFormat */ public static int cuTexRefSetMipmapLevelClamp(CUtexref hTexRef, float minMipmapLevelClamp, float maxMipmapLevelClamp) { return checkResult(cuTexRefSetMipmapLevelClampNative(hTexRef, minMipmapLevelClamp, maxMipmapLevelClamp)); } private static native int cuTexRefSetMipmapLevelClampNative(CUtexref hTexRef, float minMipmapLevelClamp, float maxMipmapLevelClamp); /** * Sets the maximum anistropy for a texture reference. * *
     * CUresult cuTexRefSetMaxAnisotropy (
     *      CUtexref hTexRef,
     *      unsigned int  maxAniso )
     * 
*
*

Sets the maximum anisotropy for a texture * reference. Specifies the maximum anisotropy maxAniso to be * used when reading memory through the texture reference hTexRef. *

*

Note that this call has no effect if * hTexRef is bound to linear memory. *

*
* * @param hTexRef Texture reference * @param maxAniso Maximum anisotropy * * @return CUDA_SUCCESS, CUDA_ERROR_DEINITIALIZED, CUDA_ERROR_NOT_INITIALIZED, * CUDA_ERROR_INVALID_CONTEXT, CUDA_ERROR_INVALID_VALUE * * @see JCudaDriver#cuTexRefSetAddress * @see JCudaDriver#cuTexRefSetAddress2D * @see JCudaDriver#cuTexRefSetAddressMode * @see JCudaDriver#cuTexRefSetArray * @see JCudaDriver#cuTexRefSetFlags * @see JCudaDriver#cuTexRefSetFormat * @see JCudaDriver#cuTexRefGetAddress * @see JCudaDriver#cuTexRefGetAddressMode * @see JCudaDriver#cuTexRefGetArray * @see JCudaDriver#cuTexRefGetFilterMode * @see JCudaDriver#cuTexRefGetFlags * @see JCudaDriver#cuTexRefGetFormat */ public static int cuTexRefSetMaxAnisotropy(CUtexref hTexRef, int maxAniso) { return checkResult(cuTexRefSetMaxAnisotropyNative(hTexRef, maxAniso)); } private static native int cuTexRefSetMaxAnisotropyNative(CUtexref hTexRef, int maxAniso); /** * Sets the flags for a texture reference. * *
     * CUresult cuTexRefSetFlags (
     *      CUtexref hTexRef,
     *      unsigned int  Flags )
     * 
*
*

Sets the flags for a texture reference. * Specifies optional flags via Flags to specify the behavior * of data returned through the texture reference hTexRef. The * valid flags are: *

*
    *
  • *

    CU_TRSF_READ_AS_INTEGER, which * suppresses the default behavior of having the texture promote integer * data to floating point data in the range [0, * 1]. Note that texture with * 32-bit integer format would not be promoted, regardless of whether or * not this flag is specified; *

    *
  • *
  • *

    CU_TRSF_NORMALIZED_COORDINATES, * which suppresses the default behavior of having the texture coordinates * range from [0, Dim) where Dim is the width or height * of the CUDA array. Instead, the * texture coordinates [0, 1.0) reference the entire breadth of the array * dimension; *

    *
  • *
*

*
* * @param hTexRef Texture reference * @param Flags Optional flags to set * * @return CUDA_SUCCESS, CUDA_ERROR_DEINITIALIZED, CUDA_ERROR_NOT_INITIALIZED, * CUDA_ERROR_INVALID_CONTEXT, CUDA_ERROR_INVALID_VALUE * * @see JCudaDriver#cuTexRefSetAddress * @see JCudaDriver#cuTexRefSetAddress2D * @see JCudaDriver#cuTexRefSetAddressMode * @see JCudaDriver#cuTexRefSetArray * @see JCudaDriver#cuTexRefSetFilterMode * @see JCudaDriver#cuTexRefSetFormat * @see JCudaDriver#cuTexRefGetAddress * @see JCudaDriver#cuTexRefGetAddressMode * @see JCudaDriver#cuTexRefGetArray * @see JCudaDriver#cuTexRefGetFilterMode * @see JCudaDriver#cuTexRefGetFlags * @see JCudaDriver#cuTexRefGetFormat */ public static int cuTexRefSetFlags(CUtexref hTexRef, int Flags) { return checkResult(cuTexRefSetFlagsNative(hTexRef, Flags)); } private static native int cuTexRefSetFlagsNative(CUtexref hTexRef, int Flags); /** * Gets the address associated with a texture reference. * *
     * CUresult cuTexRefGetAddress (
     *      CUdeviceptr* pdptr,
     *      CUtexref hTexRef )
     * 
*
*

Gets the address associated with a * texture reference. Returns in *pdptr the base address bound * to the texture reference hTexRef, or returns * CUDA_ERROR_INVALID_VALUE if the texture reference is not bound to any * device memory range. *

*
* * @param pdptr Returned device address * @param hTexRef Texture reference * * @return CUDA_SUCCESS, CUDA_ERROR_DEINITIALIZED, CUDA_ERROR_NOT_INITIALIZED, * CUDA_ERROR_INVALID_CONTEXT, CUDA_ERROR_INVALID_VALUE * * @see JCudaDriver#cuTexRefSetAddress * @see JCudaDriver#cuTexRefSetAddress2D * @see JCudaDriver#cuTexRefSetAddressMode * @see JCudaDriver#cuTexRefSetArray * @see JCudaDriver#cuTexRefSetFilterMode * @see JCudaDriver#cuTexRefSetFlags * @see JCudaDriver#cuTexRefSetFormat * @see JCudaDriver#cuTexRefGetAddressMode * @see JCudaDriver#cuTexRefGetArray * @see JCudaDriver#cuTexRefGetFilterMode * @see JCudaDriver#cuTexRefGetFlags * @see JCudaDriver#cuTexRefGetFormat */ public static int cuTexRefGetAddress(CUdeviceptr pdptr, CUtexref hTexRef) { return checkResult(cuTexRefGetAddressNative(pdptr, hTexRef)); } private static native int cuTexRefGetAddressNative(CUdeviceptr pdptr, CUtexref hTexRef); /** * Gets the array bound to a texture reference. * *
     * CUresult cuTexRefGetArray (
     *      CUarray* phArray,
     *      CUtexref hTexRef )
     * 
*
*

Gets the array bound to a texture * reference. Returns in *phArray the CUDA array bound to the * texture reference hTexRef, or returns CUDA_ERROR_INVALID_VALUE * if the texture reference is not bound to any CUDA array. *

*
* * @param phArray Returned array * @param hTexRef Texture reference * * @return CUDA_SUCCESS, CUDA_ERROR_DEINITIALIZED, CUDA_ERROR_NOT_INITIALIZED, * CUDA_ERROR_INVALID_CONTEXT, CUDA_ERROR_INVALID_VALUE * * @see JCudaDriver#cuTexRefSetAddress * @see JCudaDriver#cuTexRefSetAddress2D * @see JCudaDriver#cuTexRefSetAddressMode * @see JCudaDriver#cuTexRefSetArray * @see JCudaDriver#cuTexRefSetFilterMode * @see JCudaDriver#cuTexRefSetFlags * @see JCudaDriver#cuTexRefSetFormat * @see JCudaDriver#cuTexRefGetAddress * @see JCudaDriver#cuTexRefGetAddressMode * @see JCudaDriver#cuTexRefGetFilterMode * @see JCudaDriver#cuTexRefGetFlags * @see JCudaDriver#cuTexRefGetFormat */ public static int cuTexRefGetArray(CUarray phArray, CUtexref hTexRef) { return checkResult(cuTexRefGetArrayNative(phArray, hTexRef)); } private static native int cuTexRefGetArrayNative(CUarray phArray, CUtexref hTexRef); /** * Gets the mipmapped array bound to a texture reference. * *
     * CUresult cuTexRefGetMipmappedArray (
     *      CUmipmappedArray* phMipmappedArray,
     *      CUtexref hTexRef )
     * 
*
*

Gets the mipmapped array bound to a * texture reference. Returns in *phMipmappedArray the CUDA * mipmapped array bound to the texture reference hTexRef, or * returns CUDA_ERROR_INVALID_VALUE if the texture reference is not bound * to any CUDA mipmapped array. *

*
* * @param phMipmappedArray Returned mipmapped array * @param hTexRef Texture reference * * @return CUDA_SUCCESS, CUDA_ERROR_DEINITIALIZED, CUDA_ERROR_NOT_INITIALIZED, * CUDA_ERROR_INVALID_CONTEXT, CUDA_ERROR_INVALID_VALUE * * @see JCudaDriver#cuTexRefSetAddress * @see JCudaDriver#cuTexRefSetAddress2D * @see JCudaDriver#cuTexRefSetAddressMode * @see JCudaDriver#cuTexRefSetArray * @see JCudaDriver#cuTexRefSetFilterMode * @see JCudaDriver#cuTexRefSetFlags * @see JCudaDriver#cuTexRefSetFormat * @see JCudaDriver#cuTexRefGetAddress * @see JCudaDriver#cuTexRefGetAddressMode * @see JCudaDriver#cuTexRefGetFilterMode * @see JCudaDriver#cuTexRefGetFlags * @see JCudaDriver#cuTexRefGetFormat */ public static int cuTexRefGetMipmappedArray(CUmipmappedArray phMipmappedArray, CUtexref hTexRef) { return checkResult(cuTexRefGetMipmappedArrayNative(phMipmappedArray, hTexRef)); } private static native int cuTexRefGetMipmappedArrayNative(CUmipmappedArray phMipmappedArray, CUtexref hTexRef); /** * Gets the addressing mode used by a texture reference. * *
     * CUresult cuTexRefGetAddressMode (
     *      CUaddress_mode* pam,
     *      CUtexref hTexRef,
     *      int  dim )
     * 
*
*

Gets the addressing mode used by a * texture reference. Returns in *pam the addressing mode * corresponding to the dimension dim of the texture reference * hTexRef. Currently, the only valid values for dim * are 0 and 1. *

*
* * @param pam Returned addressing mode * @param hTexRef Texture reference * @param dim Dimension * * @return CUDA_SUCCESS, CUDA_ERROR_DEINITIALIZED, CUDA_ERROR_NOT_INITIALIZED, * CUDA_ERROR_INVALID_CONTEXT, CUDA_ERROR_INVALID_VALUE * * @see JCudaDriver#cuTexRefSetAddress * @see JCudaDriver#cuTexRefSetAddress2D * @see JCudaDriver#cuTexRefSetAddressMode * @see JCudaDriver#cuTexRefSetArray * @see JCudaDriver#cuTexRefSetFilterMode * @see JCudaDriver#cuTexRefSetFlags * @see JCudaDriver#cuTexRefSetFormat * @see JCudaDriver#cuTexRefGetAddress * @see JCudaDriver#cuTexRefGetArray * @see JCudaDriver#cuTexRefGetFilterMode * @see JCudaDriver#cuTexRefGetFlags * @see JCudaDriver#cuTexRefGetFormat */ public static int cuTexRefGetAddressMode(int pam[], CUtexref hTexRef, int dim) { return checkResult(cuTexRefGetAddressModeNative(pam, hTexRef, dim)); } private static native int cuTexRefGetAddressModeNative(int pam[], CUtexref hTexRef, int dim); /** * Gets the filter-mode used by a texture reference. * *
     * CUresult cuTexRefGetFilterMode (
     *      CUfilter_mode* pfm,
     *      CUtexref hTexRef )
     * 
*
*

Gets the filter-mode used by a texture * reference. Returns in *pfm the filtering mode of the texture * reference hTexRef. *

*
* * @param pfm Returned filtering mode * @param hTexRef Texture reference * * @return CUDA_SUCCESS, CUDA_ERROR_DEINITIALIZED, CUDA_ERROR_NOT_INITIALIZED, * CUDA_ERROR_INVALID_CONTEXT, CUDA_ERROR_INVALID_VALUE * * @see JCudaDriver#cuTexRefSetAddress * @see JCudaDriver#cuTexRefSetAddress2D * @see JCudaDriver#cuTexRefSetAddressMode * @see JCudaDriver#cuTexRefSetArray * @see JCudaDriver#cuTexRefSetFilterMode * @see JCudaDriver#cuTexRefSetFlags * @see JCudaDriver#cuTexRefSetFormat * @see JCudaDriver#cuTexRefGetAddress * @see JCudaDriver#cuTexRefGetAddressMode * @see JCudaDriver#cuTexRefGetArray * @see JCudaDriver#cuTexRefGetFlags * @see JCudaDriver#cuTexRefGetFormat */ public static int cuTexRefGetFilterMode(int pfm[], CUtexref hTexRef) { return checkResult(cuTexRefGetFilterModeNative(pfm, hTexRef)); } private static native int cuTexRefGetFilterModeNative(int pfm[], CUtexref hTexRef); /** * Gets the format used by a texture reference. * *
     * CUresult cuTexRefGetFormat (
     *      CUarray_format* pFormat,
     *      int* pNumChannels,
     *      CUtexref hTexRef )
     * 
*
*

Gets the format used by a texture * reference. Returns in *pFormat and *pNumChannels * the format and number of components of the CUDA array bound to the * texture reference hTexRef. If pFormat or pNumChannels is NULL, it will be ignored. *

*
* * @param pFormat Returned format * @param pNumChannels Returned number of components * @param hTexRef Texture reference * * @return CUDA_SUCCESS, CUDA_ERROR_DEINITIALIZED, CUDA_ERROR_NOT_INITIALIZED, * CUDA_ERROR_INVALID_CONTEXT, CUDA_ERROR_INVALID_VALUE * * @see JCudaDriver#cuTexRefSetAddress * @see JCudaDriver#cuTexRefSetAddress2D * @see JCudaDriver#cuTexRefSetAddressMode * @see JCudaDriver#cuTexRefSetArray * @see JCudaDriver#cuTexRefSetFilterMode * @see JCudaDriver#cuTexRefSetFlags * @see JCudaDriver#cuTexRefSetFormat * @see JCudaDriver#cuTexRefGetAddress * @see JCudaDriver#cuTexRefGetAddressMode * @see JCudaDriver#cuTexRefGetArray * @see JCudaDriver#cuTexRefGetFilterMode * @see JCudaDriver#cuTexRefGetFlags */ public static int cuTexRefGetFormat(int pFormat[], int pNumChannels[], CUtexref hTexRef) { return checkResult(cuTexRefGetFormatNative(pFormat, pNumChannels, hTexRef)); } private static native int cuTexRefGetFormatNative(int pFormat[], int pNumChannels[], CUtexref hTexRef); /** * Gets the mipmap filtering mode for a texture reference. * *
     * CUresult cuTexRefGetMipmapFilterMode (
     *      CUfilter_mode* pfm,
     *      CUtexref hTexRef )
     * 
*
*

Gets the mipmap filtering mode for a * texture reference. Returns the mipmap filtering mode in pfm * that's used when reading memory through the texture reference hTexRef. *

*
* * @param pfm Returned mipmap filtering mode * @param hTexRef Texture reference * * @return CUDA_SUCCESS, CUDA_ERROR_DEINITIALIZED, CUDA_ERROR_NOT_INITIALIZED, * CUDA_ERROR_INVALID_CONTEXT, CUDA_ERROR_INVALID_VALUE * * @see JCudaDriver#cuTexRefSetAddress * @see JCudaDriver#cuTexRefSetAddress2D * @see JCudaDriver#cuTexRefSetAddressMode * @see JCudaDriver#cuTexRefSetArray * @see JCudaDriver#cuTexRefSetFlags * @see JCudaDriver#cuTexRefSetFormat * @see JCudaDriver#cuTexRefGetAddress * @see JCudaDriver#cuTexRefGetAddressMode * @see JCudaDriver#cuTexRefGetArray * @see JCudaDriver#cuTexRefGetFilterMode * @see JCudaDriver#cuTexRefGetFlags * @see JCudaDriver#cuTexRefGetFormat */ public static int cuTexRefGetMipmapFilterMode(int pfm[], CUtexref hTexRef) { return checkResult(cuTexRefGetMipmapFilterModeNative(pfm, hTexRef)); } private static native int cuTexRefGetMipmapFilterModeNative(int pfm[], CUtexref hTexRef); /** * Gets the mipmap level bias for a texture reference. * *
     * CUresult cuTexRefGetMipmapLevelBias (
     *      float* pbias,
     *      CUtexref hTexRef )
     * 
*
*

Gets the mipmap level bias for a texture * reference. Returns the mipmap level bias in pbias that's * added to the specified mipmap level when reading memory through the * texture reference hTexRef. *

*
* * @param pbias Returned mipmap level bias * @param hTexRef Texture reference * * @return CUDA_SUCCESS, CUDA_ERROR_DEINITIALIZED, CUDA_ERROR_NOT_INITIALIZED, * CUDA_ERROR_INVALID_CONTEXT, CUDA_ERROR_INVALID_VALUE * * @see JCudaDriver#cuTexRefSetAddress * @see JCudaDriver#cuTexRefSetAddress2D * @see JCudaDriver#cuTexRefSetAddressMode * @see JCudaDriver#cuTexRefSetArray * @see JCudaDriver#cuTexRefSetFlags * @see JCudaDriver#cuTexRefSetFormat * @see JCudaDriver#cuTexRefGetAddress * @see JCudaDriver#cuTexRefGetAddressMode * @see JCudaDriver#cuTexRefGetArray * @see JCudaDriver#cuTexRefGetFilterMode * @see JCudaDriver#cuTexRefGetFlags * @see JCudaDriver#cuTexRefGetFormat */ public static int cuTexRefGetMipmapLevelBias(float pbias[], CUtexref hTexRef) { return checkResult(cuTexRefGetMipmapLevelBiasNative(pbias, hTexRef)); } private static native int cuTexRefGetMipmapLevelBiasNative(float pbias[], CUtexref hTexRef); /** * Gets the min/max mipmap level clamps for a texture reference. * *
     * CUresult cuTexRefGetMipmapLevelClamp (
     *      float* pminMipmapLevelClamp,
     *      float* pmaxMipmapLevelClamp,
     *      CUtexref hTexRef )
     * 
*
*

Gets the min/max mipmap level clamps for * a texture reference. Returns in pminMipmapLevelClamp and pmaxMipmapLevelClamp the min/max mipmap level clamps that are * used when reading memory through the texture reference hTexRef. *

*
* * @param pminMipmapLevelClamp Returned mipmap min level clamp * @param pmaxMipmapLevelClamp Returned mipmap max level clamp * @param hTexRef Texture reference * * @return CUDA_SUCCESS, CUDA_ERROR_DEINITIALIZED, CUDA_ERROR_NOT_INITIALIZED, * CUDA_ERROR_INVALID_CONTEXT, CUDA_ERROR_INVALID_VALUE * * @see JCudaDriver#cuTexRefSetAddress * @see JCudaDriver#cuTexRefSetAddress2D * @see JCudaDriver#cuTexRefSetAddressMode * @see JCudaDriver#cuTexRefSetArray * @see JCudaDriver#cuTexRefSetFlags * @see JCudaDriver#cuTexRefSetFormat * @see JCudaDriver#cuTexRefGetAddress * @see JCudaDriver#cuTexRefGetAddressMode * @see JCudaDriver#cuTexRefGetArray * @see JCudaDriver#cuTexRefGetFilterMode * @see JCudaDriver#cuTexRefGetFlags * @see JCudaDriver#cuTexRefGetFormat */ public static int cuTexRefGetMipmapLevelClamp(float pminMipmapLevelClamp[], float pmaxMipmapLevelClamp[], CUtexref hTexRef) { return checkResult(cuTexRefGetMipmapLevelClampNative(pminMipmapLevelClamp, pmaxMipmapLevelClamp, hTexRef)); } private static native int cuTexRefGetMipmapLevelClampNative(float pminMipmapLevelClamp[], float pmaxMipmapLevelClamp[], CUtexref hTexRef); /** * Gets the maximum anistropy for a texture reference. * *
     * CUresult cuTexRefGetMaxAnisotropy (
     *      int* pmaxAniso,
     *      CUtexref hTexRef )
     * 
*
*

Gets the maximum anisotropy for a texture * reference. Returns in pmaxAniso the maximum anisotropy * that is used when reading memory through the texture reference hTexRef. *

*
* * @param pmaxAniso Returned maximum anisotropy * @param hTexRef Texture reference * * @return CUDA_SUCCESS, CUDA_ERROR_DEINITIALIZED, CUDA_ERROR_NOT_INITIALIZED, * CUDA_ERROR_INVALID_CONTEXT, CUDA_ERROR_INVALID_VALUE * * @see JCudaDriver#cuTexRefSetAddress * @see JCudaDriver#cuTexRefSetAddress2D * @see JCudaDriver#cuTexRefSetAddressMode * @see JCudaDriver#cuTexRefSetArray * @see JCudaDriver#cuTexRefSetFlags * @see JCudaDriver#cuTexRefSetFormat * @see JCudaDriver#cuTexRefGetAddress * @see JCudaDriver#cuTexRefGetAddressMode * @see JCudaDriver#cuTexRefGetArray * @see JCudaDriver#cuTexRefGetFilterMode * @see JCudaDriver#cuTexRefGetFlags * @see JCudaDriver#cuTexRefGetFormat */ public static int cuTexRefGetMaxAnisotropy(int pmaxAniso[], CUtexref hTexRef) { return checkResult(cuTexRefGetMaxAnisotropyNative(pmaxAniso, hTexRef)); } private static native int cuTexRefGetMaxAnisotropyNative(int pmaxAniso[], CUtexref hTexRef); /** * Gets the flags used by a texture reference. * *
     * CUresult cuTexRefGetFlags (
     *      unsigned int* pFlags,
     *      CUtexref hTexRef )
     * 
*
*

Gets the flags used by a texture * reference. Returns in *pFlags the flags of the texture * reference hTexRef. *

*
* * @param pFlags Returned flags * @param hTexRef Texture reference * * @return CUDA_SUCCESS, CUDA_ERROR_DEINITIALIZED, CUDA_ERROR_NOT_INITIALIZED, * CUDA_ERROR_INVALID_CONTEXT, CUDA_ERROR_INVALID_VALUE * * @see JCudaDriver#cuTexRefSetAddress * @see JCudaDriver#cuTexRefSetAddress2D * @see JCudaDriver#cuTexRefSetAddressMode * @see JCudaDriver#cuTexRefSetArray * @see JCudaDriver#cuTexRefSetFilterMode * @see JCudaDriver#cuTexRefSetFlags * @see JCudaDriver#cuTexRefSetFormat * @see JCudaDriver#cuTexRefGetAddress * @see JCudaDriver#cuTexRefGetAddressMode * @see JCudaDriver#cuTexRefGetArray * @see JCudaDriver#cuTexRefGetFilterMode * @see JCudaDriver#cuTexRefGetFormat */ public static int cuTexRefGetFlags(int pFlags[], CUtexref hTexRef) { return checkResult(cuTexRefGetFlagsNative(pFlags, hTexRef)); } private static native int cuTexRefGetFlagsNative(int pFlags[], CUtexref hTexRef); /** * Sets the CUDA array for a surface reference. * *
     * CUresult cuSurfRefSetArray (
     *      CUsurfref hSurfRef,
     *      CUarray hArray,
     *      unsigned int  Flags )
     * 
*
*

Sets the CUDA array for a surface * reference. Sets the CUDA array hArray to be read and written * by the surface reference hSurfRef. Any previous CUDA array * state associated with the surface reference is superseded by this * function. Flags must be set to 0. The CUDA_ARRAY3D_SURFACE_LDST * flag must have been set for the CUDA array. Any CUDA array previously * bound to hSurfRef is unbound. *

*
* * @param hSurfRef Surface reference handle * @param hArray CUDA array handle * @param Flags set to 0 * * @return CUDA_SUCCESS, CUDA_ERROR_DEINITIALIZED, CUDA_ERROR_NOT_INITIALIZED, * CUDA_ERROR_INVALID_CONTEXT, CUDA_ERROR_INVALID_VALUE * * @see JCudaDriver#cuModuleGetSurfRef * @see JCudaDriver#cuSurfRefGetArray */ public static int cuSurfRefSetArray(CUsurfref hSurfRef, CUarray hArray, int Flags ) { return checkResult(cuSurfRefSetArrayNative(hSurfRef, hArray, Flags)); } private static native int cuSurfRefSetArrayNative(CUsurfref hSurfRef, CUarray hArray, int Flags ); /** * Passes back the CUDA array bound to a surface reference. * *
     * CUresult cuSurfRefGetArray (
     *      CUarray* phArray,
     *      CUsurfref hSurfRef )
     * 
*
*

Passes back the CUDA array bound to a * surface reference. Returns in *phArray the CUDA array bound * to the surface reference hSurfRef, or returns * CUDA_ERROR_INVALID_VALUE if the surface reference is not bound to any * CUDA array. *

*
*
     * @param phArray Receives the CUDA array bound to the surface reference
     * @param hSurfRef Surface reference handle
     *
     * @return CUDA_SUCCESS, CUDA_ERROR_DEINITIALIZED, CUDA_ERROR_NOT_INITIALIZED,
     * CUDA_ERROR_INVALID_CONTEXT, CUDA_ERROR_INVALID_VALUE
     *
     * @see JCudaDriver#cuModuleGetSurfRef
     * @see JCudaDriver#cuSurfRefSetArray
     */
    public static int cuSurfRefGetArray(CUarray phArray, CUsurfref hSurfRef)
    {
        // Call the native binding, then hand its status code to checkResult.
        final int status = cuSurfRefGetArrayNative(phArray, hSurfRef);
        return checkResult(status);
    }

    /** Native counterpart of {@code cuSurfRefGetArray}. */
    private static native int cuSurfRefGetArrayNative(CUarray phArray, CUsurfref hSurfRef);

    /**
     * Creates a texture object.
     *
     *
     * CUresult cuTexObjectCreate (
     *      CUtexObject* pTexObject,
     *      const CUDA_RESOURCE_DESC* pResDesc,
     *      const CUDA_TEXTURE_DESC* pTexDesc,
     *      const CUDA_RESOURCE_VIEW_DESC* pResViewDesc )
     * 
*
*

Creates a texture object. Creates a * texture object and returns it in pTexObject. pResDesc * describes the data to texture from. pTexDesc describes how * the data should be sampled. pResViewDesc is an optional * argument that specifies an alternate format for the data described by * pResDesc, and also describes the subresource region to * restrict access to when texturing. pResViewDesc can only be * specified if the type of resource is a CUDA array or a CUDA mipmapped * array. *

*

Texture objects are only supported on * devices of compute capability 3.0 or higher. *

*

The CUDA_RESOURCE_DESC structure is * defined as: *

        typedef struct CUDA_RESOURCE_DESC_st
     *         {
     *             CUresourcetype resType;
     * 
     *             union {
     *                 struct {
     *                     CUarray hArray;
     *                 } array;
     *                 struct {
     *                     CUmipmappedArray hMipmappedArray;
     *                 } mipmap;
     *                 struct {
     *                     CUdeviceptr devPtr;
     *                     CUarray_format format;
     *                     unsigned int numChannels;
     *                     size_t sizeInBytes;
     *                 } linear;
     *                 struct {
     *                     CUdeviceptr devPtr;
     *                     CUarray_format format;
     *                     unsigned int numChannels;
     *                     size_t width;
     *                     size_t height;
     *                     size_t pitchInBytes;
     *                 } pitch2D;
     *             } res;
     * 
     *             unsigned int flags;
     *         } CUDA_RESOURCE_DESC;
* where: *
    *
  • *
    * CUDA_RESOURCE_DESC::resType * specifies the type of resource to texture from. CUresourceType is * defined as: *
            typedef enum CUresourcetype_enum {
         *             CU_RESOURCE_TYPE_ARRAY           = 0x00,
         *             CU_RESOURCE_TYPE_MIPMAPPED_ARRAY = 0x01,
         *             CU_RESOURCE_TYPE_LINEAR          = 0x02,
         *             CU_RESOURCE_TYPE_PITCH2D         = 0x03
         *         } CUresourcetype;
    *
    *
  • *
*

*

If CUDA_RESOURCE_DESC::resType is set * to CU_RESOURCE_TYPE_ARRAY, CUDA_RESOURCE_DESC::res::array::hArray must * be set to a valid CUDA array handle. *

*

If CUDA_RESOURCE_DESC::resType is set * to CU_RESOURCE_TYPE_MIPMAPPED_ARRAY, * CUDA_RESOURCE_DESC::res::mipmap::hMipmappedArray must be set to a valid * CUDA mipmapped array handle. *

*

If CUDA_RESOURCE_DESC::resType is set * to CU_RESOURCE_TYPE_LINEAR, CUDA_RESOURCE_DESC::res::linear::devPtr * must be set to a valid device pointer, that is aligned to * CU_DEVICE_ATTRIBUTE_TEXTURE_ALIGNMENT. CUDA_RESOURCE_DESC::res::linear::format * and CUDA_RESOURCE_DESC::res::linear::numChannels describe the format * of each component * and the number of components per array * element. CUDA_RESOURCE_DESC::res::linear::sizeInBytes specifies the * size of the array * in bytes. The total number of elements * in the linear address range cannot exceed * CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE1D_LINEAR_WIDTH. The number of * elements is computed as (sizeInBytes / (sizeof(format) * * numChannels)). *

*

If CUDA_RESOURCE_DESC::resType is set * to CU_RESOURCE_TYPE_PITCH2D, CUDA_RESOURCE_DESC::res::pitch2D::devPtr * must be set to a valid device pointer, that is aligned to * CU_DEVICE_ATTRIBUTE_TEXTURE_ALIGNMENT. CUDA_RESOURCE_DESC::res::pitch2D::format * and CUDA_RESOURCE_DESC::res::pitch2D::numChannels describe the format * of each component * and the number of components per array * element. CUDA_RESOURCE_DESC::res::pitch2D::width and * CUDA_RESOURCE_DESC::res::pitch2D::height * specify the width and height of the array * in elements, and cannot exceed CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE2D_LINEAR_WIDTH * and CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE2D_LINEAR_HEIGHT respectively. * CUDA_RESOURCE_DESC::res::pitch2D::pitchInBytes specifies the pitch * between two rows in bytes and has to be * aligned to * CU_DEVICE_ATTRIBUTE_TEXTURE_PITCH_ALIGNMENT. Pitch cannot exceed * CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE2D_LINEAR_PITCH. *

*
    *
  • *

    flags must be set to zero.

    *
  • *
*

*

The CUDA_TEXTURE_DESC struct is defined * as *

        typedef struct CUDA_TEXTURE_DESC_st {
     *             CUaddress_mode addressMode[3];
     *             CUfilter_mode filterMode;
     *             unsigned int flags;
     *             unsigned int maxAnisotropy;
     *             CUfilter_mode mipmapFilterMode;
     *             float mipmapLevelBias;
     *             float minMipmapLevelClamp;
     *             float maxMipmapLevelClamp;
     *         } CUDA_TEXTURE_DESC;
* where *
    *
  • *
    * CUDA_TEXTURE_DESC::addressMode * specifies the addressing mode for each dimension of the texture data. * CUaddress_mode is defined as: *
            typedef enum
         * CUaddress_mode_enum {
         *             CU_TR_ADDRESS_MODE_WRAP = 0,
         *             CU_TR_ADDRESS_MODE_CLAMP = 1,
         *             CU_TR_ADDRESS_MODE_MIRROR = 2,
         *             CU_TR_ADDRESS_MODE_BORDER = 3
         *         } CUaddress_mode;
    * This is ignored if * CUDA_RESOURCE_DESC::resType is CU_RESOURCE_TYPE_LINEAR. Also, if the * flag, CU_TRSF_NORMALIZED_COORDINATES is not set, the only supported * address mode is CU_TR_ADDRESS_MODE_CLAMP. *
    *
  • *
*

*
    *
  • *
    * CUDA_TEXTURE_DESC::filterMode * specifies the filtering mode to be used when fetching from the texture. * CUfilter_mode is defined as: *
            typedef enum CUfilter_mode_enum
         * {
         *             CU_TR_FILTER_MODE_POINT = 0,
         *             CU_TR_FILTER_MODE_LINEAR = 1
         *         } CUfilter_mode;
    * This is ignored if * CUDA_RESOURCE_DESC::resType is CU_RESOURCE_TYPE_LINEAR. *
    *
  • *
*

*
    *
  • *
    * CUDA_TEXTURE_DESC::flags can * be any combination of the following: *
      *
    • *

      CU_TRSF_READ_AS_INTEGER, * which suppresses the default behavior of having the texture promote * integer data to floating point data in the range [0, * 1]. Note that texture * with 32-bit integer format would not be promoted, regardless of whether * or not this flag is specified. *

      *
    • *
    • *

      CU_TRSF_NORMALIZED_COORDINATES, which suppresses the default behavior * of having the texture coordinates range from [0, Dim) where Dim is the * width or height * of the CUDA array. * Instead, the texture coordinates [0, 1.0) reference the entire breadth * of the array dimension; Note that * for CUDA mipmapped * arrays, this flag has to be set. *

      *
    • *
    *
    *
  • *
*

*
    *
  • *

    CUDA_TEXTURE_DESC::maxAnisotropy * specifies the maximum anisotropy ratio to be used when doing anisotropic * filtering. This value will be clamped to the range * [1,16]. *

    *
  • *
*

*
    *
  • *

    CUDA_TEXTURE_DESC::mipmapFilterMode * specifies the filter mode when the calculated mipmap level lies between * two defined mipmap levels. *

    *
  • *
*

*
    *
  • *

    CUDA_TEXTURE_DESC::mipmapLevelBias * specifies the offset to be applied to the calculated mipmap level. *

    *
  • *
*

*
    *
  • *

    CUDA_TEXTURE_DESC::minMipmapLevelClamp * specifies the lower end of the mipmap level range to clamp access to. *

    *
  • *
*

*
    *
  • *

    CUDA_TEXTURE_DESC::maxMipmapLevelClamp * specifies the upper end of the mipmap level range to clamp access to. *

    *
  • *
*

*

The CUDA_RESOURCE_VIEW_DESC struct is * defined as *

        typedef struct CUDA_RESOURCE_VIEW_DESC_st
     *         {
     *             CUresourceViewFormat format;
     *             size_t width;
     *             size_t height;
     *             size_t depth;
     *             unsigned int firstMipmapLevel;
     *             unsigned int lastMipmapLevel;
     *             unsigned int firstLayer;
     *             unsigned int lastLayer;
     *         } CUDA_RESOURCE_VIEW_DESC;
* where: *
    *
  • *

    CUDA_RESOURCE_VIEW_DESC::format * specifies how the data contained in the CUDA array or CUDA mipmapped * array should be interpreted. Note that this can incur * a change in size of the texture * data. If the resource view format is a block compressed format, then * the underlying CUDA array * or CUDA mipmapped array has to * have a base format of CU_AD_FORMAT_UNSIGNED_INT32 with 2 or 4 channels, * depending on the block compressed format. For example, BC1 and BC4 require * the underlying CUDA array to * have a format of * CU_AD_FORMAT_UNSIGNED_INT32 with 2 channels. The other BC formats * require the underlying resource to have the same base format but with * 4 channels. *

    *
  • *
*

*
    *
  • *

    CUDA_RESOURCE_VIEW_DESC::width * specifies the new width of the texture data. If the resource view * format is a block compressed format, this value has to * be 4 times the original width * of the resource. For non block compressed formats, this value has to * be equal to that of the * original resource. *

    *
  • *
*

*
    *
  • *

    CUDA_RESOURCE_VIEW_DESC::height * specifies the new height of the texture data. If the resource view * format is a block compressed format, this value has to * be 4 times the original height * of the resource. For non block compressed formats, this value has to * be equal to that of the * original resource. *

    *
  • *
*

*
    *
  • *

    CUDA_RESOURCE_VIEW_DESC::depth * specifies the new depth of the texture data. This value has to be equal * to that of the original resource. *

    *
  • *
*

*
    *
  • *

    CUDA_RESOURCE_VIEW_DESC::firstMipmapLevel specifies the most detailed * mipmap level. This will be the new mipmap level zero. For non-mipmapped * resources, this value * has to be * zero. CUDA_TEXTURE_DESC::minMipmapLevelClamp and * CUDA_TEXTURE_DESC::maxMipmapLevelClamp will be relative to this value. * For example, if the firstMipmapLevel is set to 2, and a minMipmapLevelClamp * of 1.2 is specified, * then the actual minimum mipmap * level clamp will be 3.2. *

    *
  • *
*

*
    *
  • *

    CUDA_RESOURCE_VIEW_DESC::lastMipmapLevel * specifies the least detailed mipmap level. For non-mipmapped resources, * this value has to be zero. *

    *
  • *
*

*
    *
  • *

    CUDA_RESOURCE_VIEW_DESC::firstLayer * specifies the first layer index for layered textures. This will be the * new layer zero. For non-layered resources, this value * has to be zero. *

    *
  • *
*

*
    *
  • *

    CUDA_RESOURCE_VIEW_DESC::lastLayer * specifies the last layer index for layered textures. For non-layered * resources, this value has to be zero. *

    *
  • *
*

*
* * @param pTexObject Texture object to create * @param pResDesc Resource descriptor * @param pTexDesc Texture descriptor * @param pResViewDesc Resource view descriptor * * @return CUDA_SUCCESS, CUDA_ERROR_DEINITIALIZED, CUDA_ERROR_NOT_INITIALIZED, * CUDA_ERROR_INVALID_CONTEXT, CUDA_ERROR_INVALID_VALUE * * @see JCudaDriver#cuTexObjectDestroy */ public static int cuTexObjectCreate(CUtexObject pTexObject, CUDA_RESOURCE_DESC pResDesc, CUDA_TEXTURE_DESC pTexDesc, CUDA_RESOURCE_VIEW_DESC pResViewDesc) { return checkResult(cuTexObjectCreateNative(pTexObject, pResDesc, pTexDesc, pResViewDesc)); } private static native int cuTexObjectCreateNative(CUtexObject pTexObject, CUDA_RESOURCE_DESC pResDesc, CUDA_TEXTURE_DESC pTexDesc, CUDA_RESOURCE_VIEW_DESC pResViewDesc); /** * Destroys a texture object. * *
     * CUresult cuTexObjectDestroy (
     *      CUtexObject texObject )
     * 
*
*

Destroys a texture object. Destroys the * texture object specified by texObject. *

*
*
     * @param texObject Texture object to destroy
     *
     * @return CUDA_SUCCESS, CUDA_ERROR_DEINITIALIZED, CUDA_ERROR_NOT_INITIALIZED,
     * CUDA_ERROR_INVALID_CONTEXT, CUDA_ERROR_INVALID_VALUE
     *
     * @see JCudaDriver#cuTexObjectCreate
     */
    public static int cuTexObjectDestroy(CUtexObject texObject)
    {
        // Call the native binding, then hand its status code to checkResult.
        final int status = cuTexObjectDestroyNative(texObject);
        return checkResult(status);
    }

    /** Native counterpart of {@code cuTexObjectDestroy}. */
    private static native int cuTexObjectDestroyNative(CUtexObject texObject);

    /**
     * Returns a texture object's resource descriptor.
     *
     *
     * CUresult cuTexObjectGetResourceDesc (
     *      CUDA_RESOURCE_DESC* pResDesc,
     *      CUtexObject texObject )
     * 
*
*

Returns a texture object's resource * descriptor. Returns the resource descriptor for the texture object * specified by texObject. *

*
*
     * @param pResDesc Resource descriptor
     * @param texObject Texture object
     *
     * @return CUDA_SUCCESS, CUDA_ERROR_DEINITIALIZED, CUDA_ERROR_NOT_INITIALIZED,
     * CUDA_ERROR_INVALID_CONTEXT, CUDA_ERROR_INVALID_VALUE
     *
     * @see JCudaDriver#cuTexObjectCreate
     */
    public static int cuTexObjectGetResourceDesc(CUDA_RESOURCE_DESC pResDesc, CUtexObject texObject)
    {
        // Call the native binding, then hand its status code to checkResult.
        final int status = cuTexObjectGetResourceDescNative(pResDesc, texObject);
        return checkResult(status);
    }

    /** Native counterpart of {@code cuTexObjectGetResourceDesc}. */
    private static native int cuTexObjectGetResourceDescNative(CUDA_RESOURCE_DESC pResDesc, CUtexObject texObject);

    /**
     * Returns a texture object's texture descriptor.
     *
     *
     * CUresult cuTexObjectGetTextureDesc (
     *      CUDA_TEXTURE_DESC* pTexDesc,
     *      CUtexObject texObject )
     * 
*
*

Returns a texture object's texture * descriptor. Returns the texture descriptor for the texture object * specified by texObject. *

*
*
     * @param pTexDesc Texture descriptor
     * @param texObject Texture object
     *
     * @return CUDA_SUCCESS, CUDA_ERROR_DEINITIALIZED, CUDA_ERROR_NOT_INITIALIZED,
     * CUDA_ERROR_INVALID_CONTEXT, CUDA_ERROR_INVALID_VALUE
     *
     * @see JCudaDriver#cuTexObjectCreate
     */
    public static int cuTexObjectGetTextureDesc(CUDA_TEXTURE_DESC pTexDesc, CUtexObject texObject)
    {
        // Call the native binding, then hand its status code to checkResult.
        final int status = cuTexObjectGetTextureDescNative(pTexDesc, texObject);
        return checkResult(status);
    }

    /** Native counterpart of {@code cuTexObjectGetTextureDesc}. */
    private static native int cuTexObjectGetTextureDescNative(CUDA_TEXTURE_DESC pTexDesc, CUtexObject texObject);

    /**
     * Returns a texture object's resource view descriptor.
     *
     *
     * CUresult cuTexObjectGetResourceViewDesc (
     *      CUDA_RESOURCE_VIEW_DESC* pResViewDesc,
     *      CUtexObject texObject )
     * 
*
*

Returns a texture object's resource view * descriptor. Returns the resource view descriptor for the texture * object specified * by texObject. If no resource * view was set for texObject, then CUDA_ERROR_INVALID_VALUE is * returned. *

*
*
     * @param pResViewDesc Resource view descriptor
     * @param texObject Texture object
     *
     * @return CUDA_SUCCESS, CUDA_ERROR_DEINITIALIZED, CUDA_ERROR_NOT_INITIALIZED,
     * CUDA_ERROR_INVALID_CONTEXT, CUDA_ERROR_INVALID_VALUE
     *
     * @see JCudaDriver#cuTexObjectCreate
     */
    public static int cuTexObjectGetResourceViewDesc(CUDA_RESOURCE_VIEW_DESC pResViewDesc, CUtexObject texObject)
    {
        // Call the native binding, then hand its status code to checkResult.
        final int status = cuTexObjectGetResourceViewDescNative(pResViewDesc, texObject);
        return checkResult(status);
    }

    /** Native counterpart of {@code cuTexObjectGetResourceViewDesc}. */
    private static native int cuTexObjectGetResourceViewDescNative(CUDA_RESOURCE_VIEW_DESC pResViewDesc, CUtexObject texObject);

    /**
     * Creates a surface object.
     *
     *
     * CUresult cuSurfObjectCreate (
     *      CUsurfObject* pSurfObject,
     *      const CUDA_RESOURCE_DESC* pResDesc )
     * 
*
*

Creates a surface object. Creates a * surface object and returns it in pSurfObject. pResDesc describes the data to perform surface load/stores on. * CUDA_RESOURCE_DESC::resType must be CU_RESOURCE_TYPE_ARRAY and * CUDA_RESOURCE_DESC::res::array::hArray must be set to a valid CUDA * array handle. CUDA_RESOURCE_DESC::flags must be set to zero. *

*

Surface objects are only supported on * devices of compute capability 3.0 or higher. *

*
*
     * @param pSurfObject Surface object to create
     * @param pResDesc Resource descriptor
     *
     * @return CUDA_SUCCESS, CUDA_ERROR_DEINITIALIZED, CUDA_ERROR_NOT_INITIALIZED,
     * CUDA_ERROR_INVALID_CONTEXT, CUDA_ERROR_INVALID_VALUE
     *
     * @see JCudaDriver#cuSurfObjectDestroy
     */
    public static int cuSurfObjectCreate(CUsurfObject pSurfObject, CUDA_RESOURCE_DESC pResDesc)
    {
        // Call the native binding, then hand its status code to checkResult.
        final int status = cuSurfObjectCreateNative(pSurfObject, pResDesc);
        return checkResult(status);
    }

    /** Native counterpart of {@code cuSurfObjectCreate}. */
    private static native int cuSurfObjectCreateNative(CUsurfObject pSurfObject, CUDA_RESOURCE_DESC pResDesc);

    /**
     * Destroys a surface object.
     *
     *
     * CUresult cuSurfObjectDestroy (
     *      CUsurfObject surfObject )
     * 
*
*

Destroys a surface object. Destroys the * surface object specified by surfObject. *

*
*
     * @param surfObject Surface object to destroy
     *
     * @return CUDA_SUCCESS, CUDA_ERROR_DEINITIALIZED, CUDA_ERROR_NOT_INITIALIZED,
     * CUDA_ERROR_INVALID_CONTEXT, CUDA_ERROR_INVALID_VALUE
     *
     * @see JCudaDriver#cuSurfObjectCreate
     */
    public static int cuSurfObjectDestroy(CUsurfObject surfObject)
    {
        // Call the native binding, then hand its status code to checkResult.
        final int status = cuSurfObjectDestroyNative(surfObject);
        return checkResult(status);
    }

    /** Native counterpart of {@code cuSurfObjectDestroy}. */
    private static native int cuSurfObjectDestroyNative(CUsurfObject surfObject);

    /**
     * Returns a surface object's resource descriptor.
     *
     *
     * CUresult cuSurfObjectGetResourceDesc (
     *      CUDA_RESOURCE_DESC* pResDesc,
     *      CUsurfObject surfObject )
     * 
*
*

Returns a surface object's resource * descriptor. Returns the resource descriptor for the surface object * specified by surfObject. *

*
*
     * @param pResDesc Resource descriptor
     * @param surfObject Surface object
     *
     * @return CUDA_SUCCESS, CUDA_ERROR_DEINITIALIZED, CUDA_ERROR_NOT_INITIALIZED,
     * CUDA_ERROR_INVALID_CONTEXT, CUDA_ERROR_INVALID_VALUE
     *
     * @see JCudaDriver#cuSurfObjectCreate
     */
    public static int cuSurfObjectGetResourceDesc(CUDA_RESOURCE_DESC pResDesc, CUsurfObject surfObject)
    {
        // Call the native binding, then hand its status code to checkResult.
        final int status = cuSurfObjectGetResourceDescNative(pResDesc, surfObject);
        return checkResult(status);
    }

    /** Native counterpart of {@code cuSurfObjectGetResourceDesc}. */
    private static native int cuSurfObjectGetResourceDescNative(CUDA_RESOURCE_DESC pResDesc, CUsurfObject surfObject);

    /**
     * Queries if a device may directly access a peer device's memory.
     *
     *
     * CUresult cuDeviceCanAccessPeer (
     *      int* canAccessPeer,
     *      CUdevice dev,
     *      CUdevice peerDev )
     * 
*
*

Queries if a device may directly access * a peer device's memory. Returns in *canAccessPeer a value * of 1 if contexts on dev are capable of directly accessing * memory from contexts on peerDev and 0 otherwise. If direct * access of peerDev from dev is possible, then access * may be enabled on two specific contexts by calling * cuCtxEnablePeerAccess(). *

*
* Note: *

Note that this * function may also return error codes from previous, asynchronous * launches. *

*
*

*
* * @param canAccessPeer Returned access capability * @param dev Device from which allocations on peerDev are to be directly accessed. * @param peerDev Device on which the allocations to be directly accessed by dev reside. * * @return CUDA_SUCCESS, CUDA_ERROR_DEINITIALIZED, CUDA_ERROR_NOT_INITIALIZED, * CUDA_ERROR_INVALID_DEVICE * * @see JCudaDriver#cuCtxEnablePeerAccess * @see JCudaDriver#cuCtxDisablePeerAccess */ public static int cuDeviceCanAccessPeer(int canAccessPeer[], CUdevice dev, CUdevice peerDev) { return checkResult(cuDeviceCanAccessPeerNative(canAccessPeer, dev, peerDev)); } private static native int cuDeviceCanAccessPeerNative(int canAccessPeer[], CUdevice dev, CUdevice peerDev); /** * Enables direct access to memory allocations in a peer context. * *
     * CUresult cuCtxEnablePeerAccess (
     *      CUcontext peerContext,
     *      unsigned int  Flags )
     * 
*
*

Enables direct access to memory * allocations in a peer context. If both the current context and peerContext are on devices which support unified addressing (as * may be queried using CU_DEVICE_ATTRIBUTE_UNIFIED_ADDRESSING) and same * major compute capability, then on success all allocations from peerContext will immediately be accessible by the current context. * See Unified Addressing for additional details. *

*

Note that access granted by this call * is unidirectional and that in order to access memory from the current * context in peerContext, a separate symmetric call to * cuCtxEnablePeerAccess() is required. *

*

Returns CUDA_ERROR_PEER_ACCESS_UNSUPPORTED * if cuDeviceCanAccessPeer() indicates that the CUdevice of the current * context cannot directly access memory from the CUdevice of peerContext. *

*

Returns CUDA_ERROR_PEER_ACCESS_ALREADY_ENABLED * if direct access of peerContext from the current context has * already been enabled. *

*

Returns CUDA_ERROR_TOO_MANY_PEERS if * direct peer access is not possible because hardware resources required * for peer access have been exhausted. *

*

Returns CUDA_ERROR_INVALID_CONTEXT if * there is no current context, peerContext is not a valid * context, or if the current context is peerContext. *

*

Returns CUDA_ERROR_INVALID_VALUE if Flags is not 0. *

*
* Note: *

Note that this * function may also return error codes from previous, asynchronous * launches. *

*
*

*
* * @param peerContext Peer context to enable direct access to from the current context * @param Flags Reserved for future use and must be set to 0 * * @return CUDA_SUCCESS, CUDA_ERROR_DEINITIALIZED, CUDA_ERROR_NOT_INITIALIZED, * CUDA_ERROR_PEER_ACCESS_ALREADY_ENABLED, CUDA_ERROR_TOO_MANY_PEERS, * CUDA_ERROR_INVALID_CONTEXT, CUDA_ERROR_PEER_ACCESS_UNSUPPORTED, * CUDA_ERROR_INVALID_VALUE * * @see JCudaDriver#cuDeviceCanAccessPeer * @see JCudaDriver#cuCtxDisablePeerAccess */ public static int cuCtxEnablePeerAccess(CUcontext peerContext, int Flags) { return checkResult(cuCtxEnablePeerAccessNative(peerContext, Flags)); } private static native int cuCtxEnablePeerAccessNative(CUcontext peerContext, int Flags); /** * Disables direct access to memory allocations in a peer context and unregisters any registered allocations. * *
     * CUresult cuCtxDisablePeerAccess (
     *      CUcontext peerContext )
     * 
*
*

Disables direct access to memory * allocations in a peer context and unregisters any registered allocations. * Returns CUDA_ERROR_PEER_ACCESS_NOT_ENABLED if direct peer access has * not yet been enabled from peerContext to the current * context. *

*

Returns CUDA_ERROR_INVALID_CONTEXT if * there is no current context, or if peerContext is not a valid * context. *

*
* Note: *

Note that this * function may also return error codes from previous, asynchronous * launches. *

*
*

*
*
     * @param peerContext Peer context to disable direct access to
     *
     * @return CUDA_SUCCESS, CUDA_ERROR_DEINITIALIZED, CUDA_ERROR_NOT_INITIALIZED,
     * CUDA_ERROR_PEER_ACCESS_NOT_ENABLED, CUDA_ERROR_INVALID_CONTEXT
     *
     * @see JCudaDriver#cuDeviceCanAccessPeer
     * @see JCudaDriver#cuCtxEnablePeerAccess
     */
    public static int cuCtxDisablePeerAccess(CUcontext peerContext)
    {
        // Call the native binding, then hand its status code to checkResult.
        final int status = cuCtxDisablePeerAccessNative(peerContext);
        return checkResult(status);
    }

    /** Native counterpart of {@code cuCtxDisablePeerAccess}. */
    private static native int cuCtxDisablePeerAccessNative(CUcontext peerContext);

    /**
     * Sets the parameter size for the function.
     *
     *
     * CUresult cuParamSetSize (
     *      CUfunction hfunc,
     *      unsigned int  numbytes )
     * 
*
*

Sets the parameter size for the function. * Deprecated. Sets through numbytes * the total size in bytes needed by the function parameters of the kernel * corresponding to hfunc. *

*
* Note: *

Note that this * function may also return error codes from previous, asynchronous * launches. *

*
*

*
* * @param hfunc Kernel to set parameter size for * @param numbytes Size of parameter list in bytes * * @return CUDA_SUCCESS, CUDA_ERROR_DEINITIALIZED, CUDA_ERROR_NOT_INITIALIZED, * CUDA_ERROR_INVALID_CONTEXT, CUDA_ERROR_INVALID_VALUE * * @see JCudaDriver#cuFuncSetBlockShape * @see JCudaDriver#cuFuncSetSharedSize * @see JCudaDriver#cuFuncGetAttribute * @see JCudaDriver#cuParamSetf * @see JCudaDriver#cuParamSeti * @see JCudaDriver#cuParamSetv * @see JCudaDriver#cuLaunch * @see JCudaDriver#cuLaunchGrid * @see JCudaDriver#cuLaunchGridAsync * @see JCudaDriver#cuLaunchKernel */ public static int cuParamSetSize(CUfunction hfunc, int numbytes) { return checkResult(cuParamSetSizeNative(hfunc, numbytes)); } private static native int cuParamSetSizeNative(CUfunction hfunc, int numbytes); /** * Adds an integer parameter to the function's argument list. * *
     * CUresult cuParamSeti (
     *      CUfunction hfunc,
     *      int  offset,
     *      unsigned int  value )
     * 
*
*

Adds an integer parameter to the * function's argument list. * Deprecated. Sets an integer parameter that * will be specified the next time the kernel corresponding to hfunc will be invoked. offset is a byte offset. *

*
* Note: *

Note that this * function may also return error codes from previous, asynchronous * launches. *

*
*

*
* * @param hfunc Kernel to add parameter to * @param offset Offset to add parameter to argument list * @param value Value of parameter * * @return CUDA_SUCCESS, CUDA_ERROR_DEINITIALIZED, CUDA_ERROR_NOT_INITIALIZED, * CUDA_ERROR_INVALID_CONTEXT, CUDA_ERROR_INVALID_VALUE * * @see JCudaDriver#cuFuncSetBlockShape * @see JCudaDriver#cuFuncSetSharedSize * @see JCudaDriver#cuFuncGetAttribute * @see JCudaDriver#cuParamSetSize * @see JCudaDriver#cuParamSetf * @see JCudaDriver#cuParamSetv * @see JCudaDriver#cuLaunch * @see JCudaDriver#cuLaunchGrid * @see JCudaDriver#cuLaunchGridAsync * @see JCudaDriver#cuLaunchKernel */ public static int cuParamSeti(CUfunction hfunc, int offset, int value) { return checkResult(cuParamSetiNative(hfunc, offset, value)); } private static native int cuParamSetiNative(CUfunction hfunc, int offset, int value); /** * Adds a floating-point parameter to the function's argument list. * *
     * CUresult cuParamSetf (
     *      CUfunction hfunc,
     *      int  offset,
     *      float  value )
     * 
*
*

Adds a floating-point parameter to the * function's argument list. * Deprecated. Sets a floating-point parameter * that will be specified the next time the kernel corresponding to hfunc will be invoked. offset is a byte offset. *

*
* Note: *

Note that this * function may also return error codes from previous, asynchronous * launches. *

*
*

*
* * @param hfunc Kernel to add parameter to * @param offset Offset to add parameter to argument list * @param value Value of parameter * * @return CUDA_SUCCESS, CUDA_ERROR_DEINITIALIZED, CUDA_ERROR_NOT_INITIALIZED, * CUDA_ERROR_INVALID_CONTEXT, CUDA_ERROR_INVALID_VALUE * * @see JCudaDriver#cuFuncSetBlockShape * @see JCudaDriver#cuFuncSetSharedSize * @see JCudaDriver#cuFuncGetAttribute * @see JCudaDriver#cuParamSetSize * @see JCudaDriver#cuParamSeti * @see JCudaDriver#cuParamSetv * @see JCudaDriver#cuLaunch * @see JCudaDriver#cuLaunchGrid * @see JCudaDriver#cuLaunchGridAsync * @see JCudaDriver#cuLaunchKernel */ public static int cuParamSetf(CUfunction hfunc, int offset, float value) { return checkResult(cuParamSetfNative(hfunc, offset, value)); } private static native int cuParamSetfNative(CUfunction hfunc, int offset, float value); /** * Adds arbitrary data to the function's argument list. * *
     * CUresult cuParamSetv (
     *      CUfunction hfunc,
     *      int  offset,
     *      void* ptr,
     *      unsigned int  numbytes )
     * 
*
*

Adds arbitrary data to the function's * argument list. * Deprecated. Copies an arbitrary amount of * data (specified in numbytes) from ptr into the * parameter space of the kernel corresponding to hfunc. offset is a byte offset. *

*
* Note: *

Note that this * function may also return error codes from previous, asynchronous * launches. *

*
*

*
* * @param hfunc Kernel to add data to * @param offset Offset to add data to argument list * @param ptr Pointer to arbitrary data * @param numbytes Size of data to copy in bytes * * @return CUDA_SUCCESS, CUDA_ERROR_DEINITIALIZED, CUDA_ERROR_NOT_INITIALIZED, * CUDA_ERROR_INVALID_CONTEXT, CUDA_ERROR_INVALID_VALUE * * @see JCudaDriver#cuFuncSetBlockShape * @see JCudaDriver#cuFuncSetSharedSize * @see JCudaDriver#cuFuncGetAttribute * @see JCudaDriver#cuParamSetSize * @see JCudaDriver#cuParamSetf * @see JCudaDriver#cuParamSeti * @see JCudaDriver#cuLaunch * @see JCudaDriver#cuLaunchGrid * @see JCudaDriver#cuLaunchGridAsync * @see JCudaDriver#cuLaunchKernel */ public static int cuParamSetv(CUfunction hfunc, int offset, Pointer ptr, int numbytes) { return checkResult(cuParamSetvNative(hfunc, offset, ptr, numbytes)); } private static native int cuParamSetvNative(CUfunction hfunc, int offset, Pointer ptr, int numbytes); /** * Adds a texture-reference to the function's argument list. * *
     * CUresult cuParamSetTexRef (
     *      CUfunction hfunc,
     *      int  texunit,
     *      CUtexref hTexRef )
     * 
*
*

Adds a texture-reference to the function's * argument list. * Deprecated. Makes the CUDA array or linear * memory bound to the texture reference hTexRef available to a * device program as a texture. In this version of CUDA, the * texture-reference must be obtained via cuModuleGetTexRef() and the texunit parameter must be set to CU_PARAM_TR_DEFAULT. *

*
* Note: *

Note that this * function may also return error codes from previous, asynchronous * launches. *

*
*

*
*
     *
     * @param hfunc Kernel to add texture-reference to
     * @param texunit Texture unit (must be CU_PARAM_TR_DEFAULT)
     * @param hTexRef Texture-reference to add to argument list
     *
     * @return CUDA_SUCCESS, CUDA_ERROR_DEINITIALIZED, CUDA_ERROR_NOT_INITIALIZED,
     * CUDA_ERROR_INVALID_CONTEXT, CUDA_ERROR_INVALID_VALUE
     *
     */
    public static int cuParamSetTexRef(CUfunction hfunc, int texunit, CUtexref hTexRef)
    {
        // Run the native call first, then hand its status code to checkResult.
        final int status = cuParamSetTexRefNative(hfunc, texunit, hTexRef);
        return checkResult(status);
    }

    // Native entry point backing cuParamSetTexRef.
    private static native int cuParamSetTexRefNative(CUfunction hfunc, int texunit, CUtexref hTexRef);

    /**
     *
     * \brief Returns occupancy of a function
     *
     * Returns in \p *numBlocks the number of the maximum active blocks per
     * streaming multiprocessor.
     *
     * \param numBlocks       - Returned occupancy
     * \param func            - Kernel for which occupancy is calculated
     * \param blockSize       - Block size the kernel is intended to be launched with
     * \param dynamicSMemSize - Per-block dynamic shared memory usage intended, in bytes
     *
     * \return
     * ::CUDA_SUCCESS,
     * ::CUDA_ERROR_DEINITIALIZED,
     * ::CUDA_ERROR_NOT_INITIALIZED,
     * ::CUDA_ERROR_INVALID_CONTEXT,
     * ::CUDA_ERROR_INVALID_VALUE,
     * ::CUDA_ERROR_UNKNOWN
     * \notefnerr
     * 
*/
    public static int cuOccupancyMaxActiveBlocksPerMultiprocessor(int numBlocks[], CUfunction func, int blockSize, long dynamicSMemSize)
    {
        // Run the native call first, then hand its status code to checkResult.
        final int status = cuOccupancyMaxActiveBlocksPerMultiprocessorNative(
            numBlocks, func, blockSize, dynamicSMemSize);
        return checkResult(status);
    }

    // Native entry point backing cuOccupancyMaxActiveBlocksPerMultiprocessor.
    private static native int cuOccupancyMaxActiveBlocksPerMultiprocessorNative(int numBlocks[], CUfunction func, int blockSize, long dynamicSMemSize);

    /**
     *
     * \brief Suggest a launch configuration with reasonable occupancy
     *
     * Returns in \p *blockSize a reasonable block size that can achieve
     * the maximum occupancy (or, the maximum number of active warps with
     * the fewest blocks per multiprocessor), and in \p *minGridSize the
     * minimum grid size to achieve the maximum occupancy.
     *
     * If \p blockSizeLimit is 0, the configurator will use the maximum
     * block size permitted by the device / function instead.
     *
     * If per-block dynamic shared memory allocation is not needed, the
     * user should leave both \p blockSizeToDynamicSMemSize and \p
     * dynamicSMemSize as 0.
     *
     * If per-block dynamic shared memory allocation is needed, then if
     * the dynamic shared memory size is constant regardless of block
     * size, the size should be passed through \p dynamicSMemSize, and \p
     * blockSizeToDynamicSMemSize should be NULL.
     *
     * Otherwise, if the per-block dynamic shared memory size varies with
     * different block sizes, the user needs to provide a unary function
     * through \p blockSizeToDynamicSMemSize that computes the dynamic
     * shared memory needed by \p func for any given block size. \p
     * dynamicSMemSize is ignored. An example signature is:
     *
     * \code
     *    // Take block size, returns dynamic shared memory needed
     *    size_t blockToSmem(int blockSize);
     * \endcode
     *
     * \param minGridSize - Returned minimum grid size needed to achieve the maximum occupancy
     * \param blockSize   - Returned maximum block size that can achieve the maximum occupancy
     * \param func        - Kernel for which launch configuration is calculated
     * \param blockSizeToDynamicSMemSize - A function that calculates how much per-block dynamic shared memory \p func uses based on the block size
     * \param dynamicSMemSize - Dynamic shared memory usage intended, in bytes
     * \param blockSizeLimit  - The maximum block size \p func is designed to handle
     *
     * \return
     * ::CUDA_SUCCESS,
     * ::CUDA_ERROR_DEINITIALIZED,
     * ::CUDA_ERROR_NOT_INITIALIZED,
     * ::CUDA_ERROR_INVALID_CONTEXT,
     * ::CUDA_ERROR_INVALID_VALUE,
     * ::CUDA_ERROR_UNKNOWN
     * \notefnerr
     * 
*/ public static int cuOccupancyMaxPotentialBlockSize(int minGridSize[], int blockSize[], CUfunction func, CUoccupancyB2DSize blockSizeToDynamicSMemSize, long dynamicSMemSize, int blockSizeLimit) { // The callback involves a state on the native side, // so ensure synchronization here synchronized (OCCUPANCY_LOCK) { return checkResult(cuOccupancyMaxPotentialBlockSizeNative(minGridSize, blockSize, func, blockSizeToDynamicSMemSize, dynamicSMemSize, blockSizeLimit)); } } private static native int cuOccupancyMaxPotentialBlockSizeNative(int minGridSize[], int blockSize[], CUfunction func, CUoccupancyB2DSize blockSizeToDynamicSMemSize, long dynamicSMemSize, int blockSizeLimit); private static final Object OCCUPANCY_LOCK = new Object(); /** * Launches a CUDA function. * *
     * CUresult cuLaunch (
     *      CUfunction f )
     * 
*
*

Launches a CUDA function. * Deprecated. Invokes the kernel f * on a 1 x 1 x 1 grid of blocks. The block contains the number of threads * specified by a previous call to cuFuncSetBlockShape(). *

*
* Note: *

Note that this * function may also return error codes from previous, asynchronous * launches. *

*
*

*
* * @param f Kernel to launch * * @return CUDA_SUCCESS, CUDA_ERROR_DEINITIALIZED, CUDA_ERROR_NOT_INITIALIZED, * CUDA_ERROR_INVALID_CONTEXT, CUDA_ERROR_INVALID_VALUE, * CUDA_ERROR_LAUNCH_FAILED, CUDA_ERROR_LAUNCH_OUT_OF_RESOURCES, * CUDA_ERROR_LAUNCH_TIMEOUT, CUDA_ERROR_LAUNCH_INCOMPATIBLE_TEXTURING, * CUDA_ERROR_SHARED_OBJECT_INIT_FAILED * * @see JCudaDriver#cuFuncSetBlockShape * @see JCudaDriver#cuFuncSetSharedSize * @see JCudaDriver#cuFuncGetAttribute * @see JCudaDriver#cuParamSetSize * @see JCudaDriver#cuParamSetf * @see JCudaDriver#cuParamSeti * @see JCudaDriver#cuParamSetv * @see JCudaDriver#cuLaunchGrid * @see JCudaDriver#cuLaunchGridAsync * @see JCudaDriver#cuLaunchKernel */ public static int cuLaunch(CUfunction f) { return checkResult(cuLaunchNative(f)); } private static native int cuLaunchNative(CUfunction f); /** * Launches a CUDA function. * *
     * CUresult cuLaunchGrid (
     *      CUfunction f,
     *      int  grid_width,
     *      int  grid_height )
     * 
*
*

Launches a CUDA function. * Deprecated. Invokes the kernel f * on a grid_width x grid_height grid of blocks. Each * block contains the number of threads specified by a previous call to * cuFuncSetBlockShape(). *

*
* Note: *

Note that this * function may also return error codes from previous, asynchronous * launches. *

*
*

*
* * @param f Kernel to launch * @param grid_width Width of grid in blocks * @param grid_height Height of grid in blocks * * @return CUDA_SUCCESS, CUDA_ERROR_DEINITIALIZED, CUDA_ERROR_NOT_INITIALIZED, * CUDA_ERROR_INVALID_CONTEXT, CUDA_ERROR_INVALID_VALUE, * CUDA_ERROR_LAUNCH_FAILED, CUDA_ERROR_LAUNCH_OUT_OF_RESOURCES, * CUDA_ERROR_LAUNCH_TIMEOUT, CUDA_ERROR_LAUNCH_INCOMPATIBLE_TEXTURING, * CUDA_ERROR_SHARED_OBJECT_INIT_FAILED * * @see JCudaDriver#cuFuncSetBlockShape * @see JCudaDriver#cuFuncSetSharedSize * @see JCudaDriver#cuFuncGetAttribute * @see JCudaDriver#cuParamSetSize * @see JCudaDriver#cuParamSetf * @see JCudaDriver#cuParamSeti * @see JCudaDriver#cuParamSetv * @see JCudaDriver#cuLaunch * @see JCudaDriver#cuLaunchGridAsync * @see JCudaDriver#cuLaunchKernel */ public static int cuLaunchGrid(CUfunction f, int grid_width, int grid_height) { return checkResult(cuLaunchGridNative(f, grid_width, grid_height)); } private static native int cuLaunchGridNative(CUfunction f, int grid_width, int grid_height); /** * Launches a CUDA function. * *
     * CUresult cuLaunchGridAsync (
     *      CUfunction f,
     *      int  grid_width,
     *      int  grid_height,
     *      CUstream hStream )
     * 
*
*

Launches a CUDA function. * Deprecated. Invokes the kernel f * on a grid_width x grid_height grid of blocks. Each * block contains the number of threads specified by a previous call to * cuFuncSetBlockShape(). *

*

cuLaunchGridAsync() can optionally be * associated to a stream by passing a non-zero hStream * argument. *

*
* Note: *

Note that this * function may also return error codes from previous, asynchronous * launches. *

*
*

*
* * @param f Kernel to launch * @param grid_width Width of grid in blocks * @param grid_height Height of grid in blocks * @param hStream Stream identifier * * @return CUDA_SUCCESS, CUDA_ERROR_DEINITIALIZED, CUDA_ERROR_NOT_INITIALIZED, * CUDA_ERROR_INVALID_CONTEXT, CUDA_ERROR_INVALID_HANDLE, * CUDA_ERROR_INVALID_VALUE, CUDA_ERROR_LAUNCH_FAILED, * CUDA_ERROR_LAUNCH_OUT_OF_RESOURCES, CUDA_ERROR_LAUNCH_TIMEOUT, * CUDA_ERROR_LAUNCH_INCOMPATIBLE_TEXTURING, * CUDA_ERROR_SHARED_OBJECT_INIT_FAILED * * @see JCudaDriver#cuFuncSetBlockShape * @see JCudaDriver#cuFuncSetSharedSize * @see JCudaDriver#cuFuncGetAttribute * @see JCudaDriver#cuParamSetSize * @see JCudaDriver#cuParamSetf * @see JCudaDriver#cuParamSeti * @see JCudaDriver#cuParamSetv * @see JCudaDriver#cuLaunch * @see JCudaDriver#cuLaunchGrid * @see JCudaDriver#cuLaunchKernel */ public static int cuLaunchGridAsync(CUfunction f, int grid_width, int grid_height, CUstream hStream) { return checkResult(cuLaunchGridAsyncNative(f, grid_width, grid_height, hStream)); } private static native int cuLaunchGridAsyncNative(CUfunction f, int grid_width, int grid_height, CUstream hStream); /** * Creates an event. * *
     * CUresult cuEventCreate (
     *      CUevent* phEvent,
     *      unsigned int  Flags )
     * 
*
*

Creates an event. Creates an event * *phEvent with the flags specified via Flags. Valid flags * include: *

    *
  • *

    CU_EVENT_DEFAULT: Default event * creation flag. *

    *
  • *
  • *

    CU_EVENT_BLOCKING_SYNC: * Specifies that the created event should use blocking synchronization. * A CPU thread that uses cuEventSynchronize() to wait on an event created * with this flag will block until the event has actually been recorded. *

    *
  • *
  • *

    CU_EVENT_DISABLE_TIMING: * Specifies that the created event does not need to record timing data. * Events created with this flag specified and the CU_EVENT_BLOCKING_SYNC * flag not specified will provide the best performance when used with * cuStreamWaitEvent() and cuEventQuery(). *

    *
  • *
  • *

    CU_EVENT_INTERPROCESS: Specifies * that the created event may be used as an interprocess event by * cuIpcGetEventHandle(). CU_EVENT_INTERPROCESS must be specified along * with CU_EVENT_DISABLE_TIMING. *

    *
  • *
*

*
* Note: *

Note that this * function may also return error codes from previous, asynchronous * launches. *

*
*

*
* * @param phEvent Returns newly created event * @param Flags Event creation flags * * @return CUDA_SUCCESS, CUDA_ERROR_DEINITIALIZED, CUDA_ERROR_NOT_INITIALIZED, * CUDA_ERROR_INVALID_CONTEXT, CUDA_ERROR_INVALID_VALUE, * CUDA_ERROR_OUT_OF_MEMORY * * @see JCudaDriver#cuEventRecord * @see JCudaDriver#cuEventQuery * @see JCudaDriver#cuEventSynchronize * @see JCudaDriver#cuEventDestroy * @see JCudaDriver#cuEventElapsedTime */ public static int cuEventCreate(CUevent phEvent, int Flags) { return checkResult(cuEventCreateNative(phEvent, Flags)); } private static native int cuEventCreateNative(CUevent phEvent, int Flags); /** * Records an event. * *
     * CUresult cuEventRecord (
     *      CUevent hEvent,
     *      CUstream hStream )
     * 
*
*

Records an event. Records an event. If * hStream is non-zero, the event is recorded after all preceding * operations in hStream have been completed; otherwise, it is * recorded after all preceding operations in the CUDA context have been * completed. Since this * operation is asynchronous, cuEventQuery() * and/or cuEventSynchronize() must be used to determine when the event * has actually been recorded. *

*

If cuEventRecord() has previously been * called on hEvent, then this call will overwrite any existing * state in hEvent. Any subsequent calls which examine the * status of hEvent will only examine the completion of this * most recent call to cuEventRecord(). *

*

It is necessary that hEvent * and hStream be created on the same context. *

*
* Note: *

Note that this * function may also return error codes from previous, asynchronous * launches. *

*
*

*
* * @param hEvent Event to record * @param hStream Stream to record event for * * @return CUDA_SUCCESS, CUDA_ERROR_DEINITIALIZED, CUDA_ERROR_NOT_INITIALIZED, * CUDA_ERROR_INVALID_CONTEXT, CUDA_ERROR_INVALID_HANDLE, * CUDA_ERROR_INVALID_VALUE * * @see JCudaDriver#cuEventCreate * @see JCudaDriver#cuEventQuery * @see JCudaDriver#cuEventSynchronize * @see JCudaDriver#cuStreamWaitEvent * @see JCudaDriver#cuEventDestroy * @see JCudaDriver#cuEventElapsedTime */ public static int cuEventRecord(CUevent hEvent, CUstream hStream) { return checkResult(cuEventRecordNative(hEvent, hStream)); } private static native int cuEventRecordNative(CUevent hEvent, CUstream hStream); /** * Queries an event's status. * *
     * CUresult cuEventQuery (
     *      CUevent hEvent )
     * 
*
*

Queries an event's status. Query the * status of all device work preceding the most recent call to * cuEventRecord() (in the appropriate compute streams, as specified by * the arguments to cuEventRecord()). *

*

If this work has successfully been * completed by the device, or if cuEventRecord() has not been called on * hEvent, then CUDA_SUCCESS is returned. If this work has not * yet been completed by the device then CUDA_ERROR_NOT_READY is * returned. *

*
* Note: *

Note that this * function may also return error codes from previous, asynchronous * launches. *

*
*

*
* * @param hEvent Event to query * * @return CUDA_SUCCESS, CUDA_ERROR_DEINITIALIZED, CUDA_ERROR_NOT_INITIALIZED, * CUDA_ERROR_INVALID_HANDLE, CUDA_ERROR_INVALID_VALUE, * CUDA_ERROR_NOT_READY * * @see JCudaDriver#cuEventCreate * @see JCudaDriver#cuEventRecord * @see JCudaDriver#cuEventSynchronize * @see JCudaDriver#cuEventDestroy * @see JCudaDriver#cuEventElapsedTime */ public static int cuEventQuery(CUevent hEvent) { return checkResult(cuEventQueryNative(hEvent)); } private static native int cuEventQueryNative(CUevent hEvent); /** * Waits for an event to complete. * *
     * CUresult cuEventSynchronize (
     *      CUevent hEvent )
     * 
*
*

Waits for an event to complete. Wait * until the completion of all device work preceding the most recent call * to cuEventRecord() (in the appropriate compute streams, as specified * by the arguments to cuEventRecord()). *

*

If cuEventRecord() has not been called * on hEvent, CUDA_SUCCESS is returned immediately. *

*

Waiting for an event that was created * with the CU_EVENT_BLOCKING_SYNC flag will cause the calling CPU thread * to block until the event has been completed by the device. If the * CU_EVENT_BLOCKING_SYNC flag has not been set, then the CPU thread will * busy-wait until the event has been completed by the device. *

*
* Note: *

Note that this * function may also return error codes from previous, asynchronous * launches. *

*
*

*
* * @param hEvent Event to wait for * * @return CUDA_SUCCESS, CUDA_ERROR_DEINITIALIZED, CUDA_ERROR_NOT_INITIALIZED, * CUDA_ERROR_INVALID_CONTEXT, CUDA_ERROR_INVALID_HANDLE * * @see JCudaDriver#cuEventCreate * @see JCudaDriver#cuEventRecord * @see JCudaDriver#cuEventQuery * @see JCudaDriver#cuEventDestroy * @see JCudaDriver#cuEventElapsedTime */ public static int cuEventSynchronize(CUevent hEvent) { return checkResult(cuEventSynchronizeNative(hEvent)); } private static native int cuEventSynchronizeNative(CUevent hEvent); /** * Destroys an event. * *
     * CUresult cuEventDestroy (
     *      CUevent hEvent )
     * 
*
*

Destroys an event. Destroys the event * specified by hEvent. *

*

In case hEvent has been * recorded but has not yet been completed when cuEventDestroy() is * called, the function will return immediately and the resources * associated with hEvent will be released automatically once * the device has completed hEvent. *

*
* Note: *

Note that this * function may also return error codes from previous, asynchronous * launches. *

*
*

*
* * @param hEvent Event to destroy * * @return CUDA_SUCCESS, CUDA_ERROR_DEINITIALIZED, CUDA_ERROR_NOT_INITIALIZED, * CUDA_ERROR_INVALID_CONTEXT, CUDA_ERROR_INVALID_HANDLE * * @see JCudaDriver#cuEventCreate * @see JCudaDriver#cuEventRecord * @see JCudaDriver#cuEventQuery * @see JCudaDriver#cuEventSynchronize * @see JCudaDriver#cuEventElapsedTime */ public static int cuEventDestroy(CUevent hEvent) { return checkResult(cuEventDestroyNative(hEvent)); } private static native int cuEventDestroyNative(CUevent hEvent); /** * Computes the elapsed time between two events. * *
     * CUresult cuEventElapsedTime (
     *      float* pMilliseconds,
     *      CUevent hStart,
     *      CUevent hEnd )
     * 
*
*

Computes the elapsed time between two * events. Computes the elapsed time between two events (in milliseconds * with a resolution * of around 0.5 microseconds). *

*

If either event was last recorded in a * non-NULL stream, the resulting time may be greater than expected (even * if both used * the same stream handle). This happens * because the cuEventRecord() operation takes place asynchronously and * there is no guarantee that the measured latency is actually just * between the two * events. Any number of other different * stream operations could execute in between the two measured events, * thus altering the * timing in a significant way. *

*

If cuEventRecord() has not been called * on either event then CUDA_ERROR_INVALID_HANDLE is returned. If * cuEventRecord() has been called on both events but one or both of them * has not yet been completed (that is, cuEventQuery() would return * CUDA_ERROR_NOT_READY on at least one of the events), CUDA_ERROR_NOT_READY * is returned. If either event was created with the CU_EVENT_DISABLE_TIMING * flag, then this function will return CUDA_ERROR_INVALID_HANDLE. *

*
* Note: *

Note that this * function may also return error codes from previous, asynchronous * launches. *

*
*

*
* * @param pMilliseconds Time between hStart and hEnd in ms * @param hStart Starting event * @param hEnd Ending event * * @return CUDA_SUCCESS, CUDA_ERROR_DEINITIALIZED, CUDA_ERROR_NOT_INITIALIZED, * CUDA_ERROR_INVALID_CONTEXT, CUDA_ERROR_INVALID_HANDLE, * CUDA_ERROR_NOT_READY * * @see JCudaDriver#cuEventCreate * @see JCudaDriver#cuEventRecord * @see JCudaDriver#cuEventQuery * @see JCudaDriver#cuEventSynchronize * @see JCudaDriver#cuEventDestroy */ public static int cuEventElapsedTime(float pMilliseconds[], CUevent hStart, CUevent hEnd) { return checkResult(cuEventElapsedTimeNative(pMilliseconds, hStart, hEnd)); } private static native int cuEventElapsedTimeNative(float pMilliseconds[], CUevent hStart, CUevent hEnd); /** * Returns information about a pointer. * *
     * CUresult cuPointerGetAttribute (
     *      void* data,
     *      CUpointer_attribute attribute,
     *      CUdeviceptr ptr )
     * 
*
*

Returns information about a pointer. * The supported attributes are: *

*
    *
  • *

    CU_POINTER_ATTRIBUTE_CONTEXT: *

    *
  • *
*

*

Returns in *data the CUcontext * in which ptr was allocated or registered. The type of data must be CUcontext *. *

*

If ptr was not allocated by, * mapped by, or registered with a CUcontext which uses unified virtual * addressing then CUDA_ERROR_INVALID_VALUE is returned. *

*
    *
  • *

    CU_POINTER_ATTRIBUTE_MEMORY_TYPE: *

    *
  • *
*

*

Returns in *data the physical * memory type of the memory that ptr addresses as a CUmemorytype * enumerated value. The type of data must be unsigned int. *

*

If ptr addresses device memory * then *data is set to CU_MEMORYTYPE_DEVICE. The particular * CUdevice on which the memory resides is the CUdevice of the CUcontext * returned by the CU_POINTER_ATTRIBUTE_CONTEXT attribute of ptr. *

*

If ptr addresses host memory * then *data is set to CU_MEMORYTYPE_HOST. *

*

If ptr was not allocated by, * mapped by, or registered with a CUcontext which uses unified virtual * addressing then CUDA_ERROR_INVALID_VALUE is returned. *

*

If the current CUcontext does not * support unified virtual addressing then CUDA_ERROR_INVALID_CONTEXT is * returned. *

*
    *
  • *

    CU_POINTER_ATTRIBUTE_DEVICE_POINTER: *

    *
  • *
*

*

Returns in *data the device * pointer value through which ptr may be accessed by kernels * running in the current CUcontext. The type of data must be * CUdeviceptr *. *

*

If there exists no device pointer value * through which kernels running in the current CUcontext may access ptr then CUDA_ERROR_INVALID_VALUE is returned. *

*

If there is no current CUcontext then * CUDA_ERROR_INVALID_CONTEXT is returned. *

*

Except in the exceptional disjoint * addressing cases discussed below, the value returned in *data * will equal the input value ptr. *

*
    *
  • *

    CU_POINTER_ATTRIBUTE_HOST_POINTER: *

    *
  • *
*

*

Returns in *data the host * pointer value through which ptr may be accessed by the * host program. The type of data must be void **. If there * exists no host pointer value through which the host program may directly * access ptr then CUDA_ERROR_INVALID_VALUE is returned. *

*

Except in the exceptional disjoint * addressing cases discussed below, the value returned in *data * will equal the input value ptr. *

*
    *
  • *

    CU_POINTER_ATTRIBUTE_P2P_TOKENS: *

    *
  • *
*

*

Returns in *data two tokens * for use with the nv-p2p.h Linux kernel interface. data must * be a struct of type CUDA_POINTER_ATTRIBUTE_P2P_TOKENS. *

*

ptr must be a pointer to * memory obtained from cuMemAlloc(). Note that p2pToken and vaSpaceToken * are only valid for the lifetime of the source allocation. A subsequent * allocation at * the same address may return completely * different tokens. *

*

* Note that for most allocations in the * unified virtual address space the host and device pointer for accessing * the allocation * will be the same. The exceptions to this * are *

    *
  • *

    user memory registered using * cuMemHostRegister *

    *
  • *
  • *

    host memory allocated using * cuMemHostAlloc with the CU_MEMHOSTALLOC_WRITECOMBINED flag For these * types of allocation there will exist separate, disjoint host and device * addresses for accessing the allocation. * In particular *

    *
  • *
  • *

    The host address will correspond * to an invalid unmapped device address (which will result in an exception * if accessed from * the device) *

    *
  • *
  • *

    The device address will * correspond to an invalid unmapped host address (which will result in * an exception if accessed from * the host). For these types of * allocations, querying CU_POINTER_ATTRIBUTE_HOST_POINTER and * CU_POINTER_ATTRIBUTE_DEVICE_POINTER may be used to retrieve the host * and device addresses from either address. *

    *
  • *
*

*
* Note: *

Note that this * function may also return error codes from previous, asynchronous * launches. *

*
*

*
* * @param data Returned pointer attribute value * @param attribute Pointer attribute to query * @param ptr Pointer * * @return CUDA_SUCCESS, CUDA_ERROR_DEINITIALIZED, CUDA_ERROR_NOT_INITIALIZED, * CUDA_ERROR_INVALID_CONTEXT, CUDA_ERROR_INVALID_VALUE, * CUDA_ERROR_INVALID_DEVICE * * @see JCudaDriver#cuMemAlloc * @see JCudaDriver#cuMemFree * @see JCudaDriver#cuMemAllocHost * @see JCudaDriver#cuMemFreeHost * @see JCudaDriver#cuMemHostAlloc * @see JCudaDriver#cuMemHostRegister * @see JCudaDriver#cuMemHostUnregister */ public static int cuPointerGetAttribute(Pointer data, int attribute, CUdeviceptr ptr) { return checkResult(cuPointerGetAttributeNative(data, attribute, ptr)); } private static native int cuPointerGetAttributeNative(Pointer data, int attribute, CUdeviceptr ptr); public static int cuPointerSetAttribute(Pointer value, int attribute, CUdeviceptr ptr) { return checkResult(cuPointerSetAttribute(value, attribute, ptr)); } private static native int cuPointerSetAttributeNative(Pointer value, int attribute, CUdeviceptr ptr); /** * Create a stream. * *
     * CUresult cuStreamCreate (
     *      CUstream* phStream,
     *      unsigned int  Flags )
     * 
*
*

Create a stream. Creates a stream and * returns a handle in phStream. The Flags argument * determines behaviors of the stream. Valid values for Flags * are: *

    *
  • *

    CU_STREAM_DEFAULT: Default * stream creation flag. *

    *
  • *
  • *

    CU_STREAM_NON_BLOCKING: * Specifies that work running in the created stream may run concurrently * with work in stream 0 (the NULL stream), and that * the created stream should * perform no implicit synchronization with stream 0. *

    *
  • *
*

*
* Note: *

Note that this * function may also return error codes from previous, asynchronous * launches. *

*
*

*
* * @param phStream Returned newly created stream * @param Flags Parameters for stream creation * * @return CUDA_SUCCESS, CUDA_ERROR_DEINITIALIZED, CUDA_ERROR_NOT_INITIALIZED, * CUDA_ERROR_INVALID_CONTEXT, CUDA_ERROR_INVALID_VALUE, * CUDA_ERROR_OUT_OF_MEMORY * * @see JCudaDriver#cuStreamDestroy * @see JCudaDriver#cuStreamWaitEvent * @see JCudaDriver#cuStreamQuery * @see JCudaDriver#cuStreamSynchronize * @see JCudaDriver#cuStreamAddCallback */ public static int cuStreamCreate(CUstream phStream, int Flags) { return checkResult(cuStreamCreateNative(phStream, Flags)); } private static native int cuStreamCreateNative(CUstream phStream, int Flags); public static int cuStreamCreateWithPriority(CUstream phStream, int flags, int priority) { return checkResult(cuStreamCreateWithPriorityNative(phStream, flags, priority)); } private static native int cuStreamCreateWithPriorityNative(CUstream phStream, int flags, int priority); public static int cuStreamGetPriority(CUstream hStream, int priority[]) { return checkResult(cuStreamGetPriorityNative(hStream, priority)); } private static native int cuStreamGetPriorityNative(CUstream hStream, int priority[]); public static int cuStreamGetFlags(CUstream hStream, int flags[]) { return checkResult(cuStreamGetFlagsNative(hStream, flags)); } private static native int cuStreamGetFlagsNative(CUstream hStream, int flags[]); /** * Make a compute stream wait on an event. * *
     * CUresult cuStreamWaitEvent (
     *      CUstream hStream,
     *      CUevent hEvent,
     *      unsigned int  Flags )
     * 
*
*

Make a compute stream wait on an event. * Makes all future work submitted to hStream wait until hEvent reports completion before beginning execution. This * synchronization will be performed efficiently on the device. The event * hEvent may be from a different * context than hStream, in which case this function will * perform cross-device synchronization. *

*

The stream hStream will wait * only for the completion of the most recent host call to cuEventRecord() * on hEvent. Once this call has returned, any functions * (including cuEventRecord() and cuEventDestroy()) may be called on hEvent again, and subsequent calls will not have any effect on * hStream. *

*

If hStream is 0 (the NULL * stream) any future work submitted in any stream will wait for hEvent to complete before beginning execution. This effectively * creates a barrier for all future work submitted to the context. *

*

If cuEventRecord() has not been called * on hEvent, this call acts as if the record has already * completed, and so is a functional no-op. *

*
* Note: *

Note that this * function may also return error codes from previous, asynchronous * launches. *

*
*

*
* * @param hStream Stream to wait * @param hEvent Event to wait on (may not be NULL) * @param Flags Parameters for the operation (must be 0) * * @return CUDA_SUCCESS, CUDA_ERROR_DEINITIALIZED, CUDA_ERROR_NOT_INITIALIZED, * CUDA_ERROR_INVALID_CONTEXT, CUDA_ERROR_INVALID_HANDLE, * * @see JCudaDriver#cuStreamCreate * @see JCudaDriver#cuEventRecord * @see JCudaDriver#cuStreamQuery * @see JCudaDriver#cuStreamSynchronize * @see JCudaDriver#cuStreamAddCallback * @see JCudaDriver#cuStreamDestroy */ public static int cuStreamWaitEvent(CUstream hStream, CUevent hEvent, int Flags) { return checkResult(cuStreamWaitEventNative(hStream, hEvent, Flags)); } private static native int cuStreamWaitEventNative(CUstream hStream, CUevent hEvent, int Flags); /** * Add a callback to a compute stream. * *
     * CUresult cuStreamAddCallback (
     *      CUstream hStream,
     *      CUstreamCallback callback,
     *      void* userData,
     *      unsigned int  flags )
     * 
*
*

Add a callback to a compute stream. Adds * a callback to be called on the host after all currently enqueued items * in the stream * have completed. For each cuStreamAddCallback * call, the callback will be executed exactly once. The callback will * block later * work in the stream until it is finished. *

*

The callback may be passed CUDA_SUCCESS * or an error code. In the event of a device error, all subsequently * executed callbacks will receive an appropriate CUresult. *

*

Callbacks must not make any CUDA API * calls. Attempting to use a CUDA API will result in CUDA_ERROR_NOT_PERMITTED. * Callbacks must not perform any synchronization that may depend on * outstanding device work or other callbacks that are not * mandated to run earlier. Callbacks * without a mandated order (in independent streams) execute in undefined * order and may be * serialized. *

*

This API requires compute capability * 1.1 or greater. See cuDeviceGetAttribute or cuDeviceGetProperties to * query compute capability. Attempting to use this API with earlier * compute versions will return CUDA_ERROR_NOT_SUPPORTED. *

*
* Note: *

Note that this * function may also return error codes from previous, asynchronous * launches. *

*
*

*
* * @param hStream Stream to add callback to * @param callback The function to call once preceding stream operations are complete * @param userData User specified data to be passed to the callback function * @param flags Reserved for future use, must be 0 * * @return CUDA_SUCCESS, CUDA_ERROR_DEINITIALIZED, CUDA_ERROR_NOT_INITIALIZED, * CUDA_ERROR_INVALID_CONTEXT, CUDA_ERROR_INVALID_HANDLE, * CUDA_ERROR_NOT_SUPPORTED * * @see JCudaDriver#cuStreamCreate * @see JCudaDriver#cuStreamQuery * @see JCudaDriver#cuStreamSynchronize * @see JCudaDriver#cuStreamWaitEvent * @see JCudaDriver#cuStreamDestroy */ public static int cuStreamAddCallback(CUstream hStream, CUstreamCallback callback, Object userData, int flags) { return checkResult(cuStreamAddCallbackNative(hStream, callback, userData, flags)); } private static native int cuStreamAddCallbackNative(CUstream hStream, CUstreamCallback callback, Object userData, int flags); public static int cuStreamAttachMemAsync(CUstream hStream, CUdeviceptr dptr, long length, int flags) { return checkResult(cuStreamAttachMemAsyncNative(hStream, dptr, length, flags)); } private static native int cuStreamAttachMemAsyncNative(CUstream hStream, CUdeviceptr dptr, long length, int flags); /** * Determine status of a compute stream. * *
     * CUresult cuStreamQuery (
     *      CUstream hStream )
     * 
*
*

Determine status of a compute stream. * Returns CUDA_SUCCESS if all operations in the stream specified by hStream have completed, or CUDA_ERROR_NOT_READY if not. *

*
* Note: *

Note that this * function may also return error codes from previous, asynchronous * launches. *

*
*

*
* * @param hStream Stream to query status of * * @return CUDA_SUCCESS, CUDA_ERROR_DEINITIALIZED, CUDA_ERROR_NOT_INITIALIZED, * CUDA_ERROR_INVALID_CONTEXT, CUDA_ERROR_INVALID_HANDLE, * CUDA_ERROR_NOT_READY * * @see JCudaDriver#cuStreamCreate * @see JCudaDriver#cuStreamWaitEvent * @see JCudaDriver#cuStreamDestroy * @see JCudaDriver#cuStreamSynchronize * @see JCudaDriver#cuStreamAddCallback */ public static int cuStreamQuery(CUstream hStream) { return checkResult(cuStreamQueryNative(hStream)); } private static native int cuStreamQueryNative(CUstream hStream); /** * Wait until a stream's tasks are completed. * *
     * CUresult cuStreamSynchronize (
     *      CUstream hStream )
     * 
*
*

Wait until a stream's tasks are completed. * Waits until the device has completed all operations in the stream * specified by * hStream. If the context was * created with the CU_CTX_SCHED_BLOCKING_SYNC flag, the CPU thread will * block until the stream is finished with all of its tasks. *

*
* Note: *

Note that this * function may also return error codes from previous, asynchronous * launches. *

*
*

*
* * @param hStream Stream to wait for * * @return CUDA_SUCCESS, CUDA_ERROR_DEINITIALIZED, CUDA_ERROR_NOT_INITIALIZED, * CUDA_ERROR_INVALID_CONTEXT, CUDA_ERROR_INVALID_HANDLE * * @see JCudaDriver#cuStreamCreate * @see JCudaDriver#cuStreamDestroy * @see JCudaDriver#cuStreamWaitEvent * @see JCudaDriver#cuStreamQuery * @see JCudaDriver#cuStreamAddCallback */ public static int cuStreamSynchronize(CUstream hStream) { return checkResult(cuStreamSynchronizeNative(hStream)); } private static native int cuStreamSynchronizeNative(CUstream hStream); /** * Destroys a stream. * *
     * CUresult cuStreamDestroy (
     *      CUstream hStream )
     * 
*
*

Destroys a stream. Destroys the stream * specified by hStream. *

*

In case the device is still doing work * in the stream hStream when cuStreamDestroy() is called, the * function will return immediately and the resources associated with hStream will be released automatically once the device has * completed all work in hStream. *

*
* Note: *

Note that this * function may also return error codes from previous, asynchronous * launches. *

*
*

*
* * @param hStream Stream to destroy * * @return CUDA_SUCCESS, CUDA_ERROR_DEINITIALIZED, CUDA_ERROR_NOT_INITIALIZED, * CUDA_ERROR_INVALID_CONTEXT, CUDA_ERROR_INVALID_VALUE * * @see JCudaDriver#cuStreamCreate * @see JCudaDriver#cuStreamWaitEvent * @see JCudaDriver#cuStreamQuery * @see JCudaDriver#cuStreamSynchronize * @see JCudaDriver#cuStreamAddCallback */ public static int cuStreamDestroy(CUstream hStream) { return checkResult(cuStreamDestroyNative(hStream)); } private static native int cuStreamDestroyNative(CUstream hStream); /** * Initializes OpenGL interoperability. * *
     * CUresult cuGLInit (
     *      void )
     * 
*
*

Initializes OpenGL interoperability. Deprecated: This function is deprecated as of CUDA 3.0. Initializes OpenGL interoperability. This function is deprecated and calling it is no longer required. It may fail if the needed OpenGL driver facilities are not available.

*
* Note: *

Note that * this function may also return error codes from previous, asynchronous * launches. *

*
*

*
* * * @return CUDA_SUCCESS, CUDA_ERROR_DEINITIALIZED, CUDA_ERROR_NOT_INITIALIZED, * CUDA_ERROR_INVALID_CONTEXT, CUDA_ERROR_UNKNOWN * * @see JCudaDriver#cuGLMapBufferObject * @see JCudaDriver#cuGLRegisterBufferObject * @see JCudaDriver#cuGLUnmapBufferObject * @see JCudaDriver#cuGLUnregisterBufferObject * @see JCudaDriver#cuGLMapBufferObjectAsync * @see JCudaDriver#cuGLUnmapBufferObjectAsync * @see JCudaDriver#cuGLSetBufferObjectMapFlags */ public static int cuGLInit() { return checkResult(cuGLInitNative()); } private static native int cuGLInitNative(); /** * Create a CUDA context for interoperability with OpenGL. * *
     * CUresult cuGLCtxCreate (
     *      CUcontext* pCtx,
     *      unsigned int  Flags,
     *      CUdevice device )
     * 
*
*

Create a CUDA context for interoperability with OpenGL. Deprecated: This function is deprecated as of CUDA 5.0. This function is deprecated and should no longer be used. It is no longer necessary to associate a CUDA context with an OpenGL context in order to achieve maximum interoperability performance.

*
* Note: *

Note that * this function may also return error codes from previous, asynchronous * launches. *

*
*

*
* * @param pCtx Returned CUDA context * @param Flags Options for CUDA context creation * @param device Device on which to create the context * * @return CUDA_SUCCESS, CUDA_ERROR_DEINITIALIZED, CUDA_ERROR_NOT_INITIALIZED, * CUDA_ERROR_INVALID_CONTEXT, CUDA_ERROR_INVALID_VALUE, * CUDA_ERROR_OUT_OF_MEMORY * * @see JCudaDriver#cuCtxCreate * @see JCudaDriver#cuGLInit * @see JCudaDriver#cuGLMapBufferObject * @see JCudaDriver#cuGLRegisterBufferObject * @see JCudaDriver#cuGLUnmapBufferObject * @see JCudaDriver#cuGLUnregisterBufferObject * @see JCudaDriver#cuGLMapBufferObjectAsync * @see JCudaDriver#cuGLUnmapBufferObjectAsync * @see JCudaDriver#cuGLSetBufferObjectMapFlags */ public static int cuGLCtxCreate( CUcontext pCtx, int Flags, CUdevice device ) { return checkResult(cuGLCtxCreateNative(pCtx, Flags, device)); } private static native int cuGLCtxCreateNative(CUcontext pCtx, int Flags, CUdevice device); /** * Gets the CUDA devices associated with the current OpenGL context. * *
     * CUresult cuGLGetDevices (
     *      unsigned int* pCudaDeviceCount,
     *      CUdevice* pCudaDevices,
     *      unsigned int  cudaDeviceCount,
     *      CUGLDeviceList deviceList )
     * 
*
*

Gets the CUDA devices associated with * the current OpenGL context. Returns in *pCudaDeviceCount * the number of CUDA-compatible devices corresponding to the current * OpenGL context. Also returns in *pCudaDevices at most * cudaDeviceCount of the CUDA-compatible devices corresponding to the * current OpenGL context. If any of the GPUs being * used by the current OpenGL context are * not CUDA capable then the call will return CUDA_ERROR_NO_DEVICE. *

*

The deviceList argument may * be any of the following: *

    *
  • *

    CU_GL_DEVICE_LIST_ALL: Query * all devices used by the current OpenGL context. *

    *
  • *
  • *

    CU_GL_DEVICE_LIST_CURRENT_FRAME: * Query the devices used by the current OpenGL context to render the * current frame (in SLI). *

    *
  • *
  • *

    CU_GL_DEVICE_LIST_NEXT_FRAME: * Query the devices used by the current OpenGL context to render the next * frame (in SLI). Note that this is a prediction, * it can't be guaranteed that this * is correct in all cases. *

    *
  • *
*

*
* Note: *

Note that this * function may also return error codes from previous, asynchronous * launches. *

*
*

*
* * @param pCudaDeviceCount Returned number of CUDA devices. * @param pCudaDevices Returned CUDA devices. * @param cudaDeviceCount The size of the output device array pCudaDevices. * @param deviceList The set of devices to return. * * @return CUDA_SUCCESS, CUDA_ERROR_NO_DEVICE, * CUDA_ERROR_INVALID_VALUECUDA_ERROR_INVALID_CONTEXT * */ public static int cuGLGetDevices(int pCudaDeviceCount[], CUdevice pCudaDevices[], int cudaDeviceCount, int CUGLDeviceList_deviceList) { return checkResult(cuGLGetDevicesNative(pCudaDeviceCount, pCudaDevices, cudaDeviceCount, CUGLDeviceList_deviceList)); } private static native int cuGLGetDevicesNative(int pCudaDeviceCount[], CUdevice pCudaDevices[], int cudaDeviceCount, int CUGLDeviceList_deviceList); /** * Registers an OpenGL buffer object. * *
     * CUresult cuGraphicsGLRegisterBuffer (
     *      CUgraphicsResource* pCudaResource,
     *      GLuint buffer,
     *      unsigned int  Flags )
     * 
*
*

Registers an OpenGL buffer object. * Registers the buffer object specified by buffer for access * by CUDA. A handle to the registered object is returned as pCudaResource. The register flags Flags specify the * intended usage, as follows: *

*
    *
  • *

    CU_GRAPHICS_REGISTER_FLAGS_NONE: * Specifies no hints about how this resource will be used. It is therefore * assumed that this * resource will be read from and * written to by CUDA. This is the default value. *

    *
  • *
  • *

    CU_GRAPHICS_REGISTER_FLAGS_READ_ONLY: * Specifies that CUDA will not write to this resource. *

    *
  • *
  • *

    CU_GRAPHICS_REGISTER_FLAGS_WRITE_DISCARD: Specifies that CUDA will * not read from this resource and will write over the entire * contents of the resource, so * none of the data previously stored in the resource will be preserved. *

    *
  • *
*

*
* Note: *

Note that this * function may also return error codes from previous, asynchronous * launches. *

*
*

*
* * @param pCudaResource Pointer to the returned object handle * @param buffer name of buffer object to be registered * @param Flags Register flags * * @return CUDA_SUCCESS, CUDA_ERROR_INVALID_HANDLE, CUDA_ERROR_ALREADY_MAPPED, * CUDA_ERROR_INVALID_CONTEXT, * * @see JCudaDriver#cuGraphicsUnregisterResource * @see JCudaDriver#cuGraphicsMapResources * @see JCudaDriver#cuGraphicsResourceGetMappedPointer */ public static int cuGraphicsGLRegisterBuffer(CUgraphicsResource pCudaResource, int buffer, int Flags) { return checkResult(cuGraphicsGLRegisterBufferNative(pCudaResource, buffer, Flags)); } private static native int cuGraphicsGLRegisterBufferNative(CUgraphicsResource pCudaResource, int buffer, int Flags); /** * Register an OpenGL texture or renderbuffer object. * *
     * CUresult cuGraphicsGLRegisterImage (
     *      CUgraphicsResource* pCudaResource,
     *      GLuint image,
     *      GLenum target,
     *      unsigned int  Flags )
     * 
*
*

Register an OpenGL texture or renderbuffer * object. Registers the texture or renderbuffer object specified by image for access by CUDA. A handle to the registered object is * returned as pCudaResource. *

*

target must match the type of * the object, and must be one of GL_TEXTURE_2D, GL_TEXTURE_RECTANGLE, * GL_TEXTURE_CUBE_MAP, GL_TEXTURE_3D, * GL_TEXTURE_2D_ARRAY, or GL_RENDERBUFFER. *

*

The register flags Flags * specify the intended usage, as follows: *

*
    *
  • *

    CU_GRAPHICS_REGISTER_FLAGS_NONE: * Specifies no hints about how this resource will be used. It is therefore * assumed that this * resource will be read from and * written to by CUDA. This is the default value. *

    *
  • *
  • *

    CU_GRAPHICS_REGISTER_FLAGS_READ_ONLY: * Specifies that CUDA will not write to this resource. *

    *
  • *
  • *

    CU_GRAPHICS_REGISTER_FLAGS_WRITE_DISCARD: Specifies that CUDA will * not read from this resource and will write over the entire * contents of the resource, so * none of the data previously stored in the resource will be preserved. *

    *
  • *
  • *

    CU_GRAPHICS_REGISTER_FLAGS_SURFACE_LDST: * Specifies that CUDA will bind this resource to a surface * reference. *

    *
  • *
  • *

    CU_GRAPHICS_REGISTER_FLAGS_TEXTURE_GATHER: Specifies that CUDA will * perform texture gather operations on this resource. *

    *
  • *
*

*

The following image formats are * supported. For brevity's sake, the list is abbreviated. For ex., {GL_R, * GL_RG} X {8, 16} would * expand to the following 4 formats {GL_R8, * GL_R16, GL_RG8, GL_RG16} : *

    *
  • *

    GL_RED, GL_RG, GL_RGBA, * GL_LUMINANCE, GL_ALPHA, GL_LUMINANCE_ALPHA, GL_INTENSITY *

    *
  • *
  • *

    {GL_R, GL_RG, GL_RGBA} X {8, * 16, 16F, 32F, 8UI, 16UI, 32UI, 8I, 16I, 32I} *

    *
  • *
  • *

    {GL_LUMINANCE, GL_ALPHA, * GL_LUMINANCE_ALPHA, GL_INTENSITY} X {8, 16, 16F_ARB, 32F_ARB, 8UI_EXT, * 16UI_EXT, 32UI_EXT, 8I_EXT, * 16I_EXT, 32I_EXT} *

    *
  • *
*

*

The following image classes are currently * disallowed: *

    *
  • *

    Textures with borders

    *
  • *
  • *

    Multisampled renderbuffers

    *
  • *
*

*
* Note: *

Note that this * function may also return error codes from previous, asynchronous * launches. *

*
*

*
* * @param pCudaResource Pointer to the returned object handle * @param image name of texture or renderbuffer object to be registered * @param target Identifies the type of object specified by image * @param Flags Register flags * * @return CUDA_SUCCESS, CUDA_ERROR_INVALID_HANDLE, CUDA_ERROR_ALREADY_MAPPED, * CUDA_ERROR_INVALID_CONTEXT, * * @see JCudaDriver#cuGraphicsUnregisterResource * @see JCudaDriver#cuGraphicsMapResources * @see JCudaDriver#cuGraphicsSubResourceGetMappedArray */ public static int cuGraphicsGLRegisterImage(CUgraphicsResource pCudaResource, int image, int target, int Flags ) { return checkResult(cuGraphicsGLRegisterImageNative(pCudaResource, image, target, Flags)); } private static native int cuGraphicsGLRegisterImageNative(CUgraphicsResource pCudaResource, int image, int target, int Flags); /** * Registers an OpenGL buffer object. * *
     * CUresult cuGLRegisterBufferObject (
     *      GLuint buffer )
     * 
*
*

Registers an OpenGL buffer object. Deprecated: This function is deprecated as of CUDA 3.0. Registers the buffer object specified by buffer for access by CUDA. This function must be called before CUDA can map the buffer object. There must be a valid OpenGL context bound to the current thread when this function is called, and the buffer name is resolved by that context.

*
* Note: *

Note that * this function may also return error codes from previous, asynchronous * launches. *

*
*

*
*
     * @param bufferobj The name of the buffer object to register.
     *
     * @return CUDA_SUCCESS, CUDA_ERROR_DEINITIALIZED, CUDA_ERROR_NOT_INITIALIZED,
     * CUDA_ERROR_INVALID_CONTEXT, CUDA_ERROR_ALREADY_MAPPED
     *
     * @see JCudaDriver#cuGraphicsGLRegisterBuffer
     *
     * @deprecated Deprecated as of CUDA 3.0. See
     * {@link JCudaDriver#cuGraphicsGLRegisterBuffer}.
     */
    @Deprecated
    public static int cuGLRegisterBufferObject(int bufferobj) {
        // Forward to the native binding and pass its status code through checkResult.
        final int status = cuGLRegisterBufferObjectNative(bufferobj);
        return checkResult(status);
    }

    /** Native binding invoked by {@link #cuGLRegisterBufferObject}. */
    private static native int cuGLRegisterBufferObjectNative(int bufferobj);

    /**
     * Maps an OpenGL buffer object.
     *
     *
     * CUresult cuGLMapBufferObject (
     *      CUdeviceptr* dptr,
     *      size_t* size,
     *      GLuint buffer )
     * 
*
*

Maps an OpenGL buffer object. Deprecated: This function is deprecated as of CUDA 3.0. Maps the buffer object specified by buffer into the address space of the current CUDA context and returns in *dptr and *size the base pointer and size of the resulting mapping.

*

There must be a valid OpenGL context * bound to the current thread when this function is called. This must be * the same context, * or a member of the same shareGroup, * as the context that was bound when the buffer was registered. *

*

All streams in the current CUDA * context are synchronized with the current GL context. *

*
* Note: *

Note that * this function may also return error codes from previous, asynchronous * launches. *

*
*

*
*
     * @param dptr Returned mapped base pointer
     * @param size Returned size of mapping
     * @param bufferobj The name of the buffer object to map
     *
     * @return CUDA_SUCCESS, CUDA_ERROR_DEINITIALIZED, CUDA_ERROR_NOT_INITIALIZED,
     * CUDA_ERROR_INVALID_CONTEXT, CUDA_ERROR_INVALID_VALUE,
     * CUDA_ERROR_MAP_FAILED
     *
     * @see JCudaDriver#cuGraphicsMapResources
     *
     * @deprecated Deprecated as of CUDA 3.0. See
     * {@link JCudaDriver#cuGraphicsMapResources}.
     */
    @Deprecated
    public static int cuGLMapBufferObject(CUdeviceptr dptr, long size[], int bufferobj) {
        // Forward to the native binding and pass its status code through checkResult.
        final int status = cuGLMapBufferObjectNative(dptr, size, bufferobj);
        return checkResult(status);
    }

    /** Native binding invoked by {@link #cuGLMapBufferObject}. */
    private static native int cuGLMapBufferObjectNative(CUdeviceptr dptr, long size[], int bufferobj);

    /**
     * Unmaps an OpenGL buffer object.
     *
     *
     * CUresult cuGLUnmapBufferObject (
     *      GLuint buffer )
     * 
*
*

Unmaps an OpenGL buffer object. Deprecated: This function is deprecated as of CUDA 3.0. Unmaps the buffer object specified by buffer for access by CUDA.

*

There must be a valid OpenGL context * bound to the current thread when this function is called. This must be * the same context, * or a member of the same shareGroup, * as the context that was bound when the buffer was registered. *

*

All streams in the current CUDA * context are synchronized with the current GL context. *

*
* Note: *

Note that * this function may also return error codes from previous, asynchronous * launches. *

*
*

*
*
     * @param bufferobj Buffer object to unmap
     *
     * @return CUDA_SUCCESS, CUDA_ERROR_DEINITIALIZED, CUDA_ERROR_NOT_INITIALIZED,
     * CUDA_ERROR_INVALID_CONTEXT, CUDA_ERROR_INVALID_VALUE
     *
     * @see JCudaDriver#cuGraphicsUnmapResources
     *
     * @deprecated Deprecated as of CUDA 3.0. See
     * {@link JCudaDriver#cuGraphicsUnmapResources}.
     */
    @Deprecated
    public static int cuGLUnmapBufferObject(int bufferobj) {
        // Forward to the native binding and pass its status code through checkResult.
        final int status = cuGLUnmapBufferObjectNative(bufferobj);
        return checkResult(status);
    }

    /** Native binding invoked by {@link #cuGLUnmapBufferObject}. */
    private static native int cuGLUnmapBufferObjectNative(int bufferobj);

    /**
     * Unregister an OpenGL buffer object.
     *
     *
     * CUresult cuGLUnregisterBufferObject (
     *      GLuint buffer )
     * 
*
*

Unregister an OpenGL buffer object. Deprecated: This function is deprecated as of CUDA 3.0. Unregisters the buffer object specified by buffer. This releases any resources associated with the registered buffer. After this call, the buffer may no longer be mapped for access by CUDA.

*

There must be a valid OpenGL context * bound to the current thread when this function is called. This must be * the same context, * or a member of the same shareGroup, * as the context that was bound when the buffer was registered. *

*
* Note: *

Note that * this function may also return error codes from previous, asynchronous * launches. *

*
*

*
*
     * @param bufferobj Name of the buffer object to unregister
     *
     * @return CUDA_SUCCESS, CUDA_ERROR_DEINITIALIZED, CUDA_ERROR_NOT_INITIALIZED,
     * CUDA_ERROR_INVALID_CONTEXT, CUDA_ERROR_INVALID_VALUE
     *
     * @see JCudaDriver#cuGraphicsUnregisterResource
     *
     * @deprecated Deprecated as of CUDA 3.0. See
     * {@link JCudaDriver#cuGraphicsUnregisterResource}.
     */
    @Deprecated
    public static int cuGLUnregisterBufferObject(int bufferobj) {
        // Forward to the native binding and pass its status code through checkResult.
        final int status = cuGLUnregisterBufferObjectNative(bufferobj);
        return checkResult(status);
    }

    /** Native binding invoked by {@link #cuGLUnregisterBufferObject}. */
    private static native int cuGLUnregisterBufferObjectNative(int bufferobj);

    /**
     * Set the map flags for an OpenGL buffer object.
     *
     *
     * CUresult cuGLSetBufferObjectMapFlags (
     *      GLuint buffer,
     *      unsigned int  Flags )
     * 
*
*

Set the map flags for an OpenGL buffer object. Deprecated: This function is deprecated as of CUDA 3.0. Sets the map flags for the buffer object specified by buffer.

*

Changes to Flags will take * effect the next time buffer is mapped. The Flags * argument may be any of the following: *

    *
  • *

    CU_GL_MAP_RESOURCE_FLAGS_NONE: * Specifies no hints about how this resource will be used. It is therefore * assumed that this * resource will be read from * and written to by CUDA kernels. This is the default value. *

    *
  • *
  • *

    CU_GL_MAP_RESOURCE_FLAGS_READ_ONLY: * Specifies that CUDA kernels which access this resource will not write * to this resource. *

    *
  • *
  • *

    CU_GL_MAP_RESOURCE_FLAGS_WRITE_DISCARD: Specifies that CUDA kernels * which access this resource will not read from this resource * and will write over the * entire contents of the resource, so none of the data previously stored * in the resource will be preserved. *

    *
  • *
*

*

If buffer has not been * registered for use with CUDA, then CUDA_ERROR_INVALID_HANDLE is * returned. If buffer is presently mapped for access by CUDA, * then CUDA_ERROR_ALREADY_MAPPED is returned. *

*

There must be a valid OpenGL context * bound to the current thread when this function is called. This must be * the same context, * or a member of the same shareGroup, * as the context that was bound when the buffer was registered. *

*
* Note: *

Note that * this function may also return error codes from previous, asynchronous * launches. *

*
*

*
*
     * @param buffer Buffer object whose map flags are set
     * @param Flags Map flags
     *
     * @return CUDA_SUCCESS, CUDA_ERROR_NOT_INITIALIZED, CUDA_ERROR_INVALID_HANDLE,
     * CUDA_ERROR_ALREADY_MAPPED, CUDA_ERROR_INVALID_CONTEXT
     *
     * @see JCudaDriver#cuGraphicsResourceSetMapFlags
     *
     * @deprecated Deprecated as of CUDA 3.0. See
     * {@link JCudaDriver#cuGraphicsResourceSetMapFlags}.
     */
    @Deprecated
    public static int cuGLSetBufferObjectMapFlags(int buffer, int Flags) {
        // Forward to the native binding and pass its status code through checkResult.
        final int status = cuGLSetBufferObjectMapFlagsNative(buffer, Flags);
        return checkResult(status);
    }

    /** Native binding invoked by {@link #cuGLSetBufferObjectMapFlags}. */
    private static native int cuGLSetBufferObjectMapFlagsNative(int buffer, int Flags);

    /**
     * Maps an OpenGL buffer object.
     *
     *
     * CUresult cuGLMapBufferObjectAsync (
     *      CUdeviceptr* dptr,
     *      size_t* size,
     *      GLuint buffer,
     *      CUstream hStream )
     * 
*
*

Maps an OpenGL buffer object. Deprecated: This function is deprecated as of CUDA 3.0. Maps the buffer object specified by buffer into the address space of the current CUDA context and returns in *dptr and *size the base pointer and size of the resulting mapping.

*

There must be a valid OpenGL context * bound to the current thread when this function is called. This must be * the same context, * or a member of the same shareGroup, * as the context that was bound when the buffer was registered. *

*

Stream hStream in the * current CUDA context is synchronized with the current GL context. *

*
* Note: *

Note that * this function may also return error codes from previous, asynchronous * launches. *

*
*

*
* * @param dptr Returned mapped base pointer * @param size Returned size of mapping * @param buffer The name of the buffer object to map * @param hStream Stream to synchronize * * @return CUDA_SUCCESS, CUDA_ERROR_DEINITIALIZED, CUDA_ERROR_NOT_INITIALIZED, * CUDA_ERROR_INVALID_CONTEXT, CUDA_ERROR_INVALID_VALUE, * CUDA_ERROR_MAP_FAILED * * @see JCudaDriver#cuGraphicsMapResources */ public static int cuGLMapBufferObjectAsync( CUdeviceptr dptr, long size[], int buffer, CUstream hStream) { return checkResult((cuGLMapBufferObjectAsyncNative(dptr, size, buffer, hStream))); } private static native int cuGLMapBufferObjectAsyncNative( CUdeviceptr dptr, long size[], int buffer, CUstream hStream); /** * Unmaps an OpenGL buffer object. * *
     * CUresult cuGLUnmapBufferObjectAsync (
     *      GLuint buffer,
     *      CUstream hStream )
     * 
*
*

Unmaps an OpenGL buffer object. Deprecated: This function is deprecated as of CUDA 3.0. Unmaps the buffer object specified by buffer for access by CUDA.

*

There must be a valid OpenGL context * bound to the current thread when this function is called. This must be * the same context, * or a member of the same shareGroup, * as the context that was bound when the buffer was registered. *

*

Stream hStream in the * current CUDA context is synchronized with the current GL context. *

*
* Note: *

Note that * this function may also return error codes from previous, asynchronous * launches. *

*
*

*
*
     * @param buffer Name of the buffer object to unmap
     * @param hStream Stream to synchronize
     *
     * @return CUDA_SUCCESS, CUDA_ERROR_DEINITIALIZED, CUDA_ERROR_NOT_INITIALIZED,
     * CUDA_ERROR_INVALID_CONTEXT, CUDA_ERROR_INVALID_VALUE
     *
     * @see JCudaDriver#cuGraphicsUnmapResources
     *
     * @deprecated Deprecated as of CUDA 3.0. See
     * {@link JCudaDriver#cuGraphicsUnmapResources}.
     */
    @Deprecated
    public static int cuGLUnmapBufferObjectAsync(int buffer, CUstream hStream) {
        // Forward to the native binding and pass its status code through checkResult.
        final int status = cuGLUnmapBufferObjectAsyncNative(buffer, hStream);
        return checkResult(status);
    }

    /** Native binding invoked by {@link #cuGLUnmapBufferObjectAsync}. */
    private static native int cuGLUnmapBufferObjectAsyncNative(int buffer, CUstream hStream);

    /**
     * Unregisters a graphics resource for access by CUDA.
     *
     *
     * CUresult cuGraphicsUnregisterResource (
     *      CUgraphicsResource resource )
     * 
*
*

Unregisters a graphics resource for * access by CUDA. Unregisters the graphics resource resource * so it is not accessible by CUDA unless registered again. *

*

If resource is invalid then * CUDA_ERROR_INVALID_HANDLE is returned. *

*
* Note: *

Note that this * function may also return error codes from previous, asynchronous * launches. *

*
*

*
*
     * @param resource Resource to unregister
     *
     * @return CUDA_SUCCESS, CUDA_ERROR_DEINITIALIZED, CUDA_ERROR_NOT_INITIALIZED,
     * CUDA_ERROR_INVALID_CONTEXT, CUDA_ERROR_INVALID_HANDLE,
     * CUDA_ERROR_UNKNOWN
     *
     * @see JCudaDriver#cuGraphicsGLRegisterBuffer
     * @see JCudaDriver#cuGraphicsGLRegisterImage
     */
    public static int cuGraphicsUnregisterResource(CUgraphicsResource resource) {
        // Native status first; checkResult decides what is handed back to the caller.
        final int status = cuGraphicsUnregisterResourceNative(resource);
        return checkResult(status);
    }

    /** Native binding invoked by {@link #cuGraphicsUnregisterResource}. */
    private static native int cuGraphicsUnregisterResourceNative(CUgraphicsResource resource);

    /**
     * Get an array through which to access a subresource of a mapped graphics resource.
     *
     *
     * CUresult cuGraphicsSubResourceGetMappedArray (
     *      CUarray* pArray,
     *      CUgraphicsResource resource,
     *      unsigned int  arrayIndex,
     *      unsigned int  mipLevel )
     * 
*
*

Get an array through which to access a * subresource of a mapped graphics resource. Returns in *pArray * an array through which the subresource of the mapped graphics resource * resource which corresponds to array index arrayIndex * and mipmap level mipLevel may be accessed. The value set in * *pArray may change every time that resource is * mapped. *

*

If resource is not a texture * then it cannot be accessed via an array and CUDA_ERROR_NOT_MAPPED_AS_ARRAY * is returned. If arrayIndex is not a valid array index for * resource then CUDA_ERROR_INVALID_VALUE is returned. If mipLevel is not a valid mipmap level for resource then * CUDA_ERROR_INVALID_VALUE is returned. If resource is not * mapped then CUDA_ERROR_NOT_MAPPED is returned. *

*
* Note: *

Note that this * function may also return error codes from previous, asynchronous * launches. *

*
*

*
* * @param pArray Returned array through which a subresource of resource may be accessed * @param resource Mapped resource to access * @param arrayIndex Array index for array textures or cubemap face index as defined by CUarray_cubemap_face for cubemap textures for the subresource to access * @param mipLevel Mipmap level for the subresource to access * * @return CUDA_SUCCESS, CUDA_ERROR_DEINITIALIZED, CUDA_ERROR_NOT_INITIALIZED, * CUDA_ERROR_INVALID_CONTEXT, CUDA_ERROR_INVALID_VALUE, * CUDA_ERROR_INVALID_HANDLE, * CUDA_ERROR_NOT_MAPPEDCUDA_ERROR_NOT_MAPPED_AS_ARRAY * * @see JCudaDriver#cuGraphicsResourceGetMappedPointer */ public static int cuGraphicsSubResourceGetMappedArray(CUarray pArray, CUgraphicsResource resource, int arrayIndex, int mipLevel) { return checkResult(cuGraphicsSubResourceGetMappedArrayNative(pArray, resource, arrayIndex, mipLevel)); } private static native int cuGraphicsSubResourceGetMappedArrayNative(CUarray pArray, CUgraphicsResource resource, int arrayIndex, int mipLevel); /** * Get a mipmapped array through which to access a mapped graphics resource. * *
     * CUresult cuGraphicsResourceGetMappedMipmappedArray (
     *      CUmipmappedArray* pMipmappedArray,
     *      CUgraphicsResource resource )
     * 
*
*

Get a mipmapped array through which to access a mapped graphics resource. Returns in *pMipmappedArray a mipmapped array through which the mapped graphics resource resource may be accessed. The value set in *pMipmappedArray may change every time that resource is mapped.

*

If resource is not a texture * then it cannot be accessed via a mipmapped array and * CUDA_ERROR_NOT_MAPPED_AS_ARRAY is returned. If resource is * not mapped then CUDA_ERROR_NOT_MAPPED is returned. *

*
* Note: *

Note that this * function may also return error codes from previous, asynchronous * launches. *

*
*

*
* * @param pMipmappedArray Returned mipmapped array through which resource may be accessed * @param resource Mapped resource to access * * @return CUDA_SUCCESS, CUDA_ERROR_DEINITIALIZED, CUDA_ERROR_NOT_INITIALIZED, * CUDA_ERROR_INVALID_CONTEXT, CUDA_ERROR_INVALID_VALUE, * CUDA_ERROR_INVALID_HANDLE, * CUDA_ERROR_NOT_MAPPEDCUDA_ERROR_NOT_MAPPED_AS_ARRAY * * @see JCudaDriver#cuGraphicsResourceGetMappedPointer */ public static int cuGraphicsResourceGetMappedMipmappedArray(CUmipmappedArray pMipmappedArray, CUgraphicsResource resource) { return checkResult(cuGraphicsResourceGetMappedMipmappedArrayNative(pMipmappedArray, resource)); } private static native int cuGraphicsResourceGetMappedMipmappedArrayNative(CUmipmappedArray pMipmappedArray, CUgraphicsResource resource); /** * Get a device pointer through which to access a mapped graphics resource. * *
     * CUresult cuGraphicsResourceGetMappedPointer (
     *      CUdeviceptr* pDevPtr,
     *      size_t* pSize,
     *      CUgraphicsResource resource )
     * 
*
*

Get a device pointer through which to access a mapped graphics resource. Returns in *pDevPtr a pointer through which the mapped graphics resource resource may be accessed. Returns in pSize the size of the memory in bytes which may be accessed from that pointer. The value set in *pDevPtr may change every time that resource is mapped.

*

If resource is not a buffer * then it cannot be accessed via a pointer and CUDA_ERROR_NOT_MAPPED_AS_POINTER * is returned. If resource is not mapped then CUDA_ERROR_NOT_MAPPED * is returned. * *

*
* Note: *

Note that this * function may also return error codes from previous, asynchronous * launches. *

*
*

*
* * @param pDevPtr Returned pointer through which resource may be accessed * @param pSize Returned size of the buffer accessible starting at *pPointer * @param resource Mapped resource to access * * @return CUDA_SUCCESS, CUDA_ERROR_DEINITIALIZED, CUDA_ERROR_NOT_INITIALIZED, * CUDA_ERROR_INVALID_CONTEXT, CUDA_ERROR_INVALID_VALUE, * CUDA_ERROR_INVALID_HANDLE, * CUDA_ERROR_NOT_MAPPEDCUDA_ERROR_NOT_MAPPED_AS_POINTER * * @see JCudaDriver#cuGraphicsMapResources * @see JCudaDriver#cuGraphicsSubResourceGetMappedArray */ public static int cuGraphicsResourceGetMappedPointer( CUdeviceptr pDevPtr, long pSize[], CUgraphicsResource resource ) { return checkResult(cuGraphicsResourceGetMappedPointerNative(pDevPtr, pSize, resource)); } private static native int cuGraphicsResourceGetMappedPointerNative(CUdeviceptr pDevPtr, long pSize[], CUgraphicsResource resource); /** * Set usage flags for mapping a graphics resource. * *
     * CUresult cuGraphicsResourceSetMapFlags (
     *      CUgraphicsResource resource,
     *      unsigned int  flags )
     * 
*
*

Set usage flags for mapping a graphics * resource. Set flags for mapping the graphics resource resource. *

*

Changes to flags will take * effect the next time resource is mapped. The flags * argument may be any of the following: *

*
    *
  • *

    CU_GRAPHICS_MAP_RESOURCE_FLAGS_NONE: * Specifies no hints about how this resource will be used. It is therefore * assumed that * this resource will be read from * and written to by CUDA kernels. This is the default value. *

    *
  • *
  • *

    CU_GRAPHICS_MAP_RESOURCE_FLAGS_READONLY: * Specifies that CUDA kernels which access this resource will not write * to this resource. *

    *
  • *
  • *

    CU_GRAPHICS_MAP_RESOURCE_FLAGS_WRITEDISCARD: Specifies that CUDA * kernels which access this resource will not read from this * resource and will write over * the entire contents of the resource, so none of the data previously * stored in the resource will * be preserved. *

    *
  • *
*

*

If resource is presently * mapped for access by CUDA then CUDA_ERROR_ALREADY_MAPPED is returned. * If flags is not one of the above values then * CUDA_ERROR_INVALID_VALUE is returned. *

*
* Note: *

Note that this * function may also return error codes from previous, asynchronous * launches. *

*
*

*
*
     * @param resource Registered resource to set flags for
     * @param flags Parameters for resource mapping
     *
     * @return CUDA_SUCCESS, CUDA_ERROR_DEINITIALIZED, CUDA_ERROR_NOT_INITIALIZED,
     * CUDA_ERROR_INVALID_CONTEXT, CUDA_ERROR_INVALID_VALUE,
     * CUDA_ERROR_INVALID_HANDLE, CUDA_ERROR_ALREADY_MAPPED
     *
     * @see JCudaDriver#cuGraphicsMapResources
     */
    public static int cuGraphicsResourceSetMapFlags(CUgraphicsResource resource, int flags)
    {
        // Forward to the native binding, then pass its status code through checkResult.
        final int status = cuGraphicsResourceSetMapFlagsNative(resource, flags);
        return checkResult(status);
    }

    private static native int cuGraphicsResourceSetMapFlagsNative(
        CUgraphicsResource resource, int flags);

    /**
     * Map graphics resources for access by CUDA.
     *
     *
     * CUresult cuGraphicsMapResources (
     *      unsigned int  count,
     *      CUgraphicsResource* resources,
     *      CUstream hStream )
     * 
*
*

Map graphics resources for access by * CUDA. Maps the count graphics resources in resources * for access by CUDA. *

*

The resources in resources * may be accessed by CUDA until they are unmapped. The graphics API from * which resources were registered should not access any * resources while they are mapped by CUDA. If an application does so, * the results are * undefined. *

*

This function provides the synchronization * guarantee that any graphics calls issued before cuGraphicsMapResources() * will complete before any subsequent CUDA work issued in stream * begins. *

*

If resources includes any * duplicate entries then CUDA_ERROR_INVALID_HANDLE is returned. If any * of resources are presently mapped for access by CUDA then * CUDA_ERROR_ALREADY_MAPPED is returned. *

*
* Note: *

Note that this * function may also return error codes from previous, asynchronous * launches. *

*
*

*
* * @param count Number of resources to map * @param resources Resources to map for CUDA usage * @param hStream Stream with which to synchronize * * @return CUDA_SUCCESS, CUDA_ERROR_DEINITIALIZED, CUDA_ERROR_NOT_INITIALIZED, * CUDA_ERROR_INVALID_CONTEXT, CUDA_ERROR_INVALID_HANDLE, * CUDA_ERROR_ALREADY_MAPPED, CUDA_ERROR_UNKNOWN * * @see JCudaDriver#cuGraphicsResourceGetMappedPointer * @see JCudaDriver#cuGraphicsSubResourceGetMappedArray * @see JCudaDriver#cuGraphicsUnmapResources */ public static int cuGraphicsMapResources(int count, CUgraphicsResource resources[], CUstream hStream) { return checkResult(cuGraphicsMapResourcesNative(count, resources, hStream)); } private static native int cuGraphicsMapResourcesNative(int count, CUgraphicsResource resources[], CUstream hStream); /** * Unmap graphics resources. * *
     * CUresult cuGraphicsUnmapResources (
     *      unsigned int  count,
     *      CUgraphicsResource* resources,
     *      CUstream hStream )
     * 
*
*

Unmap graphics resources. Unmaps the * count graphics resources in resources. *

*

Once unmapped, the resources in resources may not be accessed by CUDA until they are mapped * again. *

*

This function provides the synchronization * guarantee that any CUDA work issued in stream before * cuGraphicsUnmapResources() will complete before any subsequently issued * graphics work begins. *

*

If resources includes any * duplicate entries then CUDA_ERROR_INVALID_HANDLE is returned. If any * of resources are not presently mapped for access by CUDA then * CUDA_ERROR_NOT_MAPPED is returned. *

*
* Note: *

Note that this * function may also return error codes from previous, asynchronous * launches. *

*
*

*
* * @param count Number of resources to unmap * @param resources Resources to unmap * @param hStream Stream with which to synchronize * * @return CUDA_SUCCESS, CUDA_ERROR_DEINITIALIZED, CUDA_ERROR_NOT_INITIALIZED, * CUDA_ERROR_INVALID_CONTEXT, CUDA_ERROR_INVALID_HANDLE, * CUDA_ERROR_NOT_MAPPED, CUDA_ERROR_UNKNOWN * * @see JCudaDriver#cuGraphicsMapResources */ public static int cuGraphicsUnmapResources( int count, CUgraphicsResource resources[], CUstream hStream) { return checkResult(cuGraphicsUnmapResourcesNative(count, resources, hStream)); } private static native int cuGraphicsUnmapResourcesNative(int count, CUgraphicsResource resources[], CUstream hStream); /** * Set resource limits. * *
     * CUresult cuCtxSetLimit (
     *      CUlimit limit,
     *      size_t value )
     * 
*
*

Set resource limits. Setting limit to value is a request by the application to * update the current limit maintained by the context. The driver is free * to modify the requested * value to meet h/w requirements (this * could be clamping to minimum or maximum values, rounding up to nearest * element size, * etc). The application can use * cuCtxGetLimit() to find out exactly what the limit has been set to. *

*

Setting each CUlimit has its own specific * restrictions, so each is discussed here. *

*
    *
  • *

    CU_LIMIT_STACK_SIZE controls * the stack size in bytes of each GPU thread. This limit is only * applicable to devices of compute capability 2.0 and * higher. Attempting to set this * limit on devices of compute capability less than 2.0 will result in * the error CUDA_ERROR_UNSUPPORTED_LIMIT being returned. *

    *
  • *
*

*
    *
  • *

    CU_LIMIT_PRINTF_FIFO_SIZE * controls the size in bytes of the FIFO used by the printf() device * system call. Setting CU_LIMIT_PRINTF_FIFO_SIZE must be performed before * launching any kernel that uses the printf() device system call, * otherwise CUDA_ERROR_INVALID_VALUE will be returned. This limit is only * applicable to devices of compute capability 2.0 and higher. Attempting * to set this limit * on devices of compute capability * less than 2.0 will result in the error CUDA_ERROR_UNSUPPORTED_LIMIT * being returned. *

    *
  • *
*

*
    *
  • *

    CU_LIMIT_MALLOC_HEAP_SIZE * controls the size in bytes of the heap used by the malloc() and free() * device system calls. Setting CU_LIMIT_MALLOC_HEAP_SIZE must be performed * before launching any kernel that uses the malloc() or free() device * system calls, otherwise CUDA_ERROR_INVALID_VALUE will be returned. This * limit is only applicable to devices of compute capability 2.0 and * higher. Attempting to set this limit * on devices of compute capability * less than 2.0 will result in the error CUDA_ERROR_UNSUPPORTED_LIMIT * being returned. *

    *
  • *
*

*
    *
  • *

    CU_LIMIT_DEV_RUNTIME_SYNC_DEPTH * controls the maximum nesting depth of a grid at which a thread can * safely call cudaDeviceSynchronize(). Setting this limit * must be performed before any * launch of a kernel that uses the device runtime and calls * cudaDeviceSynchronize() above the default * sync depth, two levels of grids. * Calls to cudaDeviceSynchronize() will fail with error code * cudaErrorSyncDepthExceeded if * the limitation is violated. This * limit can be set smaller than the default or up the maximum launch * depth of 24. When setting * this limit, keep in mind that * additional levels of sync depth require the driver to reserve large * amounts of device memory * which can no longer be used for * user allocations. If these reservations of device memory fail, * cuCtxSetLimit will return CUDA_ERROR_OUT_OF_MEMORY, and the limit can * be reset to a lower value. This limit is only applicable to devices of * compute capability 3.5 and higher. * Attempting to set this limit on * devices of compute capability less than 3.5 will result in the error * CUDA_ERROR_UNSUPPORTED_LIMIT being returned. *

    *
  • *
*

*
    *
  • *

    CU_LIMIT_DEV_RUNTIME_PENDING_LAUNCH_COUNT controls the maximum number * of outstanding device runtime launches that can be made from the * current context. A grid is outstanding * from the point of launch up * until the grid is known to have been completed. Device runtime launches * which violate this limitation * fail and return * cudaErrorLaunchPendingCountExceeded when cudaGetLastError() is called * after launch. If more pending launches * than the default (2048 launches) * are needed for a module using the device runtime, this limit can be * increased. Keep in mind * that being able to sustain * additional pending launches will require the driver to reserve larger * amounts of device memory * upfront which can no longer be * used for allocations. If these reservations fail, cuCtxSetLimit will * return CUDA_ERROR_OUT_OF_MEMORY, and the limit can be reset to a lower * value. This limit is only applicable to devices of compute capability * 3.5 and higher. * Attempting to set this limit on * devices of compute capability less than 3.5 will result in the error * CUDA_ERROR_UNSUPPORTED_LIMIT being returned. *

    *
  • *
*

*
* Note: *

Note that this * function may also return error codes from previous, asynchronous * launches. *

*
*

*
* * @param limit Limit to set * @param value Size of limit * * @return CUDA_SUCCESS, CUDA_ERROR_INVALID_VALUE, CUDA_ERROR_UNSUPPORTED_LIMIT, * CUDA_ERROR_OUT_OF_MEMORY * * @see JCudaDriver#cuCtxCreate * @see JCudaDriver#cuCtxDestroy * @see JCudaDriver#cuCtxGetApiVersion * @see JCudaDriver#cuCtxGetCacheConfig * @see JCudaDriver#cuCtxGetDevice * @see JCudaDriver#cuCtxGetLimit * @see JCudaDriver#cuCtxPopCurrent * @see JCudaDriver#cuCtxPushCurrent * @see JCudaDriver#cuCtxSetCacheConfig * @see JCudaDriver#cuCtxSynchronize */ public static int cuCtxSetLimit(int limit, long value) { return checkResult(cuCtxSetLimitNative(limit, value)); } private static native int cuCtxSetLimitNative(int limit, long value); /** * Returns the preferred cache configuration for the current context. * *
     * CUresult cuCtxGetCacheConfig (
     *      CUfunc_cache* pconfig )
     * 
*
*

Returns the preferred cache configuration * for the current context. On devices where the L1 cache and shared * memory use the * same hardware resources, this function * returns through pconfig the preferred cache configuration * for the current context. This is only a preference. The driver will * use the requested configuration * if possible, but it is free to choose a * different configuration if required to execute functions. *

*

This will return a pconfig of * CU_FUNC_CACHE_PREFER_NONE on devices where the size of the L1 cache * and shared memory are fixed. *

*

The supported cache configurations are: *

    *
  • *

    CU_FUNC_CACHE_PREFER_NONE: no * preference for shared memory or L1 (default) *

    *
  • *
  • *

    CU_FUNC_CACHE_PREFER_SHARED: * prefer larger shared memory and smaller L1 cache *

    *
  • *
  • *

    CU_FUNC_CACHE_PREFER_L1: prefer * larger L1 cache and smaller shared memory *

    *
  • *
  • *

    CU_FUNC_CACHE_PREFER_EQUAL: * prefer equal sized L1 cache and shared memory *

    *
  • *
*

*
* Note: *

Note that this * function may also return error codes from previous, asynchronous * launches. *

*
*

*
* * @param pconfig Returned cache configuration * * @return CUDA_SUCCESS, CUDA_ERROR_DEINITIALIZED, CUDA_ERROR_NOT_INITIALIZED, * CUDA_ERROR_INVALID_CONTEXT, CUDA_ERROR_INVALID_VALUE * * @see JCudaDriver#cuCtxCreate * @see JCudaDriver#cuCtxDestroy * @see JCudaDriver#cuCtxGetApiVersion * @see JCudaDriver#cuCtxGetDevice * @see JCudaDriver#cuCtxGetLimit * @see JCudaDriver#cuCtxPopCurrent * @see JCudaDriver#cuCtxPushCurrent * @see JCudaDriver#cuCtxSetCacheConfig * @see JCudaDriver#cuCtxSetLimit * @see JCudaDriver#cuCtxSynchronize * @see JCudaDriver#cuFuncSetCacheConfig */ public static int cuCtxGetCacheConfig(int pconfig[]) { return checkResult(cuCtxGetCacheConfigNative(pconfig)); } private static native int cuCtxGetCacheConfigNative(int[] pconfig); /** * Sets the preferred cache configuration for the current context. * *
     * CUresult cuCtxSetCacheConfig (
     *      CUfunc_cache config )
     * 
*
*

Sets the preferred cache configuration * for the current context. On devices where the L1 cache and shared * memory use the same * hardware resources, this sets through * config the preferred cache configuration for the current * context. This is only a preference. The driver will use the requested * configuration * if possible, but it is free to choose a * different configuration if required to execute the function. Any * function preference * set via cuFuncSetCacheConfig() will be * preferred over this context-wide setting. Setting the context-wide * cache configuration to CU_FUNC_CACHE_PREFER_NONE will cause subsequent * kernel launches to prefer to not change the cache configuration unless * required to launch the kernel. *

*

This setting does nothing on devices * where the size of the L1 cache and shared memory are fixed. *

*

Launching a kernel with a different * preference than the most recent preference setting may insert a * device-side synchronization * point. *

*

The supported cache configurations are: *

    *
  • *

    CU_FUNC_CACHE_PREFER_NONE: no * preference for shared memory or L1 (default) *

    *
  • *
  • *

    CU_FUNC_CACHE_PREFER_SHARED: * prefer larger shared memory and smaller L1 cache *

    *
  • *
  • *

    CU_FUNC_CACHE_PREFER_L1: prefer * larger L1 cache and smaller shared memory *

    *
  • *
  • *

    CU_FUNC_CACHE_PREFER_EQUAL: * prefer equal sized L1 cache and shared memory *

    *
  • *
*

*
* Note: *

Note that this * function may also return error codes from previous, asynchronous * launches. *

*
*

*
* * @param config Requested cache configuration * * @return CUDA_SUCCESS, CUDA_ERROR_DEINITIALIZED, CUDA_ERROR_NOT_INITIALIZED, * CUDA_ERROR_INVALID_CONTEXT, CUDA_ERROR_INVALID_VALUE * * @see JCudaDriver#cuCtxCreate * @see JCudaDriver#cuCtxDestroy * @see JCudaDriver#cuCtxGetApiVersion * @see JCudaDriver#cuCtxGetCacheConfig * @see JCudaDriver#cuCtxGetDevice * @see JCudaDriver#cuCtxGetLimit * @see JCudaDriver#cuCtxPopCurrent * @see JCudaDriver#cuCtxPushCurrent * @see JCudaDriver#cuCtxSetLimit * @see JCudaDriver#cuCtxSynchronize * @see JCudaDriver#cuFuncSetCacheConfig */ public static int cuCtxSetCacheConfig(int config) { return checkResult(cuCtxSetCacheConfigNative(config)); } private static native int cuCtxSetCacheConfigNative(int config); /** * Returns the current shared memory configuration for the current context. * *
     * CUresult cuCtxGetSharedMemConfig (
     *      CUsharedconfig* pConfig )
     * 
*
*

Returns the current shared memory * configuration for the current context. This function will return in * pConfig the current size of shared memory banks in the * current context. On devices with configurable shared memory banks, * cuCtxSetSharedMemConfig can be used to change this setting, so that * all subsequent kernel launches will by default use the new bank size. * When cuCtxGetSharedMemConfig is called on devices without configurable * shared memory, it will return the fixed bank size of the hardware. *

*

The returned bank configurations can be * either: *

    *
  • *

    CU_SHARED_MEM_CONFIG_FOUR_BYTE_BANK_SIZE: shared memory bank width is * four bytes. *

    *
  • *
  • *

    CU_SHARED_MEM_CONFIG_EIGHT_BYTE_BANK_SIZE: shared memory bank width * is eight bytes. *

    *
  • *
*

*
* Note: *

Note that this * function may also return error codes from previous, asynchronous * launches. *

*
*

*
* * @param pConfig returned shared memory configuration * * @return CUDA_SUCCESS, CUDA_ERROR_DEINITIALIZED, CUDA_ERROR_NOT_INITIALIZED, * CUDA_ERROR_INVALID_CONTEXT, CUDA_ERROR_INVALID_VALUE * * @see JCudaDriver#cuCtxCreate * @see JCudaDriver#cuCtxDestroy * @see JCudaDriver#cuCtxGetApiVersion * @see JCudaDriver#cuCtxGetCacheConfig * @see JCudaDriver#cuCtxGetDevice * @see JCudaDriver#cuCtxGetLimit * @see JCudaDriver#cuCtxPopCurrent * @see JCudaDriver#cuCtxPushCurrent * @see JCudaDriver#cuCtxSetLimit * @see JCudaDriver#cuCtxSynchronize * @see JCudaDriver#cuCtxGetSharedMemConfig * @see JCudaDriver#cuFuncSetCacheConfig */ public static int cuCtxGetSharedMemConfig(int pConfig[]) { return checkResult(cuCtxGetSharedMemConfig(pConfig)); } private static native int cuCtxGetSharedMemConfigNative(int pConfig[]); /** * Sets the shared memory configuration for the current context. * *
     * CUresult cuCtxSetSharedMemConfig (
     *      CUsharedconfig config )
     * 
*
*

Sets the shared memory configuration for * the current context. On devices with configurable shared memory banks, * this function * will set the context's shared memory bank * size which is used for subsequent kernel launches. *

*

Changing the shared memory configuration * between launches may insert a device side synchronization point between * those launches. *

*

Changing the shared memory bank size * will not increase shared memory usage or affect occupancy of kernels, * but may have major * effects on performance. Larger bank sizes * will allow for greater potential bandwidth to shared memory, but will * change what * kinds of accesses to shared memory will * result in bank conflicts. *

*

This function will do nothing on devices * with fixed shared memory bank size. *

*

The supported bank configurations are: *

    *
  • *

    CU_SHARED_MEM_CONFIG_DEFAULT_BANK_SIZE: * set bank width to the default initial setting (currently, four bytes). *

    *
  • *
  • *

    CU_SHARED_MEM_CONFIG_FOUR_BYTE_BANK_SIZE: set shared memory bank width * to be natively four bytes. *

    *
  • *
  • *

    CU_SHARED_MEM_CONFIG_EIGHT_BYTE_BANK_SIZE: set shared memory bank * width to be natively eight bytes. *

    *
  • *
*

*
* Note: *

Note that this * function may also return error codes from previous, asynchronous * launches. *

*
*

*
* * @param config requested shared memory configuration * * @return CUDA_SUCCESS, CUDA_ERROR_DEINITIALIZED, CUDA_ERROR_NOT_INITIALIZED, * CUDA_ERROR_INVALID_CONTEXT, CUDA_ERROR_INVALID_VALUE * * @see JCudaDriver#cuCtxCreate * @see JCudaDriver#cuCtxDestroy * @see JCudaDriver#cuCtxGetApiVersion * @see JCudaDriver#cuCtxGetCacheConfig * @see JCudaDriver#cuCtxGetDevice * @see JCudaDriver#cuCtxGetLimit * @see JCudaDriver#cuCtxPopCurrent * @see JCudaDriver#cuCtxPushCurrent * @see JCudaDriver#cuCtxSetLimit * @see JCudaDriver#cuCtxSynchronize * @see JCudaDriver#cuCtxGetSharedMemConfig * @see JCudaDriver#cuFuncSetCacheConfig */ public static int cuCtxSetSharedMemConfig(int config) { return checkResult(cuCtxSetSharedMemConfigNative(config)); } private static native int cuCtxSetSharedMemConfigNative(int config); /** * Gets the context's API version. * *
     * CUresult cuCtxGetApiVersion (
     *      CUcontext ctx,
     *      unsigned int* version )
     * 
*
*

Gets the context's API version. Returns * a version number in version corresponding to the capabilities * of the context (e.g. 3010 or 3020), which library developers can use * to direct callers * to a specific API version. If ctx is NULL, returns the API version used to create the currently * bound context. *

*

Note that new API versions are only * introduced when context capabilities are changed that break binary * compatibility, so the * API version and driver version may be * different. For example, it is valid for the API version to be 3020 * while the driver * version is 4020. *

*
* Note: *

Note that this * function may also return error codes from previous, asynchronous * launches. *

*
*

*
* * @param ctx Context to check * @param version Pointer to version * * @return CUDA_SUCCESS, CUDA_ERROR_DEINITIALIZED, CUDA_ERROR_NOT_INITIALIZED, * CUDA_ERROR_INVALID_CONTEXT, CUDA_ERROR_UNKNOWN * * @see JCudaDriver#cuCtxCreate * @see JCudaDriver#cuCtxDestroy * @see JCudaDriver#cuCtxGetDevice * @see JCudaDriver#cuCtxGetLimit * @see JCudaDriver#cuCtxPopCurrent * @see JCudaDriver#cuCtxPushCurrent * @see JCudaDriver#cuCtxSetCacheConfig * @see JCudaDriver#cuCtxSetLimit * @see JCudaDriver#cuCtxSynchronize */ public static int cuCtxGetApiVersion(CUcontext ctx, int version[]) { return checkResult(cuCtxGetApiVersionNative(ctx, version)); } private static native int cuCtxGetApiVersionNative(CUcontext ctx, int version[]); /** * Returns numerical values that correspond to the least and * greatest stream priorities.
*
* Returns in *leastPriority and *greatestPriority the numerical values that correspond * to the least and greatest stream priorities respectively. Stream priorities * follow a convention where lower numbers imply greater priorities. The range of * meaningful stream priorities is given by [*greatestPriority, *leastPriority]. * If the user attempts to create a stream with a priority value that is * outside the meaningful range as specified by this API, the priority is * automatically clamped down or up to either *leastPriority or *greatestPriority * respectively. See ::cuStreamCreateWithPriority for details on creating a * priority stream.
* A NULL may be passed in for *leastPriority or *greatestPriority if the value * is not desired.
*
* This function will return '0' in both \p *leastPriority and \p *greatestPriority if * the current context's device does not support stream priorities * (see ::cuDeviceGetAttribute). * * @param leastPriority Pointer to an int in which the numerical value for least * stream priority is returned * @param greatestPriority Pointer to an int in which the numerical value for greatest * stream priority is returned * * @return CUDA_SUCCESS, CUDA_ERROR_INVALID_VALUE * * @see JCudaDriver#cuStreamCreateWithPriority * @see JCudaDriver#cuStreamGetPriority, * @see JCudaDriver#cuCtxGetDevice, * @see JCudaDriver#cuCtxSetLimit, * @see JCudaDriver#cuCtxSynchronize */ public static int cuCtxGetStreamPriorityRange(int leastPriority[], int greatestPriority[]) { return checkResult(cuCtxGetStreamPriorityRangeNative(leastPriority, greatestPriority)); } private static native int cuCtxGetStreamPriorityRangeNative(int leastPriority[], int greatestPriority[]); /** * Launches a CUDA function. * *
*
* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * *
CUresult cuLaunchKernel (CUfunction  f,
unsigned int  gridDimX,
unsigned int  gridDimY,
unsigned int  gridDimZ,
unsigned int  blockDimX,
unsigned int  blockDimY,
unsigned int  blockDimZ,
unsigned int  sharedMemBytes,
CUstream  hStream,
void **  kernelParams,
void **  extra 
)
*
*
*

* Invokes the kernel f on a gridDimX x * gridDimY x gridDimZ grid of blocks. Each * block contains blockDimX x blockDimY x * blockDimZ threads. *

* sharedMemBytes sets the amount of dynamic shared memory * that will be available to each thread block. *

* cuLaunchKernel() can optionally be associated to a stream by passing a * non-zero hStream argument. *

* Kernel parameters to f can be specified in one of two * ways: *

* 1) Kernel parameters can be specified via kernelParams. * If f has N parameters, then kernelParams * needs to be an array of N pointers. Each of kernelParams[0] * through kernelParams[N-1] must point to a region of memory * from which the actual kernel parameter will be copied. The number of * kernel parameters and their offsets and sizes do not need to be * specified as that information is retrieved directly from the kernel's * image. *

* 2) Kernel parameters can also be packaged by the application into a * single buffer that is passed in via the extra parameter. * This places the burden on the application of knowing each kernel * parameter's size and alignment/padding within the buffer. Here is an * example of using the extra parameter in this manner: *

*
    size_t argBufferSize;
     *     char argBuffer[256];
     * 
     *     // populate argBuffer and argBufferSize
     * 
     *     void *config[] = {
     *         CU_LAUNCH_PARAM_BUFFER_POINTER, argBuffer,
     *         CU_LAUNCH_PARAM_BUFFER_SIZE,    &argBufferSize,
     *         CU_LAUNCH_PARAM_END
     *     };
     *     status = cuLaunchKernel(f, gx, gy, gz, bx, by, bz, sh, s, NULL,
     * config);
     * 
*
*

* The extra parameter exists to allow cuLaunchKernel to take * additional less commonly used arguments. extra specifies * a list of names of extra settings and their corresponding values. Each * extra setting name is immediately followed by the corresponding value. * The list must be terminated with either NULL or * CU_LAUNCH_PARAM_END. *

*

    *
  • CU_LAUNCH_PARAM_END, which indicates the end of the extra * array; *
  • *
  • CU_LAUNCH_PARAM_BUFFER_POINTER, which specifies that * the next value in extra will be a pointer to a buffer * containing all the kernel parameters for launching kernel * f; *
  • *
  • CU_LAUNCH_PARAM_BUFFER_SIZE, which specifies * that the next value in extra will be a pointer to a size_t * containing the size of the buffer specified with * CU_LAUNCH_PARAM_BUFFER_POINTER; *
  • *
*

* The error CUDA_ERROR_INVALID_VALUE will be returned if kernel parameters * are specified with both kernelParams and extra * (i.e. both kernelParams and extra are * non-NULL). *

* Calling cuLaunchKernel() sets persistent function state that is the * same as function state set through the following deprecated APIs: *

* cuFuncSetBlockShape() cuFuncSetSharedSize() cuParamSetSize() * cuParamSeti() cuParamSetf() cuParamSetv() *

* When the kernel f is launched via cuLaunchKernel(), the * previous block shape, shared size and parameter info associated with * f is overwritten. *

* Note that to use cuLaunchKernel(), the kernel f must * either have been compiled with toolchain version 3.2 or later so that * it will contain kernel parameter information, or have no kernel * parameters. If either of these conditions is not met, then * cuLaunchKernel() will return CUDA_ERROR_INVALID_IMAGE. *

*

*
* * @return CUDA_SUCCESS, CUDA_ERROR_DEINITIALIZED, CUDA_ERROR_NOT_INITIALIZED, * CUDA_ERROR_INVALID_CONTEXT, CUDA_ERROR_INVALID_HANDLE, * CUDA_ERROR_INVALID_IMAGE, CUDA_ERROR_INVALID_VALUE, * CUDA_ERROR_LAUNCH_FAILED, CUDA_ERROR_LAUNCH_OUT_OF_RESOURCES, * CUDA_ERROR_LAUNCH_TIMEOUT, CUDA_ERROR_LAUNCH_INCOMPATIBLE_TEXTURING, * CUDA_ERROR_SHARED_OBJECT_INIT_FAILED * * @see JCudaDriver#cuCtxGetCacheConfig * @see JCudaDriver#cuCtxSetCacheConfig * @see JCudaDriver#cuFuncSetCacheConfig * @see JCudaDriver#cuFuncGetAttribute */ public static int cuLaunchKernel( CUfunction f, int gridDimX, int gridDimY, int gridDimZ, int blockDimX, int blockDimY, int blockDimZ, int sharedMemBytes, CUstream hStream, Pointer kernelParams, Pointer extra) { return checkResult(cuLaunchKernelNative(f, gridDimX, gridDimY, gridDimZ, blockDimX, blockDimY, blockDimZ, sharedMemBytes, hStream, kernelParams, extra)); } private static native int cuLaunchKernelNative( CUfunction f, int gridDimX, int gridDimY, int gridDimZ, int blockDimX, int blockDimY, int blockDimZ, int sharedMemBytes, CUstream hStream, Pointer kernelParams, Pointer extra); /** * Returns resource limits. * *
     * CUresult cuCtxGetLimit (
     *      size_t* pvalue,
     *      CUlimit limit )
     * 
*
*

Returns resource limits. Returns in *pvalue the current size of limit. The supported * CUlimit values are: *

    *
  • *

    CU_LIMIT_STACK_SIZE: stack size * in bytes of each GPU thread. *

    *
  • *
  • *

    CU_LIMIT_PRINTF_FIFO_SIZE: size * in bytes of the FIFO used by the printf() device system call. *

    *
  • *
  • *

    CU_LIMIT_MALLOC_HEAP_SIZE: size * in bytes of the heap used by the malloc() and free() device system * calls. *

    *
  • *
  • *

    CU_LIMIT_DEV_RUNTIME_SYNC_DEPTH: * maximum grid depth at which a thread can issue the device runtime call * cudaDeviceSynchronize() to wait on child grid launches * to complete. *

    *
  • *
  • *

    CU_LIMIT_DEV_RUNTIME_PENDING_LAUNCH_COUNT: maximum number of * outstanding device runtime launches that can be made from this * context. *

    *
  • *
*

*
* Note: *

Note that this * function may also return error codes from previous, asynchronous * launches. *

*
*

*
* * @param pvalue Returned size of limit * @param limit Limit to query * * @return CUDA_SUCCESS, CUDA_ERROR_INVALID_VALUE, * CUDA_ERROR_UNSUPPORTED_LIMIT * * @see JCudaDriver#cuCtxCreate * @see JCudaDriver#cuCtxDestroy * @see JCudaDriver#cuCtxGetApiVersion * @see JCudaDriver#cuCtxGetCacheConfig * @see JCudaDriver#cuCtxGetDevice * @see JCudaDriver#cuCtxPopCurrent * @see JCudaDriver#cuCtxPushCurrent * @see JCudaDriver#cuCtxSetCacheConfig * @see JCudaDriver#cuCtxSetLimit * @see JCudaDriver#cuCtxSynchronize */ public static int cuCtxGetLimit(long pvalue[], int limit) { return checkResult(cuCtxGetLimitNative(pvalue, limit)); } private static native int cuCtxGetLimitNative(long pvalue[], int limit); /** * Initialize the profiling. * *
     * CUresult cuProfilerInitialize (
     *      const char* configFile,
     *      const char* outputFile,
     *      CUoutput_mode outputMode )
     * 
*
*

Initialize the profiling. Using this * API user can initialize the CUDA profiler by specifying the configuration * file, output * file and output file format. This API is * generally used to profile different set of counters by looping the * kernel launch. * The configFile parameter can * be used to select profiling options including profiler counters. Refer * to the "Compute Command Line Profiler * User Guide" for supported profiler * options and counters. *

*

Limitation: The CUDA profiler cannot be * initialized with this API if another profiling tool is already active, * as indicated * by the CUDA_ERROR_PROFILER_DISABLED * return code. *

*

Typical usage of the profiling APIs is * as follows: *

*

for each set of counters/options * { * cuProfilerInitialize(); //Initialize * profiling, set the counters or options in the config file * ... * cuProfilerStart(); * // code to be profiled * cuProfilerStop(); * ... * cuProfilerStart(); * // code to be profiled * cuProfilerStop(); * ... * } *

*
* Note: *

Note that this * function may also return error codes from previous, asynchronous * launches. *

*
*

*
* * @param configFile Name of the config file that lists the counters/options for profiling. * @param outputFile Name of the outputFile where the profiling results will be stored. * @param outputMode outputMode, can be CU_OUT_KEY_VALUE_PAIR or CU_OUT_CSV. * * @return CUDA_SUCCESS, CUDA_ERROR_INVALID_CONTEXT, CUDA_ERROR_INVALID_VALUE, * CUDA_ERROR_PROFILER_DISABLED * * @see JCudaDriver#cuProfilerStart * @see JCudaDriver#cuProfilerStop */ public static int cuProfilerInitialize(String configFile, String outputFile, int outputMode) { return checkResult(cuProfilerInitializeNative(configFile, outputFile, outputMode)); } private static native int cuProfilerInitializeNative(String configFile, String outputFile, int outputMode); /** * Enable profiling. * *
     * CUresult cuProfilerStart (
     *      void )
     * 
*
*

Enable profiling. Enables profile * collection by the active profiling tool. If profiling is already * enabled, then cuProfilerStart() has no effect. *

*

cuProfilerStart and cuProfilerStop APIs * are used to programmatically control the profiling granularity by * allowing profiling * to be done only on selective pieces of * code. *

*
* Note: *

Note that this * function may also return error codes from previous, asynchronous * launches. *

*
*

*
*
     *
     * @return CUDA_SUCCESS, CUDA_ERROR_INVALID_CONTEXT
     *
     * @see JCudaDriver#cuProfilerInitialize
     * @see JCudaDriver#cuProfilerStop
     */
    public static int cuProfilerStart()
    {
        // Forward to the native binding, then pass its status code through checkResult.
        final int status = cuProfilerStartNative();
        return checkResult(status);
    }

    private static native int cuProfilerStartNative();

    /**
     * Disable profiling.
     *
     *
     * CUresult cuProfilerStop (
     *      void )
     * 
*
*

Disable profiling. Disables profile * collection by the active profiling tool. If profiling is already * disabled, then cuProfilerStop() has no effect. *

*

cuProfilerStart and cuProfilerStop APIs * are used to programmatically control the profiling granularity by * allowing profiling * to be done only on selective pieces of * code. *

*
* Note: *

Note that this * function may also return error codes from previous, asynchronous * launches. *

*
*

*
*
     *
     * @return CUDA_SUCCESS, CUDA_ERROR_INVALID_CONTEXT
     *
     * @see JCudaDriver#cuProfilerInitialize
     * @see JCudaDriver#cuProfilerStart
     */
    public static int cuProfilerStop()
    {
        // Forward to the native binding, then pass its status code through checkResult.
        final int status = cuProfilerStopNative();
        return checkResult(status);
    }

    private static native int cuProfilerStopNative();
}




© 2015 - 2024 Weber Informatics LLC | Privacy Policy