All Downloads are FREE. Search and download functionalities are using the official Maven repository.

jcuda.driver.JCudaDriver Maven / Gradle / Ivy

There is a newer version: 12.0.0
Show newest version
/*
 * JCuda - Java bindings for NVIDIA CUDA driver and runtime API
 *
 * Copyright (c) 2009-2015 Marco Hutter - http://www.jcuda.org
 *
 * Permission is hereby granted, free of charge, to any person
 * obtaining a copy of this software and associated documentation
 * files (the "Software"), to deal in the Software without
 * restriction, including without limitation the rights to use,
 * copy, modify, merge, publish, distribute, sublicense, and/or sell
 * copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following
 * conditions:
 *
 * The above copyright notice and this permission notice shall be
 * included in all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
 * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
 * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
 * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
 * OTHER DEALINGS IN THE SOFTWARE.
 */

package jcuda.driver;

import java.nio.charset.StandardCharsets;
import java.util.Arrays;

import jcuda.CudaException;
import jcuda.JCudaVersion;
import jcuda.LibUtils;
import jcuda.LibUtilsCuda;
import jcuda.LogLevel;
import jcuda.Pointer;
import jcuda.runtime.JCuda;

/**
 * Java bindings for the NVidia CUDA driver API.
*
* Most comments are extracted from the CUDA online documentation */ public class JCudaDriver { /** The CUDA version */ public static final int CUDA_VERSION = 11020; /** * If set, host memory is portable between CUDA contexts. * Flag for {@link JCudaDriver#cuMemHostAlloc} */ public static final int CU_MEMHOSTALLOC_PORTABLE = 0x01; /** * If set, host memory is mapped into CUDA address space and * JCudaDriver#cuMemHostGetDevicePointer may be called on the host pointer. * Flag for {@link JCudaDriver#cuMemHostAlloc} */ public static final int CU_MEMHOSTALLOC_DEVICEMAP = 0x02; /** * If set, host memory is allocated as write-combined - fast to write, * faster to DMA, slow to read except via SSE4 streaming load instruction * (MOVNTDQA). * Flag for {@link JCudaDriver#cuMemHostAlloc} */ public static final int CU_MEMHOSTALLOC_WRITECOMBINED = 0x04; /** * If set, host memory is portable between CUDA contexts. * Flag for ::cuMemHostRegister() */ public static final int CU_MEMHOSTREGISTER_PORTABLE = 0x01; /** * If set, host memory is mapped into CUDA address space and * ::cuMemHostGetDevicePointer() may be called on the host pointer. * Flag for ::cuMemHostRegister() */ public static final int CU_MEMHOSTREGISTER_DEVICEMAP = 0x02; /** * If set, peer memory is mapped into CUDA address space and * ::cuMemPeerGetDevicePointer() may be called on the host pointer. * Flag for ::cuMemPeerRegister() * @deprecated This value has been added in CUDA 4.0 RC, * and removed in CUDA 4.0 RC2 */ @Deprecated public static final int CU_MEMPEERREGISTER_DEVICEMAP = 0x02; /** * If set, the passed memory pointer is treated as pointing to some * memory-mapped I/O space, e.g. belonging to a third-party PCIe device. * On Windows the flag is a no-op. * On Linux that memory is marked as non cache-coherent for the GPU and * is expected to be physically contiguous. It may return * CUDA_ERROR_NOT_PERMITTED if run as an unprivileged user, * CUDA_ERROR_NOT_SUPPORTED on older Linux kernel versions. 
* On all other platforms, it is not supported and CUDA_ERROR_NOT_SUPPORTED * is returned. * Flag for ::cuMemHostRegister() */ public static final int CU_MEMHOSTREGISTER_IOMEMORY = 0x04; /** * If set, the passed memory pointer is treated as pointing to memory that is * considered read-only by the device. On platforms without * CU_DEVICE_ATTRIBUTE_PAGEABLE_MEMORY_ACCESS_USES_HOST_PAGE_TABLES, this flag is * required in order to register memory mapped to the CPU as read-only. Support * for the use of this flag can be queried from the device attribute * CU_DEVICE_ATTRIBUTE_READ_ONLY_HOST_REGISTER_SUPPORTED. Using this flag with * a current context associated with a device that does not have this attribute * set will cause ::cuMemHostRegister to error with CUDA_ERROR_NOT_SUPPORTED. */ public static final int CU_MEMHOSTREGISTER_READ_ONLY = 0x08; /** * Indicates that the layered sparse CUDA array or CUDA mipmapped array * has a single mip tail region for all layers */ public static final int CU_ARRAY_SPARSE_PROPERTIES_SINGLE_MIPTAIL = 0x1; /** * This flag if set indicates that the memory will be used as a tile pool. */ public static final int CU_MEM_CREATE_USAGE_TILE_POOL = 0x1; /** * If set, each kernel launched as part of * ::cuLaunchCooperativeKernelMultiDevice only waits for prior work in the * stream corresponding to that GPU to complete before the kernel begins * execution. */ public static final int CUDA_COOPERATIVE_LAUNCH_MULTI_DEVICE_NO_PRE_LAUNCH_SYNC = 0x01; /** * If set, any subsequent work pushed in a stream that participated in a * call to ::cuLaunchCooperativeKernelMultiDevice will only wait for the * kernel launched on the GPU corresponding to that stream to complete * before it begins execution. 
*/ public static final int CUDA_COOPERATIVE_LAUNCH_MULTI_DEVICE_NO_POST_LAUNCH_SYNC = 0x02; /** * If set, the CUDA array is a collection of layers, where each layer is either a 1D * or a 2D array and the Depth member of CUDA_ARRAY3D_DESCRIPTOR specifies the number * of layers, not the depth of a 3D array. */ public static final int CUDA_ARRAY3D_LAYERED = 0x01; /** * If set, the CUDA array contains an array of 2D slices * and the Depth member of CUDA_ARRAY3D_DESCRIPTOR specifies * the number of slices, not the depth of a 3D array. * @deprecated use CUDA_ARRAY3D_LAYERED */ @Deprecated public static final int CUDA_ARRAY3D_2DARRAY = 0x01; /** * This flag must be set in order to bind a surface reference * to the CUDA array */ public static final int CUDA_ARRAY3D_SURFACE_LDST = 0x02; /** * If set, the CUDA array is a collection of six 2D arrays, representing faces of a cube. The * width of such a CUDA array must be equal to its height, and Depth must be six. * If ::CUDA_ARRAY3D_LAYERED flag is also set, then the CUDA array is a collection of cubemaps * and Depth must be a multiple of six. */ public static final int CUDA_ARRAY3D_CUBEMAP = 0x04; /** * This flag must be set in order to perform texture gather operations * on a CUDA array. */ public static final int CUDA_ARRAY3D_TEXTURE_GATHER = 0x08; /** * This flag if set indicates that the CUDA * array is a DEPTH_TEXTURE. 
*/ public static final int CUDA_ARRAY3D_DEPTH_TEXTURE = 0x10; /** * This flag indicates that the CUDA array may be bound as a color target * in an external graphics API */ public static final int CUDA_ARRAY3D_COLOR_ATTACHMENT = 0x20; /** * This flag if set indicates that the CUDA array or CUDA mipmapped array * is a sparse CUDA array or CUDA mipmapped array respectively */ public static final int CUDA_ARRAY3D_SPARSE = 0x40; /** * For texture references loaded into the module, use default * texunit from texture reference */ public static final int CU_PARAM_TR_DEFAULT = -1; /** * Override the texref format with a format inferred from the array */ public static final int CU_TRSA_OVERRIDE_FORMAT = 0x01; /** * Read the texture as integers rather than promoting the values * to floats in the range [0,1] */ public static final int CU_TRSF_READ_AS_INTEGER = 0x01; /** * Use normalized texture coordinates in the range [0,1) instead of [0,dim) */ public static final int CU_TRSF_NORMALIZED_COORDINATES = 0x02; /** * Perform sRGB->linear conversion during texture read. * Flag for JCudaDriver#cuTexRefSetFlags() */ public static final int CU_TRSF_SRGB = 0x10; /** * Specifies a stream callback does not block the stream while * executing. This is the default behavior. * Flag for {@link JCudaDriver#cuStreamAddCallback(CUstream, CUstreamCallback, Object, int)} * * @deprecated This flag was only present in CUDA 5.0.25 (release candidate) * and may be removed (or added again) in future releases */ @Deprecated public static final int CU_STREAM_CALLBACK_NONBLOCKING = 0x00; /** * If set, the stream callback blocks the stream until it is * done executing. 
* Flag for {@link JCudaDriver#cuStreamAddCallback(CUstream, CUstreamCallback, Object, int)} * * @deprecated This flag was only present in CUDA 5.0.25 (release candidate) * and may be removed (or added again) in future releases */ @Deprecated public static final int CU_STREAM_CALLBACK_BLOCKING = 0x01; /** * Disable any trilinear filtering optimizations. * Flag for ::cuTexRefSetFlags() and ::cuTexObjectCreate() */ public static final int CU_TRSF_DISABLE_TRILINEAR_OPTIMIZATION = 0x20; /** * Private inner class for the constant pointer values * CU_LAUNCH_PARAM_END, CU_LAUNCH_PARAM_BUFFER_POINTER, * and CU_LAUNCH_PARAM_BUFFER_SIZE. * * TODO: These constants could be misused: There is no * mechanism for preventing these Pointers to be used * for memory allocation. However, at the moment there * is no other way for emulating these pointer constants. */ private static class ConstantPointer extends Pointer { private ConstantPointer(long value) { super(value); } } /** * End of array terminator for the \p extra parameter to * ::cuLaunchKernel */ public static final Pointer CU_LAUNCH_PARAM_END = new ConstantPointer(0); // ((void*)0x00) /** * Indicator that the next value in the \p extra parameter to * ::cuLaunchKernel will be a pointer to a buffer containing all kernel * parameters used for launching kernel \p f. This buffer needs to * honor all alignment/padding requirements of the individual parameters. * If ::CU_LAUNCH_PARAM_BUFFER_SIZE is not also specified in the * \p extra array, then ::CU_LAUNCH_PARAM_BUFFER_POINTER will have no * effect. */ public static final Pointer CU_LAUNCH_PARAM_BUFFER_POINTER = new ConstantPointer(1); //((void*)0x01) /** * Indicator that the next value in the \p extra parameter to * ::cuLaunchKernel will be a pointer to a size_t which contains the * size of the buffer specified with ::CU_LAUNCH_PARAM_BUFFER_POINTER. 
* It is required that ::CU_LAUNCH_PARAM_BUFFER_POINTER also be specified * in the \p extra array if the value associated with * ::CU_LAUNCH_PARAM_BUFFER_SIZE is not zero. */ public static final Pointer CU_LAUNCH_PARAM_BUFFER_SIZE = new ConstantPointer(2); // ((void*)0x02) /** * Device that represents the CPU */ public static final CUdevice CU_DEVICE_CPU = new CUdevice(-1); /** * Device that represents an invalid device */ public static final CUdevice CU_DEVICE_INVALID = new CUdevice(-2); /** * Stream handle that can be passed as a CUstream to use an implicit stream * with legacy synchronization behavior. */ public static final CUstream CU_STREAM_LEGACY = new CUstream(0x1); /** * Stream handle that can be passed as a CUstream to use an implicit stream * with per-thread synchronization behavior. */ public static final CUstream CU_STREAM_PER_THREAD = new CUstream(0x2); /** * Whether a CudaException should be thrown if a method is about * to return a result code that is not CUresult.CUDA_SUCCESS */ private static boolean exceptionsEnabled = false; static { String libraryBaseName = "JCudaDriver-" + JCudaVersion.get(); String libraryName = LibUtils.createPlatformLibraryName(libraryBaseName); LibUtilsCuda.loadLibrary(libraryName); } /* Private constructor to prevent instantiation */ private JCudaDriver() { } /** * Set the specified log level for the JCuda driver library.
*
* Currently supported log levels: *
* LOG_QUIET: Never print anything
* LOG_ERROR: Print error messages
* LOG_TRACE: Print a trace of all native function calls
* * @param logLevel The log level to use. */ public static void setLogLevel(LogLevel logLevel) { setLogLevel(logLevel.ordinal()); } private static native void setLogLevel(int logLevel); /** * Enables or disables exceptions. By default, the methods of this class * only return the CUresult error code from the underlying CUDA function. * If exceptions are enabled, a CudaException with a detailed error * message will be thrown if a method is about to return a result code * that is not CUresult.CUDA_SUCCESS * * @param enabled Whether exceptions are enabled */ public static void setExceptionsEnabled(boolean enabled) { exceptionsEnabled = enabled; } /** * If the given result is different to CUresult.CUDA_SUCCESS and * exceptions have been enabled, this method will throw a * CudaException with an error message that corresponds to the * given result code. Otherwise, the given result is simply * returned. * * @param result The result to check * @return The result that was given as the parameter * @throws CudaException If exceptions have been enabled and * the given result code is not CUresult.CUDA_SUCCESS */ private static int checkResult(int result) { if (exceptionsEnabled && result != CUresult.CUDA_SUCCESS) { throw new CudaException(CUresult.stringFor(result)); } return result; } /** * Returns the given (address) value, adjusted to have * the given alignment. This function may be used to * align the parameters for a kernel call according * to their alignment requirements. * * @param value The address value * @param alignment The desired alignment * @return The aligned address value * @deprecated This method was intended for a simpler * kernel parameter setup in earlier CUDA versions, * and should not be required any more. It may be * removed in future releases. 
*/ @Deprecated public static int align(int value, int alignment) { return (((value) + (alignment) - 1) & ~((alignment) - 1)); } /** * A wrapper function for * {@link JCudaDriver#cuModuleLoadDataEx(CUmodule, Pointer, int, int[], Pointer)} * which allows passing in the options for the JIT compiler, and obtaining * the output of the JIT compiler via a {@link JITOptions} object.
*
* Note: This method should be considered as preliminary, * and might change in future releases. * */ public static int cuModuleLoadDataJIT(CUmodule module, Pointer pointer, JITOptions jitOptions) { return cuModuleLoadDataJITNative(module, pointer, jitOptions); } private static native int cuModuleLoadDataJITNative(CUmodule module, Pointer pointer, JITOptions jitOptions); /** * A wrapper function for * {@link JCudaDriver#cuModuleLoadDataEx(CUmodule, Pointer, int, int[], Pointer)} * which allows passing in the image data as a string. * * @param module Returned module * @param image Module data to load * @param numOptions Number of options * @param options Options for JIT * @param optionValues Option values for JIT * @return The return code from cuModuleLoadDataEx * * @see #cuModuleLoadDataEx(CUmodule, Pointer, int, int[], Pointer) */ public static int cuModuleLoadDataEx(CUmodule phMod, String string, int numOptions, int options[], Pointer optionValues) { byte bytes[] = string.getBytes(); byte image[] = Arrays.copyOf(bytes, bytes.length+1); return cuModuleLoadDataEx(phMod, Pointer.to(image), numOptions, options, optionValues); } /** * A wrapper function for {@link #cuModuleLoadData(CUmodule, byte[])} * that converts the given string into a zero-terminated byte array. * * @param module The module * @param string The data. May not be null. * @return The return code from cuModuleLoadData * * @see #cuModuleLoadData(CUmodule, byte[]) */ public static int cuModuleLoadData(CUmodule module, String string) { byte bytes[] = string.getBytes(); byte image[] = Arrays.copyOf(bytes, bytes.length+1); return cuModuleLoadData(module, image); } /** *
     * Gets the string description of an error code
     *
     * Sets *pStr to the address of a NULL-terminated string description
     * of the error code error.
     * If the error code is not recognized, ::CUDA_ERROR_INVALID_VALUE
     * will be returned and *pStr will be set to the NULL address.
     * 
* * @param error - Error code to convert to string * @param pStr - Address of the string pointer. * * @return * ::CUDA_SUCCESS, * ::CUDA_ERROR_INVALID_VALUE * * @see CUresult */ public static int cuGetErrorString(int error, String pStr[]) { return checkResult(cuGetErrorStringNative(error, pStr)); } private static native int cuGetErrorStringNative(int error, String pStr[]); /** *
     * Gets the string representation of an error code enum name
     *
     * Sets *pStr to the address of a NULL-terminated string representation
     * of the name of the enum error code error.
     * If the error code is not recognized, ::CUDA_ERROR_INVALID_VALUE
     * will be returned and *pStr will be set to the NULL address.
     * 
* @param error - Error code to convert to string * @param pStr - Address of the string pointer. * * @return * ::CUDA_SUCCESS, * ::CUDA_ERROR_INVALID_VALUE * * @see CUresult */ public static int cuGetErrorName(int error, String pStr[]) { return checkResult(cuGetErrorNameNative(error, pStr)); } private static native int cuGetErrorNameNative(int error, String pStr[]); /** * Initialize the CUDA driver API. * *
     * CUresult cuInit (
     *      unsigned int  Flags )
     *
     * Initializes the driver API and must be called before any other
     * function from the driver API. Currently, the Flags parameter must
     * be 0. If cuInit() has not been called, any function from the
     * driver API will return CUDA_ERROR_NOT_INITIALIZED.
     *
     * Note that this function may also return error codes from previous,
     * asynchronous launches.
     *
     * @param Flags Initialization flag for CUDA.
     *
     * @return CUDA_SUCCESS, CUDA_ERROR_INVALID_VALUE,
     * CUDA_ERROR_INVALID_DEVICE
     */
    public static int cuInit(int Flags)
    {
        int result = cuInitNative(Flags);
        return checkResult(result);
    }

    private static native int cuInitNative(int Flags);

    /**
     * Returns a handle to a compute device.
     *
     *
     * CUresult cuDeviceGet (
     *      CUdevice* device,
     *      int  ordinal )
     * 
*
*

Returns a handle to a compute device. * Returns in *device a device handle given an ordinal in the * range [0, cuDeviceGetCount()-1]. *

*
* Note: *

Note that this * function may also return error codes from previous, asynchronous * launches. *

*
*

*
* * @param device Returned device handle * @param ordinal Device number to get handle for * * @return CUDA_SUCCESS, CUDA_ERROR_DEINITIALIZED, CUDA_ERROR_NOT_INITIALIZED, * CUDA_ERROR_INVALID_CONTEXT, CUDA_ERROR_INVALID_VALUE, * CUDA_ERROR_INVALID_DEVICE * * @see JCudaDriver#cuDeviceGetAttribute * @see JCudaDriver#cuDeviceGetCount * @see JCudaDriver#cuDeviceGetName * @see JCudaDriver#cuDeviceTotalMem */ public static int cuDeviceGet(CUdevice device, int ordinal) { return checkResult(cuDeviceGetNative(device, ordinal)); } private static native int cuDeviceGetNative(CUdevice device, int ordinal); /** * Returns the number of compute-capable devices. * *
     * CUresult cuDeviceGetCount (
     *      int* count )
     *
     * Returns in *count the number of devices with compute capability
     * greater than or equal to 2.0 that are available for execution.
     * If there is no such device, cuDeviceGetCount() returns 0.
     *
     * Note that this function may also return error codes from previous,
     * asynchronous launches.
     *
     * @param count Returned number of compute-capable devices
     *
     * @return CUDA_SUCCESS, CUDA_ERROR_DEINITIALIZED,
     * CUDA_ERROR_NOT_INITIALIZED, CUDA_ERROR_INVALID_CONTEXT,
     * CUDA_ERROR_INVALID_VALUE
     *
     * @see JCudaDriver#cuDeviceGetAttribute
     * @see JCudaDriver#cuDeviceGetName
     * @see JCudaDriver#cuDeviceGet
     * @see JCudaDriver#cuDeviceTotalMem
     */
    public static int cuDeviceGetCount(int count[])
    {
        int result = cuDeviceGetCountNative(count);
        return checkResult(result);
    }

    private static native int cuDeviceGetCountNative(int count[]);

    /**
     * Returns an identifier string for the device.
     *
     *
     * CUresult cuDeviceGetName (
     *      char* name,
     *      int  len,
     *      CUdevice dev )
     * 
*
*

Returns an identifer string for the * device. Returns an ASCII string identifying the device dev * in the NULL-terminated string pointed to by name. len specifies the maximum length of the string that may be * returned. *

*
* Note: *

Note that this * function may also return error codes from previous, asynchronous * launches. *

*
*

*
* * @param name Returned identifier string for the device * @param len Maximum length of string to store in name * @param dev Device to get identifier string for * * @return CUDA_SUCCESS, CUDA_ERROR_DEINITIALIZED, CUDA_ERROR_NOT_INITIALIZED, * CUDA_ERROR_INVALID_CONTEXT, CUDA_ERROR_INVALID_VALUE, * CUDA_ERROR_INVALID_DEVICE * * @see JCudaDriver#cuDeviceGetAttribute * @see JCudaDriver#cuDeviceGetCount * @see JCudaDriver#cuDeviceGet * @see JCudaDriver#cuDeviceTotalMem */ public static int cuDeviceGetName(byte name[], int len, CUdevice dev) { return checkResult(cuDeviceGetNameNative(name, len, dev)); } private static native int cuDeviceGetNameNative(byte name[], int len, CUdevice dev); /** * Return an UUID for the device. * * Returns 16-octets identifing the device \p dev in the structure * pointed by the \p uuid. * * @param uuid Returned UUID * @param dev Device to get identifier string for * * @return CUDA_SUCCESS, CUDA_ERROR_DEINITIALIZED, CUDA_ERROR_NOT_INITIALIZED, * CUDA_ERROR_INVALID_VALUE, CUDA_ERROR_INVALID_DEVICE * * * @see JCudaDriver#cuDeviceGetAttribute * JCudaDriver#cuDeviceGetCount * JCudaDriver#cuDeviceGetName * JCudaDriver#cuDeviceGet * JCudaDriver#cuDeviceTotalMem * JCudaDriver#cudaGetDeviceProperties */ public static int cuDeviceGetUuid(CUuuid uuid, CUdevice dev) { return checkResult(cuDeviceGetUuidNative(uuid, dev)); } private static native int cuDeviceGetUuidNative(CUuuid uuid, CUdevice dev); /** * Return an LUID and device node mask for the device. * * Return identifying information (\p luid and \p deviceNodeMask) to allow * matching device with graphics APIs. 
* * @param luid - Returned LUID * @param deviceNodeMask - Returned device node mask * @param dev - Device to get identifier string for * * @return CUDA_SUCCESS, * CUDA_ERROR_DEINITIALIZED, * CUDA_ERROR_NOT_INITIALIZED, * CUDA_ERROR_INVALID_VALUE, * CUDA_ERROR_INVALID_DEVICE * * @see JCudaDriver#cuDeviceGetAttribute * JCudaDriver#cuDeviceGetCount * JCudaDriver#cuDeviceGetName * JCudaDriver#cuDeviceGet * JCudaDriver#cuDeviceTotalMem * JCudaDriver#cudaGetDeviceProperties */ public static int cuDeviceGetLuid(byte luid[], int deviceNodeMask[], CUdevice dev) { return checkResult(cuDeviceGetLuidNative(luid, deviceNodeMask, dev)); } public static native int cuDeviceGetLuidNative(byte luid[], int deviceNodeMask[], CUdevice dev); /** * Returns the compute capability of the device. * *
     * CUresult cuDeviceComputeCapability (
     *      int* major,
     *      int* minor,
     *      CUdevice dev )
     * 
*
*

Returns the compute capability of the * device. * DeprecatedThis function was deprecated * as of CUDA 5.0 and its functionality superceded by * cuDeviceGetAttribute(). *

*

Returns in *major and *minor the major and minor revision numbers that define the * compute capability of the device dev. *

*
* Note: *

Note that this * function may also return error codes from previous, asynchronous * launches. *

*
*

*
* * @param major Major revision number * @param minor Minor revision number * @param dev Device handle * * @return CUDA_SUCCESS, CUDA_ERROR_DEINITIALIZED, CUDA_ERROR_NOT_INITIALIZED, * CUDA_ERROR_INVALID_CONTEXT, CUDA_ERROR_INVALID_VALUE, * CUDA_ERROR_INVALID_DEVICE * * @see JCudaDriver#cuDeviceGetAttribute * @see JCudaDriver#cuDeviceGetCount * @see JCudaDriver#cuDeviceGetName * @see JCudaDriver#cuDeviceGet * @see JCudaDriver#cuDeviceTotalMem * * @deprecated Deprecated as of CUDA 5.0, replaced with {@link JCudaDriver#cuDeviceGetAttribute(int[], int, CUdevice)} */ @Deprecated public static int cuDeviceComputeCapability(int major[], int minor[], CUdevice dev) { return checkResult(cuDeviceComputeCapabilityNative(major, minor, dev)); } private static native int cuDeviceComputeCapabilityNative(int major[], int minor[], CUdevice dev); /** * Retain the primary context on the GPU. * * Retains the primary context on the device. * Once the user successfully retains the primary context, the primary context * will be active and available to the user until the user releases it * with ::cuDevicePrimaryCtxRelease() or resets it with ::cuDevicePrimaryCtxReset(). * Unlike ::cuCtxCreate() the newly retained context is not pushed onto the stack. * * Retaining the primary context for the first time will fail with ::CUDA_ERROR_UNKNOWN * if the compute mode of the device is ::CU_COMPUTEMODE_PROHIBITED. The function * ::cuDeviceGetAttribute() can be used with ::CU_DEVICE_ATTRIBUTE_COMPUTE_MODE to * determine the compute mode of the device. * The nvidia-smi tool can be used to set the compute mode for * devices. Documentation for nvidia-smi can be obtained by passing a * -h option to it. * * Please note that the primary context always supports pinned allocations. Other * flags can be specified by ::cuDevicePrimaryCtxSetFlags(). 
* * @param pctx Returned context handle of the new context * @param dev - Device for which primary context is requested * * @return * CUDA_SUCCESS, * CUDA_ERROR_DEINITIALIZED, * CUDA_ERROR_NOT_INITIALIZED, * CUDA_ERROR_INVALID_CONTEXT, * CUDA_ERROR_INVALID_DEVICE, * CUDA_ERROR_INVALID_VALUE, * CUDA_ERROR_OUT_OF_MEMORY, * CUDA_ERROR_UNKNOWN * * @see JCudaDriver#cuDevicePrimaryCtxRelease * @see JCudaDriver#cuDevicePrimaryCtxSetFlags * @see JCudaDriver#cuCtxCreate * @see JCudaDriver#cuCtxGetApiVersion * @see JCudaDriver#cuCtxGetCacheConfig * @see JCudaDriver#cuCtxGetDevice * @see JCudaDriver#cuCtxGetFlags * @see JCudaDriver#cuCtxGetLimit * @see JCudaDriver#cuCtxPopCurrent * @see JCudaDriver#cuCtxPushCurrent * @see JCudaDriver#cuCtxSetCacheConfig * @see JCudaDriver#cuCtxSetLimit * @see JCudaDriver#cuCtxSynchronize */ public static int cuDevicePrimaryCtxRetain(CUcontext pctx, CUdevice dev) { return checkResult(cuDevicePrimaryCtxRetainNative(pctx, dev)); } private static native int cuDevicePrimaryCtxRetainNative(CUcontext pctx, CUdevice dev); /** * Release the primary context on the GPU. * * Releases the primary context interop on the device. * A retained context should always be released once the user is done using * it. The context is automatically reset once the last reference to it is * released. This behavior is different when the primary context was retained * by the CUDA runtime from CUDA 4.0 and earlier. In this case, the primary * context remains always active. * * Releasing a primary context that has not been previously retained will * fail with ::CUDA_ERROR_INVALID_CONTEXT. * * Please note that unlike ::cuCtxDestroy() this method does not pop the context * from stack in any circumstances. 
* * @param dev Device which primary context is released * * @return * CUDA_SUCCESS, * CUDA_ERROR_DEINITIALIZED, * CUDA_ERROR_NOT_INITIALIZED, * CUDA_ERROR_INVALID_DEVICE, * CUDA_ERROR_INVALID_CONTEXT * * @see JCudaDriver#cuDevicePrimaryCtxRetain * @see JCudaDriver#cuCtxDestroy * @see JCudaDriver#cuCtxGetApiVersion * @see JCudaDriver#cuCtxGetCacheConfig * @see JCudaDriver#cuCtxGetDevice * @see JCudaDriver#cuCtxGetFlags * @see JCudaDriver#cuCtxGetLimit * @see JCudaDriver#cuCtxPopCurrent * @see JCudaDriver#cuCtxPushCurrent * @see JCudaDriver#cuCtxSetCacheConfig * @see JCudaDriver#cuCtxSetLimit * @see JCudaDriver#cuCtxSynchronize */ public static int cuDevicePrimaryCtxRelease(CUdevice dev) { return checkResult(cuDevicePrimaryCtxReleaseNative(dev)); } private static native int cuDevicePrimaryCtxReleaseNative(CUdevice dev); /** * Set flags for the primary context. * * Sets the flags for the primary context on the device overwriting perviously * set ones. * * The three LSBs of the \p flags parameter can be used to control how the OS * thread, which owns the CUDA context at the time of an API call, interacts * with the OS scheduler when waiting for results from the GPU. Only one of * the scheduling flags can be set when creating a context. *
*
* CU_CTX_SCHED_SPIN: Instruct CUDA to actively spin when waiting for * results from the GPU. This can decrease latency when waiting for the GPU, * but may lower the performance of CPU threads if they are performing work in * parallel with the CUDA thread. *
*
* CU_CTX_SCHED_YIELD: Instruct CUDA to yield its thread when waiting for * results from the GPU. This can increase latency when waiting for the GPU, * but can increase the performance of CPU threads performing work in parallel * with the GPU. *
*
* CU_CTX_SCHED_BLOCKING_SYNC: Instruct CUDA to block the CPU thread on a * synchronization primitive when waiting for the GPU to finish work. *
*
* CU_CTX_BLOCKING_SYNC: Instruct CUDA to block the CPU thread on a * synchronization primitive when waiting for the GPU to finish work.
* Deprecated: This flag was deprecated as of CUDA 4.0 and was * replaced with ::CU_CTX_SCHED_BLOCKING_SYNC. *
*
* CU_CTX_SCHED_AUTO: The default value if the \p flags parameter is zero, * uses a heuristic based on the number of active CUDA contexts in the * process \e C and the number of logical processors in the system \e P. If * \e C > \e P, then CUDA will yield to other OS threads when waiting for * the GPU (::CU_CTX_SCHED_YIELD), otherwise CUDA will not yield while * waiting for results and actively spin on the processor (::CU_CTX_SCHED_SPIN). * Additionally, on Tegra devices, ::CU_CTX_SCHED_AUTO uses a heuristic based on * the power profile of the platform and may choose ::CU_CTX_SCHED_BLOCKING_SYNC * for low-powered devices. *
*
* CU_CTX_LMEM_RESIZE_TO_MAX: Instruct CUDA to not reduce local memory * after resizing local memory for a kernel. This can prevent thrashing by * local memory allocations when launching many kernels with high local * memory usage at the cost of potentially increased memory usage.
* Deprecated: This flag is deprecated and the behavior enabled * by this flag is now the default and cannot be disabled. * * @param dev Device for which the primary context flags are set * @param flags New flags for the device * * @return * CUDA_SUCCESS, * CUDA_ERROR_DEINITIALIZED, * CUDA_ERROR_NOT_INITIALIZED, * CUDA_ERROR_INVALID_DEVICE, * CUDA_ERROR_INVALID_VALUE, * * @see JCudaDriver#cuDevicePrimaryCtxRetain * @see JCudaDriver#cuDevicePrimaryCtxGetState * @see JCudaDriver#cuCtxCreate * @see JCudaDriver#cuCtxGetFlags * @see JCudaDriver#cudaSetDeviceFlags */ public static int cuDevicePrimaryCtxSetFlags(CUdevice dev, int flags) { return checkResult(cuDevicePrimaryCtxSetFlagsNative(dev, flags)); } private static native int cuDevicePrimaryCtxSetFlagsNative(CUdevice dev, int flags); /** * Get the state of the primary context. * * Returns in \p *flags the flags for the primary context of \p dev, and in * \p *active whether it is active. See ::cuDevicePrimaryCtxSetFlags for flag * values. * * @param dev Device to get primary context flags for * @param flags Pointer to store flags * @param active Pointer to store context state; 0 = inactive, 1 = active * * @return * CUDA_SUCCESS, * CUDA_ERROR_DEINITIALIZED, * CUDA_ERROR_NOT_INITIALIZED, * CUDA_ERROR_INVALID_DEVICE, * CUDA_ERROR_INVALID_VALUE, * * @see JCudaDriver#cuDevicePrimaryCtxSetFlags, * @see JCudaDriver#cuCtxGetFlags, * @see JCudaDriver#cudaGetDeviceFlags */ public static int cuDevicePrimaryCtxGetState(CUdevice dev, int flags[], int active[]) { return checkResult(cuDevicePrimaryCtxGetStateNative(dev, flags, active)); } private static native int cuDevicePrimaryCtxGetStateNative(CUdevice dev, int flags[], int active[]); /** * Destroy all allocations and reset all state on the primary context. * * Explicitly destroys and cleans up all resources associated with the current * device in the current process. 
* * Note that it is responsibility of the calling function to ensure that no * other module in the process is using the device any more. For that reason * it is recommended to use ::cuDevicePrimaryCtxRelease() in most cases. * However it is safe for other modules to call ::cuDevicePrimaryCtxRelease() * even after resetting the device. * Resetting the primary context does not release it, an application that has * retained the primary context should explicitly release its usage. * * @param dev Device for which primary context is destroyed * * @return * CUDA_SUCCESS, * CUDA_ERROR_DEINITIALIZED, * CUDA_ERROR_NOT_INITIALIZED, * CUDA_ERROR_INVALID_DEVICE, * CUDA_ERROR_PRIMARY_CONTEXT_ACTIVE * * @see JCudaDriver#cuDevicePrimaryCtxRetain * @see JCudaDriver#cuDevicePrimaryCtxRelease * @see JCudaDriver#cuCtxGetApiVersion * @see JCudaDriver#cuCtxGetCacheConfig * @see JCudaDriver#cuCtxGetDevice * @see JCudaDriver#cuCtxGetFlags * @see JCudaDriver#cuCtxGetLimit * @see JCudaDriver#cuCtxPopCurrent * @see JCudaDriver#cuCtxPushCurrent * @see JCudaDriver#cuCtxSetCacheConfig * @see JCudaDriver#cuCtxSetLimit * @see JCudaDriver#cuCtxSynchronize * @see JCudaDriver#cudaDeviceReset */ public static int cuDevicePrimaryCtxReset(CUdevice dev) { return checkResult(cuDevicePrimaryCtxResetNative(dev)); } private static native int cuDevicePrimaryCtxResetNative(CUdevice dev); /** * Returns the total amount of memory on the device. * *
     * CUresult cuDeviceTotalMem (
     *      size_t* bytes,
     *      CUdevice dev )
     * 
*
*

Returns the total amount of memory on * the device. Returns in *bytes the total amount of memory * available on the device dev in bytes. *

*
* Note: *

Note that this * function may also return error codes from previous, asynchronous * launches. *

*
*

*
* * @param bytes Returned memory available on device in bytes * @param dev Device handle * * @return CUDA_SUCCESS, CUDA_ERROR_DEINITIALIZED, CUDA_ERROR_NOT_INITIALIZED, * CUDA_ERROR_INVALID_CONTEXT, CUDA_ERROR_INVALID_VALUE, * CUDA_ERROR_INVALID_DEVICE * * @see JCudaDriver#cuDeviceGetAttribute * @see JCudaDriver#cuDeviceGetCount * @see JCudaDriver#cuDeviceGetName * @see JCudaDriver#cuDeviceGet */ public static int cuDeviceTotalMem(long bytes[], CUdevice dev) { return checkResult(cuDeviceTotalMemNative(bytes, dev)); } private static native int cuDeviceTotalMemNative(long bytes[], CUdevice dev); /** * Returns the maximum number of elements allocatable in a 1D linear * texture for a given texture element size. * * Returns in \p maxWidthInElements the maximum number of texture elements * allocatable in a 1D linear texture for given \p format and \p numChannels. * * @param maxWidthInElements Returned maximum number of texture elements allocatable for given \p format and \p numChannels. * @param format Texture format. * @param numChannels Number of channels per texture element. * @param dev Device handle. * * @return * CUDA_SUCCESS, CUDA_ERROR_DEINITIALIZED, CUDA_ERROR_NOT_INITIALIZED, * CUDA_ERROR_INVALID_CONTEXT, CUDA_ERROR_INVALID_VALUE, CUDA_ERROR_INVALID_DEVICE * * @see JCudaDriver#cuDeviceGetAttribute, * @see JCudaDriver#cuDeviceGetCount, * @see JCudaDriver#cuDeviceGetName, * @see JCudaDriver#cuDeviceGetUuid, * @see JCudaDriver#cuDeviceGet, * @see JCudaDriver#cudaMemGetInfo * @see JCudaDriver#cuDeviceTotalMem */ public static int cuDeviceGetTexture1DLinearMaxWidth(long maxWidthInElements[], int format, int numChannels, CUdevice dev) { return checkResult(cuDeviceGetTexture1DLinearMaxWidthNative(maxWidthInElements, format, numChannels, dev)); } private static native int cuDeviceGetTexture1DLinearMaxWidthNative(long maxWidthInElements[], int format, int numChannels, CUdevice dev); /** * Returns properties for a selected device. * *
     * CUresult cuDeviceGetProperties (
     *      CUdevprop* prop,
     *      CUdevice dev )
     * 
*
*

Returns properties for a selected device. * DeprecatedThis function was deprecated * as of CUDA 5.0 and replaced by cuDeviceGetAttribute(). *

*

Returns in *prop the properties * of device dev. The CUdevprop structure is defined as: *

*
     typedef struct CUdevprop_st {
     *      int maxThreadsPerBlock;
     *      int maxThreadsDim[3];
     *      int maxGridSize[3];
     *      int sharedMemPerBlock;
     *      int totalConstantMemory;
     *      int SIMDWidth;
     *      int memPitch;
     *      int regsPerBlock;
     *      int clockRate;
     *      int textureAlign
     *   } CUdevprop;
* where:

*
    *
  • *

    maxThreadsPerBlock is the * maximum number of threads per block; *

    *
  • *
  • *

    maxThreadsDim[3] is the maximum * sizes of each dimension of a block; *

    *
  • *
  • *

    maxGridSize[3] is the maximum * sizes of each dimension of a grid; *

    *
  • *
  • *

    sharedMemPerBlock is the total * amount of shared memory available per block in bytes; *

    *
  • *
  • *

    totalConstantMemory is the * total amount of constant memory available on the device in bytes; *

    *
  • *
  • *

    SIMDWidth is the warp * size; *

    *
  • *
  • *

    memPitch is the maximum pitch * allowed by the memory copy functions that involve memory regions * allocated through cuMemAllocPitch(); *

    *
  • *
  • *

    regsPerBlock is the total * number of registers available per block; *

    *
  • *
  • *

    clockRate is the clock frequency * in kilohertz; *

    *
  • *
  • *

    textureAlign is the alignment * requirement; texture base addresses that are aligned to textureAlign * bytes do not need an offset * applied to texture fetches. *

    *
  • *
*

*
* Note: *

Note that this * function may also return error codes from previous, asynchronous * launches. *

*
*

*
* * @param prop Returned properties of device * @param dev Device to get properties for * * @return CUDA_SUCCESS, CUDA_ERROR_DEINITIALIZED, CUDA_ERROR_NOT_INITIALIZED, * CUDA_ERROR_INVALID_CONTEXT, CUDA_ERROR_INVALID_VALUE, * CUDA_ERROR_INVALID_DEVICE * * @see JCudaDriver#cuDeviceGetAttribute * @see JCudaDriver#cuDeviceGetCount * @see JCudaDriver#cuDeviceGetName * @see JCudaDriver#cuDeviceGet * @see JCudaDriver#cuDeviceTotalMem * * @deprecated Deprecated as of CUDA 5.0, replaced with {@link JCudaDriver#cuDeviceGetAttribute(int[], int, CUdevice)} */ @Deprecated public static int cuDeviceGetProperties(CUdevprop prop, CUdevice dev) { return checkResult(cuDeviceGetPropertiesNative(prop, dev)); } private static native int cuDeviceGetPropertiesNative(CUdevprop prop, CUdevice dev); /** * Returns information about the device. * *
     * CUresult cuDeviceGetAttribute (
     *      int* pi,
     *      CUdevice_attribute attrib,
     *      CUdevice dev )
     * 
*
*

Returns information about the device. * Returns in *pi the integer value of the attribute attrib on device dev. The supported attributes are: *

    *
  • *

    CU_DEVICE_ATTRIBUTE_MAX_THREADS_PER_BLOCK: Maximum number of threads * per block; *

    *
  • *
  • *

    CU_DEVICE_ATTRIBUTE_MAX_BLOCK_DIM_X: * Maximum x-dimension of a block; *

    *
  • *
  • *

    CU_DEVICE_ATTRIBUTE_MAX_BLOCK_DIM_Y: * Maximum y-dimension of a block; *

    *
  • *
  • *

    CU_DEVICE_ATTRIBUTE_MAX_BLOCK_DIM_Z: * Maximum z-dimension of a block; *

    *
  • *
  • *

    CU_DEVICE_ATTRIBUTE_MAX_GRID_DIM_X: * Maximum x-dimension of a grid; *

    *
  • *
  • *

    CU_DEVICE_ATTRIBUTE_MAX_GRID_DIM_Y: * Maximum y-dimension of a grid; *

    *
  • *
  • *

    CU_DEVICE_ATTRIBUTE_MAX_GRID_DIM_Z: * Maximum z-dimension of a grid; *

    *
  • *
  • *

    CU_DEVICE_ATTRIBUTE_MAX_SHARED_MEMORY_PER_BLOCK: Maximum amount of * shared memory available to a thread block in bytes; this amount is * shared by all thread blocks simultaneously * resident on a multiprocessor; *

    *
  • *
  • *

    CU_DEVICE_ATTRIBUTE_TOTAL_CONSTANT_MEMORY: Memory available on device * for __constant__ variables in a CUDA C kernel in bytes; *

    *
  • *
  • *

    CU_DEVICE_ATTRIBUTE_WARP_SIZE: * Warp size in threads; *

    *
  • *
  • *

    CU_DEVICE_ATTRIBUTE_MAX_PITCH: * Maximum pitch in bytes allowed by the memory copy functions that * involve memory regions allocated through cuMemAllocPitch(); *

    *
  • *
  • *

    CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE1D_WIDTH: Maximum 1D texture * width; *

    *
  • *
  • *

    CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE1D_LINEAR_WIDTH: Maximum width for * a 1D texture bound to linear memory; *

    *
  • *
  • *

    CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE1D_MIPMAPPED_WIDTH: Maximum * mipmapped 1D texture width; *

    *
  • *
  • *

    CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE2D_WIDTH: Maximum 2D texture * width; *

    *
  • *
  • *

    CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE2D_HEIGHT: Maximum 2D texture * height; *

    *
  • *
  • *

    CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE2D_LINEAR_WIDTH: Maximum width for * a 2D texture bound to linear memory; *

    *
  • *
  • *

    CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE2D_LINEAR_HEIGHT: Maximum height * for a 2D texture bound to linear memory; *

    *
  • *
  • *

    CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE2D_LINEAR_PITCH: Maximum pitch in * bytes for a 2D texture bound to linear memory; *

    *
  • *
  • *

    CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE2D_MIPMAPPED_WIDTH: Maximum * mipmapped 2D texture width; *

    *
  • *
  • *

    CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE2D_MIPMAPPED_HEIGHT: Maximum * mipmapped 2D texture height; *

    *
  • *
  • *

    CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE3D_WIDTH: Maximum 3D texture * width; *

    *
  • *
  • *

    CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE3D_HEIGHT: Maximum 3D texture * height; *

    *
  • *
  • *

    CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE3D_DEPTH: Maximum 3D texture * depth; *

    *
  • *
  • *

    CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE3D_WIDTH_ALTERNATE: Alternate * maximum 3D texture width, 0 if no alternate maximum 3D texture size is * supported; *

    *
  • *
  • *

    CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE3D_HEIGHT_ALTERNATE: Alternate * maximum 3D texture height, 0 if no alternate maximum 3D texture size * is supported; *

    *
  • *
  • *

    CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE3D_DEPTH_ALTERNATE: Alternate * maximum 3D texture depth, 0 if no alternate maximum 3D texture size is * supported; *

    *
  • *
  • *

    CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURECUBEMAP_WIDTH: Maximum cubemap * texture width or height; *

    *
  • *
  • *

    CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE1D_LAYERED_WIDTH: Maximum 1D * layered texture width; *

    *
  • *
  • *

    CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE1D_LAYERED_LAYERS: Maximum layers * in a 1D layered texture; *

    *
  • *
  • *

    CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE2D_LAYERED_WIDTH: Maximum 2D * layered texture width; *

    *
  • *
  • *

    CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE2D_LAYERED_HEIGHT: Maximum 2D * layered texture height; *

    *
  • *
  • *

    CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE2D_LAYERED_LAYERS: Maximum layers * in a 2D layered texture; *

    *
  • *
  • *

    CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURECUBEMAP_LAYERED_WIDTH: Maximum * cubemap layered texture width or height; *

    *
  • *
  • *

    CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURECUBEMAP_LAYERED_LAYERS: Maximum * layers in a cubemap layered texture; *

    *
  • *
  • *

    CU_DEVICE_ATTRIBUTE_MAXIMUM_SURFACE1D_WIDTH: Maximum 1D surface * width; *

    *
  • *
  • *

    CU_DEVICE_ATTRIBUTE_MAXIMUM_SURFACE2D_WIDTH: Maximum 2D surface * width; *

    *
  • *
  • *

    CU_DEVICE_ATTRIBUTE_MAXIMUM_SURFACE2D_HEIGHT: Maximum 2D surface * height; *

    *
  • *
  • *

    CU_DEVICE_ATTRIBUTE_MAXIMUM_SURFACE3D_WIDTH: Maximum 3D surface * width; *

    *
  • *
  • *

    CU_DEVICE_ATTRIBUTE_MAXIMUM_SURFACE3D_HEIGHT: Maximum 3D surface * height; *

    *
  • *
  • *

    CU_DEVICE_ATTRIBUTE_MAXIMUM_SURFACE3D_DEPTH: Maximum 3D surface * depth; *

    *
  • *
  • *

    CU_DEVICE_ATTRIBUTE_MAXIMUM_SURFACE1D_LAYERED_WIDTH: Maximum 1D * layered surface width; *

    *
  • *
  • *

    CU_DEVICE_ATTRIBUTE_MAXIMUM_SURFACE1D_LAYERED_LAYERS: Maximum layers * in a 1D layered surface; *

    *
  • *
  • *

    CU_DEVICE_ATTRIBUTE_MAXIMUM_SURFACE2D_LAYERED_WIDTH: Maximum 2D * layered surface width; *

    *
  • *
  • *

    CU_DEVICE_ATTRIBUTE_MAXIMUM_SURFACE2D_LAYERED_HEIGHT: Maximum 2D * layered surface height; *

    *
  • *
  • *

    CU_DEVICE_ATTRIBUTE_MAXIMUM_SURFACE2D_LAYERED_LAYERS: Maximum layers * in a 2D layered surface; *

    *
  • *
  • *

    CU_DEVICE_ATTRIBUTE_MAXIMUM_SURFACECUBEMAP_WIDTH: Maximum cubemap * surface width; *

    *
  • *
  • *

    CU_DEVICE_ATTRIBUTE_MAXIMUM_SURFACECUBEMAP_LAYERED_WIDTH: Maximum * cubemap layered surface width; *

    *
  • *
  • *

    CU_DEVICE_ATTRIBUTE_MAXIMUM_SURFACECUBEMAP_LAYERED_LAYERS: Maximum * layers in a cubemap layered surface; *

    *
  • *
  • *

    CU_DEVICE_ATTRIBUTE_MAX_REGISTERS_PER_BLOCK: Maximum number of 32-bit * registers available to a thread block; this number is shared by all * thread blocks simultaneously * resident on a multiprocessor; *

    *
  • *
  • *

    CU_DEVICE_ATTRIBUTE_CLOCK_RATE: * Typical clock frequency in kilohertz; *

    *
  • *
  • *

    CU_DEVICE_ATTRIBUTE_TEXTURE_ALIGNMENT: * Alignment requirement; texture base addresses aligned to textureAlign * bytes do not need an offset applied to texture fetches; *

    *
  • *
  • *

    CU_DEVICE_ATTRIBUTE_TEXTURE_PITCH_ALIGNMENT: Pitch alignment * requirement for 2D texture references bound to pitched memory; *

    *
  • *
  • *

    CU_DEVICE_ATTRIBUTE_GPU_OVERLAP: * 1 if the device can concurrently copy memory between host and device * while executing a kernel, or 0 if not; *

    *
  • *
  • *

    CU_DEVICE_ATTRIBUTE_MULTIPROCESSOR_COUNT: Number of multiprocessors * on the device; *

    *
  • *
  • *

    CU_DEVICE_ATTRIBUTE_KERNEL_EXEC_TIMEOUT: * 1 if there is a run time limit for kernels executed on the device, or * 0 if not; *

    *
  • *
  • *

    CU_DEVICE_ATTRIBUTE_INTEGRATED: * 1 if the device is integrated with the memory subsystem, or 0 if not; *

    *
  • *
  • *

    CU_DEVICE_ATTRIBUTE_CAN_MAP_HOST_MEMORY: * 1 if the device can map host memory into the CUDA address space, or 0 * if not; *

    *
  • *
  • *
    * CU_DEVICE_ATTRIBUTE_COMPUTE_MODE: * Compute mode that device is currently in. Available modes are as * follows: *
      *
    • *

      CU_COMPUTEMODE_DEFAULT: * Default mode - Device is not restricted and can have multiple CUDA * contexts present at a single time. *

      *
    • *
    • *

      CU_COMPUTEMODE_EXCLUSIVE: * Compute-exclusive mode - Device can have only one CUDA context present * on it at a time. *

      *
    • *
    • *

      CU_COMPUTEMODE_PROHIBITED: * Compute-prohibited mode - Device is prohibited from creating new CUDA * contexts. *

      *
    • *
    • *

      CU_COMPUTEMODE_EXCLUSIVE_PROCESS: Compute-exclusive-process mode - * Device can have only one context used by a single process at a time. *

      *
    • *
    *
    *
  • *
  • *

    CU_DEVICE_ATTRIBUTE_CONCURRENT_KERNELS: * 1 if the device supports executing multiple kernels within the same * context simultaneously, or 0 if not. It is not guaranteed * that multiple kernels will be * resident on the device concurrently so this feature should not be * relied upon for correctness; *

    *
  • *
  • *

    CU_DEVICE_ATTRIBUTE_ECC_ENABLED: * 1 if error correction is enabled on the device, 0 if error correction * is disabled or not supported by the device; *

    *
  • *
  • *

    CU_DEVICE_ATTRIBUTE_PCI_BUS_ID: * PCI bus identifier of the device; *

    *
  • *
  • *

    CU_DEVICE_ATTRIBUTE_PCI_DEVICE_ID: * PCI device (also known as slot) identifier of the device; *

    *
  • *
  • *

    CU_DEVICE_ATTRIBUTE_PCI_DOMAIN_ID: * PCI domain identifier of the device *

    *
  • *
  • *

    CU_DEVICE_ATTRIBUTE_TCC_DRIVER: * 1 if the device is using a TCC driver. TCC is only available on Tesla * hardware running Windows Vista or later; *

    *
  • *
  • *

    CU_DEVICE_ATTRIBUTE_MEMORY_CLOCK_RATE: * Peak memory clock frequency in kilohertz; *

    *
  • *
  • *

    CU_DEVICE_ATTRIBUTE_GLOBAL_MEMORY_BUS_WIDTH: Global memory bus width * in bits; *

    *
  • *
  • *

    CU_DEVICE_ATTRIBUTE_L2_CACHE_SIZE: * Size of L2 cache in bytes. 0 if the device doesn't have L2 cache; *

    *
  • *
  • *

    CU_DEVICE_ATTRIBUTE_MAX_THREADS_PER_MULTIPROCESSOR: Maximum resident * threads per multiprocessor; *

    *
  • *
  • *

    CU_DEVICE_ATTRIBUTE_UNIFIED_ADDRESSING: * 1 if the device shares a unified address space with the host, or 0 if * not; *

    *
  • *
  • *

    CU_DEVICE_ATTRIBUTE_COMPUTE_CAPABILITY_MAJOR: Major compute capability * version number; *

    *
  • *
  • *

    CU_DEVICE_ATTRIBUTE_COMPUTE_CAPABILITY_MINOR: Minor compute capability * version number; *

    *
  • *
  • *

    CU_DEVICE_ATTRIBUTE_GLOBAL_L1_CACHE_SUPPORTED: 1 if device supports caching globals * in L1 cache, 0 if caching globals in L1 cache is not supported by the device *

    *
  • *
  • *

    CU_DEVICE_ATTRIBUTE_LOCAL_L1_CACHE_SUPPORTED: 1 if device supports caching locals * in L1 cache, 0 if caching locals in L1 cache is not supported by the device; *

    *
  • *
  • *

    CU_DEVICE_ATTRIBUTE_MAX_SHARED_MEMORY_PER_MULTIPROCESSOR: Maximum amount of * shared memory available to a multiprocessor in bytes; this amount is shared * by all thread blocks simultaneously resident on a multiprocessor; *

    *
  • *
  • *

    CU_DEVICE_ATTRIBUTE_MAX_REGISTERS_PER_MULTIPROCESSOR: Maximum number of 32-bit * registers available to a multiprocessor; this number is shared by all thread * blocks simultaneously resident on a multiprocessor; *

    *
  • *
  • *

    CU_DEVICE_ATTRIBUTE_MANAGED_MEMORY: 1 if device supports allocating managed memory * on this system, 0 if allocating managed memory is not supported by the device on this system. *

    *
  • *
  • *

    CU_DEVICE_ATTRIBUTE_MULTI_GPU_BOARD: 1 if device is on a multi-GPU board, 0 if not. *

    *
  • *
  • *

    CU_DEVICE_ATTRIBUTE_MULTI_GPU_BOARD_GROUP_ID: Unique identifier for a group of devices * associated with the same board. Devices on the same multi-GPU board will share the same identifier. *

    *
  • *
  • *

    CU_DEVICE_ATTRIBUTE_HOST_NATIVE_ATOMIC_SUPPORTED: 1 if Link between the device and the host * supports native atomic operations. *

    *
  • *
  • *

    CU_DEVICE_ATTRIBUTE_SINGLE_TO_DOUBLE_PRECISION_PERF_RATIO: Ratio of single precision performance * (in floating-point operations per second) to double precision performance. *

    *
  • *
  • *

    CU_DEVICE_ATTRIBUTE_PAGEABLE_MEMORY_ACCESS: Device suppports coherently accessing * pageable memory without calling cudaHostRegister on it. *

    *
  • *
  • *

    CU_DEVICE_ATTRIBUTE_CONCURRENT_MANAGED_ACCESS: Device can coherently access managed memory * concurrently with the CPU. *

    *
  • *
  • *

    CU_DEVICE_ATTRIBUTE_COMPUTE_PREEMPTION_SUPPORTED: Device supports Compute Preemption. *

    *
  • *
  • *

    CU_DEVICE_ATTRIBUTE_CAN_USE_HOST_POINTER_FOR_REGISTERED_MEM: Device can access host registered * memory at the same virtual address as the CPU. *

    *
  • *
  • *

    CU_DEVICE_ATTRIBUTE_MAX_SHARED_MEMORY_PER_BLOCK_OPTIN: The maximum per block shared memory size * suported on this device. This is the maximum value that can be opted into when using the cuFuncSetAttribute() call. * For more details see ::CU_FUNC_ATTRIBUTE_MAX_DYNAMIC_SHARED_SIZE_BYTES *

    *
  • *
*

*
* Note: *

Note that this * function may also return error codes from previous, asynchronous * launches. *

*
*

*
* * @param pi Returned device attribute value * @param attrib Device attribute to query * @param dev Device handle * * @return CUDA_SUCCESS, CUDA_ERROR_DEINITIALIZED, CUDA_ERROR_NOT_INITIALIZED, * CUDA_ERROR_INVALID_CONTEXT, CUDA_ERROR_INVALID_VALUE, * CUDA_ERROR_INVALID_DEVICE * * @see JCudaDriver#cuDeviceGetCount * @see JCudaDriver#cuDeviceGetName * @see JCudaDriver#cuDeviceGet * @see JCudaDriver#cuDeviceTotalMem */ public static int cuDeviceGetAttribute(int pi[], int attrib, CUdevice dev) { return checkResult(cuDeviceGetAttributeNative(pi, attrib, dev)); } private static native int cuDeviceGetAttributeNative(int pi[], int attrib, CUdevice dev); /** * Returns the latest CUDA version supported by driver. * *
     * <pre>
     * CUresult cuDriverGetVersion(int* driverVersion)
     * </pre>
     * <p>
     * Returns in <code>driverVersion[0]</code> the version of CUDA supported
     * by the driver. The version is returned as
     * <code>(1000 * major + 10 * minor)</code>; for example, CUDA 9.2 would
     * be represented by 9020. This function returns
     * CUDA_ERROR_INVALID_VALUE if the <code>driverVersion</code> argument
     * is NULL.
     * <p>
     * Note: This function may also return error codes from previous,
     * asynchronous launches.
     *
     * @param driverVersion Returns the CUDA driver version
     *
     * @return CUDA_SUCCESS, CUDA_ERROR_INVALID_VALUE
     */
    public static int cuDriverGetVersion (int driverVersion[])
    {
        int status = cuDriverGetVersionNative(driverVersion);
        return checkResult(status);
    }
    private static native int cuDriverGetVersionNative(int driverVersion[]);

    /**
     * Create a CUDA context.
     *
     * CUresult cuCtxCreate (
     *      CUcontext* pctx,
     *      unsigned int  flags,
     *      CUdevice dev )
     * 
*
*

Create a CUDA context. Creates a new * CUDA context and associates it with the calling thread. The flags parameter is described below. The context is created with * a usage count of 1 and the caller of cuCtxCreate() must call * cuCtxDestroy() or when done using the context. If a context is already * current to the thread, it is supplanted by the newly created context * and may be restored by a subsequent call * to cuCtxPopCurrent(). *

*

The three LSBs of the flags * parameter can be used to control how the OS thread, which owns the CUDA * context at the time of an API call, interacts with * the OS scheduler when waiting for results * from the GPU. Only one of the scheduling flags can be set when creating * a context. *

*
    *
  • *

    CU_CTX_SCHED_AUTO: The default * value if the flags parameter is zero, uses a heuristic based * on the number of active CUDA contexts in the process C and the number * of logical * processors in the system P. If * C > P, then CUDA will yield to other OS threads when waiting for * the GPU, otherwise CUDA will * not yield while waiting for * results and actively spin on the processor. *

    *
  • *
*

*
    *
  • *

    CU_CTX_SCHED_SPIN: Instruct * CUDA to actively spin when waiting for results from the GPU. This can * decrease latency when waiting for the GPU, * but may lower the performance * of CPU threads if they are performing work in parallel with the CUDA * thread. *

    *
  • *
*

*
    *
  • *

    CU_CTX_SCHED_YIELD: Instruct * CUDA to yield its thread when waiting for results from the GPU. This * can increase latency when waiting for the * GPU, but can increase the * performance of CPU threads performing work in parallel with the GPU. *

    *
  • *
*

*
    *
  • *

    CU_CTX_SCHED_BLOCKING_SYNC: * Instruct CUDA to block the CPU thread on a synchronization primitive * when waiting for the GPU to finish work. *

    *
  • *
*

*
    *
  • *

    CU_CTX_BLOCKING_SYNC: Instruct * CUDA to block the CPU thread on a synchronization primitive when * waiting for the GPU to finish work. *

    *

    Deprecated: * This flag was deprecated as of CUDA 4.0 and was replaced with * CU_CTX_SCHED_BLOCKING_SYNC. *

    *
  • *
*

*
    *
  • *

    CU_CTX_MAP_HOST: Instruct CUDA * to support mapped pinned allocations. This flag must be set in order * to allocate pinned host memory that is * accessible to the GPU. *

    *
  • *
*

*
    *
  • *

    CU_CTX_LMEM_RESIZE_TO_MAX: * Instruct CUDA to not reduce local memory after resizing local memory * for a kernel. This can prevent thrashing by local memory * allocations when launching many * kernels with high local memory usage at the cost of potentially * increased memory usage. *

    *
  • *
*

*

Context creation will fail with * CUDA_ERROR_UNKNOWN if the compute mode of the device is * CU_COMPUTEMODE_PROHIBITED. Similarly, context creation will also fail * with CUDA_ERROR_UNKNOWN if the compute mode for the device is set to * CU_COMPUTEMODE_EXCLUSIVE and there is already an active context on the * device. The function cuDeviceGetAttribute() can be used with * CU_DEVICE_ATTRIBUTE_COMPUTE_MODE to determine the compute mode of the * device. The nvidia-smi tool can be used to set the compute mode for * devices. Documentation * for nvidia-smi can be obtained by passing * a -h option to it. *

*
* Note: *

Note that this * function may also return error codes from previous, asynchronous * launches. *

*
*

*
* * @param pctx Returned context handle of the new context * @param flags Context creation flags * @param dev Device to create context on * * @return CUDA_SUCCESS, CUDA_ERROR_DEINITIALIZED, CUDA_ERROR_NOT_INITIALIZED, * CUDA_ERROR_INVALID_CONTEXT, CUDA_ERROR_INVALID_DEVICE, * CUDA_ERROR_INVALID_VALUE, CUDA_ERROR_OUT_OF_MEMORY, CUDA_ERROR_UNKNOWN * * @see JCudaDriver#cuCtxDestroy * @see JCudaDriver#cuCtxGetApiVersion * @see JCudaDriver#cuCtxGetCacheConfig * @see JCudaDriver#cuCtxGetDevice * @see JCudaDriver#cuCtxGetLimit * @see JCudaDriver#cuCtxPopCurrent * @see JCudaDriver#cuCtxPushCurrent * @see JCudaDriver#cuCtxSetCacheConfig * @see JCudaDriver#cuCtxSetLimit * @see JCudaDriver#cuCtxSynchronize */ public static int cuCtxCreate(CUcontext pctx, int flags, CUdevice dev) { return checkResult(cuCtxCreateNative(pctx, flags, dev)); } private static native int cuCtxCreateNative(CUcontext pctx, int flags, CUdevice dev); /** * Destroy a CUDA context. * *
     * CUresult cuCtxDestroy (
     *      CUcontext ctx )
     * 
*
*

Destroy a CUDA context. Destroys the * CUDA context specified by ctx. The context ctx will * be destroyed regardless of how many threads it is current to. It is * the responsibility of the calling function to ensure * that no API call issues using ctx while cuCtxDestroy() is executing. *

*

If ctx is current to the * calling thread then ctx will also be popped from the current * thread's context stack (as though cuCtxPopCurrent() were called). If * ctx is current to other threads, then ctx will * remain current to those threads, and attempting to access ctx * from those threads will result in the error * CUDA_ERROR_CONTEXT_IS_DESTROYED. *

*
* Note: *

Note that this * function may also return error codes from previous, asynchronous * launches. *

*
*

*
* * @param ctx Context to destroy * * @return CUDA_SUCCESS, CUDA_ERROR_DEINITIALIZED, CUDA_ERROR_NOT_INITIALIZED, * CUDA_ERROR_INVALID_CONTEXT, CUDA_ERROR_INVALID_VALUE * * @see JCudaDriver#cuCtxCreate * @see JCudaDriver#cuCtxGetApiVersion * @see JCudaDriver#cuCtxGetCacheConfig * @see JCudaDriver#cuCtxGetDevice * @see JCudaDriver#cuCtxGetLimit * @see JCudaDriver#cuCtxPopCurrent * @see JCudaDriver#cuCtxPushCurrent * @see JCudaDriver#cuCtxSetCacheConfig * @see JCudaDriver#cuCtxSetLimit * @see JCudaDriver#cuCtxSynchronize */ public static int cuCtxDestroy(CUcontext ctx) { return checkResult(cuCtxDestroyNative(ctx)); } private static native int cuCtxDestroyNative(CUcontext ctx); /** * Increment a context's usage-count. * *
     * CUresult cuCtxAttach (
     *      CUcontext* pctx,
     *      unsigned int  flags )
     * 
*
*

Increment a context's usage-count. * DeprecatedNote that this function is * deprecated and should not be used. *

*

Increments the usage count of the * context and passes back a context handle in *pctx that must * be passed to cuCtxDetach() when the application is done with the * context. cuCtxAttach() fails if there is no context current to the * thread. *

*

Currently, the flags parameter * must be 0. *

*
* Note: *

Note that this * function may also return error codes from previous, asynchronous * launches. *

*
*

*
* * @param pctx Returned context handle of the current context * @param flags Context attach flags (must be 0) * * @return CUDA_SUCCESS, CUDA_ERROR_DEINITIALIZED, CUDA_ERROR_NOT_INITIALIZED, * CUDA_ERROR_INVALID_CONTEXT, CUDA_ERROR_INVALID_VALUE * * @see JCudaDriver#cuCtxCreate * @see JCudaDriver#cuCtxDestroy * @see JCudaDriver#cuCtxDetach * @see JCudaDriver#cuCtxGetApiVersion * @see JCudaDriver#cuCtxGetCacheConfig * @see JCudaDriver#cuCtxGetDevice * @see JCudaDriver#cuCtxGetLimit * @see JCudaDriver#cuCtxPopCurrent * @see JCudaDriver#cuCtxPushCurrent * @see JCudaDriver#cuCtxSetCacheConfig * @see JCudaDriver#cuCtxSetLimit * @see JCudaDriver#cuCtxSynchronize * * @deprecated Deprecated in CUDA */ @Deprecated public static int cuCtxAttach(CUcontext pctx, int flags) { return checkResult(cuCtxAttachNative(pctx, flags)); } private static native int cuCtxAttachNative(CUcontext pctx, int flags); /** * Decrement a context's usage-count. * *
     * CUresult cuCtxDetach (
     *      CUcontext ctx )
     * 
*
*

Decrement a context's usage-count. * DeprecatedNote that this function is * deprecated and should not be used. *

*

Decrements the usage count of the * context ctx, and destroys the context if the usage count goes * to 0. The context must be a handle that was passed back by cuCtxCreate() * or cuCtxAttach(), and must be current to the calling thread. *

*
* Note: *

Note that this * function may also return error codes from previous, asynchronous * launches. *

*
*

*
* * @param ctx Context to destroy * * @return CUDA_SUCCESS, CUDA_ERROR_DEINITIALIZED, CUDA_ERROR_NOT_INITIALIZED, * CUDA_ERROR_INVALID_CONTEXT * * @see JCudaDriver#cuCtxCreate * @see JCudaDriver#cuCtxDestroy * @see JCudaDriver#cuCtxGetApiVersion * @see JCudaDriver#cuCtxGetCacheConfig * @see JCudaDriver#cuCtxGetDevice * @see JCudaDriver#cuCtxGetLimit * @see JCudaDriver#cuCtxPopCurrent * @see JCudaDriver#cuCtxPushCurrent * @see JCudaDriver#cuCtxSetCacheConfig * @see JCudaDriver#cuCtxSetLimit * @see JCudaDriver#cuCtxSynchronize * * @deprecated Deprecated in CUDA */ @Deprecated public static int cuCtxDetach(CUcontext ctx) { return checkResult(cuCtxDetachNative(ctx)); } private static native int cuCtxDetachNative(CUcontext ctx); /** * Pushes a context on the current CPU thread. * *
     * CUresult cuCtxPushCurrent (
     *      CUcontext ctx )
     * 
*
*

Pushes a context on the current CPU * thread. Pushes the given context ctx onto the CPU thread's * stack of current contexts. The specified context becomes the CPU * thread's current context, so all CUDA * functions that operate on the current * context are affected. *

*

The previous current context may be made * current again by calling cuCtxDestroy() or cuCtxPopCurrent(). *

*
* Note: *

Note that this * function may also return error codes from previous, asynchronous * launches. *

*
*

*
* * @param ctx Context to push * * @return CUDA_SUCCESS, CUDA_ERROR_DEINITIALIZED, CUDA_ERROR_NOT_INITIALIZED, * CUDA_ERROR_INVALID_CONTEXT, CUDA_ERROR_INVALID_VALUE * * @see JCudaDriver#cuCtxCreate * @see JCudaDriver#cuCtxDestroy * @see JCudaDriver#cuCtxGetApiVersion * @see JCudaDriver#cuCtxGetCacheConfig * @see JCudaDriver#cuCtxGetDevice * @see JCudaDriver#cuCtxGetLimit * @see JCudaDriver#cuCtxPopCurrent * @see JCudaDriver#cuCtxSetCacheConfig * @see JCudaDriver#cuCtxSetLimit * @see JCudaDriver#cuCtxSynchronize */ public static int cuCtxPushCurrent(CUcontext ctx) { return checkResult(cuCtxPushCurrentNative(ctx)); } private static native int cuCtxPushCurrentNative(CUcontext ctx); /** * Pops the current CUDA context from the current CPU thread. * *
     * CUresult cuCtxPopCurrent (
     *      CUcontext* pctx )
     * 
*
*

Pops the current CUDA context from the * current CPU thread. Pops the current CUDA context from the CPU thread * and passes back * the old context handle in *pctx. * That context may then be made current to a different CPU thread by * calling cuCtxPushCurrent(). *

*

If a context was current to the CPU * thread before cuCtxCreate() or cuCtxPushCurrent() was called, this * function makes that context current to the CPU thread again. *

*
* Note: *

Note that this * function may also return error codes from previous, asynchronous * launches. *

*
*

*
* * @param pctx Returned new context handle * * @return CUDA_SUCCESS, CUDA_ERROR_DEINITIALIZED, CUDA_ERROR_NOT_INITIALIZED, * CUDA_ERROR_INVALID_CONTEXT * * @see JCudaDriver#cuCtxCreate * @see JCudaDriver#cuCtxDestroy * @see JCudaDriver#cuCtxGetApiVersion * @see JCudaDriver#cuCtxGetCacheConfig * @see JCudaDriver#cuCtxGetDevice * @see JCudaDriver#cuCtxGetLimit * @see JCudaDriver#cuCtxPushCurrent * @see JCudaDriver#cuCtxSetCacheConfig * @see JCudaDriver#cuCtxSetLimit * @see JCudaDriver#cuCtxSynchronize */ public static int cuCtxPopCurrent(CUcontext pctx) { return checkResult(cuCtxPopCurrentNative(pctx)); } private static native int cuCtxPopCurrentNative(CUcontext pctx); /** * Binds the specified CUDA context to the calling CPU thread. * *
     * <pre>
     * CUresult cuCtxSetCurrent(CUcontext ctx)
     * </pre>
     * <p>
     * Binds the specified CUDA context to the calling CPU thread. If
     * <code>ctx</code> is NULL, then the CUDA context previously bound to
     * the calling CPU thread is unbound and CUDA_SUCCESS is returned.
     * <p>
     * If there exists a CUDA context stack on the calling CPU thread, this
     * will replace the top of that stack with <code>ctx</code>. If
     * <code>ctx</code> is NULL, this is equivalent to popping the top of
     * the calling CPU thread's CUDA context stack (or a no-op if the stack
     * is empty).
     * <p>
     * Note: This function may also return error codes from previous,
     * asynchronous launches.
     *
     * @param ctx Context to bind to the calling CPU thread
     *
     * @return CUDA_SUCCESS, CUDA_ERROR_DEINITIALIZED, CUDA_ERROR_NOT_INITIALIZED,
     * CUDA_ERROR_INVALID_CONTEXT
     *
     * @see JCudaDriver#cuCtxGetCurrent
     * @see JCudaDriver#cuCtxCreate
     * @see JCudaDriver#cuCtxDestroy
     */
    public static int cuCtxSetCurrent(CUcontext ctx)
    {
        int status = cuCtxSetCurrentNative(ctx);
        return checkResult(status);
    }
    private static native int cuCtxSetCurrentNative(CUcontext ctx);

    /**
     * Returns the CUDA context bound to the calling CPU thread.
     *
     * CUresult cuCtxGetCurrent (
     *      CUcontext* pctx )
     * 
*
*

Returns in *pctx the CUDA context bound * to the calling CPU thread. If no context is bound to the calling CPU * thread then *pctx is set to NULL and CUDA_SUCCESS is * returned. *

*
* Note: *

Note that this * function may also return error codes from previous, asynchronous * launches. *

*
*

*
* * @param pctx Returned context handle * * @return CUDA_SUCCESS, CUDA_ERROR_DEINITIALIZED, * CUDA_ERROR_NOT_INITIALIZED, * * @see JCudaDriver#cuCtxSetCurrent * @see JCudaDriver#cuCtxCreate * @see JCudaDriver#cuCtxDestroy */ public static int cuCtxGetCurrent(CUcontext pctx) { return checkResult(cuCtxGetCurrentNative(pctx)); } private static native int cuCtxGetCurrentNative(CUcontext pctx); /** * Returns the device ID for the current context. * *
     * CUresult cuCtxGetDevice (
     *      CUdevice* device )
     * 
*
*

Returns the device ID for the current * context. Returns in *device the ordinal of the current * context's device. *

*
* Note: *

Note that this * function may also return error codes from previous, asynchronous * launches. *

*
*

*
* * @param device Returned device ID for the current context * * @return CUDA_SUCCESS, CUDA_ERROR_DEINITIALIZED, CUDA_ERROR_NOT_INITIALIZED, * CUDA_ERROR_INVALID_CONTEXT, CUDA_ERROR_INVALID_VALUE, * * @see JCudaDriver#cuCtxCreate * @see JCudaDriver#cuCtxDestroy * @see JCudaDriver#cuCtxGetApiVersion * @see JCudaDriver#cuCtxGetCacheConfig * @see JCudaDriver#cuCtxGetLimit * @see JCudaDriver#cuCtxPopCurrent * @see JCudaDriver#cuCtxPushCurrent * @see JCudaDriver#cuCtxSetCacheConfig * @see JCudaDriver#cuCtxSetLimit * @see JCudaDriver#cuCtxSynchronize */ public static int cuCtxGetDevice(CUdevice device) { return checkResult(cuCtxGetDeviceNative(device)); } private static native int cuCtxGetDeviceNative(CUdevice device); public static int cuCtxGetFlags(int flags[]) { return checkResult(cuCtxGetFlagsNative(flags)); } private static native int cuCtxGetFlagsNative(int flags[]); /** * Block for a context's tasks to complete. * *
     * CUresult cuCtxSynchronize (
     *      void )
     * 
*
*

Block for a context's tasks to complete. * Blocks until the device has completed all preceding requested tasks. * cuCtxSynchronize() returns an error if one of the preceding tasks * failed. If the context was created with the CU_CTX_SCHED_BLOCKING_SYNC * flag, the CPU thread will block until the GPU context has finished its * work. *

*
* Note: *

Note that this * function may also return error codes from previous, asynchronous * launches. *

*
*

*
* * * @return CUDA_SUCCESS, CUDA_ERROR_DEINITIALIZED, CUDA_ERROR_NOT_INITIALIZED, * CUDA_ERROR_INVALID_CONTEXT * * @see JCudaDriver#cuCtxCreate * @see JCudaDriver#cuCtxDestroy * @see JCudaDriver#cuCtxGetApiVersion * @see JCudaDriver#cuCtxGetCacheConfig * @see JCudaDriver#cuCtxGetDevice * @see JCudaDriver#cuCtxGetLimit * @see JCudaDriver#cuCtxPopCurrent * @see JCudaDriver#cuCtxPushCurrent * @see JCudaDriver#cuCtxSetCacheConfig * @see JCudaDriver#cuCtxSetLimit */ public static int cuCtxSynchronize() { return checkResult(cuCtxSynchronizeNative()); } private static native int cuCtxSynchronizeNative(); /** * Loads a compute module. * *
     * CUresult cuModuleLoad (
     *      CUmodule* module,
     *      const char* fname )
     * 
*
*

Loads a compute module. Takes a filename * fname and loads the corresponding module module * into the current context. The CUDA driver API does not attempt to * lazily allocate the resources needed by a module; if the * memory for functions and data (constant * and global) needed by the module cannot be allocated, cuModuleLoad() * fails. The file should be a cubin file as output by nvcc, or a PTX file either as output by nvcc * or handwritten, or a fatbin file as output by nvcc * from toolchain 4.0 or later. *

*
* Note: *

Note that this * function may also return error codes from previous, asynchronous * launches. *

*
*

*
* * @param module Returned module * @param fname Filename of module to load * * @return CUDA_SUCCESS, CUDA_ERROR_DEINITIALIZED, CUDA_ERROR_NOT_INITIALIZED, * CUDA_ERROR_INVALID_CONTEXT, CUDA_ERROR_INVALID_VALUE, CUDA_ERROR_NOT_FOUND, * CUDA_ERROR_OUT_OF_MEMORY, CUDA_ERROR_FILE_NOT_FOUND, * CUDA_ERROR_SHARED_OBJECT_SYMBOL_NOT_FOUND, * CUDA_ERROR_SHARED_OBJECT_INIT_FAILED * * @see JCudaDriver#cuModuleGetFunction * @see JCudaDriver#cuModuleGetGlobal * @see JCudaDriver#cuModuleGetTexRef * @see JCudaDriver#cuModuleLoadData * @see JCudaDriver#cuModuleLoadDataEx * @see JCudaDriver#cuModuleLoadFatBinary * @see JCudaDriver#cuModuleUnload */ public static int cuModuleLoad(CUmodule module, String fname) { return checkResult(cuModuleLoadNative(module, fname)); } private static native int cuModuleLoadNative(CUmodule module, String fname); /** * Load a module's data. * *
     * CUresult cuModuleLoadData (
     *      CUmodule* module,
     *      const void* image )
     * 
*
*

Load a module's data. Takes a pointer * image and loads the corresponding module module * into the current context. The pointer may be obtained by mapping a * cubin or PTX or fatbin file, passing a cubin or PTX or * fatbin file as a NULL-terminated text * string, or incorporating a cubin or fatbin object into the executable * resources and * using operating system calls such as * Windows FindResource() to obtain the pointer. *

*
* Note: *

Note that this * function may also return error codes from previous, asynchronous * launches. *

*
*

*
* * @param module Returned module * @param image Module data to load * * @return CUDA_SUCCESS, CUDA_ERROR_DEINITIALIZED, CUDA_ERROR_NOT_INITIALIZED, * CUDA_ERROR_INVALID_CONTEXT, CUDA_ERROR_INVALID_VALUE, * CUDA_ERROR_OUT_OF_MEMORY, CUDA_ERROR_SHARED_OBJECT_SYMBOL_NOT_FOUND, * CUDA_ERROR_SHARED_OBJECT_INIT_FAILED * * @see JCudaDriver#cuModuleGetFunction * @see JCudaDriver#cuModuleGetGlobal * @see JCudaDriver#cuModuleGetTexRef * @see JCudaDriver#cuModuleLoad * @see JCudaDriver#cuModuleLoadDataEx * @see JCudaDriver#cuModuleLoadFatBinary * @see JCudaDriver#cuModuleUnload */ public static int cuModuleLoadData(CUmodule module, byte image[]) { return checkResult(cuModuleLoadDataNative(module, image)); } private static native int cuModuleLoadDataNative(CUmodule module, byte image[]); /** * Load a module's data with options.
*
* Note: It is hardly possible to properly pass in the required * option values for this method. Thus, the arguments here must be
* numOptions=0
* options=new int[0]
* optionValues=Pointer.to(new int[0]))
* For passing in real options, use * {@link #cuModuleLoadDataJIT(CUmodule, Pointer, JITOptions)} instead * *
     * CUresult cuModuleLoadDataEx (
     *      CUmodule* module,
     *      const void* image,
     *      unsigned int  numOptions,
     *      CUjit_option* options,
     *      void** optionValues )
     * 
*
*

Load a module's data with options. Takes * a pointer image and loads the corresponding module module into the current context. The pointer may be obtained by * mapping a cubin or PTX or fatbin file, passing a cubin or PTX or * fatbin file as a NULL-terminated text * string, or incorporating a cubin or fatbin object into the executable * resources and * using operating system calls such as * Windows FindResource() to obtain the pointer. Options are * passed as an array via options and any corresponding * parameters are passed in optionValues. The number of total * options is supplied via numOptions. Any outputs will be * returned via optionValues. Supported options are (types for * the option values are specified in parentheses after the option name): *

*
    *
  • *

    CU_JIT_MAX_REGISTERS: (unsigned * int) input specifies the maximum number of registers per thread; *

    *
  • *
  • *

    CU_JIT_THREADS_PER_BLOCK: * (unsigned int) input specifies number of threads per block to target * compilation for; output returns the number of threads * the compiler actually targeted; *

    *
  • *
  • *

    CU_JIT_WALL_TIME: (float) * output returns the float value of wall clock time, in milliseconds, * spent compiling the PTX code; *

    *
  • *
  • *

    CU_JIT_INFO_LOG_BUFFER: (char*) * input is a pointer to a buffer in which to print any informational log * messages from PTX assembly (the buffer size * is specified via option * CU_JIT_INFO_LOG_BUFFER_SIZE_BYTES); *

    *
  • *
  • *

    CU_JIT_INFO_LOG_BUFFER_SIZE_BYTES: * (unsigned int) input is the size in bytes of the buffer; output is the * number of bytes filled with messages; *

    *
  • *
  • *

    CU_JIT_ERROR_LOG_BUFFER: * (char*) input is a pointer to a buffer in which to print any error log * messages from PTX assembly (the buffer size is specified * via option * CU_JIT_ERROR_LOG_BUFFER_SIZE_BYTES); *

    *
  • *
  • *

    CU_JIT_ERROR_LOG_BUFFER_SIZE_BYTES: * (unsigned int) input is the size in bytes of the buffer; output is the * number of bytes filled with messages; *

    *
  • *
  • *

    CU_JIT_OPTIMIZATION_LEVEL: * (unsigned int) input is the level of optimization to apply to generated * code (0 - 4), with 4 being the default and highest * level; *

    *
  • *
  • *

    CU_JIT_TARGET_FROM_CUCONTEXT: * (No option value) causes compilation target to be determined based on * current attached context (default); *

    *
  • *
  • *
    * CU_JIT_TARGET: (unsigned int * for enumerated type CUjit_target_enum) input is the compilation target * based on supplied CUjit_target_enum; * possible values are: *
      *
    • *

      CU_TARGET_COMPUTE_10

      *
    • *
    • *

      CU_TARGET_COMPUTE_11

      *
    • *
    • *

      CU_TARGET_COMPUTE_12

      *
    • *
    • *

      CU_TARGET_COMPUTE_13

      *
    • *
    • *

      CU_TARGET_COMPUTE_20

      *
    • *
    *
    *
  • *
  • *
    * CU_JIT_FALLBACK_STRATEGY: * (unsigned int for enumerated type CUjit_fallback_enum) chooses fallback * strategy if matching cubin is not found; possible * values are: *
      *
    • *

      CU_PREFER_PTX

      *
    • *
    • *

      CU_PREFER_BINARY

      *
    • *
    *
    *
  • *
*

*
* Note: *

Note that this * function may also return error codes from previous, asynchronous * launches. *

*
*

*
* * @param module Returned module * @param image Module data to load * @param numOptions Number of options * @param options Options for JIT * @param optionValues Option values for JIT * * @return CUDA_SUCCESS, CUDA_ERROR_DEINITIALIZED, CUDA_ERROR_NOT_INITIALIZED, * CUDA_ERROR_INVALID_CONTEXT, CUDA_ERROR_INVALID_VALUE, * CUDA_ERROR_OUT_OF_MEMORY, CUDA_ERROR_NO_BINARY_FOR_GPU, * CUDA_ERROR_SHARED_OBJECT_SYMBOL_NOT_FOUND, * CUDA_ERROR_SHARED_OBJECT_INIT_FAILED * * @see JCudaDriver#cuModuleGetFunction * @see JCudaDriver#cuModuleGetGlobal * @see JCudaDriver#cuModuleGetTexRef * @see JCudaDriver#cuModuleLoad * @see JCudaDriver#cuModuleLoadData * @see JCudaDriver#cuModuleLoadFatBinary * @see JCudaDriver#cuModuleUnload */ public static int cuModuleLoadDataEx (CUmodule phMod, Pointer p, int numOptions, int options[], Pointer optionValues) { // Although it should be possible to pass 'null' for these parameters // when numOptions==0, the driver crashes when they are 'null', so // they are replaced by non-null (but empty) arrays here. // Also see the corresponding notes in the native method. if (numOptions == 0) { if (options == null) { options = new int[0]; } if (optionValues == null) { optionValues = Pointer.to(new int[0]); } } return checkResult(cuModuleLoadDataExNative( phMod, p, numOptions, options, optionValues)); } private static native int cuModuleLoadDataExNative(CUmodule phMod, Pointer p, int numOptions, int options[], Pointer optionValues); /** * Load a module's data. * *
     * CUresult cuModuleLoadFatBinary (
     *      CUmodule* module,
     *      const void* fatCubin )
     * 
*
*

Load a module's data. Takes a pointer * fatCubin and loads the corresponding module module * into the current context. The pointer represents a fat binary object, * which is a collection of different cubin and/or PTX * files, all representing the same device * code, but compiled and optimized for different architectures. *

*

Prior to CUDA 4.0, there was no * documented API for constructing and using fat binary objects by * programmers. Starting with * CUDA 4.0, fat binary objects can be * constructed by providing the -fatbin option to nvcc. * More information can be found in the nvcc document. *

*
* Note: *

Note that this * function may also return error codes from previous, asynchronous * launches. *

*
*

*
* * @param module Returned module * @param fatCubin Fat binary to load * * @return CUDA_SUCCESS, CUDA_ERROR_DEINITIALIZED, CUDA_ERROR_NOT_INITIALIZED, * CUDA_ERROR_INVALID_CONTEXT, CUDA_ERROR_INVALID_VALUE, CUDA_ERROR_NOT_FOUND, * CUDA_ERROR_OUT_OF_MEMORY, CUDA_ERROR_NO_BINARY_FOR_GPU, * CUDA_ERROR_SHARED_OBJECT_SYMBOL_NOT_FOUND, * CUDA_ERROR_SHARED_OBJECT_INIT_FAILED * * @see JCudaDriver#cuModuleGetFunction * @see JCudaDriver#cuModuleGetGlobal * @see JCudaDriver#cuModuleGetTexRef * @see JCudaDriver#cuModuleLoad * @see JCudaDriver#cuModuleLoadData * @see JCudaDriver#cuModuleLoadDataEx * @see JCudaDriver#cuModuleUnload */ public static int cuModuleLoadFatBinary(CUmodule module, byte fatCubin[]) { return checkResult(cuModuleLoadFatBinaryNative(module, fatCubin)); } private static native int cuModuleLoadFatBinaryNative(CUmodule module, byte fatCubin[]); /** * Unloads a module. * *
     * CUresult cuModuleUnload (
     *      CUmodule hmod )
     * 
*
*

Unloads a module. Unloads a module hmod from the current context. *

*
* Note: *

Note that this * function may also return error codes from previous, asynchronous * launches. *

*
*

*
* * @param hmod Module to unload * * @return CUDA_SUCCESS, CUDA_ERROR_DEINITIALIZED, CUDA_ERROR_NOT_INITIALIZED, * CUDA_ERROR_INVALID_CONTEXT, CUDA_ERROR_INVALID_VALUE * * @see JCudaDriver#cuModuleGetFunction * @see JCudaDriver#cuModuleGetGlobal * @see JCudaDriver#cuModuleGetTexRef * @see JCudaDriver#cuModuleLoad * @see JCudaDriver#cuModuleLoadData * @see JCudaDriver#cuModuleLoadDataEx * @see JCudaDriver#cuModuleLoadFatBinary */ public static int cuModuleUnload(CUmodule hmod) { return checkResult(cuModuleUnloadNative(hmod)); } private static native int cuModuleUnloadNative(CUmodule hmod); /** * Returns a function handle. * *
     * CUresult cuModuleGetFunction (
     *      CUfunction* hfunc,
     *      CUmodule hmod,
     *      const char* name )
     * 
*
*

Returns a function handle. Returns in * *hfunc the handle of the function of name name * located in module hmod. If no function of that name exists, * cuModuleGetFunction() returns CUDA_ERROR_NOT_FOUND. *

*
* Note: *

Note that this * function may also return error codes from previous, asynchronous * launches. *

*
*

*
* * @param hfunc Returned function handle * @param hmod Module to retrieve function from * @param name Name of function to retrieve * * @return CUDA_SUCCESS, CUDA_ERROR_DEINITIALIZED, CUDA_ERROR_NOT_INITIALIZED, * CUDA_ERROR_INVALID_CONTEXT, CUDA_ERROR_INVALID_VALUE, * CUDA_ERROR_NOT_FOUND * * @see JCudaDriver#cuModuleGetGlobal * @see JCudaDriver#cuModuleGetTexRef * @see JCudaDriver#cuModuleLoad * @see JCudaDriver#cuModuleLoadData * @see JCudaDriver#cuModuleLoadDataEx * @see JCudaDriver#cuModuleLoadFatBinary * @see JCudaDriver#cuModuleUnload */ public static int cuModuleGetFunction(CUfunction hfunc, CUmodule hmod, String name) { return checkResult(cuModuleGetFunctionNative(hfunc, hmod, name)); } private static native int cuModuleGetFunctionNative(CUfunction hfunc, CUmodule hmod, String name); /** * Returns a global pointer from a module. * *
     * CUresult cuModuleGetGlobal (
     *      CUdeviceptr* dptr,
     *      size_t* bytes,
     *      CUmodule hmod,
     *      const char* name )
     * 
*
*

Returns a global pointer from a module. * Returns in *dptr and *bytes the base pointer and * size of the global of name name located in module hmod. If no variable of that name exists, cuModuleGetGlobal() * returns CUDA_ERROR_NOT_FOUND. Both parameters dptr and bytes are optional. If one of them is NULL, it is ignored. *

*
* Note: *

Note that this * function may also return error codes from previous, asynchronous * launches. *

*
*

*
* * @param dptr Returned global device pointer * @param bytes Returned global size in bytes * @param hmod Module to retrieve global from * @param name Name of global to retrieve * * @return CUDA_SUCCESS, CUDA_ERROR_DEINITIALIZED, CUDA_ERROR_NOT_INITIALIZED, * CUDA_ERROR_INVALID_CONTEXT, CUDA_ERROR_INVALID_VALUE, * CUDA_ERROR_NOT_FOUND * * @see JCudaDriver#cuModuleGetFunction * @see JCudaDriver#cuModuleGetTexRef * @see JCudaDriver#cuModuleLoad * @see JCudaDriver#cuModuleLoadData * @see JCudaDriver#cuModuleLoadDataEx * @see JCudaDriver#cuModuleLoadFatBinary * @see JCudaDriver#cuModuleUnload */ public static int cuModuleGetGlobal(CUdeviceptr dptr, long bytes[], CUmodule hmod, String name) { return checkResult(cuModuleGetGlobalNative(dptr, bytes, hmod, name)); } private static native int cuModuleGetGlobalNative(CUdeviceptr dptr, long bytes[], CUmodule hmod, String name); /** * Returns a handle to a texture reference. * *
     * CUresult cuModuleGetTexRef (
     *      CUtexref* pTexRef,
     *      CUmodule hmod,
     *      const char* name )
     * 
*
*

Returns a handle to a texture reference. * Returns in *pTexRef the handle of the texture reference of * name name in the module hmod. If no texture * reference of that name exists, cuModuleGetTexRef() returns * CUDA_ERROR_NOT_FOUND. This texture reference handle should not be * destroyed, since it will be destroyed when the module is unloaded. *

*
* Note: *

Note that this * function may also return error codes from previous, asynchronous * launches. *

*
*

*
* * @param pTexRef Returned texture reference * @param hmod Module to retrieve texture reference from * @param name Name of texture reference to retrieve * * @return CUDA_SUCCESS, CUDA_ERROR_DEINITIALIZED, CUDA_ERROR_NOT_INITIALIZED, * CUDA_ERROR_INVALID_CONTEXT, CUDA_ERROR_INVALID_VALUE, * CUDA_ERROR_NOT_FOUND * * @see JCudaDriver#cuModuleGetFunction * @see JCudaDriver#cuModuleGetGlobal * @see JCudaDriver#cuModuleGetSurfRef * @see JCudaDriver#cuModuleLoad * @see JCudaDriver#cuModuleLoadData * @see JCudaDriver#cuModuleLoadDataEx * @see JCudaDriver#cuModuleLoadFatBinary * @see JCudaDriver#cuModuleUnload */ public static int cuModuleGetTexRef(CUtexref pTexRef, CUmodule hmod, String name) { return checkResult(cuModuleGetTexRefNative(pTexRef, hmod, name)); } private static native int cuModuleGetTexRefNative(CUtexref pTexRef, CUmodule hmod, String name); /** * Returns a handle to a surface reference. * *
     * CUresult cuModuleGetSurfRef (
     *      CUsurfref* pSurfRef,
     *      CUmodule hmod,
     *      const char* name )
     * 
*
*

Returns a handle to a surface reference. * Returns in *pSurfRef the handle of the surface reference of * name name in the module hmod. If no surface * reference of that name exists, cuModuleGetSurfRef() returns * CUDA_ERROR_NOT_FOUND. *

*
* Note: *

Note that this * function may also return error codes from previous, asynchronous * launches. *

*
*

*
* * @param pSurfRef Returned surface reference * @param hmod Module to retrieve surface reference from * @param name Name of surface reference to retrieve * * @return CUDA_SUCCESS, CUDA_ERROR_DEINITIALIZED, CUDA_ERROR_NOT_INITIALIZED, * CUDA_ERROR_INVALID_CONTEXT, CUDA_ERROR_INVALID_VALUE, * CUDA_ERROR_NOT_FOUND * * @see JCudaDriver#cuModuleGetFunction * @see JCudaDriver#cuModuleGetGlobal * @see JCudaDriver#cuModuleGetTexRef * @see JCudaDriver#cuModuleLoad * @see JCudaDriver#cuModuleLoadData * @see JCudaDriver#cuModuleLoadDataEx * @see JCudaDriver#cuModuleLoadFatBinary * @see JCudaDriver#cuModuleUnload */ public static int cuModuleGetSurfRef(CUsurfref pSurfRef, CUmodule hmod, String name) { return checkResult(cuModuleGetSurfRefNative(pSurfRef, hmod, name)); } private static native int cuModuleGetSurfRefNative(CUsurfref pSurfRef, CUmodule hmod, String name); public static int cuLinkCreate(JITOptions jitOptions, CUlinkState stateOut) { return checkResult(cuLinkCreateNative(jitOptions, stateOut)); } private static native int cuLinkCreateNative(JITOptions jitOptions, CUlinkState stateOut); public static int cuLinkAddData(CUlinkState state, int type, Pointer data, long size, String name, JITOptions jitOptions) { return checkResult(cuLinkAddDataNative(state, type, data, size, name, jitOptions)); } private static native int cuLinkAddDataNative(CUlinkState state, int type, Pointer data, long size, String name, JITOptions jitOptions); public static int cuLinkAddFile(CUlinkState state, int type, String path, JITOptions jitOptions) { return checkResult(cuLinkAddFileNative(state, type, path, jitOptions)); } private static native int cuLinkAddFileNative(CUlinkState state, int type, String path, JITOptions jitOptions); public static int cuLinkComplete(CUlinkState state, Pointer cubinOut, long sizeOut[]) { return checkResult(cuLinkCompleteNative(state, cubinOut, sizeOut)); } private static native int cuLinkCompleteNative(CUlinkState state, Pointer cubinOut, long sizeOut[]); 
public static int cuLinkDestroy(CUlinkState state) { return checkResult(cuLinkDestroyNative(state)); } private static native int cuLinkDestroyNative(CUlinkState state); /** * Gets free and total memory. * *
     * CUresult cuMemGetInfo (
     *      size_t* free,
     *      size_t* total )
     * 
*
*

Gets free and total memory. Returns in * *free and *total respectively, the free and total * amount of memory available for allocation by the CUDA context, in * bytes. *

*
* Note: *

Note that this * function may also return error codes from previous, asynchronous * launches. *

*
*

*
* * @param free Returned free memory in bytes * @param total Returned total memory in bytes * * @return CUDA_SUCCESS, CUDA_ERROR_DEINITIALIZED, CUDA_ERROR_NOT_INITIALIZED, * CUDA_ERROR_INVALID_CONTEXT, CUDA_ERROR_INVALID_VALUE * * @see JCudaDriver#cuArray3DCreate * @see JCudaDriver#cuArray3DGetDescriptor * @see JCudaDriver#cuArrayCreate * @see JCudaDriver#cuArrayDestroy * @see JCudaDriver#cuArrayGetDescriptor * @see JCudaDriver#cuMemAlloc * @see JCudaDriver#cuMemAllocHost * @see JCudaDriver#cuMemAllocPitch * @see JCudaDriver#cuMemcpy2D * @see JCudaDriver#cuMemcpy2DAsync * @see JCudaDriver#cuMemcpy2DUnaligned * @see JCudaDriver#cuMemcpy3D * @see JCudaDriver#cuMemcpy3DAsync * @see JCudaDriver#cuMemcpyAtoA * @see JCudaDriver#cuMemcpyAtoD * @see JCudaDriver#cuMemcpyAtoH * @see JCudaDriver#cuMemcpyAtoHAsync * @see JCudaDriver#cuMemcpyDtoA * @see JCudaDriver#cuMemcpyDtoD * @see JCudaDriver#cuMemcpyDtoDAsync * @see JCudaDriver#cuMemcpyDtoH * @see JCudaDriver#cuMemcpyDtoHAsync * @see JCudaDriver#cuMemcpyHtoA * @see JCudaDriver#cuMemcpyHtoAAsync * @see JCudaDriver#cuMemcpyHtoD * @see JCudaDriver#cuMemcpyHtoDAsync * @see JCudaDriver#cuMemFree * @see JCudaDriver#cuMemFreeHost * @see JCudaDriver#cuMemGetAddressRange * @see JCudaDriver#cuMemHostAlloc * @see JCudaDriver#cuMemHostGetDevicePointer * @see JCudaDriver#cuMemsetD2D8 * @see JCudaDriver#cuMemsetD2D16 * @see JCudaDriver#cuMemsetD2D32 * @see JCudaDriver#cuMemsetD8 * @see JCudaDriver#cuMemsetD16 * @see JCudaDriver#cuMemsetD32 */ public static int cuMemGetInfo(long free[], long total[]) { return checkResult(cuMemGetInfoNative(free, total)); } private static native int cuMemGetInfoNative(long free[], long total[]); /** * Allocates page-locked host memory. * *
     * CUresult cuMemHostAlloc (
     *      void** pp,
     *      size_t bytesize,
     *      unsigned int  Flags )
     * 
*
*

Allocates page-locked host memory. * Allocates bytesize bytes of host memory that is page-locked * and accessible to the device. The driver tracks the virtual memory * ranges allocated * with this function and automatically * accelerates calls to functions such as cuMemcpyHtoD(). Since the memory * can be accessed directly by the device, it can be read or written with * much higher bandwidth than pageable * memory obtained with functions such as * malloc(). Allocating excessive amounts of pinned memory may degrade * system performance, * since it reduces the amount of memory * available to the system for paging. As a result, this function is best * used sparingly * to allocate staging areas for data * exchange between host and device. *

*

The Flags parameter enables * different options to be specified that affect the allocation, as * follows. *

*
    *
  • *

    CU_MEMHOSTALLOC_PORTABLE: The * memory returned by this call will be considered as pinned memory by * all CUDA contexts, not just the one that performed * the allocation. *

    *
  • *
*

*
    *
  • *

    CU_MEMHOSTALLOC_DEVICEMAP: Maps * the allocation into the CUDA address space. The device pointer to the * memory may be obtained by calling cuMemHostGetDevicePointer(). This * feature is available only on GPUs with compute capability greater than * or equal to 1.1. *

    *
  • *
*

*
    *
  • *

    CU_MEMHOSTREGISTER_IOMEMORY: * The pointer is treated as pointing to some * I/O memory space, e.g. the PCI Express resource of a 3rd party device. *

    *
  • *
*

*
    *
  • *

    CU_MEMHOSTALLOC_WRITECOMBINED: * Allocates the memory as write-combined (WC). WC memory can be * transferred across the PCI Express bus more quickly on some * system configurations, but * cannot be read efficiently by most CPUs. WC memory is a good option * for buffers that will be written * by the CPU and read by the GPU * via mapped pinned memory or host->device transfers. *

    *
  • *
*

*

All of these flags are orthogonal to * one another: a developer may allocate memory that is portable, mapped * and/or write-combined * with no restrictions. *

*

The CUDA context must have been created * with the CU_CTX_MAP_HOST flag in order for the CU_MEMHOSTALLOC_DEVICEMAP * flag to have any effect. *

*

The CU_MEMHOSTALLOC_DEVICEMAP flag may * be specified on CUDA contexts for devices that do not support mapped * pinned memory. The failure is deferred to cuMemHostGetDevicePointer() * because the memory may be mapped into other CUDA contexts via the * CU_MEMHOSTALLOC_PORTABLE flag. *

*

The memory allocated by this function * must be freed with cuMemFreeHost(). *

*

Note all host memory allocated using * cuMemHostAlloc() will automatically be immediately accessible to all * contexts on all devices which support unified addressing (as may be * queried * using CU_DEVICE_ATTRIBUTE_UNIFIED_ADDRESSING). * Unless the flag CU_MEMHOSTALLOC_WRITECOMBINED is specified, the device * pointer that may be used to access this host memory from those contexts * is always equal to the returned * host pointer *pp. If the flag * CU_MEMHOSTALLOC_WRITECOMBINED is specified, then the function * cuMemHostGetDevicePointer() must be used to query the device pointer, * even if the context supports unified addressing. See Unified Addressing * for additional details. *

*
* Note: *

Note that this * function may also return error codes from previous, asynchronous * launches. *

*
*

*
* * @param pp Returned host pointer to page-locked memory * @param bytesize Requested allocation size in bytes * @param Flags Flags for allocation request * * @return CUDA_SUCCESS, CUDA_ERROR_DEINITIALIZED, CUDA_ERROR_NOT_INITIALIZED, * CUDA_ERROR_INVALID_CONTEXT, CUDA_ERROR_INVALID_VALUE, * CUDA_ERROR_OUT_OF_MEMORY, CUDA_ERROR_HOST_MEMORY_ALREADY_REGISTERED, * CUDA_ERROR_NOT_PERMITTED, CUDA_ERROR_NOT_SUPPORTED * * @see JCudaDriver#cuArray3DCreate * @see JCudaDriver#cuArray3DGetDescriptor * @see JCudaDriver#cuArrayCreate * @see JCudaDriver#cuArrayDestroy * @see JCudaDriver#cuArrayGetDescriptor * @see JCudaDriver#cuMemAlloc * @see JCudaDriver#cuMemAllocHost * @see JCudaDriver#cuMemAllocPitch * @see JCudaDriver#cuMemcpy2D * @see JCudaDriver#cuMemcpy2DAsync * @see JCudaDriver#cuMemcpy2DUnaligned * @see JCudaDriver#cuMemcpy3D * @see JCudaDriver#cuMemcpy3DAsync * @see JCudaDriver#cuMemcpyAtoA * @see JCudaDriver#cuMemcpyAtoD * @see JCudaDriver#cuMemcpyAtoH * @see JCudaDriver#cuMemcpyAtoHAsync * @see JCudaDriver#cuMemcpyDtoA * @see JCudaDriver#cuMemcpyDtoD * @see JCudaDriver#cuMemcpyDtoDAsync * @see JCudaDriver#cuMemcpyDtoH * @see JCudaDriver#cuMemcpyDtoHAsync * @see JCudaDriver#cuMemcpyHtoA * @see JCudaDriver#cuMemcpyHtoAAsync * @see JCudaDriver#cuMemcpyHtoD * @see JCudaDriver#cuMemcpyHtoDAsync * @see JCudaDriver#cuMemFree * @see JCudaDriver#cuMemFreeHost * @see JCudaDriver#cuMemGetAddressRange * @see JCudaDriver#cuMemGetInfo * @see JCudaDriver#cuMemHostGetDevicePointer * @see JCudaDriver#cuMemsetD2D8 * @see JCudaDriver#cuMemsetD2D16 * @see JCudaDriver#cuMemsetD2D32 * @see JCudaDriver#cuMemsetD8 * @see JCudaDriver#cuMemsetD16 * @see JCudaDriver#cuMemsetD32 */ public static int cuMemHostAlloc(Pointer pp, long bytes, int Flags) { return checkResult(cuMemHostAllocNative(pp, bytes, Flags)); } private static native int cuMemHostAllocNative(Pointer pp, long bytes, int Flags); /** * Passes back device pointer of mapped pinned memory. * *
     * CUresult cuMemHostGetDevicePointer (
     *      CUdeviceptr* pdptr,
     *      void* p,
     *      unsigned int  Flags )
     * 
*
*

Passes back device pointer of mapped * pinned memory. Passes back the device pointer pdptr * corresponding to the mapped, pinned host buffer p allocated * by cuMemHostAlloc. *

*

cuMemHostGetDevicePointer() will fail * if the CU_MEMHOSTALLOC_DEVICEMAP flag was not specified at the time * the memory was allocated, or if the function is called on a GPU that * does not support * mapped pinned memory. *

*

Flags provides for future * releases. For now, it must be set to 0. *

*
* Note: *

Note that this * function may also return error codes from previous, asynchronous * launches. *

*
*

*
* * @param pdptr Returned device pointer * @param p Host pointer * @param Flags Options (must be 0) * * @return CUDA_SUCCESS, CUDA_ERROR_DEINITIALIZED, CUDA_ERROR_NOT_INITIALIZED, * CUDA_ERROR_INVALID_CONTEXT, CUDA_ERROR_INVALID_VALUE * * @see JCudaDriver#cuArray3DCreate * @see JCudaDriver#cuArray3DGetDescriptor * @see JCudaDriver#cuArrayCreate * @see JCudaDriver#cuArrayDestroy * @see JCudaDriver#cuArrayGetDescriptor * @see JCudaDriver#cuMemAlloc * @see JCudaDriver#cuMemAllocHost * @see JCudaDriver#cuMemAllocPitch * @see JCudaDriver#cuMemcpy2D * @see JCudaDriver#cuMemcpy2DAsync * @see JCudaDriver#cuMemcpy2DUnaligned * @see JCudaDriver#cuMemcpy3D * @see JCudaDriver#cuMemcpy3DAsync * @see JCudaDriver#cuMemcpyAtoA * @see JCudaDriver#cuMemcpyAtoD * @see JCudaDriver#cuMemcpyAtoH * @see JCudaDriver#cuMemcpyAtoHAsync * @see JCudaDriver#cuMemcpyDtoA * @see JCudaDriver#cuMemcpyDtoD * @see JCudaDriver#cuMemcpyDtoDAsync * @see JCudaDriver#cuMemcpyDtoH * @see JCudaDriver#cuMemcpyDtoHAsync * @see JCudaDriver#cuMemcpyHtoA * @see JCudaDriver#cuMemcpyHtoAAsync * @see JCudaDriver#cuMemcpyHtoD * @see JCudaDriver#cuMemcpyHtoDAsync * @see JCudaDriver#cuMemFree * @see JCudaDriver#cuMemFreeHost * @see JCudaDriver#cuMemGetAddressRange * @see JCudaDriver#cuMemGetInfo * @see JCudaDriver#cuMemHostAlloc * @see JCudaDriver#cuMemsetD2D8 * @see JCudaDriver#cuMemsetD2D16 * @see JCudaDriver#cuMemsetD2D32 * @see JCudaDriver#cuMemsetD8 * @see JCudaDriver#cuMemsetD16 * @see JCudaDriver#cuMemsetD32 */ public static int cuMemHostGetDevicePointer(CUdeviceptr ret, Pointer p, int Flags) { return checkResult(cuMemHostGetDevicePointerNative(ret, p, Flags)); } private static native int cuMemHostGetDevicePointerNative(CUdeviceptr ret, Pointer p, int Flags); /** * Passes back flags that were used for a pinned allocation. * *
     * <pre>
     * CUresult cuMemHostGetFlags (
     *      unsigned int* pFlags,
     *      void* p )
     * </pre>
     * <p>
     * Passes back the flags pFlags that were specified when allocating the
     * pinned host buffer p with cuMemHostAlloc.
     * <p>
     * cuMemHostGetFlags() will fail if the pointer does not reside in an
     * allocation performed by cuMemAllocHost() or cuMemHostAlloc().
     * <p>
     * Note that this function may also return error codes from previous,
     * asynchronous launches.
     *
     * @param pFlags Returned flags word
     * @param p Host pointer
     *
     * @return CUDA_SUCCESS, CUDA_ERROR_DEINITIALIZED, CUDA_ERROR_NOT_INITIALIZED,
     * CUDA_ERROR_INVALID_CONTEXT, CUDA_ERROR_INVALID_VALUE
     *
     * @see JCudaDriver#cuMemAllocHost
     * @see JCudaDriver#cuMemHostAlloc
     */
    public static int cuMemHostGetFlags(int pFlags[], Pointer p)
    {
        int status = cuMemHostGetFlagsNative(pFlags, p);
        return checkResult(status);
    }
    private static native int cuMemHostGetFlagsNative(int pFlags[], Pointer p);

    /**
     * Returns a handle to a compute device.
     *
     * CUresult cuDeviceGetByPCIBusId (
     *      CUdevice* dev,
     *      char* pciBusId )
     * 
*
*

Returns a handle to a compute device. * Returns in *device a device handle given a PCI bus ID * string. *

*
* Note: *

Note that this * function may also return error codes from previous, asynchronous * launches. *

*
*

*
* * @param dev Returned device handle * @param pciBusId String in one of the following forms: [domain]:[bus]:[device].[function] [domain]:[bus]:[device] [bus]:[device].[function] where domain, bus, device, and function are all hexadecimal values * * @return CUDA_SUCCESS, CUDA_ERROR_DEINITIALIZED, CUDA_ERROR_NOT_INITIALIZED, * CUDA_ERROR_INVALID_VALUE, CUDA_ERROR_INVALID_DEVICE * * @see JCudaDriver#cuDeviceGet * @see JCudaDriver#cuDeviceGetAttribute * @see JCudaDriver#cuDeviceGetPCIBusId */ public static int cuDeviceGetByPCIBusId(CUdevice dev, String pciBusId) { return checkResult(cuDeviceGetByPCIBusIdNative(dev, pciBusId)); } private static native int cuDeviceGetByPCIBusIdNative(CUdevice dev, String pciBusId); /** *
     * CUresult cuMemAllocManaged (
     *      CUdeviceptr* dptr,
     *      size_t bytesize,
     *      unsigned int  flags )
     * 
* *
Allocates memory that will be automatically managed by the Unified * Memory system.
*
Description
*

* Allocates bytesize bytes of managed memory on the device and * returns in *dptr a pointer to the allocated memory. If the * device doesn't support allocating managed memory, * CUDA_ERROR_NOT_SUPPORTED is returned. Support for managed memory can be * queried using the device attribute CU_DEVICE_ATTRIBUTE_MANAGED_MEMORY. * The allocated memory is suitably aligned for any kind of variable. The * memory is not cleared. If bytesize is 0, cuMemAllocManaged * returns CUDA_ERROR_INVALID_VALUE. The pointer is valid on the CPU and on * all GPUs in the system that support managed memory. All accesses to this * pointer must obey the Unified Memory programming model. *

*

* flags specifies the default stream association for this * allocation. flags must be one of CU_MEM_ATTACH_GLOBAL or * CU_MEM_ATTACH_HOST. If CU_MEM_ATTACH_GLOBAL is specified, then this * memory is accessible from any stream on any device. If CU_MEM_ATTACH_HOST * is specified, then the allocation is created with initial visibility * restricted to host access only; an explicit call to * cuStreamAttachMemAsync will be required to enable access on the device. *

*

* If the association is later changed via cuStreamAttachMemAsync to a * single stream, the default association as specifed during * cuMemAllocManaged is restored when that stream is destroyed. For * __managed__ variables, the default association is always * CU_MEM_ATTACH_GLOBAL. Note that destroying a stream is an asynchronous * operation, and as a result, the change to default association won't * happen until all work in the stream has completed. *

*

* Memory allocated with cuMemAllocManaged should be released with * cuMemFree. *

*

* On a multi-GPU system with peer-to-peer support, where multiple GPUs * support managed memory, the physical storage is created on the GPU which * is active at the time cuMemAllocManaged is called. All other GPUs will * reference the data at reduced bandwidth via peer mappings over the PCIe * bus. The Unified Memory management system does not migrate memory between * GPUs. *

*

* On a multi-GPU system where multiple GPUs support managed memory, but not * all pairs of such GPUs have peer-to-peer support between them, the * physical storage is created in 'zero-copy' or system memory. All GPUs * will reference the data at reduced bandwidth over the PCIe bus. In these * circumstances, use of the environment variable, CUDA_VISIBLE_DEVICES, is * recommended to restrict CUDA to only use those GPUs that have * peer-to-peer support. Alternatively, users can also set * CUDA_MANAGED_FORCE_DEVICE_ALLOC to a non-zero value to force the driver * to always use device memory for physical storage. When this environment * variable is set to a non-zero value, all contexts created in that process * on devices that support managed memory have to be peer-to-peer compatible * with each other. Context creation will fail if a context is created on a * device that supports managed memory and is not peer-to-peer compatible * with any of the other managed memory supporting devices on which contexts * were previously created, even if those contexts have been destroyed. * These environment variables are described in the CUDA programming guide * under the "CUDA environment variables" section. *

*
Note: *

* Note that this function may also return error codes from previous, * asynchronous launches. *

*
*

*
* * @param dptr The device pointer * @param bytesize The size in bytes * @param flags The flags * * @return CUDA_SUCCESS, CUDA_ERROR_DEINITIALIZED, * CUDA_ERROR_NOT_INITIALIZED, CUDA_ERROR_INVALID_CONTEXT, * CUDA_ERROR_NOT_SUPPORTED, CUDA_ERROR_INVALID_VALUE, * CUDA_ERROR_OUT_OF_MEMORY * * @see JCudaDriver#cuArray3DCreate * @see JCudaDriver#cuArray3DGetDescriptor * @see JCudaDriver#cuArrayCreate * @see JCudaDriver#cuArrayDestroy * @see JCudaDriver#cuArrayGetDescriptor * @see JCudaDriver#cuMemAllocHost * @see JCudaDriver#cuMemAllocPitch * @see JCudaDriver#cuMemcpy2D * @see JCudaDriver#cuMemcpy2DAsync * @see JCudaDriver#cuMemcpy2DUnaligned * @see JCudaDriver#cuMemcpy3D * @see JCudaDriver#cuMemcpy3DAsync * @see JCudaDriver#cuMemcpyAtoA * @see JCudaDriver#cuMemcpyAtoD * @see JCudaDriver#cuMemcpyAtoH * @see JCudaDriver#cuMemcpyAtoHAsync * @see JCudaDriver#cuMemcpyDtoA * @see JCudaDriver#cuMemcpyDtoD * @see JCudaDriver#cuMemcpyDtoDAsync * @see JCudaDriver#cuMemcpyDtoH * @see JCudaDriver#cuMemcpyDtoHAsync * @see JCudaDriver#cuMemcpyHtoA * @see JCudaDriver#cuMemcpyHtoAAsync * @see JCudaDriver#cuMemcpyHtoD * @see JCudaDriver#cuMemcpyHtoDAsync * @see JCudaDriver#cuMemFree * @see JCudaDriver#cuMemFreeHost * @see JCudaDriver#cuMemGetAddressRange * @see JCudaDriver#cuMemGetInfo * @see JCudaDriver#cuMemHostAlloc * @see JCudaDriver#cuMemHostGetDevicePointer * @see JCudaDriver#cuMemsetD2D8 * @see JCudaDriver#cuMemsetD2D16 * @see JCudaDriver#cuMemsetD2D32 * @see JCudaDriver#cuMemsetD8 * @see JCudaDriver#cuMemsetD16 * @see JCudaDriver#cuMemsetD32 * @see JCudaDriver#cuDeviceGetAttribute * @see JCudaDriver#cuStreamAttachMemAsync */ public static int cuMemAllocManaged(CUdeviceptr dptr, long bytesize, int flags) { return checkResult(cuMemAllocManagedNative(dptr, bytesize, flags)); } private static native int cuMemAllocManagedNative(CUdeviceptr dptr, long bytesize, int flags); /** * Returns a PCI Bus Id string for the device. * *
     * CUresult cuDeviceGetPCIBusId (
     *      char* pciBusId,
     *      int  len,
     *      CUdevice dev )
     * 
*
*

Returns a PCI Bus Id string for the * device. Returns an ASCII string identifying the device dev * in the NULL-terminated string pointed to by pciBusId. len specifies the maximum length of the string that may be * returned. *

*
* Note: *

Note that this * function may also return error codes from previous, asynchronous * launches. *

*
*

*
* * @param pciBusId Returned identifier string for the device in the following format [domain]:[bus]:[device].[function] where domain, bus, device, and function are all hexadecimal values. pciBusId should be large enough to store 13 characters including the NULL-terminator. * @param len Maximum length of string to store in name * @param dev Device to get identifier string for * * @return CUDA_SUCCESS, CUDA_ERROR_DEINITIALIZED, CUDA_ERROR_NOT_INITIALIZED, * CUDA_ERROR_INVALID_VALUE, CUDA_ERROR_INVALID_DEVICE * * @see JCudaDriver#cuDeviceGet * @see JCudaDriver#cuDeviceGetAttribute * @see JCudaDriver#cuDeviceGetByPCIBusId */ public static int cuDeviceGetPCIBusId(String pciBusId[], int len, CUdevice dev) { return checkResult(cuDeviceGetPCIBusIdNative(pciBusId, len, dev)); } private static native int cuDeviceGetPCIBusIdNative(String pciBusId[], int len, CUdevice dev); /** * Gets an interprocess handle for a previously allocated event. * *
     * CUresult cuIpcGetEventHandle (
     *      CUipcEventHandle* pHandle,
     *      CUevent event )
     * 
*
*

Gets an interprocess handle for a * previously allocated event. Takes as input a previously allocated * event. This event must * have been created with the * CU_EVENT_INTERPROCESS and CU_EVENT_DISABLE_TIMING flags set. This * opaque handle may be copied into other processes and opened with * cuIpcOpenEventHandle to allow efficient hardware synchronization * between GPU work in different processes. *

*

After the event has been been opened in * the importing process, cuEventRecord, cuEventSynchronize, * cuStreamWaitEvent and cuEventQuery may be used in either process. * Performing operations on the imported event after the exported event * has been freed with cuEventDestroy will result in undefined behavior. *

*

IPC functionality is restricted to * devices with support for unified addressing on Linux operating * systems. *

*
* * @param pHandle Pointer to a user allocated CUipcEventHandle in which to return the opaque event handle * @param event Event allocated with CU_EVENT_INTERPROCESS and CU_EVENT_DISABLE_TIMING flags. * * @return CUDA_SUCCESS, CUDA_ERROR_INVALID_HANDLE, CUDA_ERROR_OUT_OF_MEMORY, * CUDA_ERROR_MAP_FAILED * * @see JCudaDriver#cuEventCreate * @see JCudaDriver#cuEventDestroy * @see JCudaDriver#cuEventSynchronize * @see JCudaDriver#cuEventQuery * @see JCudaDriver#cuStreamWaitEvent * @see JCudaDriver#cuIpcOpenEventHandle * @see JCudaDriver#cuIpcGetMemHandle * @see JCudaDriver#cuIpcOpenMemHandle * @see JCudaDriver#cuIpcCloseMemHandle */ public static int cuIpcGetEventHandle(CUipcEventHandle pHandle, CUevent event) { return checkResult(cuIpcGetEventHandleNative(pHandle, event)); } private static native int cuIpcGetEventHandleNative(CUipcEventHandle pHandle, CUevent event); /** * Opens an interprocess event handle for use in the current process. * *
     * CUresult cuIpcOpenEventHandle (
     *      CUevent* phEvent,
     *      CUipcEventHandle handle )
     * 
*
*

Opens an interprocess event handle for * use in the current process. Opens an interprocess event handle exported * from another * process with cuIpcGetEventHandle. This * function returns a CUevent that behaves like a locally created event * with the CU_EVENT_DISABLE_TIMING flag specified. This event must be * freed with cuEventDestroy. *

*

Performing operations on the imported * event after the exported event has been freed with cuEventDestroy will * result in undefined behavior. *

*

IPC functionality is restricted to * devices with support for unified addressing on Linux operating * systems. *

*
* * @param phEvent Returns the imported event * @param handle Interprocess handle to open * * @return CUDA_SUCCESS, CUDA_ERROR_INVALID_CONTEXT, CUDA_ERROR_MAP_FAILED, * CUDA_ERROR_PEER_ACCESS_UNSUPPORTED, CUDA_ERROR_INVALID_HANDLE * * @see JCudaDriver#cuEventCreate * @see JCudaDriver#cuEventDestroy * @see JCudaDriver#cuEventSynchronize * @see JCudaDriver#cuEventQuery * @see JCudaDriver#cuStreamWaitEvent * @see JCudaDriver#cuIpcGetEventHandle * @see JCudaDriver#cuIpcGetMemHandle * @see JCudaDriver#cuIpcOpenMemHandle * @see JCudaDriver#cuIpcCloseMemHandle */ public static int cuIpcOpenEventHandle(CUevent phEvent, CUipcEventHandle handle) { return checkResult(cuIpcOpenEventHandleNative(phEvent, handle)); } private static native int cuIpcOpenEventHandleNative(CUevent phEvent, CUipcEventHandle handle); /** * Gets an interprocess memory handle for an existing device memory * allocation. * *
     * CUresult cuIpcGetMemHandle (
     *      CUipcMemHandle* pHandle,
     *      CUdeviceptr dptr )
     * 
*
*

/brief Gets an interprocess memory * handle for an existing device memory allocation *

*

Takes a pointer to the base of an * existing device memory allocation created with cuMemAlloc and exports * it for use in another process. This is a lightweight operation and may * be called multiple times on an allocation * without adverse effects. *

*

If a region of memory is freed with * cuMemFree and a subsequent call to cuMemAlloc returns memory with the * same device address, cuIpcGetMemHandle will return a unique handle for * the new memory. *

*

IPC functionality is restricted to * devices with support for unified addressing on Linux operating * systems. *

*
* * @param pHandle Pointer to user allocated CUipcMemHandle to return the handle in. * @param dptr Base pointer to previously allocated device memory * * @return CUDA_SUCCESS, CUDA_ERROR_INVALID_HANDLE, CUDA_ERROR_OUT_OF_MEMORY, * CUDA_ERROR_MAP_FAILED, * * @see JCudaDriver#cuMemAlloc * @see JCudaDriver#cuMemFree * @see JCudaDriver#cuIpcGetEventHandle * @see JCudaDriver#cuIpcOpenEventHandle * @see JCudaDriver#cuIpcOpenMemHandle * @see JCudaDriver#cuIpcCloseMemHandle */ public static int cuIpcGetMemHandle(CUipcMemHandle pHandle, CUdeviceptr dptr) { return checkResult(cuIpcGetMemHandleNative(pHandle, dptr)); } private static native int cuIpcGetMemHandleNative(CUipcMemHandle pHandle, CUdeviceptr dptr); /** * *
     * CUresult cuIpcOpenMemHandle (
     *      CUdeviceptr* pdptr,
     *      CUipcMemHandle handle,
     *      unsigned int  Flags )
     * 
*
*

/brief Opens an interprocess memory * handle exported from another process and returns a device pointer * usable in the local * process. *

*

Maps memory exported from another * process with cuIpcGetMemHandle into the current device address space. * For contexts on different devices cuIpcOpenMemHandle can attempt to * enable peer access between the devices as if the user called * cuCtxEnablePeerAccess. This behavior is controlled by the * CU_IPC_MEM_LAZY_ENABLE_PEER_ACCESS flag. cuDeviceCanAccessPeer can * determine if a mapping is possible. *

*

Contexts that may open CUipcMemHandles * are restricted in the following way. CUipcMemHandles from each CUdevice * in a given process may only be opened by one CUcontext per CUdevice * per other process. *

* If the memory handle has already been opened by the current context, the * reference count on the handle is incremented by 1 and the existing device pointer * is returned. *

Memory returned from cuIpcOpenMemHandle * must be freed with cuIpcCloseMemHandle. *

*

Calling cuMemFree on an exported memory * region before calling cuIpcCloseMemHandle in the importing context will * result in undefined behavior. *

*

IPC functionality is restricted to * devices with support for unified addressing on Linux operating * systems. *

*
* * @param pdptr Returned device pointer * @param handle CUipcMemHandle to open * @param Flags Flags for this operation. Must be specified as CU_IPC_MEM_LAZY_ENABLE_PEER_ACCESS * * @return CUDA_SUCCESS, CUDA_ERROR_INVALID_CONTEXT, CUDA_ERROR_MAP_FAILED, * CUDA_ERROR_INVALID_HANDLE, CUDA_ERROR_TOO_MANY_PEERS * * @see JCudaDriver#cuMemAlloc * @see JCudaDriver#cuMemFree * @see JCudaDriver#cuIpcGetEventHandle * @see JCudaDriver#cuIpcOpenEventHandle * @see JCudaDriver#cuIpcGetMemHandle * @see JCudaDriver#cuIpcCloseMemHandle * @see JCudaDriver#cuCtxEnablePeerAccess * @see JCudaDriver#cuDeviceCanAccessPeer */ public static int cuIpcOpenMemHandle(CUdeviceptr pdptr, CUipcMemHandle handle, int Flags) { return checkResult(cuIpcOpenMemHandleNative(pdptr, handle, Flags)); } private static native int cuIpcOpenMemHandleNative(CUdeviceptr pdptr, CUipcMemHandle handle, int Flags); /** * Close memory mapped with cuIpcOpenMemHandle. * *
     * CUresult cuIpcCloseMemHandle (
     *      CUdeviceptr dptr )
     * 
*
*

Close memory mapped with cuIpcOpenMemHandle. * Decrements the reference count of the memory returned by ::cuIpcOpenMemHandle by 1. * When the reference count reaches 0, this API unmaps the memory. The original allocation * in the exporting process as well as imported mappings in other processes * will be unaffected. *

*

Any resources used to enable peer access * will be freed if this is the last mapping using them. *

*

IPC functionality is restricted to * devices with support for unified addressing on Linux operating * systems. *

*
* * @param dptr Device pointer returned by cuIpcOpenMemHandle * * @return CUDA_SUCCESS, CUDA_ERROR_INVALID_CONTEXT, CUDA_ERROR_MAP_FAILED, * CUDA_ERROR_INVALID_HANDLE, * * @see JCudaDriver#cuMemAlloc * @see JCudaDriver#cuMemFree * @see JCudaDriver#cuIpcGetEventHandle * @see JCudaDriver#cuIpcOpenEventHandle * @see JCudaDriver#cuIpcGetMemHandle * @see JCudaDriver#cuIpcOpenMemHandle */ public static int cuIpcCloseMemHandle(CUdeviceptr dptr) { return checkResult(cuIpcCloseMemHandleNative(dptr)); } private static native int cuIpcCloseMemHandleNative(CUdeviceptr dptr); /** * Registers an existing host memory range for use by CUDA. * *
     * CUresult cuMemHostRegister (
     *      void* p,
     *      size_t bytesize,
     *      unsigned int  Flags )
     * 
*
*

Registers an existing host memory range * for use by CUDA. Page-locks the memory range specified by p * and bytesize and maps it for the device(s) as specified by * Flags. This memory range also is added to the same tracking * mechanism as cuMemHostAlloc to automatically accelerate calls to * functions such as cuMemcpyHtoD(). Since the memory can be accessed * directly by the device, it can be read or written with much higher * bandwidth than pageable * memory that has not been registered. * Page-locking excessive amounts of memory may degrade system performance, * since it reduces * the amount of memory available to the * system for paging. As a result, this function is best used sparingly * to register staging * areas for data exchange between host and * device. *

*

This function has limited support on * Mac OS X. OS 10.7 or higher is required. *

*

The Flags parameter enables * different options to be specified that affect the allocation, as * follows. *

*
* - ::CU_MEMHOSTREGISTER_PORTABLE: The memory returned by this call will be * considered as pinned memory by all CUDA contexts, not just the one that * performed the allocation. *

* - ::CU_MEMHOSTREGISTER_DEVICEMAP: Maps the allocation into the CUDA address * space. The device pointer to the memory may be obtained by calling * ::cuMemHostGetDevicePointer(). *

* - ::CU_MEMHOSTREGISTER_IOMEMORY: The pointer is treated as pointing to some * I/O memory space, e.g. the PCI Express resource of a 3rd party device. *

* - ::CU_MEMHOSTREGISTER_READ_ONLY: The pointer is treated as pointing to memory * that is considered read-only by the device. On platforms without * CU_DEVICE_ATTRIBUTE_PAGEABLE_MEMORY_ACCESS_USES_HOST_PAGE_TABLES, this flag is * required in order to register memory mapped to the CPU as read-only. Support * for the use of this flag can be queried from the device attribute * CU_DEVICE_ATTRIBUTE_READ_ONLY_HOST_REGISTER_SUPPORTED. Using this flag with * a current context associated with a device that does not have this attribute * set will cause ::cuMemHostRegister to error with CUDA_ERROR_NOT_SUPPORTED. *

*

*

All of these flags are orthogonal to * one another: a developer may page-lock memory that is portable or * mapped with no restrictions. *

*

The CUDA context must have been created * with the CU_CTX_MAP_HOST flag in order for the CU_MEMHOSTREGISTER_DEVICEMAP * flag to have any effect. *

*

The CU_MEMHOSTREGISTER_DEVICEMAP flag * may be specified on CUDA contexts for devices that do not support * mapped pinned memory. The failure is deferred to cuMemHostGetDevicePointer() * because the memory may be mapped into other CUDA contexts via the * CU_MEMHOSTREGISTER_PORTABLE flag. *

*

The memory page-locked by this function * must be unregistered with cuMemHostUnregister(). *

*
* Note: *

Note that this * function may also return error codes from previous, asynchronous * launches. *

*
*

*
* * @param p Host pointer to memory to page-lock * @param bytesize Size in bytes of the address range to page-lock * @param Flags Flags for allocation request * * @return CUDA_SUCCESS, CUDA_ERROR_DEINITIALIZED, CUDA_ERROR_NOT_INITIALIZED, * CUDA_ERROR_INVALID_CONTEXT, CUDA_ERROR_INVALID_VALUE, * CUDA_ERROR_OUT_OF_MEMORY, CUDA_ERROR_HOST_MEMORY_ALREADY_REGISTERED * * @see JCudaDriver#cuMemHostUnregister * @see JCudaDriver#cuMemHostGetFlags * @see JCudaDriver#cuMemHostGetDevicePointer */ public static int cuMemHostRegister(Pointer p, long bytesize, int Flags) { return checkResult(cuMemHostRegisterNative(p, bytesize, Flags)); } private static native int cuMemHostRegisterNative(Pointer p, long bytesize, int Flags); /** * Unregisters a memory range that was registered with cuMemHostRegister. * *
     * <pre>
     * CUresult cuMemHostUnregister (
     *      void* p )
     * </pre>
     * <p>
     * Unmaps the memory range whose base address is specified by p, and
     * makes it pageable again.
     * <p>
     * The base address must be the same one specified to cuMemHostRegister().
     * <p>
     * Note that this function may also return error codes from previous,
     * asynchronous launches.
     *
     * @param p Host pointer to memory to unregister
     *
     * @return CUDA_SUCCESS, CUDA_ERROR_DEINITIALIZED, CUDA_ERROR_NOT_INITIALIZED,
     * CUDA_ERROR_INVALID_CONTEXT, CUDA_ERROR_INVALID_VALUE,
     * CUDA_ERROR_OUT_OF_MEMORY, CUDA_ERROR_HOST_MEMORY_NOT_REGISTERED
     *
     * @see JCudaDriver#cuMemHostRegister
     */
    public static int cuMemHostUnregister(Pointer p)
    {
        int status = cuMemHostUnregisterNative(p);
        return checkResult(status);
    }
    private static native int cuMemHostUnregisterNative(Pointer p);

    /**
     * Copies memory.
     *
     * CUresult cuMemcpy (
     *      CUdeviceptr dst,
     *      CUdeviceptr src,
     *      size_t ByteCount )
     * 
*
*

Copies memory. Copies data between two * pointers. dst and src are base pointers of the * destination and source, respectively. ByteCount specifies * the number of bytes to copy. Note that this function infers the type * of the transfer (host to host, host to device, * device to device, or device to host) from * the pointer values. This function is only allowed in contexts which * support unified * addressing. Note that this function is * synchronous. *

*
* Note: *

Note that this * function may also return error codes from previous, asynchronous * launches. *

*
*

*
* * @param dst Destination unified virtual address space pointer * @param src Source unified virtual address space pointer * @param ByteCount Size of memory copy in bytes * * @return CUDA_SUCCESS, CUDA_ERROR_DEINITIALIZED, CUDA_ERROR_NOT_INITIALIZED, * CUDA_ERROR_INVALID_CONTEXT, CUDA_ERROR_INVALID_VALUE * * @see JCudaDriver#cuArray3DCreate * @see JCudaDriver#cuArray3DGetDescriptor * @see JCudaDriver#cuArrayCreate * @see JCudaDriver#cuArrayDestroy * @see JCudaDriver#cuArrayGetDescriptor * @see JCudaDriver#cuMemAlloc * @see JCudaDriver#cuMemAllocHost * @see JCudaDriver#cuMemAllocPitch * @see JCudaDriver#cuMemcpy2D * @see JCudaDriver#cuMemcpy2DAsync * @see JCudaDriver#cuMemcpy2DUnaligned * @see JCudaDriver#cuMemcpy3D * @see JCudaDriver#cuMemcpy3DAsync * @see JCudaDriver#cuMemcpyAtoA * @see JCudaDriver#cuMemcpyAtoD * @see JCudaDriver#cuMemcpyAtoH * @see JCudaDriver#cuMemcpyAtoHAsync * @see JCudaDriver#cuMemcpyDtoA * @see JCudaDriver#cuMemcpyDtoH * @see JCudaDriver#cuMemcpyDtoHAsync * @see JCudaDriver#cuMemcpyHtoA * @see JCudaDriver#cuMemcpyHtoAAsync * @see JCudaDriver#cuMemcpyHtoD * @see JCudaDriver#cuMemcpyHtoDAsync * @see JCudaDriver#cuMemFree * @see JCudaDriver#cuMemFreeHost * @see JCudaDriver#cuMemGetAddressRange * @see JCudaDriver#cuMemGetInfo * @see JCudaDriver#cuMemHostAlloc * @see JCudaDriver#cuMemHostGetDevicePointer * @see JCudaDriver#cuMemsetD2D8 * @see JCudaDriver#cuMemsetD2D16 * @see JCudaDriver#cuMemsetD2D32 * @see JCudaDriver#cuMemsetD8 * @see JCudaDriver#cuMemsetD16 * @see JCudaDriver#cuMemsetD32 */ public static int cuMemcpy(CUdeviceptr dst, CUdeviceptr src, long ByteCount) { return checkResult(cuMemcpyNative(dst, src, ByteCount)); } private static native int cuMemcpyNative(CUdeviceptr dst, CUdeviceptr src, long ByteCount); /** * Copies device memory between two contexts. * *
     * CUresult cuMemcpyPeer (
     *      CUdeviceptr dstDevice,
     *      CUcontext dstContext,
     *      CUdeviceptr srcDevice,
     *      CUcontext srcContext,
     *      size_t ByteCount )
     * 
*
*

Copies device memory between two contexts. * Copies from device memory in one context to device memory in another * context. * dstDevice is the base device * pointer of the destination memory and dstContext is the * destination context. srcDevice is the base device pointer of * the source memory and srcContext is the source pointer. ByteCount specifies the number of bytes to copy. *

*

Note that this function is asynchronous * with respect to the host, but serialized with respect all pending and * future asynchronous * work in to the current context, srcContext, and dstContext (use cuMemcpyPeerAsync to * avoid this synchronization). *

*
* Note: *

Note that this * function may also return error codes from previous, asynchronous * launches. *

*
*

*
* * @param dstDevice Destination device pointer * @param dstContext Destination context * @param srcDevice Source device pointer * @param srcContext Source context * @param ByteCount Size of memory copy in bytes * * @return CUDA_SUCCESS, CUDA_ERROR_DEINITIALIZED, CUDA_ERROR_NOT_INITIALIZED, * CUDA_ERROR_INVALID_CONTEXT, CUDA_ERROR_INVALID_VALUE * * @see JCudaDriver#cuMemcpyDtoD * @see JCudaDriver#cuMemcpy3DPeer * @see JCudaDriver#cuMemcpyDtoDAsync * @see JCudaDriver#cuMemcpyPeerAsync * @see JCudaDriver#cuMemcpy3DPeerAsync */ public static int cuMemcpyPeer(CUdeviceptr dstDevice, CUcontext dstContext, CUdeviceptr srcDevice, CUcontext srcContext, long ByteCount) { return cuMemcpyPeerNative(dstDevice, dstContext, srcDevice, srcContext, ByteCount); } private static native int cuMemcpyPeerNative(CUdeviceptr dstDevice, CUcontext dstContext, CUdeviceptr srcDevice, CUcontext srcContext, long ByteCount); /** * Allocates device memory. * *
     * CUresult cuMemAlloc (
     *      CUdeviceptr* dptr,
     *      size_t bytesize )
     * 
*
*

Allocates device memory. Allocates bytesize bytes of linear memory on the device and returns in *dptr a pointer to the allocated memory. The allocated memory is * suitably aligned for any kind of variable. The memory is not cleared. * If bytesize is 0, cuMemAlloc() * returns CUDA_ERROR_INVALID_VALUE. *

*
* Note: *

Note that this * function may also return error codes from previous, asynchronous * launches. *

*
*

*
* * @param dptr Returned device pointer * @param bytesize Requested allocation size in bytes * * @return CUDA_SUCCESS, CUDA_ERROR_DEINITIALIZED, CUDA_ERROR_NOT_INITIALIZED, * CUDA_ERROR_INVALID_CONTEXT, CUDA_ERROR_INVALID_VALUE, * CUDA_ERROR_OUT_OF_MEMORY * * @see JCudaDriver#cuArray3DCreate * @see JCudaDriver#cuArray3DGetDescriptor * @see JCudaDriver#cuArrayCreate * @see JCudaDriver#cuArrayDestroy * @see JCudaDriver#cuArrayGetDescriptor * @see JCudaDriver#cuMemAllocHost * @see JCudaDriver#cuMemAllocPitch * @see JCudaDriver#cuMemcpy2D * @see JCudaDriver#cuMemcpy2DAsync * @see JCudaDriver#cuMemcpy2DUnaligned * @see JCudaDriver#cuMemcpy3D * @see JCudaDriver#cuMemcpy3DAsync * @see JCudaDriver#cuMemcpyAtoA * @see JCudaDriver#cuMemcpyAtoD * @see JCudaDriver#cuMemcpyAtoH * @see JCudaDriver#cuMemcpyAtoHAsync * @see JCudaDriver#cuMemcpyDtoA * @see JCudaDriver#cuMemcpyDtoD * @see JCudaDriver#cuMemcpyDtoDAsync * @see JCudaDriver#cuMemcpyDtoH * @see JCudaDriver#cuMemcpyDtoHAsync * @see JCudaDriver#cuMemcpyHtoA * @see JCudaDriver#cuMemcpyHtoAAsync * @see JCudaDriver#cuMemcpyHtoD * @see JCudaDriver#cuMemcpyHtoDAsync * @see JCudaDriver#cuMemFree * @see JCudaDriver#cuMemFreeHost * @see JCudaDriver#cuMemGetAddressRange * @see JCudaDriver#cuMemGetInfo * @see JCudaDriver#cuMemHostAlloc * @see JCudaDriver#cuMemHostGetDevicePointer * @see JCudaDriver#cuMemsetD2D8 * @see JCudaDriver#cuMemsetD2D16 * @see JCudaDriver#cuMemsetD2D32 * @see JCudaDriver#cuMemsetD8 * @see JCudaDriver#cuMemsetD16 * @see JCudaDriver#cuMemsetD32 */ public static int cuMemAlloc(CUdeviceptr dptr, long bytesize) { return checkResult(cuMemAllocNative(dptr, bytesize)); } private static native int cuMemAllocNative(CUdeviceptr dptr, long bytesize); /** * Allocates pitched device memory. * *
     * CUresult cuMemAllocPitch (
     *      CUdeviceptr* dptr,
     *      size_t* pPitch,
     *      size_t WidthInBytes,
     *      size_t Height,
     *      unsigned int  ElementSizeBytes )
     * 
*
*

Allocates pitched device memory. * Allocates at least WidthInBytes * Height bytes of * linear memory on the device and returns in *dptr a pointer * to the allocated memory. The function may pad the allocation to ensure * that corresponding pointers in any given * row will continue to meet the alignment * requirements for coalescing as the address is updated from row to row. * ElementSizeBytes specifies the size of the largest reads and * writes that will be performed on the memory range. ElementSizeBytes may be 4, 8 or 16 (since coalesced memory * transactions are not possible on other data sizes). If ElementSizeBytes is smaller than the actual read/write size of a * kernel, the kernel will run correctly, but possibly at reduced speed. * The * pitch returned in *pPitch by * cuMemAllocPitch() is the width in bytes of the allocation. The intended * usage of pitch is as a separate parameter of the allocation, used to * compute addresses within the 2D array. * Given the row and column of an array element of type T, * the address is computed as: *

   T* pElement = (T*)((char*)BaseAddress
     * + Row * Pitch) + Column;
*

*

The pitch returned by cuMemAllocPitch() * is guaranteed to work with cuMemcpy2D() under all circumstances. For * allocations of 2D arrays, it is recommended that programmers consider * performing pitch allocations * using cuMemAllocPitch(). Due to alignment * restrictions in the hardware, this is especially true if the application * will be performing 2D memory copies * between different regions of device * memory (whether linear memory or CUDA arrays). *

*

The byte alignment of the pitch returned * by cuMemAllocPitch() is guaranteed to match or exceed the alignment * requirement for texture binding with cuTexRefSetAddress2D(). *

*
* Note: *

Note that this * function may also return error codes from previous, asynchronous * launches. *

*
*

*
* * @param dptr Returned device pointer * @param pPitch Returned pitch of allocation in bytes * @param WidthInBytes Requested allocation width in bytes * @param Height Requested allocation height in rows * @param ElementSizeBytes Size of largest reads/writes for range * * @return CUDA_SUCCESS, CUDA_ERROR_DEINITIALIZED, CUDA_ERROR_NOT_INITIALIZED, * CUDA_ERROR_INVALID_CONTEXT, CUDA_ERROR_INVALID_VALUE, * CUDA_ERROR_OUT_OF_MEMORY * * @see JCudaDriver#cuArray3DCreate * @see JCudaDriver#cuArray3DGetDescriptor * @see JCudaDriver#cuArrayCreate * @see JCudaDriver#cuArrayDestroy * @see JCudaDriver#cuArrayGetDescriptor * @see JCudaDriver#cuMemAlloc * @see JCudaDriver#cuMemAllocHost * @see JCudaDriver#cuMemcpy2D * @see JCudaDriver#cuMemcpy2DAsync * @see JCudaDriver#cuMemcpy2DUnaligned * @see JCudaDriver#cuMemcpy3D * @see JCudaDriver#cuMemcpy3DAsync * @see JCudaDriver#cuMemcpyAtoA * @see JCudaDriver#cuMemcpyAtoD * @see JCudaDriver#cuMemcpyAtoH * @see JCudaDriver#cuMemcpyAtoHAsync * @see JCudaDriver#cuMemcpyDtoA * @see JCudaDriver#cuMemcpyDtoD * @see JCudaDriver#cuMemcpyDtoDAsync * @see JCudaDriver#cuMemcpyDtoH * @see JCudaDriver#cuMemcpyDtoHAsync * @see JCudaDriver#cuMemcpyHtoA * @see JCudaDriver#cuMemcpyHtoAAsync * @see JCudaDriver#cuMemcpyHtoD * @see JCudaDriver#cuMemcpyHtoDAsync * @see JCudaDriver#cuMemFree * @see JCudaDriver#cuMemFreeHost * @see JCudaDriver#cuMemGetAddressRange * @see JCudaDriver#cuMemGetInfo * @see JCudaDriver#cuMemHostAlloc * @see JCudaDriver#cuMemHostGetDevicePointer * @see JCudaDriver#cuMemsetD2D8 * @see JCudaDriver#cuMemsetD2D16 * @see JCudaDriver#cuMemsetD2D32 * @see JCudaDriver#cuMemsetD8 * @see JCudaDriver#cuMemsetD16 * @see JCudaDriver#cuMemsetD32 */ public static int cuMemAllocPitch(CUdeviceptr dptr, long pPitch[], long WidthInBytes, long Height, int ElementSizeBytes) { return checkResult(cuMemAllocPitchNative(dptr, pPitch, WidthInBytes, Height, ElementSizeBytes)); } private static native int cuMemAllocPitchNative(CUdeviceptr dptr, long 
pPitch[], long WidthInBytes, long Height, int ElementSizeBytes); /** * Frees device memory. * *
     * CUresult cuMemFree (
     *      CUdeviceptr dptr )
     * 
*
*

Frees device memory. Frees the memory * space pointed to by dptr, which must have been returned by a * previous call to cuMemAlloc() or cuMemAllocPitch(). *

*
* Note: *

Note that this * function may also return error codes from previous, asynchronous * launches. *

*
*

*
* * @param dptr Pointer to memory to free * * @return CUDA_SUCCESS, CUDA_ERROR_DEINITIALIZED, CUDA_ERROR_NOT_INITIALIZED, * CUDA_ERROR_INVALID_CONTEXT, CUDA_ERROR_INVALID_VALUE * * @see JCudaDriver#cuArray3DCreate * @see JCudaDriver#cuArray3DGetDescriptor * @see JCudaDriver#cuArrayCreate * @see JCudaDriver#cuArrayDestroy * @see JCudaDriver#cuArrayGetDescriptor * @see JCudaDriver#cuMemAlloc * @see JCudaDriver#cuMemAllocHost * @see JCudaDriver#cuMemAllocPitch * @see JCudaDriver#cuMemcpy2D * @see JCudaDriver#cuMemcpy2DAsync * @see JCudaDriver#cuMemcpy2DUnaligned * @see JCudaDriver#cuMemcpy3D * @see JCudaDriver#cuMemcpy3DAsync * @see JCudaDriver#cuMemcpyAtoA * @see JCudaDriver#cuMemcpyAtoD * @see JCudaDriver#cuMemcpyAtoH * @see JCudaDriver#cuMemcpyAtoHAsync * @see JCudaDriver#cuMemcpyDtoA * @see JCudaDriver#cuMemcpyDtoD * @see JCudaDriver#cuMemcpyDtoDAsync * @see JCudaDriver#cuMemcpyDtoH * @see JCudaDriver#cuMemcpyDtoHAsync * @see JCudaDriver#cuMemcpyHtoA * @see JCudaDriver#cuMemcpyHtoAAsync * @see JCudaDriver#cuMemcpyHtoD * @see JCudaDriver#cuMemcpyHtoDAsync * @see JCudaDriver#cuMemFreeHost * @see JCudaDriver#cuMemGetAddressRange * @see JCudaDriver#cuMemGetInfo * @see JCudaDriver#cuMemHostAlloc * @see JCudaDriver#cuMemHostGetDevicePointer * @see JCudaDriver#cuMemsetD2D8 * @see JCudaDriver#cuMemsetD2D16 * @see JCudaDriver#cuMemsetD2D32 * @see JCudaDriver#cuMemsetD8 * @see JCudaDriver#cuMemsetD16 * @see JCudaDriver#cuMemsetD32 */ public static int cuMemFree(CUdeviceptr dptr) { return checkResult(cuMemFreeNative(dptr)); } private static native int cuMemFreeNative(CUdeviceptr dptr); /** * Get information on memory allocations. * *
     * CUresult cuMemGetAddressRange (
     *      CUdeviceptr* pbase,
     *      size_t* psize,
     *      CUdeviceptr dptr )
     * 
*
*

Get information on memory allocations. * Returns the base address in *pbase and size in *psize * of the allocation by cuMemAlloc() or cuMemAllocPitch() that contains * the input pointer dptr. Both parameters pbase and * psize are optional. If one of them is NULL, it is ignored. *

*
* Note: *

Note that this * function may also return error codes from previous, asynchronous * launches. *

*
*

*
* * @param pbase Returned base address * @param psize Returned size of device memory allocation * @param dptr Device pointer to query * * @return CUDA_SUCCESS, CUDA_ERROR_DEINITIALIZED, CUDA_ERROR_NOT_INITIALIZED, * CUDA_ERROR_INVALID_CONTEXT, CUDA_ERROR_INVALID_VALUE * * @see JCudaDriver#cuArray3DCreate * @see JCudaDriver#cuArray3DGetDescriptor * @see JCudaDriver#cuArrayCreate * @see JCudaDriver#cuArrayDestroy * @see JCudaDriver#cuArrayGetDescriptor * @see JCudaDriver#cuMemAlloc * @see JCudaDriver#cuMemAllocHost * @see JCudaDriver#cuMemAllocPitch * @see JCudaDriver#cuMemcpy2D * @see JCudaDriver#cuMemcpy2DAsync * @see JCudaDriver#cuMemcpy2DUnaligned * @see JCudaDriver#cuMemcpy3D * @see JCudaDriver#cuMemcpy3DAsync * @see JCudaDriver#cuMemcpyAtoA * @see JCudaDriver#cuMemcpyAtoD * @see JCudaDriver#cuMemcpyAtoH * @see JCudaDriver#cuMemcpyAtoHAsync * @see JCudaDriver#cuMemcpyDtoA * @see JCudaDriver#cuMemcpyDtoD * @see JCudaDriver#cuMemcpyDtoDAsync * @see JCudaDriver#cuMemcpyDtoH * @see JCudaDriver#cuMemcpyDtoHAsync * @see JCudaDriver#cuMemcpyHtoA * @see JCudaDriver#cuMemcpyHtoAAsync * @see JCudaDriver#cuMemcpyHtoD * @see JCudaDriver#cuMemcpyHtoDAsync * @see JCudaDriver#cuMemFree * @see JCudaDriver#cuMemFreeHost * @see JCudaDriver#cuMemGetInfo * @see JCudaDriver#cuMemHostAlloc * @see JCudaDriver#cuMemHostGetDevicePointer * @see JCudaDriver#cuMemsetD2D8 * @see JCudaDriver#cuMemsetD2D16 * @see JCudaDriver#cuMemsetD2D32 * @see JCudaDriver#cuMemsetD8 * @see JCudaDriver#cuMemsetD16 * @see JCudaDriver#cuMemsetD32 */ public static int cuMemGetAddressRange(CUdeviceptr pbase, long psize[], CUdeviceptr dptr) { return checkResult(cuMemGetAddressRangeNative(pbase, psize, dptr)); } private static native int cuMemGetAddressRangeNative(CUdeviceptr pbase, long psize[], CUdeviceptr dptr); /** * Allocates page-locked host memory. * *
     * CUresult cuMemAllocHost (
     *      void** pp,
     *      size_t bytesize )
     * 
*
*

Allocates page-locked host memory. * Allocates bytesize bytes of host memory that is page-locked * and accessible to the device. The driver tracks the virtual memory * ranges allocated * with this function and automatically * accelerates calls to functions such as cuMemcpy(). Since the memory * can be accessed directly by the device, it can be read or written with * much higher bandwidth than pageable * memory obtained with functions such as * malloc(). Allocating excessive amounts of memory with cuMemAllocHost() * may degrade system performance, since it reduces the amount of memory * available to the system for paging. As a result, this * function is best used sparingly to * allocate staging areas for data exchange between host and device. *

*

Note all host memory allocated using * cuMemHostAlloc() will automatically be immediately accessible to all * contexts on all devices which support unified addressing (as may be * queried * using CU_DEVICE_ATTRIBUTE_UNIFIED_ADDRESSING). * The device pointer that may be used to access this host memory from * those contexts is always equal to the returned host * pointer *pp. See Unified * Addressing for additional details. *

*
* Note: *

Note that this * function may also return error codes from previous, asynchronous * launches. *

*
*

*
* * @param pp Returned host pointer to page-locked memory * @param bytesize Requested allocation size in bytes * * @return CUDA_SUCCESS, CUDA_ERROR_DEINITIALIZED, CUDA_ERROR_NOT_INITIALIZED, * CUDA_ERROR_INVALID_CONTEXT, CUDA_ERROR_INVALID_VALUE, * CUDA_ERROR_OUT_OF_MEMORY * * @see JCudaDriver#cuArray3DCreate * @see JCudaDriver#cuArray3DGetDescriptor * @see JCudaDriver#cuArrayCreate * @see JCudaDriver#cuArrayDestroy * @see JCudaDriver#cuArrayGetDescriptor * @see JCudaDriver#cuMemAlloc * @see JCudaDriver#cuMemAllocPitch * @see JCudaDriver#cuMemcpy2D * @see JCudaDriver#cuMemcpy2DAsync * @see JCudaDriver#cuMemcpy2DUnaligned * @see JCudaDriver#cuMemcpy3D * @see JCudaDriver#cuMemcpy3DAsync * @see JCudaDriver#cuMemcpyAtoA * @see JCudaDriver#cuMemcpyAtoD * @see JCudaDriver#cuMemcpyAtoH * @see JCudaDriver#cuMemcpyAtoHAsync * @see JCudaDriver#cuMemcpyDtoA * @see JCudaDriver#cuMemcpyDtoD * @see JCudaDriver#cuMemcpyDtoDAsync * @see JCudaDriver#cuMemcpyDtoH * @see JCudaDriver#cuMemcpyDtoHAsync * @see JCudaDriver#cuMemcpyHtoA * @see JCudaDriver#cuMemcpyHtoAAsync * @see JCudaDriver#cuMemcpyHtoD * @see JCudaDriver#cuMemcpyHtoDAsync * @see JCudaDriver#cuMemFree * @see JCudaDriver#cuMemFreeHost * @see JCudaDriver#cuMemGetAddressRange * @see JCudaDriver#cuMemGetInfo * @see JCudaDriver#cuMemHostAlloc * @see JCudaDriver#cuMemHostGetDevicePointer * @see JCudaDriver#cuMemsetD2D8 * @see JCudaDriver#cuMemsetD2D16 * @see JCudaDriver#cuMemsetD2D32 * @see JCudaDriver#cuMemsetD8 * @see JCudaDriver#cuMemsetD16 * @see JCudaDriver#cuMemsetD32 */ public static int cuMemAllocHost(Pointer pointer, long bytesize) { return checkResult(cuMemAllocHostNative(pointer, bytesize)); } private static native int cuMemAllocHostNative(Pointer pp, long bytesize); /** * Frees page-locked host memory. * *
     * CUresult cuMemFreeHost (
     *      void* p )
     * 
*
*

Frees page-locked host memory. Frees * the memory space pointed to by p, which must have been * returned by a previous call to cuMemAllocHost(). *

*
* Note: *

Note that this * function may also return error codes from previous, asynchronous * launches. *

*
*

*
* * @param p Pointer to memory to free * * @return CUDA_SUCCESS, CUDA_ERROR_DEINITIALIZED, CUDA_ERROR_NOT_INITIALIZED, * CUDA_ERROR_INVALID_CONTEXT, CUDA_ERROR_INVALID_VALUE * * @see JCudaDriver#cuArray3DCreate * @see JCudaDriver#cuArray3DGetDescriptor * @see JCudaDriver#cuArrayCreate * @see JCudaDriver#cuArrayDestroy * @see JCudaDriver#cuArrayGetDescriptor * @see JCudaDriver#cuMemAlloc * @see JCudaDriver#cuMemAllocHost * @see JCudaDriver#cuMemAllocPitch * @see JCudaDriver#cuMemcpy2D * @see JCudaDriver#cuMemcpy2DAsync * @see JCudaDriver#cuMemcpy2DUnaligned * @see JCudaDriver#cuMemcpy3D * @see JCudaDriver#cuMemcpy3DAsync * @see JCudaDriver#cuMemcpyAtoA * @see JCudaDriver#cuMemcpyAtoD * @see JCudaDriver#cuMemcpyAtoH * @see JCudaDriver#cuMemcpyAtoHAsync * @see JCudaDriver#cuMemcpyDtoA * @see JCudaDriver#cuMemcpyDtoD * @see JCudaDriver#cuMemcpyDtoDAsync * @see JCudaDriver#cuMemcpyDtoH * @see JCudaDriver#cuMemcpyDtoHAsync * @see JCudaDriver#cuMemcpyHtoA * @see JCudaDriver#cuMemcpyHtoAAsync * @see JCudaDriver#cuMemcpyHtoD * @see JCudaDriver#cuMemcpyHtoDAsync * @see JCudaDriver#cuMemFree * @see JCudaDriver#cuMemGetAddressRange * @see JCudaDriver#cuMemGetInfo * @see JCudaDriver#cuMemHostAlloc * @see JCudaDriver#cuMemHostGetDevicePointer * @see JCudaDriver#cuMemsetD2D8 * @see JCudaDriver#cuMemsetD2D16 * @see JCudaDriver#cuMemsetD2D32 * @see JCudaDriver#cuMemsetD8 * @see JCudaDriver#cuMemsetD16 * @see JCudaDriver#cuMemsetD32 */ public static int cuMemFreeHost(Pointer p) { return checkResult(cuMemFreeHostNative(p)); } private static native int cuMemFreeHostNative(Pointer p); /** * Copies memory from Host to Device. * *
     * CUresult cuMemcpyHtoD (
     *      CUdeviceptr dstDevice,
     *      const void* srcHost,
     *      size_t ByteCount )
     * 
*
*

Copies memory from Host to Device. * Copies from host memory to device memory. dstDevice and srcHost are the base addresses of the destination and source, * respectively. ByteCount specifies the number of bytes to * copy. Note that this function is synchronous. *

*
* Note: *

Note that this * function may also return error codes from previous, asynchronous * launches. *

*
*

*
* * @param dstDevice Destination device pointer * @param srcHost Source host pointer * @param ByteCount Size of memory copy in bytes * * @return CUDA_SUCCESS, CUDA_ERROR_DEINITIALIZED, CUDA_ERROR_NOT_INITIALIZED, * CUDA_ERROR_INVALID_CONTEXT, CUDA_ERROR_INVALID_VALUE * * @see JCudaDriver#cuArray3DCreate * @see JCudaDriver#cuArray3DGetDescriptor * @see JCudaDriver#cuArrayCreate * @see JCudaDriver#cuArrayDestroy * @see JCudaDriver#cuArrayGetDescriptor * @see JCudaDriver#cuMemAlloc * @see JCudaDriver#cuMemAllocHost * @see JCudaDriver#cuMemAllocPitch * @see JCudaDriver#cuMemcpy2D * @see JCudaDriver#cuMemcpy2DAsync * @see JCudaDriver#cuMemcpy2DUnaligned * @see JCudaDriver#cuMemcpy3D * @see JCudaDriver#cuMemcpy3DAsync * @see JCudaDriver#cuMemcpyAtoA * @see JCudaDriver#cuMemcpyAtoD * @see JCudaDriver#cuMemcpyAtoH * @see JCudaDriver#cuMemcpyAtoHAsync * @see JCudaDriver#cuMemcpyDtoA * @see JCudaDriver#cuMemcpyDtoD * @see JCudaDriver#cuMemcpyDtoDAsync * @see JCudaDriver#cuMemcpyDtoH * @see JCudaDriver#cuMemcpyDtoHAsync * @see JCudaDriver#cuMemcpyHtoA * @see JCudaDriver#cuMemcpyHtoAAsync * @see JCudaDriver#cuMemcpyHtoDAsync * @see JCudaDriver#cuMemFree * @see JCudaDriver#cuMemFreeHost * @see JCudaDriver#cuMemGetAddressRange * @see JCudaDriver#cuMemGetInfo * @see JCudaDriver#cuMemHostAlloc * @see JCudaDriver#cuMemHostGetDevicePointer * @see JCudaDriver#cuMemsetD2D8 * @see JCudaDriver#cuMemsetD2D16 * @see JCudaDriver#cuMemsetD2D32 * @see JCudaDriver#cuMemsetD8 * @see JCudaDriver#cuMemsetD16 * @see JCudaDriver#cuMemsetD32 */ public static int cuMemcpyHtoD(CUdeviceptr dstDevice, Pointer srcHost, long ByteCount) { return checkResult(cuMemcpyHtoDNative(dstDevice, srcHost, ByteCount)); } private static native int cuMemcpyHtoDNative(CUdeviceptr dstDevice, Pointer srcHost, long ByteCount); /** * Copies memory from Device to Host. * *
     * CUresult cuMemcpyDtoH (
     *      void* dstHost,
     *      CUdeviceptr srcDevice,
     *      size_t ByteCount )
     * 
*
*

Copies memory from Device to Host. * Copies from device to host memory. dstHost and srcDevice specify the base pointers of the destination and * source, respectively. ByteCount specifies the number of bytes * to copy. Note that this function is synchronous. *

*
* Note: *

Note that this * function may also return error codes from previous, asynchronous * launches. *

*
*

*
* * @param dstHost Destination host pointer * @param srcDevice Source device pointer * @param ByteCount Size of memory copy in bytes * * @return CUDA_SUCCESS, CUDA_ERROR_DEINITIALIZED, CUDA_ERROR_NOT_INITIALIZED, * CUDA_ERROR_INVALID_CONTEXT, CUDA_ERROR_INVALID_VALUE * * @see JCudaDriver#cuArray3DCreate * @see JCudaDriver#cuArray3DGetDescriptor * @see JCudaDriver#cuArrayCreate * @see JCudaDriver#cuArrayDestroy * @see JCudaDriver#cuArrayGetDescriptor * @see JCudaDriver#cuMemAlloc * @see JCudaDriver#cuMemAllocHost * @see JCudaDriver#cuMemAllocPitch * @see JCudaDriver#cuMemcpy2D * @see JCudaDriver#cuMemcpy2DAsync * @see JCudaDriver#cuMemcpy2DUnaligned * @see JCudaDriver#cuMemcpy3D * @see JCudaDriver#cuMemcpy3DAsync * @see JCudaDriver#cuMemcpyAtoA * @see JCudaDriver#cuMemcpyAtoD * @see JCudaDriver#cuMemcpyAtoH * @see JCudaDriver#cuMemcpyAtoHAsync * @see JCudaDriver#cuMemcpyDtoA * @see JCudaDriver#cuMemcpyDtoD * @see JCudaDriver#cuMemcpyDtoDAsync * @see JCudaDriver#cuMemcpyDtoHAsync * @see JCudaDriver#cuMemcpyHtoA * @see JCudaDriver#cuMemcpyHtoAAsync * @see JCudaDriver#cuMemcpyHtoD * @see JCudaDriver#cuMemcpyHtoDAsync * @see JCudaDriver#cuMemFree * @see JCudaDriver#cuMemFreeHost * @see JCudaDriver#cuMemGetAddressRange * @see JCudaDriver#cuMemGetInfo * @see JCudaDriver#cuMemHostAlloc * @see JCudaDriver#cuMemHostGetDevicePointer * @see JCudaDriver#cuMemsetD2D8 * @see JCudaDriver#cuMemsetD2D16 * @see JCudaDriver#cuMemsetD2D32 * @see JCudaDriver#cuMemsetD8 * @see JCudaDriver#cuMemsetD16 * @see JCudaDriver#cuMemsetD32 */ public static int cuMemcpyDtoH(Pointer dstHost, CUdeviceptr srcDevice, long ByteCount) { return checkResult(cuMemcpyDtoHNative(dstHost, srcDevice, ByteCount)); } private static native int cuMemcpyDtoHNative(Pointer dstHost, CUdeviceptr srcDevice, long ByteCount); /** * Copies memory from Device to Device. * *
     * CUresult cuMemcpyDtoD (
     *      CUdeviceptr dstDevice,
     *      CUdeviceptr srcDevice,
     *      size_t ByteCount )
     * 
*
*

Copies memory from Device to Device. * Copies from device memory to device memory. dstDevice and * srcDevice are the base pointers of the destination and * source, respectively. ByteCount specifies the number of bytes * to copy. Note that this function is asynchronous. *

*
* Note: *

Note that this * function may also return error codes from previous, asynchronous * launches. *

*
*

*
* * @param dstDevice Destination device pointer * @param srcDevice Source device pointer * @param ByteCount Size of memory copy in bytes * * @return CUDA_SUCCESS, CUDA_ERROR_DEINITIALIZED, CUDA_ERROR_NOT_INITIALIZED, * CUDA_ERROR_INVALID_CONTEXT, CUDA_ERROR_INVALID_VALUE * * @see JCudaDriver#cuArray3DCreate * @see JCudaDriver#cuArray3DGetDescriptor * @see JCudaDriver#cuArrayCreate * @see JCudaDriver#cuArrayDestroy * @see JCudaDriver#cuArrayGetDescriptor * @see JCudaDriver#cuMemAlloc * @see JCudaDriver#cuMemAllocHost * @see JCudaDriver#cuMemAllocPitch * @see JCudaDriver#cuMemcpy2D * @see JCudaDriver#cuMemcpy2DAsync * @see JCudaDriver#cuMemcpy2DUnaligned * @see JCudaDriver#cuMemcpy3D * @see JCudaDriver#cuMemcpy3DAsync * @see JCudaDriver#cuMemcpyAtoA * @see JCudaDriver#cuMemcpyAtoD * @see JCudaDriver#cuMemcpyAtoH * @see JCudaDriver#cuMemcpyAtoHAsync * @see JCudaDriver#cuMemcpyDtoA * @see JCudaDriver#cuMemcpyDtoH * @see JCudaDriver#cuMemcpyDtoHAsync * @see JCudaDriver#cuMemcpyHtoA * @see JCudaDriver#cuMemcpyHtoAAsync * @see JCudaDriver#cuMemcpyHtoD * @see JCudaDriver#cuMemcpyHtoDAsync * @see JCudaDriver#cuMemFree * @see JCudaDriver#cuMemFreeHost * @see JCudaDriver#cuMemGetAddressRange * @see JCudaDriver#cuMemGetInfo * @see JCudaDriver#cuMemHostAlloc * @see JCudaDriver#cuMemHostGetDevicePointer * @see JCudaDriver#cuMemsetD2D8 * @see JCudaDriver#cuMemsetD2D16 * @see JCudaDriver#cuMemsetD2D32 * @see JCudaDriver#cuMemsetD8 * @see JCudaDriver#cuMemsetD16 * @see JCudaDriver#cuMemsetD32 */ public static int cuMemcpyDtoD(CUdeviceptr dstDevice, CUdeviceptr srcDevice, long ByteCount) { return checkResult(cuMemcpyDtoDNative(dstDevice, srcDevice, ByteCount)); } private static native int cuMemcpyDtoDNative(CUdeviceptr dstDevice, CUdeviceptr srcDevice, long ByteCount); /** * Copies memory from Device to Array. * *
     * CUresult cuMemcpyDtoA (
     *      CUarray dstArray,
     *      size_t dstOffset,
     *      CUdeviceptr srcDevice,
     *      size_t ByteCount )
     * 
*
*

Copies memory from Device to Array. * Copies from device memory to a 1D CUDA array. dstArray and * dstOffset specify the CUDA array handle and starting index * of the destination data. srcDevice specifies the base pointer * of the source. ByteCount specifies the number of bytes to * copy. *

*
* Note: *

Note that this * function may also return error codes from previous, asynchronous * launches. *

*
*

*
* * @param dstArray Destination array * @param dstOffset Offset in bytes of destination array * @param srcDevice Source device pointer * @param ByteCount Size of memory copy in bytes * * @return CUDA_SUCCESS, CUDA_ERROR_DEINITIALIZED, CUDA_ERROR_NOT_INITIALIZED, * CUDA_ERROR_INVALID_CONTEXT, CUDA_ERROR_INVALID_VALUE * * @see JCudaDriver#cuArray3DCreate * @see JCudaDriver#cuArray3DGetDescriptor * @see JCudaDriver#cuArrayCreate * @see JCudaDriver#cuArrayDestroy * @see JCudaDriver#cuArrayGetDescriptor * @see JCudaDriver#cuMemAlloc * @see JCudaDriver#cuMemAllocHost * @see JCudaDriver#cuMemAllocPitch * @see JCudaDriver#cuMemcpy2D * @see JCudaDriver#cuMemcpy2DAsync * @see JCudaDriver#cuMemcpy2DUnaligned * @see JCudaDriver#cuMemcpy3D * @see JCudaDriver#cuMemcpy3DAsync * @see JCudaDriver#cuMemcpyAtoA * @see JCudaDriver#cuMemcpyAtoD * @see JCudaDriver#cuMemcpyAtoH * @see JCudaDriver#cuMemcpyAtoHAsync * @see JCudaDriver#cuMemcpyDtoD * @see JCudaDriver#cuMemcpyDtoDAsync * @see JCudaDriver#cuMemcpyDtoH * @see JCudaDriver#cuMemcpyDtoHAsync * @see JCudaDriver#cuMemcpyHtoA * @see JCudaDriver#cuMemcpyHtoAAsync * @see JCudaDriver#cuMemcpyHtoD * @see JCudaDriver#cuMemcpyHtoDAsync * @see JCudaDriver#cuMemFree * @see JCudaDriver#cuMemFreeHost * @see JCudaDriver#cuMemGetAddressRange * @see JCudaDriver#cuMemGetInfo * @see JCudaDriver#cuMemHostAlloc * @see JCudaDriver#cuMemHostGetDevicePointer * @see JCudaDriver#cuMemsetD2D8 * @see JCudaDriver#cuMemsetD2D16 * @see JCudaDriver#cuMemsetD2D32 * @see JCudaDriver#cuMemsetD8 * @see JCudaDriver#cuMemsetD16 * @see JCudaDriver#cuMemsetD32 */ public static int cuMemcpyDtoA(CUarray dstArray, long dstIndex, CUdeviceptr srcDevice, long ByteCount) { return checkResult(cuMemcpyDtoANative(dstArray, dstIndex, srcDevice, ByteCount)); } private static native int cuMemcpyDtoANative(CUarray dstArray, long dstIndex, CUdeviceptr srcDevice, long ByteCount); /** * Copies memory from Array to Device. * *
     * CUresult cuMemcpyAtoD (
     *      CUdeviceptr dstDevice,
     *      CUarray srcArray,
     *      size_t srcOffset,
     *      size_t ByteCount )
     * 
*
*

Copies memory from Array to Device. * Copies from one 1D CUDA array to device memory. dstDevice * specifies the base pointer of the destination and must be naturally * aligned with the CUDA array elements. srcArray and srcOffset specify the CUDA array handle and the offset in bytes * into the array where the copy is to begin. ByteCount specifies * the number of bytes to copy and must be evenly divisible by the array * element size. *

*
* Note: *

Note that this * function may also return error codes from previous, asynchronous * launches. *

*
*

*
* * @param dstDevice Destination device pointer * @param srcArray Source array * @param srcOffset Offset in bytes of source array * @param ByteCount Size of memory copy in bytes * * @return CUDA_SUCCESS, CUDA_ERROR_DEINITIALIZED, CUDA_ERROR_NOT_INITIALIZED, * CUDA_ERROR_INVALID_CONTEXT, CUDA_ERROR_INVALID_VALUE * * @see JCudaDriver#cuArray3DCreate * @see JCudaDriver#cuArray3DGetDescriptor * @see JCudaDriver#cuArrayCreate * @see JCudaDriver#cuArrayDestroy * @see JCudaDriver#cuArrayGetDescriptor * @see JCudaDriver#cuMemAlloc * @see JCudaDriver#cuMemAllocHost * @see JCudaDriver#cuMemAllocPitch * @see JCudaDriver#cuMemcpy2D * @see JCudaDriver#cuMemcpy2DAsync * @see JCudaDriver#cuMemcpy2DUnaligned * @see JCudaDriver#cuMemcpy3D * @see JCudaDriver#cuMemcpy3DAsync * @see JCudaDriver#cuMemcpyAtoA * @see JCudaDriver#cuMemcpyAtoH * @see JCudaDriver#cuMemcpyAtoHAsync * @see JCudaDriver#cuMemcpyDtoA * @see JCudaDriver#cuMemcpyDtoD * @see JCudaDriver#cuMemcpyDtoDAsync * @see JCudaDriver#cuMemcpyDtoH * @see JCudaDriver#cuMemcpyDtoHAsync * @see JCudaDriver#cuMemcpyHtoA * @see JCudaDriver#cuMemcpyHtoAAsync * @see JCudaDriver#cuMemcpyHtoD * @see JCudaDriver#cuMemcpyHtoDAsync * @see JCudaDriver#cuMemFree * @see JCudaDriver#cuMemFreeHost * @see JCudaDriver#cuMemGetAddressRange * @see JCudaDriver#cuMemGetInfo * @see JCudaDriver#cuMemHostAlloc * @see JCudaDriver#cuMemHostGetDevicePointer * @see JCudaDriver#cuMemsetD2D8 * @see JCudaDriver#cuMemsetD2D16 * @see JCudaDriver#cuMemsetD2D32 * @see JCudaDriver#cuMemsetD8 * @see JCudaDriver#cuMemsetD16 * @see JCudaDriver#cuMemsetD32 */ public static int cuMemcpyAtoD(CUdeviceptr dstDevice, CUarray hSrc, long SrcIndex, long ByteCount) { return checkResult(cuMemcpyAtoDNative(dstDevice, hSrc, SrcIndex, ByteCount)); } private static native int cuMemcpyAtoDNative(CUdeviceptr dstDevice, CUarray hSrc, long SrcIndex, long ByteCount); /** * Copies memory from Host to Array. * *
     * CUresult cuMemcpyHtoA (
     *      CUarray dstArray,
     *      size_t dstOffset,
     *      const void* srcHost,
     *      size_t ByteCount )
     * 
*
*

Copies memory from Host to Array. Copies * from host memory to a 1D CUDA array. dstArray and dstOffset specify the CUDA array handle and starting offset in * bytes of the destination data. pSrc specifies the base * address of the source. ByteCount specifies the number of * bytes to copy. *

*
* Note: *

Note that this * function may also return error codes from previous, asynchronous * launches. *

*
*

*
* * @param dstArray Destination array * @param dstOffset Offset in bytes of destination array * @param srcHost Source host pointer * @param ByteCount Size of memory copy in bytes * * @return CUDA_SUCCESS, CUDA_ERROR_DEINITIALIZED, CUDA_ERROR_NOT_INITIALIZED, * CUDA_ERROR_INVALID_CONTEXT, CUDA_ERROR_INVALID_VALUE * * @see JCudaDriver#cuArray3DCreate * @see JCudaDriver#cuArray3DGetDescriptor * @see JCudaDriver#cuArrayCreate * @see JCudaDriver#cuArrayDestroy * @see JCudaDriver#cuArrayGetDescriptor * @see JCudaDriver#cuMemAlloc * @see JCudaDriver#cuMemAllocHost * @see JCudaDriver#cuMemAllocPitch * @see JCudaDriver#cuMemcpy2D * @see JCudaDriver#cuMemcpy2DAsync * @see JCudaDriver#cuMemcpy2DUnaligned * @see JCudaDriver#cuMemcpy3D * @see JCudaDriver#cuMemcpy3DAsync * @see JCudaDriver#cuMemcpyAtoA * @see JCudaDriver#cuMemcpyAtoD * @see JCudaDriver#cuMemcpyAtoH * @see JCudaDriver#cuMemcpyAtoHAsync * @see JCudaDriver#cuMemcpyDtoA * @see JCudaDriver#cuMemcpyDtoD * @see JCudaDriver#cuMemcpyDtoDAsync * @see JCudaDriver#cuMemcpyDtoH * @see JCudaDriver#cuMemcpyDtoHAsync * @see JCudaDriver#cuMemcpyHtoAAsync * @see JCudaDriver#cuMemcpyHtoD * @see JCudaDriver#cuMemcpyHtoDAsync * @see JCudaDriver#cuMemFree * @see JCudaDriver#cuMemFreeHost * @see JCudaDriver#cuMemGetAddressRange * @see JCudaDriver#cuMemGetInfo * @see JCudaDriver#cuMemHostAlloc * @see JCudaDriver#cuMemHostGetDevicePointer * @see JCudaDriver#cuMemsetD2D8 * @see JCudaDriver#cuMemsetD2D16 * @see JCudaDriver#cuMemsetD2D32 * @see JCudaDriver#cuMemsetD8 * @see JCudaDriver#cuMemsetD16 * @see JCudaDriver#cuMemsetD32 */ public static int cuMemcpyHtoA(CUarray dstArray, long dstIndex, Pointer pSrc, long ByteCount) { return checkResult(cuMemcpyHtoANative(dstArray, dstIndex, pSrc, ByteCount)); } private static native int cuMemcpyHtoANative(CUarray dstArray, long dstIndex, Pointer pSrc, long ByteCount); /** * Copies memory from Array to Host. * *
     * CUresult cuMemcpyAtoH (
     *      void* dstHost,
     *      CUarray srcArray,
     *      size_t srcOffset,
     *      size_t ByteCount )
     * 
*
*

Copies memory from Array to Host. Copies * from one 1D CUDA array to host memory. dstHost specifies the * base pointer of the destination. srcArray and srcOffset specify the CUDA array handle and starting offset in * bytes of the source data. ByteCount specifies the number of * bytes to copy. *

*
* Note: *

Note that this * function may also return error codes from previous, asynchronous * launches. *

*
*

*
* * @param dstHost Destination device pointer * @param srcArray Source array * @param srcOffset Offset in bytes of source array * @param ByteCount Size of memory copy in bytes * * @return CUDA_SUCCESS, CUDA_ERROR_DEINITIALIZED, CUDA_ERROR_NOT_INITIALIZED, * CUDA_ERROR_INVALID_CONTEXT, CUDA_ERROR_INVALID_VALUE * * @see JCudaDriver#cuArray3DCreate * @see JCudaDriver#cuArray3DGetDescriptor * @see JCudaDriver#cuArrayCreate * @see JCudaDriver#cuArrayDestroy * @see JCudaDriver#cuArrayGetDescriptor * @see JCudaDriver#cuMemAlloc * @see JCudaDriver#cuMemAllocHost * @see JCudaDriver#cuMemAllocPitch * @see JCudaDriver#cuMemcpy2D * @see JCudaDriver#cuMemcpy2DAsync * @see JCudaDriver#cuMemcpy2DUnaligned * @see JCudaDriver#cuMemcpy3D * @see JCudaDriver#cuMemcpy3DAsync * @see JCudaDriver#cuMemcpyAtoA * @see JCudaDriver#cuMemcpyAtoD * @see JCudaDriver#cuMemcpyAtoHAsync * @see JCudaDriver#cuMemcpyDtoA * @see JCudaDriver#cuMemcpyDtoD * @see JCudaDriver#cuMemcpyDtoDAsync * @see JCudaDriver#cuMemcpyDtoH * @see JCudaDriver#cuMemcpyDtoHAsync * @see JCudaDriver#cuMemcpyHtoA * @see JCudaDriver#cuMemcpyHtoAAsync * @see JCudaDriver#cuMemcpyHtoD * @see JCudaDriver#cuMemcpyHtoDAsync * @see JCudaDriver#cuMemFree * @see JCudaDriver#cuMemFreeHost * @see JCudaDriver#cuMemGetAddressRange * @see JCudaDriver#cuMemGetInfo * @see JCudaDriver#cuMemHostAlloc * @see JCudaDriver#cuMemHostGetDevicePointer * @see JCudaDriver#cuMemsetD2D8 * @see JCudaDriver#cuMemsetD2D16 * @see JCudaDriver#cuMemsetD2D32 * @see JCudaDriver#cuMemsetD8 * @see JCudaDriver#cuMemsetD16 * @see JCudaDriver#cuMemsetD32 */ public static int cuMemcpyAtoH(Pointer dstHost, CUarray srcArray, long srcIndex, long ByteCount) { return checkResult(cuMemcpyAtoHNative(dstHost, srcArray, srcIndex, ByteCount)); } private static native int cuMemcpyAtoHNative(Pointer dstHost, CUarray srcArray, long srcIndex, long ByteCount); /** * Copies memory from Array to Array. * *
     * CUresult cuMemcpyAtoA (
     *      CUarray dstArray,
     *      size_t dstOffset,
     *      CUarray srcArray,
     *      size_t srcOffset,
     *      size_t ByteCount )
     * 
*
*

Copies memory from Array to Array. * Copies from one 1D CUDA array to another. dstArray and srcArray specify the handles of the destination and source CUDA * arrays for the copy, respectively. dstOffset and srcOffset specify the destination and source offsets in bytes * into the CUDA arrays. ByteCount is the number of bytes to be * copied. The size of the elements in the CUDA arrays need not be the * same format, but the elements * must be the same size; and count must be * evenly divisible by that size. *

*
* Note: *

Note that this * function may also return error codes from previous, asynchronous * launches. *

*
*

*
* * @param dstArray Destination array * @param dstOffset Offset in bytes of destination array * @param srcArray Source array * @param srcOffset Offset in bytes of source array * @param ByteCount Size of memory copy in bytes * * @return CUDA_SUCCESS, CUDA_ERROR_DEINITIALIZED, CUDA_ERROR_NOT_INITIALIZED, * CUDA_ERROR_INVALID_CONTEXT, CUDA_ERROR_INVALID_VALUE * * @see JCudaDriver#cuArray3DCreate * @see JCudaDriver#cuArray3DGetDescriptor * @see JCudaDriver#cuArrayCreate * @see JCudaDriver#cuArrayDestroy * @see JCudaDriver#cuArrayGetDescriptor * @see JCudaDriver#cuMemAlloc * @see JCudaDriver#cuMemAllocHost * @see JCudaDriver#cuMemAllocPitch * @see JCudaDriver#cuMemcpy2D * @see JCudaDriver#cuMemcpy2DAsync * @see JCudaDriver#cuMemcpy2DUnaligned * @see JCudaDriver#cuMemcpy3D * @see JCudaDriver#cuMemcpy3DAsync * @see JCudaDriver#cuMemcpyAtoD * @see JCudaDriver#cuMemcpyAtoH * @see JCudaDriver#cuMemcpyAtoHAsync * @see JCudaDriver#cuMemcpyDtoA * @see JCudaDriver#cuMemcpyDtoD * @see JCudaDriver#cuMemcpyDtoDAsync * @see JCudaDriver#cuMemcpyDtoH * @see JCudaDriver#cuMemcpyDtoHAsync * @see JCudaDriver#cuMemcpyHtoA * @see JCudaDriver#cuMemcpyHtoAAsync * @see JCudaDriver#cuMemcpyHtoD * @see JCudaDriver#cuMemcpyHtoDAsync * @see JCudaDriver#cuMemFree * @see JCudaDriver#cuMemFreeHost * @see JCudaDriver#cuMemGetAddressRange * @see JCudaDriver#cuMemGetInfo * @see JCudaDriver#cuMemHostAlloc * @see JCudaDriver#cuMemHostGetDevicePointer * @see JCudaDriver#cuMemsetD2D8 * @see JCudaDriver#cuMemsetD2D16 * @see JCudaDriver#cuMemsetD2D32 * @see JCudaDriver#cuMemsetD8 * @see JCudaDriver#cuMemsetD16 * @see JCudaDriver#cuMemsetD32 */ public static int cuMemcpyAtoA(CUarray dstArray, long dstIndex, CUarray srcArray, long srcIndex, long ByteCount) { return checkResult(cuMemcpyAtoANative(dstArray, dstIndex, srcArray, srcIndex, ByteCount)); } private static native int cuMemcpyAtoANative(CUarray dstArray, long dstIndex, CUarray srcArray, long srcIndex, long ByteCount); /** * Copies memory for 2D arrays. 
* *
     * CUresult cuMemcpy2D (
     *      const CUDA_MEMCPY2D* pCopy )
     * 
*
*

Copies memory for 2D arrays. Perform a * 2D memory copy according to the parameters specified in pCopy. * The CUDA_MEMCPY2D structure is defined as: *

*
   typedef struct CUDA_MEMCPY2D_st {
     *       unsigned int srcXInBytes, srcY;
     *       CUmemorytype srcMemoryType;
     *           const void *srcHost;
     *           CUdeviceptr srcDevice;
     *           CUarray srcArray;
     *           unsigned int srcPitch;
     *
     *       unsigned int dstXInBytes, dstY;
     *       CUmemorytype dstMemoryType;
     *           void *dstHost;
     *           CUdeviceptr dstDevice;
     *           CUarray dstArray;
     *           unsigned int dstPitch;
     *
     *       unsigned int WidthInBytes;
     *       unsigned int Height;
     *    } CUDA_MEMCPY2D;
     * </pre>
     * where:
     * <ul>
     * <li>
     * srcMemoryType and dstMemoryType specify the type of memory of the
     * source and destination, respectively; CUmemorytype_enum is defined as:
     * <pre>
     *    typedef enum CUmemorytype_enum {
     *       CU_MEMORYTYPE_HOST = 0x01,
     *       CU_MEMORYTYPE_DEVICE = 0x02,
     *       CU_MEMORYTYPE_ARRAY = 0x03,
     *       CU_MEMORYTYPE_UNIFIED = 0x04
     *    } CUmemorytype;
     * </pre>
     * If srcMemoryType is CU_MEMORYTYPE_UNIFIED, srcDevice and srcPitch
     * specify the (unified virtual address space) base address of the
     * source data and the bytes per row to apply. srcArray is ignored.
     * This value may be used only if unified addressing is supported in
     * the calling context.
     * <br />
     * If srcMemoryType is CU_MEMORYTYPE_HOST, srcHost and srcPitch specify
     * the (host) base address of the source data and the bytes per row to
     * apply. srcArray is ignored.
     * <br />
     * If srcMemoryType is CU_MEMORYTYPE_DEVICE, srcDevice and srcPitch
     * specify the (device) base address of the source data and the bytes
     * per row to apply. srcArray is ignored.
     * <br />
     * If srcMemoryType is CU_MEMORYTYPE_ARRAY, srcArray specifies the
     * handle of the source data. srcHost, srcDevice and srcPitch are
     * ignored.
     * <br />
     * The dstMemoryType and the dstHost, dstDevice, dstArray and dstPitch
     * fields are interpreted analogously for the destination of the copy.
     * </li>
     * <li>
     * srcXInBytes and srcY specify the base address of the source data for
     * the copy; dstXInBytes and dstY specify the base address of the
     * destination. For host pointers, the starting address is
     * <pre>
     *   void* Start = (void*)((char*)srcHost+srcY*srcPitch + srcXInBytes);
     * </pre>
     * For device pointers, the starting address is
     * <pre>
     *   CUdeviceptr Start = srcDevice+srcY*srcPitch+srcXInBytes;
     * </pre>
     * For CUDA arrays, srcXInBytes and dstXInBytes must be evenly
     * divisible by the array element size.
     * </li>
     * <li>
     * WidthInBytes and Height specify the width (in bytes) and height of
     * the 2D copy being performed.
     * </li>
     * <li>
     * If specified, srcPitch must be greater than or equal to
     * WidthInBytes + srcXInBytes, and dstPitch must be greater than or
     * equal to WidthInBytes + dstXInBytes.
     * </li>
     * </ul>
     * cuMemcpy2D() returns an error if any pitch is greater than the
     * maximum allowed (CU_DEVICE_ATTRIBUTE_MAX_PITCH). cuMemAllocPitch()
     * passes back pitches that always work with cuMemcpy2D(). On
     * intra-device memory copies (device to device, CUDA array to device,
     * CUDA array to CUDA array), cuMemcpy2D() may fail for pitches not
     * computed by cuMemAllocPitch(). cuMemcpy2DUnaligned() does not have
     * this restriction, but may run significantly slower in the cases
     * where cuMemcpy2D() would have returned an error code.
     * <p>
     * Note that this function may also return error codes from previous,
     * asynchronous launches.
     *
     * @param pCopy Parameters for the memory copy
     *
     * @return CUDA_SUCCESS, CUDA_ERROR_DEINITIALIZED,
     * CUDA_ERROR_NOT_INITIALIZED, CUDA_ERROR_INVALID_CONTEXT,
     * CUDA_ERROR_INVALID_VALUE
     *
     * @see JCudaDriver#cuMemcpy2DAsync
     * @see JCudaDriver#cuMemcpy2DUnaligned
     * @see JCudaDriver#cuMemcpy3D
     * @see JCudaDriver#cuMemAllocPitch
     */
    public static int cuMemcpy2D(CUDA_MEMCPY2D pCopy)
    {
        return checkResult(cuMemcpy2DNative(pCopy));
    }
    private static native int cuMemcpy2DNative(CUDA_MEMCPY2D pCopy);


    /**
     * Copies memory for 2D arrays.
     *
     * <pre>
     * CUresult cuMemcpy2DUnaligned (
     *      const CUDA_MEMCPY2D* pCopy )
     * </pre>
     * <p>
     * Copies memory for 2D arrays. Performs a 2D memory copy according to
     * the parameters specified in pCopy. The CUDA_MEMCPY2D structure is
     * defined as:
     * <pre>
     *    typedef struct CUDA_MEMCPY2D_st {
     *       unsigned int srcXInBytes, srcY;
     *       CUmemorytype srcMemoryType;
     *       const void *srcHost;
     *       CUdeviceptr srcDevice;
     *       CUarray srcArray;
     *       unsigned int srcPitch;
     *       unsigned int dstXInBytes, dstY;
     *       CUmemorytype dstMemoryType;
     *       void *dstHost;
     *       CUdeviceptr dstDevice;
     *       CUarray dstArray;
     *       unsigned int dstPitch;
     *       unsigned int WidthInBytes;
     *       unsigned int Height;
     *    } CUDA_MEMCPY2D;
     * </pre>
     * The interpretation of the fields (memory types, base addresses,
     * pitches, and the WidthInBytes and Height extents) is the same as for
     * {@link JCudaDriver#cuMemcpy2D}.
     * <p>
     * Unlike cuMemcpy2D(), cuMemcpy2DUnaligned() does not restrict the
     * pitches to those computed by cuMemAllocPitch() for intra-device
     * copies, but may run significantly slower in the cases where
     * cuMemcpy2D() would have returned an error code. cuMemcpy2D() returns
     * an error if any pitch is greater than the maximum allowed
     * (CU_DEVICE_ATTRIBUTE_MAX_PITCH).
     * <p>
     * Note that this function may also return error codes from previous,
     * asynchronous launches.
     *
     * @param pCopy Parameters for the memory copy
     *
     * @return CUDA_SUCCESS, CUDA_ERROR_DEINITIALIZED,
     * CUDA_ERROR_NOT_INITIALIZED, CUDA_ERROR_INVALID_CONTEXT,
     * CUDA_ERROR_INVALID_VALUE
     *
     * @see JCudaDriver#cuMemcpy2D
     * @see JCudaDriver#cuMemcpy2DAsync
     * @see JCudaDriver#cuMemAllocPitch
     */
    public static int cuMemcpy2DUnaligned(CUDA_MEMCPY2D pCopy)
    {
        return checkResult(cuMemcpy2DUnalignedNative(pCopy));
    }
    private static native int cuMemcpy2DUnalignedNative(CUDA_MEMCPY2D pCopy);


    /**
     * Copies memory for 3D arrays.
     *
     * <pre>
     * CUresult cuMemcpy3D (
     *      const CUDA_MEMCPY3D* pCopy )
     * </pre>
     * <p>
     * Copies memory for 3D arrays. Performs a 3D memory copy according to
     * the parameters specified in pCopy. The CUDA_MEMCPY3D structure is
     * defined as:
     * <pre>
     *         typedef struct CUDA_MEMCPY3D_st
     *         {
     *             unsigned int srcXInBytes, srcY, srcZ;
     *             unsigned int srcLOD;
     *             CUmemorytype srcMemoryType;
     *                 const void *srcHost;
     *                 CUdeviceptr srcDevice;
     *                 CUarray srcArray;
     *                 unsigned int srcPitch;  // ignored when src is array
     *                 unsigned int srcHeight; // ignored when src is array;
     *                                         // may be 0 if Depth==1
     *
     *             unsigned int dstXInBytes, dstY, dstZ;
     *             unsigned int dstLOD;
     *             CUmemorytype dstMemoryType;
     *                 void *dstHost;
     *                 CUdeviceptr dstDevice;
     *                 CUarray dstArray;
     *                 unsigned int dstPitch;  // ignored when dst is array
     *                 unsigned int dstHeight; // ignored when dst is array;
     *                                         // may be 0 if Depth==1
     *
     *             unsigned int WidthInBytes;
     *             unsigned int Height;
     *             unsigned int Depth;
     *         } CUDA_MEMCPY3D;
     * </pre>
     * where:
     * <ul>
     * <li>
     * srcMemoryType and dstMemoryType specify the type of memory of the
     * source and destination (CU_MEMORYTYPE_HOST, CU_MEMORYTYPE_DEVICE,
     * CU_MEMORYTYPE_ARRAY or CU_MEMORYTYPE_UNIFIED), and select which of
     * the source and destination address fields are used, analogously to
     * {@link JCudaDriver#cuMemcpy2D}. For host and device memory, srcPitch
     * and srcHeight (or dstPitch and dstHeight) additionally specify the
     * bytes per row and the height of each 2D slice of the 3D array; when
     * the source (or destination) is a CUDA array, srcHost, srcDevice,
     * srcPitch and srcHeight (or their dst counterparts) are ignored.
     * </li>
     * <li>
     * srcXInBytes, srcY and srcZ specify the base address of the source
     * data for the copy; dstXInBytes, dstY and dstZ specify the base
     * address of the destination. For host pointers, the starting address
     * is
     * <pre>
     *   void* Start = (void*)((char*)srcHost+(srcZ*srcHeight+srcY)*srcPitch
     *      + srcXInBytes);
     * </pre>
     * For device pointers, the starting address is
     * <pre>
     *   CUdeviceptr Start =
     *      srcDevice+(srcZ*srcHeight+srcY)*srcPitch+srcXInBytes;
     * </pre>
     * For CUDA arrays, srcXInBytes and dstXInBytes must be evenly
     * divisible by the array element size.
     * </li>
     * <li>
     * WidthInBytes, Height and Depth specify the width (in bytes), height
     * and depth of the 3D copy being performed.
     * </li>
     * <li>
     * If specified, srcPitch must be greater than or equal to
     * WidthInBytes + srcXInBytes, and dstPitch must be greater than or
     * equal to WidthInBytes + dstXInBytes. If specified, srcHeight must be
     * greater than or equal to Height + srcY, and dstHeight must be
     * greater than or equal to Height + dstY.
     * </li>
     * </ul>
     * cuMemcpy3D() returns an error if any pitch is greater than the
     * maximum allowed (CU_DEVICE_ATTRIBUTE_MAX_PITCH).
     * <p>
     * The srcLOD and dstLOD members of the CUDA_MEMCPY3D structure must be
     * set to 0.
     * <p>
     * Note that this function may also return error codes from previous,
     * asynchronous launches.
     *
     * @param pCopy Parameters for the memory copy
     *
     * @return CUDA_SUCCESS, CUDA_ERROR_DEINITIALIZED,
     * CUDA_ERROR_NOT_INITIALIZED, CUDA_ERROR_INVALID_CONTEXT,
     * CUDA_ERROR_INVALID_VALUE
     *
     * @see JCudaDriver#cuMemcpy3DAsync
     * @see JCudaDriver#cuMemcpy2D
     * @see JCudaDriver#cuMemAllocPitch
     */
    public static int cuMemcpy3D(CUDA_MEMCPY3D pCopy)
    {
        return checkResult(cuMemcpy3DNative(pCopy));
    }
    private static native int cuMemcpy3DNative(CUDA_MEMCPY3D pCopy);


    /**
     * Copies memory between contexts.
     *
     * <pre>
     * CUresult cuMemcpy3DPeer (
     *      const CUDA_MEMCPY3D_PEER* pCopy )
     * </pre>
     * <p>
     * Copies memory between contexts. Performs a 3D memory copy according
     * to the parameters specified in pCopy. See the definition of the
     * CUDA_MEMCPY3D_PEER structure for documentation of its parameters.
     * <p>
     * Note that this function is synchronous with respect to the host only
     * if the source or destination memory is of type CU_MEMORYTYPE_HOST.
     * Note also that this copy is serialized with respect to all pending
     * and future asynchronous work in the current context, the copy's
     * source context, and the copy's destination context (use
     * cuMemcpy3DPeerAsync to avoid this synchronization).
     * <p>
     * Note that this function may also return error codes from previous,
     * asynchronous launches.
     *
     * @param pCopy Parameters for the memory copy
     *
     * @return CUDA_SUCCESS, CUDA_ERROR_DEINITIALIZED,
     * CUDA_ERROR_NOT_INITIALIZED, CUDA_ERROR_INVALID_CONTEXT,
     * CUDA_ERROR_INVALID_VALUE
     *
     * @see JCudaDriver#cuMemcpyDtoD
     * @see JCudaDriver#cuMemcpyPeer
     * @see JCudaDriver#cuMemcpyDtoDAsync
     * @see JCudaDriver#cuMemcpyPeerAsync
     * @see JCudaDriver#cuMemcpy3DPeerAsync
     */
    public static int cuMemcpy3DPeer(CUDA_MEMCPY3D_PEER pCopy)
    {
        return checkResult(cuMemcpy3DPeerNative(pCopy));
    }
    private static native int cuMemcpy3DPeerNative(CUDA_MEMCPY3D_PEER pCopy);


    /**
     * Copies memory asynchronously.
     *
     * <pre>
     * CUresult cuMemcpyAsync (
     *      CUdeviceptr dst,
     *      CUdeviceptr src,
     *      size_t ByteCount,
     *      CUstream hStream )
     * </pre>
     * <p>
     * Copies memory asynchronously. Copies data between two pointers. dst
     * and src are base pointers of the destination and source,
     * respectively. ByteCount specifies the number of bytes to copy. Note
     * that this function infers the type of the transfer (host to host,
     * host to device, device to device, or device to host) from the
     * pointer values. This function is only allowed in contexts which
     * support unified addressing. Note that this function is asynchronous
     * and can optionally be associated to a stream by passing a non-zero
     * hStream argument.
     * <p>
     * Note that this function may also return error codes from previous,
     * asynchronous launches.
     *
     * @param dst Destination unified virtual address space pointer
     * @param src Source unified virtual address space pointer
     * @param ByteCount Size of memory copy in bytes
     * @param hStream Stream identifier
     *
     * @return CUDA_SUCCESS, CUDA_ERROR_DEINITIALIZED,
     * CUDA_ERROR_NOT_INITIALIZED, CUDA_ERROR_INVALID_CONTEXT,
     * CUDA_ERROR_INVALID_VALUE
     *
     * @see JCudaDriver#cuMemcpyDtoDAsync
     * @see JCudaDriver#cuMemcpyHtoDAsync
     * @see JCudaDriver#cuMemcpyDtoHAsync
     */
    public static int cuMemcpyAsync(CUdeviceptr dst, CUdeviceptr src, long ByteCount, CUstream hStream)
    {
        return checkResult(cuMemcpyAsyncNative(dst, src, ByteCount, hStream));
    }
    private static native int cuMemcpyAsyncNative(CUdeviceptr dst, CUdeviceptr src, long ByteCount, CUstream hStream);


    /**
     * Copies device memory between two contexts asynchronously.
     *
     * <pre>
     * CUresult cuMemcpyPeerAsync (
     *      CUdeviceptr dstDevice,
     *      CUcontext dstContext,
     *      CUdeviceptr srcDevice,
     *      CUcontext srcContext,
     *      size_t ByteCount,
     *      CUstream hStream )
     * </pre>
     * <p>
     * Copies device memory between two contexts asynchronously. Copies
     * from device memory in one context to device memory in another
     * context. dstDevice is the base device pointer of the destination
     * memory and dstContext is the destination context. srcDevice is the
     * base device pointer of the source memory and srcContext is the
     * source context. ByteCount specifies the number of bytes to copy.
     * Note that this function is asynchronous with respect to the host and
     * all work in other streams and in other devices.
     * <p>
     * Note that this function may also return error codes from previous,
     * asynchronous launches.
     *
     * @param dstDevice Destination device pointer
     * @param dstContext Destination context
     * @param srcDevice Source device pointer
     * @param srcContext Source context
     * @param ByteCount Size of memory copy in bytes
     * @param hStream Stream identifier
     *
     * @return CUDA_SUCCESS, CUDA_ERROR_DEINITIALIZED,
     * CUDA_ERROR_NOT_INITIALIZED, CUDA_ERROR_INVALID_CONTEXT,
     * CUDA_ERROR_INVALID_VALUE
     *
     * @see JCudaDriver#cuMemcpyDtoD
     * @see JCudaDriver#cuMemcpyPeer
     * @see JCudaDriver#cuMemcpy3DPeer
     * @see JCudaDriver#cuMemcpyDtoDAsync
     * @see JCudaDriver#cuMemcpy3DPeerAsync
     */
    public static int cuMemcpyPeerAsync(CUdeviceptr dstDevice, CUcontext dstContext, CUdeviceptr srcDevice, CUcontext srcContext, long ByteCount, CUstream hStream)
    {
        return checkResult(cuMemcpyPeerAsyncNative(dstDevice, dstContext, srcDevice, srcContext, ByteCount, hStream));
    }
    private static native int cuMemcpyPeerAsyncNative(CUdeviceptr dstDevice, CUcontext dstContext, CUdeviceptr srcDevice, CUcontext srcContext, long ByteCount, CUstream hStream);


    /**
     * Copies memory from Host to Device.
     *
     * <pre>
     * CUresult cuMemcpyHtoDAsync (
     *      CUdeviceptr dstDevice,
     *      const void* srcHost,
     *      size_t ByteCount,
     *      CUstream hStream )
     * </pre>
     * <p>
     * Copies memory from Host to Device. Copies from host memory to device
     * memory. dstDevice and srcHost are the base addresses of the
     * destination and source, respectively. ByteCount specifies the number
     * of bytes to copy.
     * <p>
     * cuMemcpyHtoDAsync() is asynchronous and can optionally be associated
     * to a stream by passing a non-zero hStream argument. It only works on
     * page-locked memory and returns an error if a pointer to pageable
     * memory is passed as input.
     * <p>
     * Note that this function may also return error codes from previous,
     * asynchronous launches.
     *
     * @param dstDevice Destination device pointer
     * @param srcHost Source host pointer
     * @param ByteCount Size of memory copy in bytes
     * @param hStream Stream identifier
     *
     * @return CUDA_SUCCESS, CUDA_ERROR_DEINITIALIZED,
     * CUDA_ERROR_NOT_INITIALIZED, CUDA_ERROR_INVALID_CONTEXT,
     * CUDA_ERROR_INVALID_VALUE
     *
     * @see JCudaDriver#cuMemcpyHtoD
     * @see JCudaDriver#cuMemcpyDtoHAsync
     * @see JCudaDriver#cuMemHostAlloc
     */
    public static int cuMemcpyHtoDAsync(CUdeviceptr dstDevice, Pointer srcHost, long ByteCount, CUstream hStream)
    {
        return checkResult(cuMemcpyHtoDAsyncNative(dstDevice, srcHost, ByteCount, hStream));
    }
    private static native int cuMemcpyHtoDAsyncNative(CUdeviceptr dstDevice, Pointer srcHost, long ByteCount, CUstream hStream);


    /**
     * Copies memory from Device to Host.
     *
     * <pre>
     * CUresult cuMemcpyDtoHAsync (
     *      void* dstHost,
     *      CUdeviceptr srcDevice,
     *      size_t ByteCount,
     *      CUstream hStream )
     * </pre>
     * <p>
     * Copies memory from Device to Host. Copies from device to host
     * memory. dstHost and srcDevice specify the base pointers of the
     * destination and source, respectively. ByteCount specifies the number
     * of bytes to copy.
     * <p>
     * cuMemcpyDtoHAsync() is asynchronous and can optionally be associated
     * to a stream by passing a non-zero hStream argument. It only works on
     * page-locked memory and returns an error if a pointer to pageable
     * memory is passed as input.
     * <p>
     * Note that this function may also return error codes from previous,
     * asynchronous launches.
     *
     * @param dstHost Destination host pointer
     * @param srcDevice Source device pointer
     * @param ByteCount Size of memory copy in bytes
     * @param hStream Stream identifier
     *
     * @return CUDA_SUCCESS, CUDA_ERROR_DEINITIALIZED,
     * CUDA_ERROR_NOT_INITIALIZED, CUDA_ERROR_INVALID_CONTEXT,
     * CUDA_ERROR_INVALID_VALUE
     *
     * @see JCudaDriver#cuMemcpyDtoH
     * @see JCudaDriver#cuMemcpyHtoDAsync
     * @see JCudaDriver#cuMemHostAlloc
     */
    public static int cuMemcpyDtoHAsync(Pointer dstHost, CUdeviceptr srcDevice, long ByteCount, CUstream hStream)
    {
        return checkResult(cuMemcpyDtoHAsyncNative(dstHost, srcDevice, ByteCount, hStream));
    }
    private static native int cuMemcpyDtoHAsyncNative(Pointer dstHost, CUdeviceptr srcDevice, long ByteCount, CUstream hStream);


    /**
     * Copies memory from Device to Device.
     *
     * <pre>
     * CUresult cuMemcpyDtoDAsync (
     *      CUdeviceptr dstDevice,
     *      CUdeviceptr srcDevice,
     *      size_t ByteCount,
     *      CUstream hStream )
     * </pre>
     * <p>
     * Copies memory from Device to Device. Copies from device memory to
     * device memory. dstDevice and srcDevice are the base pointers of the
     * destination and source, respectively. ByteCount specifies the number
     * of bytes to copy. Note that this function is asynchronous and can
     * optionally be associated to a stream by passing a non-zero hStream
     * argument.
     * <p>
     * Note that this function may also return error codes from previous,
     * asynchronous launches.
     *
     * @param dstDevice Destination device pointer
     * @param srcDevice Source device pointer
     * @param ByteCount Size of memory copy in bytes
     * @param hStream Stream identifier
     *
     * @return CUDA_SUCCESS, CUDA_ERROR_DEINITIALIZED,
     * CUDA_ERROR_NOT_INITIALIZED, CUDA_ERROR_INVALID_CONTEXT,
     * CUDA_ERROR_INVALID_VALUE
     *
     * @see JCudaDriver#cuMemcpyDtoD
     * @see JCudaDriver#cuMemcpyHtoDAsync
     * @see JCudaDriver#cuMemcpyDtoHAsync
     */
    public static int cuMemcpyDtoDAsync(CUdeviceptr dstDevice, CUdeviceptr srcDevice, long ByteCount, CUstream hStream)
    {
        return checkResult(cuMemcpyDtoDAsyncNative(dstDevice, srcDevice, ByteCount, hStream));
    }
    private static native int cuMemcpyDtoDAsyncNative(CUdeviceptr dstDevice, CUdeviceptr srcDevice, long ByteCount, CUstream hStream);


    /**
     * Copies memory from Host to Array.
     *
     * <pre>
     * CUresult cuMemcpyHtoAAsync (
     *      CUarray dstArray,
     *      size_t dstOffset,
     *      const void* srcHost,
     *      size_t ByteCount,
     *      CUstream hStream )
     * 
*
*

Copies memory from Host to Array. Copies * from host memory to a 1D CUDA array. dstArray and dstOffset specify the CUDA array handle and starting offset in * bytes of the destination data. srcHost specifies the base * address of the source. ByteCount specifies the number of * bytes to copy. *

*

cuMemcpyHtoAAsync() is asynchronous and * can optionally be associated to a stream by passing a non-zero hStream argument. It only works on page-locked memory and returns * an error if a pointer to pageable memory is passed as input. *

*
* Note: *

Note that this * function may also return error codes from previous, asynchronous * launches. *

*
*

*
* * @param dstArray Destination array * @param dstOffset Offset in bytes of destination array * @param srcHost Source host pointer * @param ByteCount Size of memory copy in bytes * @param hStream Stream identifier * * @return CUDA_SUCCESS, CUDA_ERROR_DEINITIALIZED, CUDA_ERROR_NOT_INITIALIZED, * CUDA_ERROR_INVALID_CONTEXT, CUDA_ERROR_INVALID_VALUE * * @see JCudaDriver#cuArray3DCreate * @see JCudaDriver#cuArray3DGetDescriptor * @see JCudaDriver#cuArrayCreate * @see JCudaDriver#cuArrayDestroy * @see JCudaDriver#cuArrayGetDescriptor * @see JCudaDriver#cuMemAlloc * @see JCudaDriver#cuMemAllocHost * @see JCudaDriver#cuMemAllocPitch * @see JCudaDriver#cuMemcpy2D * @see JCudaDriver#cuMemcpy2DAsync * @see JCudaDriver#cuMemcpy2DUnaligned * @see JCudaDriver#cuMemcpy3D * @see JCudaDriver#cuMemcpy3DAsync * @see JCudaDriver#cuMemcpyAtoA * @see JCudaDriver#cuMemcpyAtoD * @see JCudaDriver#cuMemcpyAtoH * @see JCudaDriver#cuMemcpyAtoHAsync * @see JCudaDriver#cuMemcpyDtoA * @see JCudaDriver#cuMemcpyDtoD * @see JCudaDriver#cuMemcpyDtoDAsync * @see JCudaDriver#cuMemcpyDtoH * @see JCudaDriver#cuMemcpyDtoHAsync * @see JCudaDriver#cuMemcpyHtoA * @see JCudaDriver#cuMemcpyHtoD * @see JCudaDriver#cuMemcpyHtoDAsync * @see JCudaDriver#cuMemFree * @see JCudaDriver#cuMemFreeHost * @see JCudaDriver#cuMemGetAddressRange * @see JCudaDriver#cuMemGetInfo * @see JCudaDriver#cuMemHostAlloc * @see JCudaDriver#cuMemHostGetDevicePointer * @see JCudaDriver#cuMemsetD2D8 * @see JCudaDriver#cuMemsetD2D8Async * @see JCudaDriver#cuMemsetD2D16 * @see JCudaDriver#cuMemsetD2D16Async * @see JCudaDriver#cuMemsetD2D32 * @see JCudaDriver#cuMemsetD2D32Async * @see JCudaDriver#cuMemsetD8 * @see JCudaDriver#cuMemsetD8Async * @see JCudaDriver#cuMemsetD16 * @see JCudaDriver#cuMemsetD16Async * @see JCudaDriver#cuMemsetD32 * @see JCudaDriver#cuMemsetD32Async */ public static int cuMemcpyHtoAAsync(CUarray dstArray, long dstIndex, Pointer pSrc, long ByteCount, CUstream hStream) { return 
checkResult(cuMemcpyHtoAAsyncNative(dstArray, dstIndex, pSrc, ByteCount, hStream)); } private static native int cuMemcpyHtoAAsyncNative(CUarray dstArray, long dstIndex, Pointer pSrc, long ByteCount, CUstream hStream); /** * Copies memory from Array to Host. * *
     * CUresult cuMemcpyAtoHAsync (
     *      void* dstHost,
     *      CUarray srcArray,
     *      size_t srcOffset,
     *      size_t ByteCount,
     *      CUstream hStream )
     * 
*
*

Copies memory from Array to Host. Copies * from one 1D CUDA array to host memory. dstHost specifies the * base pointer of the destination. srcArray and srcOffset specify the CUDA array handle and starting offset in * bytes of the source data. ByteCount specifies the number of * bytes to copy. *

*

cuMemcpyAtoHAsync() is asynchronous and * can optionally be associated to a stream by passing a non-zero stream argument. It only works on page-locked host memory and * returns an error if a pointer to pageable memory is passed as input. *

*
* Note: *

Note that this * function may also return error codes from previous, asynchronous * launches. *

*
*

*
* * @param dstHost Destination pointer * @param srcArray Source array * @param srcOffset Offset in bytes of source array * @param ByteCount Size of memory copy in bytes * @param hStream Stream identifier * * @return CUDA_SUCCESS, CUDA_ERROR_DEINITIALIZED, CUDA_ERROR_NOT_INITIALIZED, * CUDA_ERROR_INVALID_CONTEXT, CUDA_ERROR_INVALID_VALUE * * @see JCudaDriver#cuArray3DCreate * @see JCudaDriver#cuArray3DGetDescriptor * @see JCudaDriver#cuArrayCreate * @see JCudaDriver#cuArrayDestroy * @see JCudaDriver#cuArrayGetDescriptor * @see JCudaDriver#cuMemAlloc * @see JCudaDriver#cuMemAllocHost * @see JCudaDriver#cuMemAllocPitch * @see JCudaDriver#cuMemcpy2D * @see JCudaDriver#cuMemcpy2DAsync * @see JCudaDriver#cuMemcpy2DUnaligned * @see JCudaDriver#cuMemcpy3D * @see JCudaDriver#cuMemcpy3DAsync * @see JCudaDriver#cuMemcpyAtoA * @see JCudaDriver#cuMemcpyAtoD * @see JCudaDriver#cuMemcpyAtoH * @see JCudaDriver#cuMemcpyDtoA * @see JCudaDriver#cuMemcpyDtoD * @see JCudaDriver#cuMemcpyDtoDAsync * @see JCudaDriver#cuMemcpyDtoH * @see JCudaDriver#cuMemcpyDtoHAsync * @see JCudaDriver#cuMemcpyHtoA * @see JCudaDriver#cuMemcpyHtoAAsync * @see JCudaDriver#cuMemcpyHtoD * @see JCudaDriver#cuMemcpyHtoDAsync * @see JCudaDriver#cuMemFree * @see JCudaDriver#cuMemFreeHost * @see JCudaDriver#cuMemGetAddressRange * @see JCudaDriver#cuMemGetInfo * @see JCudaDriver#cuMemHostAlloc * @see JCudaDriver#cuMemHostGetDevicePointer * @see JCudaDriver#cuMemsetD2D8 * @see JCudaDriver#cuMemsetD2D8Async * @see JCudaDriver#cuMemsetD2D16 * @see JCudaDriver#cuMemsetD2D16Async * @see JCudaDriver#cuMemsetD2D32 * @see JCudaDriver#cuMemsetD2D32Async * @see JCudaDriver#cuMemsetD8 * @see JCudaDriver#cuMemsetD8Async * @see JCudaDriver#cuMemsetD16 * @see JCudaDriver#cuMemsetD16Async * @see JCudaDriver#cuMemsetD32 * @see JCudaDriver#cuMemsetD32Async */ public static int cuMemcpyAtoHAsync(Pointer dstHost, CUarray srcArray, long srcIndex, long ByteCount, CUstream hStream) { return checkResult(cuMemcpyAtoHAsyncNative(dstHost, 
srcArray, srcIndex, ByteCount, hStream)); } private static native int cuMemcpyAtoHAsyncNative(Pointer dstHost, CUarray srcArray, long srcIndex, long ByteCount, CUstream hStream); /** * Copies memory for 2D arrays. * *
     * CUresult cuMemcpy2DAsync (
     *      const CUDA_MEMCPY2D* pCopy,
     *      CUstream hStream )
     * 
*
*

Copies memory for 2D arrays. Perform a * 2D memory copy according to the parameters specified in pCopy. * The CUDA_MEMCPY2D structure is defined as: *

*
   typedef struct CUDA_MEMCPY2D_st {
     *       unsigned int srcXInBytes, srcY;
     *       CUmemorytype srcMemoryType;
     *       const void *srcHost;
     *       CUdeviceptr srcDevice;
     *       CUarray srcArray;
     *       unsigned int srcPitch;
     *       unsigned int dstXInBytes, dstY;
     *       CUmemorytype dstMemoryType;
     *       void *dstHost;
     *       CUdeviceptr dstDevice;
     *       CUarray dstArray;
     *       unsigned int dstPitch;
     *       unsigned int WidthInBytes;
     *       unsigned int Height;
     *    } CUDA_MEMCPY2D;
* where: *
    *
  • *

    srcMemoryType and dstMemoryType * specify the type of memory of the source and destination, respectively; * CUmemorytype_enum * is defined as: *

    *
  • *
*

*
   typedef enum CUmemorytype_enum {
     *       CU_MEMORYTYPE_HOST = 0x01,
     *       CU_MEMORYTYPE_DEVICE = 0x02,
     *       CU_MEMORYTYPE_ARRAY = 0x03,
     *       CU_MEMORYTYPE_UNIFIED = 0x04
     *    } CUmemorytype;
*

*

If srcMemoryType is CU_MEMORYTYPE_HOST, * srcHost and srcPitch specify the (host) base address of the source data * and the bytes per row to apply. srcArray is ignored. *

*

If srcMemoryType is CU_MEMORYTYPE_UNIFIED, * srcDevice and srcPitch specify the (unified virtual address space) base * address of the source data and the bytes per row * to apply. srcArray is ignored. This value * may be used only if unified addressing is supported in the calling * context. *

*

If srcMemoryType is CU_MEMORYTYPE_DEVICE, * srcDevice and srcPitch specify the (device) base address of the source * data and the bytes per row to apply. srcArray is * ignored. *

*

If srcMemoryType is CU_MEMORYTYPE_ARRAY, * srcArray specifies the handle of the source data. srcHost, srcDevice * and srcPitch are ignored. *

*

If dstMemoryType is CU_MEMORYTYPE_UNIFIED, * dstDevice and dstPitch specify the (unified virtual address space) base * address of the source data and the bytes per row * to apply. dstArray is ignored. This value * may be used only if unified addressing is supported in the calling * context. *

*

If dstMemoryType is CU_MEMORYTYPE_HOST, * dstHost and dstPitch specify the (host) base address of the destination * data and the bytes per row to apply. dstArray is * ignored. *

*

If dstMemoryType is CU_MEMORYTYPE_DEVICE, * dstDevice and dstPitch specify the (device) base address of the * destination data and the bytes per row to apply. dstArray * is ignored. *

*

If dstMemoryType is CU_MEMORYTYPE_ARRAY, * dstArray specifies the handle of the destination data. dstHost, * dstDevice and dstPitch are ignored. *

*
    *
  • *

    srcXInBytes and srcY specify * the base address of the source data for the copy. *

    *
  • *
*

*

For host pointers, the starting address * is *

  void* Start = (void*)((char*)srcHost+srcY*srcPitch +
     * srcXInBytes);
*

*

For device pointers, the starting * address is *

  CUdeviceptr Start =
     * srcDevice+srcY*srcPitch+srcXInBytes;
*

*

For CUDA arrays, srcXInBytes must be * evenly divisible by the array element size. *

*
    *
  • *

    dstXInBytes and dstY specify * the base address of the destination data for the copy. *

    *
  • *
*

*

For host pointers, the base address is *

  void* dstStart = (void*)((char*)dstHost+dstY*dstPitch +
     * dstXInBytes);
*

*

For device pointers, the starting * address is *

  CUdeviceptr dstStart =
     * dstDevice+dstY*dstPitch+dstXInBytes;
*

*

For CUDA arrays, dstXInBytes must be * evenly divisible by the array element size. *

*
    *
  • *

    WidthInBytes and Height specify * the width (in bytes) and height of the 2D copy being performed. *

    *
  • *
  • *

    If specified, srcPitch must be * greater than or equal to WidthInBytes + srcXInBytes, and dstPitch must * be greater than or equal * to WidthInBytes + dstXInBytes. *

    *
  • *
  • *

    If specified, srcPitch must be * greater than or equal to WidthInBytes + srcXInBytes, and dstPitch must * be greater than or equal * to WidthInBytes + dstXInBytes. *

    *
  • *
  • *

    If specified, srcHeight must * be greater than or equal to Height + srcY, and dstHeight must be * greater than or equal to Height * + dstY. *

    *
  • *
*

*

cuMemcpy2D() returns an error if any * pitch is greater than the maximum allowed (CU_DEVICE_ATTRIBUTE_MAX_PITCH). * cuMemAllocPitch() passes back pitches that always work with cuMemcpy2D(). * On intra-device memory copies (device to device, CUDA array to device, * CUDA array to CUDA array), cuMemcpy2D() may fail for pitches not * computed by cuMemAllocPitch(). cuMemcpy2DUnaligned() does not have this * restriction, but may run significantly slower in the cases where * cuMemcpy2D() would have returned an error code. *

*

cuMemcpy2DAsync() is asynchronous and * can optionally be associated to a stream by passing a non-zero hStream argument. It only works on page-locked host memory and * returns an error if a pointer to pageable memory is passed as input. *

*
* Note: *

Note that this * function may also return error codes from previous, asynchronous * launches. *

*
*

*
* * @param pCopy Parameters for the memory copy * @param hStream Stream identifier * * @return CUDA_SUCCESS, CUDA_ERROR_DEINITIALIZED, CUDA_ERROR_NOT_INITIALIZED, * CUDA_ERROR_INVALID_CONTEXT, CUDA_ERROR_INVALID_VALUE * * @see JCudaDriver#cuArray3DCreate * @see JCudaDriver#cuArray3DGetDescriptor * @see JCudaDriver#cuArrayCreate * @see JCudaDriver#cuArrayDestroy * @see JCudaDriver#cuArrayGetDescriptor * @see JCudaDriver#cuMemAlloc * @see JCudaDriver#cuMemAllocHost * @see JCudaDriver#cuMemAllocPitch * @see JCudaDriver#cuMemcpy2D * @see JCudaDriver#cuMemcpy2DUnaligned * @see JCudaDriver#cuMemcpy3D * @see JCudaDriver#cuMemcpy3DAsync * @see JCudaDriver#cuMemcpyAtoA * @see JCudaDriver#cuMemcpyAtoD * @see JCudaDriver#cuMemcpyAtoH * @see JCudaDriver#cuMemcpyAtoHAsync * @see JCudaDriver#cuMemcpyDtoA * @see JCudaDriver#cuMemcpyDtoD * @see JCudaDriver#cuMemcpyDtoDAsync * @see JCudaDriver#cuMemcpyDtoH * @see JCudaDriver#cuMemcpyDtoHAsync * @see JCudaDriver#cuMemcpyHtoA * @see JCudaDriver#cuMemcpyHtoAAsync * @see JCudaDriver#cuMemcpyHtoD * @see JCudaDriver#cuMemcpyHtoDAsync * @see JCudaDriver#cuMemFree * @see JCudaDriver#cuMemFreeHost * @see JCudaDriver#cuMemGetAddressRange * @see JCudaDriver#cuMemGetInfo * @see JCudaDriver#cuMemHostAlloc * @see JCudaDriver#cuMemHostGetDevicePointer * @see JCudaDriver#cuMemsetD2D8 * @see JCudaDriver#cuMemsetD2D8Async * @see JCudaDriver#cuMemsetD2D16 * @see JCudaDriver#cuMemsetD2D16Async * @see JCudaDriver#cuMemsetD2D32 * @see JCudaDriver#cuMemsetD2D32Async * @see JCudaDriver#cuMemsetD8 * @see JCudaDriver#cuMemsetD8Async * @see JCudaDriver#cuMemsetD16 * @see JCudaDriver#cuMemsetD16Async * @see JCudaDriver#cuMemsetD32 * @see JCudaDriver#cuMemsetD32Async */ public static int cuMemcpy2DAsync(CUDA_MEMCPY2D pCopy, CUstream hStream) { return checkResult(cuMemcpy2DAsyncNative(pCopy, hStream)); } private static native int cuMemcpy2DAsyncNative(CUDA_MEMCPY2D pCopy, CUstream hStream); /** * Copies memory for 3D arrays. * *
     * CUresult cuMemcpy3DAsync (
     *      const CUDA_MEMCPY3D* pCopy,
     *      CUstream hStream )
     * 
*
*

Copies memory for 3D arrays. Perform a * 3D memory copy according to the parameters specified in pCopy. * The CUDA_MEMCPY3D structure is defined as: *

*
        typedef struct CUDA_MEMCPY3D_st
     * {
     *
     *             unsigned int srcXInBytes, srcY, srcZ;
     *             unsigned int srcLOD;
     *             CUmemorytype srcMemoryType;
     *                 const void *srcHost;
     *                 CUdeviceptr srcDevice;
     *                 CUarray srcArray;
     *                 unsigned int srcPitch;  // ignored when src is array
     *                 unsigned int srcHeight; // ignored when src is array;
     * may be 0 if Depth==1
     *
     *             unsigned int dstXInBytes, dstY, dstZ;
     *             unsigned int dstLOD;
     *             CUmemorytype dstMemoryType;
     *                 void *dstHost;
     *                 CUdeviceptr dstDevice;
     *                 CUarray dstArray;
     *                 unsigned int dstPitch;  // ignored when dst is array
     *                 unsigned int dstHeight; // ignored when dst is array;
     * may be 0 if Depth==1
     *
     *             unsigned int WidthInBytes;
     *             unsigned int Height;
     *             unsigned int Depth;
     *         } CUDA_MEMCPY3D;
* where: *
    *
  • *

    srcMemoryType and dstMemoryType * specify the type of memory of the source and destination, respectively; * CUmemorytype_enum * is defined as: *

    *
  • *
*

*
   typedef enum CUmemorytype_enum {
     *       CU_MEMORYTYPE_HOST = 0x01,
     *       CU_MEMORYTYPE_DEVICE = 0x02,
     *       CU_MEMORYTYPE_ARRAY = 0x03,
     *       CU_MEMORYTYPE_UNIFIED = 0x04
     *    } CUmemorytype;
*

*

If srcMemoryType is CU_MEMORYTYPE_UNIFIED, * srcDevice and srcPitch specify the (unified virtual address space) base * address of the source data and the bytes per row * to apply. srcArray is ignored. This value * may be used only if unified addressing is supported in the calling * context. *

*

If srcMemoryType is CU_MEMORYTYPE_HOST, * srcHost, srcPitch and srcHeight specify the (host) base address of the * source data, the bytes per row, and the height of * each 2D slice of the 3D array. srcArray * is ignored. *

*

If srcMemoryType is CU_MEMORYTYPE_DEVICE, * srcDevice, srcPitch and srcHeight specify the (device) base address of * the source data, the bytes per row, and the height * of each 2D slice of the 3D array. srcArray * is ignored. *

*

If srcMemoryType is CU_MEMORYTYPE_ARRAY, * srcArray specifies the handle of the source data. srcHost, srcDevice, * srcPitch and srcHeight are ignored. *

*

If dstMemoryType is CU_MEMORYTYPE_UNIFIED, * dstDevice and dstPitch specify the (unified virtual address space) base * address of the source data and the bytes per row * to apply. dstArray is ignored. This value * may be used only if unified addressing is supported in the calling * context. *

*

If dstMemoryType is CU_MEMORYTYPE_HOST, * dstHost and dstPitch specify the (host) base address of the destination * data, the bytes per row, and the height of each * 2D slice of the 3D array. dstArray is * ignored. *

*

If dstMemoryType is CU_MEMORYTYPE_DEVICE, * dstDevice and dstPitch specify the (device) base address of the * destination data, the bytes per row, and the height of each * 2D slice of the 3D array. dstArray is * ignored. *

*

If dstMemoryType is CU_MEMORYTYPE_ARRAY, * dstArray specifies the handle of the destination data. dstHost, * dstDevice, dstPitch and dstHeight are ignored. *

*
    *
  • *

    srcXInBytes, srcY and srcZ * specify the base address of the source data for the copy. *

    *
  • *
*

*

For host pointers, the starting address * is *

  void* Start = (void*)((char*)srcHost+(srcZ*srcHeight+srcY)*srcPitch
     * + srcXInBytes);
*

*

For device pointers, the starting * address is *

  CUdeviceptr Start =
     * srcDevice+(srcZ*srcHeight+srcY)*srcPitch+srcXInBytes;
*

*

For CUDA arrays, srcXInBytes must be * evenly divisible by the array element size. *

*
    *
  • *

    dstXInBytes, dstY and dstZ * specify the base address of the destination data for the copy. *

    *
  • *
*

*

For host pointers, the base address is *

  void* dstStart = (void*)((char*)dstHost+(dstZ*dstHeight+dstY)*dstPitch
     * + dstXInBytes);
*

*

For device pointers, the starting * address is *

  CUdeviceptr dstStart =
     * dstDevice+(dstZ*dstHeight+dstY)*dstPitch+dstXInBytes;
*

*

For CUDA arrays, dstXInBytes must be * evenly divisible by the array element size. *

*
    *
  • *

    WidthInBytes, Height and Depth * specify the width (in bytes), height and depth of the 3D copy being * performed. *

    *
  • *
  • *

    If specified, srcPitch must be * greater than or equal to WidthInBytes + srcXInBytes, and dstPitch must * be greater than or equal * to WidthInBytes + dstXInBytes. *

    *
  • *
  • *

    If specified, srcHeight must * be greater than or equal to Height + srcY, and dstHeight must be * greater than or equal to Height * + dstY. *

    *
  • *
*

*

cuMemcpy3D() returns an error if any * pitch is greater than the maximum allowed * (CU_DEVICE_ATTRIBUTE_MAX_PITCH). *

*

cuMemcpy3DAsync() is asynchronous and * can optionally be associated to a stream by passing a non-zero hStream argument. It only works on page-locked host memory and * returns an error if a pointer to pageable memory is passed as input. *

*

The srcLOD and dstLOD members of the * CUDA_MEMCPY3D structure must be set to 0. *

*
* Note: *

Note that this * function may also return error codes from previous, asynchronous * launches. *

*
*

*
* * @param pCopy Parameters for the memory copy * @param hStream Stream identifier * * @return CUDA_SUCCESS, CUDA_ERROR_DEINITIALIZED, CUDA_ERROR_NOT_INITIALIZED, * CUDA_ERROR_INVALID_CONTEXT, CUDA_ERROR_INVALID_VALUE * * @see JCudaDriver#cuArray3DCreate * @see JCudaDriver#cuArray3DGetDescriptor * @see JCudaDriver#cuArrayCreate * @see JCudaDriver#cuArrayDestroy * @see JCudaDriver#cuArrayGetDescriptor * @see JCudaDriver#cuMemAlloc * @see JCudaDriver#cuMemAllocHost * @see JCudaDriver#cuMemAllocPitch * @see JCudaDriver#cuMemcpy2D * @see JCudaDriver#cuMemcpy2DAsync * @see JCudaDriver#cuMemcpy2DUnaligned * @see JCudaDriver#cuMemcpy3D * @see JCudaDriver#cuMemcpyAtoA * @see JCudaDriver#cuMemcpyAtoD * @see JCudaDriver#cuMemcpyAtoH * @see JCudaDriver#cuMemcpyAtoHAsync * @see JCudaDriver#cuMemcpyDtoA * @see JCudaDriver#cuMemcpyDtoD * @see JCudaDriver#cuMemcpyDtoDAsync * @see JCudaDriver#cuMemcpyDtoH * @see JCudaDriver#cuMemcpyDtoHAsync * @see JCudaDriver#cuMemcpyHtoA * @see JCudaDriver#cuMemcpyHtoAAsync * @see JCudaDriver#cuMemcpyHtoD * @see JCudaDriver#cuMemcpyHtoDAsync * @see JCudaDriver#cuMemFree * @see JCudaDriver#cuMemFreeHost * @see JCudaDriver#cuMemGetAddressRange * @see JCudaDriver#cuMemGetInfo * @see JCudaDriver#cuMemHostAlloc * @see JCudaDriver#cuMemHostGetDevicePointer * @see JCudaDriver#cuMemsetD2D8 * @see JCudaDriver#cuMemsetD2D8Async * @see JCudaDriver#cuMemsetD2D16 * @see JCudaDriver#cuMemsetD2D16Async * @see JCudaDriver#cuMemsetD2D32 * @see JCudaDriver#cuMemsetD2D32Async * @see JCudaDriver#cuMemsetD8 * @see JCudaDriver#cuMemsetD8Async * @see JCudaDriver#cuMemsetD16 * @see JCudaDriver#cuMemsetD16Async * @see JCudaDriver#cuMemsetD32 * @see JCudaDriver#cuMemsetD32Async */ public static int cuMemcpy3DAsync(CUDA_MEMCPY3D pCopy, CUstream hStream) { return checkResult(cuMemcpy3DAsyncNative(pCopy, hStream)); } private static native int cuMemcpy3DAsyncNative(CUDA_MEMCPY3D pCopy, CUstream hStream); /** * Copies memory between contexts asynchronously. * *
     * CUresult cuMemcpy3DPeerAsync (
     *      const CUDA_MEMCPY3D_PEER* pCopy,
     *      CUstream hStream )
     * 
*
*

Copies memory between contexts * asynchronously. Perform a 3D memory copy according to the parameters * specified in pCopy. See the definition of the CUDA_MEMCPY3D_PEER * structure for documentation of its parameters. *

*
* Note: *

Note that this * function may also return error codes from previous, asynchronous * launches. *

*
*

*
* * @param pCopy Parameters for the memory copy * @param hStream Stream identifier * * @return CUDA_SUCCESS, CUDA_ERROR_DEINITIALIZED, CUDA_ERROR_NOT_INITIALIZED, * CUDA_ERROR_INVALID_CONTEXT, CUDA_ERROR_INVALID_VALUE * * @see JCudaDriver#cuMemcpyDtoD * @see JCudaDriver#cuMemcpyPeer * @see JCudaDriver#cuMemcpyDtoDAsync * @see JCudaDriver#cuMemcpyPeerAsync * @see JCudaDriver#cuMemcpy3DPeerAsync */ public static int cuMemcpy3DPeerAsync(CUDA_MEMCPY3D_PEER pCopy, CUstream hStream) { return checkResult(cuMemcpy3DPeerAsyncNative(pCopy, hStream)); } private static native int cuMemcpy3DPeerAsyncNative(CUDA_MEMCPY3D_PEER pCopy, CUstream hStream); /** * Initializes device memory. * *
     * CUresult cuMemsetD8 (
     *      CUdeviceptr dstDevice,
     *      unsigned char  uc,
     *      size_t N )
     * 
*
*

Initializes device memory. Sets the * memory range of N 8-bit values to the specified value uc. *

*

Note that this function is asynchronous * with respect to the host unless dstDevice refers to pinned * host memory. *

*
* Note: *

Note that this * function may also return error codes from previous, asynchronous * launches. *

*
*

*
* * @param dstDevice Destination device pointer * @param uc Value to set * @param N Number of elements * * @return CUDA_SUCCESS, CUDA_ERROR_DEINITIALIZED, CUDA_ERROR_NOT_INITIALIZED, * CUDA_ERROR_INVALID_CONTEXT, CUDA_ERROR_INVALID_VALUE * * @see JCudaDriver#cuArray3DCreate * @see JCudaDriver#cuArray3DGetDescriptor * @see JCudaDriver#cuArrayCreate * @see JCudaDriver#cuArrayDestroy * @see JCudaDriver#cuArrayGetDescriptor * @see JCudaDriver#cuMemAlloc * @see JCudaDriver#cuMemAllocHost * @see JCudaDriver#cuMemAllocPitch * @see JCudaDriver#cuMemcpy2D * @see JCudaDriver#cuMemcpy2DAsync * @see JCudaDriver#cuMemcpy2DUnaligned * @see JCudaDriver#cuMemcpy3D * @see JCudaDriver#cuMemcpy3DAsync * @see JCudaDriver#cuMemcpyAtoA * @see JCudaDriver#cuMemcpyAtoD * @see JCudaDriver#cuMemcpyAtoH * @see JCudaDriver#cuMemcpyAtoHAsync * @see JCudaDriver#cuMemcpyDtoA * @see JCudaDriver#cuMemcpyDtoD * @see JCudaDriver#cuMemcpyDtoDAsync * @see JCudaDriver#cuMemcpyDtoH * @see JCudaDriver#cuMemcpyDtoHAsync * @see JCudaDriver#cuMemcpyHtoA * @see JCudaDriver#cuMemcpyHtoAAsync * @see JCudaDriver#cuMemcpyHtoD * @see JCudaDriver#cuMemcpyHtoDAsync * @see JCudaDriver#cuMemFree * @see JCudaDriver#cuMemFreeHost * @see JCudaDriver#cuMemGetAddressRange * @see JCudaDriver#cuMemGetInfo * @see JCudaDriver#cuMemHostAlloc * @see JCudaDriver#cuMemHostGetDevicePointer * @see JCudaDriver#cuMemsetD2D8 * @see JCudaDriver#cuMemsetD2D8Async * @see JCudaDriver#cuMemsetD2D16 * @see JCudaDriver#cuMemsetD2D16Async * @see JCudaDriver#cuMemsetD2D32 * @see JCudaDriver#cuMemsetD2D32Async * @see JCudaDriver#cuMemsetD8Async * @see JCudaDriver#cuMemsetD16 * @see JCudaDriver#cuMemsetD16Async * @see JCudaDriver#cuMemsetD32 * @see JCudaDriver#cuMemsetD32Async */ public static int cuMemsetD8(CUdeviceptr dstDevice, byte uc, long N) { return checkResult(cuMemsetD8Native(dstDevice, uc, N)); } private static native int cuMemsetD8Native(CUdeviceptr dstDevice, byte uc, long N); /** * Initializes device memory. * *
     * CUresult cuMemsetD16 (
     *      CUdeviceptr dstDevice,
     *      unsigned short us,
     *      size_t N )
     * 
*
*

Initializes device memory. Sets the * memory range of N 16-bit values to the specified value us. The dstDevice pointer must be two byte aligned. *

*

Note that this function is asynchronous * with respect to the host unless dstDevice refers to pinned * host memory. *

*
* Note: *

Note that this * function may also return error codes from previous, asynchronous * launches. *

*
*

*
* * @param dstDevice Destination device pointer * @param us Value to set * @param N Number of elements * * @return CUDA_SUCCESS, CUDA_ERROR_DEINITIALIZED, CUDA_ERROR_NOT_INITIALIZED, * CUDA_ERROR_INVALID_CONTEXT, CUDA_ERROR_INVALID_VALUE * * @see JCudaDriver#cuArray3DCreate * @see JCudaDriver#cuArray3DGetDescriptor * @see JCudaDriver#cuArrayCreate * @see JCudaDriver#cuArrayDestroy * @see JCudaDriver#cuArrayGetDescriptor * @see JCudaDriver#cuMemAlloc * @see JCudaDriver#cuMemAllocHost * @see JCudaDriver#cuMemAllocPitch * @see JCudaDriver#cuMemcpy2D * @see JCudaDriver#cuMemcpy2DAsync * @see JCudaDriver#cuMemcpy2DUnaligned * @see JCudaDriver#cuMemcpy3D * @see JCudaDriver#cuMemcpy3DAsync * @see JCudaDriver#cuMemcpyAtoA * @see JCudaDriver#cuMemcpyAtoD * @see JCudaDriver#cuMemcpyAtoH * @see JCudaDriver#cuMemcpyAtoHAsync * @see JCudaDriver#cuMemcpyDtoA * @see JCudaDriver#cuMemcpyDtoD * @see JCudaDriver#cuMemcpyDtoDAsync * @see JCudaDriver#cuMemcpyDtoH * @see JCudaDriver#cuMemcpyDtoHAsync * @see JCudaDriver#cuMemcpyHtoA * @see JCudaDriver#cuMemcpyHtoAAsync * @see JCudaDriver#cuMemcpyHtoD * @see JCudaDriver#cuMemcpyHtoDAsync * @see JCudaDriver#cuMemFree * @see JCudaDriver#cuMemFreeHost * @see JCudaDriver#cuMemGetAddressRange * @see JCudaDriver#cuMemGetInfo * @see JCudaDriver#cuMemHostAlloc * @see JCudaDriver#cuMemHostGetDevicePointer * @see JCudaDriver#cuMemsetD2D8 * @see JCudaDriver#cuMemsetD2D8Async * @see JCudaDriver#cuMemsetD2D16 * @see JCudaDriver#cuMemsetD2D16Async * @see JCudaDriver#cuMemsetD2D32 * @see JCudaDriver#cuMemsetD2D32Async * @see JCudaDriver#cuMemsetD8 * @see JCudaDriver#cuMemsetD8Async * @see JCudaDriver#cuMemsetD16Async * @see JCudaDriver#cuMemsetD32 * @see JCudaDriver#cuMemsetD32Async */ public static int cuMemsetD16(CUdeviceptr dstDevice, short us, long N) { return checkResult(cuMemsetD16Native(dstDevice, us, N)); } private static native int cuMemsetD16Native(CUdeviceptr dstDevice, short us, long N); /** * Initializes device memory. * *
     * CUresult cuMemsetD32 (
     *      CUdeviceptr dstDevice,
     *      unsigned int  ui,
     *      size_t N )
     * 
*
*

Initializes device memory. Sets the * memory range of N 32-bit values to the specified value ui. The dstDevice pointer must be four byte aligned. *

*

Note that this function is asynchronous * with respect to the host unless dstDevice refers to pinned * host memory. *

*
* Note: *

Note that this * function may also return error codes from previous, asynchronous * launches. *

*
*

*
* * @param dstDevice Destination device pointer * @param ui Value to set * @param N Number of elements * * @return CUDA_SUCCESS, CUDA_ERROR_DEINITIALIZED, CUDA_ERROR_NOT_INITIALIZED, * CUDA_ERROR_INVALID_CONTEXT, CUDA_ERROR_INVALID_VALUE * * @see JCudaDriver#cuArray3DCreate * @see JCudaDriver#cuArray3DGetDescriptor * @see JCudaDriver#cuArrayCreate * @see JCudaDriver#cuArrayDestroy * @see JCudaDriver#cuArrayGetDescriptor * @see JCudaDriver#cuMemAlloc * @see JCudaDriver#cuMemAllocHost * @see JCudaDriver#cuMemAllocPitch * @see JCudaDriver#cuMemcpy2D * @see JCudaDriver#cuMemcpy2DAsync * @see JCudaDriver#cuMemcpy2DUnaligned * @see JCudaDriver#cuMemcpy3D * @see JCudaDriver#cuMemcpy3DAsync * @see JCudaDriver#cuMemcpyAtoA * @see JCudaDriver#cuMemcpyAtoD * @see JCudaDriver#cuMemcpyAtoH * @see JCudaDriver#cuMemcpyAtoHAsync * @see JCudaDriver#cuMemcpyDtoA * @see JCudaDriver#cuMemcpyDtoD * @see JCudaDriver#cuMemcpyDtoDAsync * @see JCudaDriver#cuMemcpyDtoH * @see JCudaDriver#cuMemcpyDtoHAsync * @see JCudaDriver#cuMemcpyHtoA * @see JCudaDriver#cuMemcpyHtoAAsync * @see JCudaDriver#cuMemcpyHtoD * @see JCudaDriver#cuMemcpyHtoDAsync * @see JCudaDriver#cuMemFree * @see JCudaDriver#cuMemFreeHost * @see JCudaDriver#cuMemGetAddressRange * @see JCudaDriver#cuMemGetInfo * @see JCudaDriver#cuMemHostAlloc * @see JCudaDriver#cuMemHostGetDevicePointer * @see JCudaDriver#cuMemsetD2D8 * @see JCudaDriver#cuMemsetD2D8Async * @see JCudaDriver#cuMemsetD2D16 * @see JCudaDriver#cuMemsetD2D16Async * @see JCudaDriver#cuMemsetD2D32 * @see JCudaDriver#cuMemsetD2D32Async * @see JCudaDriver#cuMemsetD8 * @see JCudaDriver#cuMemsetD8Async * @see JCudaDriver#cuMemsetD16 * @see JCudaDriver#cuMemsetD16Async * @see JCudaDriver#cuMemsetD32Async */ public static int cuMemsetD32(CUdeviceptr dstDevice, int ui, long N) { return checkResult(cuMemsetD32Native(dstDevice, ui, N)); } private static native int cuMemsetD32Native(CUdeviceptr dstDevice, int ui, long N); /** * Initializes device memory. * *
     * CUresult cuMemsetD2D8 (
     *      CUdeviceptr dstDevice,
     *      size_t dstPitch,
     *      unsigned char  uc,
     *      size_t Width,
     *      size_t Height )
     * 
*
*

Initializes device memory. Sets the 2D * memory range of Width 8-bit values to the specified value * uc. Height specifies the number of rows to set, * and dstPitch specifies the number of bytes between each row. * This function performs fastest when the pitch is one that has been * passed * back by cuMemAllocPitch(). *

*

Note that this function is asynchronous * with respect to the host unless dstDevice refers to pinned * host memory. *

*
* Note: *

Note that this * function may also return error codes from previous, asynchronous * launches. *

*
*

*
* * @param dstDevice Destination device pointer * @param dstPitch Pitch of destination device pointer * @param uc Value to set * @param Width Width of row * @param Height Number of rows * * @return CUDA_SUCCESS, CUDA_ERROR_DEINITIALIZED, CUDA_ERROR_NOT_INITIALIZED, * CUDA_ERROR_INVALID_CONTEXT, CUDA_ERROR_INVALID_VALUE * * @see JCudaDriver#cuArray3DCreate * @see JCudaDriver#cuArray3DGetDescriptor * @see JCudaDriver#cuArrayCreate * @see JCudaDriver#cuArrayDestroy * @see JCudaDriver#cuArrayGetDescriptor * @see JCudaDriver#cuMemAlloc * @see JCudaDriver#cuMemAllocHost * @see JCudaDriver#cuMemAllocPitch * @see JCudaDriver#cuMemcpy2D * @see JCudaDriver#cuMemcpy2DAsync * @see JCudaDriver#cuMemcpy2DUnaligned * @see JCudaDriver#cuMemcpy3D * @see JCudaDriver#cuMemcpy3DAsync * @see JCudaDriver#cuMemcpyAtoA * @see JCudaDriver#cuMemcpyAtoD * @see JCudaDriver#cuMemcpyAtoH * @see JCudaDriver#cuMemcpyAtoHAsync * @see JCudaDriver#cuMemcpyDtoA * @see JCudaDriver#cuMemcpyDtoD * @see JCudaDriver#cuMemcpyDtoDAsync * @see JCudaDriver#cuMemcpyDtoH * @see JCudaDriver#cuMemcpyDtoHAsync * @see JCudaDriver#cuMemcpyHtoA * @see JCudaDriver#cuMemcpyHtoAAsync * @see JCudaDriver#cuMemcpyHtoD * @see JCudaDriver#cuMemcpyHtoDAsync * @see JCudaDriver#cuMemFree * @see JCudaDriver#cuMemFreeHost * @see JCudaDriver#cuMemGetAddressRange * @see JCudaDriver#cuMemGetInfo * @see JCudaDriver#cuMemHostAlloc * @see JCudaDriver#cuMemHostGetDevicePointer * @see JCudaDriver#cuMemsetD2D8Async * @see JCudaDriver#cuMemsetD2D16 * @see JCudaDriver#cuMemsetD2D16Async * @see JCudaDriver#cuMemsetD2D32 * @see JCudaDriver#cuMemsetD2D32Async * @see JCudaDriver#cuMemsetD8 * @see JCudaDriver#cuMemsetD8Async * @see JCudaDriver#cuMemsetD16 * @see JCudaDriver#cuMemsetD16Async * @see JCudaDriver#cuMemsetD32 * @see JCudaDriver#cuMemsetD32Async */ public static int cuMemsetD2D8(CUdeviceptr dstDevice, long dstPitch, byte uc, long Width, long Height) { return checkResult(cuMemsetD2D8Native(dstDevice, dstPitch, uc, Width, Height)); } 
private static native int cuMemsetD2D8Native(CUdeviceptr dstDevice, long dstPitch, byte uc, long Width, long Height); /** * Initializes device memory. * *
     * CUresult cuMemsetD2D16 (
     *      CUdeviceptr dstDevice,
     *      size_t dstPitch,
     *      unsigned short us,
     *      size_t Width,
     *      size_t Height )
     * 
*
*

Initializes device memory. Sets the 2D * memory range of Width 16-bit values to the specified value * us. Height specifies the number of rows to set, * and dstPitch specifies the number of bytes between each row. * The dstDevice pointer and dstPitch offset must be * two byte aligned. This function performs fastest when the pitch is one * that has been passed back by cuMemAllocPitch(). *

*

Note that this function is asynchronous * with respect to the host unless dstDevice refers to pinned * host memory. *

*
* Note: *

Note that this * function may also return error codes from previous, asynchronous * launches. *

*
*

*
* * @param dstDevice Destination device pointer * @param dstPitch Pitch of destination device pointer * @param us Value to set * @param Width Width of row * @param Height Number of rows * * @return CUDA_SUCCESS, CUDA_ERROR_DEINITIALIZED, CUDA_ERROR_NOT_INITIALIZED, * CUDA_ERROR_INVALID_CONTEXT, CUDA_ERROR_INVALID_VALUE * * @see JCudaDriver#cuArray3DCreate * @see JCudaDriver#cuArray3DGetDescriptor * @see JCudaDriver#cuArrayCreate * @see JCudaDriver#cuArrayDestroy * @see JCudaDriver#cuArrayGetDescriptor * @see JCudaDriver#cuMemAlloc * @see JCudaDriver#cuMemAllocHost * @see JCudaDriver#cuMemAllocPitch * @see JCudaDriver#cuMemcpy2D * @see JCudaDriver#cuMemcpy2DAsync * @see JCudaDriver#cuMemcpy2DUnaligned * @see JCudaDriver#cuMemcpy3D * @see JCudaDriver#cuMemcpy3DAsync * @see JCudaDriver#cuMemcpyAtoA * @see JCudaDriver#cuMemcpyAtoD * @see JCudaDriver#cuMemcpyAtoH * @see JCudaDriver#cuMemcpyAtoHAsync * @see JCudaDriver#cuMemcpyDtoA * @see JCudaDriver#cuMemcpyDtoD * @see JCudaDriver#cuMemcpyDtoDAsync * @see JCudaDriver#cuMemcpyDtoH * @see JCudaDriver#cuMemcpyDtoHAsync * @see JCudaDriver#cuMemcpyHtoA * @see JCudaDriver#cuMemcpyHtoAAsync * @see JCudaDriver#cuMemcpyHtoD * @see JCudaDriver#cuMemcpyHtoDAsync * @see JCudaDriver#cuMemFree * @see JCudaDriver#cuMemFreeHost * @see JCudaDriver#cuMemGetAddressRange * @see JCudaDriver#cuMemGetInfo * @see JCudaDriver#cuMemHostAlloc * @see JCudaDriver#cuMemHostGetDevicePointer * @see JCudaDriver#cuMemsetD2D8 * @see JCudaDriver#cuMemsetD2D8Async * @see JCudaDriver#cuMemsetD2D16Async * @see JCudaDriver#cuMemsetD2D32 * @see JCudaDriver#cuMemsetD2D32Async * @see JCudaDriver#cuMemsetD8 * @see JCudaDriver#cuMemsetD8Async * @see JCudaDriver#cuMemsetD16 * @see JCudaDriver#cuMemsetD16Async * @see JCudaDriver#cuMemsetD32 * @see JCudaDriver#cuMemsetD32Async */ public static int cuMemsetD2D16(CUdeviceptr dstDevice, long dstPitch, short us, long Width, long Height) { return checkResult(cuMemsetD2D16Native(dstDevice, dstPitch, us, Width, Height)); } 
private static native int cuMemsetD2D16Native(CUdeviceptr dstDevice, long dstPitch, short us, long Width, long Height); /** * Initializes device memory. * *
     * CUresult cuMemsetD2D32 (
     *      CUdeviceptr dstDevice,
     *      size_t dstPitch,
     *      unsigned int  ui,
     *      size_t Width,
     *      size_t Height )
     * 
*
*

Initializes device memory. Sets the 2D * memory range of Width 32-bit values to the specified value * ui. Height specifies the number of rows to set, * and dstPitch specifies the number of bytes between each row. * The dstDevice pointer and dstPitch offset must be * four byte aligned. This function performs fastest when the pitch is * one that has been passed back by cuMemAllocPitch(). *

*

Note that this function is asynchronous * with respect to the host unless dstDevice refers to pinned * host memory. *

*
* Note: *

Note that this * function may also return error codes from previous, asynchronous * launches. *

*
*

*
* * @param dstDevice Destination device pointer * @param dstPitch Pitch of destination device pointer * @param ui Value to set * @param Width Width of row * @param Height Number of rows * * @return CUDA_SUCCESS, CUDA_ERROR_DEINITIALIZED, CUDA_ERROR_NOT_INITIALIZED, * CUDA_ERROR_INVALID_CONTEXT, CUDA_ERROR_INVALID_VALUE * * @see JCudaDriver#cuArray3DCreate * @see JCudaDriver#cuArray3DGetDescriptor * @see JCudaDriver#cuArrayCreate * @see JCudaDriver#cuArrayDestroy * @see JCudaDriver#cuArrayGetDescriptor * @see JCudaDriver#cuMemAlloc * @see JCudaDriver#cuMemAllocHost * @see JCudaDriver#cuMemAllocPitch * @see JCudaDriver#cuMemcpy2D * @see JCudaDriver#cuMemcpy2DAsync * @see JCudaDriver#cuMemcpy2DUnaligned * @see JCudaDriver#cuMemcpy3D * @see JCudaDriver#cuMemcpy3DAsync * @see JCudaDriver#cuMemcpyAtoA * @see JCudaDriver#cuMemcpyAtoD * @see JCudaDriver#cuMemcpyAtoH * @see JCudaDriver#cuMemcpyAtoHAsync * @see JCudaDriver#cuMemcpyDtoA * @see JCudaDriver#cuMemcpyDtoD * @see JCudaDriver#cuMemcpyDtoDAsync * @see JCudaDriver#cuMemcpyDtoH * @see JCudaDriver#cuMemcpyDtoHAsync * @see JCudaDriver#cuMemcpyHtoA * @see JCudaDriver#cuMemcpyHtoAAsync * @see JCudaDriver#cuMemcpyHtoD * @see JCudaDriver#cuMemcpyHtoDAsync * @see JCudaDriver#cuMemFree * @see JCudaDriver#cuMemFreeHost * @see JCudaDriver#cuMemGetAddressRange * @see JCudaDriver#cuMemGetInfo * @see JCudaDriver#cuMemHostAlloc * @see JCudaDriver#cuMemHostGetDevicePointer * @see JCudaDriver#cuMemsetD2D8 * @see JCudaDriver#cuMemsetD2D8Async * @see JCudaDriver#cuMemsetD2D16 * @see JCudaDriver#cuMemsetD2D16Async * @see JCudaDriver#cuMemsetD2D32Async * @see JCudaDriver#cuMemsetD8 * @see JCudaDriver#cuMemsetD8Async * @see JCudaDriver#cuMemsetD16 * @see JCudaDriver#cuMemsetD16Async * @see JCudaDriver#cuMemsetD32 * @see JCudaDriver#cuMemsetD32Async */ public static int cuMemsetD2D32(CUdeviceptr dstDevice, long dstPitch, int ui, long Width, long Height) { return checkResult(cuMemsetD2D32Native(dstDevice, dstPitch, ui, Width, Height)); } 
private static native int cuMemsetD2D32Native(CUdeviceptr dstDevice, long dstPitch, int ui, long Width, long Height); /** * Sets device memory. * *
     * CUresult cuMemsetD8Async (
     *      CUdeviceptr dstDevice,
     *      unsigned char  uc,
     *      size_t N,
     *      CUstream hStream )
     * 
*
*

Sets device memory. Sets the memory * range of N 8-bit values to the specified value uc. *

*

cuMemsetD8Async() is asynchronous and * can optionally be associated to a stream by passing a non-zero stream argument. *

*
* Note: *

Note that this * function may also return error codes from previous, asynchronous * launches. *

*
*

*
* * @param dstDevice Destination device pointer * @param uc Value to set * @param N Number of elements * @param hStream Stream identifier * * @return CUDA_SUCCESS, CUDA_ERROR_DEINITIALIZED, CUDA_ERROR_NOT_INITIALIZED, * CUDA_ERROR_INVALID_CONTEXT, CUDA_ERROR_INVALID_VALUE * * @see JCudaDriver#cuArray3DCreate * @see JCudaDriver#cuArray3DGetDescriptor * @see JCudaDriver#cuArrayCreate * @see JCudaDriver#cuArrayDestroy * @see JCudaDriver#cuArrayGetDescriptor * @see JCudaDriver#cuMemAlloc * @see JCudaDriver#cuMemAllocHost * @see JCudaDriver#cuMemAllocPitch * @see JCudaDriver#cuMemcpy2D * @see JCudaDriver#cuMemcpy2DAsync * @see JCudaDriver#cuMemcpy2DUnaligned * @see JCudaDriver#cuMemcpy3D * @see JCudaDriver#cuMemcpy3DAsync * @see JCudaDriver#cuMemcpyAtoA * @see JCudaDriver#cuMemcpyAtoD * @see JCudaDriver#cuMemcpyAtoH * @see JCudaDriver#cuMemcpyAtoHAsync * @see JCudaDriver#cuMemcpyDtoA * @see JCudaDriver#cuMemcpyDtoD * @see JCudaDriver#cuMemcpyDtoDAsync * @see JCudaDriver#cuMemcpyDtoH * @see JCudaDriver#cuMemcpyDtoHAsync * @see JCudaDriver#cuMemcpyHtoA * @see JCudaDriver#cuMemcpyHtoAAsync * @see JCudaDriver#cuMemcpyHtoD * @see JCudaDriver#cuMemcpyHtoDAsync * @see JCudaDriver#cuMemFree * @see JCudaDriver#cuMemFreeHost * @see JCudaDriver#cuMemGetAddressRange * @see JCudaDriver#cuMemGetInfo * @see JCudaDriver#cuMemHostAlloc * @see JCudaDriver#cuMemHostGetDevicePointer * @see JCudaDriver#cuMemsetD2D8 * @see JCudaDriver#cuMemsetD2D8Async * @see JCudaDriver#cuMemsetD2D16 * @see JCudaDriver#cuMemsetD2D16Async * @see JCudaDriver#cuMemsetD2D32 * @see JCudaDriver#cuMemsetD2D32Async * @see JCudaDriver#cuMemsetD8 * @see JCudaDriver#cuMemsetD16 * @see JCudaDriver#cuMemsetD16Async * @see JCudaDriver#cuMemsetD32 * @see JCudaDriver#cuMemsetD32Async */ public static int cuMemsetD8Async(CUdeviceptr dstDevice, byte uc, long N, CUstream hStream) { return checkResult(cuMemsetD8AsyncNative(dstDevice, uc, N, hStream)); } private static native int cuMemsetD8AsyncNative(CUdeviceptr dstDevice, 
byte uc, long N, CUstream hStream); /** * Sets device memory. * *
     * CUresult cuMemsetD16Async (
     *      CUdeviceptr dstDevice,
     *      unsigned short us,
     *      size_t N,
     *      CUstream hStream )
     * 
*
*

Sets device memory. Sets the memory * range of N 16-bit values to the specified value us. * The dstDevice pointer must be two byte aligned. *

*

cuMemsetD16Async() is asynchronous and * can optionally be associated to a stream by passing a non-zero stream argument. *

*
* Note: *

Note that this * function may also return error codes from previous, asynchronous * launches. *

*
*

*
* * @param dstDevice Destination device pointer * @param us Value to set * @param N Number of elements * @param hStream Stream identifier * * @return CUDA_SUCCESS, CUDA_ERROR_DEINITIALIZED, CUDA_ERROR_NOT_INITIALIZED, * CUDA_ERROR_INVALID_CONTEXT, CUDA_ERROR_INVALID_VALUE * * @see JCudaDriver#cuArray3DCreate * @see JCudaDriver#cuArray3DGetDescriptor * @see JCudaDriver#cuArrayCreate * @see JCudaDriver#cuArrayDestroy * @see JCudaDriver#cuArrayGetDescriptor * @see JCudaDriver#cuMemAlloc * @see JCudaDriver#cuMemAllocHost * @see JCudaDriver#cuMemAllocPitch * @see JCudaDriver#cuMemcpy2D * @see JCudaDriver#cuMemcpy2DAsync * @see JCudaDriver#cuMemcpy2DUnaligned * @see JCudaDriver#cuMemcpy3D * @see JCudaDriver#cuMemcpy3DAsync * @see JCudaDriver#cuMemcpyAtoA * @see JCudaDriver#cuMemcpyAtoD * @see JCudaDriver#cuMemcpyAtoH * @see JCudaDriver#cuMemcpyAtoHAsync * @see JCudaDriver#cuMemcpyDtoA * @see JCudaDriver#cuMemcpyDtoD * @see JCudaDriver#cuMemcpyDtoDAsync * @see JCudaDriver#cuMemcpyDtoH * @see JCudaDriver#cuMemcpyDtoHAsync * @see JCudaDriver#cuMemcpyHtoA * @see JCudaDriver#cuMemcpyHtoAAsync * @see JCudaDriver#cuMemcpyHtoD * @see JCudaDriver#cuMemcpyHtoDAsync * @see JCudaDriver#cuMemFree * @see JCudaDriver#cuMemFreeHost * @see JCudaDriver#cuMemGetAddressRange * @see JCudaDriver#cuMemGetInfo * @see JCudaDriver#cuMemHostAlloc * @see JCudaDriver#cuMemHostGetDevicePointer * @see JCudaDriver#cuMemsetD2D8 * @see JCudaDriver#cuMemsetD2D8Async * @see JCudaDriver#cuMemsetD2D16 * @see JCudaDriver#cuMemsetD2D16Async * @see JCudaDriver#cuMemsetD2D32 * @see JCudaDriver#cuMemsetD2D32Async * @see JCudaDriver#cuMemsetD8 * @see JCudaDriver#cuMemsetD8Async * @see JCudaDriver#cuMemsetD16 * @see JCudaDriver#cuMemsetD32 * @see JCudaDriver#cuMemsetD32Async */ public static int cuMemsetD16Async(CUdeviceptr dstDevice, short us, long N, CUstream hStream) { return checkResult(cuMemsetD16AsyncNative(dstDevice, us, N, hStream)); } private static native int cuMemsetD16AsyncNative(CUdeviceptr dstDevice, 
short us, long N, CUstream hStream); /** * Sets device memory. * *
     * CUresult cuMemsetD32Async (
     *      CUdeviceptr dstDevice,
     *      unsigned int  ui,
     *      size_t N,
     *      CUstream hStream )
     * 
*
*

Sets device memory. Sets the memory * range of N 32-bit values to the specified value ui. * The dstDevice pointer must be four byte aligned. *

*

cuMemsetD32Async() is asynchronous and * can optionally be associated to a stream by passing a non-zero stream argument. *

*
* Note: *

Note that this * function may also return error codes from previous, asynchronous * launches. *

*
*

*
* * @param dstDevice Destination device pointer * @param ui Value to set * @param N Number of elements * @param hStream Stream identifier * * @return CUDA_SUCCESS, CUDA_ERROR_DEINITIALIZED, CUDA_ERROR_NOT_INITIALIZED, * CUDA_ERROR_INVALID_CONTEXT, CUDA_ERROR_INVALID_VALUE * * @see JCudaDriver#cuArray3DCreate * @see JCudaDriver#cuArray3DGetDescriptor * @see JCudaDriver#cuArrayCreate * @see JCudaDriver#cuArrayDestroy * @see JCudaDriver#cuArrayGetDescriptor * @see JCudaDriver#cuMemAlloc * @see JCudaDriver#cuMemAllocHost * @see JCudaDriver#cuMemAllocPitch * @see JCudaDriver#cuMemcpy2D * @see JCudaDriver#cuMemcpy2DAsync * @see JCudaDriver#cuMemcpy2DUnaligned * @see JCudaDriver#cuMemcpy3D * @see JCudaDriver#cuMemcpy3DAsync * @see JCudaDriver#cuMemcpyAtoA * @see JCudaDriver#cuMemcpyAtoD * @see JCudaDriver#cuMemcpyAtoH * @see JCudaDriver#cuMemcpyAtoHAsync * @see JCudaDriver#cuMemcpyDtoA * @see JCudaDriver#cuMemcpyDtoD * @see JCudaDriver#cuMemcpyDtoDAsync * @see JCudaDriver#cuMemcpyDtoH * @see JCudaDriver#cuMemcpyDtoHAsync * @see JCudaDriver#cuMemcpyHtoA * @see JCudaDriver#cuMemcpyHtoAAsync * @see JCudaDriver#cuMemcpyHtoD * @see JCudaDriver#cuMemcpyHtoDAsync * @see JCudaDriver#cuMemFree * @see JCudaDriver#cuMemFreeHost * @see JCudaDriver#cuMemGetAddressRange * @see JCudaDriver#cuMemGetInfo * @see JCudaDriver#cuMemHostAlloc * @see JCudaDriver#cuMemHostGetDevicePointer * @see JCudaDriver#cuMemsetD2D8 * @see JCudaDriver#cuMemsetD2D8Async * @see JCudaDriver#cuMemsetD2D16 * @see JCudaDriver#cuMemsetD2D16Async * @see JCudaDriver#cuMemsetD2D32 * @see JCudaDriver#cuMemsetD2D32Async * @see JCudaDriver#cuMemsetD8 * @see JCudaDriver#cuMemsetD8Async * @see JCudaDriver#cuMemsetD16 * @see JCudaDriver#cuMemsetD16Async * @see JCudaDriver#cuMemsetD32 */ public static int cuMemsetD32Async(CUdeviceptr dstDevice, int ui, long N, CUstream hStream) { return checkResult(cuMemsetD32AsyncNative(dstDevice, ui, N, hStream)); } private static native int cuMemsetD32AsyncNative(CUdeviceptr dstDevice, 
int ui, long N, CUstream hStream); /** * Sets device memory. * *
     * CUresult cuMemsetD2D8Async (
     *      CUdeviceptr dstDevice,
     *      size_t dstPitch,
     *      unsigned char  uc,
     *      size_t Width,
     *      size_t Height,
     *      CUstream hStream )
     * 
*
*

Sets device memory. Sets the 2D memory * range of Width 8-bit values to the specified value uc. Height specifies the number of rows to set, and * dstPitch specifies the number of bytes between each row. This * function performs fastest when the pitch is one that has been passed * back by cuMemAllocPitch(). *

*

cuMemsetD2D8Async() is asynchronous and * can optionally be associated to a stream by passing a non-zero stream argument. *

*
* Note: *

Note that this * function may also return error codes from previous, asynchronous * launches. *

*
*

*
* * @param dstDevice Destination device pointer * @param dstPitch Pitch of destination device pointer * @param uc Value to set * @param Width Width of row * @param Height Number of rows * @param hStream Stream identifier * * @return CUDA_SUCCESS, CUDA_ERROR_DEINITIALIZED, CUDA_ERROR_NOT_INITIALIZED, * CUDA_ERROR_INVALID_CONTEXT, CUDA_ERROR_INVALID_VALUE * * @see JCudaDriver#cuArray3DCreate * @see JCudaDriver#cuArray3DGetDescriptor * @see JCudaDriver#cuArrayCreate * @see JCudaDriver#cuArrayDestroy * @see JCudaDriver#cuArrayGetDescriptor * @see JCudaDriver#cuMemAlloc * @see JCudaDriver#cuMemAllocHost * @see JCudaDriver#cuMemAllocPitch * @see JCudaDriver#cuMemcpy2D * @see JCudaDriver#cuMemcpy2DAsync * @see JCudaDriver#cuMemcpy2DUnaligned * @see JCudaDriver#cuMemcpy3D * @see JCudaDriver#cuMemcpy3DAsync * @see JCudaDriver#cuMemcpyAtoA * @see JCudaDriver#cuMemcpyAtoD * @see JCudaDriver#cuMemcpyAtoH * @see JCudaDriver#cuMemcpyAtoHAsync * @see JCudaDriver#cuMemcpyDtoA * @see JCudaDriver#cuMemcpyDtoD * @see JCudaDriver#cuMemcpyDtoDAsync * @see JCudaDriver#cuMemcpyDtoH * @see JCudaDriver#cuMemcpyDtoHAsync * @see JCudaDriver#cuMemcpyHtoA * @see JCudaDriver#cuMemcpyHtoAAsync * @see JCudaDriver#cuMemcpyHtoD * @see JCudaDriver#cuMemcpyHtoDAsync * @see JCudaDriver#cuMemFree * @see JCudaDriver#cuMemFreeHost * @see JCudaDriver#cuMemGetAddressRange * @see JCudaDriver#cuMemGetInfo * @see JCudaDriver#cuMemHostAlloc * @see JCudaDriver#cuMemHostGetDevicePointer * @see JCudaDriver#cuMemsetD2D8 * @see JCudaDriver#cuMemsetD2D16 * @see JCudaDriver#cuMemsetD2D16Async * @see JCudaDriver#cuMemsetD2D32 * @see JCudaDriver#cuMemsetD2D32Async * @see JCudaDriver#cuMemsetD8 * @see JCudaDriver#cuMemsetD8Async * @see JCudaDriver#cuMemsetD16 * @see JCudaDriver#cuMemsetD16Async * @see JCudaDriver#cuMemsetD32 * @see JCudaDriver#cuMemsetD32Async */ public static int cuMemsetD2D8Async(CUdeviceptr dstDevice, long dstPitch, byte uc, long Width, long Height, CUstream hStream) { return 
checkResult(cuMemsetD2D8AsyncNative(dstDevice, dstPitch, uc, Width, Height, hStream)); } private static native int cuMemsetD2D8AsyncNative(CUdeviceptr dstDevice, long dstPitch, byte uc, long Width, long Height, CUstream hStream); /** * Sets device memory. * *
     * CUresult cuMemsetD2D16Async (
     *      CUdeviceptr dstDevice,
     *      size_t dstPitch,
     *      unsigned short us,
     *      size_t Width,
     *      size_t Height,
     *      CUstream hStream )
     * 
*
*

Sets device memory. Sets the 2D memory * range of Width 16-bit values to the specified value us. Height specifies the number of rows to set, and * dstPitch specifies the number of bytes between each row. The * dstDevice pointer and dstPitch offset must be two * byte aligned. This function performs fastest when the pitch is one that * has been passed back by cuMemAllocPitch(). *

*

cuMemsetD2D16Async() is asynchronous * and can optionally be associated to a stream by passing a non-zero stream argument. *

*
* Note: *

Note that this * function may also return error codes from previous, asynchronous * launches. *

*
*

*
* * @param dstDevice Destination device pointer * @param dstPitch Pitch of destination device pointer * @param us Value to set * @param Width Width of row * @param Height Number of rows * @param hStream Stream identifier * * @return CUDA_SUCCESS, CUDA_ERROR_DEINITIALIZED, CUDA_ERROR_NOT_INITIALIZED, * CUDA_ERROR_INVALID_CONTEXT, CUDA_ERROR_INVALID_VALUE * * @see JCudaDriver#cuArray3DCreate * @see JCudaDriver#cuArray3DGetDescriptor * @see JCudaDriver#cuArrayCreate * @see JCudaDriver#cuArrayDestroy * @see JCudaDriver#cuArrayGetDescriptor * @see JCudaDriver#cuMemAlloc * @see JCudaDriver#cuMemAllocHost * @see JCudaDriver#cuMemAllocPitch * @see JCudaDriver#cuMemcpy2D * @see JCudaDriver#cuMemcpy2DAsync * @see JCudaDriver#cuMemcpy2DUnaligned * @see JCudaDriver#cuMemcpy3D * @see JCudaDriver#cuMemcpy3DAsync * @see JCudaDriver#cuMemcpyAtoA * @see JCudaDriver#cuMemcpyAtoD * @see JCudaDriver#cuMemcpyAtoH * @see JCudaDriver#cuMemcpyAtoHAsync * @see JCudaDriver#cuMemcpyDtoA * @see JCudaDriver#cuMemcpyDtoD * @see JCudaDriver#cuMemcpyDtoDAsync * @see JCudaDriver#cuMemcpyDtoH * @see JCudaDriver#cuMemcpyDtoHAsync * @see JCudaDriver#cuMemcpyHtoA * @see JCudaDriver#cuMemcpyHtoAAsync * @see JCudaDriver#cuMemcpyHtoD * @see JCudaDriver#cuMemcpyHtoDAsync * @see JCudaDriver#cuMemFree * @see JCudaDriver#cuMemFreeHost * @see JCudaDriver#cuMemGetAddressRange * @see JCudaDriver#cuMemGetInfo * @see JCudaDriver#cuMemHostAlloc * @see JCudaDriver#cuMemHostGetDevicePointer * @see JCudaDriver#cuMemsetD2D8 * @see JCudaDriver#cuMemsetD2D8Async * @see JCudaDriver#cuMemsetD2D16 * @see JCudaDriver#cuMemsetD2D32 * @see JCudaDriver#cuMemsetD2D32Async * @see JCudaDriver#cuMemsetD8 * @see JCudaDriver#cuMemsetD8Async * @see JCudaDriver#cuMemsetD16 * @see JCudaDriver#cuMemsetD16Async * @see JCudaDriver#cuMemsetD32 * @see JCudaDriver#cuMemsetD32Async */ public static int cuMemsetD2D16Async(CUdeviceptr dstDevice, long dstPitch, short us, long Width, long Height, CUstream hStream) { return 
checkResult(cuMemsetD2D16AsyncNative(dstDevice, dstPitch, us, Width, Height, hStream)); } private static native int cuMemsetD2D16AsyncNative(CUdeviceptr dstDevice, long dstPitch, short us, long Width, long Height, CUstream hStream); /** * Sets device memory. * *
     * CUresult cuMemsetD2D32Async (
     *      CUdeviceptr dstDevice,
     *      size_t dstPitch,
     *      unsigned int  ui,
     *      size_t Width,
     *      size_t Height,
     *      CUstream hStream )
     * 
*
*

Sets device memory. Sets the 2D memory * range of Width 32-bit values to the specified value ui. Height specifies the number of rows to set, and * dstPitch specifies the number of bytes between each row. The * dstDevice pointer and dstPitch offset must be four * byte aligned. This function performs fastest when the pitch is one that * has been passed back by cuMemAllocPitch(). *

*

cuMemsetD2D32Async() is asynchronous * and can optionally be associated to a stream by passing a non-zero stream argument. *

*
* Note: *

Note that this * function may also return error codes from previous, asynchronous * launches. *

*
*

*
* * @param dstDevice Destination device pointer * @param dstPitch Pitch of destination device pointer * @param ui Value to set * @param Width Width of row * @param Height Number of rows * @param hStream Stream identifier * * @return CUDA_SUCCESS, CUDA_ERROR_DEINITIALIZED, CUDA_ERROR_NOT_INITIALIZED, * CUDA_ERROR_INVALID_CONTEXT, CUDA_ERROR_INVALID_VALUE * * @see JCudaDriver#cuArray3DCreate * @see JCudaDriver#cuArray3DGetDescriptor * @see JCudaDriver#cuArrayCreate * @see JCudaDriver#cuArrayDestroy * @see JCudaDriver#cuArrayGetDescriptor * @see JCudaDriver#cuMemAlloc * @see JCudaDriver#cuMemAllocHost * @see JCudaDriver#cuMemAllocPitch * @see JCudaDriver#cuMemcpy2D * @see JCudaDriver#cuMemcpy2DAsync * @see JCudaDriver#cuMemcpy2DUnaligned * @see JCudaDriver#cuMemcpy3D * @see JCudaDriver#cuMemcpy3DAsync * @see JCudaDriver#cuMemcpyAtoA * @see JCudaDriver#cuMemcpyAtoD * @see JCudaDriver#cuMemcpyAtoH * @see JCudaDriver#cuMemcpyAtoHAsync * @see JCudaDriver#cuMemcpyDtoA * @see JCudaDriver#cuMemcpyDtoD * @see JCudaDriver#cuMemcpyDtoDAsync * @see JCudaDriver#cuMemcpyDtoH * @see JCudaDriver#cuMemcpyDtoHAsync * @see JCudaDriver#cuMemcpyHtoA * @see JCudaDriver#cuMemcpyHtoAAsync * @see JCudaDriver#cuMemcpyHtoD * @see JCudaDriver#cuMemcpyHtoDAsync * @see JCudaDriver#cuMemFree * @see JCudaDriver#cuMemFreeHost * @see JCudaDriver#cuMemGetAddressRange * @see JCudaDriver#cuMemGetInfo * @see JCudaDriver#cuMemHostAlloc * @see JCudaDriver#cuMemHostGetDevicePointer * @see JCudaDriver#cuMemsetD2D8 * @see JCudaDriver#cuMemsetD2D8Async * @see JCudaDriver#cuMemsetD2D16 * @see JCudaDriver#cuMemsetD2D16Async * @see JCudaDriver#cuMemsetD2D32 * @see JCudaDriver#cuMemsetD8 * @see JCudaDriver#cuMemsetD8Async * @see JCudaDriver#cuMemsetD16 * @see JCudaDriver#cuMemsetD16Async * @see JCudaDriver#cuMemsetD32 * @see JCudaDriver#cuMemsetD32Async */ public static int cuMemsetD2D32Async(CUdeviceptr dstDevice, long dstPitch, int ui, long Width, long Height, CUstream hStream) { return 
checkResult(cuMemsetD2D32AsyncNative(dstDevice, dstPitch, ui, Width, Height, hStream)); } private static native int cuMemsetD2D32AsyncNative(CUdeviceptr dstDevice, long dstPitch, int ui, long Width, long Height, CUstream hStream); /** * Returns information about a function. * *
     * CUresult cuFuncGetAttribute (
     *      int* pi,
     *      CUfunction_attribute attrib,
     *      CUfunction hfunc )
     * 
*
*

Returns information about a function. * Returns in *pi the integer value of the attribute attrib on the kernel given by hfunc. The supported * attributes are: *

    *
  • *

    CU_FUNC_ATTRIBUTE_MAX_THREADS_PER_BLOCK: * The maximum number of threads per block, beyond which a launch of the * function would fail. This number depends on both the * function and the device on which * the function is currently loaded. *

    *
  • *
  • *

    CU_FUNC_ATTRIBUTE_SHARED_SIZE_BYTES: * The size in bytes of statically-allocated shared memory per block * required by this function. This does not include dynamically-allocated * shared memory requested by the * user at runtime. *

    *
  • *
  • *

    CU_FUNC_ATTRIBUTE_CONST_SIZE_BYTES: * The size in bytes of user-allocated constant memory required by this * function. *

    *
  • *
  • *

    CU_FUNC_ATTRIBUTE_LOCAL_SIZE_BYTES: * The size in bytes of local memory used by each thread of this * function. *

    *
  • *
  • *

    CU_FUNC_ATTRIBUTE_NUM_REGS: * The number of registers used by each thread of this function. *

    *
  • *
  • *

    CU_FUNC_ATTRIBUTE_PTX_VERSION: * The PTX virtual architecture version for which the function was * compiled. This value is the major PTX version * 10 + the * minor PTX version, so a PTX * version 1.3 function would return the value 13. Note that this may * return the undefined value * of 0 for cubins compiled prior * to CUDA 3.0. *

    *
  • *
  • *

    CU_FUNC_ATTRIBUTE_BINARY_VERSION: * The binary architecture version for which the function was compiled. * This value is the major binary version * 10 + the minor * binary version, so a binary * version 1.3 function would return the value 13. Note that this will * return a value of 10 for legacy * cubins that do not have a * properly-encoded binary architecture version. *

    *
  • *
*

*
* Note: *

Note that this * function may also return error codes from previous, asynchronous * launches. *

*
*

*
* * @param pi Returned attribute value * @param attrib Attribute requested * @param hfunc Function to query attribute of * * @return CUDA_SUCCESS, CUDA_ERROR_DEINITIALIZED, CUDA_ERROR_NOT_INITIALIZED, * CUDA_ERROR_INVALID_CONTEXT, CUDA_ERROR_INVALID_HANDLE, * CUDA_ERROR_INVALID_VALUE * * @see JCudaDriver#cuCtxGetCacheConfig * @see JCudaDriver#cuCtxSetCacheConfig * @see JCudaDriver#cuFuncSetCacheConfig * @see JCudaDriver#cuLaunchKernel */ public static int cuFuncGetAttribute (int pi[], int attrib, CUfunction func) { return checkResult(cuFuncGetAttributeNative(pi, attrib, func)); } private static native int cuFuncGetAttributeNative(int pi[], int attrib, CUfunction func); /** * Sets information about a function.
*
* This call sets the value of a specified attribute attrib on the kernel given * by hfunc to an integer value specified by val * This function returns CUDA_SUCCESS if the new value of the attribute could be * successfully set. If the set fails, this call will return an error. * Not all attributes can have values set. Attempting to set a value on a read-only * attribute will result in an error (CUDA_ERROR_INVALID_VALUE) *
* Supported attributes for the cuFuncSetAttribute call are: *
    *
  • CU_FUNC_ATTRIBUTE_MAX_DYNAMIC_SHARED_SIZE_BYTES: This maximum size in bytes of * dynamically-allocated shared memory. The value should contain the requested * maximum size of dynamically-allocated shared memory. The sum of this value and * the function attribute ::CU_FUNC_ATTRIBUTE_SHARED_SIZE_BYTES cannot exceed the * device attribute ::CU_DEVICE_ATTRIBUTE_MAX_SHARED_MEMORY_PER_BLOCK_OPTIN. * The maximal size of requestable dynamic shared memory may differ by GPU * architecture. *
  • *
  • CU_FUNC_ATTRIBUTE_PREFERRED_SHARED_MEMORY_CARVEOUT: On devices where the L1 * cache and shared memory use the same hardware resources, this sets the shared memory * carveout preference, in percent of the total resources. This is only a hint, and the * driver can choose a different ratio if required to execute the function. *
  • *
* * @param hfunc Function to query attribute of * @param attrib Attribute requested * @param value The value to set * * @return CUDA_SUCCESS, CUDA_ERROR_DEINITIALIZED, CUDA_ERROR_NOT_INITIALIZED, * CUDA_ERROR_INVALID_CONTEXT, CUDA_ERROR_INVALID_HANDLE, CUDA_ERROR_INVALID_VALUE * * @see JCudaDriver#cuCtxGetCacheConfig * @see JCudaDriver#cuCtxSetCacheConfig * @see JCudaDriver#cuFuncSetCacheConfig * @see JCudaDriver#cuLaunchKernel * @see JCuda#cudaFuncGetAttributes * @see JCuda#cudaFuncSetAttribute */ public static int cuFuncSetAttribute(CUfunction hfunc, int attrib, int value) { return checkResult(cuFuncSetAttributeNative(hfunc, attrib, value)); } private static native int cuFuncSetAttributeNative(CUfunction hfunc, int attrib, int value); /** * Sets the block-dimensions for the function. * *
     * CUresult cuFuncSetBlockShape (
     *      CUfunction hfunc,
     *      int  x,
     *      int  y,
     *      int  z )
     * 
*
*

Sets the block-dimensions for the * function. * Deprecated Specifies the x, y, and z dimensions of the thread blocks that are * created when the kernel given by hfunc is launched. *

*
* Note: *

Note that this * function may also return error codes from previous, asynchronous * launches. *

*
*

*
* * @param hfunc Kernel to specify dimensions of * @param x X dimension * @param y Y dimension * @param z Z dimension * * @return CUDA_SUCCESS, CUDA_ERROR_DEINITIALIZED, CUDA_ERROR_NOT_INITIALIZED, * CUDA_ERROR_INVALID_CONTEXT, CUDA_ERROR_INVALID_HANDLE, * CUDA_ERROR_INVALID_VALUE * * @see JCudaDriver#cuFuncSetSharedSize * @see JCudaDriver#cuFuncSetCacheConfig * @see JCudaDriver#cuFuncGetAttribute * @see JCudaDriver#cuParamSetSize * @see JCudaDriver#cuParamSeti * @see JCudaDriver#cuParamSetf * @see JCudaDriver#cuParamSetv * @see JCudaDriver#cuLaunch * @see JCudaDriver#cuLaunchGrid * @see JCudaDriver#cuLaunchGridAsync * @see JCudaDriver#cuLaunchKernel * * @deprecated Deprecated in CUDA */ @Deprecated public static int cuFuncSetBlockShape(CUfunction hfunc, int x, int y, int z) { return checkResult(cuFuncSetBlockShapeNative(hfunc, x, y, z)); } private static native int cuFuncSetBlockShapeNative(CUfunction hfunc, int x, int y, int z); /** * Sets the dynamic shared-memory size for the function. * *
     * CUresult cuFuncSetSharedSize (
     *      CUfunction hfunc,
     *      unsigned int  bytes )
     * 
*
*

Sets the dynamic shared-memory size for * the function. * Deprecated Sets through bytes * the amount of dynamic shared memory that will be available to each * thread block when the kernel given by hfunc is launched. *

*
* Note: *

Note that this * function may also return error codes from previous, asynchronous * launches. *

*
*

*
* * @param hfunc Kernel to specify dynamic shared-memory size for * @param bytes Dynamic shared-memory size per thread in bytes * * @return CUDA_SUCCESS, CUDA_ERROR_DEINITIALIZED, CUDA_ERROR_NOT_INITIALIZED, * CUDA_ERROR_INVALID_CONTEXT, CUDA_ERROR_INVALID_HANDLE, * CUDA_ERROR_INVALID_VALUE * * @see JCudaDriver#cuFuncSetBlockShape * @see JCudaDriver#cuFuncSetCacheConfig * @see JCudaDriver#cuFuncGetAttribute * @see JCudaDriver#cuParamSetSize * @see JCudaDriver#cuParamSeti * @see JCudaDriver#cuParamSetf * @see JCudaDriver#cuParamSetv * @see JCudaDriver#cuLaunch * @see JCudaDriver#cuLaunchGrid * @see JCudaDriver#cuLaunchGridAsync * @see JCudaDriver#cuLaunchKernel * * @deprecated Deprecated in CUDA */ @Deprecated public static int cuFuncSetSharedSize(CUfunction hfunc, int bytes) { return checkResult(cuFuncSetSharedSizeNative(hfunc, bytes)); } private static native int cuFuncSetSharedSizeNative(CUfunction hfunc, int bytes); /** * Sets the preferred cache configuration for a device function. * *
     * CUresult cuFuncSetCacheConfig (
     *      CUfunction hfunc,
     *      CUfunc_cache config )
     * 
*
*

Sets the preferred cache configuration * for a device function. On devices where the L1 cache and shared memory * use the same * hardware resources, this sets through * config the preferred cache configuration for the device * function hfunc. This is only a preference. The driver will * use the requested configuration if possible, but it is free to choose * a different * configuration if required to execute hfunc. Any context-wide preference set via cuCtxSetCacheConfig() * will be overridden by this per-function setting unless the per-function * setting is CU_FUNC_CACHE_PREFER_NONE. In that case, the current * context-wide setting will be used. *

*

This setting does nothing on devices * where the size of the L1 cache and shared memory are fixed. *

*

Launching a kernel with a different * preference than the most recent preference setting may insert a * device-side synchronization * point. *

*

The supported cache configurations are: *

    *
  • *

    CU_FUNC_CACHE_PREFER_NONE: no * preference for shared memory or L1 (default) *

    *
  • *
  • *

    CU_FUNC_CACHE_PREFER_SHARED: * prefer larger shared memory and smaller L1 cache *

    *
  • *
  • *

    CU_FUNC_CACHE_PREFER_L1: prefer * larger L1 cache and smaller shared memory *

    *
  • *
  • *

    CU_FUNC_CACHE_PREFER_EQUAL: * prefer equal sized L1 cache and shared memory *

    *
  • *
*

*
* Note: *

Note that this * function may also return error codes from previous, asynchronous * launches. *

*
*

*
* * @param hfunc Kernel to configure cache for * @param config Requested cache configuration * * @return CUDA_SUCCESS, CUDA_ERROR_INVALID_VALUE, CUDA_ERROR_DEINITIALIZED, * CUDA_ERROR_NOT_INITIALIZED, CUDA_ERROR_INVALID_CONTEXT * * @see JCudaDriver#cuCtxGetCacheConfig * @see JCudaDriver#cuCtxSetCacheConfig * @see JCudaDriver#cuFuncGetAttribute * @see JCudaDriver#cuLaunchKernel */ public static int cuFuncSetCacheConfig(CUfunction hfunc, int config) { return checkResult(cuFuncSetCacheConfigNative(hfunc, config)); } private static native int cuFuncSetCacheConfigNative(CUfunction hfunc, int config); /** * Sets the shared memory configuration for a device function. * *
     * CUresult cuFuncSetSharedMemConfig (
     *      CUfunction hfunc,
     *      CUsharedconfig config )
     * 
*
*

Sets the shared memory configuration for * a device function. On devices with configurable shared memory banks, * this function * will force all subsequent launches of * the specified device function to have the given shared memory bank size * configuration. * On any given launch of the function, the * shared memory configuration of the device will be temporarily changed * if needed to * suit the function's preferred * configuration. Changes in shared memory configuration between subsequent * launches of functions, * may introduce a device side synchronization * point. *

*

Any per-function setting of shared * memory bank size set via cuFuncSetSharedMemConfig will override the * context wide setting set with cuCtxSetSharedMemConfig. *

*

Changing the shared memory bank size * will not increase shared memory usage or affect occupancy of kernels, * but may have major * effects on performance. Larger bank sizes * will allow for greater potential bandwidth to shared memory, but will * change what * kinds of accesses to shared memory will * result in bank conflicts. *

*

This function will do nothing on devices * with fixed shared memory bank size. *

*

The supported bank configurations are: *

    *
  • *

    CU_SHARED_MEM_CONFIG_DEFAULT_BANK_SIZE: * use the context's shared memory configuration when launching this * function. *

    *
  • *
  • *

    CU_SHARED_MEM_CONFIG_FOUR_BYTE_BANK_SIZE: set shared memory bank width * to be natively four bytes when launching this function. *

    *
  • *
  • *

    CU_SHARED_MEM_CONFIG_EIGHT_BYTE_BANK_SIZE: set shared memory bank * width to be natively eight bytes when launching this function. *

    *
  • *
*

*
* Note: *

Note that this * function may also return error codes from previous, asynchronous * launches. *

*
*

*
* * @param hfunc kernel to be given a shared memory config * @param config requested shared memory configuration * * @return CUDA_SUCCESS, CUDA_ERROR_INVALID_VALUE, CUDA_ERROR_DEINITIALIZED, * CUDA_ERROR_NOT_INITIALIZED, CUDA_ERROR_INVALID_CONTEXT * * @see JCudaDriver#cuCtxGetCacheConfig * @see JCudaDriver#cuCtxSetCacheConfig * @see JCudaDriver#cuCtxGetSharedMemConfig * @see JCudaDriver#cuCtxSetSharedMemConfigcuFuncGetAttribute * @see JCudaDriver#cuLaunchKernel */ public static int cuFuncSetSharedMemConfig(CUfunction hfunc, int config) { return checkResult(cuFuncSetSharedMemConfigNative(hfunc, config)); } private static native int cuFuncSetSharedMemConfigNative(CUfunction hfunc, int config); /** * Creates a 1D or 2D CUDA array. * *
     * CUresult cuArrayCreate (
     *      CUarray* pHandle,
     *      const CUDA_ARRAY_DESCRIPTOR* pAllocateArray )
     * 
*
*

Creates a 1D or 2D CUDA array. Creates * a CUDA array according to the CUDA_ARRAY_DESCRIPTOR structure pAllocateArray and returns a handle to the new CUDA array in *pHandle. The CUDA_ARRAY_DESCRIPTOR is defined as: *

*
    typedef struct {
     *         unsigned int Width;
     *         unsigned int Height;
     *         CUarray_format Format;
     *         unsigned int NumChannels;
     *     } CUDA_ARRAY_DESCRIPTOR;
* where:

*
    *
  • *

    Width, and Height are the width, and height of the CUDA array (in elements); * the CUDA array is one-dimensional if height is 0, two-dimensional * otherwise; *

    *
  • *
  • *
    * Format specifies the format * of the elements; CUarray_format is defined as: *
        typedef enum
         * CUarray_format_enum {
         *         CU_AD_FORMAT_UNSIGNED_INT8 = 0x01,
         *         CU_AD_FORMAT_UNSIGNED_INT16 = 0x02,
         *         CU_AD_FORMAT_UNSIGNED_INT32 = 0x03,
         *         CU_AD_FORMAT_SIGNED_INT8 = 0x08,
         *         CU_AD_FORMAT_SIGNED_INT16 = 0x09,
         *         CU_AD_FORMAT_SIGNED_INT32 = 0x0a,
         *         CU_AD_FORMAT_HALF = 0x10,
         *         CU_AD_FORMAT_FLOAT = 0x20
         *     } CUarray_format;
    *
    *
  • *
  • *

    NumChannels specifies * the number of packed components per CUDA array element; it may be 1, * 2, or 4; *

    *
  • *
*

*

Here are examples of CUDA array * descriptions: *

*

Description for a CUDA array of 2048 * floats: *

    CUDA_ARRAY_DESCRIPTOR desc;
     *     desc.Format = CU_AD_FORMAT_FLOAT;
     *     desc.NumChannels = 1;
     *     desc.Width = 2048;
     *     desc.Height = 1;
*

*

Description for a 64 x 64 CUDA array of * floats: *

    CUDA_ARRAY_DESCRIPTOR desc;
     *     desc.Format = CU_AD_FORMAT_FLOAT;
     *     desc.NumChannels = 1;
     *     desc.Width = 64;
     *     desc.Height = 64;
*

*

Description for a width x height CUDA array of 64-bit, 4x16-bit float16's: *

     * CUDA_ARRAY_DESCRIPTOR desc;
     *     desc.FormatFlags = CU_AD_FORMAT_HALF;
     *     desc.NumChannels = 4;
     *     desc.Width = width;
     *     desc.Height = height;
*

*

Description for a width x height CUDA array of 16-bit elements, each of which is two 8-bit * unsigned chars: *

    CUDA_ARRAY_DESCRIPTOR arrayDesc;
     *     desc.FormatFlags = CU_AD_FORMAT_UNSIGNED_INT8;
     *     desc.NumChannels = 2;
     *     desc.Width = width;
     *     desc.Height = height;
*

*
* Note: *

Note that this * function may also return error codes from previous, asynchronous * launches. *

*
*

*
* * @param pHandle Returned array * @param pAllocateArray Array descriptor * * @return CUDA_SUCCESS, CUDA_ERROR_DEINITIALIZED, CUDA_ERROR_NOT_INITIALIZED, * CUDA_ERROR_INVALID_CONTEXT, CUDA_ERROR_INVALID_VALUE, * CUDA_ERROR_OUT_OF_MEMORY, CUDA_ERROR_UNKNOWN * * @see JCudaDriver#cuArray3DCreate * @see JCudaDriver#cuArray3DGetDescriptor * @see JCudaDriver#cuArrayDestroy * @see JCudaDriver#cuArrayGetDescriptor * @see JCudaDriver#cuMemAlloc * @see JCudaDriver#cuMemAllocHost * @see JCudaDriver#cuMemAllocPitch * @see JCudaDriver#cuMemcpy2D * @see JCudaDriver#cuMemcpy2DAsync * @see JCudaDriver#cuMemcpy2DUnaligned * @see JCudaDriver#cuMemcpy3D * @see JCudaDriver#cuMemcpy3DAsync * @see JCudaDriver#cuMemcpyAtoA * @see JCudaDriver#cuMemcpyAtoD * @see JCudaDriver#cuMemcpyAtoH * @see JCudaDriver#cuMemcpyAtoHAsync * @see JCudaDriver#cuMemcpyDtoA * @see JCudaDriver#cuMemcpyDtoD * @see JCudaDriver#cuMemcpyDtoDAsync * @see JCudaDriver#cuMemcpyDtoH * @see JCudaDriver#cuMemcpyDtoHAsync * @see JCudaDriver#cuMemcpyHtoA * @see JCudaDriver#cuMemcpyHtoAAsync * @see JCudaDriver#cuMemcpyHtoD * @see JCudaDriver#cuMemcpyHtoDAsync * @see JCudaDriver#cuMemFree * @see JCudaDriver#cuMemFreeHost * @see JCudaDriver#cuMemGetAddressRange * @see JCudaDriver#cuMemGetInfo * @see JCudaDriver#cuMemHostAlloc * @see JCudaDriver#cuMemHostGetDevicePointer * @see JCudaDriver#cuMemsetD2D8 * @see JCudaDriver#cuMemsetD2D16 * @see JCudaDriver#cuMemsetD2D32 * @see JCudaDriver#cuMemsetD8 * @see JCudaDriver#cuMemsetD16 * @see JCudaDriver#cuMemsetD32 */ public static int cuArrayCreate(CUarray pHandle, CUDA_ARRAY_DESCRIPTOR pAllocateArray) { return checkResult(cuArrayCreateNative(pHandle, pAllocateArray)); } private static native int cuArrayCreateNative(CUarray pHandle, CUDA_ARRAY_DESCRIPTOR pAllocateArray); /** * Get a 1D or 2D CUDA array descriptor. * *
     * CUresult cuArrayGetDescriptor (
     *      CUDA_ARRAY_DESCRIPTOR* pArrayDescriptor,
     *      CUarray hArray )
     * 
*
*

Get a 1D or 2D CUDA array descriptor. * Returns in *pArrayDescriptor a descriptor containing * information on the format and dimensions of the CUDA array hArray. It is useful for subroutines that have been passed a CUDA * array, but need to know the CUDA array parameters for validation * or other purposes. *

*
* Note: *

Note that this * function may also return error codes from previous, asynchronous * launches. *

*
*

*
* * @param pArrayDescriptor Returned array descriptor * @param hArray Array to get descriptor of * * @return CUDA_SUCCESS, CUDA_ERROR_DEINITIALIZED, CUDA_ERROR_NOT_INITIALIZED, * CUDA_ERROR_INVALID_CONTEXT, CUDA_ERROR_INVALID_VALUE, * CUDA_ERROR_INVALID_HANDLE * * @see JCudaDriver#cuArray3DCreate * @see JCudaDriver#cuArray3DGetDescriptor * @see JCudaDriver#cuArrayCreate * @see JCudaDriver#cuArrayDestroy * @see JCudaDriver#cuMemAlloc * @see JCudaDriver#cuMemAllocHost * @see JCudaDriver#cuMemAllocPitch * @see JCudaDriver#cuMemcpy2D * @see JCudaDriver#cuMemcpy2DAsync * @see JCudaDriver#cuMemcpy2DUnaligned * @see JCudaDriver#cuMemcpy3D * @see JCudaDriver#cuMemcpy3DAsync * @see JCudaDriver#cuMemcpyAtoA * @see JCudaDriver#cuMemcpyAtoD * @see JCudaDriver#cuMemcpyAtoH * @see JCudaDriver#cuMemcpyAtoHAsync * @see JCudaDriver#cuMemcpyDtoA * @see JCudaDriver#cuMemcpyDtoD * @see JCudaDriver#cuMemcpyDtoDAsync * @see JCudaDriver#cuMemcpyDtoH * @see JCudaDriver#cuMemcpyDtoHAsync * @see JCudaDriver#cuMemcpyHtoA * @see JCudaDriver#cuMemcpyHtoAAsync * @see JCudaDriver#cuMemcpyHtoD * @see JCudaDriver#cuMemcpyHtoDAsync * @see JCudaDriver#cuMemFree * @see JCudaDriver#cuMemFreeHost * @see JCudaDriver#cuMemGetAddressRange * @see JCudaDriver#cuMemGetInfo * @see JCudaDriver#cuMemHostAlloc * @see JCudaDriver#cuMemHostGetDevicePointer * @see JCudaDriver#cuMemsetD2D8 * @see JCudaDriver#cuMemsetD2D16 * @see JCudaDriver#cuMemsetD2D32 * @see JCudaDriver#cuMemsetD8 * @see JCudaDriver#cuMemsetD16 * @see JCudaDriver#cuMemsetD32 */ public static int cuArrayGetDescriptor(CUDA_ARRAY_DESCRIPTOR pArrayDescriptor, CUarray hArray) { return checkResult(cuArrayGetDescriptorNative(pArrayDescriptor, hArray)); } private static native int cuArrayGetDescriptorNative(CUDA_ARRAY_DESCRIPTOR pArrayDescriptor, CUarray hArray); /** * Returns the layout properties of a sparse CUDA array. 
* * Returns the layout properties of a sparse CUDA array in \p sparseProperties * If the CUDA array is not allocated with flag ::CUDA_ARRAY3D_SPARSE * ::CUDA_ERROR_INVALID_VALUE will be returned. * * If the returned value in ::CUDA_ARRAY_SPARSE_PROPERTIES::flags contains ::CU_ARRAY_SPARSE_PROPERTIES_SINGLE_MIPTAIL, * then ::CUDA_ARRAY_SPARSE_PROPERTIES::miptailSize represents the total size of the array. Otherwise, it will be zero. * Also, the returned value in ::CUDA_ARRAY_SPARSE_PROPERTIES::miptailFirstLevel is always zero. * Note that the \p array must have been allocated using ::cuArrayCreate or ::cuArray3DCreate. For CUDA arrays obtained * using ::cuMipmappedArrayGetLevel, ::CUDA_ERROR_INVALID_VALUE will be returned. Instead, ::cuMipmappedArrayGetSparseProperties * must be used to obtain the sparse properties of the entire CUDA mipmapped array to which \p array belongs to. * * @return * CUDA_SUCCESS * CUDA_ERROR_INVALID_VALUE * * @param sparseProperties Pointer to ::CUDA_ARRAY_SPARSE_PROPERTIES * @param array CUDA array to get the sparse properties of * * @see JCudaDriver#cuMipmappedArrayGetSparseProperties * @see JCudaDriver#cuMemMapArrayAsync */ public static int cuArrayGetSparseProperties(CUDA_ARRAY_SPARSE_PROPERTIES sparseProperties, CUarray array) { return checkResult(cuArrayGetSparsePropertiesNative(sparseProperties, array)); } private static native int cuArrayGetSparsePropertiesNative(CUDA_ARRAY_SPARSE_PROPERTIES sparseProperties, CUarray array); /** * Returns the layout properties of a sparse CUDA mipmapped array. * * Returns the sparse array layout properties in \p sparseProperties * If the CUDA mipmapped array is not allocated with flag ::CUDA_ARRAY3D_SPARSE * ::CUDA_ERROR_INVALID_VALUE will be returned. * * For non-layered CUDA mipmapped arrays, ::CUDA_ARRAY_SPARSE_PROPERTIES::miptailSize returns the * size of the mip tail region. The mip tail region includes all mip levels whose width, height or depth * is less than that of the tile. 
* For layered CUDA mipmapped arrays, if ::CUDA_ARRAY_SPARSE_PROPERTIES::flags contains ::CU_ARRAY_SPARSE_PROPERTIES_SINGLE_MIPTAIL, * then ::CUDA_ARRAY_SPARSE_PROPERTIES::miptailSize specifies the size of the mip tail of all layers combined. * Otherwise, ::CUDA_ARRAY_SPARSE_PROPERTIES::miptailSize specifies mip tail size per layer. * The returned value of ::CUDA_ARRAY_SPARSE_PROPERTIES::miptailFirstLevel is valid only if ::CUDA_ARRAY_SPARSE_PROPERTIES::miptailSize is non-zero. * * @return * CUDA_SUCCESS * CUDA_ERROR_INVALID_VALUE * * @param sparseProperties - Pointer to ::CUDA_ARRAY_SPARSE_PROPERTIES * @param mipmap - CUDA mipmapped array to get the sparse properties of * * @see JCudaDriver#cuArrayGetSparseProperties * @see JCudaDriver#cuMemMapArrayAsync */ public static int cuMipmappedArrayGetSparseProperties(CUDA_ARRAY_SPARSE_PROPERTIES sparseProperties, CUmipmappedArray mipmap) { return checkResult(cuMipmappedArrayGetSparsePropertiesNative(sparseProperties, mipmap)); } private static native int cuMipmappedArrayGetSparsePropertiesNative(CUDA_ARRAY_SPARSE_PROPERTIES sparseProperties, CUmipmappedArray mipmap); /** *

     * \brief Gets a CUDA array plane from a CUDA array
     *
     * Returns in \p pPlaneArray a CUDA array that represents a single format plane
     * of the CUDA array \p hArray.
     *
     * If \p planeIdx is greater than the maximum number of planes in this array or if the array does
     * not have a multi-planar format e.g: ::CU_AD_FORMAT_NV12, then ::CUDA_ERROR_INVALID_VALUE is returned.
     *
     * Note that if the \p hArray has format ::CU_AD_FORMAT_NV12, then passing in 0 for \p planeIdx returns
     * a CUDA array of the same size as \p hArray but with one channel and ::CU_AD_FORMAT_UNSIGNED_INT8 as its format.
     * If 1 is passed for \p planeIdx, then the returned CUDA array has half the height and width
     * of \p hArray with two channels and ::CU_AD_FORMAT_UNSIGNED_INT8 as its format.
     *
     * \param pPlaneArray   - Returned CUDA array referenced by the \p planeIdx
     * \param hArray        - Multiplanar CUDA array
     * \param planeIdx      - Plane index
     *
     * \return
     * ::CUDA_SUCCESS,
     * ::CUDA_ERROR_DEINITIALIZED,
     * ::CUDA_ERROR_NOT_INITIALIZED,
     * ::CUDA_ERROR_INVALID_CONTEXT,
     * ::CUDA_ERROR_INVALID_VALUE,
     * ::CUDA_ERROR_INVALID_HANDLE
     * \notefnerr
     *
     * \sa
     * ::cuArrayCreate,
     * ::cudaGetArrayPlane
     * 
*/ public static int cuArrayGetPlane(CUarray pPlaneArray, CUarray hArray, int planeIdx) { return checkResult(cuArrayGetPlaneNative(pPlaneArray, hArray, planeIdx)); } private static native int cuArrayGetPlaneNative(CUarray pPlaneArray, CUarray hArray, int planeIdx); /** * Destroys a CUDA array. * *
     * CUresult cuArrayDestroy (
     *      CUarray hArray )
     * 
*
*

Destroys a CUDA array. Destroys the CUDA * array hArray. *

*
* Note: *

Note that this * function may also return error codes from previous, asynchronous * launches. *

*
*

*
* * @param hArray Array to destroy * * @return CUDA_SUCCESS, CUDA_ERROR_DEINITIALIZED, CUDA_ERROR_NOT_INITIALIZED, * CUDA_ERROR_INVALID_CONTEXT, CUDA_ERROR_INVALID_HANDLE, * CUDA_ERROR_ARRAY_IS_MAPPED * * @see JCudaDriver#cuArray3DCreate * @see JCudaDriver#cuArray3DGetDescriptor * @see JCudaDriver#cuArrayCreate * @see JCudaDriver#cuArrayGetDescriptor * @see JCudaDriver#cuMemAlloc * @see JCudaDriver#cuMemAllocHost * @see JCudaDriver#cuMemAllocPitch * @see JCudaDriver#cuMemcpy2D * @see JCudaDriver#cuMemcpy2DAsync * @see JCudaDriver#cuMemcpy2DUnaligned * @see JCudaDriver#cuMemcpy3D * @see JCudaDriver#cuMemcpy3DAsync * @see JCudaDriver#cuMemcpyAtoA * @see JCudaDriver#cuMemcpyAtoD * @see JCudaDriver#cuMemcpyAtoH * @see JCudaDriver#cuMemcpyAtoHAsync * @see JCudaDriver#cuMemcpyDtoA * @see JCudaDriver#cuMemcpyDtoD * @see JCudaDriver#cuMemcpyDtoDAsync * @see JCudaDriver#cuMemcpyDtoH * @see JCudaDriver#cuMemcpyDtoHAsync * @see JCudaDriver#cuMemcpyHtoA * @see JCudaDriver#cuMemcpyHtoAAsync * @see JCudaDriver#cuMemcpyHtoD * @see JCudaDriver#cuMemcpyHtoDAsync * @see JCudaDriver#cuMemFree * @see JCudaDriver#cuMemFreeHost * @see JCudaDriver#cuMemGetAddressRange * @see JCudaDriver#cuMemGetInfo * @see JCudaDriver#cuMemHostAlloc * @see JCudaDriver#cuMemHostGetDevicePointer * @see JCudaDriver#cuMemsetD2D8 * @see JCudaDriver#cuMemsetD2D16 * @see JCudaDriver#cuMemsetD2D32 * @see JCudaDriver#cuMemsetD8 * @see JCudaDriver#cuMemsetD16 * @see JCudaDriver#cuMemsetD32 */ public static int cuArrayDestroy(CUarray hArray) { return checkResult(cuArrayDestroyNative(hArray)); } private static native int cuArrayDestroyNative(CUarray hArray); /** * Creates a 3D CUDA array. * *
     * CUresult cuArray3DCreate (
     *      CUarray* pHandle,
     *      const CUDA_ARRAY3D_DESCRIPTOR* pAllocateArray )
     * 
*
*

Creates a 3D CUDA array. Creates a CUDA * array according to the CUDA_ARRAY3D_DESCRIPTOR structure pAllocateArray and returns a handle to the new CUDA array in *pHandle. The CUDA_ARRAY3D_DESCRIPTOR is defined as: *

*
    typedef struct {
     *         unsigned int Width;
     *         unsigned int Height;
     *         unsigned int Depth;
     *         CUarray_format Format;
     *         unsigned int NumChannels;
     *         unsigned int Flags;
     *     } CUDA_ARRAY3D_DESCRIPTOR;
* where:

*
    *
  • *
    * Width, Height, and Depth are the width, height, and depth of * the CUDA array (in elements); the following types of CUDA arrays can * be allocated: *
      *
    • *

      A 1D array is allocated * if Height and Depth extents are both zero. *

      *
    • *
    • *

      A 2D array is allocated * if only Depth extent is zero. *

      *
    • *
    • *

      A 3D array is allocated * if all three extents are non-zero. *

      *
    • *
    • *

      A 1D layered CUDA * array is allocated if only Height is zero and the * CUDA_ARRAY3D_LAYERED flag is set. Each layer is a 1D array. The number * of layers is determined by the depth extent. *

      *
    • *
    • *

      A 2D layered CUDA * array is allocated if all three extents are non-zero and the * CUDA_ARRAY3D_LAYERED flag is set. Each layer is a 2D array. The number * of layers is determined by the depth extent. *

      *
    • *
    • *

      A cubemap CUDA array * is allocated if all three extents are non-zero and the CUDA_ARRAY3D_CUBEMAP * flag is set. Width must be equal to Height, and * Depth must be six. A cubemap is a special type of 2D layered * CUDA array, where the six layers represent the six faces of a cube. * The order of the six * layers in memory is the same as that listed in CUarray_cubemap_face. *

      *
    • *
    • *

      A cubemap layered CUDA * array is allocated if all three extents are non-zero, and both, * CUDA_ARRAY3D_CUBEMAP and CUDA_ARRAY3D_LAYERED flags are set. Width must be equal to Height, and Depth must * be a multiple of six. A cubemap layered CUDA array is a special type * of 2D layered CUDA array that consists of a collection * of cubemaps. The first * six layers represent the first cubemap, the next six layers form the * second cubemap, and so on. *

      *
    • *
    *
    *
  • *
*

*
    *
  • *
    * Format specifies the format * of the elements; CUarray_format is defined as: *
        typedef enum
         * CUarray_format_enum {
         *         CU_AD_FORMAT_UNSIGNED_INT8 = 0x01,
         *         CU_AD_FORMAT_UNSIGNED_INT16 = 0x02,
         *         CU_AD_FORMAT_UNSIGNED_INT32 = 0x03,
         *         CU_AD_FORMAT_SIGNED_INT8 = 0x08,
         *         CU_AD_FORMAT_SIGNED_INT16 = 0x09,
         *         CU_AD_FORMAT_SIGNED_INT32 = 0x0a,
         *         CU_AD_FORMAT_HALF = 0x10,
         *         CU_AD_FORMAT_FLOAT = 0x20
         *     } CUarray_format;
    *
    *
  • *
*

*
    *
  • *

    NumChannels specifies * the number of packed components per CUDA array element; it may be 1, * 2, or 4; *

    *
  • *
*

*
    *
  • *
    * Flags may be set to *
      *
    • *

      CUDA_ARRAY3D_LAYERED * to enable creation of layered CUDA arrays. If this flag is set, Depth specifies the number of layers, not the depth of a 3D * array. *

      *
    • *
    • *

      CUDA_ARRAY3D_SURFACE_LDST * to enable surface references to be bound to the CUDA array. If this * flag is not set, cuSurfRefSetArray will fail when attempting to bind * the CUDA array to a surface reference. *

      *
    • *
    • *

      CUDA_ARRAY3D_CUBEMAP * to enable creation of cubemaps. If this flag is set, Width * must be equal to Height, and Depth must be six. If * the CUDA_ARRAY3D_LAYERED flag is also set, then Depth must * be a multiple of six. *

      *
    • *
    • *

      CUDA_ARRAY3D_TEXTURE_GATHER * to indicate that the CUDA array will be used for texture gather. * Texture gather can only be performed on 2D CUDA arrays. *

      *
    • *
    *
    *
  • *
*

*

Width, Height and * Depth must meet certain size requirements as listed in the * following table. All values are specified in elements. Note that for * brevity's sake, the full name of the * device attribute is not specified. For ex., TEXTURE1D_WIDTH refers to * the device attribute * CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE1D_WIDTH. *

*

Note that 2D CUDA arrays have different * size requirements if the CUDA_ARRAY3D_TEXTURE_GATHER flag is set. Width and Height must not be greater than * CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE2D_GATHER_WIDTH and * CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE2D_GATHER_HEIGHT respectively, in * that case. *

*
* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * *
*

CUDA array * type *

*
*

Valid extents * that must always be met * {(width range in * elements), (height range), (depth range)} *

*
*

Valid extents * with CUDA_ARRAY3D_SURFACE_LDST set * {(width range in * elements), (height range), (depth range)} *

*
*

1D

*
*

{ (1,TEXTURE1D_WIDTH), * 0, 0 } *

*
*

{ (1,SURFACE1D_WIDTH), * 0, 0 } *

*
*

2D

*
*

{ (1,TEXTURE2D_WIDTH), * (1,TEXTURE2D_HEIGHT), 0 } *

*
*

{ (1,SURFACE2D_WIDTH), * (1,SURFACE2D_HEIGHT), 0 } *

*
*

3D

*
*

{ (1,TEXTURE3D_WIDTH), * (1,TEXTURE3D_HEIGHT), (1,TEXTURE3D_DEPTH) } * OR * { * (1,TEXTURE3D_WIDTH_ALTERNATE), (1,TEXTURE3D_HEIGHT_ALTERNATE), * (1,TEXTURE3D_DEPTH_ALTERNATE) } *

*
*

{ (1,SURFACE3D_WIDTH), * (1,SURFACE3D_HEIGHT), (1,SURFACE3D_DEPTH) } *

*
*

1D Layered

*
*

{ * (1,TEXTURE1D_LAYERED_WIDTH), 0, (1,TEXTURE1D_LAYERED_LAYERS) } *

*
*

{ * (1,SURFACE1D_LAYERED_WIDTH), 0, (1,SURFACE1D_LAYERED_LAYERS) } *

*
*

2D Layered

*
*

{ * (1,TEXTURE2D_LAYERED_WIDTH), (1,TEXTURE2D_LAYERED_HEIGHT), * (1,TEXTURE2D_LAYERED_LAYERS) } *

*
*

{ * (1,SURFACE2D_LAYERED_WIDTH), (1,SURFACE2D_LAYERED_HEIGHT), * (1,SURFACE2D_LAYERED_LAYERS) } *

*
*

Cubemap

*
*

{ (1,TEXTURECUBEMAP_WIDTH), * (1,TEXTURECUBEMAP_WIDTH), 6 } *

*
*

{ (1,SURFACECUBEMAP_WIDTH), * (1,SURFACECUBEMAP_WIDTH), 6 } *

*
*

Cubemap Layered

*
*

{ * (1,TEXTURECUBEMAP_LAYERED_WIDTH), (1,TEXTURECUBEMAP_LAYERED_WIDTH), * (1,TEXTURECUBEMAP_LAYERED_LAYERS) } *

*
*

{ * (1,SURFACECUBEMAP_LAYERED_WIDTH), (1,SURFACECUBEMAP_LAYERED_WIDTH), * (1,SURFACECUBEMAP_LAYERED_LAYERS) } *

*
*
*

*

Here are examples of CUDA array * descriptions: *

*

Description for a CUDA array of 2048 * floats: *

    CUDA_ARRAY3D_DESCRIPTOR desc;
     *     desc.Format = CU_AD_FORMAT_FLOAT;
     *     desc.NumChannels = 1;
     *     desc.Width = 2048;
     *     desc.Height = 0;
     *     desc.Depth = 0;
*

*

Description for a 64 x 64 CUDA array of * floats: *

    CUDA_ARRAY3D_DESCRIPTOR desc;
     *     desc.Format = CU_AD_FORMAT_FLOAT;
     *     desc.NumChannels = 1;
     *     desc.Width = 64;
     *     desc.Height = 64;
     *     desc.Depth = 0;
*

*

Description for a width x height x depth CUDA array of 64-bit, 4x16-bit float16's: *

    CUDA_ARRAY3D_DESCRIPTOR desc;
     *     desc.FormatFlags = CU_AD_FORMAT_HALF;
     *     desc.NumChannels = 4;
     *     desc.Width = width;
     *     desc.Height = height;
     *     desc.Depth = depth;
*

*
* Note: *

Note that this * function may also return error codes from previous, asynchronous * launches. *

*
*

*
* * @param pHandle Returned array * @param pAllocateArray 3D array descriptor * * @return CUDA_SUCCESS, CUDA_ERROR_DEINITIALIZED, CUDA_ERROR_NOT_INITIALIZED, * CUDA_ERROR_INVALID_CONTEXT, CUDA_ERROR_INVALID_VALUE, * CUDA_ERROR_OUT_OF_MEMORY, CUDA_ERROR_UNKNOWN * * @see JCudaDriver#cuArray3DGetDescriptor * @see JCudaDriver#cuArrayCreate * @see JCudaDriver#cuArrayDestroy * @see JCudaDriver#cuArrayGetDescriptor * @see JCudaDriver#cuMemAlloc * @see JCudaDriver#cuMemAllocHost * @see JCudaDriver#cuMemAllocPitch * @see JCudaDriver#cuMemcpy2D * @see JCudaDriver#cuMemcpy2DAsync * @see JCudaDriver#cuMemcpy2DUnaligned * @see JCudaDriver#cuMemcpy3D * @see JCudaDriver#cuMemcpy3DAsync * @see JCudaDriver#cuMemcpyAtoA * @see JCudaDriver#cuMemcpyAtoD * @see JCudaDriver#cuMemcpyAtoH * @see JCudaDriver#cuMemcpyAtoHAsync * @see JCudaDriver#cuMemcpyDtoA * @see JCudaDriver#cuMemcpyDtoD * @see JCudaDriver#cuMemcpyDtoDAsync * @see JCudaDriver#cuMemcpyDtoH * @see JCudaDriver#cuMemcpyDtoHAsync * @see JCudaDriver#cuMemcpyHtoA * @see JCudaDriver#cuMemcpyHtoAAsync * @see JCudaDriver#cuMemcpyHtoD * @see JCudaDriver#cuMemcpyHtoDAsync * @see JCudaDriver#cuMemFree * @see JCudaDriver#cuMemFreeHost * @see JCudaDriver#cuMemGetAddressRange * @see JCudaDriver#cuMemGetInfo * @see JCudaDriver#cuMemHostAlloc * @see JCudaDriver#cuMemHostGetDevicePointer * @see JCudaDriver#cuMemsetD2D8 * @see JCudaDriver#cuMemsetD2D16 * @see JCudaDriver#cuMemsetD2D32 * @see JCudaDriver#cuMemsetD8 * @see JCudaDriver#cuMemsetD16 * @see JCudaDriver#cuMemsetD32 */ public static int cuArray3DCreate(CUarray pHandle, CUDA_ARRAY3D_DESCRIPTOR pAllocateArray) { return checkResult(cuArray3DCreateNative(pHandle, pAllocateArray)); } private static native int cuArray3DCreateNative(CUarray pHandle, CUDA_ARRAY3D_DESCRIPTOR pAllocateArray); /** * Get a 3D CUDA array descriptor. * *
     * CUresult cuArray3DGetDescriptor (
     *      CUDA_ARRAY3D_DESCRIPTOR* pArrayDescriptor,
     *      CUarray hArray )
     * 
*
*

Get a 3D CUDA array descriptor. Returns * in *pArrayDescriptor a descriptor containing information on * the format and dimensions of the CUDA array hArray. It is * useful for subroutines that have been passed a CUDA array, but need to * know the CUDA array parameters for validation * or other purposes. *

*

This function may be called on 1D and * 2D arrays, in which case the Height and/or Depth * members of the descriptor struct will be set to 0. *

*
* Note: *

Note that this * function may also return error codes from previous, asynchronous * launches. *

*
*

*
* * @param pArrayDescriptor Returned 3D array descriptor * @param hArray 3D array to get descriptor of * * @return CUDA_SUCCESS, CUDA_ERROR_DEINITIALIZED, CUDA_ERROR_NOT_INITIALIZED, * CUDA_ERROR_INVALID_CONTEXT, CUDA_ERROR_INVALID_VALUE, * CUDA_ERROR_INVALID_HANDLE * * @see JCudaDriver#cuArray3DCreate * @see JCudaDriver#cuArrayCreate * @see JCudaDriver#cuArrayDestroy * @see JCudaDriver#cuArrayGetDescriptor * @see JCudaDriver#cuMemAlloc * @see JCudaDriver#cuMemAllocHost * @see JCudaDriver#cuMemAllocPitch * @see JCudaDriver#cuMemcpy2D * @see JCudaDriver#cuMemcpy2DAsync * @see JCudaDriver#cuMemcpy2DUnaligned * @see JCudaDriver#cuMemcpy3D * @see JCudaDriver#cuMemcpy3DAsync * @see JCudaDriver#cuMemcpyAtoA * @see JCudaDriver#cuMemcpyAtoD * @see JCudaDriver#cuMemcpyAtoH * @see JCudaDriver#cuMemcpyAtoHAsync * @see JCudaDriver#cuMemcpyDtoA * @see JCudaDriver#cuMemcpyDtoD * @see JCudaDriver#cuMemcpyDtoDAsync * @see JCudaDriver#cuMemcpyDtoH * @see JCudaDriver#cuMemcpyDtoHAsync * @see JCudaDriver#cuMemcpyHtoA * @see JCudaDriver#cuMemcpyHtoAAsync * @see JCudaDriver#cuMemcpyHtoD * @see JCudaDriver#cuMemcpyHtoDAsync * @see JCudaDriver#cuMemFree * @see JCudaDriver#cuMemFreeHost * @see JCudaDriver#cuMemGetAddressRange * @see JCudaDriver#cuMemGetInfo * @see JCudaDriver#cuMemHostAlloc * @see JCudaDriver#cuMemHostGetDevicePointer * @see JCudaDriver#cuMemsetD2D8 * @see JCudaDriver#cuMemsetD2D16 * @see JCudaDriver#cuMemsetD2D32 * @see JCudaDriver#cuMemsetD8 * @see JCudaDriver#cuMemsetD16 * @see JCudaDriver#cuMemsetD32 */ public static int cuArray3DGetDescriptor(CUDA_ARRAY3D_DESCRIPTOR pArrayDescriptor, CUarray hArray) { return checkResult(cuArray3DGetDescriptorNative(pArrayDescriptor, hArray)); } private static native int cuArray3DGetDescriptorNative(CUDA_ARRAY3D_DESCRIPTOR pArrayDescriptor, CUarray hArray); /** * Creates a CUDA mipmapped array. * *
     * CUresult cuMipmappedArrayCreate (
     *      CUmipmappedArray* pHandle,
     *      const CUDA_ARRAY3D_DESCRIPTOR* pMipmappedArrayDesc,
     *      unsigned int  numMipmapLevels )
     * 
*
*

Creates a CUDA mipmapped array. Creates * a CUDA mipmapped array according to the CUDA_ARRAY3D_DESCRIPTOR * structure pMipmappedArrayDesc and returns a handle to the * new CUDA mipmapped array in *pHandle. numMipmapLevels * specifies the number of mipmap levels to be allocated. This value is * clamped to the range [1, 1 + floor(log2(max(width, height, * depth)))]. *

*

The CUDA_ARRAY3D_DESCRIPTOR is defined * as: *

*
    typedef struct {
     *         unsigned int Width;
     *         unsigned int Height;
     *         unsigned int Depth;
     *         CUarray_format Format;
     *         unsigned int NumChannels;
     *         unsigned int Flags;
     *     } CUDA_ARRAY3D_DESCRIPTOR;
* where:

*
    *
  • *
    * Width, Height, and Depth are the width, height, and depth of * the CUDA array (in elements); the following types of CUDA arrays can * be allocated: *
      *
    • *

      A 1D mipmapped array * is allocated if Height and Depth extents are both * zero. *

      *
    • *
    • *

      A 2D mipmapped array * is allocated if only Depth extent is zero. *

      *
    • *
    • *

      A 3D mipmapped array * is allocated if all three extents are non-zero. *

      *
    • *
    • *

      A 1D layered CUDA * mipmapped array is allocated if only Height is zero and the * CUDA_ARRAY3D_LAYERED flag is set. Each layer is a 1D array. The number * of layers is determined by the depth extent. *

      *
    • *
    • *

      A 2D layered CUDA * mipmapped array is allocated if all three extents are non-zero and the * CUDA_ARRAY3D_LAYERED flag is set. Each layer is a 2D array. The number * of layers is determined by the depth extent. *

      *
    • *
    • *

      A cubemap CUDA * mipmapped array is allocated if all three extents are non-zero and the * CUDA_ARRAY3D_CUBEMAP flag is set. Width must be equal to Height, and Depth must be six. A cubemap is a special * type of 2D layered CUDA array, where the six layers represent the six * faces of a cube. * The order of the six * layers in memory is the same as that listed in CUarray_cubemap_face. *

      *
    • *
    • *

      A cubemap layered CUDA * mipmapped array is allocated if all three extents are non-zero, and * both, CUDA_ARRAY3D_CUBEMAP and CUDA_ARRAY3D_LAYERED flags are set. Width must be equal to Height, and Depth must * be a multiple of six. A cubemap layered CUDA array is a special type * of 2D layered CUDA array that consists of a collection * of cubemaps. The first * six layers represent the first cubemap, the next six layers form the * second cubemap, and so on. *

      *
    • *
    *
    *
  • *
*

*
    *
  • *
    * Format specifies the format * of the elements; CUarray_format is defined as: *
        typedef enum
         * CUarray_format_enum {
         *         CU_AD_FORMAT_UNSIGNED_INT8 = 0x01,
         *         CU_AD_FORMAT_UNSIGNED_INT16 = 0x02,
         *         CU_AD_FORMAT_UNSIGNED_INT32 = 0x03,
         *         CU_AD_FORMAT_SIGNED_INT8 = 0x08,
         *         CU_AD_FORMAT_SIGNED_INT16 = 0x09,
         *         CU_AD_FORMAT_SIGNED_INT32 = 0x0a,
         *         CU_AD_FORMAT_HALF = 0x10,
         *         CU_AD_FORMAT_FLOAT = 0x20
         *     } CUarray_format;
    *
    *
  • *
*

*
    *
  • *

    NumChannels specifies * the number of packed components per CUDA array element; it may be 1, * 2, or 4; *

    *
  • *
*

*
    *
  • *
    * Flags may be set to *
      *
    • *

      CUDA_ARRAY3D_LAYERED * to enable creation of layered CUDA mipmapped arrays. If this flag is * set, Depth specifies the number of layers, not the depth of * a 3D array. *

      *
    • *
    • *

      CUDA_ARRAY3D_SURFACE_LDST * to enable surface references to be bound to individual mipmap levels * of the CUDA mipmapped array. If this flag is not set, * cuSurfRefSetArray will * fail when attempting to bind a mipmap level of the CUDA mipmapped array * to a surface reference. *

      *
    • *
    • *

      CUDA_ARRAY3D_CUBEMAP * to enable creation of mipmapped cubemaps. If this flag is set, Width must be equal to Height, and Depth must * be six. If the CUDA_ARRAY3D_LAYERED flag is also set, then Depth must be a multiple of six. *

      *
    • *
    • *

      CUDA_ARRAY3D_TEXTURE_GATHER * to indicate that the CUDA mipmapped array will be used for texture * gather. Texture gather can only be performed on 2D CUDA * mipmapped arrays. *

      *
    • *
    *
    *
  • *
*

*

Width, Height and * Depth must meet certain size requirements as listed in the * following table. All values are specified in elements. Note that for * brevity's sake, the full name of the * device attribute is not specified. For ex., TEXTURE1D_MIPMAPPED_WIDTH * refers to the device * attribute * CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE1D_MIPMAPPED_WIDTH. *

*
* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * *
*

CUDA array * type *

*
*

Valid extents * that must always be met * {(width range in * elements), (height range), (depth range)} *

*
*

1D

*
*

{ * (1,TEXTURE1D_MIPMAPPED_WIDTH), 0, 0 } *

*
*

2D

*
*

{ * (1,TEXTURE2D_MIPMAPPED_WIDTH), (1,TEXTURE2D_MIPMAPPED_HEIGHT), 0 } *

*
*

3D

*
*

{ (1,TEXTURE3D_WIDTH), * (1,TEXTURE3D_HEIGHT), (1,TEXTURE3D_DEPTH) } * OR * { * (1,TEXTURE3D_WIDTH_ALTERNATE), (1,TEXTURE3D_HEIGHT_ALTERNATE), * (1,TEXTURE3D_DEPTH_ALTERNATE) } *

*
*

1D Layered

*
*

{ * (1,TEXTURE1D_LAYERED_WIDTH), 0, (1,TEXTURE1D_LAYERED_LAYERS) } *

*
*

2D Layered

*
*

{ * (1,TEXTURE2D_LAYERED_WIDTH), (1,TEXTURE2D_LAYERED_HEIGHT), * (1,TEXTURE2D_LAYERED_LAYERS) } *

*
*

Cubemap

*
*

{ (1,TEXTURECUBEMAP_WIDTH), * (1,TEXTURECUBEMAP_WIDTH), 6 } *

*
*

Cubemap Layered

*
*

{ * (1,TEXTURECUBEMAP_LAYERED_WIDTH), (1,TEXTURECUBEMAP_LAYERED_WIDTH), * (1,TEXTURECUBEMAP_LAYERED_LAYERS) } *

*
*
*

*
* Note: *

Note that this * function may also return error codes from previous, asynchronous * launches. *

*
*

*
* * @param pHandle Returned mipmapped array * @param pMipmappedArrayDesc mipmapped array descriptor * @param numMipmapLevels Number of mipmap levels * * @return CUDA_SUCCESS, CUDA_ERROR_DEINITIALIZED, CUDA_ERROR_NOT_INITIALIZED, * CUDA_ERROR_INVALID_CONTEXT, CUDA_ERROR_INVALID_VALUE, * CUDA_ERROR_OUT_OF_MEMORY, CUDA_ERROR_UNKNOWN * * @see JCudaDriver#cuMipmappedArrayDestroy * @see JCudaDriver#cuMipmappedArrayGetLevel * @see JCudaDriver#cuArrayCreate */ public static int cuMipmappedArrayCreate(CUmipmappedArray pHandle, CUDA_ARRAY3D_DESCRIPTOR pMipmappedArrayDesc, int numMipmapLevels) { return checkResult(cuMipmappedArrayCreateNative(pHandle, pMipmappedArrayDesc, numMipmapLevels)); } private static native int cuMipmappedArrayCreateNative(CUmipmappedArray pHandle, CUDA_ARRAY3D_DESCRIPTOR pMipmappedArrayDesc, int numMipmapLevels); /** * Gets a mipmap level of a CUDA mipmapped array. * *
     * CUresult cuMipmappedArrayGetLevel (
     *      CUarray* pLevelArray,
     *      CUmipmappedArray hMipmappedArray,
     *      unsigned int  level )
     * 
*
*

Gets a mipmap level of a CUDA mipmapped * array. Returns in *pLevelArray a CUDA array that represents * a single mipmap level of the CUDA mipmapped array hMipmappedArray. *

*

If level is greater than the * maximum number of levels in this mipmapped array, CUDA_ERROR_INVALID_VALUE * is returned. *

*
* Note: *

Note that this * function may also return error codes from previous, asynchronous * launches. *

*
*

*
* * @param pLevelArray Returned mipmap level CUDA array * @param hMipmappedArray CUDA mipmapped array * @param level Mipmap level * * @return CUDA_SUCCESS, CUDA_ERROR_DEINITIALIZED, CUDA_ERROR_NOT_INITIALIZED, * CUDA_ERROR_INVALID_CONTEXT, CUDA_ERROR_INVALID_VALUE, * CUDA_ERROR_INVALID_HANDLE * * @see JCudaDriver#cuMipmappedArrayCreate * @see JCudaDriver#cuMipmappedArrayDestroy * @see JCudaDriver#cuArrayCreate */ public static int cuMipmappedArrayGetLevel(CUarray pLevelArray, CUmipmappedArray hMipmappedArray, int level) { return checkResult(cuMipmappedArrayGetLevelNative(pLevelArray, hMipmappedArray, level)); } private static native int cuMipmappedArrayGetLevelNative(CUarray pLevelArray, CUmipmappedArray hMipmappedArray, int level); /** * Destroys a CUDA mipmapped array. * *
     * CUresult cuMipmappedArrayDestroy (
     *      CUmipmappedArray hMipmappedArray )
     * </pre>
     * <p>
     * Destroys the CUDA mipmapped array hMipmappedArray.
     * <p>
     * Note that this function may also return error codes from previous,
     * asynchronous launches.
     *
     * @param hMipmappedArray Mipmapped array to destroy
     *
     * @return CUDA_SUCCESS, CUDA_ERROR_DEINITIALIZED, CUDA_ERROR_NOT_INITIALIZED,
     * CUDA_ERROR_INVALID_CONTEXT, CUDA_ERROR_INVALID_HANDLE,
     * CUDA_ERROR_ARRAY_IS_MAPPED
     *
     * @see JCudaDriver#cuMipmappedArrayCreate
     * @see JCudaDriver#cuMipmappedArrayGetLevel
     * @see JCudaDriver#cuArrayCreate
     */
    public static int cuMipmappedArrayDestroy(CUmipmappedArray hMipmappedArray)
    {
        final int result = cuMipmappedArrayDestroyNative(hMipmappedArray);
        return checkResult(result);
    }
    private static native int cuMipmappedArrayDestroyNative(CUmipmappedArray hMipmappedArray);

    /**
     * Allocate an address range reservation.
*
* Reserves a virtual address range based on the given parameters, giving * the starting address of the range in \p ptr. This API requires a system that * supports UVA. The size and address parameters must be a multiple of the * host page size and the alignment must be a power of two or zero for default * alignment. * * @param ptr Resulting pointer to start of virtual address range allocated * @param size Size of the reserved virtual address range requested * @param alignment - Alignment of the reserved virtual address range requested * @param addr Fixed starting address range requested * @param flags Currently unused, must be zero * @return CUDA_SUCCESS, CUDA_ERROR_INVALID_VALUE, CUDA_ERROR_OUT_OF_MEMORY, * CUDA_ERROR_NOT_INITIALIZED, CUDA_ERROR_DEINITIALIZED, CUDA_ERROR_NOT_PERMITTED, * CUDA_ERROR_NOT_SUPPORTED * * @see JCudaDriver#cuMemAddressFree */ public static int cuMemAddressReserve(CUdeviceptr ptr, long size, long alignment, CUdeviceptr addr, long flags) { return checkResult(cuMemAddressReserveNative(ptr, size, alignment, addr, flags)); } private static native int cuMemAddressReserveNative(CUdeviceptr ptr, long size, long alignment, CUdeviceptr addr, long flags); /** * Free an address range reservation.
*
* Frees a virtual address range reserved by cuMemAddressReserve. The size * must match what was given to memAddressReserve and the ptr given must * match what was returned from memAddressReserve. * * @param ptr Starting address of the virtual address range to free * @param size Size of the virtual address region to free * @return CUDA_SUCCESS, CUDA_ERROR_INVALID_VALUE, CUDA_ERROR_OUT_OF_MEMORY, * CUDA_ERROR_NOT_INITIALIZED, CUDA_ERROR_DEINITIALIZED, CUDA_ERROR_NOT_PERMITTED, * CUDA_ERROR_NOT_SUPPORTED * * @see JCudaDriver#cuMemAddressReserve */ public static int cuMemAddressFree(CUdeviceptr ptr, long size) { return checkResult(cuMemAddressFreeNative(ptr, size)); } private static native int cuMemAddressFreeNative(CUdeviceptr ptr, long size); /** * Create a shareable memory handle representing a memory allocation of a * given size described by the given properties.
*
* This creates a memory allocation on the target device specified through the * \p prop strcuture. The created allocation will not have any device or host * mappings. The generic memory \p handle for the allocation can be * mapped to the address space of calling process via ::cuMemMap. This handle * cannot be transmitted directly to other processes (see * ::cuMemExportToShareableHandle). On Windows, the caller must also pass * an LPSECURITYATTRIBUTE in \p prop to be associated with this handle which * limits or allows access to this handle for a recepient process (see * ::CUmemAllocationProp::win32HandleMetaData for more). The \p size of this * allocation must be a multiple of the the value given via * ::cuMemGetAllocationGranularity with the ::CU_MEM_ALLOC_GRANULARITY_MINIMUM * flag. * * @param handle Value of handle returned. All operations on this allocation are to be performed using this handle. * @param size Size of the allocation requested * @param prop Properties of the allocation to create. * @param flags flags for future use, must be zero now. * @return CUDA_SUCCESS, CUDA_ERROR_INVALID_VALUE, CUDA_ERROR_OUT_OF_MEMORY, * CUDA_ERROR_INVALID_DEVICE, CUDA_ERROR_NOT_INITIALIZED, CUDA_ERROR_DEINITIALIZED, * CUDA_ERROR_NOT_PERMITTED, CUDA_ERROR_NOT_SUPPORTED * * @see JCudaDriver#cuMemRelease * @see JCudaDriver#cuMemExportToShareableHandle * @see JCudaDriver#cuMemImportFromShareableHandle */ public static int cuMemCreate(CUmemGenericAllocationHandle handle, long size, CUmemAllocationProp prop, long flags) { return checkResult(cuMemCreateNative(handle, size, prop, flags)); } private static native int cuMemCreateNative(CUmemGenericAllocationHandle handle, long size, CUmemAllocationProp prop, long flags); /** * Release a memory handle representing a memory allocation which was * previously allocated through cuMemCreate.
*
* Frees the memory that was allocated on a device through cuMemCreate.
*
* The memory allocation will be freed when all outstanding mappings to the memory * are unmapped and when all outstanding references to the handle (including it's * shareable counterparts) are also released. The generic memory handle can be * freed when there are still outstanding mappings made with this handle. Each * time a recepient process imports a shareable handle, it needs to pair it with * ::cuMemRelease for the handle to be freed. If \p handle is not a valid handle * the behavior is undefined. * * @param handle Value of handle which was returned previously by cuMemCreate. * @return CUDA_SUCCESS, CUDA_ERROR_INVALID_VALUE, * CUDA_ERROR_NOT_INITIALIZED, CUDA_ERROR_DEINITIALIZED, CUDA_ERROR_NOT_PERMITTED, * CUDA_ERROR_NOT_SUPPORTED * * @see JCudaDriver cuMemCreate */ public static int cuMemRelease(CUmemGenericAllocationHandle handle) { return checkResult(cuMemReleaseNative(handle)); } private static native int cuMemReleaseNative(CUmemGenericAllocationHandle handle); /** * Maps an allocation handle to a reserved virtual address range.
*
* Maps bytes of memory represented by \p handle starting from byte \p offset to * \p size to address range [\p addr, \p addr + \p size]. This range must be an * address reservation previously reserved with ::cuMemAddressReserve, and * \p offset + \p size must be less than the size of the memory allocation. * Both \p ptr, \p size, and \p offset must be a multiple of the value given via * ::cuMemGetAllocationGranularity with the ::CU_MEM_ALLOC_GRANULARITY_MINIMUM flag.
*
* Please note calling ::cuMemMap does not make the address accessible, * the caller needs to update accessibility of a contiguous mapped VA * range by calling ::cuMemSetAccess.
*
* Once a recipient process obtains a shareable memory handle * from ::cuMemImportFromShareableHandle, the process must * use ::cuMemMap to map the memory into its address ranges before * setting accessibility with ::cuMemSetAccess.
*
* ::cuMemMap can only create mappings on VA range reservations * that are not currently mapped. * * @param ptr Address where memory will be mapped. * @param size Size of the memory mapping. * @param offset Offset into the memory represented by * - \p handle from which to start mapping * - Note: currently must be zero. * @param handle Handle to a shareable memory * @param flags flags for future use, must be zero now. * @return CUDA_SUCCESS, CUDA_ERROR_INVALID_VALUE, CUDA_ERROR_OUT_OF_MEMORY, * CUDA_ERROR_INVALID_DEVICE, CUDA_ERROR_NOT_INITIALIZED, CUDA_ERROR_DEINITIALIZED, * CUDA_ERROR_NOT_PERMITTED, CUDA_ERROR_NOT_SUPPORTED * * @see JCudaDriver#cuMemUnmap * @see JCudaDriver#cuMemSetAccess * @see JCudaDriver#cuMemCreate * @see JCudaDriver#cuMemAddressReserve * @see JCudaDriver#cuMemImportFromShareableHandle */ public static int cuMemMap(CUdeviceptr ptr, long size, long offset, CUmemGenericAllocationHandle handle, long flags) { return checkResult(cuMemMapNative(ptr, size, offset, handle, flags)); } private static native int cuMemMapNative(CUdeviceptr ptr, long size, long offset, CUmemGenericAllocationHandle handle, long flags); /** * Maps or unmaps subregions of sparse CUDA arrays and sparse CUDA mipmapped arrays. *

* Performs map or unmap operations on subregions of sparse CUDA arrays and sparse CUDA mipmapped arrays. * Each operation is specified by a ::CUarrayMapInfo entry in the \p mapInfoList array of size \p count. * The structure ::CUarrayMapInfo is defined in {@link CUarrayMapInfo} * where ::CUarrayMapInfo::resourceType specifies the type of resource to be operated on. * If ::CUarrayMapInfo::resourceType is set to ::CUresourcetype::CU_RESOURCE_TYPE_ARRAY then * ::CUarrayMapInfo::resource::array must be set to a valid sparse CUDA array handle. * The CUDA array must be either a 2D, 2D layered or 3D CUDA array and must have been allocated using * ::cuArrayCreate or ::cuArray3DCreate with the flag ::CUDA_ARRAY3D_SPARSE. * For CUDA arrays obtained using ::cuMipmappedArrayGetLevel, ::CUDA_ERROR_INVALID_VALUE will be returned. * If ::CUarrayMapInfo::resourceType is set to ::CUresourcetype::CU_RESOURCE_TYPE_MIPMAPPED_ARRAY * then ::CUarrayMapInfo::resource::mipmap must be set to a valid sparse CUDA mipmapped array handle. * The CUDA mipmapped array must be either a 2D, 2D layered or 3D CUDA mipmapped array and must have been * allocated using ::cuMipmappedArrayCreate with the flag ::CUDA_ARRAY3D_SPARSE. *

* ::CUarrayMapInfo::subresourceType specifies the type of subresource within the resource. * ::CUarraySparseSubresourceType_enum is defined as {@link CUarraySparseSubresourceType} * where ::CUarraySparseSubresourceType::CU_ARRAY_SPARSE_SUBRESOURCE_TYPE_SPARSE_LEVEL indicates a * sparse-miplevel which spans at least one tile in every dimension. The remaining miplevels which * are too small to span at least one tile in any dimension constitute the mip tail region as indicated by * ::CUarraySparseSubresourceType::CU_ARRAY_SPARSE_SUBRESOURCE_TYPE_MIPTAIL subresource type. *

* If ::CUarrayMapInfo::subresourceType is set to ::CUarraySparseSubresourceType::CU_ARRAY_SPARSE_SUBRESOURCE_TYPE_SPARSE_LEVEL * then ::CUarrayMapInfo::subresource::sparseLevel struct must contain valid array subregion offsets and extents. * The ::CUarrayMapInfo::subresource::sparseLevel::offsetX, ::CUarrayMapInfo::subresource::sparseLevel::offsetY * and ::CUarrayMapInfo::subresource::sparseLevel::offsetZ must specify valid X, Y and Z offsets respectively. * The ::CUarrayMapInfo::subresource::sparseLevel::extentWidth, ::CUarrayMapInfo::subresource::sparseLevel::extentHeight * and ::CUarrayMapInfo::subresource::sparseLevel::extentDepth must specify valid width, height and depth extents respectively. * These offsets and extents must be aligned to the corresponding tile dimension. * For CUDA mipmapped arrays ::CUarrayMapInfo::subresource::sparseLevel::level must specify a valid mip level index. Otherwise, * must be zero. * For layered CUDA arrays and layered CUDA mipmapped arrays ::CUarrayMapInfo::subresource::sparseLevel::layer must specify a valid layer index. Otherwise, * must be zero. * ::CUarrayMapInfo::subresource::sparseLevel::offsetZ must be zero and ::CUarrayMapInfo::subresource::sparseLevel::extentDepth * must be set to 1 for 2D and 2D layered CUDA arrays and CUDA mipmapped arrays. * Tile extents can be obtained by calling ::cuArrayGetSparseProperties and ::cuMipmappedArrayGetSparseProperties *

* If ::CUarrayMapInfo::subresourceType is set to ::CUarraySparseSubresourceType::CU_ARRAY_SPARSE_SUBRESOURCE_TYPE_MIPTAIL * then ::CUarrayMapInfo::subresource::miptail struct must contain valid mip tail offset in * ::CUarrayMapInfo::subresource::miptail::offset and size in ::CUarrayMapInfo::subresource::miptail::size. * Both, mip tail offset and mip tail size must be aligned to the tile size. * For layered CUDA mipmapped arrays which don't have the flag ::CU_ARRAY_SPARSE_PROPERTIES_SINGLE_MIPTAIL set in ::CUDA_ARRAY_SPARSE_PROPERTIES::flags * as returned by ::cuMipmappedArrayGetSparseProperties, ::CUarrayMapInfo::subresource::miptail::layer must specify a valid layer index. * Otherwise, must be zero. *

* ::CUarrayMapInfo::memOperationType specifies the type of operation. ::CUmemOperationType is defined as * {@link CUmemOperationType}. * If ::CUarrayMapInfo::memOperationType is set to ::CUmemOperationType::CU_MEM_OPERATION_TYPE_MAP then the subresource * will be mapped onto the tile pool memory specified by ::CUarrayMapInfo::memHandle at offset ::CUarrayMapInfo::offset. * The tile pool allocation has to be created by specifying the ::CU_MEM_CREATE_USAGE_TILE_POOL flag when calling ::cuMemCreate. Also, * ::CUarrayMapInfo::memHandleType must be set to ::CUmemHandleType::CU_MEM_HANDLE_TYPE_GENERIC. *

* If ::CUarrayMapInfo::memOperationType is set to ::CUmemOperationType::CU_MEM_OPERATION_TYPE_UNMAP then an unmapping operation * is performed. ::CUarrayMapInfo::memHandle must be NULL. *

* ::CUarrayMapInfo::deviceBitMask specifies the list of devices that must map or unmap physical memory. * Currently, this mask must have exactly one bit set, and the corresponding device must match the device associated with the stream. * If ::CUarrayMapInfo::memOperationType is set to ::CUmemOperationType::CU_MEM_OPERATION_TYPE_MAP, the device must also match * the device associated with the tile pool memory allocation as specified by ::CUarrayMapInfo::memHandle. *

* ::CUarrayMapInfo::flags and ::CUarrayMapInfo::reserved[] are unused and must be set to zero. * * @return * CUDA_SUCCESS, * CUDA_ERROR_INVALID_VALUE, * CUDA_ERROR_INVALID_HANDLE * * @param mapInfoList List of ::CUarrayMapInfo * @param count Count of ::CUarrayMapInfo in \p mapInfoList * @param hStream Stream identifier for the stream to use for map or unmap operations * * @see JCudaDriver#cuMipmappedArrayCreate * @see JCudaDriver#cuArrayCreate * @see JCudaDriver#cuArray3DCreate * @see JCudaDriver#cuMemCreate * @see JCudaDriver#cuArrayGetSparseProperties * @see JCudaDriver#cuMipmappedArrayGetSparseProperties */ public static int cuMemMapArrayAsync(CUarrayMapInfo mapInfoList[], int count, CUstream hStream) { return checkResult(cuMemMapArrayAsyncNative(mapInfoList, count, hStream)); } private static native int cuMemMapArrayAsyncNative(CUarrayMapInfo mapInfoList[], int count, CUstream hStream); /** * Unmap the backing memory of a given address range.
*
* The range must be the entire contiguous address range that was mapped to. In * other words, ::cuMemUnmap cannot unmap a sub-range of an address range mapped * by ::cuMemCreate / ::cuMemMap. Any backing memory allocations will be freed * if there are no existing mappings and there are no unreleased memory handles.
*
* When ::cuMemUnmap returns successfully the address range is converted to an * address reservation and can be used for a future calls to ::cuMemMap. Any new * mapping to this virtual address will need to have access granted through * ::cuMemSetAccess, as all mappings start with no accessibility setup. * * @param ptr Starting address for the virtual address range to unmap * @param size Size of the virtual address range to unmap * @return CUDA_SUCCESS, CUDA_ERROR_INVALID_VALUE, * CUDA_ERROR_INVALID_VALUE, CUDA_ERROR_NOT_INITIALIZED, CUDA_ERROR_DEINITIALIZED, * CUDA_ERROR_NOT_PERMITTED, CUDA_ERROR_NOT_SUPPORTED * * @see JCudaDriver#cuMemCreate * @see JCudaDriver#cuMemAddressReserve */ public static int cuMemUnmap(CUdeviceptr ptr, long size) { return checkResult(cuMemUnmapNative(ptr, size)); } private static native int cuMemUnmapNative(CUdeviceptr ptr, long size); /** * Set the access flags for each location specified in \p desc for the given virtual address range.
*
* Given the virtual address range via \p ptr and \p size, and the locations * in the array given by \p desc and \p count, set the access flags for the * target locations. The range must be a fully mapped address range * containing all allocations created by ::cuMemMap / ::cuMemCreate.
* * @param ptr Starting address for the virtual address range * @param size Length of the virtual address range * @param desc Array of ::CUmemAccessDesc that describe how to change the * mapping for each location specified * @param count Number of ::CUmemAccessDesc in \p desc * @return CUDA_SUCCESS, CUDA_ERROR_INVALID_VALUE, CUDA_ERROR_INVALID_DEVICE, * CUDA_ERROR_NOT_SUPPORTED * * @see JCudaDriver#cuMemSetAccess * @see JCudaDriver#cuMemCreate * @see JCudaDriver#cuMemMap */ public static int cuMemSetAccess(CUdeviceptr ptr, long size, CUmemAccessDesc desc[], long count) { return checkResult(cuMemSetAccessNative(ptr, size, desc, count)); } private static native int cuMemSetAccessNative(CUdeviceptr ptr, long size, CUmemAccessDesc desc[], long count); /** * Get the access \p flags set for the given \p location and \p ptr
*
     * @param flags Flags set for this location
     * @param location Location in which to check the flags for
     * @param ptr Address in which to check the access flags for
     * @return CUDA_SUCCESS, CUDA_ERROR_INVALID_VALUE, CUDA_ERROR_INVALID_DEVICE,
     * CUDA_ERROR_NOT_INITIALIZED, CUDA_ERROR_DEINITIALIZED, CUDA_ERROR_NOT_PERMITTED,
     * CUDA_ERROR_NOT_SUPPORTED
     *
     * @see JCudaDriver#cuMemSetAccess
     */
    public static int cuMemGetAccess(long flags[], CUmemLocation location, CUdeviceptr ptr)
    {
        final int result = cuMemGetAccessNative(flags, location, ptr);
        return checkResult(result);
    }
    private static native int cuMemGetAccessNative(long flags[], CUmemLocation location, CUdeviceptr ptr);

    /**
     * Exports an allocation to a requested shareable handle type.
*
* Given a CUDA memory handle, create a shareable memory * allocation handle that can be used to share the memory with other * processes. The recipient process can convert the shareable handle back into a * CUDA memory handle using ::cuMemImportFromShareableHandle and map * it with ::cuMemMap. The implementation of what this handle is and how it * can be transferred is defined by the requested handle type in \p handleType
*
* Once all shareable handles are closed and the allocation is released, the allocated * memory referenced will be released back to the OS and uses of the CUDA handle afterward * will lead to undefined behavior.
*
* This API can also be used in conjunction with other APIs (e.g. Vulkan, OpenGL) * that support importing memory from the shareable type
*
* @param shareableHandle Pointer to the location in which to store the requested handle type * @param handle CUDA handle for the memory allocation * @param handleType Type of shareable handle requested (defines type and size of the \p shareableHandle output parameter) * @param flags Reserved, must be zero * @return CUDA_SUCCESS, CUDA_ERROR_INVALID_VALUE, CUDA_ERROR_NOT_INITIALIZED, * CUDA_ERROR_DEINITIALIZED, CUDA_ERROR_NOT_PERMITTED, CUDA_ERROR_NOT_SUPPORTED * * @see JCudaDriver#cuMemImportFromShareableHandle */ public static int cuMemExportToShareableHandle(Pointer shareableHandle, CUmemGenericAllocationHandle handle, int handleType, long flags) { return checkResult(cuMemExportToShareableHandleNative(shareableHandle, handle, handleType, flags)); } private static native int cuMemExportToShareableHandleNative(Pointer shareableHandle, CUmemGenericAllocationHandle handle, int handleType, long flags); /** * Imports an allocation from a requested shareable handle type.
*
* If the current process cannot support the memory described by this shareable * handle, this API will error as CUDA_ERROR_NOT_SUPPORTED.
*
* \note Importing shareable handles exported from some graphics APIs(Vulkan, OpenGL, etc) * created on devices under an SLI group may not be supported, and thus this API will * return CUDA_ERROR_NOT_SUPPORTED. * There is no guarantee that the contents of \p handle will be the same CUDA memory handle * for the same given OS shareable handle, or the same underlying allocation.
* * @param handle CUDA Memory handle for the memory allocation. * @param osHandle Shareable Handle representing the memory allocation that is to be imported. * @param shHandleType handle type of the exported handle ::CUmemAllocationHandleType. * @return CUDA_SUCCESS, CUDA_ERROR_INVALID_VALUE, CUDA_ERROR_NOT_INITIALIZED, * CUDA_ERROR_DEINITIALIZED, CUDA_ERROR_NOT_PERMITTED, CUDA_ERROR_NOT_SUPPORTED * * @see JCudaDriver#cuMemExportToShareableHandle * @see JCudaDriver#cuMemMap * @see JCudaDriver#cuMemRelease */ public static int cuMemImportFromShareableHandle(CUmemGenericAllocationHandle handle, Pointer osHandle, int shHandleType) { return checkResult(cuMemImportFromShareableHandleNative(handle, osHandle, shHandleType)); } private static native int cuMemImportFromShareableHandleNative(CUmemGenericAllocationHandle handle, Pointer osHandle, int shHandleType); /** * Calculates either the minimal or recommended granularity
*
* Calculates either the minimal or recommended granularity * for a given allocation specification and returns it in granularity. This * granularity can be used as a multiple for alignment, size, or address mapping. * * @param granularity Returned granularity. * @param prop Property for which to determine the granularity for * @param option Determines which granularity to return * @return CUDA_SUCCESS, CUDA_ERROR_INVALID_VALUE, CUDA_ERROR_NOT_INITIALIZED, * CUDA_ERROR_DEINITIALIZED, CUDA_ERROR_NOT_PERMITTED, CUDA_ERROR_NOT_SUPPORTED * * @see JCudaDriver#cuMemCreate * @see JCudaDriver#cuMemMap */ public static int cuMemGetAllocationGranularity(long granularity[], CUmemAllocationProp prop, int option) { return checkResult(cuMemGetAllocationGranularityNative(granularity, prop, option)); } private static native int cuMemGetAllocationGranularityNative(long granularity[], CUmemAllocationProp prop, int option); /** * Retrieve the contents of the property structure defining properties for this handle * * @param prop Pointer to a properties structure which will hold the information about this handle * @param handle Handle which to perform the query on * @return CUDA_SUCCESS, CUDA_ERROR_INVALID_VALUE, CUDA_ERROR_NOT_INITIALIZED, * CUDA_ERROR_DEINITIALIZED, CUDA_ERROR_NOT_PERMITTED, CUDA_ERROR_NOT_SUPPORTED * * @see JCudaDriver#cuMemCreate * @see JCudaDriver#cuMemImportFromShareableHandle */ public static int cuMemGetAllocationPropertiesFromHandle(CUmemAllocationProp prop, CUmemGenericAllocationHandle handle) { return checkResult(cuMemGetAllocationPropertiesFromHandleNative(prop, handle)); } private static native int cuMemGetAllocationPropertiesFromHandleNative(CUmemAllocationProp prop, CUmemGenericAllocationHandle handle); /** * Given an address addr, returns the allocation handle of the backing memory allocation. * * The handle is guaranteed to be the same handle value used to map the memory. If the address * requested is not mapped, the function will fail. 
The returned handle must be released with * corresponding number of calls to ::cuMemRelease. * * The address addr, can be any address in a range previously mapped * by ::cuMemMap, and not necessarily the start address. * * @param handle CUDA Memory handle for the backing memory allocation. * @param addr Memory address to query, that has been mapped previously. * @return CUDA_SUCCESS, CUDA_ERROR_INVALID_VALUE, CUDA_ERROR_NOT_INITIALIZED, * CUDA_ERROR_DEINITIALIZED, CUDA_ERROR_NOT_PERMITTED, CUDA_ERROR_NOT_SUPPORTED * * @see JCudaDriver#cuMemCreate * @see JCudaDriver#cuMemRelease * @see JCudaDriver#cuMemMap */ public static int cuMemRetainAllocationHandle(CUmemGenericAllocationHandle handle, Pointer addr) { return checkResult(cuMemRetainAllocationHandleNative(handle, addr)); } private static native int cuMemRetainAllocationHandleNative(CUmemGenericAllocationHandle handle, Pointer addr); /** *
     * Frees memory with stream ordered semantics.
     *
     * Inserts a free operation into \p hStream.
     * The allocation must not be accessed after stream execution reaches the free.
     * After this API returns, accessing the memory from any subsequent work launched on the GPU
     * or querying its pointer attributes results in undefined behavior.
     * 
     * \param dptr - memory to free
     * \param hStream - The stream establishing the stream ordering contract. 
     * \returns
     * ::CUDA_SUCCESS,
     * ::CUDA_ERROR_INVALID_VALUE,
     * ::CUDA_ERROR_NOT_INITIALIZED,
     * ::CUDA_ERROR_INVALID_CONTEXT (default stream specified with no current context),
     * ::CUDA_ERROR_NOT_SUPPORTED
     * 
*/ public static int cuMemFreeAsync( CUdeviceptr dptr, CUstream hStream) { return checkResult(cuMemFreeAsyncNative(dptr, hStream)); } private static native int cuMemFreeAsyncNative( CUdeviceptr dptr, CUstream hStream); /** * Creates a texture reference. * *
     * CUresult cuTexRefCreate (
     *      CUtexref* pTexRef )
     * </pre>
     * <p>
     * Deprecated. Creates a texture reference and returns its handle in
     * *pTexRef. Once created, the application must call cuTexRefSetArray() or
     * cuTexRefSetAddress() to associate the reference with allocated memory.
     * Other texture reference functions are used to specify the format and
     * interpretation (addressing, filtering, etc.) to be used when the memory
     * is read through this texture reference.
     *
     * @param pTexRef Returned texture reference
     *
     * @return CUDA_SUCCESS, CUDA_ERROR_DEINITIALIZED, CUDA_ERROR_NOT_INITIALIZED,
     * CUDA_ERROR_INVALID_CONTEXT, CUDA_ERROR_INVALID_VALUE
     *
     * @see JCudaDriver#cuTexRefDestroy
     *
     * @deprecated Deprecated in CUDA
     */
    @Deprecated
    public static int cuTexRefCreate(CUtexref pTexRef)
    {
        final int result = cuTexRefCreateNative(pTexRef);
        return checkResult(result);
    }
    private static native int cuTexRefCreateNative(CUtexref pTexRef);

    /**
     * Destroys a texture reference.
     *
     * <pre>
     * CUresult cuTexRefDestroy (
     *      CUtexref hTexRef )
     * </pre>
     * <p>
     * Deprecated. Destroys the texture reference specified by hTexRef.
     *
     * @param hTexRef Texture reference to destroy
     *
     * @return CUDA_SUCCESS, CUDA_ERROR_DEINITIALIZED, CUDA_ERROR_NOT_INITIALIZED,
     * CUDA_ERROR_INVALID_CONTEXT, CUDA_ERROR_INVALID_VALUE
     *
     * @see JCudaDriver#cuTexRefCreate
     *
     * @deprecated Deprecated in CUDA
     */
    @Deprecated
    public static int cuTexRefDestroy(CUtexref hTexRef)
    {
        final int result = cuTexRefDestroyNative(hTexRef);
        return checkResult(result);
    }
    private static native int cuTexRefDestroyNative(CUtexref hTexRef);

    /**
     * Binds an array as a texture reference.
     *
     * <pre>
     * CUresult cuTexRefSetArray (
     *      CUtexref hTexRef,
     *      CUarray hArray,
     *      unsigned int  Flags )
     * 
*
*

Binds an array as a texture reference. * Binds the CUDA array hArray to the texture reference hTexRef. Any previous address or CUDA array state associated with * the texture reference is superseded by this function. Flags * must be set to CU_TRSA_OVERRIDE_FORMAT. Any CUDA array previously bound * to hTexRef is unbound. *

*
* * @param hTexRef Texture reference to bind * @param hArray Array to bind * @param Flags Options (must be CU_TRSA_OVERRIDE_FORMAT) * * @return CUDA_SUCCESS, CUDA_ERROR_DEINITIALIZED, CUDA_ERROR_NOT_INITIALIZED, * CUDA_ERROR_INVALID_CONTEXT, CUDA_ERROR_INVALID_VALUE * * @see JCudaDriver#cuTexRefSetAddress * @see JCudaDriver#cuTexRefSetAddress2D * @see JCudaDriver#cuTexRefSetAddressMode * @see JCudaDriver#cuTexRefSetFilterMode * @see JCudaDriver#cuTexRefSetFlags * @see JCudaDriver#cuTexRefSetFormat * @see JCudaDriver#cuTexRefGetAddress * @see JCudaDriver#cuTexRefGetAddressMode * @see JCudaDriver#cuTexRefGetArray * @see JCudaDriver#cuTexRefGetFilterMode * @see JCudaDriver#cuTexRefGetFlags * @see JCudaDriver#cuTexRefGetFormat * * @deprecated Deprecated as of CUDA 10.1 */ public static int cuTexRefSetArray(CUtexref hTexRef, CUarray hArray, int Flags) { return checkResult(cuTexRefSetArrayNative(hTexRef, hArray, Flags)); } private static native int cuTexRefSetArrayNative(CUtexref hTexRef, CUarray hArray, int Flags); /** * Binds a mipmapped array to a texture reference. * *
     * CUresult cuTexRefSetMipmappedArray (
     *      CUtexref hTexRef,
     *      CUmipmappedArray hMipmappedArray,
     *      unsigned int  Flags )
     * 
*
*

Binds a mipmapped array to a texture * reference. Binds the CUDA mipmapped array hMipmappedArray * to the texture reference hTexRef. Any previous address or * CUDA array state associated with the texture reference is superseded * by this function. Flags must be set to CU_TRSA_OVERRIDE_FORMAT. * Any CUDA array previously bound to hTexRef is unbound. *

*
* * @param hTexRef Texture reference to bind * @param hMipmappedArray Mipmapped array to bind * @param Flags Options (must be CU_TRSA_OVERRIDE_FORMAT) * * @return CUDA_SUCCESS, CUDA_ERROR_DEINITIALIZED, CUDA_ERROR_NOT_INITIALIZED, * CUDA_ERROR_INVALID_CONTEXT, CUDA_ERROR_INVALID_VALUE * * @see JCudaDriver#cuTexRefSetAddress * @see JCudaDriver#cuTexRefSetAddress2D * @see JCudaDriver#cuTexRefSetAddressMode * @see JCudaDriver#cuTexRefSetFilterMode * @see JCudaDriver#cuTexRefSetFlags * @see JCudaDriver#cuTexRefSetFormat * @see JCudaDriver#cuTexRefGetAddress * @see JCudaDriver#cuTexRefGetAddressMode * @see JCudaDriver#cuTexRefGetArray * @see JCudaDriver#cuTexRefGetFilterMode * @see JCudaDriver#cuTexRefGetFlags * @see JCudaDriver#cuTexRefGetFormat * * @deprecated Deprecated as of CUDA 10.1 */ public static int cuTexRefSetMipmappedArray(CUtexref hTexRef, CUmipmappedArray hMipmappedArray, int Flags) { return checkResult(cuTexRefSetMipmappedArrayNative(hTexRef, hMipmappedArray, Flags)); } private static native int cuTexRefSetMipmappedArrayNative(CUtexref hTexRef, CUmipmappedArray hMipmappedArray, int Flags); /** * Binds an address as a texture reference. * *
     * CUresult cuTexRefSetAddress (
     *      size_t* ByteOffset,
     *      CUtexref hTexRef,
     *      CUdeviceptr dptr,
     *      size_t bytes )
     * 
*
*

Binds an address as a texture reference. * Binds a linear address range to the texture reference hTexRef. * Any previous address or CUDA array state associated with the texture * reference is superseded by this function. Any memory * previously bound to hTexRef is * unbound. *

*

Since the hardware enforces an alignment * requirement on texture base addresses, cuTexRefSetAddress() passes back * a byte offset in *ByteOffset that must be applied to texture * fetches in order to read from the desired memory. This offset must be * divided by the texel * size and passed to kernels that read from * the texture so they can be applied to the tex1Dfetch() function. *

*

If the device memory pointer was returned * from cuMemAlloc(), the offset is guaranteed to be 0 and NULL may be * passed as the ByteOffset parameter. *

*

The total number of elements (or texels) * in the linear address range cannot exceed * CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE1D_LINEAR_WIDTH. The number of * elements is computed as (bytes / bytesPerElement), where * bytesPerElement is determined from the data format and number of * components set using cuTexRefSetFormat(). *

*
* * @param ByteOffset Returned byte offset * @param hTexRef Texture reference to bind * @param dptr Device pointer to bind * @param bytes Size of memory to bind in bytes * * @return CUDA_SUCCESS, CUDA_ERROR_DEINITIALIZED, CUDA_ERROR_NOT_INITIALIZED, * CUDA_ERROR_INVALID_CONTEXT, CUDA_ERROR_INVALID_VALUE * * @see JCudaDriver#cuTexRefSetAddress2D * @see JCudaDriver#cuTexRefSetAddressMode * @see JCudaDriver#cuTexRefSetArray * @see JCudaDriver#cuTexRefSetFilterMode * @see JCudaDriver#cuTexRefSetFlags * @see JCudaDriver#cuTexRefSetFormat * @see JCudaDriver#cuTexRefGetAddress * @see JCudaDriver#cuTexRefGetAddressMode * @see JCudaDriver#cuTexRefGetArray * @see JCudaDriver#cuTexRefGetFilterMode * @see JCudaDriver#cuTexRefGetFlags * @see JCudaDriver#cuTexRefGetFormat * * @deprecated Deprecated as of CUDA 10.1 */ public static int cuTexRefSetAddress(long ByteOffset[], CUtexref hTexRef, CUdeviceptr dptr, long bytes) { return checkResult(cuTexRefSetAddressNative(ByteOffset, hTexRef, dptr, bytes)); } private static native int cuTexRefSetAddressNative(long ByteOffset[], CUtexref hTexRef, CUdeviceptr dptr, long bytes); /** * Sets the format for a texture reference. * *
     * CUresult cuTexRefSetFormat (
     *      CUtexref hTexRef,
     *      CUarray_format fmt,
     *      int  NumPackedComponents )
     * 
*
*

Sets the format for a texture reference. * Specifies the format of the data to be read by the texture reference * hTexRef. fmt and NumPackedComponents are * exactly analogous to the Format and NumChannels members of the * CUDA_ARRAY_DESCRIPTOR structure: They specify the format of each * component and the number of components per array element. *

*
* * @param hTexRef Texture reference * @param fmt Format to set * @param NumPackedComponents Number of components per array element * * @return CUDA_SUCCESS, CUDA_ERROR_DEINITIALIZED, CUDA_ERROR_NOT_INITIALIZED, * CUDA_ERROR_INVALID_CONTEXT, CUDA_ERROR_INVALID_VALUE * * @see JCudaDriver#cuTexRefSetAddress * @see JCudaDriver#cuTexRefSetAddress2D * @see JCudaDriver#cuTexRefSetAddressMode * @see JCudaDriver#cuTexRefSetArray * @see JCudaDriver#cuTexRefSetFilterMode * @see JCudaDriver#cuTexRefSetFlags * @see JCudaDriver#cuTexRefGetAddress * @see JCudaDriver#cuTexRefGetAddressMode * @see JCudaDriver#cuTexRefGetArray * @see JCudaDriver#cuTexRefGetFilterMode * @see JCudaDriver#cuTexRefGetFlags * @see JCudaDriver#cuTexRefGetFormat * * @deprecated Deprecated as of CUDA 10.1 */ public static int cuTexRefSetFormat(CUtexref hTexRef, int fmt, int NumPackedComponents) { return checkResult(cuTexRefSetFormatNative(hTexRef, fmt, NumPackedComponents)); } private static native int cuTexRefSetFormatNative(CUtexref hTexRef, int fmt, int NumPackedComponents); /** * Binds an address as a 2D texture reference. * *
     * CUresult cuTexRefSetAddress2D (
     *      CUtexref hTexRef,
     *      const CUDA_ARRAY_DESCRIPTOR* desc,
     *      CUdeviceptr dptr,
     *      size_t Pitch )
     * 
*
*

Binds an address as a 2D texture * reference. Binds a linear address range to the texture reference hTexRef. Any previous address or CUDA array state associated with * the texture reference is superseded by this function. Any memory * previously bound to hTexRef is * unbound. *

*

Using a tex2D() function inside a kernel * requires a call to either cuTexRefSetArray() to bind the corresponding * texture reference to an array, or cuTexRefSetAddress2D() to bind the * texture reference to linear memory. *

*

Function calls to cuTexRefSetFormat() * cannot follow calls to cuTexRefSetAddress2D() for the same texture * reference. *

*

It is required that dptr be * aligned to the appropriate hardware-specific texture alignment. You * can query this value using the device attribute * CU_DEVICE_ATTRIBUTE_TEXTURE_ALIGNMENT. If an unaligned dptr * is supplied, CUDA_ERROR_INVALID_VALUE is returned. *

*

Pitch has to be aligned to * the hardware-specific texture pitch alignment. This value can be * queried using the device attribute * CU_DEVICE_ATTRIBUTE_TEXTURE_PITCH_ALIGNMENT. * If an unaligned Pitch is supplied, CUDA_ERROR_INVALID_VALUE * is returned. *

*

Width and Height, which are specified * in elements (or texels), cannot exceed * CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE2D_LINEAR_WIDTH and * CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE2D_LINEAR_HEIGHT respectively. Pitch, which is specified in bytes, cannot exceed * CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE2D_LINEAR_PITCH. *

*
* * @param hTexRef Texture reference to bind * @param desc Descriptor of CUDA array * @param dptr Device pointer to bind * @param Pitch Line pitch in bytes * * @return CUDA_SUCCESS, CUDA_ERROR_DEINITIALIZED, CUDA_ERROR_NOT_INITIALIZED, * CUDA_ERROR_INVALID_CONTEXT, CUDA_ERROR_INVALID_VALUE * * @see JCudaDriver#cuTexRefSetAddress * @see JCudaDriver#cuTexRefSetAddressMode * @see JCudaDriver#cuTexRefSetArray * @see JCudaDriver#cuTexRefSetFilterMode * @see JCudaDriver#cuTexRefSetFlags * @see JCudaDriver#cuTexRefSetFormat * @see JCudaDriver#cuTexRefGetAddress * @see JCudaDriver#cuTexRefGetAddressMode * @see JCudaDriver#cuTexRefGetArray * @see JCudaDriver#cuTexRefGetFilterMode * @see JCudaDriver#cuTexRefGetFlags * @see JCudaDriver#cuTexRefGetFormat * * @deprecated Deprecated as of CUDA 10.1 */ public static int cuTexRefSetAddress2D(CUtexref hTexRef, CUDA_ARRAY_DESCRIPTOR desc, CUdeviceptr dptr, long PitchInBytes) { return checkResult(cuTexRefSetAddress2DNative(hTexRef, desc, dptr, PitchInBytes)); } private static native int cuTexRefSetAddress2DNative(CUtexref hTexRef, CUDA_ARRAY_DESCRIPTOR desc, CUdeviceptr dptr, long PitchInBytes); /** * Sets the addressing mode for a texture reference. * *
     * CUresult cuTexRefSetAddressMode (
     *      CUtexref hTexRef,
     *      int  dim,
     *      CUaddress_mode am )
     * 
*
*

Sets the addressing mode for a texture * reference. Specifies the addressing mode am for the given * dimension dim of the texture reference hTexRef. If * dim is zero, the addressing mode is applied to the first * parameter of the functions used to fetch from the texture; if dim is 1, the second, and so on. CUaddress_mode is defined as: *

   typedef enum CUaddress_mode_enum {
     *       CU_TR_ADDRESS_MODE_WRAP = 0,
     *       CU_TR_ADDRESS_MODE_CLAMP = 1,
     *       CU_TR_ADDRESS_MODE_MIRROR = 2,
     *       CU_TR_ADDRESS_MODE_BORDER = 3
     *    } CUaddress_mode;
*

*

Note that this call has no effect if * hTexRef is bound to linear memory. Also, if the flag, * CU_TRSF_NORMALIZED_COORDINATES, is not set, the only supported address * mode is CU_TR_ADDRESS_MODE_CLAMP. *

*
* * @param hTexRef Texture reference * @param dim Dimension * @param am Addressing mode to set * * @return CUDA_SUCCESS, CUDA_ERROR_DEINITIALIZED, CUDA_ERROR_NOT_INITIALIZED, * CUDA_ERROR_INVALID_CONTEXT, CUDA_ERROR_INVALID_VALUE * * @see JCudaDriver#cuTexRefSetAddress * @see JCudaDriver#cuTexRefSetAddress2D * @see JCudaDriver#cuTexRefSetArray * @see JCudaDriver#cuTexRefSetFilterMode * @see JCudaDriver#cuTexRefSetFlags * @see JCudaDriver#cuTexRefSetFormat * @see JCudaDriver#cuTexRefGetAddress * @see JCudaDriver#cuTexRefGetAddressMode * @see JCudaDriver#cuTexRefGetArray * @see JCudaDriver#cuTexRefGetFilterMode * @see JCudaDriver#cuTexRefGetFlags * @see JCudaDriver#cuTexRefGetFormat * * @deprecated Deprecated as of CUDA 10.1 */ public static int cuTexRefSetAddressMode(CUtexref hTexRef, int dim, int am) { return checkResult(cuTexRefSetAddressModeNative(hTexRef, dim, am)); } private static native int cuTexRefSetAddressModeNative(CUtexref hTexRef, int dim, int am); /** * Sets the filtering mode for a texture reference. * *
     * CUresult cuTexRefSetFilterMode (
     *      CUtexref hTexRef,
     *      CUfilter_mode fm )
     * 
*
*

Sets the filtering mode for a texture * reference. Specifies the filtering mode fm to be used when * reading memory through the texture reference hTexRef. * CUfilter_mode_enum is defined as: *

*
   typedef enum CUfilter_mode_enum {
     *       CU_TR_FILTER_MODE_POINT = 0,
     *       CU_TR_FILTER_MODE_LINEAR = 1
     *    } CUfilter_mode;
*

*

Note that this call has no effect if * hTexRef is bound to linear memory. *

*
* * @param hTexRef Texture reference * @param fm Filtering mode to set * * @return CUDA_SUCCESS, CUDA_ERROR_DEINITIALIZED, CUDA_ERROR_NOT_INITIALIZED, * CUDA_ERROR_INVALID_CONTEXT, CUDA_ERROR_INVALID_VALUE * * @see JCudaDriver#cuTexRefSetAddress * @see JCudaDriver#cuTexRefSetAddress2D * @see JCudaDriver#cuTexRefSetAddressMode * @see JCudaDriver#cuTexRefSetArray * @see JCudaDriver#cuTexRefSetFlags * @see JCudaDriver#cuTexRefSetFormat * @see JCudaDriver#cuTexRefGetAddress * @see JCudaDriver#cuTexRefGetAddressMode * @see JCudaDriver#cuTexRefGetArray * @see JCudaDriver#cuTexRefGetFilterMode * @see JCudaDriver#cuTexRefGetFlags * @see JCudaDriver#cuTexRefGetFormat * * @deprecated Deprecated as of CUDA 10.1 */ public static int cuTexRefSetFilterMode(CUtexref hTexRef, int fm) { return checkResult(cuTexRefSetFilterModeNative(hTexRef, fm)); } private static native int cuTexRefSetFilterModeNative(CUtexref hTexRef, int fm); /** * Sets the mipmap filtering mode for a texture reference. * *
     * CUresult cuTexRefSetMipmapFilterMode (
     *      CUtexref hTexRef,
     *      CUfilter_mode fm )
     * 
*
*

Sets the mipmap filtering mode for a * texture reference. Specifies the mipmap filtering mode fm * to be used when reading memory through the texture reference hTexRef. CUfilter_mode_enum is defined as: *

*
   typedef enum CUfilter_mode_enum {
     *       CU_TR_FILTER_MODE_POINT = 0,
     *       CU_TR_FILTER_MODE_LINEAR = 1
     *    } CUfilter_mode;
*

*

Note that this call has no effect if * hTexRef is not bound to a mipmapped array. *

*
* * @param hTexRef Texture reference * @param fm Filtering mode to set * * @return CUDA_SUCCESS, CUDA_ERROR_DEINITIALIZED, CUDA_ERROR_NOT_INITIALIZED, * CUDA_ERROR_INVALID_CONTEXT, CUDA_ERROR_INVALID_VALUE * * @see JCudaDriver#cuTexRefSetAddress * @see JCudaDriver#cuTexRefSetAddress2D * @see JCudaDriver#cuTexRefSetAddressMode * @see JCudaDriver#cuTexRefSetArray * @see JCudaDriver#cuTexRefSetFlags * @see JCudaDriver#cuTexRefSetFormat * @see JCudaDriver#cuTexRefGetAddress * @see JCudaDriver#cuTexRefGetAddressMode * @see JCudaDriver#cuTexRefGetArray * @see JCudaDriver#cuTexRefGetFilterMode * @see JCudaDriver#cuTexRefGetFlags * @see JCudaDriver#cuTexRefGetFormat * * @deprecated Deprecated as of CUDA 10.1 */ public static int cuTexRefSetMipmapFilterMode(CUtexref hTexRef, int fm) { return checkResult(cuTexRefSetMipmapFilterModeNative(hTexRef, fm)); } private static native int cuTexRefSetMipmapFilterModeNative(CUtexref hTexRef, int fm); /** * Sets the mipmap level bias for a texture reference. * *
     * CUresult cuTexRefSetMipmapLevelBias (
     *      CUtexref hTexRef,
     *      float  bias )
     * 
*
*

Sets the mipmap level bias for a texture * reference. Specifies the mipmap level bias bias to be added * to the specified mipmap level when reading memory through the texture * reference hTexRef. *

*

Note that this call has no effect if * hTexRef is not bound to a mipmapped array. *

*
* * @param hTexRef Texture reference * @param bias Mipmap level bias * * @return CUDA_SUCCESS, CUDA_ERROR_DEINITIALIZED, CUDA_ERROR_NOT_INITIALIZED, * CUDA_ERROR_INVALID_CONTEXT, CUDA_ERROR_INVALID_VALUE * * @see JCudaDriver#cuTexRefSetAddress * @see JCudaDriver#cuTexRefSetAddress2D * @see JCudaDriver#cuTexRefSetAddressMode * @see JCudaDriver#cuTexRefSetArray * @see JCudaDriver#cuTexRefSetFlags * @see JCudaDriver#cuTexRefSetFormat * @see JCudaDriver#cuTexRefGetAddress * @see JCudaDriver#cuTexRefGetAddressMode * @see JCudaDriver#cuTexRefGetArray * @see JCudaDriver#cuTexRefGetFilterMode * @see JCudaDriver#cuTexRefGetFlags * @see JCudaDriver#cuTexRefGetFormat * * @deprecated Deprecated as of CUDA 10.1 */ public static int cuTexRefSetMipmapLevelBias(CUtexref hTexRef, float bias) { return checkResult(cuTexRefSetMipmapLevelBiasNative(hTexRef, bias)); } private static native int cuTexRefSetMipmapLevelBiasNative(CUtexref hTexRef, float bias); /** * Sets the mipmap min/max mipmap level clamps for a texture reference. * *
     * CUresult cuTexRefSetMipmapLevelClamp (
     *      CUtexref hTexRef,
     *      float  minMipmapLevelClamp,
     *      float  maxMipmapLevelClamp )
     * 
*
*

Sets the mipmap min/max mipmap level * clamps for a texture reference. Specifies the min/max mipmap level * clamps, minMipmapLevelClamp and maxMipmapLevelClamp * respectively, to be used when reading memory through the texture * reference hTexRef. *

*

Note that this call has no effect if * hTexRef is not bound to a mipmapped array. *

*
* * @param hTexRef Texture reference * @param minMipmapLevelClamp Mipmap min level clamp * @param maxMipmapLevelClamp Mipmap max level clamp * * @return CUDA_SUCCESS, CUDA_ERROR_DEINITIALIZED, CUDA_ERROR_NOT_INITIALIZED, * CUDA_ERROR_INVALID_CONTEXT, CUDA_ERROR_INVALID_VALUE * * @see JCudaDriver#cuTexRefSetAddress * @see JCudaDriver#cuTexRefSetAddress2D * @see JCudaDriver#cuTexRefSetAddressMode * @see JCudaDriver#cuTexRefSetArray * @see JCudaDriver#cuTexRefSetFlags * @see JCudaDriver#cuTexRefSetFormat * @see JCudaDriver#cuTexRefGetAddress * @see JCudaDriver#cuTexRefGetAddressMode * @see JCudaDriver#cuTexRefGetArray * @see JCudaDriver#cuTexRefGetFilterMode * @see JCudaDriver#cuTexRefGetFlags * @see JCudaDriver#cuTexRefGetFormat * * @deprecated Deprecated as of CUDA 10.1 */ public static int cuTexRefSetMipmapLevelClamp(CUtexref hTexRef, float minMipmapLevelClamp, float maxMipmapLevelClamp) { return checkResult(cuTexRefSetMipmapLevelClampNative(hTexRef, minMipmapLevelClamp, maxMipmapLevelClamp)); } private static native int cuTexRefSetMipmapLevelClampNative(CUtexref hTexRef, float minMipmapLevelClamp, float maxMipmapLevelClamp); /** * Sets the maximum anistropy for a texture reference. * *
     * CUresult cuTexRefSetMaxAnisotropy (
     *      CUtexref hTexRef,
     *      unsigned int  maxAniso )
     * 
*
*

Sets the maximum anistropy for a texture * reference. Specifies the maximum aniostropy maxAniso to be * used when reading memory through the texture reference hTexRef. *

*

Note that this call has no effect if * hTexRef is bound to linear memory. *

*
* * @param hTexRef Texture reference * @param maxAniso Maximum anisotropy * * @return CUDA_SUCCESS, CUDA_ERROR_DEINITIALIZED, CUDA_ERROR_NOT_INITIALIZED, * CUDA_ERROR_INVALID_CONTEXT, CUDA_ERROR_INVALID_VALUE * * @see JCudaDriver#cuTexRefSetAddress * @see JCudaDriver#cuTexRefSetAddress2D * @see JCudaDriver#cuTexRefSetAddressMode * @see JCudaDriver#cuTexRefSetArray * @see JCudaDriver#cuTexRefSetFlags * @see JCudaDriver#cuTexRefSetFormat * @see JCudaDriver#cuTexRefGetAddress * @see JCudaDriver#cuTexRefGetAddressMode * @see JCudaDriver#cuTexRefGetArray * @see JCudaDriver#cuTexRefGetFilterMode * @see JCudaDriver#cuTexRefGetFlags * @see JCudaDriver#cuTexRefGetFormat * * @deprecated Deprecated as of CUDA 10.1 */ public static int cuTexRefSetMaxAnisotropy(CUtexref hTexRef, int maxAniso) { return checkResult(cuTexRefSetMaxAnisotropyNative(hTexRef, maxAniso)); } private static native int cuTexRefSetMaxAnisotropyNative(CUtexref hTexRef, int maxAniso); /** * Sets the border color for a texture reference
*
* Specifies the value of the RGBA color via the pBorderColor to the texture reference * hTexRef. The color value supports only float type and holds color components in * the following sequence:
* pBorderColor[0] holds 'R' component
* pBorderColor[1] holds 'G' component
* pBorderColor[2] holds 'B' component
* pBorderColor[3] holds 'A' component
*
* Note that the color values can be set only when the Address mode is set to * CU_TR_ADDRESS_MODE_BORDER using ::cuTexRefSetAddressMode. * Applications using integer border color values have to "reinterpret_cast" their values to float. * * @param hTexRef Texture reference * @param pBorderColor RGBA color * @return CUDA_SUCCESS, CUDA_ERROR_DEINITIALIZED, CUDA_ERROR_NOT_INITIALIZED, * CUDA_ERROR_INVALID_CONTEXT, CUDA_ERROR_INVALID_VALUE * * @see JCudaDriver#cuTexRefSetAddressMode * @see JCudaDriver#cuTexRefGetAddressMode * @see JCudaDriver#cuTexRefGetBorderColor * * @deprecated Deprecated as of CUDA 10.1 */ public static int cuTexRefSetBorderColor(CUtexref hTexRef, float pBorderColor[]) { return checkResult(cuTexRefSetBorderColorNative(hTexRef, pBorderColor)); } private static native int cuTexRefSetBorderColorNative(CUtexref hTexRef, float pBorderColor[]); /** * Sets the flags for a texture reference. * *
     * CUresult cuTexRefSetFlags (
     *      CUtexref hTexRef,
     *      unsigned int  Flags )
     * 
*
*

Sets the flags for a texture reference. * Specifies optional flags via Flags to specify the behavior * of data returned through the texture reference hTexRef. The * valid flags are: *

*
    *
  • *

    CU_TRSF_READ_AS_INTEGER, which * suppresses the default behavior of having the texture promote integer * data to floating point data in the range [0, * 1]. Note that texture with * 32-bit integer format would not be promoted, regardless of whether or * not this flag is specified; *

    *
  • *
  • *

    CU_TRSF_NORMALIZED_COORDINATES, * which suppresses the default behavior of having the texture coordinates * range from [0, Dim) where Dim is the width or height * of the CUDA array. Instead, the * texture coordinates [0, 1.0) reference the entire breadth of the array * dimension; *

    *
  • *
*

*
* * @param hTexRef Texture reference * @param Flags Optional flags to set * * @return CUDA_SUCCESS, CUDA_ERROR_DEINITIALIZED, CUDA_ERROR_NOT_INITIALIZED, * CUDA_ERROR_INVALID_CONTEXT, CUDA_ERROR_INVALID_VALUE * * @see JCudaDriver#cuTexRefSetAddress * @see JCudaDriver#cuTexRefSetAddress2D * @see JCudaDriver#cuTexRefSetAddressMode * @see JCudaDriver#cuTexRefSetArray * @see JCudaDriver#cuTexRefSetFilterMode * @see JCudaDriver#cuTexRefSetFormat * @see JCudaDriver#cuTexRefGetAddress * @see JCudaDriver#cuTexRefGetAddressMode * @see JCudaDriver#cuTexRefGetArray * @see JCudaDriver#cuTexRefGetFilterMode * @see JCudaDriver#cuTexRefGetFlags * @see JCudaDriver#cuTexRefGetFormat * * @deprecated Deprecated as of CUDA 10.1 */ public static int cuTexRefSetFlags(CUtexref hTexRef, int Flags) { return checkResult(cuTexRefSetFlagsNative(hTexRef, Flags)); } private static native int cuTexRefSetFlagsNative(CUtexref hTexRef, int Flags); /** * Gets the address associated with a texture reference. * *
     * CUresult cuTexRefGetAddress (
     *      CUdeviceptr* pdptr,
     *      CUtexref hTexRef )
     * 
*
*

Gets the address associated with a * texture reference. Returns in *pdptr the base address bound * to the texture reference hTexRef, or returns * CUDA_ERROR_INVALID_VALUE if the texture reference is not bound to any * device memory range. *

*
* * @param pdptr Returned device address * @param hTexRef Texture reference * * @return CUDA_SUCCESS, CUDA_ERROR_DEINITIALIZED, CUDA_ERROR_NOT_INITIALIZED, * CUDA_ERROR_INVALID_CONTEXT, CUDA_ERROR_INVALID_VALUE * * @see JCudaDriver#cuTexRefSetAddress * @see JCudaDriver#cuTexRefSetAddress2D * @see JCudaDriver#cuTexRefSetAddressMode * @see JCudaDriver#cuTexRefSetArray * @see JCudaDriver#cuTexRefSetFilterMode * @see JCudaDriver#cuTexRefSetFlags * @see JCudaDriver#cuTexRefSetFormat * @see JCudaDriver#cuTexRefGetAddressMode * @see JCudaDriver#cuTexRefGetArray * @see JCudaDriver#cuTexRefGetFilterMode * @see JCudaDriver#cuTexRefGetFlags * @see JCudaDriver#cuTexRefGetFormat * * @deprecated Deprecated as of CUDA 10.1 */ public static int cuTexRefGetAddress(CUdeviceptr pdptr, CUtexref hTexRef) { return checkResult(cuTexRefGetAddressNative(pdptr, hTexRef)); } private static native int cuTexRefGetAddressNative(CUdeviceptr pdptr, CUtexref hTexRef); /** * Gets the array bound to a texture reference. * *
     * CUresult cuTexRefGetArray (
     *      CUarray* phArray,
     *      CUtexref hTexRef )
     * 
*
*

Gets the array bound to a texture * reference. Returns in *phArray the CUDA array bound to the * texture reference hTexRef, or returns CUDA_ERROR_INVALID_VALUE * if the texture reference is not bound to any CUDA array. *

*
* * @param phArray Returned array * @param hTexRef Texture reference * * @return CUDA_SUCCESS, CUDA_ERROR_DEINITIALIZED, CUDA_ERROR_NOT_INITIALIZED, * CUDA_ERROR_INVALID_CONTEXT, CUDA_ERROR_INVALID_VALUE * * @see JCudaDriver#cuTexRefSetAddress * @see JCudaDriver#cuTexRefSetAddress2D * @see JCudaDriver#cuTexRefSetAddressMode * @see JCudaDriver#cuTexRefSetArray * @see JCudaDriver#cuTexRefSetFilterMode * @see JCudaDriver#cuTexRefSetFlags * @see JCudaDriver#cuTexRefSetFormat * @see JCudaDriver#cuTexRefGetAddress * @see JCudaDriver#cuTexRefGetAddressMode * @see JCudaDriver#cuTexRefGetFilterMode * @see JCudaDriver#cuTexRefGetFlags * @see JCudaDriver#cuTexRefGetFormat * * @deprecated Deprecated as of CUDA 10.1 */ public static int cuTexRefGetArray(CUarray phArray, CUtexref hTexRef) { return checkResult(cuTexRefGetArrayNative(phArray, hTexRef)); } private static native int cuTexRefGetArrayNative(CUarray phArray, CUtexref hTexRef); /** * Gets the mipmapped array bound to a texture reference. * *
     * CUresult cuTexRefGetMipmappedArray (
     *      CUmipmappedArray* phMipmappedArray,
     *      CUtexref hTexRef )
     * 
*
*

Gets the mipmapped array bound to a * texture reference. Returns in *phMipmappedArray the CUDA * mipmapped array bound to the texture reference hTexRef, or * returns CUDA_ERROR_INVALID_VALUE if the texture reference is not bound * to any CUDA mipmapped array. *

*
* * @param phMipmappedArray Returned mipmapped array * @param hTexRef Texture reference * * @return CUDA_SUCCESS, CUDA_ERROR_DEINITIALIZED, CUDA_ERROR_NOT_INITIALIZED, * CUDA_ERROR_INVALID_CONTEXT, CUDA_ERROR_INVALID_VALUE * * @see JCudaDriver#cuTexRefSetAddress * @see JCudaDriver#cuTexRefSetAddress2D * @see JCudaDriver#cuTexRefSetAddressMode * @see JCudaDriver#cuTexRefSetArray * @see JCudaDriver#cuTexRefSetFilterMode * @see JCudaDriver#cuTexRefSetFlags * @see JCudaDriver#cuTexRefSetFormat * @see JCudaDriver#cuTexRefGetAddress * @see JCudaDriver#cuTexRefGetAddressMode * @see JCudaDriver#cuTexRefGetFilterMode * @see JCudaDriver#cuTexRefGetFlags * @see JCudaDriver#cuTexRefGetFormat * * @deprecated Deprecated as of CUDA 10.1 */ public static int cuTexRefGetMipmappedArray(CUmipmappedArray phMipmappedArray, CUtexref hTexRef) { return checkResult(cuTexRefGetMipmappedArrayNative(phMipmappedArray, hTexRef)); } private static native int cuTexRefGetMipmappedArrayNative(CUmipmappedArray phMipmappedArray, CUtexref hTexRef); /** * Gets the addressing mode used by a texture reference. * *
     * CUresult cuTexRefGetAddressMode (
     *      CUaddress_mode* pam,
     *      CUtexref hTexRef,
     *      int  dim )
     * 
*
*

Gets the addressing mode used by a * texture reference. Returns in *pam the addressing mode * corresponding to the dimension dim of the texture reference * hTexRef. Currently, the only valid value for dim * are 0 and 1. *

*
* * @param pam Returned addressing mode * @param hTexRef Texture reference * @param dim Dimension * * @return CUDA_SUCCESS, CUDA_ERROR_DEINITIALIZED, CUDA_ERROR_NOT_INITIALIZED, * CUDA_ERROR_INVALID_CONTEXT, CUDA_ERROR_INVALID_VALUE * * @see JCudaDriver#cuTexRefSetAddress * @see JCudaDriver#cuTexRefSetAddress2D * @see JCudaDriver#cuTexRefSetAddressMode * @see JCudaDriver#cuTexRefSetArray * @see JCudaDriver#cuTexRefSetFilterMode * @see JCudaDriver#cuTexRefSetFlags * @see JCudaDriver#cuTexRefSetFormat * @see JCudaDriver#cuTexRefGetAddress * @see JCudaDriver#cuTexRefGetArray * @see JCudaDriver#cuTexRefGetFilterMode * @see JCudaDriver#cuTexRefGetFlags * @see JCudaDriver#cuTexRefGetFormat * * @deprecated Deprecated as of CUDA 10.1 */ public static int cuTexRefGetAddressMode(int pam[], CUtexref hTexRef, int dim) { return checkResult(cuTexRefGetAddressModeNative(pam, hTexRef, dim)); } private static native int cuTexRefGetAddressModeNative(int pam[], CUtexref hTexRef, int dim); /** * Gets the filter-mode used by a texture reference. * *
     * CUresult cuTexRefGetFilterMode (
     *      CUfilter_mode* pfm,
     *      CUtexref hTexRef )
     * 
*
*

Gets the filter-mode used by a texture * reference. Returns in *pfm the filtering mode of the texture * reference hTexRef. *

*
* * @param pfm Returned filtering mode * @param hTexRef Texture reference * * @return CUDA_SUCCESS, CUDA_ERROR_DEINITIALIZED, CUDA_ERROR_NOT_INITIALIZED, * CUDA_ERROR_INVALID_CONTEXT, CUDA_ERROR_INVALID_VALUE * * @see JCudaDriver#cuTexRefSetAddress * @see JCudaDriver#cuTexRefSetAddress2D * @see JCudaDriver#cuTexRefSetAddressMode * @see JCudaDriver#cuTexRefSetArray * @see JCudaDriver#cuTexRefSetFilterMode * @see JCudaDriver#cuTexRefSetFlags * @see JCudaDriver#cuTexRefSetFormat * @see JCudaDriver#cuTexRefGetAddress * @see JCudaDriver#cuTexRefGetAddressMode * @see JCudaDriver#cuTexRefGetArray * @see JCudaDriver#cuTexRefGetFlags * @see JCudaDriver#cuTexRefGetFormat * * @deprecated Deprecated as of CUDA 10.1 */ public static int cuTexRefGetFilterMode(int pfm[], CUtexref hTexRef) { return checkResult(cuTexRefGetFilterModeNative(pfm, hTexRef)); } private static native int cuTexRefGetFilterModeNative(int pfm[], CUtexref hTexRef); /** * Gets the format used by a texture reference. * *
     * CUresult cuTexRefGetFormat (
     *      CUarray_format* pFormat,
     *      int* pNumChannels,
     *      CUtexref hTexRef )
     * 
*
*

Gets the format used by a texture * reference. Returns in *pFormat and *pNumChannels * the format and number of components of the CUDA array bound to the * texture reference hTexRef. If pFormat or pNumChannels is NULL, it will be ignored. *

*
* * @param pFormat Returned format * @param pNumChannels Returned number of components * @param hTexRef Texture reference * * @return CUDA_SUCCESS, CUDA_ERROR_DEINITIALIZED, CUDA_ERROR_NOT_INITIALIZED, * CUDA_ERROR_INVALID_CONTEXT, CUDA_ERROR_INVALID_VALUE * * @see JCudaDriver#cuTexRefSetAddress * @see JCudaDriver#cuTexRefSetAddress2D * @see JCudaDriver#cuTexRefSetAddressMode * @see JCudaDriver#cuTexRefSetArray * @see JCudaDriver#cuTexRefSetFilterMode * @see JCudaDriver#cuTexRefSetFlags * @see JCudaDriver#cuTexRefSetFormat * @see JCudaDriver#cuTexRefGetAddress * @see JCudaDriver#cuTexRefGetAddressMode * @see JCudaDriver#cuTexRefGetArray * @see JCudaDriver#cuTexRefGetFilterMode * @see JCudaDriver#cuTexRefGetFlags * * @deprecated Deprecated as of CUDA 10.1 */ public static int cuTexRefGetFormat(int pFormat[], int pNumChannels[], CUtexref hTexRef) { return checkResult(cuTexRefGetFormatNative(pFormat, pNumChannels, hTexRef)); } private static native int cuTexRefGetFormatNative(int pFormat[], int pNumChannels[], CUtexref hTexRef); /** * Gets the mipmap filtering mode for a texture reference. * *
     * CUresult cuTexRefGetMipmapFilterMode (
     *      CUfilter_mode* pfm,
     *      CUtexref hTexRef )
     * 
*
*

Gets the mipmap filtering mode for a * texture reference. Returns the mipmap filtering mode in pfm * that's used when reading memory through the texture reference hTexRef. *

*
* * @param pfm Returned mipmap filtering mode * @param hTexRef Texture reference * * @return CUDA_SUCCESS, CUDA_ERROR_DEINITIALIZED, CUDA_ERROR_NOT_INITIALIZED, * CUDA_ERROR_INVALID_CONTEXT, CUDA_ERROR_INVALID_VALUE * * @see JCudaDriver#cuTexRefSetAddress * @see JCudaDriver#cuTexRefSetAddress2D * @see JCudaDriver#cuTexRefSetAddressMode * @see JCudaDriver#cuTexRefSetArray * @see JCudaDriver#cuTexRefSetFlags * @see JCudaDriver#cuTexRefSetFormat * @see JCudaDriver#cuTexRefGetAddress * @see JCudaDriver#cuTexRefGetAddressMode * @see JCudaDriver#cuTexRefGetArray * @see JCudaDriver#cuTexRefGetFilterMode * @see JCudaDriver#cuTexRefGetFlags * @see JCudaDriver#cuTexRefGetFormat * * @deprecated Deprecated as of CUDA 10.1 */ public static int cuTexRefGetMipmapFilterMode(int pfm[], CUtexref hTexRef) { return checkResult(cuTexRefGetMipmapFilterModeNative(pfm, hTexRef)); } private static native int cuTexRefGetMipmapFilterModeNative(int pfm[], CUtexref hTexRef); /** * Gets the mipmap level bias for a texture reference. * *
     * CUresult cuTexRefGetMipmapLevelBias (
     *      float* pbias,
     *      CUtexref hTexRef )
     * 
*
*

Gets the mipmap level bias for a texture * reference. Returns the mipmap level bias in pBias that's * added to the specified mipmap level when reading memory through the * texture reference hTexRef. *

*
* * @param pbias Returned mipmap level bias * @param hTexRef Texture reference * * @return CUDA_SUCCESS, CUDA_ERROR_DEINITIALIZED, CUDA_ERROR_NOT_INITIALIZED, * CUDA_ERROR_INVALID_CONTEXT, CUDA_ERROR_INVALID_VALUE * * @see JCudaDriver#cuTexRefSetAddress * @see JCudaDriver#cuTexRefSetAddress2D * @see JCudaDriver#cuTexRefSetAddressMode * @see JCudaDriver#cuTexRefSetArray * @see JCudaDriver#cuTexRefSetFlags * @see JCudaDriver#cuTexRefSetFormat * @see JCudaDriver#cuTexRefGetAddress * @see JCudaDriver#cuTexRefGetAddressMode * @see JCudaDriver#cuTexRefGetArray * @see JCudaDriver#cuTexRefGetFilterMode * @see JCudaDriver#cuTexRefGetFlags * @see JCudaDriver#cuTexRefGetFormat * * @deprecated Deprecated as of CUDA 10.1 */ public static int cuTexRefGetMipmapLevelBias(float pbias[], CUtexref hTexRef) { return checkResult(cuTexRefGetMipmapLevelBiasNative(pbias, hTexRef)); } private static native int cuTexRefGetMipmapLevelBiasNative(float pbias[], CUtexref hTexRef); /** * Gets the min/max mipmap level clamps for a texture reference. * *
     * CUresult cuTexRefGetMipmapLevelClamp (
     *      float* pminMipmapLevelClamp,
     *      float* pmaxMipmapLevelClamp,
     *      CUtexref hTexRef )
     * 
*
*

Gets the min/max mipmap level clamps for * a texture reference. Returns the min/max mipmap level clamps in pminMipmapLevelClamp and pmaxMipmapLevelClamp that's * used when reading memory through the texture reference hTexRef. *

*
* * @param pminMipmapLevelClamp Returned mipmap min level clamp * @param pmaxMipmapLevelClamp Returned mipmap max level clamp * @param hTexRef Texture reference * * @return CUDA_SUCCESS, CUDA_ERROR_DEINITIALIZED, CUDA_ERROR_NOT_INITIALIZED, * CUDA_ERROR_INVALID_CONTEXT, CUDA_ERROR_INVALID_VALUE * * @see JCudaDriver#cuTexRefSetAddress * @see JCudaDriver#cuTexRefSetAddress2D * @see JCudaDriver#cuTexRefSetAddressMode * @see JCudaDriver#cuTexRefSetArray * @see JCudaDriver#cuTexRefSetFlags * @see JCudaDriver#cuTexRefSetFormat * @see JCudaDriver#cuTexRefGetAddress * @see JCudaDriver#cuTexRefGetAddressMode * @see JCudaDriver#cuTexRefGetArray * @see JCudaDriver#cuTexRefGetFilterMode * @see JCudaDriver#cuTexRefGetFlags * @see JCudaDriver#cuTexRefGetFormat * * @deprecated Deprecated as of CUDA 10.1 */ public static int cuTexRefGetMipmapLevelClamp(float pminMipmapLevelClamp[], float pmaxMipmapLevelClamp[], CUtexref hTexRef) { return checkResult(cuTexRefGetMipmapLevelClampNative(pminMipmapLevelClamp, pmaxMipmapLevelClamp, hTexRef)); } private static native int cuTexRefGetMipmapLevelClampNative(float pminMipmapLevelClamp[], float pmaxMipmapLevelClamp[], CUtexref hTexRef); /** * Gets the maximum anistropy for a texture reference. * *
     * CUresult cuTexRefGetMaxAnisotropy (
     *      int* pmaxAniso,
     *      CUtexref hTexRef )
     * 
*
*

Gets the maximum anistropy for a texture * reference. Returns the maximum aniostropy in pmaxAniso * that's used when reading memory through the texture reference hTexRef. *

*
* * @param pmaxAniso Returned maximum anisotropy * @param hTexRef Texture reference * * @return CUDA_SUCCESS, CUDA_ERROR_DEINITIALIZED, CUDA_ERROR_NOT_INITIALIZED, * CUDA_ERROR_INVALID_CONTEXT, CUDA_ERROR_INVALID_VALUE * * @see JCudaDriver#cuTexRefSetAddress * @see JCudaDriver#cuTexRefSetAddress2D * @see JCudaDriver#cuTexRefSetAddressMode * @see JCudaDriver#cuTexRefSetArray * @see JCudaDriver#cuTexRefSetFlags * @see JCudaDriver#cuTexRefSetFormat * @see JCudaDriver#cuTexRefGetAddress * @see JCudaDriver#cuTexRefGetAddressMode * @see JCudaDriver#cuTexRefGetArray * @see JCudaDriver#cuTexRefGetFilterMode * @see JCudaDriver#cuTexRefGetFlags * @see JCudaDriver#cuTexRefGetFormat * * @deprecated Deprecated as of CUDA 10.1 */ public static int cuTexRefGetMaxAnisotropy(int pmaxAniso[], CUtexref hTexRef) { return checkResult(cuTexRefGetMaxAnisotropyNative(pmaxAniso, hTexRef)); } private static native int cuTexRefGetMaxAnisotropyNative(int pmaxAniso[], CUtexref hTexRef); /** * brief Gets the border color used by a texture reference
*
* Returns in pBorderColor, values of the RGBA color used by * the texture reference hTexRef. * The color value is of type float and holds color components in * the following sequence:
* pBorderColor[0] holds 'R' component
* pBorderColor[1] holds 'G' component
* pBorderColor[2] holds 'B' component
* pBorderColor[3] holds 'A' component
* * @param hTexRef Texture reference * @param pBorderColor Returned Type and Value of RGBA color * @return CUDA_SUCCESS, CUDA_ERROR_DEINITIALIZED, CUDA_ERROR_NOT_INITIALIZED, * CUDA_ERROR_INVALID_CONTEXT, CUDA_ERROR_INVALID_VALUE * * @see JCudaDriver#cuTexRefSetAddressMode * @see JCudaDriver#cuTexRefSetAddressMode * @see JCudaDriver#cuTexRefSetBorderColor * * @deprecated Deprecated as of CUDA 10.1 */ public static int cuTexRefGetBorderColor(float pBorderColor[], CUtexref hTexRef) { return checkResult(cuTexRefGetBorderColorNative(pBorderColor, hTexRef)); } private static native int cuTexRefGetBorderColorNative(float pBorderColor[], CUtexref hTexRef); /** * Gets the flags used by a texture reference. * *
     * CUresult cuTexRefGetFlags (
     *      unsigned int* pFlags,
     *      CUtexref hTexRef )
     * 
*
*

Gets the flags used by a texture * reference. Returns in *pFlags the flags of the texture * reference hTexRef. *

*
* * @param pFlags Returned flags * @param hTexRef Texture reference * * @return CUDA_SUCCESS, CUDA_ERROR_DEINITIALIZED, CUDA_ERROR_NOT_INITIALIZED, * CUDA_ERROR_INVALID_CONTEXT, CUDA_ERROR_INVALID_VALUE * * @see JCudaDriver#cuTexRefSetAddress * @see JCudaDriver#cuTexRefSetAddress2D * @see JCudaDriver#cuTexRefSetAddressMode * @see JCudaDriver#cuTexRefSetArray * @see JCudaDriver#cuTexRefSetFilterMode * @see JCudaDriver#cuTexRefSetFlags * @see JCudaDriver#cuTexRefSetFormat * @see JCudaDriver#cuTexRefGetAddress * @see JCudaDriver#cuTexRefGetAddressMode * @see JCudaDriver#cuTexRefGetArray * @see JCudaDriver#cuTexRefGetFilterMode * @see JCudaDriver#cuTexRefGetFormat * * @deprecated Deprecated as of CUDA 10.1 */ public static int cuTexRefGetFlags(int pFlags[], CUtexref hTexRef) { return checkResult(cuTexRefGetFlagsNative(pFlags, hTexRef)); } private static native int cuTexRefGetFlagsNative(int pFlags[], CUtexref hTexRef); /** * Sets the CUDA array for a surface reference. * *
     * CUresult cuSurfRefSetArray (
     *      CUsurfref hSurfRef,
     *      CUarray hArray,
     *      unsigned int  Flags )
     * 
*
*

Sets the CUDA array for a surface * reference. Sets the CUDA array hArray to be read and written * by the surface reference hSurfRef. Any previous CUDA array * state associated with the surface reference is superseded by this * function. Flags must be set to 0. The CUDA_ARRAY3D_SURFACE_LDST * flag must have been set for the CUDA array. Any CUDA array previously * bound to hSurfRef is unbound. *

*
* * @param hSurfRef Surface reference handle * @param hArray CUDA array handle * @param Flags set to 0 * * @return CUDA_SUCCESS, CUDA_ERROR_DEINITIALIZED, CUDA_ERROR_NOT_INITIALIZED, * CUDA_ERROR_INVALID_CONTEXT, CUDA_ERROR_INVALID_VALUE * * @see JCudaDriver#cuModuleGetSurfRef * @see JCudaDriver#cuSurfRefGetArray * * @deprecated Deprecated as of CUDA 10.1 */ public static int cuSurfRefSetArray(CUsurfref hSurfRef, CUarray hArray, int Flags ) { return checkResult(cuSurfRefSetArrayNative(hSurfRef, hArray, Flags)); } private static native int cuSurfRefSetArrayNative(CUsurfref hSurfRef, CUarray hArray, int Flags ); /** * Passes back the CUDA array bound to a surface reference. * *
     * CUresult cuSurfRefGetArray (
     *      CUarray* phArray,
     *      CUsurfref hSurfRef )
     * </pre>
     * <p>
     * Returns in phArray the CUDA array bound to the surface reference
     * hSurfRef, or returns CUDA_ERROR_INVALID_VALUE if the surface
     * reference is not bound to any CUDA array.
     *
     * @param phArray Returned CUDA array handle
     * @param hSurfRef Surface reference handle
     *
     * @return CUDA_SUCCESS, CUDA_ERROR_DEINITIALIZED, CUDA_ERROR_NOT_INITIALIZED,
     * CUDA_ERROR_INVALID_CONTEXT, CUDA_ERROR_INVALID_VALUE
     *
     * @see JCudaDriver#cuModuleGetSurfRef
     * @see JCudaDriver#cuSurfRefSetArray
     *
     * @deprecated Deprecated as of CUDA 10.1
     */
    @Deprecated
    public static int cuSurfRefGetArray(CUarray phArray, CUsurfref hSurfRef)
    {
        // Forward to the native implementation and check the result code
        return checkResult(cuSurfRefGetArrayNative(phArray, hSurfRef));
    }
    private static native int cuSurfRefGetArrayNative(CUarray phArray, CUsurfref hSurfRef);

    /**
     * Creates a texture object.
     *
     * <pre>
     * CUresult cuTexObjectCreate (
     *      CUtexObject* pTexObject,
     *      const CUDA_RESOURCE_DESC* pResDesc,
     *      const CUDA_TEXTURE_DESC* pTexDesc,
     *      const CUDA_RESOURCE_VIEW_DESC* pResViewDesc )
     * 
*
*

Creates a texture object. Creates a * texture object and returns it in pTexObject. pResDesc * describes the data to texture from. pTexDesc describes how * the data should be sampled. pResViewDesc is an optional * argument that specifies an alternate format for the data described by * pResDesc, and also describes the subresource region to * restrict access to when texturing. pResViewDesc can only be * specified if the type of resource is a CUDA array or a CUDA mipmapped * array. *

*

Texture objects are only supported on * devices of compute capability 3.0 or higher. *

*

The CUDA_RESOURCE_DESC structure is * defined as: *

        typedef struct CUDA_RESOURCE_DESC_st
     *         {
     *             CUresourcetype resType;
     *
     *             union {
     *                 struct {
     *                     CUarray hArray;
     *                 } array;
     *                 struct {
     *                     CUmipmappedArray hMipmappedArray;
     *                 } mipmap;
     *                 struct {
     *                     CUdeviceptr devPtr;
     *                     CUarray_format format;
     *                     unsigned int numChannels;
     *                     size_t sizeInBytes;
     *                 } linear;
     *                 struct {
     *                     CUdeviceptr devPtr;
     *                     CUarray_format format;
     *                     unsigned int numChannels;
     *                     size_t width;
     *                     size_t height;
     *                     size_t pitchInBytes;
     *                 } pitch2D;
     *             } res;
     *
     *             unsigned int flags;
     *         } CUDA_RESOURCE_DESC;
* where: *
    *
  • *
    * CUDA_RESOURCE_DESC::resType * specifies the type of resource to texture from. CUresourceType is * defined as: *
            typedef enum CUresourcetype_enum {
         *             CU_RESOURCE_TYPE_ARRAY           = 0x00,
         *             CU_RESOURCE_TYPE_MIPMAPPED_ARRAY = 0x01,
         *             CU_RESOURCE_TYPE_LINEAR          = 0x02,
         *             CU_RESOURCE_TYPE_PITCH2D         = 0x03
         *         } CUresourcetype;
    *
    *
  • *
*

*

If CUDA_RESOURCE_DESC::resType is set * to CU_RESOURCE_TYPE_ARRAY, CUDA_RESOURCE_DESC::res::array::hArray must * be set to a valid CUDA array handle. *

*

If CUDA_RESOURCE_DESC::resType is set * to CU_RESOURCE_TYPE_MIPMAPPED_ARRAY, * CUDA_RESOURCE_DESC::res::mipmap::hMipmappedArray must be set to a valid * CUDA mipmapped array handle. *

*

If CUDA_RESOURCE_DESC::resType is set * to CU_RESOURCE_TYPE_LINEAR, CUDA_RESOURCE_DESC::res::linear::devPtr * must be set to a valid device pointer, that is aligned to * CU_DEVICE_ATTRIBUTE_TEXTURE_ALIGNMENT. CUDA_RESOURCE_DESC::res::linear::format * and CUDA_RESOURCE_DESC::res::linear::numChannels describe the format * of each component * and the number of components per array * element. CUDA_RESOURCE_DESC::res::linear::sizeInBytes specifies the * size of the array * in bytes. The total number of elements * in the linear address range cannot exceed * CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE1D_LINEAR_WIDTH. The number of * elements is computed as (sizeInBytes / (sizeof(format) * * numChannels)). *

*

If CUDA_RESOURCE_DESC::resType is set * to CU_RESOURCE_TYPE_PITCH2D, CUDA_RESOURCE_DESC::res::pitch2D::devPtr * must be set to a valid device pointer, that is aligned to * CU_DEVICE_ATTRIBUTE_TEXTURE_ALIGNMENT. CUDA_RESOURCE_DESC::res::pitch2D::format * and CUDA_RESOURCE_DESC::res::pitch2D::numChannels describe the format * of each component * and the number of components per array * element. CUDA_RESOURCE_DESC::res::pitch2D::width and * CUDA_RESOURCE_DESC::res::pitch2D::height * specify the width and height of the array * in elements, and cannot exceed CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE2D_LINEAR_WIDTH * and CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE2D_LINEAR_HEIGHT respectively. * CUDA_RESOURCE_DESC::res::pitch2D::pitchInBytes specifies the pitch * between two rows in bytes and has to be * aligned to * CU_DEVICE_ATTRIBUTE_TEXTURE_PITCH_ALIGNMENT. Pitch cannot exceed * CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE2D_LINEAR_PITCH. *

*
    *
  • *

    flags must be set to zero.

    *
  • *
*

*

The CUDA_TEXTURE_DESC struct is defined * as *

        typedef struct CUDA_TEXTURE_DESC_st {
     *             CUaddress_mode addressMode[3];
     *             CUfilter_mode filterMode;
     *             unsigned int flags;
     *             unsigned int maxAnisotropy;
     *             CUfilter_mode mipmapFilterMode;
     *             float mipmapLevelBias;
     *             float minMipmapLevelClamp;
     *             float maxMipmapLevelClamp;
     *         } CUDA_TEXTURE_DESC;
* where *
    *
  • *
    * CUDA_TEXTURE_DESC::addressMode * specifies the addressing mode for each dimension of the texture data. * CUaddress_mode is defined as: *
            typedef enum
         * CUaddress_mode_enum {
         *             CU_TR_ADDRESS_MODE_WRAP = 0,
         *             CU_TR_ADDRESS_MODE_CLAMP = 1,
         *             CU_TR_ADDRESS_MODE_MIRROR = 2,
         *             CU_TR_ADDRESS_MODE_BORDER = 3
         *         } CUaddress_mode;
    * This is ignored if * CUDA_RESOURCE_DESC::resType is CU_RESOURCE_TYPE_LINEAR. Also, if the * flag, CU_TRSF_NORMALIZED_COORDINATES is not set, the only supported * address mode is CU_TR_ADDRESS_MODE_CLAMP. *
    *
  • *
*

*
    *
  • *
    * CUDA_TEXTURE_DESC::filterMode * specifies the filtering mode to be used when fetching from the texture. * CUfilter_mode is defined as: *
            typedef enum CUfilter_mode_enum
         * {
         *             CU_TR_FILTER_MODE_POINT = 0,
         *             CU_TR_FILTER_MODE_LINEAR = 1
         *         } CUfilter_mode;
    * This is ignored if * CUDA_RESOURCE_DESC::resType is CU_RESOURCE_TYPE_LINEAR. *
    *
  • *
*

*
    *
  • *
    * CUDA_TEXTURE_DESC::flags can * be any combination of the following: *
      *
    • *

      CU_TRSF_READ_AS_INTEGER, * which suppresses the default behavior of having the texture promote * integer data to floating point data in the range [0, * 1]. Note that texture * with 32-bit integer format would not be promoted, regardless of whether * or not this flag is specified. *

      *
    • *
    • *

      CU_TRSF_NORMALIZED_COORDINATES, which suppresses the default behavior * of having the texture coordinates range from [0, Dim) where Dim is the * width or height * of the CUDA array. * Instead, the texture coordinates [0, 1.0) reference the entire breadth * of the array dimension; Note that * for CUDA mipmapped * arrays, this flag has to be set. *

      *
    • *
    *
    *
  • *
*

*
    *
  • *

    CUDA_TEXTURE_DESC::maxAnisotropy * specifies the maximum anistropy ratio to be used when doing anisotropic * filtering. This value will be clamped to the range * [1,16]. *

    *
  • *
*

*
    *
  • *

    CUDA_TEXTURE_DESC::mipmapFilterMode * specifies the filter mode when the calculated mipmap level lies between * two defined mipmap levels. *

    *
  • *
*

*
    *
  • *

    CUDA_TEXTURE_DESC::mipmapLevelBias * specifies the offset to be applied to the calculated mipmap level. *

    *
  • *
*

*
    *
  • *

    CUDA_TEXTURE_DESC::minMipmapLevelClamp * specifies the lower end of the mipmap level range to clamp access to. *

    *
  • *
*

*
    *
  • *

    CUDA_TEXTURE_DESC::maxMipmapLevelClamp * specifies the upper end of the mipmap level range to clamp access to. *

    *
  • *
*

*

The CUDA_RESOURCE_VIEW_DESC struct is * defined as *

        typedef struct CUDA_RESOURCE_VIEW_DESC_st
     *         {
     *             CUresourceViewFormat format;
     *             size_t width;
     *             size_t height;
     *             size_t depth;
     *             unsigned int firstMipmapLevel;
     *             unsigned int lastMipmapLevel;
     *             unsigned int firstLayer;
     *             unsigned int lastLayer;
     *         } CUDA_RESOURCE_VIEW_DESC;
* where: *
    *
  • *

    CUDA_RESOURCE_VIEW_DESC::format * specifies how the data contained in the CUDA array or CUDA mipmapped * array should be interpreted. Note that this can incur * a change in size of the texture * data. If the resource view format is a block compressed format, then * the underlying CUDA array * or CUDA mipmapped array has to * have a base of format CU_AD_FORMAT_UNSIGNED_INT32. with 2 or 4 channels, * depending on the block compressed format. For ex., BC1 and BC4 require * the underlying CUDA array to * have a format of * CU_AD_FORMAT_UNSIGNED_INT32 with 2 channels. The other BC formats * require the underlying resource to have the same base format but with * 4 channels. *

    *
  • *
*

*
    *
  • *

    CUDA_RESOURCE_VIEW_DESC::width * specifies the new width of the texture data. If the resource view * format is a block compressed format, this value has to * be 4 times the original width * of the resource. For non block compressed formats, this value has to * be equal to that of the * original resource. *

    *
  • *
*

*
    *
  • *

    CUDA_RESOURCE_VIEW_DESC::height * specifies the new height of the texture data. If the resource view * format is a block compressed format, this value has to * be 4 times the original height * of the resource. For non block compressed formats, this value has to * be equal to that of the * original resource. *

    *
  • *
*

*
    *
  • *

    CUDA_RESOURCE_VIEW_DESC::depth * specifies the new depth of the texture data. This value has to be equal * to that of the original resource. *

    *
  • *
*

*
    *
  • *

    CUDA_RESOURCE_VIEW_DESC::firstMipmapLevel specifies the most detailed * mipmap level. This will be the new mipmap level zero. For non-mipmapped * resources, this value * has to be * zero.CUDA_TEXTURE_DESC::minMipmapLevelClamp and * CUDA_TEXTURE_DESC::maxMipmapLevelClamp will be relative to this value. * For ex., if the firstMipmapLevel is set to 2, and a minMipmapLevelClamp * of 1.2 is specified, * then the actual minimum mipmap * level clamp will be 3.2. *

    *
  • *
*

*
    *
  • *

    CUDA_RESOURCE_VIEW_DESC::lastMipmapLevel * specifies the least detailed mipmap level. For non-mipmapped resources, * this value has to be zero. *

    *
  • *
*

*
    *
  • *

    CUDA_RESOURCE_VIEW_DESC::firstLayer * specifies the first layer index for layered textures. This will be the * new layer zero. For non-layered resources, this value * has to be zero. *

    *
  • *
*

*
    *
  • *

    CUDA_RESOURCE_VIEW_DESC::lastLayer * specifies the last layer index for layered textures. For non-layered * resources, this value has to be zero. *

    *
  • *
*

*
* * @param pTexObject Texture object to create * @param pResDesc Resource descriptor * @param pTexDesc Texture descriptor * @param pResViewDesc Resource view descriptor * * @return CUDA_SUCCESS, CUDA_ERROR_DEINITIALIZED, CUDA_ERROR_NOT_INITIALIZED, * CUDA_ERROR_INVALID_CONTEXT, CUDA_ERROR_INVALID_VALUE * * @see JCudaDriver#cuTexObjectDestroy */ public static int cuTexObjectCreate(CUtexObject pTexObject, CUDA_RESOURCE_DESC pResDesc, CUDA_TEXTURE_DESC pTexDesc, CUDA_RESOURCE_VIEW_DESC pResViewDesc) { return checkResult(cuTexObjectCreateNative(pTexObject, pResDesc, pTexDesc, pResViewDesc)); } private static native int cuTexObjectCreateNative(CUtexObject pTexObject, CUDA_RESOURCE_DESC pResDesc, CUDA_TEXTURE_DESC pTexDesc, CUDA_RESOURCE_VIEW_DESC pResViewDesc); /** * Destroys a texture object. * *
     * CUresult cuTexObjectDestroy (
     *      CUtexObject texObject )
     * </pre>
     * <p>
     * Destroys the texture object specified by texObject.
     *
     * @param texObject Texture object to destroy
     *
     * @return CUDA_SUCCESS, CUDA_ERROR_DEINITIALIZED, CUDA_ERROR_NOT_INITIALIZED,
     * CUDA_ERROR_INVALID_CONTEXT, CUDA_ERROR_INVALID_VALUE
     *
     * @see JCudaDriver#cuTexObjectCreate
     */
    public static int cuTexObjectDestroy(CUtexObject texObject)
    {
        // Forward to the native implementation and check the result code
        int status = cuTexObjectDestroyNative(texObject);
        return checkResult(status);
    }
    private static native int cuTexObjectDestroyNative(CUtexObject texObject);

    /**
     * Returns a texture object's resource descriptor.
     *
     * <pre>
     * CUresult cuTexObjectGetResourceDesc (
     *      CUDA_RESOURCE_DESC* pResDesc,
     *      CUtexObject texObject )
     * </pre>
     * <p>
     * Returns the resource descriptor for the texture object specified by
     * texObject.
     *
     * @param pResDesc Resource descriptor
     * @param texObject Texture object
     *
     * @return CUDA_SUCCESS, CUDA_ERROR_DEINITIALIZED, CUDA_ERROR_NOT_INITIALIZED,
     * CUDA_ERROR_INVALID_CONTEXT, CUDA_ERROR_INVALID_VALUE
     *
     * @see JCudaDriver#cuTexObjectCreate
     */
    public static int cuTexObjectGetResourceDesc(CUDA_RESOURCE_DESC pResDesc, CUtexObject texObject)
    {
        // Forward to the native implementation and check the result code
        int status = cuTexObjectGetResourceDescNative(pResDesc, texObject);
        return checkResult(status);
    }
    private static native int cuTexObjectGetResourceDescNative(CUDA_RESOURCE_DESC pResDesc, CUtexObject texObject);

    /**
     * Returns a texture object's texture descriptor.
     *
     * <pre>
     * CUresult cuTexObjectGetTextureDesc (
     *      CUDA_TEXTURE_DESC* pTexDesc,
     *      CUtexObject texObject )
     * </pre>
     * <p>
     * Returns the texture descriptor for the texture object specified by
     * texObject.
     *
     * @param pTexDesc Texture descriptor
     * @param texObject Texture object
     *
     * @return CUDA_SUCCESS, CUDA_ERROR_DEINITIALIZED, CUDA_ERROR_NOT_INITIALIZED,
     * CUDA_ERROR_INVALID_CONTEXT, CUDA_ERROR_INVALID_VALUE
     *
     * @see JCudaDriver#cuTexObjectCreate
     */
    public static int cuTexObjectGetTextureDesc(CUDA_TEXTURE_DESC pTexDesc, CUtexObject texObject)
    {
        // Forward to the native implementation and check the result code
        int status = cuTexObjectGetTextureDescNative(pTexDesc, texObject);
        return checkResult(status);
    }
    private static native int cuTexObjectGetTextureDescNative(CUDA_TEXTURE_DESC pTexDesc, CUtexObject texObject);

    /**
     * Returns a texture object's resource view descriptor.
     *
     * <pre>
     * CUresult cuTexObjectGetResourceViewDesc (
     *      CUDA_RESOURCE_VIEW_DESC* pResViewDesc,
     *      CUtexObject texObject )
     * </pre>
     * <p>
     * Returns the resource view descriptor for the texture object specified
     * by texObject. If no resource view was set for texObject,
     * CUDA_ERROR_INVALID_VALUE is returned.
     *
     * @param pResViewDesc Resource view descriptor
     * @param texObject Texture object
     *
     * @return CUDA_SUCCESS, CUDA_ERROR_DEINITIALIZED, CUDA_ERROR_NOT_INITIALIZED,
     * CUDA_ERROR_INVALID_CONTEXT, CUDA_ERROR_INVALID_VALUE
     *
     * @see JCudaDriver#cuTexObjectCreate
     */
    public static int cuTexObjectGetResourceViewDesc(CUDA_RESOURCE_VIEW_DESC pResViewDesc, CUtexObject texObject)
    {
        // Forward to the native implementation and check the result code
        int status = cuTexObjectGetResourceViewDescNative(pResViewDesc, texObject);
        return checkResult(status);
    }
    private static native int cuTexObjectGetResourceViewDescNative(CUDA_RESOURCE_VIEW_DESC pResViewDesc, CUtexObject texObject);

    /**
     * Creates a surface object.
     *
     * <pre>
     * CUresult cuSurfObjectCreate (
     *      CUsurfObject* pSurfObject,
     *      const CUDA_RESOURCE_DESC* pResDesc )
     * </pre>
     * <p>
     * Creates a surface object and returns it in pSurfObject. pResDesc
     * describes the data to perform surface load/stores on.
     * CUDA_RESOURCE_DESC::resType must be CU_RESOURCE_TYPE_ARRAY and
     * CUDA_RESOURCE_DESC::res::array::hArray must be set to a valid CUDA
     * array handle. CUDA_RESOURCE_DESC::flags must be set to zero.
     * <p>
     * Surface objects are only supported on devices of compute capability
     * 3.0 or higher.
     *
     * @param pSurfObject Surface object to create
     * @param pResDesc Resource descriptor
     *
     * @return CUDA_SUCCESS, CUDA_ERROR_DEINITIALIZED, CUDA_ERROR_NOT_INITIALIZED,
     * CUDA_ERROR_INVALID_CONTEXT, CUDA_ERROR_INVALID_VALUE
     *
     * @see JCudaDriver#cuSurfObjectDestroy
     */
    public static int cuSurfObjectCreate(CUsurfObject pSurfObject, CUDA_RESOURCE_DESC pResDesc)
    {
        // Forward to the native implementation and check the result code
        int status = cuSurfObjectCreateNative(pSurfObject, pResDesc);
        return checkResult(status);
    }
    private static native int cuSurfObjectCreateNative(CUsurfObject pSurfObject, CUDA_RESOURCE_DESC pResDesc);

    /**
     * Destroys a surface object.
     *
     * <pre>
     * CUresult cuSurfObjectDestroy (
     *      CUsurfObject surfObject )
     * </pre>
     * <p>
     * Destroys the surface object specified by surfObject.
     *
     * @param surfObject Surface object to destroy
     *
     * @return CUDA_SUCCESS, CUDA_ERROR_DEINITIALIZED, CUDA_ERROR_NOT_INITIALIZED,
     * CUDA_ERROR_INVALID_CONTEXT, CUDA_ERROR_INVALID_VALUE
     *
     * @see JCudaDriver#cuSurfObjectCreate
     */
    public static int cuSurfObjectDestroy(CUsurfObject surfObject)
    {
        // Forward to the native implementation and check the result code
        int status = cuSurfObjectDestroyNative(surfObject);
        return checkResult(status);
    }
    private static native int cuSurfObjectDestroyNative(CUsurfObject surfObject);

    /**
     * Returns a surface object's resource descriptor.
     *
     * <pre>
     * CUresult cuSurfObjectGetResourceDesc (
     *      CUDA_RESOURCE_DESC* pResDesc,
     *      CUsurfObject surfObject )
     * </pre>
     * <p>
     * Returns the resource descriptor for the surface object specified by
     * surfObject.
     *
     * @param pResDesc Resource descriptor
     * @param surfObject Surface object
     *
     * @return CUDA_SUCCESS, CUDA_ERROR_DEINITIALIZED, CUDA_ERROR_NOT_INITIALIZED,
     * CUDA_ERROR_INVALID_CONTEXT, CUDA_ERROR_INVALID_VALUE
     *
     * @see JCudaDriver#cuSurfObjectCreate
     */
    public static int cuSurfObjectGetResourceDesc(CUDA_RESOURCE_DESC pResDesc, CUsurfObject surfObject)
    {
        // Forward to the native implementation and check the result code
        int status = cuSurfObjectGetResourceDescNative(pResDesc, surfObject);
        return checkResult(status);
    }
    private static native int cuSurfObjectGetResourceDescNative(CUDA_RESOURCE_DESC pResDesc, CUsurfObject surfObject);

    /**
     * Queries if a device may directly access a peer device's memory.
     *
     * <pre>
     * CUresult cuDeviceCanAccessPeer (
     *      int* canAccessPeer,
     *      CUdevice dev,
     *      CUdevice peerDev )
     * 
*
*

Queries if a device may directly access * a peer device's memory. Returns in *canAccessPeer a value * of 1 if contexts on dev are capable of directly accessing * memory from contexts on peerDev and 0 otherwise. If direct * access of peerDev from dev is possible, then access * may be enabled on two specific contexts by calling * cuCtxEnablePeerAccess(). *

*
* Note: *

Note that this * function may also return error codes from previous, asynchronous * launches. *

*
*

*
* * @param canAccessPeer Returned access capability * @param dev Device from which allocations on peerDev are to be directly accessed. * @param peerDev Device on which the allocations to be directly accessed by dev reside. * * @return CUDA_SUCCESS, CUDA_ERROR_DEINITIALIZED, CUDA_ERROR_NOT_INITIALIZED, * CUDA_ERROR_INVALID_DEVICE * * @see JCudaDriver#cuCtxEnablePeerAccess * @see JCudaDriver#cuCtxDisablePeerAccess */ public static int cuDeviceCanAccessPeer(int canAccessPeer[], CUdevice dev, CUdevice peerDev) { return checkResult(cuDeviceCanAccessPeerNative(canAccessPeer, dev, peerDev)); } private static native int cuDeviceCanAccessPeerNative(int canAccessPeer[], CUdevice dev, CUdevice peerDev); /** * Queries attributes of the link between two devices.
*
* Returns in *value the value of the requested attribute attrib of the * link between srcDevice and dstDevice. The supported attributes are: *
    *
  • CU_DEVICE_P2P_ATTRIBUTE_PERFORMANCE_RANK: A relative value indicating the * performance of the link between two devices.
  • *
  • CU_DEVICE_P2P_ATTRIBUTE_ACCESS_SUPPORTED P2P: 1 if P2P Access is enable.
  • *
  • CU_DEVICE_P2P_ATTRIBUTE_NATIVE_ATOMIC_SUPPORTED: 1 if Atomic operations over * the link are supported.
  • *
* Returns ::CUDA_ERROR_INVALID_DEVICE if srcDevice or dstDevice are not valid * or if they represent the same device.
*
* Returns ::CUDA_ERROR_INVALID_VALUE if attrib is not valid or if value is * a null pointer.
* * @param value Returned value of the requested attribute * @param attrib The requested attribute of the link between \p srcDevice and \p dstDevice. * @param srcDevice The source device of the target link. * @param dstDevice The destination device of the target link. * * @return CUDA_SUCCESS, CUDA_ERROR_DEINITIALIZED, CUDA_ERROR_NOT_INITIALIZED, * CUDA_ERROR_INVALID_DEVICE, CUDA_ERROR_INVALID_VALUE * * @see JCudaDriver#cuCtxEnablePeerAccess * @see JCudaDriver#cuCtxDisablePeerAccess * @see JCudaDriver#cuCtxCanAccessPeer */ public static int cuDeviceGetP2PAttribute(int value[], int attrib, CUdevice srcDevice, CUdevice dstDevice) { return checkResult(cuDeviceGetP2PAttributeNative(value, attrib, srcDevice, dstDevice)); } private static native int cuDeviceGetP2PAttributeNative(int value[], int attrib, CUdevice srcDevice, CUdevice dstDevice); /** * Enables direct access to memory allocations in a peer context. * *
     * CUresult cuCtxEnablePeerAccess (
     *      CUcontext peerContext,
     *      unsigned int  Flags )
     * 
*
*

Enables direct access to memory * allocations in a peer context. If both the current context and peerContext are on devices which support unified addressing (as * may be queried using CU_DEVICE_ATTRIBUTE_UNIFIED_ADDRESSING) and same * major compute capability, then on success all allocations from peerContext will immediately be accessible by the current context. * See Unified Addressing for additional details. *

*

Note that access granted by this call * is unidirectional and that in order to access memory from the current * context in peerContext, a separate symmetric call to * cuCtxEnablePeerAccess() is required. *

* There is a system-wide maximum of eight peer connections per device. *

*

Returns CUDA_ERROR_PEER_ACCESS_UNSUPPORTED * if cuDeviceCanAccessPeer() indicates that the CUdevice of the current * context cannot directly access memory from the CUdevice of peerContext. *

*

Returns CUDA_ERROR_PEER_ACCESS_ALREADY_ENABLED * if direct access of peerContext from the current context has * already been enabled. *

*

Returns CUDA_ERROR_TOO_MANY_PEERS if * direct peer access is not possible because hardware resources required * for peer access have been exhausted. *

*

Returns CUDA_ERROR_INVALID_CONTEXT if * there is no current context, peerContext is not a valid * context, or if the current context is peerContext. *

*

Returns CUDA_ERROR_INVALID_VALUE if Flags is not 0. *

*
* Note: *

Note that this * function may also return error codes from previous, asynchronous * launches. *

*
*

*
* * @param peerContext Peer context to enable direct access to from the current context * @param Flags Reserved for future use and must be set to 0 * * @return CUDA_SUCCESS, CUDA_ERROR_DEINITIALIZED, CUDA_ERROR_NOT_INITIALIZED, * CUDA_ERROR_PEER_ACCESS_ALREADY_ENABLED, CUDA_ERROR_TOO_MANY_PEERS, * CUDA_ERROR_INVALID_CONTEXT, CUDA_ERROR_PEER_ACCESS_UNSUPPORTED, * CUDA_ERROR_INVALID_VALUE * * @see JCudaDriver#cuDeviceCanAccessPeer * @see JCudaDriver#cuCtxDisablePeerAccess */ public static int cuCtxEnablePeerAccess(CUcontext peerContext, int Flags) { return checkResult(cuCtxEnablePeerAccessNative(peerContext, Flags)); } private static native int cuCtxEnablePeerAccessNative(CUcontext peerContext, int Flags); /** * Disables direct access to memory allocations in a peer context and unregisters any registered allocations. * *
     * <pre>
     * CUresult cuCtxDisablePeerAccess ( CUcontext peerContext )
     * </pre>
     * <p>
     * Returns CUDA_ERROR_PEER_ACCESS_NOT_ENABLED if direct peer access has
     * not yet been enabled from <code>peerContext</code> to the current
     * context, and CUDA_ERROR_INVALID_CONTEXT if there is no current context
     * or <code>peerContext</code> is not a valid context.
     * <p>
     * Note that this function may also return error codes from previous,
     * asynchronous launches.
     *
     * @param peerContext Peer context to disable direct access to
     *
     * @return CUDA_SUCCESS, CUDA_ERROR_DEINITIALIZED, CUDA_ERROR_NOT_INITIALIZED,
     * CUDA_ERROR_PEER_ACCESS_NOT_ENABLED, CUDA_ERROR_INVALID_CONTEXT
     *
     * @see JCudaDriver#cuDeviceCanAccessPeer
     * @see JCudaDriver#cuCtxEnablePeerAccess
     */
    public static int cuCtxDisablePeerAccess(CUcontext peerContext)
    {
        final int status = cuCtxDisablePeerAccessNative(peerContext);
        return checkResult(status);
    }
    private static native int cuCtxDisablePeerAccessNative(CUcontext peerContext);


    /**
     * Sets the parameter size for the function.
     *
     * CUresult cuParamSetSize (
     *      CUfunction hfunc,
     *      unsigned int  numbytes )
     * 
*
*

Sets the parameter size for the function. * Deprecated Sets through numbytes * the total size in bytes needed by the function parameters of the kernel * corresponding to hfunc. *

*
* Note: *

Note that this * function may also return error codes from previous, asynchronous * launches. *

*
*

*
* * @param hfunc Kernel to set parameter size for * @param numbytes Size of parameter list in bytes * * @return CUDA_SUCCESS, CUDA_ERROR_DEINITIALIZED, CUDA_ERROR_NOT_INITIALIZED, * CUDA_ERROR_INVALID_CONTEXT, CUDA_ERROR_INVALID_VALUE * * @see JCudaDriver#cuFuncSetBlockShape * @see JCudaDriver#cuFuncSetSharedSize * @see JCudaDriver#cuFuncGetAttribute * @see JCudaDriver#cuParamSetf * @see JCudaDriver#cuParamSeti * @see JCudaDriver#cuParamSetv * @see JCudaDriver#cuLaunch * @see JCudaDriver#cuLaunchGrid * @see JCudaDriver#cuLaunchGridAsync * @see JCudaDriver#cuLaunchKernel * * @deprecated Deprecated in CUDA */ @Deprecated public static int cuParamSetSize(CUfunction hfunc, int numbytes) { return checkResult(cuParamSetSizeNative(hfunc, numbytes)); } private static native int cuParamSetSizeNative(CUfunction hfunc, int numbytes); /** * Adds an integer parameter to the function's argument list. * *
     * CUresult cuParamSeti (
     *      CUfunction hfunc,
     *      int  offset,
     *      unsigned int  value )
     * 
*
*

Adds an integer parameter to the * function's argument list. * Deprecated Sets an integer parameter that * will be specified the next time the kernel corresponding to hfunc will be invoked. offset is a byte offset. *

*
* Note: *

Note that this * function may also return error codes from previous, asynchronous * launches. *

*
*

*
* * @param hfunc Kernel to add parameter to * @param offset Offset to add parameter to argument list * @param value Value of parameter * * @return CUDA_SUCCESS, CUDA_ERROR_DEINITIALIZED, CUDA_ERROR_NOT_INITIALIZED, * CUDA_ERROR_INVALID_CONTEXT, CUDA_ERROR_INVALID_VALUE * * @see JCudaDriver#cuFuncSetBlockShape * @see JCudaDriver#cuFuncSetSharedSize * @see JCudaDriver#cuFuncGetAttribute * @see JCudaDriver#cuParamSetSize * @see JCudaDriver#cuParamSetf * @see JCudaDriver#cuParamSetv * @see JCudaDriver#cuLaunch * @see JCudaDriver#cuLaunchGrid * @see JCudaDriver#cuLaunchGridAsync * @see JCudaDriver#cuLaunchKernel * * @deprecated Deprecated in CUDA */ @Deprecated public static int cuParamSeti(CUfunction hfunc, int offset, int value) { return checkResult(cuParamSetiNative(hfunc, offset, value)); } private static native int cuParamSetiNative(CUfunction hfunc, int offset, int value); /** * Adds a floating-point parameter to the function's argument list. * *
     * CUresult cuParamSetf (
     *      CUfunction hfunc,
     *      int  offset,
     *      float  value )
     * 
*
*

Adds a floating-point parameter to the * function's argument list. * Deprecated Sets a floating-point parameter * that will be specified the next time the kernel corresponding to hfunc will be invoked. offset is a byte offset. *

*
* Note: *

Note that this * function may also return error codes from previous, asynchronous * launches. *

*
*

*
* * @param hfunc Kernel to add parameter to * @param offset Offset to add parameter to argument list * @param value Value of parameter * * @return CUDA_SUCCESS, CUDA_ERROR_DEINITIALIZED, CUDA_ERROR_NOT_INITIALIZED, * CUDA_ERROR_INVALID_CONTEXT, CUDA_ERROR_INVALID_VALUE * * @see JCudaDriver#cuFuncSetBlockShape * @see JCudaDriver#cuFuncSetSharedSize * @see JCudaDriver#cuFuncGetAttribute * @see JCudaDriver#cuParamSetSize * @see JCudaDriver#cuParamSeti * @see JCudaDriver#cuParamSetv * @see JCudaDriver#cuLaunch * @see JCudaDriver#cuLaunchGrid * @see JCudaDriver#cuLaunchGridAsync * @see JCudaDriver#cuLaunchKernel * * @deprecated Deprecated in CUDA */ @Deprecated public static int cuParamSetf(CUfunction hfunc, int offset, float value) { return checkResult(cuParamSetfNative(hfunc, offset, value)); } private static native int cuParamSetfNative(CUfunction hfunc, int offset, float value); /** * Adds arbitrary data to the function's argument list. * *
     * CUresult cuParamSetv (
     *      CUfunction hfunc,
     *      int  offset,
     *      void* ptr,
     *      unsigned int  numbytes )
     * 
*
*

Adds arbitrary data to the function's * argument list. * Deprecated Copies an arbitrary amount of * data (specified in numbytes) from ptr into the * parameter space of the kernel corresponding to hfunc. offset is a byte offset. *

*
* Note: *

Note that this * function may also return error codes from previous, asynchronous * launches. *

*
*

*
* * @param hfunc Kernel to add data to * @param offset Offset to add data to argument list * @param ptr Pointer to arbitrary data * @param numbytes Size of data to copy in bytes * * @return CUDA_SUCCESS, CUDA_ERROR_DEINITIALIZED, CUDA_ERROR_NOT_INITIALIZED, * CUDA_ERROR_INVALID_CONTEXT, CUDA_ERROR_INVALID_VALUE * * @see JCudaDriver#cuFuncSetBlockShape * @see JCudaDriver#cuFuncSetSharedSize * @see JCudaDriver#cuFuncGetAttribute * @see JCudaDriver#cuParamSetSize * @see JCudaDriver#cuParamSetf * @see JCudaDriver#cuParamSeti * @see JCudaDriver#cuLaunch * @see JCudaDriver#cuLaunchGrid * @see JCudaDriver#cuLaunchGridAsync * @see JCudaDriver#cuLaunchKernel * * @deprecated Deprecated in CUDA */ @Deprecated public static int cuParamSetv(CUfunction hfunc, int offset, Pointer ptr, int numbytes) { return checkResult(cuParamSetvNative(hfunc, offset, ptr, numbytes)); } private static native int cuParamSetvNative(CUfunction hfunc, int offset, Pointer ptr, int numbytes); /** * Adds a texture-reference to the function's argument list. * *
     * <pre>
     * CUresult cuParamSetTexRef ( CUfunction hfunc, int texunit, CUtexref hTexRef )
     * </pre>
     * <p>
     * Makes the CUDA array or linear memory bound to the texture reference
     * <code>hTexRef</code> available to a device program as a texture. In this
     * version of CUDA, the texture-reference must be obtained via
     * cuModuleGetTexRef() and the <code>texunit</code> parameter must be set
     * to CU_PARAM_TR_DEFAULT.
     * <p>
     * Note that this function may also return error codes from previous,
     * asynchronous launches.
     *
     * @param hfunc Kernel to add texture-reference to
     * @param texunit Texture unit (must be CU_PARAM_TR_DEFAULT)
     * @param hTexRef Texture-reference to add to argument list
     *
     * @return CUDA_SUCCESS, CUDA_ERROR_DEINITIALIZED, CUDA_ERROR_NOT_INITIALIZED,
     * CUDA_ERROR_INVALID_CONTEXT, CUDA_ERROR_INVALID_VALUE
     *
     * @deprecated Deprecated in CUDA
     */
    @Deprecated
    public static int cuParamSetTexRef(CUfunction hfunc, int texunit, CUtexref hTexRef)
    {
        final int status = cuParamSetTexRefNative(hfunc, texunit, hTexRef);
        return checkResult(status);
    }
    private static native int cuParamSetTexRefNative(CUfunction hfunc, int texunit, CUtexref hTexRef);


    /**
     * Creates a graph.
*
* Creates an empty graph, which is returned via \p phGraph. * * @param phGraph - Returns newly created graph * @param flags - Graph creation flags, must be 0 * * @return * CUDA_SUCCESS, * CUDA_ERROR_DEINITIALIZED, * CUDA_ERROR_NOT_INITIALIZED, * CUDA_ERROR_INVALID_VALUE, * CUDA_ERROR_OUT_OF_MEMORY * * * @see JCudaDriver#cuGraphAddChildGraphNode * @see JCudaDriver#cuGraphAddEmptyNode * @see JCudaDriver#cuGraphAddKernelNode * @see JCudaDriver#cuGraphAddHostNode * @see JCudaDriver#cuGraphAddMemcpyNode * @see JCudaDriver#cuGraphAddMemsetNode * @see JCudaDriver#cuGraphInstantiate * @see JCudaDriver#cuGraphDestroy * @see JCudaDriver#cuGraphGetNodes * @see JCudaDriver#cuGraphGetRootNodes * @see JCudaDriver#cuGraphGetEdges * @see JCudaDriver#cuGraphClone */ public static int cuGraphCreate(CUgraph phGraph, int flags) { return checkResult(cuGraphCreateNative(phGraph, flags)); } private static native int cuGraphCreateNative(CUgraph phGraph, int flags); /** * Creates a kernel execution node and adds it to a graph.
*
* Creates a new kernel execution node and adds it to \p hGraph with \p numDependencies * dependencies specified via \p dependencies and arguments specified in \p nodeParams. * It is possible for \p numDependencies to be 0, in which case the node will be placed * at the root of the graph. \p dependencies may not have any duplicate entries. * A handle to the new node will be returned in \p phGraphNode.
*
* The CUDA_KERNEL_NODE_PARAMS structure is defined as:
*
*

     *  typedef struct CUDA_KERNEL_NODE_PARAMS_st {
     *      CUfunction func;
     *      unsigned int gridDimX;
     *      unsigned int gridDimY;
     *      unsigned int gridDimZ;
     *      unsigned int blockDimX;
     *      unsigned int blockDimY;
     *      unsigned int blockDimZ;
     *      unsigned int sharedMemBytes;
     *      void **kernelParams;
     *      void **extra;
     *  } CUDA_KERNEL_NODE_PARAMS;
     * 
*
* When the graph is launched, the node will invoke kernel \p func on a (\p gridDimX x * \p gridDimY x \p gridDimZ) grid of blocks. Each block contains * (\p blockDimX x \p blockDimY x \p blockDimZ) threads.
*
* \p sharedMemBytes sets the amount of dynamic shared memory that will be * available to each thread block.
*
* Kernel parameters to \p func can be specified in one of two ways:
*
* 1) Kernel parameters can be specified via \p kernelParams. If the kernel has N * parameters, then \p kernelParams needs to be an array of N pointers. Each pointer, * from \p kernelParams[0] to \p kernelParams[N-1], points to the region of memory from which the actual * parameter will be copied. The number of kernel parameters and their offsets and sizes do not need * to be specified as that information is retrieved directly from the kernel's image.
*
* 2) Kernel parameters can also be packaged by the application into a single buffer that is passed in * via \p extra. This places the burden on the application of knowing each kernel * parameter's size and alignment/padding within the buffer. The \p extra parameter exists * to allow this function to take additional less commonly used arguments. \p extra specifies * a list of names of extra settings and their corresponding values. Each extra setting name is * immediately followed by the corresponding value. The list must be terminated with either NULL or * CU_LAUNCH_PARAM_END.
*
*
    *
  • ::CU_LAUNCH_PARAM_END, which indicates the end of the \p extra * array;
  • *
  • ::CU_LAUNCH_PARAM_BUFFER_POINTER, which specifies that the next * value in \p extra will be a pointer to a buffer * containing all the kernel parameters for launching kernel * \p func;
  • *
  • ::CU_LAUNCH_PARAM_BUFFER_SIZE, which specifies that the next * value in \p extra will be a pointer to a size_t * containing the size of the buffer specified with * ::CU_LAUNCH_PARAM_BUFFER_POINTER;
  • *
*
* The error ::CUDA_ERROR_INVALID_VALUE will be returned if kernel parameters are specified with both * \p kernelParams and \p extra (i.e. both \p kernelParams and * \p extra are non-NULL).
*
* The \p kernelParams or \p extra array, as well as the argument values it points to, * are copied during this call. *
* Kernels launched using graphs must not use texture and surface references. Reading or * writing through any texture or surface reference is undefined behavior. * This restriction does not apply to texture and surface objects. * * @param phGraphNode - Returns newly created node * @param hGraph - Graph to which to add the node * @param dependencies - Dependencies of the node * @param numDependencies - Number of dependencies * @param nodeParams - Parameters for the GPU execution node * * @return * CUDA_SUCCESS, * CUDA_ERROR_DEINITIALIZED, * CUDA_ERROR_NOT_INITIALIZED, * CUDA_ERROR_INVALID_VALUE * * * @see JCudaDriver#cuLaunchKernel * @see JCudaDriver#cuGraphKernelNodeGetParams * @see JCudaDriver#cuGraphKernelNodeSetParams * @see JCudaDriver#cuGraphCreate * @see JCudaDriver#cuGraphDestroyNode * @see JCudaDriver#cuGraphAddChildGraphNode * @see JCudaDriver#cuGraphAddEmptyNode * @see JCudaDriver#cuGraphAddHostNode * @see JCudaDriver#cuGraphAddMemcpyNode * @see JCudaDriver#cuGraphAddMemsetNode */ public static int cuGraphAddKernelNode(CUgraphNode phGraphNode, CUgraph hGraph, CUgraphNode dependencies[], long numDependencies, CUDA_KERNEL_NODE_PARAMS nodeParams) { return checkResult(cuGraphAddKernelNodeNative(phGraphNode, hGraph, dependencies, numDependencies, nodeParams)); } private static native int cuGraphAddKernelNodeNative(CUgraphNode phGraphNode, CUgraph hGraph, CUgraphNode dependencies[], long numDependencies, CUDA_KERNEL_NODE_PARAMS nodeParams); /** * Returns a kernel node's parameters.
*
* Returns the parameters of kernel node \p hNode in \p nodeParams. * The \p kernelParams or \p extra array returned in \p nodeParams, * as well as the argument values it points to, are owned by the node. * This memory remains valid until the node is destroyed or its * parameters are modified, and should not be modified * directly. Use ::cuGraphKernelNodeSetParams to update the * parameters of this node.
*
* The params will contain either \p kernelParams or \p extra, * according to which of these was most recently set on the node. * * @param hNode - Node to get the parameters for * @param nodeParams - Pointer to return the parameters * * @return * CUDA_SUCCESS, * CUDA_ERROR_DEINITIALIZED, * CUDA_ERROR_NOT_INITIALIZED, * CUDA_ERROR_INVALID_VALUE * * @see JCudaDriver#cuLaunchKernel * @see JCudaDriver#cuGraphAddKernelNode * @see JCudaDriver#cuGraphKernelNodeSetParams */ public static int cuGraphKernelNodeGetParams(CUgraphNode hNode, CUDA_KERNEL_NODE_PARAMS nodeParams) { return checkResult(cuGraphKernelNodeGetParamsNative(hNode, nodeParams)); } private static native int cuGraphKernelNodeGetParamsNative(CUgraphNode hNode, CUDA_KERNEL_NODE_PARAMS nodeParams); /** * Sets a kernel node's parameters. * * Sets the parameters of kernel node \p hNode to \p nodeParams. * * @param hNode - Node to set the parameters for * @param nodeParams - Parameters to copy * * @return * CUDA_SUCCESS, * CUDA_ERROR_INVALID_VALUE, * CUDA_ERROR_INVALID_HANDLE, * CUDA_ERROR_OUT_OF_MEMORY * * * @see JCudaDriver#cuLaunchKernel * @see JCudaDriver#cuGraphAddKernelNode * @see JCudaDriver#cuGraphKernelNodeGetParams */ public static int cuGraphKernelNodeSetParams(CUgraphNode hNode, CUDA_KERNEL_NODE_PARAMS nodeParams) { return checkResult(cuGraphKernelNodeSetParamsNative(hNode, nodeParams)); } private static native int cuGraphKernelNodeSetParamsNative(CUgraphNode hNode, CUDA_KERNEL_NODE_PARAMS nodeParams); /** * Creates a memcpy node and adds it to a graph.
*
* Creates a new memcpy node and adds it to \p hGraph with \p numDependencies * dependencies specified via \p dependencies. * It is possible for \p numDependencies to be 0, in which case the node will be placed * at the root of the graph. \p dependencies may not have any duplicate entries. * A handle to the new node will be returned in \p phGraphNode.
*
* When the graph is launched, the node will perform the memcpy described by \p copyParams. * See ::cuMemcpy3D() for a description of the structure and its restrictions.
*
* Memcpy nodes have some additional restrictions with regards to managed memory, if the * system contains at least one device which has a zero value for the device attribute * ::CU_DEVICE_ATTRIBUTE_CONCURRENT_MANAGED_ACCESS. If one or more of the operands refer * to managed memory, then using the memory type ::CU_MEMORYTYPE_UNIFIED is disallowed * for those operand(s). The managed memory will be treated as residing on either the * host or the device, depending on which memory type is specified. * * @param phGraphNode - Returns newly created node * @param hGraph - Graph to which to add the node * @param dependencies - Dependencies of the node * @param numDependencies - Number of dependencies * @param copyParams - Parameters for the memory copy * @param ctx - Context on which to run the node * * @return * CUDA_SUCCESS, * CUDA_ERROR_DEINITIALIZED, * CUDA_ERROR_NOT_INITIALIZED, * CUDA_ERROR_INVALID_VALUE * * * @see JCudaDriver#cuMemcpy3D * @see JCudaDriver#cuGraphMemcpyNodeGetParams * @see JCudaDriver#cuGraphMemcpyNodeSetParams * @see JCudaDriver#cuGraphCreate * @see JCudaDriver#cuGraphDestroyNode * @see JCudaDriver#cuGraphAddChildGraphNode * @see JCudaDriver#cuGraphAddEmptyNode * @see JCudaDriver#cuGraphAddKernelNode * @see JCudaDriver#cuGraphAddHostNode * @see JCudaDriver#cuGraphAddMemsetNode */ public static int cuGraphAddMemcpyNode(CUgraphNode phGraphNode, CUgraph hGraph, CUgraphNode dependencies[], long numDependencies, CUDA_MEMCPY3D copyParams, CUcontext ctx) { return checkResult(cuGraphAddMemcpyNodeNative(phGraphNode, hGraph, dependencies, numDependencies, copyParams, ctx)); } private static native int cuGraphAddMemcpyNodeNative(CUgraphNode phGraphNode, CUgraph hGraph, CUgraphNode dependencies[], long numDependencies, CUDA_MEMCPY3D copyParams, CUcontext ctx); /** * Returns a memcpy node's parameters.
*
* Returns the parameters of memcpy node \p hNode in \p nodeParams. * * @param hNode - Node to get the parameters for * @param nodeParams - Pointer to return the parameters * * @return * CUDA_SUCCESS, * CUDA_ERROR_DEINITIALIZED, * CUDA_ERROR_NOT_INITIALIZED, * CUDA_ERROR_INVALID_VALUE * * * @see JCudaDriver#cuMemcpy3D * @see JCudaDriver#cuGraphAddMemcpyNode * @see JCudaDriver#cuGraphMemcpyNodeSetParams */ public static int cuGraphMemcpyNodeGetParams(CUgraphNode hNode, CUDA_MEMCPY3D nodeParams) { return checkResult(cuGraphMemcpyNodeGetParamsNative(hNode, nodeParams)); } private static native int cuGraphMemcpyNodeGetParamsNative(CUgraphNode hNode, CUDA_MEMCPY3D nodeParams); /** * Sets a memcpy node's parameters.
*
* Sets the parameters of memcpy node \p hNode to \p nodeParams. * * @param hNode - Node to set the parameters for * @param nodeParams - Parameters to copy * * @return * CUDA_SUCCESS, * CUDA_ERROR_DEINITIALIZED, * CUDA_ERROR_NOT_INITIALIZED, * CUDA_ERROR_INVALID_VALUE, * * * @see JCudaDriver#cuMemcpy3D * @see JCudaDriver#cuGraphAddMemcpyNode * @see JCudaDriver#cuGraphMemcpyNodeGetParams */ public static int cuGraphMemcpyNodeSetParams(CUgraphNode hNode, CUDA_MEMCPY3D nodeParams) { return checkResult(cuGraphMemcpyNodeSetParamsNative(hNode, nodeParams)); } private static native int cuGraphMemcpyNodeSetParamsNative(CUgraphNode hNode, CUDA_MEMCPY3D nodeParams); /** * Creates a memset node and adds it to a graph.
*
* Creates a new memset node and adds it to \p hGraph with \p numDependencies * dependencies specified via \p dependencies. * It is possible for \p numDependencies to be 0, in which case the node will be placed * at the root of the graph. \p dependencies may not have any duplicate entries. * A handle to the new node will be returned in \p phGraphNode.
*
* The element size must be 1, 2, or 4 bytes. * When the graph is launched, the node will perform the memset described by \p memsetParams. * * @param phGraphNode - Returns newly created node * @param hGraph - Graph to which to add the node * @param dependencies - Dependencies of the node * @param numDependencies - Number of dependencies * @param memsetParams - Parameters for the memory set * @param ctx - Context on which to run the node * * @return * CUDA_SUCCESS, * CUDA_ERROR_DEINITIALIZED, * CUDA_ERROR_NOT_INITIALIZED, * CUDA_ERROR_INVALID_VALUE, * CUDA_ERROR_INVALID_CONTEXT * * * @see JCudaDriver#cuMemsetD2D32 * @see JCudaDriver#cuGraphMemsetNodeGetParams * @see JCudaDriver#cuGraphMemsetNodeSetParams * @see JCudaDriver#cuGraphCreate * @see JCudaDriver#cuGraphDestroyNode * @see JCudaDriver#cuGraphAddChildGraphNode * @see JCudaDriver#cuGraphAddEmptyNode * @see JCudaDriver#cuGraphAddKernelNode * @see JCudaDriver#cuGraphAddHostNode * @see JCudaDriver#cuGraphAddMemcpyNode */ public static int cuGraphAddMemsetNode(CUgraphNode phGraphNode, CUgraph hGraph, CUgraphNode dependencies[], long numDependencies, CUDA_MEMSET_NODE_PARAMS memsetParams, CUcontext ctx) { return checkResult(cuGraphAddMemsetNodeNative(phGraphNode, hGraph, dependencies, numDependencies, memsetParams, ctx)); } private static native int cuGraphAddMemsetNodeNative(CUgraphNode phGraphNode, CUgraph hGraph, CUgraphNode dependencies[], long numDependencies, CUDA_MEMSET_NODE_PARAMS memsetParams, CUcontext ctx); /** * Returns a memset node's parameters.
*
* Returns the parameters of memset node \p hNode in \p nodeParams. * * @param hNode - Node to get the parameters for * @param nodeParams - Pointer to return the parameters * * @return * CUDA_SUCCESS, * CUDA_ERROR_DEINITIALIZED, * CUDA_ERROR_NOT_INITIALIZED, * CUDA_ERROR_INVALID_VALUE * * * @see JCudaDriver#cuMemsetD2D32 * @see JCudaDriver#cuGraphAddMemsetNode * @see JCudaDriver#cuGraphMemsetNodeSetParams */ public static int cuGraphMemsetNodeGetParams(CUgraphNode hNode, CUDA_MEMSET_NODE_PARAMS nodeParams) { return checkResult(cuGraphMemsetNodeGetParamsNative(hNode, nodeParams)); } private static native int cuGraphMemsetNodeGetParamsNative(CUgraphNode hNode, CUDA_MEMSET_NODE_PARAMS nodeParams); /** * Sets a memset node's parameters.
*
* Sets the parameters of memset node \p hNode to \p nodeParams. * * @param hNode - Node to set the parameters for * @param nodeParams - Parameters to copy * * @return * CUDA_SUCCESS, * CUDA_ERROR_DEINITIALIZED, * CUDA_ERROR_NOT_INITIALIZED, * CUDA_ERROR_INVALID_VALUE * * * @see JCudaDriver#cuMemsetD2D32 * @see JCudaDriver#cuGraphAddMemsetNode * @see JCudaDriver#cuGraphMemsetNodeGetParams */ public static int cuGraphMemsetNodeSetParams(CUgraphNode hNode, CUDA_MEMSET_NODE_PARAMS nodeParams) { return checkResult(cuGraphMemsetNodeSetParamsNative(hNode, nodeParams)); } private static native int cuGraphMemsetNodeSetParamsNative(CUgraphNode hNode, CUDA_MEMSET_NODE_PARAMS nodeParams); /** * Creates a host execution node and adds it to a graph.
*
* Creates a new CPU execution node and adds it to \p hGraph with \p numDependencies * dependencies specified via \p dependencies and arguments specified in \p nodeParams. * It is possible for \p numDependencies to be 0, in which case the node will be placed * at the root of the graph. \p dependencies may not have any duplicate entries. * A handle to the new node will be returned in \p phGraphNode.
*
* When the graph is launched, the node will invoke the specified CPU function. * * @param phGraphNode - Returns newly created node * @param hGraph - Graph to which to add the node * @param dependencies - Dependencies of the node * @param numDependencies - Number of dependencies * @param nodeParams - Parameters for the host node * * @return * CUDA_SUCCESS, * CUDA_ERROR_DEINITIALIZED, * CUDA_ERROR_NOT_INITIALIZED, * CUDA_ERROR_INVALID_VALUE * * * @see JCudaDriver#cuLaunchHostFunc * @see JCudaDriver#cuGraphHostNodeGetParams * @see JCudaDriver#cuGraphHostNodeSetParams * @see JCudaDriver#cuGraphCreate * @see JCudaDriver#cuGraphDestroyNode * @see JCudaDriver#cuGraphAddChildGraphNode * @see JCudaDriver#cuGraphAddEmptyNode * @see JCudaDriver#cuGraphAddKernelNode * @see JCudaDriver#cuGraphAddMemcpyNode * @see JCudaDriver#cuGraphAddMemsetNode */ public static int cuGraphAddHostNode(CUgraphNode phGraphNode, CUgraph hGraph, CUgraphNode dependencies[], long numDependencies, CUDA_HOST_NODE_PARAMS nodeParams) { return checkResult(cuGraphAddHostNodeNative(phGraphNode, hGraph, dependencies, numDependencies, nodeParams)); } private static native int cuGraphAddHostNodeNative(CUgraphNode phGraphNode, CUgraph hGraph, CUgraphNode dependencies[], long numDependencies, CUDA_HOST_NODE_PARAMS nodeParams); /** * Returns a host node's parameters.
*
* Returns the parameters of host node \p hNode in \p nodeParams. * * @param hNode - Node to get the parameters for * @param nodeParams - Pointer to return the parameters * * @return * CUDA_SUCCESS, * CUDA_ERROR_DEINITIALIZED, * CUDA_ERROR_NOT_INITIALIZED, * CUDA_ERROR_INVALID_VALUE * * * @see JCudaDriver#cuLaunchHostFunc * @see JCudaDriver#cuGraphAddHostNode * @see JCudaDriver#cuGraphHostNodeSetParams */ public static int cuGraphHostNodeGetParams(CUgraphNode hNode, CUDA_HOST_NODE_PARAMS nodeParams) { return checkResult(cuGraphHostNodeGetParamsNative(hNode, nodeParams)); } private static native int cuGraphHostNodeGetParamsNative(CUgraphNode hNode, CUDA_HOST_NODE_PARAMS nodeParams); /** * Sets a host node's parameters.
*
* Sets the parameters of host node \p hNode to \p nodeParams. * * @param hNode - Node to set the parameters for * @param nodeParams - Parameters to copy * * @return * CUDA_SUCCESS, * CUDA_ERROR_DEINITIALIZED, * CUDA_ERROR_NOT_INITIALIZED, * CUDA_ERROR_INVALID_VALUE * * * @see JCudaDriver#cuLaunchHostFunc * @see JCudaDriver#cuGraphAddHostNode * @see JCudaDriver#cuGraphHostNodeGetParams */ public static int cuGraphHostNodeSetParams(CUgraphNode hNode, CUDA_HOST_NODE_PARAMS nodeParams) { return checkResult(cuGraphHostNodeSetParamsNative(hNode, nodeParams)); } private static native int cuGraphHostNodeSetParamsNative(CUgraphNode hNode, CUDA_HOST_NODE_PARAMS nodeParams); /** * Creates a child graph node and adds it to a graph.
*
* Creates a new node which executes an embedded graph, and adds it to \p hGraph with * \p numDependencies dependencies specified via \p dependencies. * It is possible for \p numDependencies to be 0, in which case the node will be placed * at the root of the graph. \p dependencies may not have any duplicate entries. * A handle to the new node will be returned in \p phGraphNode.
*
* The node executes an embedded child graph. The child graph is cloned in this call. * * @param phGraphNode - Returns newly created node * @param hGraph - Graph to which to add the node * @param dependencies - Dependencies of the node * @param numDependencies - Number of dependencies * @param childGraph - The graph to clone into this node * * @return * CUDA_SUCCESS, * CUDA_ERROR_DEINITIALIZED, * CUDA_ERROR_NOT_INITIALIZED, * CUDA_ERROR_INVALID_VALUE, * * * @see JCudaDriver#cuGraphChildGraphNodeGetGraph * @see JCudaDriver#cuGraphCreate * @see JCudaDriver#cuGraphDestroyNode * @see JCudaDriver#cuGraphAddEmptyNode * @see JCudaDriver#cuGraphAddKernelNode * @see JCudaDriver#cuGraphAddHostNode * @see JCudaDriver#cuGraphAddMemcpyNode * @see JCudaDriver#cuGraphAddMemsetNode * @see JCudaDriver#cuGraphClone */ public static int cuGraphAddChildGraphNode(CUgraphNode phGraphNode, CUgraph hGraph, CUgraphNode dependencies[], long numDependencies, CUgraph childGraph) { return checkResult(cuGraphAddChildGraphNodeNative(phGraphNode, hGraph, dependencies, numDependencies, childGraph)); } private static native int cuGraphAddChildGraphNodeNative(CUgraphNode phGraphNode, CUgraph hGraph, CUgraphNode dependencies[], long numDependencies, CUgraph childGraph); /** * Gets a handle to the embedded graph of a child graph node.
*
* Gets a handle to the embedded graph in a child graph node. This call * does not clone the graph. Changes to the graph will be reflected in * the node, and the node retains ownership of the graph. * * @param hNode - Node to get the embedded graph for * @param phGraph - Location to store a handle to the graph * * @return * CUDA_SUCCESS, * CUDA_ERROR_DEINITIALIZED, * CUDA_ERROR_NOT_INITIALIZED, * CUDA_ERROR_INVALID_VALUE, * * * @see JCudaDriver#cuGraphAddChildGraphNode * @see JCudaDriver#cuGraphNodeFindInClone */ public static int cuGraphChildGraphNodeGetGraph(CUgraphNode hNode, CUgraph phGraph) { return checkResult(cuGraphChildGraphNodeGetGraphNative(hNode, phGraph)); } private static native int cuGraphChildGraphNodeGetGraphNative(CUgraphNode hNode, CUgraph phGraph); /** * Creates an empty node and adds it to a graph.
*
* Creates a new node which performs no operation, and adds it to \p hGraph with * \p numDependencies dependencies specified via \p dependencies. * It is possible for \p numDependencies to be 0, in which case the node will be placed * at the root of the graph. \p dependencies may not have any duplicate entries. * A handle to the new node will be returned in \p phGraphNode. * * An empty node performs no operation during execution, but can be used for * transitive ordering. For example, a phased execution graph with 2 groups of n * nodes with a barrier between them can be represented using an empty node and * 2*n dependency edges, rather than no empty node and n^2 dependency edges. * * @param phGraphNode - Returns newly created node * @param hGraph - Graph to which to add the node * @param dependencies - Dependencies of the node * @param numDependencies - Number of dependencies * * @return * CUDA_SUCCESS, * CUDA_ERROR_DEINITIALIZED, * CUDA_ERROR_NOT_INITIALIZED, * CUDA_ERROR_INVALID_VALUE, * * * @see JCudaDriver#cuGraphCreate * @see JCudaDriver#cuGraphDestroyNode * @see JCudaDriver#cuGraphAddChildGraphNode * @see JCudaDriver#cuGraphAddKernelNode * @see JCudaDriver#cuGraphAddHostNode * @see JCudaDriver#cuGraphAddMemcpyNode * @see JCudaDriver#cuGraphAddMemsetNode */ public static int cuGraphAddEmptyNode(CUgraphNode phGraphNode, CUgraph hGraph, CUgraphNode dependencies[], long numDependencies) { return checkResult(cuGraphAddEmptyNodeNative(phGraphNode, hGraph, dependencies, numDependencies)); } private static native int cuGraphAddEmptyNodeNative(CUgraphNode phGraphNode, CUgraph hGraph, CUgraphNode dependencies[], long numDependencies); /** * Creates an event record node and adds it to a graph.
.
* * Creates a new event record node and adds it to \p hGraph with \p numDependencies * dependencies specified via \p dependencies and arguments specified in \p params. * It is possible for \p numDependencies to be 0, in which case the node will be placed * at the root of the graph. \p dependencies may not have any duplicate entries. * A handle to the new node will be returned in \p phGraphNode. * * Each launch of the graph will record \p event to capture execution of the * node's dependencies. * * @param phGraphNode Returns newly created node * @param hGraph Graph to which to add the node * @param dependencies Dependencies of the node * @param numDependencies Number of dependencies * @param event Event for the node * * @return * CUDA_SUCCESS, * CUDA_ERROR_DEINITIALIZED, * CUDA_ERROR_NOT_INITIALIZED, * CUDA_ERROR_NOT_SUPPORTED, * CUDA_ERROR_INVALID_VALUE * * * @see JCudaDriver#cuGraphAddEventWaitNode, * @see JCudaDriver#cuEventRecord, * @see JCudaDriver#cuStreamWaitEvent, * @see JCudaDriver#cuGraphCreate, * @see JCudaDriver#cuGraphDestroyNode, * @see JCudaDriver#cuGraphAddChildGraphNode, * @see JCudaDriver#cuGraphAddEmptyNode, * @see JCudaDriver#cuGraphAddKernelNode, * @see JCudaDriver#cuGraphAddMemcpyNode, * @see JCudaDriver#cuGraphAddMemsetNode, */ public static int cuGraphAddEventRecordNode(CUgraphNode phGraphNode, CUgraph hGraph, CUgraphNode dependencies[], long numDependencies, CUevent event) { return checkResult(cuGraphAddEventRecordNodeNative(phGraphNode, hGraph, dependencies, numDependencies, event)); } private static native int cuGraphAddEventRecordNodeNative(CUgraphNode phGraphNode, CUgraph hGraph, CUgraphNode dependencies[], long numDependencies, CUevent event); /** * Returns the event associated with an event record node.
* * Returns the event of event record node \p hNode in \p event_out. * * @param hNode Node to get the event for * @param event_out Pointer to return the event * * @return * CUDA_SUCCESS, * CUDA_ERROR_DEINITIALIZED, * CUDA_ERROR_NOT_INITIALIZED, * CUDA_ERROR_INVALID_VALUE * * @see JCudaDriver#cuGraphAddEventRecordNode, * @see JCudaDriver#cuGraphEventRecordNodeSetEvent, * @see JCudaDriver#cuGraphEventWaitNodeGetEvent, * @see JCudaDriver#cuEventRecord, * @see JCudaDriver#cuStreamWaitEvent */ public static int cuGraphEventRecordNodeGetEvent(CUgraphNode hNode, CUevent event_out) { return checkResult(cuGraphEventRecordNodeGetEventNative(hNode, event_out)); } private static native int cuGraphEventRecordNodeGetEventNative(CUgraphNode hNode, CUevent event_out); /** * Sets an event record node's event.
* * Sets the event of event record node \p hNode to \p event. * * @param hNode Node to set the event for * @param event Event to use * * @return * CUDA_SUCCESS, * CUDA_ERROR_INVALID_VALUE, * CUDA_ERROR_INVALID_HANDLE, * CUDA_ERROR_OUT_OF_MEMORY * * @see JCudaDriver#cuGraphAddEventRecordNode, * @see JCudaDriver#cuGraphEventRecordNodeGetEvent, * @see JCudaDriver#cuGraphEventWaitNodeSetEvent, * @see JCudaDriver#cuEventRecord, * @see JCudaDriver#cuStreamWaitEvent */ public static int cuGraphEventRecordNodeSetEvent(CUgraphNode hNode, CUevent event) { return checkResult(cuGraphEventRecordNodeSetEventNative(hNode, event)); } private static native int cuGraphEventRecordNodeSetEventNative(CUgraphNode hNode, CUevent event); /** * Creates an event wait node and adds it to a graph.
* * Creates a new event wait node and adds it to \p hGraph with \p numDependencies * dependencies specified via \p dependencies and arguments specified in \p params. * It is possible for \p numDependencies to be 0, in which case the node will be placed * at the root of the graph. \p dependencies may not have any duplicate entries. * A handle to the new node will be returned in \p phGraphNode. * * The graph node will wait for all work captured in \p event. See @see JCudaDriver#cuEventRecord() * for details on what is captured by an event. \p event may be from a different context * or device than the launch stream. * * @param phGraphNode Returns newly created node * @param hGraph Graph to which to add the node * @param dependencies Dependencies of the node * @param numDependencies Number of dependencies * @param event Event for the node * * @return * CUDA_SUCCESS, * CUDA_ERROR_DEINITIALIZED, * CUDA_ERROR_NOT_INITIALIZED, * CUDA_ERROR_NOT_SUPPORTED, * CUDA_ERROR_INVALID_VALUE * * @see JCudaDriver#cuGraphAddEventRecordNode, * @see JCudaDriver#cuEventRecord, * @see JCudaDriver#cuStreamWaitEvent, * @see JCudaDriver#cuGraphCreate, * @see JCudaDriver#cuGraphDestroyNode, * @see JCudaDriver#cuGraphAddChildGraphNode, * @see JCudaDriver#cuGraphAddEmptyNode, * @see JCudaDriver#cuGraphAddKernelNode, * @see JCudaDriver#cuGraphAddMemcpyNode, * @see JCudaDriver#cuGraphAddMemsetNode, */ public static int cuGraphAddEventWaitNode(CUgraphNode phGraphNode, CUgraph hGraph, CUgraphNode dependencies[], long numDependencies, CUevent event) { return checkResult(cuGraphAddEventWaitNodeNative(phGraphNode, hGraph, dependencies, numDependencies, event)); } private static native int cuGraphAddEventWaitNodeNative(CUgraphNode phGraphNode, CUgraph hGraph, CUgraphNode dependencies[], long numDependencies, CUevent event); /** * Returns the event associated with an event wait node.
* * Returns the event of event wait node \p hNode in \p event_out. * * @param hNode Node to get the event for * @param event_out Pointer to return the event * * @return * CUDA_SUCCESS, * CUDA_ERROR_DEINITIALIZED, * CUDA_ERROR_NOT_INITIALIZED, * CUDA_ERROR_INVALID_VALUE * * @see JCudaDriver#cuGraphAddEventWaitNode, * @see JCudaDriver#cuGraphEventWaitNodeSetEvent, * @see JCudaDriver#cuGraphEventRecordNodeGetEvent, * @see JCudaDriver#cuEventRecord, * @see JCudaDriver#cuStreamWaitEvent */ public static int cuGraphEventWaitNodeGetEvent(CUgraphNode hNode, CUevent event_out) { return checkResult(cuGraphEventWaitNodeGetEventNative(hNode, event_out)); } private static native int cuGraphEventWaitNodeGetEventNative(CUgraphNode hNode, CUevent event_out); /** * Sets an event wait node's event.
* * Sets the event of event wait node \p hNode to \p event. * * @param hNode Node to set the event for * @param event Event to use * * @return * CUDA_SUCCESS, * CUDA_ERROR_INVALID_VALUE, * CUDA_ERROR_INVALID_HANDLE, * CUDA_ERROR_OUT_OF_MEMORY * * @see JCudaDriver#cuGraphAddEventWaitNode, * @see JCudaDriver#cuGraphEventWaitNodeGetEvent, * @see JCudaDriver#cuGraphEventRecordNodeSetEvent, * @see JCudaDriver#cuEventRecord, * @see JCudaDriver#cuStreamWaitEvent */ public static int cuGraphEventWaitNodeSetEvent(CUgraphNode hNode, CUevent event) { return checkResult(cuGraphEventWaitNodeSetEventNative(hNode, event)); } private static native int cuGraphEventWaitNodeSetEventNative(CUgraphNode hNode, CUevent event); /** * Clones a graph.
*
 * This function creates a copy of originalGraph and returns it in
 * phGraphClone. All parameters are copied into the cloned graph. The
 * original graph may be modified after this call without affecting
 * the clone.
 *
 * Child graph nodes in the original graph are recursively copied into
 * the clone.
 *
 * @param phGraphClone - Returns newly created cloned graph
 * @param originalGraph - Graph to clone
 *
 * @return
 * CUDA_SUCCESS,
 * CUDA_ERROR_INVALID_VALUE,
 * CUDA_ERROR_OUT_OF_MEMORY
 *
 * @see JCudaDriver#cuGraphCreate
 * @see JCudaDriver#cuGraphNodeFindInClone
 */
public static int cuGraphClone(CUgraph phGraphClone, CUgraph originalGraph)
{
    // Delegate to the native binding and translate the status code
    final int status = cuGraphCloneNative(phGraphClone, originalGraph);
    return checkResult(status);
}
private static native int cuGraphCloneNative(
    CUgraph phGraphClone, CUgraph originalGraph);

/**
 * Finds a cloned version of a node.
*
* This function returns the node in \p hClonedGraph corresponding to \p hOriginalNode * in the original graph.
*
* \p hClonedGraph must have been cloned from \p hOriginalGraph via ::cuGraphClone. * \p hOriginalNode must have been in \p hOriginalGraph at the time of the call to * ::cuGraphClone, and the corresponding cloned node in \p hClonedGraph must not have * been removed. The cloned node is then returned via \p phClonedNode. * * @param phNode - Returns handle to the cloned node * @param hOriginalNode - Handle to the original node * @param hClonedGraph - Cloned graph to query * * @return * CUDA_SUCCESS, * CUDA_ERROR_INVALID_VALUE, * * @see JCudaDriver#cuGraphClone */ public static int cuGraphNodeFindInClone(CUgraphNode phNode, CUgraphNode hOriginalNode, CUgraph hClonedGraph) { return checkResult(cuGraphNodeFindInCloneNative(phNode, hOriginalNode, hClonedGraph)); } private static native int cuGraphNodeFindInCloneNative(CUgraphNode phNode, CUgraphNode hOriginalNode, CUgraph hClonedGraph); /** * Returns a node's type.
*
* Returns the node type of \p hNode in \p type. * * @param hNode - Node to query * @param type - Pointer to return the node type * * @return * CUDA_SUCCESS, * CUDA_ERROR_DEINITIALIZED, * CUDA_ERROR_NOT_INITIALIZED, * CUDA_ERROR_INVALID_VALUE * * * @see JCudaDriver#cuGraphGetNodes * @see JCudaDriver#cuGraphGetRootNodes * @see JCudaDriver#cuGraphChildGraphNodeGetGraph * @see JCudaDriver#cuGraphKernelNodeGetParams * @see JCudaDriver#cuGraphKernelNodeSetParams * @see JCudaDriver#cuGraphHostNodeGetParams * @see JCudaDriver#cuGraphHostNodeSetParams * @see JCudaDriver#cuGraphMemcpyNodeGetParams * @see JCudaDriver#cuGraphMemcpyNodeSetParams * @see JCudaDriver#cuGraphMemsetNodeGetParams * @see JCudaDriver#cuGraphMemsetNodeSetParams */ public static int cuGraphNodeGetType(CUgraphNode hNode, int type[]) { return checkResult(cuGraphNodeGetTypeNative(hNode, type)); } private static native int cuGraphNodeGetTypeNative(CUgraphNode hNode, int type[]); /** * Returns a graph's nodes.
*
* Returns a list of \p hGraph's nodes. \p nodes may be NULL, in which case this * function will return the number of nodes in \p numNodes. Otherwise, * \p numNodes entries will be filled in. If \p numNodes is higher than the actual * number of nodes, the remaining entries in \p nodes will be set to NULL, and the * number of nodes actually obtained will be returned in \p numNodes. * * @param hGraph - Graph to query * @param nodes - Pointer to return the nodes * @param numNodes - See description * * @return * CUDA_SUCCESS, * CUDA_ERROR_DEINITIALIZED, * CUDA_ERROR_NOT_INITIALIZED, * CUDA_ERROR_INVALID_VALUE * * * @see JCudaDriver#cuGraphCreate * @see JCudaDriver#cuGraphGetRootNodes * @see JCudaDriver#cuGraphGetEdges * @see JCudaDriver#cuGraphNodeGetType * @see JCudaDriver#cuGraphNodeGetDependencies * @see JCudaDriver#cuGraphNodeGetDependentNodes */ public static int cuGraphGetNodes(CUgraph hGraph, CUgraphNode nodes[], long numNodes[]) { return checkResult(cuGraphGetNodesNative(hGraph, nodes, numNodes)); } private static native int cuGraphGetNodesNative(CUgraph hGraph, CUgraphNode nodes[], long numNodes[]); /** * Returns a graph's root nodes.
*
* Returns a list of \p hGraph's root nodes. \p rootNodes may be NULL, in which case this * function will return the number of root nodes in \p numRootNodes. Otherwise, * \p numRootNodes entries will be filled in. If \p numRootNodes is higher than the actual * number of root nodes, the remaining entries in \p rootNodes will be set to NULL, and the * number of nodes actually obtained will be returned in \p numRootNodes. * * @param hGraph - Graph to query * @param rootNodes - Pointer to return the root nodes * @param numRootNodes - See description * * @return * CUDA_SUCCESS, * CUDA_ERROR_DEINITIALIZED, * CUDA_ERROR_NOT_INITIALIZED, * CUDA_ERROR_INVALID_VALUE * * * @see JCudaDriver#cuGraphCreate * @see JCudaDriver#cuGraphGetNodes * @see JCudaDriver#cuGraphGetEdges * @see JCudaDriver#cuGraphNodeGetType * @see JCudaDriver#cuGraphNodeGetDependencies * @see JCudaDriver#cuGraphNodeGetDependentNodes */ public static int cuGraphGetRootNodes(CUgraph hGraph, CUgraphNode rootNodes[], long numRootNodes[]) { return checkResult(cuGraphGetRootNodesNative(hGraph, rootNodes, numRootNodes)); } private static native int cuGraphGetRootNodesNative(CUgraph hGraph, CUgraphNode rootNodes[], long numRootNodes[]); /** * Returns a graph's dependency edges.
*
* Returns a list of \p hGraph's dependency edges. Edges are returned via corresponding * indices in \p from and \p to; that is, the node in \p to[i] has a dependency on the * node in \p from[i]. \p from and \p to may both be NULL, in which * case this function only returns the number of edges in \p numEdges. Otherwise, * \p numEdges entries will be filled in. If \p numEdges is higher than the actual * number of edges, the remaining entries in \p from and \p to will be set to NULL, and * the number of edges actually returned will be written to \p numEdges. * * @param hGraph - Graph to get the edges from * @param from - Location to return edge endpoints * @param to - Location to return edge endpoints * @param numEdges - See description * * @return * CUDA_SUCCESS, * CUDA_ERROR_DEINITIALIZED, * CUDA_ERROR_NOT_INITIALIZED, * CUDA_ERROR_INVALID_VALUE * * * @see JCudaDriver#cuGraphGetNodes * @see JCudaDriver#cuGraphGetRootNodes * @see JCudaDriver#cuGraphAddDependencies * @see JCudaDriver#cuGraphRemoveDependencies * @see JCudaDriver#cuGraphNodeGetDependencies * @see JCudaDriver#cuGraphNodeGetDependentNodes */ public static int cuGraphGetEdges(CUgraph hGraph, CUgraphNode from[], CUgraphNode to[], long numEdges[]) { return checkResult(cuGraphGetEdgesNative(hGraph, from, to, numEdges)); } private static native int cuGraphGetEdgesNative(CUgraph hGraph, CUgraphNode from[], CUgraphNode to[], long numEdges[]); /** * Returns a node's dependencies.
*
* Returns a list of \p node's dependencies. \p dependencies may be NULL, in which case this * function will return the number of dependencies in \p numDependencies. Otherwise, * \p numDependencies entries will be filled in. If \p numDependencies is higher than the actual * number of dependencies, the remaining entries in \p dependencies will be set to NULL, and the * number of nodes actually obtained will be returned in \p numDependencies. * * @param hNode - Node to query * @param dependencies - Pointer to return the dependencies * @param numDependencies - See description * * @return * CUDA_SUCCESS, * CUDA_ERROR_DEINITIALIZED, * CUDA_ERROR_NOT_INITIALIZED, * CUDA_ERROR_INVALID_VALUE * * * @see JCudaDriver#cuGraphNodeGetDependentNodes * @see JCudaDriver#cuGraphGetNodes * @see JCudaDriver#cuGraphGetRootNodes * @see JCudaDriver#cuGraphGetEdges * @see JCudaDriver#cuGraphAddDependencies * @see JCudaDriver#cuGraphRemoveDependencies */ public static int cuGraphNodeGetDependencies(CUgraphNode hNode, CUgraphNode dependencies[], long numDependencies[]) { return checkResult(cuGraphNodeGetDependenciesNative(hNode, dependencies, numDependencies)); } private static native int cuGraphNodeGetDependenciesNative(CUgraphNode hNode, CUgraphNode dependencies[], long numDependencies[]); /** * Returns a node's dependent nodes.
*
* Returns a list of \p node's dependent nodes. \p dependentNodes may be NULL, in which * case this function will return the number of dependent nodes in \p numDependentNodes. * Otherwise, \p numDependentNodes entries will be filled in. If \p numDependentNodes is * higher than the actual number of dependent nodes, the remaining entries in * \p dependentNodes will be set to NULL, and the number of nodes actually obtained will * be returned in \p numDependentNodes. * * @param hNode - Node to query * @param dependentNodes - Pointer to return the dependent nodes * @param numDependentNodes - See description * * @return * CUDA_SUCCESS, * CUDA_ERROR_DEINITIALIZED, * CUDA_ERROR_NOT_INITIALIZED, * CUDA_ERROR_INVALID_VALUE * * * @see JCudaDriver#cuGraphNodeGetDependencies * @see JCudaDriver#cuGraphGetNodes * @see JCudaDriver#cuGraphGetRootNodes * @see JCudaDriver#cuGraphGetEdges * @see JCudaDriver#cuGraphAddDependencies * @see JCudaDriver#cuGraphRemoveDependencies */ public static int cuGraphNodeGetDependentNodes(CUgraphNode hNode, CUgraphNode dependentNodes[], long numDependentNodes[]) { return checkResult(cuGraphNodeGetDependentNodesNative(hNode, dependentNodes, numDependentNodes)); } private static native int cuGraphNodeGetDependentNodesNative(CUgraphNode hNode, CUgraphNode dependentNodes[], long numDependentNodes[]); /** * Adds dependency edges to a graph.
*
* The number of dependencies to be added is defined by \p numDependencies * Elements in \p from and \p to at corresponding indices define a dependency. * Each node in \p from and \p to must belong to \p hGraph.
*
* If \p numDependencies is 0, elements in \p from and \p to will be ignored. * Specifying an existing dependency will return an error.
* * @param hGraph - Graph to which dependencies are added * @param from - Array of nodes that provide the dependencies * @param to - Array of dependent nodes * @param numDependencies - Number of dependencies to be added * * @return * CUDA_SUCCESS, * CUDA_ERROR_INVALID_VALUE * * * @see JCudaDriver#cuGraphRemoveDependencies * @see JCudaDriver#cuGraphGetEdges * @see JCudaDriver#cuGraphNodeGetDependencies * @see JCudaDriver#cuGraphNodeGetDependentNodes */ public static int cuGraphAddDependencies(CUgraph hGraph, CUgraphNode from[], CUgraphNode to[], long numDependencies) { return checkResult(cuGraphAddDependenciesNative(hGraph, from, to, numDependencies)); } private static native int cuGraphAddDependenciesNative(CUgraph hGraph, CUgraphNode from[], CUgraphNode to[], long numDependencies); /** * Removes dependency edges from a graph.
*
* The number of \p dependencies to be removed is defined by \p numDependencies. * Elements in \p from and \p to at corresponding indices define a dependency. * Each node in \p from and \p to must belong to \p hGraph.
*
* If \p numDependencies is 0, elements in \p from and \p to will be ignored. * Specifying a non-existing dependency will return an error. * * @param hGraph - Graph from which to remove dependencies * @param from - Array of nodes that provide the dependencies * @param to - Array of dependent nodes * @param numDependencies - Number of dependencies to be removed * * @return * CUDA_SUCCESS, * CUDA_ERROR_INVALID_VALUE * * * @see JCudaDriver#cuGraphAddDependencies * @see JCudaDriver#cuGraphGetEdges * @see JCudaDriver#cuGraphNodeGetDependencies * @see JCudaDriver#cuGraphNodeGetDependentNodes */ public static int cuGraphRemoveDependencies(CUgraph hGraph, CUgraphNode from[], CUgraphNode to[], long numDependencies) { return checkResult(cuGraphRemoveDependenciesNative(hGraph, from, to, numDependencies)); } private static native int cuGraphRemoveDependenciesNative(CUgraph hGraph, CUgraphNode from[], CUgraphNode to[], long numDependencies); /** * Remove a node from the graph.
*
* Removes \p hNode from its graph. This operation also severs any dependencies of other nodes * on \p hNode and vice versa.
* * @param hNode - Node to remove * * @return * CUDA_SUCCESS, * CUDA_ERROR_INVALID_VALUE * * * @see JCudaDriver#cuGraphAddChildGraphNode * @see JCudaDriver#cuGraphAddEmptyNode * @see JCudaDriver#cuGraphAddKernelNode * @see JCudaDriver#cuGraphAddHostNode * @see JCudaDriver#cuGraphAddMemcpyNode * @see JCudaDriver#cuGraphAddMemsetNode */ public static int cuGraphDestroyNode(CUgraphNode hNode) { return checkResult(cuGraphDestroyNodeNative(hNode)); } private static native int cuGraphDestroyNodeNative(CUgraphNode hNode); /** * Creates an executable graph from a graph.
*
* Instantiates \p hGraph as an executable graph. The graph is validated for any * structural constraints or intra-node constraints which were not previously * validated. If instantiation is successful, a handle to the instantiated graph * is returned in \p graphExec.
*
* If there are any errors, diagnostic information may be returned in \p errorNode and * \p logBuffer. This is the primary way to inspect instantiation errors. The output * will be null terminated unless the diagnostics overflow * the buffer. In this case, they will be truncated, and the last byte can be * inspected to determine if truncation occurred.
* * @param phGraphExec - Returns instantiated graph * @param hGraph - Graph to instantiate * @param phErrorNode - In case of an instantiation error, this may be modified to * indicate a node contributing to the error * @param logBuffer - A character buffer to store diagnostic messages * @param bufferSize - Size of the log buffer in bytes * * @return * CUDA_SUCCESS, * CUDA_ERROR_DEINITIALIZED, * CUDA_ERROR_NOT_INITIALIZED, * CUDA_ERROR_INVALID_VALUE * * * @see JCudaDriver#cuGraphCreate * @see JCudaDriver#cuGraphLaunch * @see JCudaDriver#cuGraphExecDestroy */ public static int cuGraphInstantiate(CUgraphExec phGraphExec, CUgraph hGraph, CUgraphNode phErrorNode, byte logBuffer[], long bufferSize) { return checkResult(cuGraphInstantiateNative(phGraphExec, hGraph, phErrorNode, logBuffer, bufferSize)); } private static native int cuGraphInstantiateNative(CUgraphExec phGraphExec, CUgraph hGraph, CUgraphNode phErrorNode, byte logBuffer[], long bufferSize); /** * Sets the parameters for a kernel node in the given graphExec.
*
* Sets the parameters of a kernel node in an executable graph \p hGraphExec. * The node is identified by the corresponding node \p hNode in the * non-executable graph, from which the executable graph was instantiated.
*
* \p hNode must not have been removed from the original graph. The \p func field * of \p nodeParams cannot be modified and must match the original value. * All other values can be modified.
*
* The modifications take effect at the next launch of \p hGraphExec. Already * enqueued or running launches of \p hGraphExec are not affected by this call. * \p hNode is also not modified by this call.
*
* @param hGraphExec - The executable graph in which to set the specified node * @param hNode - kernel node from the graph from which graphExec was instantiated * @param nodeParams - Updated Parameters to set * * @return * CUDA_SUCCESS, * CUDA_ERROR_INVALID_VALUE, * * @see JCudaDriver#cuGraphAddKernelNode, * @see JCudaDriver#cuGraphKernelNodeSetParams, * @see JCudaDriver#cuGraphInstantiate */ public static int cuGraphExecKernelNodeSetParams(CUgraphExec hGraphExec, CUgraphNode hNode, CUDA_KERNEL_NODE_PARAMS nodeParams) { return checkResult(cuGraphExecKernelNodeSetParamsNative(hGraphExec, hNode, nodeParams)); } private static native int cuGraphExecKernelNodeSetParamsNative(CUgraphExec hGraphExec, CUgraphNode hNode, CUDA_KERNEL_NODE_PARAMS nodeParams); /** * Sets the parameters for a memcpy node in the given graphExec.
*
* Updates the work represented by \p hNode in \p hGraphExec as though \p hNode had * contained \p copyParams at instantiation. hNode must remain in the graph which was * used to instantiate \p hGraphExec. Changed edges to and from hNode are ignored.
*
* The source and destination memory in \p copyParams must be allocated from the same * contexts as the original source and destination memory. Both the instantiation-time * memory operands and the memory operands in \p copyParams must be 1-dimensional. * Zero-length operations are not supported.
*
* The modifications only affect future launches of \p hGraphExec. Already enqueued * or running launches of \p hGraphExec are not affected by this call. hNode is also * not modified by this call.
*
* Returns CUDA_ERROR_INVALID_VALUE if the memory operands’ mappings changed or * either the original or new memory operands are multidimensional. *
* @param hGraphExec The executable graph in which to set the specified node * @param hNode Memcpy node from the graph which was used to instantiate graphExec * @param copyParams The updated parameters to set * @param ctx Context on which to run the node * * @return CUDA_SUCCESS, CUDA_ERROR_INVALID_VALUE * * @see JCudaDriver#cuGraphInstantiate, * @see JCudaDriver#cuGraphExecKernelNodeSetParams * @see JCudaDriver#cuGraphExecMemsetNodeSetParams * @see JCudaDriver#cuGraphExecHostNodeSetParams */ public static int cuGraphExecMemcpyNodeSetParams(CUgraphExec hGraphExec, CUgraphNode hNode, CUDA_MEMCPY3D copyParams, CUcontext ctx) { return checkResult(cuGraphExecMemcpyNodeSetParamsNative(hGraphExec, hNode, copyParams, ctx)); } private static native int cuGraphExecMemcpyNodeSetParamsNative(CUgraphExec hGraphExec, CUgraphNode hNode, CUDA_MEMCPY3D copyParams, CUcontext ctx); /** * Sets the parameters for a memset node in the given graphExec.
*
* Updates the work represented by \p hNode in \p hGraphExec as though \p hNode had * contained \p memsetParams at instantiation. hNode must remain in the graph which was * used to instantiate \p hGraphExec. Changed edges to and from hNode are ignored.
*
* The destination memory in \p memsetParams must be allocated from the same * contexts as the original destination memory. Both the instantiation-time * memory operand and the memory operand in \p memsetParams must be 1-dimensional. * Zero-length operations are not supported.
*
* The modifications only affect future launches of \p hGraphExec. Already enqueued * or running launches of \p hGraphExec are not affected by this call. hNode is also * not modified by this call.
*
* Returns CUDA_ERROR_INVALID_VALUE if the memory operand’s mappings changed or * either the original or new memory operand are multidimensional. * * @param hGraphExec The executable graph in which to set the specified node * @param hNode Memset node from the graph which was used to instantiate graphExec * @param memsetParams The updated parameters to set * @param ctx Context on which to run the node * * @return CUDA_SUCCESS, CUDA_ERROR_INVALID_VALUE * * @see JCudaDriver#cuGraphInstantiate * @see JCudaDriver#cuGraphExecKernelNodeSetParams * @see JCudaDriver#cuGraphExecMemcpyNodeSetParams * @see JCudaDriver#cuGraphExecHostNodeSetParams */ public static int cuGraphExecMemsetNodeSetParams(CUgraphExec hGraphExec, CUgraphNode hNode, CUDA_MEMSET_NODE_PARAMS memsetParams, CUcontext ctx) { return checkResult(cuGraphExecMemsetNodeSetParamsNative(hGraphExec, hNode, memsetParams, ctx)); } private static native int cuGraphExecMemsetNodeSetParamsNative(CUgraphExec hGraphExec, CUgraphNode hNode, CUDA_MEMSET_NODE_PARAMS memsetParams, CUcontext ctx); /** * Sets the parameters for a host node in the given graphExec.
*
* Updates the work represented by \p hNode in \p hGraphExec as though \p hNode had * contained \p nodeParams at instantiation. hNode must remain in the graph which was * used to instantiate \p hGraphExec. Changed edges to and from hNode are ignored.
*
* The modifications only affect future launches of \p hGraphExec. Already enqueued * or running launches of \p hGraphExec are not affected by this call. hNode is also * not modified by this call.
* * @param hGraphExec The executable graph in which to set the specified node * @param hNode Host node from the graph which was used to instantiate graphExec * @param nodeParams The updated parameters to set * * @return CUDA_SUCCESS, CUDA_ERROR_INVALID_VALUE * * @see JCudaDriver#cuGraphInstantiate * @see JCudaDriver#cuGraphExecKernelNodeSetParams * @see JCudaDriver#cuGraphExecMemcpyNodeSetParams * @see JCudaDriver#cuGraphExecMemsetNodeSetParams */ public static int cuGraphExecHostNodeSetParams(CUgraphExec hGraphExec, CUgraphNode hNode, CUDA_HOST_NODE_PARAMS nodeParams) { return checkResult(cuGraphExecHostNodeSetParamsNative(hGraphExec, hNode, nodeParams)); } private static native int cuGraphExecHostNodeSetParamsNative(CUgraphExec hGraphExec, CUgraphNode hNode, CUDA_HOST_NODE_PARAMS nodeParams); /** * Updates node parameters in the child graph node in the given graphExec.

* * Updates the work represented by \p hNode in \p hGraphExec as though the nodes contained * in \p hNode's graph had the parameters contained in \p childGraph's nodes at instantiation. * \p hNode must remain in the graph which was used to instantiate \p hGraphExec. * Changed edges to and from \p hNode are ignored.

* * The modifications only affect future launches of \p hGraphExec. Already enqueued * or running launches of \p hGraphExec are not affected by this call. \p hNode is also * not modified by this call.

* * The topology of \p childGraph, as well as the node insertion order, must match that * of the graph contained in \p hNode. See ::cuGraphExecUpdate() for a list of restrictions * on what can be updated in an instantiated graph. The update is recursive, so child graph * nodes contained within the top level child graph will also be updated.

* * @param hGraphExec The executable graph in which to set the specified node * @param hNode Host node from the graph which was used to instantiate graphExec * @param childGraph The graph supplying the updated parameters * * @return * CUDA_SUCCESS, * CUDA_ERROR_INVALID_VALUE, * * @see JCudaDriver#cuGraphInstantiate * @see JCudaDriver#cuGraphExecUpdate * @see JCudaDriver#cuGraphExecKernelNodeSetParams * @see JCudaDriver#cuGraphExecMemcpyNodeSetParams * @see JCudaDriver#cuGraphExecMemsetNodeSetParams */ public static int cuGraphExecChildGraphNodeSetParams(CUgraphExec hGraphExec, CUgraphNode hNode, CUgraph childGraph) { return checkResult(cuGraphExecChildGraphNodeSetParamsNative(hGraphExec, hNode, childGraph)); } private static native int cuGraphExecChildGraphNodeSetParamsNative(CUgraphExec hGraphExec, CUgraphNode hNode, CUgraph childGraph); /** * Sets the event for an event record node in the given graphExec.

* * Sets the event of an event record node in an executable graph \p hGraphExec. * The node is identified by the corresponding node \p hNode in the * non-executable graph, from which the executable graph was instantiated.

* * The modifications only affect future launches of \p hGraphExec. Already * enqueued or running launches of \p hGraphExec are not affected by this call. * \p hNode is also not modified by this call.

* * @param hGraphExec The executable graph in which to set the specified node * @param hNode event record node from the graph from which graphExec was instantiated * @param event Updated event to use * * @return * CUDA_SUCCESS, * CUDA_ERROR_INVALID_VALUE, * * @see JCudaDriver#cuGraphAddEventRecordNode * @see JCudaDriver#cuGraphEventRecordNodeGetEvent * @see JCudaDriver#cuGraphEventWaitNodeSetEvent * @see JCudaDriver#cuEventRecord * @see JCudaDriver#cuStreamWaitEvent * @see JCudaDriver#cuGraphCreate * @see JCudaDriver#cuGraphDestroyNode * @see JCudaDriver#cuGraphInstantiate */ public static int cuGraphExecEventRecordNodeSetEvent(CUgraphExec hGraphExec, CUgraphNode hNode, CUevent event) { return checkResult(cuGraphExecEventRecordNodeSetEventNative(hGraphExec, hNode, event)); } private static native int cuGraphExecEventRecordNodeSetEventNative(CUgraphExec hGraphExec, CUgraphNode hNode, CUevent event); /** * Sets the event for an event record node in the given graphExec.

* * Sets the event of an event record node in an executable graph \p hGraphExec. * The node is identified by the corresponding node \p hNode in the * non-executable graph, from which the executable graph was instantiated.

* * The modifications only affect future launches of \p hGraphExec. Already * enqueued or running launches of \p hGraphExec are not affected by this call. * \p hNode is also not modified by this call.

* * @param hGraphExec The executable graph in which to set the specified node * @param hNode event wait node from the graph from which graphExec was instantiated * @param event Updated event to use * * @return * CUDA_SUCCESS, * CUDA_ERROR_INVALID_VALUE, * * @see JCudaDriver#cuGraphAddEventWaitNode * @see JCudaDriver#cuGraphEventWaitNodeGetEvent * @see JCudaDriver#cuGraphEventRecordNodeSetEvent * @see JCudaDriver#cuEventRecord * @see JCudaDriver#cuStreamWaitEvent * @see JCudaDriver#cuGraphCreate * @see JCudaDriver#cuGraphDestroyNode * @see JCudaDriver#cuGraphInstantiate */ public static int cuGraphExecEventWaitNodeSetEvent(CUgraphExec hGraphExec, CUgraphNode hNode, CUevent event) { return checkResult(cuGraphExecEventWaitNodeSetEventNative(hGraphExec, hNode, event)); } private static native int cuGraphExecEventWaitNodeSetEventNative(CUgraphExec hGraphExec, CUgraphNode hNode, CUevent event); /** *
      * Sets the parameters for an external semaphore signal node in the
 * given graphExec.
 *
 * Sets the parameters of an external semaphore signal node in an
 * executable graph hGraphExec. The node is identified by the
 * corresponding node hNode in the non-executable graph, from which
 * the executable graph was instantiated.
 *
 * The hNode must not have been removed from the original graph.
 *
 * The modifications only affect future launches of hGraphExec.
 * Already enqueued or running launches of hGraphExec are not affected
 * by this call. The hNode is also not modified by this call.
 *
 * Changing the numExtSems field of nodeParams is not supported.
 *
 * @param hGraphExec - The executable graph in which to set the specified node
 * @param hNode - semaphore signal node from the graph from which graphExec was instantiated
 * @param nodeParams - Updated Parameters to set
 *
 * @return
 * CUDA_SUCCESS,
 * CUDA_ERROR_INVALID_VALUE
 *
 * @see JCudaDriver#cuGraphCreate
 * @see JCudaDriver#cuGraphDestroyNode
 * @see JCudaDriver#cuGraphInstantiate
 */
public static int cuGraphExecExternalSemaphoresSignalNodeSetParams(CUgraphExec hGraphExec, CUgraphNode hNode, CUDA_EXT_SEM_SIGNAL_NODE_PARAMS nodeParams[])
{
    // TODO Not supported. Pull requests welcome.
    // NOTE(review): this binding is intentionally unimplemented;
    // every call unconditionally throws.
    throw new UnsupportedOperationException("The cuGraphExecExternalSemaphoresSignalNodeSetParams function is not supported in JCuda");
}

/**
      * Sets the parameters for an external semaphore wait node in the
 * given graphExec.
 *
 * Sets the parameters of an external semaphore wait node in an
 * executable graph hGraphExec. The node is identified by the
 * corresponding node hNode in the non-executable graph, from which
 * the executable graph was instantiated.
 *
 * The hNode must not have been removed from the original graph.
 *
 * The modifications only affect future launches of hGraphExec.
 * Already enqueued or running launches of hGraphExec are not affected
 * by this call. The hNode is also not modified by this call.
 *
 * Changing the numExtSems field of nodeParams is not supported.
 *
 * @param hGraphExec - The executable graph in which to set the specified node
 * @param hNode - semaphore wait node from the graph from which graphExec was instantiated
 * @param nodeParams - Updated Parameters to set
 *
 * @return
 * CUDA_SUCCESS,
 * CUDA_ERROR_INVALID_VALUE
 *
 * @see JCudaDriver#cuGraphCreate
 * @see JCudaDriver#cuGraphDestroyNode
 * @see JCudaDriver#cuGraphInstantiate
 */
public static int cuGraphExecExternalSemaphoresWaitNodeSetParams(CUgraphExec hGraphExec, CUgraphNode hNode, CUDA_EXT_SEM_WAIT_NODE_PARAMS nodeParams[])
{
    // TODO Not supported. Pull requests welcome.
    // NOTE(review): this binding is intentionally unimplemented;
    // every call unconditionally throws.
    throw new UnsupportedOperationException("The cuGraphExecExternalSemaphoresWaitNodeSetParams function is not supported in JCuda");
}

/**
 * Uploads an executable graph in a stream.

* * Uploads \p hGraphExec to the device in \p hStream without executing it. Uploads of * the same \p hGraphExec will be serialized. Each upload is ordered behind both any * previous work in \p hStream and any previous launches of \p hGraphExec.

* * @param hGraphExec Executable graph to upload * @param hStream Stream in which to upload the graph * * @return * CUDA_SUCCESS, * CUDA_ERROR_DEINITIALIZED, * CUDA_ERROR_NOT_INITIALIZED, * CUDA_ERROR_INVALID_VALUE * * @see JCudaDriver#cuGraphInstantiate * @see JCudaDriver#cuGraphLaunch * @see JCudaDriver#cuGraphExecDestroy */ public static int cuGraphUpload(CUgraphExec hGraphExec, CUstream hStream) { return checkResult(cuGraphUploadNative(hGraphExec, hStream)); } private static native int cuGraphUploadNative(CUgraphExec hGraphExec, CUstream hStream); /** * Launches an executable graph in a stream.
*
* Executes \p hGraphExec in \p hStream. Only one instance of \p hGraphExec may be executing * at a time. Each launch is ordered behind both any previous work in \p hStream * and any previous launches of \p hGraphExec. To execute a graph concurrently, it must be * instantiated multiple times into multiple executable graphs. * * @param hGraphExec - Executable graph to launch * @param hStream - Stream in which to launch the graph * * @return * CUDA_SUCCESS, * CUDA_ERROR_DEINITIALIZED, * CUDA_ERROR_NOT_INITIALIZED, * CUDA_ERROR_INVALID_VALUE * * @see * JCudaDriver#cuGraphInstantiate * JCudaDriver#cuGraphExecDestroy */ public static int cuGraphLaunch(CUgraphExec hGraphExec, CUstream hStream) { return checkResult(cuGraphLaunchNative(hGraphExec, hStream)); } private static native int cuGraphLaunchNative(CUgraphExec hGraphExec, CUstream hStream); /** * Destroys an executable graph.
*
* Destroys the executable graph specified by \p hGraphExec, as well * as all of its executable nodes. If the executable graph is * in-flight, it will not be terminated, but rather freed * asynchronously on completion. * * @param hGraphExec - Executable graph to destroy * * @return * CUDA_SUCCESS, * CUDA_ERROR_DEINITIALIZED, * CUDA_ERROR_NOT_INITIALIZED, * CUDA_ERROR_INVALID_VALUE * * @see * JCudaDriver#cuGraphInstantiate * JCudaDriver#cuGraphLaunch */ public static int cuGraphExecDestroy(CUgraphExec hGraphExec) { return checkResult(cuGraphExecDestroyNative(hGraphExec)); } private static native int cuGraphExecDestroyNative(CUgraphExec hGraphExec); /** * Destroys a graph.
*
* Destroys the graph specified by \p hGraph, as well as all of its nodes. * * @param hGraph - Graph to destroy * * @return * CUDA_SUCCESS, * CUDA_ERROR_DEINITIALIZED, * CUDA_ERROR_NOT_INITIALIZED, * CUDA_ERROR_INVALID_VALUE * * @see * JCudaDriver#cuGraphCreate */ public static int cuGraphDestroy(CUgraph hGraph) { return checkResult(cuGraphDestroyNative(hGraph)); } private static native int cuGraphDestroyNative(CUgraph hGraph); /** * Check whether an executable graph can be updated with a graph and perform the update if possible.
*
* Updates the node parameters in the instantiated graph specified by \p hGraphExec with the * node parameters in a topologically identical graph specified by \p hGraph.
* * Limitations: *
     * - Kernel nodes:
     *   - The function must not change (same restriction as cuGraphExecKernelNodeSetParams())
     * - Memset and memcpy nodes:
     *   - The CUDA device(s) to which the operand(s) was allocated/mapped cannot change.
     *   - The source/destination memory must be allocated from the same contexts as the original
     *     source/destination memory.
     *   - Only 1D memsets can be changed.
     * - Additional memcpy node restrictions:
     *   - Changing either the source or destination memory type(i.e. CU_MEMORYTYPE_DEVICE,
     *     CU_MEMORYTYPE_ARRAY, etc.) is not supported.
     * 
* Note: The API may add further restrictions in future releases. The return code should always be checked. *
 
     * Some node types are not currently supported:
     * - Empty graph nodes(CU_GRAPH_NODE_TYPE_EMPTY)
     * - Child graphs(CU_GRAPH_NODE_TYPE_GRAPH).
     * 
* cuGraphExecUpdate sets \p updateResult_out to CU_GRAPH_EXEC_UPDATE_ERROR_TOPOLOGY_CHANGED under * the following conditions: *
     * - The count of nodes directly in \p hGraphExec and \p hGraph differ, in which case \p hErrorNode_out
     *   is NULL.
     * - A node is deleted in \p hGraph but not not its pair from \p hGraphExec, in which case \p hErrorNode_out
     *   is NULL.
     * - A node is deleted in \p hGraphExec but not its pair from \p hGraph, in which case \p hErrorNode_out is
     *   the pairless node from \p hGraph.
     * - The dependent nodes of a pair differ, in which case \p hErrorNode_out is the node from \p hGraph.
     * 
* cuGraphExecUpdate sets \p updateResult_out to: *
     * - CU_GRAPH_EXEC_UPDATE_ERROR if passed an invalid value.
     * - CU_GRAPH_EXEC_UPDATE_ERROR_TOPOLOGY_CHANGED if the graph topology changed
     * - CU_GRAPH_EXEC_UPDATE_ERROR_NODE_TYPE_CHANGED if the type of a node changed, in which case
     *   \p hErrorNode_out is set to the node from \p hGraph.
     * - CU_GRAPH_EXEC_UPDATE_ERROR_FUNCTION_CHANGED if the func field of a kernel changed, in which
     *   case \p hErrorNode_out is set to the node from \p hGraph
     * - CU_GRAPH_EXEC_UPDATE_ERROR_PARAMETERS_CHANGED if any parameters to a node changed in a way 
     *   that is not supported, in which case \p hErrorNode_out is set to the node from \p hGraph.
     * - CU_GRAPH_EXEC_UPDATE_ERROR_NOT_SUPPORTED if something about a node is unsupported, like 
     *   the node’s type or configuration, in which case \p hErrorNode_out is set to the node from \p hGraph
     * 
* If \p updateResult_out isn’t set in one of the situations described above, the update check passes * and cuGraphExecUpdate updates \p hGraphExec to match the contents of \p hGraph. If an error happens * during the update, \p updateResult_out will be set to CU_GRAPH_EXEC_UPDATE_ERROR; otherwise, * \p updateResult_out is set to CU_GRAPH_EXEC_UPDATE_SUCCESS.
*
* cuGraphExecUpdate returns CUDA_SUCCESS when the updated was performed successfully. It returns * CUDA_ERROR_GRAPH_EXEC_UPDATE_FAILURE if the graph update was not performed because it included * changes which violated constraints specific to instantiated graph update.
* * @param hGraphExec The instantiated graph to be updated * @param hGraph The graph containing the updated parameters * @param hErrorNode_out The node which caused the permissibility check to forbid the update, if any * @param updateResult_out Whether the graph update was permitted. If was forbidden, the reason why * * @return CUDA_SUCCESS, CUDA_ERROR_GRAPH_EXEC_UPDATE_FAILURE * * @see JCudaDriver#cuGraphInstantiate */ public static int cuGraphExecUpdate(CUgraphExec hGraphExec, CUgraph hGraph, CUgraphNode hErrorNode_out, int updateResult_out[]) { return checkResult(cuGraphExecUpdateNative(hGraphExec, hGraph, hErrorNode_out, updateResult_out)); } private static native int cuGraphExecUpdateNative(CUgraphExec hGraphExec, CUgraph hGraph, CUgraphNode hErrorNode_out, int updateResult_out[]); /** * Copies attributes from source node to destination node. * * Copies attributes from source node src to destination node dst. * Both node must have the same context. * * @param dst Destination node * @param src Source node * For list of attributes see ::CUkernelNodeAttrID * * @return CUDA_SUCCESS, UDA_ERROR_INVALID_VALUE * * @see CUaccessPolicyWindow */ public static int cuGraphKernelNodeCopyAttributes(CUgraphNode dst, CUgraphNode src) { return checkResult(cuGraphKernelNodeCopyAttributesNative(dst, src)); } private static native int cuGraphKernelNodeCopyAttributesNative(CUgraphNode dst, CUgraphNode src); /** * Queries node attribute. * * Queries attribute attr from node hNode and stores it in corresponding * member of value_out. 
* * @param hNode * @param attr * @param value_out * * @return CUDA_SUCCESS, UDA_ERROR_INVALID_VALUE, CUDA_ERROR_INVALID_HANDLE * * @see CUaccessPolicyWindow */ public static int cuGraphKernelNodeGetAttribute(CUgraphNode hNode, int attr, CUkernelNodeAttrValue value_out) { return checkResult(cuGraphKernelNodeGetAttributeNative(hNode, attr, value_out)); } private static native int cuGraphKernelNodeGetAttributeNative(CUgraphNode hNode, int attr, CUkernelNodeAttrValue value_out); /** * Sets node attribute. * * Sets attribute attr on node hNode from corresponding attribute of * value. * * @param hNode * @param attr * @param value * * @return CUDA_SUCCESS, UDA_ERROR_INVALID_VALUE, CUDA_ERROR_INVALID_HANDLE * * @see CUaccessPolicyWindow */ public static int cuGraphKernelNodeSetAttribute(CUgraphNode hNode, int attr, CUkernelNodeAttrValue value) { return checkResult(cuGraphKernelNodeSetAttributeNative(hNode, attr, value)); } private static native int cuGraphKernelNodeSetAttributeNative(CUgraphNode hNode, int attr, CUkernelNodeAttrValue value); /** *
     * \brief Returns occupancy of a function
     *
     * Returns in \p *numBlocks the number of the maximum active blocks per
     * streaming multiprocessor.
     *
     * \param numBlocks       - Returned occupancy
     * \param func            - Kernel for which occupancy is calulated
     * \param blockSize       - Block size the kernel is intended to be launched with
     * \param dynamicSMemSize - Per-block dynamic shared memory usage intended, in bytes
     *
     * \return
     * ::CUDA_SUCCESS,
     * ::CUDA_ERROR_DEINITIALIZED,
     * ::CUDA_ERROR_NOT_INITIALIZED,
     * ::CUDA_ERROR_INVALID_CONTEXT,
     * ::CUDA_ERROR_INVALID_VALUE,
     * ::CUDA_ERROR_UNKNOWN
     * \notefnerr
     * 
*/ public static int cuOccupancyMaxActiveBlocksPerMultiprocessor(int numBlocks[], CUfunction func, int blockSize, long dynamicSMemSize) { return checkResult(cuOccupancyMaxActiveBlocksPerMultiprocessorNative(numBlocks, func, blockSize, dynamicSMemSize)); } private static native int cuOccupancyMaxActiveBlocksPerMultiprocessorNative(int numBlocks[], CUfunction func, int blockSize, long dynamicSMemSize); /** *
     * \brief Suggest a launch configuration with reasonable occupancy
     *
     * An extended version of ::cuOccupancyMaxPotentialBlockSize. In
     * addition to arguments passed to ::cuOccupancyMaxPotentialBlockSize,
     * ::cuOccupancyMaxPotentialBlockSizeWithFlags also takes a \p Flags
     * parameter.
     *
     * The \p Flags parameter controls how special cases are handled. The
     * valid flags are:
     *
     * - ::CU_OCCUPANCY_DEFAULT, which maintains the default behavior as
     *   ::cuOccupancyMaxPotentialBlockSize;
     *
     * - ::CU_OCCUPANCY_DISABLE_CACHING_OVERRIDE, which suppresses the
     *   default behavior on platform where global caching affects
     *   occupancy. On such platforms, the launch configurations that
     *   produces maximal occupancy might not support global
     *   caching. Setting ::CU_OCCUPANCY_DISABLE_CACHING_OVERRIDE
     *   guarantees that the the produced launch configuration is global
     *   caching compatible at a potential cost of occupancy. More information
     *   can be found about this feature in the "Unified L1/Texture Cache"
     *   section of the Maxwell tuning guide.
     *
     * \param minGridSize - Returned minimum grid size needed to achieve the maximum occupancy
     * \param blockSize   - Returned maximum block size that can achieve the maximum occupancy
     * \param func        - Kernel for which launch configuration is calculated
     * \param blockSizeToDynamicSMemSize - A function that calculates how much per-block dynamic shared memory \p func uses based on the block size
     * \param dynamicSMemSize - Dynamic shared memory usage intended, in bytes
     * \param blockSizeLimit  - The maximum block size \p func is designed to handle
     * \param flags       - Options
     *
     * \return
     * ::CUDA_SUCCESS,
     * ::CUDA_ERROR_DEINITIALIZED,
     * ::CUDA_ERROR_NOT_INITIALIZED,
     * ::CUDA_ERROR_INVALID_CONTEXT,
     * ::CUDA_ERROR_INVALID_VALUE,
     * ::CUDA_ERROR_UNKNOWN
     * \notefnerr
     *
     * \sa
     * ::cudaOccupancyMaxPotentialBlockSizeWithFlags
     * 
*/ public static int cuOccupancyMaxActiveBlocksPerMultiprocessorWithFlags(int numBlocks[], CUfunction func, int blockSize, long dynamicSMemSize, int flags) { return checkResult(cuOccupancyMaxActiveBlocksPerMultiprocessorWithFlagsNative(numBlocks, func, blockSize, dynamicSMemSize, flags)); } private static native int cuOccupancyMaxActiveBlocksPerMultiprocessorWithFlagsNative(int numBlocks[], CUfunction func, int blockSize, long dynamicSMemSize, int flags); /** *
     * \brief Returns dynamic shared memory available per block when launching \p numBlocks blocks on SM 
     *
     * Returns in \p *dynamicSmemSize the maximum size of dynamic shared memory to allow \p numBlocks blocks per SM. 
     *
     * \param dynamicSmemSize - Returned maximum dynamic shared memory 
     * \param func            - Kernel function for which occupancy is calculated
     * \param numBlocks       - Number of blocks to fit on SM 
     * \param blockSize       - Size of the blocks
     *
     * \return
     * ::CUDA_SUCCESS,
     * ::CUDA_ERROR_DEINITIALIZED,
     * ::CUDA_ERROR_NOT_INITIALIZED,
     * ::CUDA_ERROR_INVALID_CONTEXT,
     * ::CUDA_ERROR_INVALID_VALUE,
     * ::CUDA_ERROR_UNKNOWN
     * \notefnerr
     *
     * \sa
     * 
*/ public static int cuOccupancyAvailableDynamicSMemPerBlock(long dynamicSmemSize[], CUfunction func, int numBlocks, int blockSize) { return checkResult(cuOccupancyAvailableDynamicSMemPerBlockNative(dynamicSmemSize, func, numBlocks, blockSize)); } private static native int cuOccupancyAvailableDynamicSMemPerBlockNative(long dynamicSmemSize[], CUfunction func, int numBlocks, int blockSize); /** *
     * \brief Suggest a launch configuration with reasonable occupancy
     *
     * Returns in \p *blockSize a reasonable block size that can achieve
     * the maximum occupancy (or, the maximum number of active warps with
     * the fewest blocks per multiprocessor), and in \p *minGridSize the
     * minimum grid size to achieve the maximum occupancy.
     *
     * If \p blockSizeLimit is 0, the configurator will use the maximum
     * block size permitted by the device / function instead.
     *
     * If per-block dynamic shared memory allocation is not needed, the
     * user should leave both \p blockSizeToDynamicSMemSize and \p
     * dynamicSMemSize as 0.
     *
     * If per-block dynamic shared memory allocation is needed, then if
     * the dynamic shared memory size is constant regardless of block
     * size, the size should be passed through \p dynamicSMemSize, and \p
     * blockSizeToDynamicSMemSize should be NULL.
     *
     * Otherwise, if the per-block dynamic shared memory size varies with
     * different block sizes, the user needs to provide a unary function
     * through \p blockSizeToDynamicSMemSize that computes the dynamic
     * shared memory needed by \p func for any given block size. \p
     * dynamicSMemSize is ignored. An example signature is:
     *
     * \code
     *    // Take block size, returns dynamic shared memory needed
     *    size_t blockToSmem(int blockSize);
     * \endcode
     *
     * \param minGridSize - Returned minimum grid size needed to achieve the maximum occupancy
     * \param blockSize   - Returned maximum block size that can achieve the maximum occupancy
     * \param func        - Kernel for which launch configuration is calulated
     * \param blockSizeToDynamicSMemSize - A function that calculates how much per-block dynamic shared memory \p func uses based on the block size
     * \param dynamicSMemSize - Dynamic shared memory usage intended, in bytes
     * \param blockSizeLimit  - The maximum block size \p func is designed to handle
     *
     * \return
     * ::CUDA_SUCCESS,
     * ::CUDA_ERROR_DEINITIALIZED,
     * ::CUDA_ERROR_NOT_INITIALIZED,
     * ::CUDA_ERROR_INVALID_CONTEXT,
     * ::CUDA_ERROR_INVALID_VALUE,
     * ::CUDA_ERROR_UNKNOWN
     * \notefnerr
     * 
*/ public static int cuOccupancyMaxPotentialBlockSize(int minGridSize[], int blockSize[], CUfunction func, CUoccupancyB2DSize blockSizeToDynamicSMemSize, long dynamicSMemSize, int blockSizeLimit) { // The callback involves a state on the native side, // so ensure synchronization here synchronized (OCCUPANCY_LOCK) { return checkResult(cuOccupancyMaxPotentialBlockSizeNative(minGridSize, blockSize, func, blockSizeToDynamicSMemSize, dynamicSMemSize, blockSizeLimit)); } } private static native int cuOccupancyMaxPotentialBlockSizeNative(int minGridSize[], int blockSize[], CUfunction func, CUoccupancyB2DSize blockSizeToDynamicSMemSize, long dynamicSMemSize, int blockSizeLimit); public static int cuOccupancyMaxPotentialBlockSizeWithFlags(int minGridSize[], int blockSize[], CUfunction func, CUoccupancyB2DSize blockSizeToDynamicSMemSize, long dynamicSMemSize, int blockSizeLimit, int flags) { // The callback involves a state on the native side, // so ensure synchronization here synchronized (OCCUPANCY_LOCK) { return checkResult(cuOccupancyMaxPotentialBlockSizeWithFlagsNative(minGridSize, blockSize, func, blockSizeToDynamicSMemSize, dynamicSMemSize, blockSizeLimit, flags)); } } private static native int cuOccupancyMaxPotentialBlockSizeWithFlagsNative(int minGridSize[], int blockSize[], CUfunction func, CUoccupancyB2DSize blockSizeToDynamicSMemSize, long dynamicSMemSize, int blockSizeLimit, int flags); private static final Object OCCUPANCY_LOCK = new Object(); /** * Launches a CUDA function. * *
     * CUresult cuLaunch (
     *      CUfunction f )
     * 
*
*

Launches a CUDA function. * Deprecated Invokes the kernel f * on a 1 x 1 x 1 grid of blocks. The block contains the number of threads * specified by a previous call to cuFuncSetBlockShape(). *

*
* Note: *

Note that this * function may also return error codes from previous, asynchronous * launches. *

*
*

*
* * @param f Kernel to launch * * @return CUDA_SUCCESS, CUDA_ERROR_DEINITIALIZED, CUDA_ERROR_NOT_INITIALIZED, * CUDA_ERROR_INVALID_CONTEXT, CUDA_ERROR_INVALID_VALUE, * CUDA_ERROR_LAUNCH_FAILED, CUDA_ERROR_LAUNCH_OUT_OF_RESOURCES, * CUDA_ERROR_LAUNCH_TIMEOUT, CUDA_ERROR_LAUNCH_INCOMPATIBLE_TEXTURING, * CUDA_ERROR_SHARED_OBJECT_INIT_FAILED * * @see JCudaDriver#cuFuncSetBlockShape * @see JCudaDriver#cuFuncSetSharedSize * @see JCudaDriver#cuFuncGetAttribute * @see JCudaDriver#cuParamSetSize * @see JCudaDriver#cuParamSetf * @see JCudaDriver#cuParamSeti * @see JCudaDriver#cuParamSetv * @see JCudaDriver#cuLaunchGrid * @see JCudaDriver#cuLaunchGridAsync * @see JCudaDriver#cuLaunchKernel * * @deprecated Deprecated in CUDA */ @Deprecated public static int cuLaunch(CUfunction f) { return checkResult(cuLaunchNative(f)); } private static native int cuLaunchNative(CUfunction f); /** * Launches a CUDA function. * *
     * CUresult cuLaunchGrid (
     *      CUfunction f,
     *      int  grid_width,
     *      int  grid_height )
     * 
*
*

Launches a CUDA function. * Deprecated Invokes the kernel f * on a grid_width x grid_height grid of blocks. Each * block contains the number of threads specified by a previous call to * cuFuncSetBlockShape(). *

*
* Note: *

Note that this * function may also return error codes from previous, asynchronous * launches. *

*
*

*
* * @param f Kernel to launch * @param grid_width Width of grid in blocks * @param grid_height Height of grid in blocks * * @return CUDA_SUCCESS, CUDA_ERROR_DEINITIALIZED, CUDA_ERROR_NOT_INITIALIZED, * CUDA_ERROR_INVALID_CONTEXT, CUDA_ERROR_INVALID_VALUE, * CUDA_ERROR_LAUNCH_FAILED, CUDA_ERROR_LAUNCH_OUT_OF_RESOURCES, * CUDA_ERROR_LAUNCH_TIMEOUT, CUDA_ERROR_LAUNCH_INCOMPATIBLE_TEXTURING, * CUDA_ERROR_SHARED_OBJECT_INIT_FAILED * * @see JCudaDriver#cuFuncSetBlockShape * @see JCudaDriver#cuFuncSetSharedSize * @see JCudaDriver#cuFuncGetAttribute * @see JCudaDriver#cuParamSetSize * @see JCudaDriver#cuParamSetf * @see JCudaDriver#cuParamSeti * @see JCudaDriver#cuParamSetv * @see JCudaDriver#cuLaunch * @see JCudaDriver#cuLaunchGridAsync * @see JCudaDriver#cuLaunchKernel * * @deprecated Deprecated in CUDA */ @Deprecated public static int cuLaunchGrid(CUfunction f, int grid_width, int grid_height) { return checkResult(cuLaunchGridNative(f, grid_width, grid_height)); } private static native int cuLaunchGridNative(CUfunction f, int grid_width, int grid_height); /** * Launches a CUDA function. * *
     * CUresult cuLaunchGridAsync (
     *      CUfunction f,
     *      int  grid_width,
     *      int  grid_height,
     *      CUstream hStream )
     * 
*
*

Launches a CUDA function. * Deprecated Invokes the kernel f * on a grid_width x grid_height grid of blocks. Each * block contains the number of threads specified by a previous call to * cuFuncSetBlockShape(). *

*

cuLaunchGridAsync() can optionally be * associated to a stream by passing a non-zero hStream * argument. *

*
* Note: *

Note that this * function may also return error codes from previous, asynchronous * launches. *

*
*

*
* * @param f Kernel to launch * @param grid_width Width of grid in blocks * @param grid_height Height of grid in blocks * @param hStream Stream identifier * * @return CUDA_SUCCESS, CUDA_ERROR_DEINITIALIZED, CUDA_ERROR_NOT_INITIALIZED, * CUDA_ERROR_INVALID_CONTEXT, CUDA_ERROR_INVALID_HANDLE, * CUDA_ERROR_INVALID_VALUE, CUDA_ERROR_LAUNCH_FAILED, * CUDA_ERROR_LAUNCH_OUT_OF_RESOURCES, CUDA_ERROR_LAUNCH_TIMEOUT, * CUDA_ERROR_LAUNCH_INCOMPATIBLE_TEXTURING, * CUDA_ERROR_SHARED_OBJECT_INIT_FAILED * * @see JCudaDriver#cuFuncSetBlockShape * @see JCudaDriver#cuFuncSetSharedSize * @see JCudaDriver#cuFuncGetAttribute * @see JCudaDriver#cuParamSetSize * @see JCudaDriver#cuParamSetf * @see JCudaDriver#cuParamSeti * @see JCudaDriver#cuParamSetv * @see JCudaDriver#cuLaunch * @see JCudaDriver#cuLaunchGrid * @see JCudaDriver#cuLaunchKernel * * @deprecated Deprecated in CUDA */ @Deprecated public static int cuLaunchGridAsync(CUfunction f, int grid_width, int grid_height, CUstream hStream) { return checkResult(cuLaunchGridAsyncNative(f, grid_width, grid_height, hStream)); } private static native int cuLaunchGridAsyncNative(CUfunction f, int grid_width, int grid_height, CUstream hStream); /** * Creates an event. * *
     * CUresult cuEventCreate (
     *      CUevent* phEvent,
     *      unsigned int  Flags )
     * 
*
*

Creates an event. Creates an event * *phEvent with the flags specified via Flags. Valid flags * include: *

    *
  • *

    CU_EVENT_DEFAULT: Default event * creation flag. *

    *
  • *
  • *

    CU_EVENT_BLOCKING_SYNC: * Specifies that the created event should use blocking synchronization. * A CPU thread that uses cuEventSynchronize() to wait on an event created * with this flag will block until the event has actually been recorded. *

    *
  • *
  • *

    CU_EVENT_DISABLE_TIMING: * Specifies that the created event does not need to record timing data. * Events created with this flag specified and the CU_EVENT_BLOCKING_SYNC * flag not specified will provide the best performance when used with * cuStreamWaitEvent() and cuEventQuery(). *

    *
  • *
  • *

    CU_EVENT_INTERPROCESS: Specifies * that the created event may be used as an interprocess event by * cuIpcGetEventHandle(). CU_EVENT_INTERPROCESS must be specified along * with CU_EVENT_DISABLE_TIMING. *

    *
  • *
*

*
* Note: *

Note that this * function may also return error codes from previous, asynchronous * launches. *

*
*

*
* * @param phEvent Returns newly created event * @param Flags Event creation flags * * @return CUDA_SUCCESS, CUDA_ERROR_DEINITIALIZED, CUDA_ERROR_NOT_INITIALIZED, * CUDA_ERROR_INVALID_CONTEXT, CUDA_ERROR_INVALID_VALUE, * CUDA_ERROR_OUT_OF_MEMORY * * @see JCudaDriver#cuEventRecord * @see JCudaDriver#cuEventQuery * @see JCudaDriver#cuEventSynchronize * @see JCudaDriver#cuEventDestroy * @see JCudaDriver#cuEventElapsedTime */ public static int cuEventCreate(CUevent phEvent, int Flags) { return checkResult(cuEventCreateNative(phEvent, Flags)); } private static native int cuEventCreateNative(CUevent phEvent, int Flags); /** * Records an event. * *
     * CUresult cuEventRecord (
     *      CUevent hEvent,
     *      CUstream hStream )
     * 
*
*

Records an event. Records an event. If * hStream is non-zero, the event is recorded after all preceding * operations in hStream have been completed; otherwise, it is * recorded after all preceding operations in the CUDA context have been * completed. Since * operation is asynchronous, cuEventQuery * and/or cuEventSynchronize() must be used to determine when the event * has actually been recorded. *

*

If cuEventRecord() has previously been * called on hEvent, then this call will overwrite any existing * state in hEvent. Any subsequent calls which examine the * status of hEvent will only examine the completion of this * most recent call to cuEventRecord(). *

*

It is necessary that hEvent * and hStream be created on the same context. *

*
* Note: *

Note that this * function may also return error codes from previous, asynchronous * launches. *

*
*

*
* * @param hEvent Event to record * @param hStream Stream to record event for * * @return CUDA_SUCCESS, CUDA_ERROR_DEINITIALIZED, CUDA_ERROR_NOT_INITIALIZED, * CUDA_ERROR_INVALID_CONTEXT, CUDA_ERROR_INVALID_HANDLE, * CUDA_ERROR_INVALID_VALUE * * @see JCudaDriver#cuEventCreate * @see JCudaDriver#cuEventQuery * @see JCudaDriver#cuEventSynchronize * @see JCudaDriver#cuStreamWaitEvent * @see JCudaDriver#cuEventDestroy * @see JCudaDriver#cuEventElapsedTime */ public static int cuEventRecord(CUevent hEvent, CUstream hStream) { return checkResult(cuEventRecordNative(hEvent, hStream)); } private static native int cuEventRecordNative(CUevent hEvent, CUstream hStream); /** * Records an event. *

* Captures in \p hEvent the contents of \p hStream at the time of this call. * \p hEvent and \p hStream must be from the same context. * Calls such as ::cuEventQuery() or ::cuStreamWaitEvent() will then * examine or wait for completion of the work that was captured. Uses of * \p hStream after this call do not modify \p hEvent. See note on default * stream behavior for what is captured in the default case. *

* ::cuEventRecordWithFlags() can be called multiple times on the same event and * will overwrite the previously captured state. Other APIs such as * ::cuStreamWaitEvent() use the most recently captured state at the time * of the API call, and are not affected by later calls to * ::cuEventRecordWithFlags(). Before the first call to ::cuEventRecordWithFlags(), an * event represents an empty set of work, so for example ::cuEventQuery() * would return ::CUDA_SUCCESS. *

* flags include: * - ::CU_EVENT_RECORD_DEFAULT: Default event creation flag. * - ::CU_EVENT_RECORD_EXTERNAL: Event is captured in the graph as an external * event node when performing stream capture. This flag is invalid outside * of stream capture. * * @param hEvent Event to record * @param hStream Stream to record event for * @param flags See ::CUevent_capture_flags * * @return * CUDA_SUCCESS, * CUDA_ERROR_DEINITIALIZED, * CUDA_ERROR_NOT_INITIALIZED, * CUDA_ERROR_INVALID_CONTEXT, * CUDA_ERROR_INVALID_HANDLE, * CUDA_ERROR_INVALID_VALUE * * @see JCudaDriver#cuEventCreate * @see JCudaDriver#cuEventQuery * @see JCudaDriver#cuEventSynchronize * @see JCudaDriver#cuStreamWaitEvent * @see JCudaDriver#cuEventDestroy * @see JCudaDriver#cuEventElapsedTime * @see JCudaDriver#cuEventRecord * @see JCudaDriver#cudaEventRecord */ public static int cuEventRecordWithFlags(CUevent hEvent, CUstream hStream, int flags) { return checkResult(cuEventRecordWithFlagsNative(hEvent, hStream, flags)); } private static native int cuEventRecordWithFlagsNative(CUevent hEvent, CUstream hStream, int flags); /** * Queries an event's status. * *
     * CUresult cuEventQuery (
     *      CUevent hEvent )
     * 
*
*

Queries an event's status. Query the * status of all device work preceding the most recent call to * cuEventRecord() (in the appropriate compute streams, as specified by * the arguments to cuEventRecord()). *

*

If this work has successfully been * completed by the device, or if cuEventRecord() has not been called on * hEvent, then CUDA_SUCCESS is returned. If this work has not * yet been completed by the device then CUDA_ERROR_NOT_READY is * returned. *

*
* Note: *

Note that this * function may also return error codes from previous, asynchronous * launches. *

*
*

*
* * @param hEvent Event to query * * @return CUDA_SUCCESS, CUDA_ERROR_DEINITIALIZED, CUDA_ERROR_NOT_INITIALIZED, * CUDA_ERROR_INVALID_HANDLE, CUDA_ERROR_INVALID_VALUE, * CUDA_ERROR_NOT_READY * * @see JCudaDriver#cuEventCreate * @see JCudaDriver#cuEventRecord * @see JCudaDriver#cuEventSynchronize * @see JCudaDriver#cuEventDestroy * @see JCudaDriver#cuEventElapsedTime */ public static int cuEventQuery(CUevent hEvent) { return checkResult(cuEventQueryNative(hEvent)); } private static native int cuEventQueryNative(CUevent hEvent); /** * Waits for an event to complete. * *
     * CUresult cuEventSynchronize (
     *      CUevent hEvent )
     * 
*
*

Waits for an event to complete. Wait * until the completion of all device work preceding the most recent call * to cuEventRecord() (in the appropriate compute streams, as specified * by the arguments to cuEventRecord()). *

*

If cuEventRecord() has not been called * on hEvent, CUDA_SUCCESS is returned immediately. *

*

Waiting for an event that was created * with the CU_EVENT_BLOCKING_SYNC flag will cause the calling CPU thread * to block until the event has been completed by the device. If the * CU_EVENT_BLOCKING_SYNC flag has not been set, then the CPU thread will * busy-wait until the event has been completed by the device. *

*
* Note: *

Note that this * function may also return error codes from previous, asynchronous * launches. *

*
*

*
* * @param hEvent Event to wait for * * @return CUDA_SUCCESS, CUDA_ERROR_DEINITIALIZED, CUDA_ERROR_NOT_INITIALIZED, * CUDA_ERROR_INVALID_CONTEXT, CUDA_ERROR_INVALID_HANDLE * * @see JCudaDriver#cuEventCreate * @see JCudaDriver#cuEventRecord * @see JCudaDriver#cuEventQuery * @see JCudaDriver#cuEventDestroy * @see JCudaDriver#cuEventElapsedTime */ public static int cuEventSynchronize(CUevent hEvent) { return checkResult(cuEventSynchronizeNative(hEvent)); } private static native int cuEventSynchronizeNative(CUevent hEvent); /** * Destroys an event. * *
     * CUresult cuEventDestroy (
     *      CUevent hEvent )
     *
     * <p>
     * Destroys the event specified by hEvent.
     * <p>
     * In case hEvent has been recorded but has not yet been completed
     * when cuEventDestroy() is called, the function will return
     * immediately and the resources associated with hEvent will be
     * released automatically once the device has completed hEvent.
     * <p>
     * Note that this function may also return error codes from previous,
     * asynchronous launches.
     *
     * @param hEvent Event to destroy
     *
     * @return CUDA_SUCCESS, CUDA_ERROR_DEINITIALIZED, CUDA_ERROR_NOT_INITIALIZED,
     * CUDA_ERROR_INVALID_CONTEXT, CUDA_ERROR_INVALID_HANDLE
     *
     * @see JCudaDriver#cuEventCreate
     * @see JCudaDriver#cuEventRecord
     * @see JCudaDriver#cuEventQuery
     * @see JCudaDriver#cuEventSynchronize
     * @see JCudaDriver#cuEventElapsedTime
     */
    public static int cuEventDestroy(CUevent hEvent)
    {
        return checkResult(cuEventDestroyNative(hEvent));
    }
    private static native int cuEventDestroyNative(CUevent hEvent);

    /**
     * Computes the elapsed time between two events.
     *
     * CUresult cuEventElapsedTime (
     *      float* pMilliseconds,
     *      CUevent hStart,
     *      CUevent hEnd )
     *
     * <p>
     * Computes the elapsed time between two events (in milliseconds with
     * a resolution of around 0.5 microseconds).
     * <p>
     * If either event was last recorded in a non-NULL stream, the
     * resulting time may be greater than expected (even if both used the
     * same stream handle). This happens because the cuEventRecord()
     * operation takes place asynchronously and there is no guarantee
     * that the measured latency is actually just between the two events.
     * Any number of other different stream operations could execute in
     * between the two measured events, thus altering the timing in a
     * significant way.
     * <p>
     * If cuEventRecord() has not been called on either event then
     * CUDA_ERROR_INVALID_HANDLE is returned. If cuEventRecord() has been
     * called on both events but one or both of them has not yet been
     * completed (that is, cuEventQuery() would return
     * CUDA_ERROR_NOT_READY on at least one of the events),
     * CUDA_ERROR_NOT_READY is returned. If either event was created with
     * the CU_EVENT_DISABLE_TIMING flag, then this function will return
     * CUDA_ERROR_INVALID_HANDLE.
     * <p>
     * Note that this function may also return error codes from previous,
     * asynchronous launches.
     *
     * @param pMilliseconds Time between hStart and hEnd in ms
     * @param hStart Starting event
     * @param hEnd Ending event
     *
     * @return CUDA_SUCCESS, CUDA_ERROR_DEINITIALIZED, CUDA_ERROR_NOT_INITIALIZED,
     * CUDA_ERROR_INVALID_CONTEXT, CUDA_ERROR_INVALID_HANDLE,
     * CUDA_ERROR_NOT_READY
     *
     * @see JCudaDriver#cuEventCreate
     * @see JCudaDriver#cuEventRecord
     * @see JCudaDriver#cuEventQuery
     * @see JCudaDriver#cuEventSynchronize
     * @see JCudaDriver#cuEventDestroy
     */
    public static int cuEventElapsedTime(float pMilliseconds[], CUevent hStart, CUevent hEnd)
    {
        return checkResult(cuEventElapsedTimeNative(pMilliseconds, hStart, hEnd));
    }
    private static native int cuEventElapsedTimeNative(float pMilliseconds[], CUevent hStart, CUevent hEnd);

    /**
     * Wait on a memory location.
     * <p>
     * Enqueues a synchronization of the stream on the given memory
     * location. Work ordered after the operation will block until the
     * given condition on the memory is satisfied. By default, the
     * condition is to wait for (int32_t)(*addr - value) >= 0, a cyclic
     * greater-or-equal. Other condition types can be specified via
     * flags.
     * <p>
     * If the memory was registered via cuMemHostRegister(), the device
     * pointer should be obtained with cuMemHostGetDevicePointer(). This
     * function cannot be used with managed memory (cuMemAllocManaged).
     * <p>
     * On Windows, the device must be using TCC, or the operation is not
     * supported. See cuDeviceGetAttribute().
     *
     * @param stream The stream to synchronize on the memory location.
     * @param addr The memory location to wait on.
     * @param value The value to compare with the memory location.
     * @param flags See {@link CUstreamWaitValue_flags}
     * @return CUDA_SUCCESS, CUDA_ERROR_INVALID_VALUE, CUDA_ERROR_NOT_SUPPORTED
     *
     * @see JCudaDriver#cuStreamWriteValue32
     * @see JCudaDriver#cuStreamBatchMemOp
     * @see JCudaDriver#cuMemHostRegister
     * @see JCudaDriver#cuStreamWaitEvent
     */
    public static int cuStreamWaitValue32(CUstream stream, CUdeviceptr addr, int value, int flags)
    {
        return checkResult(cuStreamWaitValue32Native(stream, addr, value, flags));
    }
    private static native int cuStreamWaitValue32Native(CUstream stream, CUdeviceptr addr, int value, int flags);

    /**
     * Write a value to memory.
     * <p>
     * Write a value to memory. Unless the
     * CU_STREAM_WRITE_VALUE_NO_MEMORY_BARRIER flag is passed, the write
     * is preceded by a system-wide memory fence, equivalent to a
     * __threadfence_system() but scoped to the stream rather than a CUDA
     * thread.
     * <p>
     * If the memory was registered via cuMemHostRegister(), the device
     * pointer should be obtained with cuMemHostGetDevicePointer(). This
     * function cannot be used with managed memory (cuMemAllocManaged).
     * <p>
     * On Windows, the device must be using TCC, or the operation is not
     * supported. See cuDeviceGetAttribute().
     *
     * @param stream The stream to do the write in.
     * @param addr The device address to write to.
     * @param value The value to write.
     * @param flags See {@link CUstreamWriteValue_flags}
     * @return CUDA_SUCCESS, CUDA_ERROR_INVALID_VALUE, CUDA_ERROR_NOT_SUPPORTED
     *
     * @see JCudaDriver#cuStreamWaitValue32
     * @see JCudaDriver#cuStreamBatchMemOp
     * @see JCudaDriver#cuMemHostRegister
     * @see JCudaDriver#cuEventRecord
     */
    public static int cuStreamWriteValue32(CUstream stream, CUdeviceptr addr, int value, int flags)
    {
        return checkResult(cuStreamWriteValue32Native(stream, addr, value, flags));
    }
    private static native int cuStreamWriteValue32Native(CUstream stream, CUdeviceptr addr, int value, int flags);

    /**
     * Wait on a memory location.
     * <p>
     * Enqueues a synchronization of the stream on the given memory
     * location. Work ordered after the operation will block until the
     * given condition on the memory is satisfied. By default, the
     * condition is to wait for (int64_t)(*addr - value) >= 0, a cyclic
     * greater-or-equal. Other condition types can be specified via
     * flags.
     * <p>
     * If the memory was registered via cuMemHostRegister(), the device
     * pointer should be obtained with cuMemHostGetDevicePointer(). This
     * function cannot be used with managed memory (cuMemAllocManaged).
     * <p>
     * On Windows, the device must be using TCC, or the operation is not
     * supported. See cuDeviceGetAttribute().
     *
     * @param stream The stream to synchronize on the memory location.
     * @param addr The memory location to wait on.
     * @param value The value to compare with the memory location.
     * @param flags See {@link CUstreamWaitValue_flags}
     * @return CUDA_SUCCESS, CUDA_ERROR_INVALID_VALUE, CUDA_ERROR_NOT_SUPPORTED
     *
     * @see JCudaDriver#cuStreamWriteValue64
     * @see JCudaDriver#cuStreamBatchMemOp
     * @see JCudaDriver#cuMemHostRegister
     * @see JCudaDriver#cuStreamWaitEvent
     */
    public static int cuStreamWaitValue64(CUstream stream, CUdeviceptr addr, long value, int flags)
    {
        return checkResult(cuStreamWaitValue64Native(stream, addr, value, flags));
    }
    private static native int cuStreamWaitValue64Native(CUstream stream, CUdeviceptr addr, long value, int flags);

    /**
     * Write a value to memory.
     * <p>
     * Write a value to memory. Unless the
     * CU_STREAM_WRITE_VALUE_NO_MEMORY_BARRIER flag is passed, the write
     * is preceded by a system-wide memory fence, equivalent to a
     * __threadfence_system() but scoped to the stream rather than a CUDA
     * thread.
     * <p>
     * If the memory was registered via cuMemHostRegister(), the device
     * pointer should be obtained with cuMemHostGetDevicePointer(). This
     * function cannot be used with managed memory (cuMemAllocManaged).
     * <p>
     * On Windows, the device must be using TCC, or the operation is not
     * supported. See cuDeviceGetAttribute().
     *
     * @param stream The stream to do the write in.
     * @param addr The device address to write to.
     * @param value The value to write.
     * @param flags See {@link CUstreamWriteValue_flags}
     * @return CUDA_SUCCESS, CUDA_ERROR_INVALID_VALUE, CUDA_ERROR_NOT_SUPPORTED
     *
     * @see JCudaDriver#cuStreamWaitValue64
     * @see JCudaDriver#cuStreamBatchMemOp
     * @see JCudaDriver#cuMemHostRegister
     * @see JCudaDriver#cuEventRecord
     */
    public static int cuStreamWriteValue64(CUstream stream, CUdeviceptr addr, long value, int flags)
    {
        return checkResult(cuStreamWriteValue64Native(stream, addr, value, flags));
    }
    private static native int cuStreamWriteValue64Native(CUstream stream, CUdeviceptr addr, long value, int flags);

    /**
     * NOTE: This function is not yet supported in JCuda, and will throw
     * an UnsupportedOperationException!
     * <p>
     * Batch operations to synchronize the stream via memory operations.
     * <p>
     * This is a batch version of cuStreamWaitValue32() and
     * cuStreamWriteValue32(). Batching operations may avoid some
     * performance overhead in both the API call and the device execution
     * versus adding them to the stream in separate API calls. The
     * operations are enqueued in the order they appear in the array.
     * <p>
     * See CUstreamBatchMemOpType for the full set of supported
     * operations, and cuStreamWaitValue32() and cuStreamWriteValue32()
     * for details of specific operations.
     * <p>
     * On Windows, the device must be using TCC, or this call is not
     * supported. See cuDeviceGetAttribute().
     *
     * @param stream The stream to enqueue the operations in.
     * @param count The number of operations in the array. Must be less
     * than 256.
     * @param paramArray The types and parameters of the individual
     * operations.
     * @param flags Reserved for future expansion; must be 0.
     * @return CUDA_SUCCESS, CUDA_ERROR_INVALID_VALUE, CUDA_ERROR_NOT_SUPPORTED
     *
     * @see JCudaDriver#cuStreamWaitValue32
     * @see JCudaDriver#cuStreamWriteValue32
     * @see JCudaDriver#cuMemHostRegister
     */
    public static int cuStreamBatchMemOp(CUstream stream, int count, CUstreamBatchMemOpParams paramArray[], int flags)
    {
        // TODO Implement cuStreamBatchMemOp
        throw new UnsupportedOperationException("The cuStreamBatchMemOp function is not yet supported in JCuda");
    }

    /**
     * Returns information about a pointer.
     *
     * CUresult cuPointerGetAttribute (
     *      void* data,
     *      CUpointer_attribute attribute,
     *      CUdeviceptr ptr )
     *
     * <p>
     * Returns information about a pointer. The supported attributes are:
     * <ul>
     * <li>CU_POINTER_ATTRIBUTE_CONTEXT: Returns in *data the CUcontext
     * in which ptr was allocated or registered. The type of data must be
     * CUcontext *. If ptr was not allocated by, mapped by, or registered
     * with a CUcontext which uses unified virtual addressing then
     * CUDA_ERROR_INVALID_VALUE is returned.</li>
     * <li>CU_POINTER_ATTRIBUTE_MEMORY_TYPE: Returns in *data the
     * physical memory type of the memory that ptr addresses as a
     * CUmemorytype enumerated value. The type of data must be unsigned
     * int. If ptr addresses device memory then *data is set to
     * CU_MEMORYTYPE_DEVICE; the particular CUdevice on which the memory
     * resides is the CUdevice of the CUcontext returned by the
     * CU_POINTER_ATTRIBUTE_CONTEXT attribute of ptr. If ptr addresses
     * host memory then *data is set to CU_MEMORYTYPE_HOST. If ptr was
     * not allocated by, mapped by, or registered with a CUcontext which
     * uses unified virtual addressing then CUDA_ERROR_INVALID_VALUE is
     * returned. If the current CUcontext does not support unified
     * virtual addressing then CUDA_ERROR_INVALID_CONTEXT is
     * returned.</li>
     * <li>CU_POINTER_ATTRIBUTE_DEVICE_POINTER: Returns in *data the
     * device pointer value through which ptr may be accessed by kernels
     * running in the current CUcontext. The type of data must be
     * CUdeviceptr *. If there exists no device pointer value through
     * which kernels running in the current CUcontext may access ptr then
     * CUDA_ERROR_INVALID_VALUE is returned. If there is no current
     * CUcontext then CUDA_ERROR_INVALID_CONTEXT is returned. Except in
     * the exceptional disjoint addressing cases discussed below, the
     * value returned in *data will equal the input value ptr.</li>
     * <li>CU_POINTER_ATTRIBUTE_HOST_POINTER: Returns in *data the host
     * pointer value through which ptr may be accessed by the host
     * program. The type of data must be void **. If there exists no host
     * pointer value through which the host program may directly access
     * ptr then CUDA_ERROR_INVALID_VALUE is returned. Except in the
     * exceptional disjoint addressing cases discussed below, the value
     * returned in *data will equal the input value ptr.</li>
     * <li>CU_POINTER_ATTRIBUTE_P2P_TOKENS: Returns in *data two tokens
     * for use with the nv-p2p.h Linux kernel interface. data must be a
     * struct of type CUDA_POINTER_ATTRIBUTE_P2P_TOKENS. ptr must be a
     * pointer to memory obtained from cuMemAlloc(). Note that p2pToken
     * and vaSpaceToken are only valid for the lifetime of the source
     * allocation; a subsequent allocation at the same address may return
     * completely different tokens.</li>
     * </ul>
     * <p>
     * Note that for most allocations in the unified virtual address
     * space the host and device pointer for accessing the allocation
     * will be the same. The exceptions to this are
     * <ul>
     * <li>user memory registered using cuMemHostRegister</li>
     * <li>host memory allocated using cuMemHostAlloc with the
     * CU_MEMHOSTALLOC_WRITECOMBINED flag</li>
     * </ul>
     * For these types of allocation there will exist separate, disjoint
     * host and device addresses for accessing the allocation. In
     * particular
     * <ul>
     * <li>The host address will correspond to an invalid unmapped device
     * address (which will result in an exception if accessed from the
     * device)</li>
     * <li>The device address will correspond to an invalid unmapped host
     * address (which will result in an exception if accessed from the
     * host).</li>
     * </ul>
     * For these types of allocations, querying
     * CU_POINTER_ATTRIBUTE_HOST_POINTER and
     * CU_POINTER_ATTRIBUTE_DEVICE_POINTER may be used to retrieve the
     * host and device addresses from either address.
     * <p>
     * Note that this function may also return error codes from previous,
     * asynchronous launches.
     *
     * @param data Returned pointer attribute value
     * @param attribute Pointer attribute to query
     * @param ptr Pointer
     *
     * @return CUDA_SUCCESS, CUDA_ERROR_DEINITIALIZED, CUDA_ERROR_NOT_INITIALIZED,
     * CUDA_ERROR_INVALID_CONTEXT, CUDA_ERROR_INVALID_VALUE,
     * CUDA_ERROR_INVALID_DEVICE
     *
     * @see JCudaDriver#cuMemAlloc
     * @see JCudaDriver#cuMemFree
     * @see JCudaDriver#cuMemAllocHost
     * @see JCudaDriver#cuMemFreeHost
     * @see JCudaDriver#cuMemHostAlloc
     * @see JCudaDriver#cuMemHostRegister
     * @see JCudaDriver#cuMemHostUnregister
     */
    public static int cuPointerGetAttribute(Pointer data, int attribute, CUdeviceptr ptr)
    {
        return checkResult(cuPointerGetAttributeNative(data, attribute, ptr));
    }
    private static native int cuPointerGetAttributeNative(Pointer data, int attribute, CUdeviceptr ptr);

    /**
     * Prefetches memory to the specified destination device
     * <p>
     * Prefetches memory to the specified destination device. devPtr is
     * the base device pointer of the memory to be prefetched and
     * dstDevice is the destination device. count specifies the number of
     * bytes to copy. hStream is the stream in which the operation is
     * enqueued.
     * <p>
     * Passing in CU_DEVICE_CPU for dstDevice will prefetch the data to
     * CPU memory.
     * <p>
     * If no physical memory has been allocated for this region, then
     * this memory region will be populated and mapped on the destination
     * device. If there's insufficient memory to prefetch the desired
     * region, the Unified Memory driver may evict pages belonging to
     * other memory regions to make room. If there's no memory that can
     * be evicted, then the Unified Memory driver will prefetch less than
     * what was requested.
     * <p>
     * In the normal case, any mappings to the previous location of the
     * migrated pages are removed and mappings for the new location are
     * only setup on the dstDevice. The application can exercise finer
     * control on these mappings using cuMemAdvise.
     * <p>
     * Note that this function is asynchronous with respect to the host
     * and all work on other devices.
     *
     * @param devPtr Pointer to be prefetched
     * @param count Size in bytes
     * @param dstDevice Destination device to prefetch to
     * @param hStream Stream to enqueue prefetch operation
     *
     * @return CUDA_SUCCESS, CUDA_ERROR_INVALID_VALUE, CUDA_ERROR_INVALID_DEVICE
     *
     * @see JCudaDriver#cuMemcpy
     * @see JCudaDriver#cuMemcpyPeer
     * @see JCudaDriver#cuMemcpyAsync
     * @see JCudaDriver#cuMemcpy3DPeerAsync
     * @see JCudaDriver#cuMemAdvise
     */
    public static int cuMemPrefetchAsync(CUdeviceptr devPtr, long count, CUdevice dstDevice, CUstream hStream)
    {
        return checkResult(cuMemPrefetchAsyncNative(devPtr, count, dstDevice, hStream));
    }
    private static native int cuMemPrefetchAsyncNative(CUdeviceptr devPtr, long count, CUdevice dstDevice, CUstream hStream);

    /**
     * Advise about the usage of a given memory range
     * <p>
     * Advise the Unified Memory subsystem about the usage pattern for
     * the memory range starting at devPtr with a size of count bytes.
     * <p>
     * The advice parameter can take the following values:
     * <ul>
     * <li>CU_MEM_ADVISE_SET_READ_MOSTLY: This implies that the data is
     * mostly going to be read from and only occasionally written to.
     * This allows the driver to create read-only copies of the data in a
     * processor's memory when that processor accesses it. Similarly, if
     * cuMemPrefetchAsync is called on this region, it will create a
     * read-only copy of the data on the destination processor. When a
     * processor writes to this data, all copies of the corresponding
     * page are invalidated except for the one where the write occurred.
     * The device argument is ignored for this advice.</li>
     * <li>CU_MEM_ADVISE_UNSET_READ_MOSTLY: Undoes the effect of
     * CU_MEM_ADVISE_SET_READ_MOSTLY. Any read duplicated copies of the
     * data will be freed no later than the next write access to that
     * data.</li>
     * <li>CU_MEM_ADVISE_SET_PREFERRED_LOCATION: This advice sets the
     * preferred location for the data to be the memory belonging to
     * device. Passing in CU_DEVICE_CPU for device sets the preferred
     * location as CPU memory. Setting the preferred location does not
     * cause data to migrate to that location immediately. Instead, it
     * guides the migration policy when a fault occurs on that memory
     * region. If the data is already in its preferred location and the
     * faulting processor can establish a mapping without requiring the
     * data to be migrated, then the migration will be avoided. On the
     * other hand, if the data is not in its preferred location or if a
     * direct mapping cannot be established, then it will be migrated to
     * the processor accessing it. Note that setting the preferred
     * location does not prevent data prefetching done using
     * cuMemPrefetchAsync. Having a preferred location can override the
     * thrash detection and resolution logic in the Unified Memory
     * driver: if a page is detected to be constantly thrashing between
     * CPU and GPU memory, the page would normally eventually be pinned
     * to CPU memory, but if the preferred location is set as GPU memory
     * the page will continue to thrash indefinitely. When the Unified
     * Memory driver has to evict pages from a certain location on
     * account of that memory being oversubscribed, the preferred
     * location will be used to decide the destination to which a page
     * should be evicted. If CU_MEM_ADVISE_SET_READ_MOSTLY is also set on
     * this memory region or any subset of it, the preferred location
     * will be ignored for that subset.</li>
     * <li>CU_MEM_ADVISE_UNSET_PREFERRED_LOCATION: Undoes the effect of
     * CU_MEM_ADVISE_SET_PREFERRED_LOCATION and changes the preferred
     * location to none.</li>
     * <li>CU_MEM_ADVISE_SET_ACCESSED_BY: This advice implies that the
     * data will be accessed by device. This does not cause data
     * migration and has no impact on the location of the data per se.
     * Instead, it causes the data to always be mapped in the specified
     * processor's page tables, as long as the location of the data
     * permits a mapping to be established. If the data gets migrated for
     * any reason, the mappings are updated accordingly. This advice is
     * useful in scenarios where data locality is not important, but
     * avoiding faults is. Note that on CPU access of this data, the data
     * may be migrated to CPU memory because the CPU typically cannot
     * access GPU memory directly; any GPU that had the
     * CU_MEM_ADVISE_SET_ACCESSED_BY flag set for this data will then
     * have its mapping updated to point to the page in CPU memory.</li>
     * <li>CU_MEM_ADVISE_UNSET_ACCESSED_BY: Undoes the effect of
     * CU_MEM_ADVISE_SET_ACCESSED_BY. The current set of mappings may be
     * removed at any time causing accesses to result in page
     * faults.</li>
     * </ul>
     * <p>
     * Passing in CU_DEVICE_CPU for device will set the advice for the
     * CPU.
     * <p>
     * Note that this function is asynchronous with respect to the host
     * and all work on other devices.
     *
     * @param devPtr Pointer to memory to set the advice for
     * @param count Size in bytes of the memory range
     * @param advice Advice to be applied for the specified memory range
     * @param device Device to apply the advice for
     *
     * @return CUDA_SUCCESS, CUDA_ERROR_INVALID_VALUE, CUDA_ERROR_INVALID_DEVICE
     *
     * @see JCudaDriver#cuMemcpy
     * @see JCudaDriver#cuMemcpyPeer
     * @see JCudaDriver#cuMemcpyAsync
     * @see JCudaDriver#cuMemcpy3DPeerAsync
     * @see JCudaDriver#cuMemPrefetchAsync
     */
    public static int cuMemAdvise(CUdeviceptr devPtr, long count, int advice, CUdevice device)
    {
        return checkResult(cuMemAdviseNative(devPtr, count, advice, device));
    }
    private static native int cuMemAdviseNative(CUdeviceptr devPtr, long count, int advice, CUdevice device);

    /**
     * Query an attribute of a given memory range.
     * <p>
     * Query an attribute about the memory range starting at devPtr with
     * a size of count bytes. The memory range must refer to managed
     * memory allocated via cuMemAllocManaged or declared via __managed__
     * variables.
     * <p>
     * The attribute parameter can take the following values:
     * <ul>
     * <li>CU_MEM_RANGE_ATTRIBUTE_READ_MOSTLY: If this attribute is
     * specified, data will be interpreted as a 32-bit integer, and
     * dataSize must be 4. The result returned will be 1 if all pages in
     * the given memory range have read-duplication enabled, or 0
     * otherwise.</li>
     * <li>CU_MEM_RANGE_ATTRIBUTE_PREFERRED_LOCATION: If this attribute
     * is specified, data will be interpreted as a 32-bit integer, and
     * dataSize must be 4. The result returned will be a GPU device id if
     * all pages in the memory range have that GPU as their preferred
     * location, or it will be CU_DEVICE_CPU if all pages in the memory
     * range have the CPU as their preferred location, or it will be
     * CU_DEVICE_INVALID if either all the pages don't have the same
     * preferred location or some of the pages don't have a preferred
     * location at all. Note that the actual location of the pages in the
     * memory range at the time of the query may be different from the
     * preferred location.</li>
     * <li>CU_MEM_RANGE_ATTRIBUTE_ACCESSED_BY: If this attribute is
     * specified, data will be interpreted as an array of 32-bit
     * integers, and dataSize must be a non-zero multiple of 4. The
     * result returned will be a list of device ids that had
     * CU_MEM_ADVISE_SET_ACCESSED_BY set for that entire memory range. If
     * any device does not have that advice set for the entire memory
     * range, that device will not be included. If data is larger than
     * the number of devices that have that advice set for that memory
     * range, CU_DEVICE_INVALID will be returned in all the extra space
     * provided. For example, if dataSize is 12 (i.e. data has 3
     * elements) and only device 0 has the advice set, then the result
     * returned will be { 0, CU_DEVICE_INVALID, CU_DEVICE_INVALID }. If
     * data is smaller than the number of devices that have that advice
     * set, then only as many devices will be returned as can fit in the
     * array. There is no guarantee on which specific devices will be
     * returned, however.</li>
     * <li>CU_MEM_RANGE_ATTRIBUTE_LAST_PREFETCH_LOCATION: If this
     * attribute is specified, data will be interpreted as a 32-bit
     * integer, and dataSize must be 4. The result returned will be the
     * last location to which all pages in the memory range were
     * prefetched explicitly via cuMemPrefetchAsync. This will either be
     * a GPU id or CU_DEVICE_CPU depending on whether the last location
     * for prefetch was a GPU or the CPU respectively. If any page in the
     * memory range was never explicitly prefetched or if all pages were
     * not prefetched to the same location, CU_DEVICE_INVALID will be
     * returned. Note that this simply returns the last location that the
     * application requested to prefetch the memory range to. It gives no
     * indication as to whether the prefetch operation to that location
     * has completed or even begun.</li>
     * </ul>
     *
     * @param data A pointer to a memory location where the result of the
     * attribute query will be written to.
     * @param dataSize Size in bytes of the memory pointed to by data
     * @param attribute The attribute to query
     * @param devPtr Start of the range to query
     * @param count Size of the range to query
     * @return CUDA_SUCCESS, CUDA_ERROR_INVALID_VALUE, CUDA_ERROR_INVALID_DEVICE
     *
     * @see JCudaDriver#cuMemRangeGetAttributes
     * @see JCudaDriver#cuMemPrefetchAsync
     * @see JCudaDriver#cuMemAdvise
     */
    public static int cuMemRangeGetAttribute(Pointer data, long dataSize, int attribute, CUdeviceptr devPtr, long count)
    {
        return checkResult(cuMemRangeGetAttributeNative(data, dataSize, attribute, devPtr, count));
    }
    private static native int cuMemRangeGetAttributeNative(Pointer data, long dataSize, int attribute, CUdeviceptr devPtr, long count);

    /**
     * Query attributes of a given memory range.
     * <p>
     * Query attributes of the memory range starting at devPtr with a
     * size of count bytes. The memory range must refer to managed memory
     * allocated via cuMemAllocManaged or declared via __managed__
     * variables. The attributes array will be interpreted to have
     * numAttributes entries. The dataSizes array will also be
     * interpreted to have numAttributes entries. The results of the
     * query will be stored in data.
     * <p>
     * The list of supported attributes is given below. Please refer to
     * {@link JCudaDriver#cuMemRangeGetAttribute} for attribute
     * descriptions and restrictions.
     * <ul>
     * <li>CU_MEM_RANGE_ATTRIBUTE_READ_MOSTLY</li>
     * <li>CU_MEM_RANGE_ATTRIBUTE_PREFERRED_LOCATION</li>
     * <li>CU_MEM_RANGE_ATTRIBUTE_ACCESSED_BY</li>
     * <li>CU_MEM_RANGE_ATTRIBUTE_LAST_PREFETCH_LOCATION</li>
     * </ul>
     *
     * @param data A two-dimensional array containing pointers to memory
     * locations where the result of each attribute query will be written
     * to.
     * @param dataSizes Array containing the sizes of each result
     * @param attributes An array of attributes to query (numAttributes
     * and the number of attributes in this array should match)
     * @param numAttributes Number of attributes to query
     * @param devPtr Start of the range to query
     * @param count Size of the range to query
     * @return CUDA_SUCCESS, CUDA_ERROR_DEINITIALIZED,
     * CUDA_ERROR_INVALID_CONTEXT, CUDA_ERROR_INVALID_VALUE,
     * CUDA_ERROR_INVALID_DEVICE
     */
    public static int cuMemRangeGetAttributes(Pointer data[], long dataSizes[], int attributes[], long numAttributes, CUdeviceptr devPtr, long count)
    {
        return checkResult(cuMemRangeGetAttributesNative(data, dataSizes, attributes, numAttributes, devPtr, count));
    }
    private static native int cuMemRangeGetAttributesNative(Pointer data[], long dataSizes[], int attributes[], long numAttributes, CUdeviceptr devPtr, long count);

    /**
     * Set attributes on a previously allocated memory region
     * <p>
     * The supported attributes are:
     * <ul>
     * <li>CU_POINTER_ATTRIBUTE_SYNC_MEMOPS: A boolean attribute that can
     * either be set (1) or unset (0). When set, the region of memory
     * that ptr points to is guaranteed to always synchronize memory
     * operations that are synchronous. If there are some previously
     * initiated synchronous memory operations that are pending when this
     * attribute is set, the function does not return until those memory
     * operations are complete. See further documentation in the section
     * titled "API synchronization behavior" to learn more about cases
     * when synchronous memory operations can exhibit asynchronous
     * behavior. value will be considered as a pointer to an unsigned
     * integer to which this attribute is to be set.</li>
     * </ul>
     *
     * @param value Pointer to memory containing the value to be set
     * @param attribute Pointer attribute to set
     * @param ptr Pointer to a memory region allocated using CUDA memory allocation APIs
     *
     * @return CUDA_SUCCESS, CUDA_ERROR_DEINITIALIZED, CUDA_ERROR_NOT_INITIALIZED,
     * CUDA_ERROR_INVALID_CONTEXT, CUDA_ERROR_INVALID_VALUE, CUDA_ERROR_INVALID_DEVICE
     *
     * @see JCudaDriver#cuPointerGetAttribute
     * @see JCudaDriver#cuPointerGetAttributes
     * @see JCudaDriver#cuMemAlloc
     * @see JCudaDriver#cuMemFree
     * @see JCudaDriver#cuMemAllocHost
     * @see JCudaDriver#cuMemFreeHost
     * @see JCudaDriver#cuMemHostAlloc
     * @see JCudaDriver#cuMemHostRegister
     * @see JCudaDriver#cuMemHostUnregister
     */
    public static int cuPointerSetAttribute(Pointer value, int attribute, CUdeviceptr ptr)
    {
        return checkResult(cuPointerSetAttributeNative(value, attribute, ptr));
    }
    private static native int cuPointerSetAttributeNative(Pointer value, int attribute, CUdeviceptr ptr);

    /**
     * Returns information about a pointer.
     * <p>
     * The supported attributes are (refer to cuPointerGetAttribute for
     * attribute descriptions and restrictions):
     * <ul>
     * <li>CU_POINTER_ATTRIBUTE_CONTEXT</li>
     * <li>CU_POINTER_ATTRIBUTE_MEMORY_TYPE</li>
     * <li>CU_POINTER_ATTRIBUTE_DEVICE_POINTER</li>
     * <li>CU_POINTER_ATTRIBUTE_HOST_POINTER</li>
     * <li>CU_POINTER_ATTRIBUTE_SYNC_MEMOPS</li>
     * <li>CU_POINTER_ATTRIBUTE_BUFFER_ID</li>
     * <li>CU_POINTER_ATTRIBUTE_IS_MANAGED</li>
     * </ul>
     * <p>
     * Unlike cuPointerGetAttribute, this function will not return an
     * error when the ptr encountered is not a valid CUDA pointer.
     * Instead, the attributes are assigned default NULL values and
     * CUDA_SUCCESS is returned.
     * <p>
     * If ptr was not allocated by, mapped by, or registered with a
     * CUcontext which uses UVA (Unified Virtual Addressing),
     * CUDA_ERROR_INVALID_CONTEXT is returned.
     *
     * @param numAttributes Number of attributes to query
     * @param attributes An array of attributes to query
     * (numAttributes and the number of attributes in this array should match)
     * @param data A pointer to an array of memory locations where the
     * result of each attribute query will be written to.
     * @param ptr Pointer to query
     *
     * @return CUDA_SUCCESS, CUDA_ERROR_DEINITIALIZED, CUDA_ERROR_INVALID_CONTEXT,
     * CUDA_ERROR_INVALID_VALUE, CUDA_ERROR_INVALID_DEVICE
     *
     * @see JCudaDriver#cuPointerGetAttribute
     * @see JCudaDriver#cuPointerSetAttribute
     */
    public static int cuPointerGetAttributes(int numAttributes, int attributes[], Pointer data, CUdeviceptr ptr)
    {
        return checkResult(cuPointerGetAttributesNative(numAttributes, attributes, data, ptr));
    }
    private static native int cuPointerGetAttributesNative(int numAttributes, int attributes[], Pointer data, CUdeviceptr ptr);

    /**
     * Create a stream.
     *
     * CUresult cuStreamCreate (
     *      CUstream* phStream,
     *      unsigned int  Flags )
     *
     * <p>
     * Creates a stream and returns a handle in phStream. The Flags
     * argument determines behaviors of the stream. Valid values for
     * Flags are:
     * <ul>
     * <li>CU_STREAM_DEFAULT: Default stream creation flag.</li>
     * <li>CU_STREAM_NON_BLOCKING: Specifies that work running in the
     * created stream may run concurrently with work in stream 0 (the
     * NULL stream), and that the created stream should perform no
     * implicit synchronization with stream 0.</li>
     * </ul>
     * <p>
     * Note that this function may also return error codes from previous,
     * asynchronous launches.
     *
     * @param phStream Returned newly created stream
     * @param Flags Parameters for stream creation
     *
     * @return CUDA_SUCCESS, CUDA_ERROR_DEINITIALIZED, CUDA_ERROR_NOT_INITIALIZED,
     * CUDA_ERROR_INVALID_CONTEXT, CUDA_ERROR_INVALID_VALUE,
     * CUDA_ERROR_OUT_OF_MEMORY
     *
     * @see JCudaDriver#cuStreamDestroy
     * @see JCudaDriver#cuStreamWaitEvent
     * @see JCudaDriver#cuStreamQuery
     * @see JCudaDriver#cuStreamSynchronize
     * @see JCudaDriver#cuStreamAddCallback
     */
    public static int cuStreamCreate(CUstream phStream, int Flags)
    {
        return checkResult(cuStreamCreateNative(phStream, Flags));
    }
    private static native int cuStreamCreateNative(CUstream phStream, int Flags);

    /**
     * Create a stream with the given priority
     *
     * Creates a stream with the specified priority and returns a handle in phStream.
     * This API alters the scheduler priority of work in the stream. Work in a higher
     * priority stream may preempt work already executing in a low priority stream.
     *
     * priority follows a convention where lower numbers represent higher priorities.
     * '0' represents default priority. The range of meaningful numerical priorities can
     * be queried using ::cuCtxGetStreamPriorityRange. If the specified priority is
     * outside the numerical range returned by ::cuCtxGetStreamPriorityRange,
     * it will automatically be clamped to the lowest or the highest number in the range.
     *
     * @param phStream Returned newly created stream
     * @param flags Flags for stream creation. See ::cuStreamCreate for a list of valid flags
     * @param priority Stream priority. Lower numbers represent higher priorities.
     * See ::cuCtxGetStreamPriorityRange for more information about
     * meaningful stream priorities that can be passed.
     *
     * Note: Stream priorities are supported only on GPUs
     * with compute capability 3.5 or higher.
     *
     * Note: In the current implementation, only compute kernels launched in
     * priority streams are affected by the stream's priority. Stream priorities have
     * no effect on host-to-device and device-to-host memory operations.
* * @return CUDA_SUCCESS, CUDA_ERROR_DEINITIALIZED, * CUDA_ERROR_NOT_INITIALIZED, CUDA_ERROR_INVALID_CONTEXT, * CUDA_ERROR_INVALID_VALUE, CUDA_ERROR_OUT_OF_MEMORY * * @see JCudaDriver#cuStreamDestroy * @see JCudaDriver#cuStreamCreate * @see JCudaDriver#cuStreamGetPriority * @see JCudaDriver#cuCtxGetStreamPriorityRange * @see JCudaDriver#cuStreamGetFlags * @see JCudaDriver#cuStreamWaitEvent * @see JCudaDriver#cuStreamQuery * @see JCudaDriver#cuStreamSynchronize * @see JCudaDriver#cuStreamAddCallback * @see JCudaDriver#cudaStreamCreateWithPriority */ public static int cuStreamCreateWithPriority(CUstream phStream, int flags, int priority) { return checkResult(cuStreamCreateWithPriorityNative(phStream, flags, priority)); } private static native int cuStreamCreateWithPriorityNative(CUstream phStream, int flags, int priority); /** * Query the priority of a given stream. * * Query the priority of a stream created using ::cuStreamCreate or ::cuStreamCreateWithPriority * and return the priority in priority. Note that if the stream was created with a * priority outside the numerical range returned by ::cuCtxGetStreamPriorityRange, * this function returns the clamped priority. * See ::cuStreamCreateWithPriority for details about priority clamping. 
* * @param hStream Handle to the stream to be queried * @param priority Pointer to a signed integer in which the stream's priority is returned * @return CUDA_SUCCESS, CUDA_ERROR_DEINITIALIZED, * CUDA_ERROR_NOT_INITIALIZED, CUDA_ERROR_INVALID_CONTEXT, * CUDA_ERROR_INVALID_VALUE, CUDA_ERROR_INVALID_HANDLE, * CUDA_ERROR_OUT_OF_MEMORY * * @see JCudaDriver#cuStreamDestroy * @see JCudaDriver#cuStreamCreate * @see JCudaDriver#cuStreamCreateWithPriority * @see JCudaDriver#cuCtxGetStreamPriorityRange * @see JCudaDriver#cuStreamGetFlags * @see JCudaDriver#cudaStreamGetPriority */ public static int cuStreamGetPriority(CUstream hStream, int priority[]) { return checkResult(cuStreamGetPriorityNative(hStream, priority)); } private static native int cuStreamGetPriorityNative(CUstream hStream, int priority[]); /** * Query the flags of a given stream. * * Query the flags of a stream created using ::cuStreamCreate or * ::cuStreamCreateWithPriority and return the flags in flags. * * @param hStream Handle to the stream to be queried * @param flags Pointer to an unsigned integer in which the stream's flags * are returned The value returned in flags is a logical 'OR' of * all flags that were used while creating this stream. See * ::cuStreamCreate for the list of valid flags * @return CUDA_SUCCESS, CUDA_ERROR_DEINITIALIZED, * CUDA_ERROR_NOT_INITIALIZED, CUDA_ERROR_INVALID_CONTEXT, * CUDA_ERROR_INVALID_VALUE, CUDA_ERROR_INVALID_HANDLE, * CUDA_ERROR_OUT_OF_MEMORY * * @see JCudaDriver#cuStreamDestroy * @see JCudaDriver#cuStreamCreate * @see JCudaDriver#cuStreamGetPriority * @see JCudaDriver#cudaStreamGetFlags */ public static int cuStreamGetFlags(CUstream hStream, int flags[]) { return checkResult(cuStreamGetFlagsNative(hStream, flags)); } private static native int cuStreamGetFlagsNative(CUstream hStream, int flags[]); /** * Query the context associated with a stream. * * Returns the CUDA context that the stream is associated with. 
* * The stream handle hStream can refer to any of the following: *
    *
  • * a stream created via any of the CUDA driver APIs such as ::cuStreamCreate * and ::cuStreamCreateWithPriority, or their runtime API equivalents such as * ::cudaStreamCreate, ::cudaStreamCreateWithFlags and ::cudaStreamCreateWithPriority. * The returned context is the context that was active in the calling thread when the * stream was created. Passing an invalid handle will result in undefined behavior. *
  • *
  • * any of the special streams such as the NULL stream, ::CU_STREAM_LEGACY * and ::CU_STREAM_PER_THREAD. The runtime API equivalents of these are * also accepted, which are NULL, ::cudaStreamLegacy and ::cudaStreamPerThread * respectively. Specifying any of the special handles will return the context * current to the calling thread. If no context is current to the calling thread, * ::CUDA_ERROR_INVALID_CONTEXT is returned. *
  • *
* * @param hStream Handle to the stream to be queried * @param pctx Returned context associated with the stream * * @return CUDA_SUCCESS, CUDA_ERROR_DEINITIALIZED, * CUDA_ERROR_NOT_INITIALIZED, CUDA_ERROR_INVALID_CONTEXT, * CUDA_ERROR_INVALID_HANDLE, * * @see JCudaDriver#cuStreamDestroy * @see JCudaDriver#cuStreamCreateWithPriority * @see JCudaDriver#cuStreamGetPriority * @see JCudaDriver#cuStreamGetFlags * @see JCudaDriver#cuStreamWaitEvent * @see JCudaDriver#cuStreamQuery * @see JCudaDriver#cuStreamSynchronize * @see JCudaDriver#cuStreamAddCallback * @see JCudaDriver#cudaStreamCreate * @see JCudaDriver#cudaStreamCreateWithFlags */ public static int cuStreamGetCtx(CUstream hStream, CUcontext pctx) { return checkResult(cuStreamGetCtxNative(hStream, pctx)); } private static native int cuStreamGetCtxNative(CUstream hStream, CUcontext pctx); /** * Make a compute stream wait on an event. * *
     * CUresult cuStreamWaitEvent (
     *      CUstream hStream,
     *      CUevent hEvent,
     *      unsigned int  Flags )
     * 
*
*

Make a compute stream wait on an event. * Makes all future work submitted to hStream wait until hEvent reports completion before beginning execution. This * synchronization will be performed efficiently on the device. The event * hEvent may be from a different * context than hStream, in which case this function will * perform cross-device synchronization. *

*

The stream hStream will wait * only for the completion of the most recent host call to cuEventRecord() * on hEvent. Once this call has returned, any functions * (including cuEventRecord() and cuEventDestroy()) may be called on hEvent again, and subsequent calls will not have any effect on * hStream. *

*

If hStream is 0 (the NULL * stream) any future work submitted in any stream will wait for hEvent to complete before beginning execution. This effectively * creates a barrier for all future work submitted to the context. *

*

If cuEventRecord() has not been called * on hEvent, this call acts as if the record has already * completed, and so is a functional no-op. *

*
* Note: *

Note that this * function may also return error codes from previous, asynchronous * launches. *

*
*

*
* * @param hStream Stream to wait * @param hEvent Event to wait on (may not be NULL) * @param Flags Parameters for the operation (must be 0) * * @return CUDA_SUCCESS, CUDA_ERROR_DEINITIALIZED, CUDA_ERROR_NOT_INITIALIZED, * CUDA_ERROR_INVALID_CONTEXT, CUDA_ERROR_INVALID_HANDLE, * * @see JCudaDriver#cuStreamCreate * @see JCudaDriver#cuEventRecord * @see JCudaDriver#cuStreamQuery * @see JCudaDriver#cuStreamSynchronize * @see JCudaDriver#cuStreamAddCallback * @see JCudaDriver#cuStreamDestroy */ public static int cuStreamWaitEvent(CUstream hStream, CUevent hEvent, int Flags) { return checkResult(cuStreamWaitEventNative(hStream, hEvent, Flags)); } private static native int cuStreamWaitEventNative(CUstream hStream, CUevent hEvent, int Flags); /** * Add a callback to a compute stream. * * This function is slated for eventual deprecation and removal. If * you do not require the callback to execute in case of a device error, * consider using ::cuLaunchHostFunc. Additionally, this function is not * supported with ::cuStreamBeginCapture and ::cuStreamEndCapture, unlike * ::cuLaunchHostFunc. * *
     * CUresult cuStreamAddCallback (
     *      CUstream hStream,
     *      CUstreamCallback callback,
     *      void* userData,
     *      unsigned int  flags )
     * 
*
*

Add a callback to a compute stream. Adds * a callback to be called on the host after all currently enqueued items * in the stream * have completed. For each cuStreamAddCallback * call, the callback will be executed exactly once. The callback will * block later * work in the stream until it is finished. *

*

The callback may be passed CUDA_SUCCESS * or an error code. In the event of a device error, all subsequently * executed callbacks will receive an appropriate CUresult. *

*

Callbacks must not make any CUDA API * calls. Attempting to use a CUDA API will result in CUDA_ERROR_NOT_PERMITTED. * Callbacks must not perform any synchronization that may depend on * outstanding device work or other callbacks that are not * mandated to run earlier. Callbacks * without a mandated order (in independent streams) execute in undefined * order and may be * serialized. *

*

This API requires compute capability * 1.1 or greater. See cuDeviceGetAttribute or cuDeviceGetProperties to * query compute capability. Attempting to use this API with earlier * compute versions will return CUDA_ERROR_NOT_SUPPORTED. *

*
* Note: *

Note that this * function may also return error codes from previous, asynchronous * launches. *

*
*

*
* * @param hStream Stream to add callback to * @param callback The function to call once preceding stream operations are complete * @param userData User specified data to be passed to the callback function * @param flags Reserved for future use, must be 0 * * @return CUDA_SUCCESS, CUDA_ERROR_DEINITIALIZED, CUDA_ERROR_NOT_INITIALIZED, * CUDA_ERROR_INVALID_CONTEXT, CUDA_ERROR_INVALID_HANDLE, * CUDA_ERROR_NOT_SUPPORTED * * @see JCudaDriver#cuStreamCreate * @see JCudaDriver#cuStreamQuery * @see JCudaDriver#cuStreamSynchronize * @see JCudaDriver#cuStreamWaitEvent * @see JCudaDriver#cuStreamDestroy */ public static int cuStreamAddCallback(CUstream hStream, CUstreamCallback callback, Object userData, int flags) { return checkResult(cuStreamAddCallbackNative(hStream, callback, userData, flags)); } private static native int cuStreamAddCallbackNative(CUstream hStream, CUstreamCallback callback, Object userData, int flags); /** * Begins graph capture on a stream. * * Begin graph capture on \p hStream. When a stream is in capture mode, all operations * pushed into the stream will not be executed, but will instead be captured into * a graph, which will be returned via ::cuStreamEndCapture. Capture may not be initiated * if \p stream is CU_STREAM_LEGACY. Capture must be ended on the same stream in which * it was initiated, and it may only be initiated if the stream is not already in capture * mode. The capture mode may be queried via ::cuStreamIsCapturing. * * @param hStream - Stream in which to initiate capture * * Kernels captured using this API must not use texture and surface references. * Reading or writing through any texture or surface reference is undefined * behavior. This restriction does not apply to texture and surface objects. 
* * @return * CUDA_SUCCESS, * CUDA_ERROR_DEINITIALIZED, * CUDA_ERROR_NOT_INITIALIZED, * CUDA_ERROR_INVALID_VALUE * * @see * JCudaDriver#cuStreamCreate * JCudaDriver#cuStreamIsCapturing * JCudaDriver#cuStreamEndCapture */ public static int cuStreamBeginCapture(CUstream hStream, int mode) { return checkResult(cuStreamBeginCaptureNative(hStream, mode)); } private static native int cuStreamBeginCaptureNative(CUstream hStream, int mode); /** * Swaps the stream capture interaction mode for a thread. * * Sets the calling thread's stream capture interaction mode to the value contained * in \p *mode, and overwrites \p *mode with the previous mode for the thread. To * facilitate deterministic behavior across function or module boundaries, callers * are encouraged to use this API in a push-pop fashion:
*
 
     * CUstreamCaptureMode mode = desiredMode;
     * cuThreadExchangeStreamCaptureMode(&mode);
     * ...
     * cuThreadExchangeStreamCaptureMode(&mode); // restore previous mode
     * 

*
* * During stream capture (see ::cuStreamBeginCapture), some actions, such as a call * to ::cudaMalloc, may be unsafe. In the case of ::cudaMalloc, the operation is * not enqueued asynchronously to a stream, and is not observed by stream capture. * Therefore, if the sequence of operations captured via ::cuStreamBeginCapture * depended on the allocation being replayed whenever the graph is launched, the * captured graph would be invalid.
*
* Therefore, stream capture places restrictions on API calls that can be made within * or concurrently to a ::cuStreamBeginCapture-::cuStreamEndCapture sequence. This * behavior can be controlled via this API and flags to ::cuStreamBeginCapture. *
* A thread's mode is one of the following: *
    *
  • CU_STREAM_CAPTURE_MODE_GLOBAL: This is the default mode. If the local thread has * an ongoing capture sequence that was not initiated with * \p CU_STREAM_CAPTURE_MODE_RELAXED at \p cuStreamBeginCapture, or if any other thread * has a concurrent capture sequence initiated with \p CU_STREAM_CAPTURE_MODE_GLOBAL, * this thread is prohibited from potentially unsafe API calls. *
  • *
  • CU_STREAM_CAPTURE_MODE_THREAD_LOCAL: If the local thread has an ongoing capture * sequence not initiated with \p CU_STREAM_CAPTURE_MODE_RELAXED, it is prohibited * from potentially unsafe API calls. Concurrent capture sequences in other threads * are ignored. *
  • *
  • CU_STREAM_CAPTURE_MODE_RELAXED: The local thread is not prohibited from potentially * unsafe API calls. Note that the thread is still prohibited from API calls which * necessarily conflict with stream capture, for example, attempting ::cuEventQuery * on an event that was last recorded inside a capture sequence. *
  • *
* * @param mode - Pointer to mode value to swap with the current mode * * @return * CUDA_SUCCESS, * CUDA_ERROR_DEINITIALIZED, * CUDA_ERROR_NOT_INITIALIZED, * CUDA_ERROR_INVALID_VALUE * * @see JCudaDriver#cuStreamBeginCapture */ public static int cuThreadExchangeStreamCaptureMode(int mode[]) { return checkResult(cuThreadExchangeStreamCaptureModeNative(mode)); } private static native int cuThreadExchangeStreamCaptureModeNative(int mode[]); /** * Ends capture on a stream, returning the captured graph. * * End capture on \p hStream, returning the captured graph via \p phGraph. * Capture must have been initiated on \p hStream via a call to ::cuStreamBeginCapture. * If capture was invalidated, due to a violation of the rules of stream capture, then * a NULL graph will be returned. * * If the \p mode argument to ::cuStreamBeginCapture was not * ::CU_STREAM_CAPTURE_MODE_RELAXED, this call must be from the same thread as * ::cuStreamBeginCapture. * * @param hStream - Stream to query * @param phGraph - The captured graph * * @return * CUDA_SUCCESS, * CUDA_ERROR_DEINITIALIZED, * CUDA_ERROR_NOT_INITIALIZED, * CUDA_ERROR_INVALID_VALUE * CUDA_ERROR_STREAM_CAPTURE_WRONG_THREAD * * @see * JCudaDriver#cuStreamCreate * JCudaDriver#cuStreamBeginCapture * JCudaDriver#cuStreamIsCapturing */ public static int cuStreamEndCapture(CUstream hStream, CUgraph phGraph) { return checkResult(cuStreamEndCaptureNative(hStream, phGraph)); } private static native int cuStreamEndCaptureNative(CUstream hStream, CUgraph phGraph); /** * Returns a stream's capture status * * Return the capture status of \p hStream via \p captureStatus. After a successful * call, \p *captureStatus will contain one of the following: *
    *
  • ::CU_STREAM_CAPTURE_STATUS_NONE: The stream is not capturing.
  • *
  • ::CU_STREAM_CAPTURE_STATUS_ACTIVE: The stream is capturing.
  • *
  • ::CU_STREAM_CAPTURE_STATUS_INVALIDATED: The stream was capturing but an error * has invalidated the capture sequence. The capture sequence must be terminated * with ::cuStreamEndCapture on the stream where it was initiated in order to * continue using \p hStream.
  • *
* Note that, if this is called on ::CU_STREAM_LEGACY (the "null stream") while * a blocking stream in the same context is capturing, it will return * ::CUDA_ERROR_STREAM_CAPTURE_IMPLICIT and \p *captureStatus is unspecified * after the call. The blocking stream capture is not invalidated.
*
* When a blocking stream is capturing, the legacy stream is in an * unusable state until the blocking stream capture is terminated. The legacy * stream is not supported for stream capture, but attempted use would have an * implicit dependency on the capturing stream(s). * * @param hStream - Stream to query * @param captureStatus - Returns the stream's capture status * * @return * CUDA_SUCCESS, * CUDA_ERROR_DEINITIALIZED, * CUDA_ERROR_NOT_INITIALIZED, * CUDA_ERROR_INVALID_VALUE, * CUDA_ERROR_STREAM_CAPTURE_IMPLICIT * * @see * JCudaDriver#cuStreamCreate * JCudaDriver#cuStreamBeginCapture * JCudaDriver#cuStreamEndCapture */ public static int cuStreamIsCapturing(CUstream hStream, int captureStatus[]) { return checkResult(cuStreamIsCapturingNative(hStream, captureStatus)); } private static native int cuStreamIsCapturingNative(CUstream hStream, int captureStatus[]); /** * Query capture status of a stream * * Query the capture status of a stream and and get an id for * the capture sequence, which is unique over the lifetime of the process. * * If called on ::CU_STREAM_LEGACY (the "null stream") while a stream not created * with ::CU_STREAM_NON_BLOCKING is capturing, returns ::CUDA_ERROR_STREAM_CAPTURE_IMPLICIT. * * A valid id is returned only if both of the following are true: * - the call returns CUDA_SUCCESS * - captureStatus is set to ::CU_STREAM_CAPTURE_STATUS_ACTIVE * * @return * CUDA_SUCCESS, * CUDA_ERROR_STREAM_CAPTURE_IMPLICIT * * @see JCudaDriver#cuStreamBeginCapture, * @see JCudaDriver#cuStreamIsCapturing */ public static int cuStreamGetCaptureInfo(CUstream hStream, int captureStatus[], long id[]) { return checkResult(cuStreamGetCaptureInfoNative(hStream, captureStatus, id)); } private static native int cuStreamGetCaptureInfoNative(CUstream hStream, int captureStatus[], long id[]); /** * Attach memory to a stream asynchronously. * * Enqueues an operation in hStream to specify stream association of * length bytes of memory starting from dptr. 
This function is a * stream-ordered operation, meaning that it is dependent on, and will * only take effect when, previous work in stream has completed. Any * previous association is automatically replaced. * * dptr must point to one of the following types of memories: *
    *
  • managed memory declared using the __managed__ keyword or allocated with * ::cuMemAllocManaged.
  • *
  • a valid host-accessible region of system-allocated pageable memory. This * type of memory may only be specified if the device associated with the * stream reports a non-zero value for the device attribute * ::CU_DEVICE_ATTRIBUTE_PAGEABLE_MEMORY_ACCESS.
  • *
* * For managed allocations, length must be either zero or the entire * allocation's size. Both indicate that the entire allocation's stream * association is being changed. Currently, it is not possible to change stream * association for a portion of a managed allocation.
*
* For pageable host allocations, length must be non-zero.
*
* The stream association is specified using flags which must be * one of ::CUmemAttach_flags. * If the ::CU_MEM_ATTACH_GLOBAL flag is specified, the memory can be accessed * by any stream on any device. * If the ::CU_MEM_ATTACH_HOST flag is specified, the program makes a guarantee * that it won't access the memory on the device from any stream on a device that * has a zero value for the device attribute ::CU_DEVICE_ATTRIBUTE_CONCURRENT_MANAGED_ACCESS. * If the ::CU_MEM_ATTACH_SINGLE flag is specified and hStream is associated with * a device that has a zero value for the device attribute ::CU_DEVICE_ATTRIBUTE_CONCURRENT_MANAGED_ACCESS, * the program makes a guarantee that it will only access the memory on the device * from hStream. It is illegal to attach singly to the NULL stream, because the * NULL stream is a virtual global stream and not a specific stream. An error will * be returned in this case.
*
* When memory is associated with a single stream, the Unified Memory system will * allow CPU access to this memory region so long as all operations in hStream * have completed, regardless of whether other streams are active. In effect, * this constrains exclusive ownership of the managed memory region by * an active GPU to per-stream activity instead of whole-GPU activity.
*
* Accessing memory on the device from streams that are not associated with * it will produce undefined results. No error checking is performed by the * Unified Memory system to ensure that kernels launched into other streams * do not access this region.
*
* It is a program's responsibility to order calls to ::cuStreamAttachMemAsync * via events, synchronization or other means to ensure legal access to memory * at all times. Data visibility and coherency will be changed appropriately * for all kernels which follow a stream-association change.
*
* If hStream is destroyed while data is associated with it, the association is * removed and the association reverts to the default visibility of the allocation * as specified at ::cuMemAllocManaged. For __managed__ variables, the default * association is always ::CU_MEM_ATTACH_GLOBAL. Note that destroying a stream is an * asynchronous operation, and as a result, the change to default association won't * happen until all work in the stream has completed. * * @param hStream - Stream in which to enqueue the attach operation * @param dptr - Pointer to memory (must be a pointer to managed memory or * to a valid host-accessible region of system-allocated * pageable memory) * @param length - Length of memory * @param flags - Must be one of ::CUmemAttach_flags * * @return * CUDA_SUCCESS, * CUDA_ERROR_DEINITIALIZED, * CUDA_ERROR_NOT_INITIALIZED, * CUDA_ERROR_INVALID_CONTEXT, * CUDA_ERROR_INVALID_HANDLE, * CUDA_ERROR_NOT_SUPPORTED * * @see JCudaDriver#cuStreamCreate * JCudaDriver#cuStreamQuery * JCudaDriver#cuStreamSynchronize * JCudaDriver#cuStreamWaitEvent * JCudaDriver#cuStreamDestroy * JCudaDriver#cuMemAllocManaged * JCudaDriver#cudaStreamAttachMemAsync */ public static int cuStreamAttachMemAsync(CUstream hStream, CUdeviceptr dptr, long length, int flags) { return checkResult(cuStreamAttachMemAsyncNative(hStream, dptr, length, flags)); } private static native int cuStreamAttachMemAsyncNative(CUstream hStream, CUdeviceptr dptr, long length, int flags); /** * Determine status of a compute stream. * *
     * CUresult cuStreamQuery (
     *      CUstream hStream )
     * 
*
*

Determine status of a compute stream. * Returns CUDA_SUCCESS if all operations in the stream specified by hStream have completed, or CUDA_ERROR_NOT_READY if not. *

*
* Note: *

Note that this * function may also return error codes from previous, asynchronous * launches. *

*
*

*
* * @param hStream Stream to query status of * * @return CUDA_SUCCESS, CUDA_ERROR_DEINITIALIZED, CUDA_ERROR_NOT_INITIALIZED, * CUDA_ERROR_INVALID_CONTEXT, CUDA_ERROR_INVALID_HANDLE, * CUDA_ERROR_NOT_READY * * @see JCudaDriver#cuStreamCreate * @see JCudaDriver#cuStreamWaitEvent * @see JCudaDriver#cuStreamDestroy * @see JCudaDriver#cuStreamSynchronize * @see JCudaDriver#cuStreamAddCallback */ public static int cuStreamQuery(CUstream hStream) { return checkResult(cuStreamQueryNative(hStream)); } private static native int cuStreamQueryNative(CUstream hStream); /** * Wait until a stream's tasks are completed. * *
     * CUresult cuStreamSynchronize (
     *      CUstream hStream )
     * 
*
*

Wait until a stream's tasks are completed. * Waits until the device has completed all operations in the stream * specified by * hStream. If the context was * created with the CU_CTX_SCHED_BLOCKING_SYNC flag, the CPU thread will * block until the stream is finished with all of its tasks. *

*
* Note: *

Note that this * function may also return error codes from previous, asynchronous * launches. *

*
*

*
* * @param hStream Stream to wait for * * @return CUDA_SUCCESS, CUDA_ERROR_DEINITIALIZED, CUDA_ERROR_NOT_INITIALIZED, * CUDA_ERROR_INVALID_CONTEXT, CUDA_ERROR_INVALID_HANDLE * * @see JCudaDriver#cuStreamCreate * @see JCudaDriver#cuStreamDestroy * @see JCudaDriver#cuStreamWaitEvent * @see JCudaDriver#cuStreamQuery * @see JCudaDriver#cuStreamAddCallback */ public static int cuStreamSynchronize(CUstream hStream) { return checkResult(cuStreamSynchronizeNative(hStream)); } private static native int cuStreamSynchronizeNative(CUstream hStream); /** * Destroys a stream. * *
     * CUresult cuStreamDestroy (
     *      CUstream hStream )
     * 
*
*

Destroys a stream. Destroys the stream * specified by hStream. *

*

In case the device is still doing work * in the stream hStream when cuStreamDestroy() is called, the * function will return immediately and the resources associated with hStream will be released automatically once the device has * completed all work in hStream. *

*
* Note: *

Note that this * function may also return error codes from previous, asynchronous * launches. *

*
*

*
* * @param hStream Stream to destroy * * @return CUDA_SUCCESS, CUDA_ERROR_DEINITIALIZED, CUDA_ERROR_NOT_INITIALIZED, * CUDA_ERROR_INVALID_CONTEXT, CUDA_ERROR_INVALID_VALUE * * @see JCudaDriver#cuStreamCreate * @see JCudaDriver#cuStreamWaitEvent * @see JCudaDriver#cuStreamQuery * @see JCudaDriver#cuStreamSynchronize * @see JCudaDriver#cuStreamAddCallback */ public static int cuStreamDestroy(CUstream hStream) { return checkResult(cuStreamDestroyNative(hStream)); } private static native int cuStreamDestroyNative(CUstream hStream); /** * Copies attributes from source stream to destination stream * * Copies attributes from source stream \p src to destination stream \p dst. * Both streams must have the same context. * * @param dst Destination stream * @param src Source stream * * For list of attributes see ::CUstreamAttrID * * @return CUDA_SUCCESS, CUDA_ERROR_INVALID_VALUE * * @see CUaccessPolicyWindow */ public static int cuStreamCopyAttributes(CUstream dst, CUstream src) { return checkResult(cuStreamCopyAttributesNative(dst, src)); } private static native int cuStreamCopyAttributesNative(CUstream dst, CUstream src); /** * Queries stream attribute. * * Queries attribute attr from hStream and stores it in corresponding * member of value_out. * * @param hStream * @param attr * @param value_out * * @return CUDA_SUCCESS, CUDA_ERROR_INVALID_VALUE, CUDA_ERROR_INVALID_HANDLE * * @see CUaccessPolicyWindow */ public static int cuStreamGetAttribute(CUstream hStream, int attr, CUstreamAttrValue value_out) { return checkResult(cuStreamGetAttributeNative(hStream, attr, value_out)); } private static native int cuStreamGetAttributeNative(CUstream hStream, int attr, CUstreamAttrValue value_out); /** * Sets stream attribute. * * Sets attribute attr on hStream from corresponding attribute of * value. The updated attribute will be applied to subsequent work * submitted to the stream. It will not affect previously submitted work. 
* * @param hStream * @param attr * @param value * * @return CUDA_SUCCESS, CUDA_ERROR_INVALID_VALUE, CUDA_ERROR_INVALID_HANDLE * * @see CUaccessPolicyWindow */ public static int cuStreamSetAttribute(CUstream hStream, int attr, CUstreamAttrValue value) { return checkResult(cuStreamSetAttributeNative(hStream, attr, value)); } private static native int cuStreamSetAttributeNative(CUstream hStream, int attr, CUstreamAttrValue value); /** * Initializes OpenGL interoperability. * *
     * CUresult cuGLInit (
     *      void )
     * 
*
*

Initializes OpenGL interoperability. * DeprecatedThis function is * deprecated as of Cuda 3.0.Initializes OpenGL interoperability. * This function is deprecated and calling it is no longer required. It * may fail if the * needed OpenGL driver facilities are * not available. *

*
* Note: *

Note that * this function may also return error codes from previous, asynchronous * launches. *

*
*

*
* * * @return CUDA_SUCCESS, CUDA_ERROR_DEINITIALIZED, CUDA_ERROR_NOT_INITIALIZED, * CUDA_ERROR_INVALID_CONTEXT, CUDA_ERROR_UNKNOWN * * @see JCudaDriver#cuGLMapBufferObject * @see JCudaDriver#cuGLRegisterBufferObject * @see JCudaDriver#cuGLUnmapBufferObject * @see JCudaDriver#cuGLUnregisterBufferObject * @see JCudaDriver#cuGLMapBufferObjectAsync * @see JCudaDriver#cuGLUnmapBufferObjectAsync * @see JCudaDriver#cuGLSetBufferObjectMapFlags * * @deprecated Deprecated as of CUDA 3.0 */ @Deprecated public static int cuGLInit() { return checkResult(cuGLInitNative()); } private static native int cuGLInitNative(); /** * Create a CUDA context for interoperability with OpenGL. * *
     * CUresult cuGLCtxCreate (
     *      CUcontext* pCtx,
     *      unsigned int  Flags,
     *      CUdevice device )
     * 
*
*

Create a CUDA context for * interoperability with OpenGL. * DeprecatedThis function is * deprecated as of Cuda 5.0.This function is deprecated and should * no longer be used. It is no longer necessary to associate a CUDA * context with an OpenGL * context in order to achieve maximum * interoperability performance. *

*
* Note: *

Note that * this function may also return error codes from previous, asynchronous * launches. *

*
*

*
* * @param pCtx Returned CUDA context * @param Flags Options for CUDA context creation * @param device Device on which to create the context * * @return CUDA_SUCCESS, CUDA_ERROR_DEINITIALIZED, CUDA_ERROR_NOT_INITIALIZED, * CUDA_ERROR_INVALID_CONTEXT, CUDA_ERROR_INVALID_VALUE, * CUDA_ERROR_OUT_OF_MEMORY * * @see JCudaDriver#cuCtxCreate * @see JCudaDriver#cuGLInit * @see JCudaDriver#cuGLMapBufferObject * @see JCudaDriver#cuGLRegisterBufferObject * @see JCudaDriver#cuGLUnmapBufferObject * @see JCudaDriver#cuGLUnregisterBufferObject * @see JCudaDriver#cuGLMapBufferObjectAsync * @see JCudaDriver#cuGLUnmapBufferObjectAsync * @see JCudaDriver#cuGLSetBufferObjectMapFlags * * @deprecated Deprecated as of CUDA 5.0 */ @Deprecated public static int cuGLCtxCreate( CUcontext pCtx, int Flags, CUdevice device ) { return checkResult(cuGLCtxCreateNative(pCtx, Flags, device)); } private static native int cuGLCtxCreateNative(CUcontext pCtx, int Flags, CUdevice device); /** * Gets the CUDA devices associated with the current OpenGL context. * *
     * CUresult cuGLGetDevices (
     *      unsigned int* pCudaDeviceCount,
     *      CUdevice* pCudaDevices,
     *      unsigned int  cudaDeviceCount,
     *      CUGLDeviceList deviceList )
     * 
*
*

Gets the CUDA devices associated with * the current OpenGL context. Returns in *pCudaDeviceCount * the number of CUDA-compatible devices corresponding to the current * OpenGL context. Also returns in *pCudaDevices at most * cudaDeviceCount of the CUDA-compatible devices corresponding to the * current OpenGL context. If any of the GPUs being * used by the current OpenGL context are * not CUDA capable then the call will return CUDA_ERROR_NO_DEVICE. *

*

The deviceList argument may * be any of the following: *

    *
  • *

    CU_GL_DEVICE_LIST_ALL: Query * all devices used by the current OpenGL context. *

    *
  • *
  • *

    CU_GL_DEVICE_LIST_CURRENT_FRAME: * Query the devices used by the current OpenGL context to render the * current frame (in SLI). *

    *
  • *
  • *

    CU_GL_DEVICE_LIST_NEXT_FRAME: * Query the devices used by the current OpenGL context to render the next * frame (in SLI). Note that this is a prediction, * it can't be guaranteed that this * is correct in all cases. *

    *
  • *
*

*
* Note: *

Note that this * function may also return error codes from previous, asynchronous * launches. *

*
*

*
* * @param pCudaDeviceCount Returned number of CUDA devices. * @param pCudaDevices Returned CUDA devices. * @param cudaDeviceCount The size of the output device array pCudaDevices. * @param deviceList The set of devices to return. * * @return CUDA_SUCCESS, CUDA_ERROR_NO_DEVICE, * CUDA_ERROR_INVALID_VALUECUDA_ERROR_INVALID_CONTEXT * */ public static int cuGLGetDevices(int pCudaDeviceCount[], CUdevice pCudaDevices[], int cudaDeviceCount, int CUGLDeviceList_deviceList) { return checkResult(cuGLGetDevicesNative(pCudaDeviceCount, pCudaDevices, cudaDeviceCount, CUGLDeviceList_deviceList)); } private static native int cuGLGetDevicesNative(int pCudaDeviceCount[], CUdevice pCudaDevices[], int cudaDeviceCount, int CUGLDeviceList_deviceList); /** * Registers an OpenGL buffer object. * *
     * CUresult cuGraphicsGLRegisterBuffer (
     *      CUgraphicsResource* pCudaResource,
     *      GLuint buffer,
     *      unsigned int  Flags )
     * 
*
*

Registers an OpenGL buffer object. * Registers the buffer object specified by buffer for access * by CUDA. A handle to the registered object is returned as pCudaResource. The register flags Flags specify the * intended usage, as follows: *

*
    *
  • *

    CU_GRAPHICS_REGISTER_FLAGS_NONE: * Specifies no hints about how this resource will be used. It is therefore * assumed that this * resource will be read from and * written to by CUDA. This is the default value. *

    *
  • *
  • *

    CU_GRAPHICS_REGISTER_FLAGS_READ_ONLY: * Specifies that CUDA will not write to this resource. *

    *
  • *
  • *

    CU_GRAPHICS_REGISTER_FLAGS_WRITE_DISCARD: Specifies that CUDA will * not read from this resource and will write over the entire * contents of the resource, so * none of the data previously stored in the resource will be preserved. *

    *
  • *
*

*
* Note: *

Note that this * function may also return error codes from previous, asynchronous * launches. *

*
*

*
* * @param pCudaResource Pointer to the returned object handle * @param buffer name of buffer object to be registered * @param Flags Register flags * * @return CUDA_SUCCESS, CUDA_ERROR_INVALID_HANDLE, CUDA_ERROR_ALREADY_MAPPED, * CUDA_ERROR_INVALID_CONTEXT, * * @see JCudaDriver#cuGraphicsUnregisterResource * @see JCudaDriver#cuGraphicsMapResources * @see JCudaDriver#cuGraphicsResourceGetMappedPointer */ public static int cuGraphicsGLRegisterBuffer(CUgraphicsResource pCudaResource, int buffer, int Flags) { return checkResult(cuGraphicsGLRegisterBufferNative(pCudaResource, buffer, Flags)); } private static native int cuGraphicsGLRegisterBufferNative(CUgraphicsResource pCudaResource, int buffer, int Flags); /** * Register an OpenGL texture or renderbuffer object. * *
     * CUresult cuGraphicsGLRegisterImage (
     *      CUgraphicsResource* pCudaResource,
     *      GLuint image,
     *      GLenum target,
     *      unsigned int  Flags )
     * 
*
*

Register an OpenGL texture or renderbuffer * object. Registers the texture or renderbuffer object specified by image for access by CUDA. A handle to the registered object is * returned as pCudaResource. *

*

target must match the type of * the object, and must be one of GL_TEXTURE_2D, GL_TEXTURE_RECTANGLE, * GL_TEXTURE_CUBE_MAP, GL_TEXTURE_3D, * GL_TEXTURE_2D_ARRAY, or GL_RENDERBUFFER. *

*

The register flags Flags * specify the intended usage, as follows: *

*
    *
  • *

    CU_GRAPHICS_REGISTER_FLAGS_NONE: * Specifies no hints about how this resource will be used. It is therefore * assumed that this * resource will be read from and * written to by CUDA. This is the default value. *

    *
  • *
  • *

    CU_GRAPHICS_REGISTER_FLAGS_READ_ONLY: * Specifies that CUDA will not write to this resource. *

    *
  • *
  • *

    CU_GRAPHICS_REGISTER_FLAGS_WRITE_DISCARD: Specifies that CUDA will * not read from this resource and will write over the entire * contents of the resource, so * none of the data previously stored in the resource will be preserved. *

    *
  • *
  • *

    CU_GRAPHICS_REGISTER_FLAGS_SURFACE_LDST: * Specifies that CUDA will bind this resource to a surface * reference. *

    *
  • *
  • *

    CU_GRAPHICS_REGISTER_FLAGS_TEXTURE_GATHER: Specifies that CUDA will * perform texture gather operations on this resource. *

    *
  • *
*

*

The following image formats are * supported. For brevity's sake, the list is abbreviated. For ex., {GL_R, * GL_RG} X {8, 16} would * expand to the following 4 formats {GL_R8, * GL_R16, GL_RG8, GL_RG16} : *

    *
  • *

    GL_RED, GL_RG, GL_RGBA, * GL_LUMINANCE, GL_ALPHA, GL_LUMINANCE_ALPHA, GL_INTENSITY *

    *
  • *
  • *

    {GL_R, GL_RG, GL_RGBA} X {8, * 16, 16F, 32F, 8UI, 16UI, 32UI, 8I, 16I, 32I} *

    *
  • *
  • *

    {GL_LUMINANCE, GL_ALPHA, * GL_LUMINANCE_ALPHA, GL_INTENSITY} X {8, 16, 16F_ARB, 32F_ARB, 8UI_EXT, * 16UI_EXT, 32UI_EXT, 8I_EXT, * 16I_EXT, 32I_EXT} *

    *
  • *
*

*

The following image classes are currently * disallowed: *

    *
  • *

    Textures with borders

    *
  • *
  • *

    Multisampled renderbuffers

    *
  • *
*

*
* Note: *

Note that this * function may also return error codes from previous, asynchronous * launches. *

*
*

*
* * @param pCudaResource Pointer to the returned object handle * @param image name of texture or renderbuffer object to be registered * @param target Identifies the type of object specified by image * @param Flags Register flags * * @return CUDA_SUCCESS, CUDA_ERROR_INVALID_HANDLE, CUDA_ERROR_ALREADY_MAPPED, * CUDA_ERROR_INVALID_CONTEXT, * * @see JCudaDriver#cuGraphicsUnregisterResource * @see JCudaDriver#cuGraphicsMapResources * @see JCudaDriver#cuGraphicsSubResourceGetMappedArray */ public static int cuGraphicsGLRegisterImage(CUgraphicsResource pCudaResource, int image, int target, int Flags ) { return checkResult(cuGraphicsGLRegisterImageNative(pCudaResource, image, target, Flags)); } private static native int cuGraphicsGLRegisterImageNative(CUgraphicsResource pCudaResource, int image, int target, int Flags); /** * Registers an OpenGL buffer object. * *
     * CUresult cuGLRegisterBufferObject (
     *      GLuint buffer )
     * </pre>
     * <p>
     * Registers an OpenGL buffer object.
     * <p>
     * <i>Deprecated:</i> This function is deprecated as of CUDA 3.0.
     * Registers the buffer object specified by buffer for access by CUDA.
     * This function must be called before CUDA can map the buffer object.
     * There must be a valid OpenGL context bound to the current thread
     * when this function is called, and the buffer name is resolved by
     * that context.
     * <p>
     * Note that this function may also return error codes from previous,
     * asynchronous launches.
     *
     * @param bufferobj The name of the buffer object to register.
     *
     * @return CUDA_SUCCESS, CUDA_ERROR_DEINITIALIZED, CUDA_ERROR_NOT_INITIALIZED,
     * CUDA_ERROR_INVALID_CONTEXT, CUDA_ERROR_ALREADY_MAPPED
     *
     * @see JCudaDriver#cuGraphicsGLRegisterBuffer
     *
     * @deprecated Deprecated as of CUDA 3.0
     */
    @Deprecated
    public static int cuGLRegisterBufferObject( int bufferobj )
    {
        // The underlying driver function was removed, so this binding
        // unconditionally fails instead of calling into native code.
        throw new UnsupportedOperationException(
            "This function is deprecated as of CUDA 3.0");
    }

    /**
     * Maps an OpenGL buffer object.
     *
     * <pre>
     * CUresult cuGLMapBufferObject (
     *      CUdeviceptr* dptr,
     *      size_t* size,
     *      GLuint buffer )
     * 
*
*

Maps an OpenGL buffer object. * DeprecatedThis function is * deprecated as of Cuda 3.0.Maps the buffer object specified by * buffer into the address space of the current CUDA context * and returns in *dptr and *size the base pointer * and size of the resulting mapping. *

*

There must be a valid OpenGL context * bound to the current thread when this function is called. This must be * the same context, * or a member of the same shareGroup, * as the context that was bound when the buffer was registered. *

*

All streams in the current CUDA * context are synchronized with the current GL context. *

*
* Note: *

Note that * this function may also return error codes from previous, asynchronous * launches. *

*
*

*
* * @param dptr Returned mapped base pointer * @param size Returned size of mapping * @param buffer The name of the buffer object to map * * @return CUDA_SUCCESS, CUDA_ERROR_DEINITIALIZED, CUDA_ERROR_NOT_INITIALIZED, * CUDA_ERROR_INVALID_CONTEXT, CUDA_ERROR_INVALID_VALUE, * CUDA_ERROR_MAP_FAILED * * @see JCudaDriver#cuGraphicsMapResources * * @deprecated Deprecated as of CUDA 3.0 */ @Deprecated public static int cuGLMapBufferObject( CUdeviceptr dptr, long size[], int bufferobj ) { return checkResult(cuGLMapBufferObjectNative(dptr, size, bufferobj)); } private static native int cuGLMapBufferObjectNative(CUdeviceptr dptr, long size[], int bufferobj); /** * Unmaps an OpenGL buffer object. * *
     * CUresult cuGLUnmapBufferObject (
     *      GLuint buffer )
     * </pre>
     * <p>
     * Unmaps an OpenGL buffer object.
     * <p>
     * <i>Deprecated:</i> This function is deprecated as of CUDA 3.0.
     * Unmaps the buffer object specified by buffer for access by CUDA.
     * <p>
     * There must be a valid OpenGL context bound to the current thread
     * when this function is called. This must be the same context, or a
     * member of the same shareGroup, as the context that was bound when
     * the buffer was registered.
     * <p>
     * All streams in the current CUDA context are synchronized with the
     * current GL context.
     * <p>
     * Note that this function may also return error codes from previous,
     * asynchronous launches.
     *
     * @param bufferobj Buffer object to unmap
     *
     * @return CUDA_SUCCESS, CUDA_ERROR_DEINITIALIZED, CUDA_ERROR_NOT_INITIALIZED,
     * CUDA_ERROR_INVALID_CONTEXT, CUDA_ERROR_INVALID_VALUE
     *
     * @see JCudaDriver#cuGraphicsUnmapResources
     *
     * @deprecated Deprecated as of CUDA 3.0
     */
    @Deprecated
    public static int cuGLUnmapBufferObject( int bufferobj )
    {
        // Delegate to the native binding and translate the status code
        int status = cuGLUnmapBufferObjectNative(bufferobj);
        return checkResult(status);
    }
    private static native int cuGLUnmapBufferObjectNative(int bufferobj);

    /**
     * Unregister an OpenGL buffer object.
     *
     * <pre>
     * CUresult cuGLUnregisterBufferObject (
     *      GLuint buffer )
     * </pre>
     * <p>
     * Unregister an OpenGL buffer object.
     * <p>
     * <i>Deprecated:</i> This function is deprecated as of CUDA 3.0.
     * Unregisters the buffer object specified by buffer. This releases any
     * resources associated with the registered buffer. After this call,
     * the buffer may no longer be mapped for access by CUDA.
     * <p>
     * There must be a valid OpenGL context bound to the current thread
     * when this function is called. This must be the same context, or a
     * member of the same shareGroup, as the context that was bound when
     * the buffer was registered.
     * <p>
     * Note that this function may also return error codes from previous,
     * asynchronous launches.
     *
     * @param bufferobj Name of the buffer object to unregister
     *
     * @return CUDA_SUCCESS, CUDA_ERROR_DEINITIALIZED, CUDA_ERROR_NOT_INITIALIZED,
     * CUDA_ERROR_INVALID_CONTEXT, CUDA_ERROR_INVALID_VALUE
     *
     * @see JCudaDriver#cuGraphicsUnregisterResource
     *
     * @deprecated Deprecated as of CUDA 3.0
     */
    @Deprecated
    public static int cuGLUnregisterBufferObject( int bufferobj )
    {
        // Delegate to the native binding and translate the status code
        int status = cuGLUnregisterBufferObjectNative(bufferobj);
        return checkResult(status);
    }
    private static native int cuGLUnregisterBufferObjectNative(int bufferobj);

    /**
     * Set the map flags for an OpenGL buffer object.
     *
     * <pre>
     * CUresult cuGLSetBufferObjectMapFlags (
     *      GLuint buffer,
     *      unsigned int  Flags )
     * </pre>
     * <p>
     * Set the map flags for an OpenGL buffer object.
     * <p>
     * <i>Deprecated:</i> This function is deprecated as of CUDA 3.0.
     * Sets the map flags for the buffer object specified by buffer.
     * <p>
     * Changes to Flags will take effect the next time buffer is mapped.
     * The Flags argument may be any of the following:
     * <ul>
     *   <li>CU_GL_MAP_RESOURCE_FLAGS_NONE: Specifies no hints about how
     *   this resource will be used. It is therefore assumed that this
     *   resource will be read from and written to by CUDA kernels. This is
     *   the default value.</li>
     *   <li>CU_GL_MAP_RESOURCE_FLAGS_READ_ONLY: Specifies that CUDA
     *   kernels which access this resource will not write to this
     *   resource.</li>
     *   <li>CU_GL_MAP_RESOURCE_FLAGS_WRITE_DISCARD: Specifies that CUDA
     *   kernels which access this resource will not read from this
     *   resource and will write over the entire contents of the resource,
     *   so none of the data previously stored in the resource will be
     *   preserved.</li>
     * </ul>
     * <p>
     * If buffer has not been registered for use with CUDA, then
     * CUDA_ERROR_INVALID_HANDLE is returned. If buffer is presently mapped
     * for access by CUDA, then CUDA_ERROR_ALREADY_MAPPED is returned.
     * <p>
     * There must be a valid OpenGL context bound to the current thread
     * when this function is called. This must be the same context, or a
     * member of the same shareGroup, as the context that was bound when
     * the buffer was registered.
     * <p>
     * Note that this function may also return error codes from previous,
     * asynchronous launches.
     *
     * @param buffer Buffer object to unmap
     * @param Flags Map flags
     *
     * @return CUDA_SUCCESS, CUDA_ERROR_NOT_INITIALIZED, CUDA_ERROR_INVALID_HANDLE,
     * CUDA_ERROR_ALREADY_MAPPED, CUDA_ERROR_INVALID_CONTEXT
     *
     * @see JCudaDriver#cuGraphicsResourceSetMapFlags
     *
     * @deprecated Deprecated as of CUDA 3.0
     */
    @Deprecated
    public static int cuGLSetBufferObjectMapFlags( int buffer, int Flags )
    {
        // Delegate to the native binding and translate the status code
        int status = cuGLSetBufferObjectMapFlagsNative(buffer, Flags);
        return checkResult(status);
    }
    private static native int cuGLSetBufferObjectMapFlagsNative( int buffer, int Flags );

    /**
     * Maps an OpenGL buffer object.
     *
     * <pre>
     * CUresult cuGLMapBufferObjectAsync (
     *      CUdeviceptr* dptr,
     *      size_t* size,
     *      GLuint buffer,
     *      CUstream hStream )
     * 
*
*

Maps an OpenGL buffer object. * DeprecatedThis function is * deprecated as of Cuda 3.0.Maps the buffer object specified by * buffer into the address space of the current CUDA context * and returns in *dptr and *size the base pointer * and size of the resulting mapping. *

*

There must be a valid OpenGL context * bound to the current thread when this function is called. This must be * the same context, * or a member of the same shareGroup, * as the context that was bound when the buffer was registered. *

*

Stream hStream in the * current CUDA context is synchronized with the current GL context. *

*
* Note: *

Note that * this function may also return error codes from previous, asynchronous * launches. *

*
*

*
* * @param dptr Returned mapped base pointer * @param size Returned size of mapping * @param buffer The name of the buffer object to map * @param hStream Stream to synchronize * * @return CUDA_SUCCESS, CUDA_ERROR_DEINITIALIZED, CUDA_ERROR_NOT_INITIALIZED, * CUDA_ERROR_INVALID_CONTEXT, CUDA_ERROR_INVALID_VALUE, * CUDA_ERROR_MAP_FAILED * * @see JCudaDriver#cuGraphicsMapResources * * @deprecated Deprecated as of CUDA 3.0 */ @Deprecated public static int cuGLMapBufferObjectAsync( CUdeviceptr dptr, long size[], int buffer, CUstream hStream) { return checkResult((cuGLMapBufferObjectAsyncNative(dptr, size, buffer, hStream))); } private static native int cuGLMapBufferObjectAsyncNative( CUdeviceptr dptr, long size[], int buffer, CUstream hStream); /** * Unmaps an OpenGL buffer object. * *
     * CUresult cuGLUnmapBufferObjectAsync (
     *      GLuint buffer,
     *      CUstream hStream )
     * </pre>
     * <p>
     * Unmaps an OpenGL buffer object.
     * <p>
     * <i>Deprecated:</i> This function is deprecated as of CUDA 3.0.
     * Unmaps the buffer object specified by buffer for access by CUDA.
     * <p>
     * There must be a valid OpenGL context bound to the current thread
     * when this function is called. This must be the same context, or a
     * member of the same shareGroup, as the context that was bound when
     * the buffer was registered.
     * <p>
     * Stream hStream in the current CUDA context is synchronized with the
     * current GL context.
     * <p>
     * Note that this function may also return error codes from previous,
     * asynchronous launches.
     *
     * @param buffer Name of the buffer object to unmap
     * @param hStream Stream to synchronize
     *
     * @return CUDA_SUCCESS, CUDA_ERROR_DEINITIALIZED, CUDA_ERROR_NOT_INITIALIZED,
     * CUDA_ERROR_INVALID_CONTEXT, CUDA_ERROR_INVALID_VALUE
     *
     * @see JCudaDriver#cuGraphicsUnmapResources
     *
     * @deprecated Deprecated as of CUDA 3.0
     */
    @Deprecated
    public static int cuGLUnmapBufferObjectAsync( int buffer, CUstream hStream )
    {
        // Delegate to the native binding and translate the status code
        int status = cuGLUnmapBufferObjectAsyncNative(buffer, hStream);
        return checkResult(status);
    }
    private static native int cuGLUnmapBufferObjectAsyncNative( int buffer, CUstream hStream );

    /**
     * Unregisters a graphics resource for access by CUDA.
     *
     * <pre>
     * CUresult cuGraphicsUnregisterResource (
     *      CUgraphicsResource resource )
     * </pre>
     * <p>
     * Unregisters the graphics resource resource so it is not accessible
     * by CUDA unless registered again.
     * <p>
     * If resource is invalid then CUDA_ERROR_INVALID_HANDLE is returned.
     * <p>
     * Note that this function may also return error codes from previous,
     * asynchronous launches.
     *
     * @param resource Resource to unregister
     *
     * @return CUDA_SUCCESS, CUDA_ERROR_DEINITIALIZED, CUDA_ERROR_NOT_INITIALIZED,
     * CUDA_ERROR_INVALID_CONTEXT, CUDA_ERROR_INVALID_HANDLE,
     * CUDA_ERROR_UNKNOWN
     *
     * @see JCudaDriver#cuGraphicsGLRegisterBuffer
     * @see JCudaDriver#cuGraphicsGLRegisterImage
     */
    public static int cuGraphicsUnregisterResource(CUgraphicsResource resource)
    {
        // Delegate to the native binding and translate the status code
        int status = cuGraphicsUnregisterResourceNative(resource);
        return checkResult(status);
    }
    private static native int cuGraphicsUnregisterResourceNative(CUgraphicsResource resource);

    /**
     * Get an array through which to access a subresource of a mapped
     * graphics resource.
     *
     * <pre>
     * CUresult cuGraphicsSubResourceGetMappedArray (
     *      CUarray* pArray,
     *      CUgraphicsResource resource,
     *      unsigned int  arrayIndex,
     *      unsigned int  mipLevel )
     * 
*
*

Get an array through which to access a * subresource of a mapped graphics resource. Returns in *pArray * an array through which the subresource of the mapped graphics resource * resource which corresponds to array index arrayIndex * and mipmap level mipLevel may be accessed. The value set in * *pArray may change every time that resource is * mapped. *

*

If resource is not a texture * then it cannot be accessed via an array and CUDA_ERROR_NOT_MAPPED_AS_ARRAY * is returned. If arrayIndex is not a valid array index for * resource then CUDA_ERROR_INVALID_VALUE is returned. If mipLevel is not a valid mipmap level for resource then * CUDA_ERROR_INVALID_VALUE is returned. If resource is not * mapped then CUDA_ERROR_NOT_MAPPED is returned. *

*
* Note: *

Note that this * function may also return error codes from previous, asynchronous * launches. *

*
*

*
* * @param pArray Returned array through which a subresource of resource may be accessed * @param resource Mapped resource to access * @param arrayIndex Array index for array textures or cubemap face index as defined by CUarray_cubemap_face for cubemap textures for the subresource to access * @param mipLevel Mipmap level for the subresource to access * * @return CUDA_SUCCESS, CUDA_ERROR_DEINITIALIZED, CUDA_ERROR_NOT_INITIALIZED, * CUDA_ERROR_INVALID_CONTEXT, CUDA_ERROR_INVALID_VALUE, * CUDA_ERROR_INVALID_HANDLE, * CUDA_ERROR_NOT_MAPPEDCUDA_ERROR_NOT_MAPPED_AS_ARRAY * * @see JCudaDriver#cuGraphicsResourceGetMappedPointer */ public static int cuGraphicsSubResourceGetMappedArray(CUarray pArray, CUgraphicsResource resource, int arrayIndex, int mipLevel) { return checkResult(cuGraphicsSubResourceGetMappedArrayNative(pArray, resource, arrayIndex, mipLevel)); } private static native int cuGraphicsSubResourceGetMappedArrayNative(CUarray pArray, CUgraphicsResource resource, int arrayIndex, int mipLevel); /** * Get a mipmapped array through which to access a mapped graphics resource. * *
     * CUresult cuGraphicsResourceGetMappedMipmappedArray (
     *      CUmipmappedArray* pMipmappedArray,
     *      CUgraphicsResource resource )
     * </pre>
     * <p>
     * Returns in *pMipmappedArray a mipmapped array through which the
     * mapped graphics resource resource may be accessed. The value set in
     * *pMipmappedArray may change every time that resource is mapped.
     * <p>
     * If resource is not a texture then it cannot be accessed via a
     * mipmapped array and CUDA_ERROR_NOT_MAPPED_AS_ARRAY is returned. If
     * resource is not mapped then CUDA_ERROR_NOT_MAPPED is returned.
     * <p>
     * Note that this function may also return error codes from previous,
     * asynchronous launches.
     *
     * @param pMipmappedArray Returned mipmapped array through which resource may be accessed
     * @param resource Mapped resource to access
     *
     * @return CUDA_SUCCESS, CUDA_ERROR_DEINITIALIZED, CUDA_ERROR_NOT_INITIALIZED,
     * CUDA_ERROR_INVALID_CONTEXT, CUDA_ERROR_INVALID_VALUE,
     * CUDA_ERROR_INVALID_HANDLE, CUDA_ERROR_NOT_MAPPED,
     * CUDA_ERROR_NOT_MAPPED_AS_ARRAY
     *
     * @see JCudaDriver#cuGraphicsResourceGetMappedPointer
     */
    public static int cuGraphicsResourceGetMappedMipmappedArray(CUmipmappedArray pMipmappedArray, CUgraphicsResource resource)
    {
        // Delegate to the native binding and translate the status code
        int status = cuGraphicsResourceGetMappedMipmappedArrayNative(
            pMipmappedArray, resource);
        return checkResult(status);
    }
    private static native int cuGraphicsResourceGetMappedMipmappedArrayNative(CUmipmappedArray pMipmappedArray, CUgraphicsResource resource);

    /**
     * Get a device pointer through which to access a mapped graphics
     * resource.
     *
     * <pre>
     * CUresult cuGraphicsResourceGetMappedPointer (
     *      CUdeviceptr* pDevPtr,
     *      size_t* pSize,
     *      CUgraphicsResource resource )
     * 
*
*

Get a device pointer through which to * access a mapped graphics resource. Returns in *pDevPtr a * pointer through which the mapped graphics resource resource * may be accessed. Returns in pSize the size of the memory in * bytes which may be accessed from that pointer. The value set in pPointer may change every time that resource is * mapped. *

*

If resource is not a buffer * then it cannot be accessed via a pointer and CUDA_ERROR_NOT_MAPPED_AS_POINTER * is returned. If resource is not mapped then CUDA_ERROR_NOT_MAPPED * is returned. * *

*
* Note: *

Note that this * function may also return error codes from previous, asynchronous * launches. *

*
*

*
* * @param pDevPtr Returned pointer through which resource may be accessed * @param pSize Returned size of the buffer accessible starting at *pPointer * @param resource Mapped resource to access * * @return CUDA_SUCCESS, CUDA_ERROR_DEINITIALIZED, CUDA_ERROR_NOT_INITIALIZED, * CUDA_ERROR_INVALID_CONTEXT, CUDA_ERROR_INVALID_VALUE, * CUDA_ERROR_INVALID_HANDLE, * CUDA_ERROR_NOT_MAPPEDCUDA_ERROR_NOT_MAPPED_AS_POINTER * * @see JCudaDriver#cuGraphicsMapResources * @see JCudaDriver#cuGraphicsSubResourceGetMappedArray */ public static int cuGraphicsResourceGetMappedPointer( CUdeviceptr pDevPtr, long pSize[], CUgraphicsResource resource ) { return checkResult(cuGraphicsResourceGetMappedPointerNative(pDevPtr, pSize, resource)); } private static native int cuGraphicsResourceGetMappedPointerNative(CUdeviceptr pDevPtr, long pSize[], CUgraphicsResource resource); /** * Set usage flags for mapping a graphics resource. * *
     * CUresult cuGraphicsResourceSetMapFlags (
     *      CUgraphicsResource resource,
     *      unsigned int  flags )
     * </pre>
     * <p>
     * Set flags for mapping the graphics resource resource.
     * <p>
     * Changes to flags will take effect the next time resource is mapped.
     * The flags argument may be any of the following:
     * <ul>
     *   <li>CU_GRAPHICS_MAP_RESOURCE_FLAGS_NONE: Specifies no hints about
     *   how this resource will be used. It is therefore assumed that this
     *   resource will be read from and written to by CUDA kernels. This is
     *   the default value.</li>
     *   <li>CU_GRAPHICS_MAP_RESOURCE_FLAGS_READONLY: Specifies that CUDA
     *   kernels which access this resource will not write to this
     *   resource.</li>
     *   <li>CU_GRAPHICS_MAP_RESOURCE_FLAGS_WRITEDISCARD: Specifies that
     *   CUDA kernels which access this resource will not read from this
     *   resource and will write over the entire contents of the resource,
     *   so none of the data previously stored in the resource will be
     *   preserved.</li>
     * </ul>
     * <p>
     * If resource is presently mapped for access by CUDA then
     * CUDA_ERROR_ALREADY_MAPPED is returned. If flags is not one of the
     * above values then CUDA_ERROR_INVALID_VALUE is returned.
     * <p>
     * Note that this function may also return error codes from previous,
     * asynchronous launches.
     *
     * @param resource Registered resource to set flags for
     * @param flags Parameters for resource mapping
     *
     * @return CUDA_SUCCESS, CUDA_ERROR_DEINITIALIZED, CUDA_ERROR_NOT_INITIALIZED,
     * CUDA_ERROR_INVALID_CONTEXT, CUDA_ERROR_INVALID_VALUE,
     * CUDA_ERROR_INVALID_HANDLE, CUDA_ERROR_ALREADY_MAPPED
     *
     * @see JCudaDriver#cuGraphicsMapResources
     */
    public static int cuGraphicsResourceSetMapFlags( CUgraphicsResource resource, int flags )
    {
        // Delegate to the native binding and translate the status code
        int status = cuGraphicsResourceSetMapFlagsNative(resource, flags);
        return checkResult(status);
    }
    private static native int cuGraphicsResourceSetMapFlagsNative( CUgraphicsResource resource, int flags );

    /**
     * Map graphics resources for access by CUDA.
     *
     * <pre>
     * CUresult cuGraphicsMapResources (
     *      unsigned int  count,
     *      CUgraphicsResource* resources,
     *      CUstream hStream )
     * 
*
*

Map graphics resources for access by * CUDA. Maps the count graphics resources in resources * for access by CUDA. *

*

The resources in resources * may be accessed by CUDA until they are unmapped. The graphics API from * which resources were registered should not access any * resources while they are mapped by CUDA. If an application does so, * the results are * undefined. *

*

This function provides the synchronization * guarantee that any graphics calls issued before cuGraphicsMapResources() * will complete before any subsequent CUDA work issued in stream * begins. *

*

If resources includes any * duplicate entries then CUDA_ERROR_INVALID_HANDLE is returned. If any * of resources are presently mapped for access by CUDA then * CUDA_ERROR_ALREADY_MAPPED is returned. *

*
* Note: *

Note that this * function may also return error codes from previous, asynchronous * launches. *

*
*

*
* * @param count Number of resources to map * @param resources Resources to map for CUDA usage * @param hStream Stream with which to synchronize * * @return CUDA_SUCCESS, CUDA_ERROR_DEINITIALIZED, CUDA_ERROR_NOT_INITIALIZED, * CUDA_ERROR_INVALID_CONTEXT, CUDA_ERROR_INVALID_HANDLE, * CUDA_ERROR_ALREADY_MAPPED, CUDA_ERROR_UNKNOWN * * @see JCudaDriver#cuGraphicsResourceGetMappedPointer * @see JCudaDriver#cuGraphicsSubResourceGetMappedArray * @see JCudaDriver#cuGraphicsUnmapResources */ public static int cuGraphicsMapResources(int count, CUgraphicsResource resources[], CUstream hStream) { return checkResult(cuGraphicsMapResourcesNative(count, resources, hStream)); } private static native int cuGraphicsMapResourcesNative(int count, CUgraphicsResource resources[], CUstream hStream); /** * Unmap graphics resources. * *
     * CUresult cuGraphicsUnmapResources (
     *      unsigned int  count,
     *      CUgraphicsResource* resources,
     *      CUstream hStream )
     * 
*
*

Unmap graphics resources. Unmaps the * count graphics resources in resources. *

*

Once unmapped, the resources in resources may not be accessed by CUDA until they are mapped * again. *

*

This function provides the synchronization * guarantee that any CUDA work issued in stream before * cuGraphicsUnmapResources() will complete before any subsequently issued * graphics work begins. *

*

If resources includes any * duplicate entries then CUDA_ERROR_INVALID_HANDLE is returned. If any * of resources are not presently mapped for access by CUDA then * CUDA_ERROR_NOT_MAPPED is returned. *

*
* Note: *

Note that this * function may also return error codes from previous, asynchronous * launches. *

*
*

*
* * @param count Number of resources to unmap * @param resources Resources to unmap * @param hStream Stream with which to synchronize * * @return CUDA_SUCCESS, CUDA_ERROR_DEINITIALIZED, CUDA_ERROR_NOT_INITIALIZED, * CUDA_ERROR_INVALID_CONTEXT, CUDA_ERROR_INVALID_HANDLE, * CUDA_ERROR_NOT_MAPPED, CUDA_ERROR_UNKNOWN * * @see JCudaDriver#cuGraphicsMapResources */ public static int cuGraphicsUnmapResources( int count, CUgraphicsResource resources[], CUstream hStream) { return checkResult(cuGraphicsUnmapResourcesNative(count, resources, hStream)); } private static native int cuGraphicsUnmapResourcesNative(int count, CUgraphicsResource resources[], CUstream hStream); /** * Returns a module handle * * Returns in *hmod the handle of the module that function hfunc * is located in. The lifetime of the module corresponds to the lifetime of * the context it was loaded in or until the module is explicitly unloaded. * * The CUDA runtime manages its own modules loaded into the primary context. * If the handle returned by this API refers to a module loaded by the CUDA runtime, * calling ::cuModuleUnload() on that module will result in undefined behavior. * * @param hmod - Returned module handle * @param hfunc - Function to retrieve module for * * @return CUDA_SUCCESS, CUDA_ERROR_DEINITIALIZED, CUDA_ERROR_NOT_INITIALIZED, * CUDA_ERROR_INVALID_CONTEXT, CUDA_ERROR_INVALID_VALUE, CUDA_ERROR_NOT_FOUND */ public static int cuFuncGetModule(CUmodule hmod, CUfunction hfunc) { return checkResult(cuFuncGetModuleNative(hmod, hfunc)); } private static native int cuFuncGetModuleNative(CUmodule hmod, CUfunction hfunc); /** * Set resource limits. * *
     * CUresult cuCtxSetLimit (
     *      CUlimit limit,
     *      size_t value )
     * 
*
*

Set resource limits. Setting limit to value is a request by the application to * update the current limit maintained by the context. The driver is free * to modify the requested * value to meet h/w requirements (this * could be clamping to minimum or maximum values, rounding up to nearest * element size, * etc). The application can use * cuCtxGetLimit() to find out exactly what the limit has been set to. *

*

Setting each CUlimit has its own specific * restrictions, so each is discussed here. *

*
    *
  • *

    CU_LIMIT_STACK_SIZE controls * the stack size in bytes of each GPU thread. This limit is only * applicable to devices of compute capability 2.0 and * higher. Attempting to set this * limit on devices of compute capability less than 2.0 will result in * the error CUDA_ERROR_UNSUPPORTED_LIMIT being returned. *

    *
  • *
*

*
    *
  • *

    CU_LIMIT_PRINTF_FIFO_SIZE * controls the size in bytes of the FIFO used by the printf() device * system call. Setting CU_LIMIT_PRINTF_FIFO_SIZE must be performed before * launching any kernel that uses the printf() device system call, * otherwise CUDA_ERROR_INVALID_VALUE will be returned. This limit is only * applicable to devices of compute capability 2.0 and higher. Attempting * to set this limit * on devices of compute capability * less than 2.0 will result in the error CUDA_ERROR_UNSUPPORTED_LIMIT * being returned. *

    *
  • *
*

*
    *
  • *

    CU_LIMIT_MALLOC_HEAP_SIZE * controls the size in bytes of the heap used by the malloc() and free() * device system calls. Setting CU_LIMIT_MALLOC_HEAP_SIZE must be performed * before launching any kernel that uses the malloc() or free() device * system calls, otherwise CUDA_ERROR_INVALID_VALUE will be returned. This * limit is only applicable to devices of compute capability 2.0 and * higher. Attempting to set this limit * on devices of compute capability * less than 2.0 will result in the error CUDA_ERROR_UNSUPPORTED_LIMIT * being returned. *

    *
  • *
*

*
    *
  • *

    CU_LIMIT_DEV_RUNTIME_SYNC_DEPTH * controls the maximum nesting depth of a grid at which a thread can * safely call cudaDeviceSynchronize(). Setting this limit * must be performed before any * launch of a kernel that uses the device runtime and calls * cudaDeviceSynchronize() above the default * sync depth, two levels of grids. * Calls to cudaDeviceSynchronize() will fail with error code * cudaErrorSyncDepthExceeded if * the limitation is violated. This * limit can be set smaller than the default or up the maximum launch * depth of 24. When setting * this limit, keep in mind that * additional levels of sync depth require the driver to reserve large * amounts of device memory * which can no longer be used for * user allocations. If these reservations of device memory fail, * cuCtxSetLimit will return CUDA_ERROR_OUT_OF_MEMORY, and the limit can * be reset to a lower value. This limit is only applicable to devices of * compute capability 3.5 and higher. * Attempting to set this limit on * devices of compute capability less than 3.5 will result in the error * CUDA_ERROR_UNSUPPORTED_LIMIT being returned. *

    *
  • *
*

*
    *
  • *

    CU_LIMIT_DEV_RUNTIME_PENDING_LAUNCH_COUNT controls the maximum number * of outstanding device runtime launches that can be made from the * current context. A grid is outstanding * from the point of launch up * until the grid is known to have been completed. Device runtime launches * which violate this limitation * fail and return * cudaErrorLaunchPendingCountExceeded when cudaGetLastError() is called * after launch. If more pending launches * than the default (2048 launches) * are needed for a module using the device runtime, this limit can be * increased. Keep in mind * that being able to sustain * additional pending launches will require the driver to reserve larger * amounts of device memory * upfront which can no longer be * used for allocations. If these reservations fail, cuCtxSetLimit will * return CUDA_ERROR_OUT_OF_MEMORY, and the limit can be reset to a lower * value. This limit is only applicable to devices of compute capability * 3.5 and higher. * Attempting to set this limit on * devices of compute capability less than 3.5 will result in the error * CUDA_ERROR_UNSUPPORTED_LIMIT being returned. *

    *
  • *
*
    *
  • *

    CU_LIMIT_MAX_L2_FETCH_GRANULARITY controls the L2 cache fetch granularity. * Values can range from 0B to 128B. This is purely a performance hint and * it can be ignored or clamped depending on the platform. *

    *
  • *
*

*
* Note: *

Note that this * function may also return error codes from previous, asynchronous * launches. *

*
*

*
* * @param limit Limit to set * @param value Size of limit * * @return CUDA_SUCCESS, CUDA_ERROR_INVALID_VALUE, CUDA_ERROR_UNSUPPORTED_LIMIT, * CUDA_ERROR_OUT_OF_MEMORY * * @see JCudaDriver#cuCtxCreate * @see JCudaDriver#cuCtxDestroy * @see JCudaDriver#cuCtxGetApiVersion * @see JCudaDriver#cuCtxGetCacheConfig * @see JCudaDriver#cuCtxGetDevice * @see JCudaDriver#cuCtxGetLimit * @see JCudaDriver#cuCtxPopCurrent * @see JCudaDriver#cuCtxPushCurrent * @see JCudaDriver#cuCtxSetCacheConfig * @see JCudaDriver#cuCtxSynchronize */ public static int cuCtxSetLimit(int limit, long value) { return checkResult(cuCtxSetLimitNative(limit, value)); } private static native int cuCtxSetLimitNative(int limit, long value); /** * Returns the preferred cache configuration for the current context. * *
     * CUresult cuCtxGetCacheConfig (
     *      CUfunc_cache* pconfig )
     * 
*
*

Returns the preferred cache configuration * for the current context. On devices where the L1 cache and shared * memory use the * same hardware resources, this function * returns through pconfig the preferred cache configuration * for the current context. This is only a preference. The driver will * use the requested configuration * if possible, but it is free to choose a * different configuration if required to execute functions. *

*

This will return a pconfig of * CU_FUNC_CACHE_PREFER_NONE on devices where the size of the L1 cache * and shared memory are fixed. *

*

The supported cache configurations are: *

    *
  • *

    CU_FUNC_CACHE_PREFER_NONE: no * preference for shared memory or L1 (default) *

    *
  • *
  • *

    CU_FUNC_CACHE_PREFER_SHARED: * prefer larger shared memory and smaller L1 cache *

    *
  • *
  • *

    CU_FUNC_CACHE_PREFER_L1: prefer * larger L1 cache and smaller shared memory *

    *
  • *
  • *

    CU_FUNC_CACHE_PREFER_EQUAL: * prefer equal sized L1 cache and shared memory *

    *
  • *
*

*
* Note: *

Note that this * function may also return error codes from previous, asynchronous * launches. *

*
*

*
* * @param pconfig Returned cache configuration * * @return CUDA_SUCCESS, CUDA_ERROR_DEINITIALIZED, CUDA_ERROR_NOT_INITIALIZED, * CUDA_ERROR_INVALID_CONTEXT, CUDA_ERROR_INVALID_VALUE * * @see JCudaDriver#cuCtxCreate * @see JCudaDriver#cuCtxDestroy * @see JCudaDriver#cuCtxGetApiVersion * @see JCudaDriver#cuCtxGetDevice * @see JCudaDriver#cuCtxGetLimit * @see JCudaDriver#cuCtxPopCurrent * @see JCudaDriver#cuCtxPushCurrent * @see JCudaDriver#cuCtxSetCacheConfig * @see JCudaDriver#cuCtxSetLimit * @see JCudaDriver#cuCtxSynchronize * @see JCudaDriver#cuFuncSetCacheConfig */ public static int cuCtxGetCacheConfig(int pconfig[]) { return checkResult(cuCtxGetCacheConfigNative(pconfig)); } private static native int cuCtxGetCacheConfigNative(int[] pconfig); /** * Sets the preferred cache configuration for the current context. * *
     * CUresult cuCtxSetCacheConfig (
     *      CUfunc_cache config )
     * 
*
*

Sets the preferred cache configuration * for the current context. On devices where the L1 cache and shared * memory use the same * hardware resources, this sets through * config the preferred cache configuration for the current * context. This is only a preference. The driver will use the requested * configuration * if possible, but it is free to choose a * different configuration if required to execute the function. Any * function preference * set via cuFuncSetCacheConfig() will be * preferred over this context-wide setting. Setting the context-wide * cache configuration to CU_FUNC_CACHE_PREFER_NONE will cause subsequent * kernel launches to prefer to not change the cache configuration unless * required to launch the kernel. *

*

This setting does nothing on devices * where the size of the L1 cache and shared memory are fixed. *

*

Launching a kernel with a different * preference than the most recent preference setting may insert a * device-side synchronization * point. *

*

The supported cache configurations are: *

    *
  • *

    CU_FUNC_CACHE_PREFER_NONE: no * preference for shared memory or L1 (default) *

    *
  • *
  • *

    CU_FUNC_CACHE_PREFER_SHARED: * prefer larger shared memory and smaller L1 cache *

    *
  • *
  • *

    CU_FUNC_CACHE_PREFER_L1: prefer * larger L1 cache and smaller shared memory *

    *
  • *
  • *

    CU_FUNC_CACHE_PREFER_EQUAL: * prefer equal sized L1 cache and shared memory *

    *
  • *
*

*
* Note: *

Note that this * function may also return error codes from previous, asynchronous * launches. *

*
*

*
* * @param config Requested cache configuration * * @return CUDA_SUCCESS, CUDA_ERROR_DEINITIALIZED, CUDA_ERROR_NOT_INITIALIZED, * CUDA_ERROR_INVALID_CONTEXT, CUDA_ERROR_INVALID_VALUE * * @see JCudaDriver#cuCtxCreate * @see JCudaDriver#cuCtxDestroy * @see JCudaDriver#cuCtxGetApiVersion * @see JCudaDriver#cuCtxGetCacheConfig * @see JCudaDriver#cuCtxGetDevice * @see JCudaDriver#cuCtxGetLimit * @see JCudaDriver#cuCtxPopCurrent * @see JCudaDriver#cuCtxPushCurrent * @see JCudaDriver#cuCtxSetLimit * @see JCudaDriver#cuCtxSynchronize * @see JCudaDriver#cuFuncSetCacheConfig */ public static int cuCtxSetCacheConfig(int config) { return checkResult(cuCtxSetCacheConfigNative(config)); } private static native int cuCtxSetCacheConfigNative(int config); /** * Returns the current shared memory configuration for the current context. * *
     * CUresult cuCtxGetSharedMemConfig (
     *      CUsharedconfig* pConfig )
     * 
*
*

Returns the current shared memory * configuration for the current context. This function will return in * pConfig the current size of shared memory banks in the * current context. On devices with configurable shared memory banks, * cuCtxSetSharedMemConfig can be used to change this setting, so that * all subsequent kernel launches will by default use the new bank size. * When cuCtxGetSharedMemConfig is called on devices without configurable * shared memory, it will return the fixed bank size of the hardware. *

*

The returned bank configurations can be * either: *

    *
  • *

    CU_SHARED_MEM_CONFIG_FOUR_BYTE_BANK_SIZE: shared memory bank width is * four bytes. *

    *
  • *
  • *

    CU_SHARED_MEM_CONFIG_EIGHT_BYTE_BANK_SIZE: shared memory bank width * will eight bytes. *

    *
  • *
*

*
* Note: *

Note that this * function may also return error codes from previous, asynchronous * launches. *

*
*

*
* * @param pConfig returned shared memory configuration * * @return CUDA_SUCCESS, CUDA_ERROR_DEINITIALIZED, CUDA_ERROR_NOT_INITIALIZED, * CUDA_ERROR_INVALID_CONTEXT, CUDA_ERROR_INVALID_VALUE * * @see JCudaDriver#cuCtxCreate * @see JCudaDriver#cuCtxDestroy * @see JCudaDriver#cuCtxGetApiVersion * @see JCudaDriver#cuCtxGetCacheConfig * @see JCudaDriver#cuCtxGetDevice * @see JCudaDriver#cuCtxGetLimit * @see JCudaDriver#cuCtxPopCurrent * @see JCudaDriver#cuCtxPushCurrent * @see JCudaDriver#cuCtxSetLimit * @see JCudaDriver#cuCtxSynchronize * @see JCudaDriver#cuCtxGetSharedMemConfig * @see JCudaDriver#cuFuncSetCacheConfig */ public static int cuCtxGetSharedMemConfig(int pConfig[]) { return checkResult(cuCtxGetSharedMemConfigNative(pConfig)); } private static native int cuCtxGetSharedMemConfigNative(int pConfig[]); /** * Sets the shared memory configuration for the current context. * *
     * CUresult cuCtxSetSharedMemConfig (
     *      CUsharedconfig config )
     * 
*
*

Sets the shared memory configuration for * the current context. On devices with configurable shared memory banks, * this function * will set the context's shared memory bank * size which is used for subsequent kernel launches. *

*

Changed the shared memory configuration * between launches may insert a device side synchronization point between * those launches. *

*

Changing the shared memory bank size * will not increase shared memory usage or affect occupancy of kernels, * but may have major * effects on performance. Larger bank sizes * will allow for greater potential bandwidth to shared memory, but will * change what * kinds of accesses to shared memory will * result in bank conflicts. *

*

This function will do nothing on devices * with fixed shared memory bank size. *

*

The supported bank configurations are: *

    *
  • *

    CU_SHARED_MEM_CONFIG_DEFAULT_BANK_SIZE: * set bank width to the default initial setting (currently, four bytes). *

    *
  • *
  • *

    CU_SHARED_MEM_CONFIG_FOUR_BYTE_BANK_SIZE: set shared memory bank width * to be natively four bytes. *

    *
  • *
  • *

    CU_SHARED_MEM_CONFIG_EIGHT_BYTE_BANK_SIZE: set shared memory bank * width to be natively eight bytes. *

    *
  • *
*

*
* Note: *

Note that this * function may also return error codes from previous, asynchronous * launches. *

*
*

*
* * @param config requested shared memory configuration * * @return CUDA_SUCCESS, CUDA_ERROR_DEINITIALIZED, CUDA_ERROR_NOT_INITIALIZED, * CUDA_ERROR_INVALID_CONTEXT, CUDA_ERROR_INVALID_VALUE * * @see JCudaDriver#cuCtxCreate * @see JCudaDriver#cuCtxDestroy * @see JCudaDriver#cuCtxGetApiVersion * @see JCudaDriver#cuCtxGetCacheConfig * @see JCudaDriver#cuCtxGetDevice * @see JCudaDriver#cuCtxGetLimit * @see JCudaDriver#cuCtxPopCurrent * @see JCudaDriver#cuCtxPushCurrent * @see JCudaDriver#cuCtxSetLimit * @see JCudaDriver#cuCtxSynchronize * @see JCudaDriver#cuCtxGetSharedMemConfig * @see JCudaDriver#cuFuncSetCacheConfig */ public static int cuCtxSetSharedMemConfig(int config) { return checkResult(cuCtxSetSharedMemConfigNative(config)); } private static native int cuCtxSetSharedMemConfigNative(int config); /** * Gets the context's API version. * *
     * CUresult cuCtxGetApiVersion (
     *      CUcontext ctx,
     *      unsigned int* version )
     * 
*
*

Gets the context's API version. Returns * a version number in version corresponding to the capabilities * of the context (e.g. 3010 or 3020), which library developers can use * to direct callers * to a specific API version. If ctx is NULL, returns the API version used to create the currently * bound context. *

*

Note that new API versions are only * introduced when context capabilities are changed that break binary * compatibility, so the * API version and driver version may be * different. For example, it is valid for the API version to be 3020 * while the driver * version is 4020. *

*
* Note: *

Note that this * function may also return error codes from previous, asynchronous * launches. *

*
*

*
* * @param ctx Context to check * @param version Pointer to version * * @return CUDA_SUCCESS, CUDA_ERROR_DEINITIALIZED, CUDA_ERROR_NOT_INITIALIZED, * CUDA_ERROR_INVALID_CONTEXT, CUDA_ERROR_UNKNOWN * * @see JCudaDriver#cuCtxCreate * @see JCudaDriver#cuCtxDestroy * @see JCudaDriver#cuCtxGetDevice * @see JCudaDriver#cuCtxGetLimit * @see JCudaDriver#cuCtxPopCurrent * @see JCudaDriver#cuCtxPushCurrent * @see JCudaDriver#cuCtxSetCacheConfig * @see JCudaDriver#cuCtxSetLimit * @see JCudaDriver#cuCtxSynchronize */ public static int cuCtxGetApiVersion(CUcontext ctx, int version[]) { return checkResult(cuCtxGetApiVersionNative(ctx, version)); } private static native int cuCtxGetApiVersionNative(CUcontext ctx, int version[]); /** * Returns numerical values that correspond to the least and * greatest stream priorities.
*
* Returns in *leastPriority and *greatestPriority the numerical values that correspond * to the least and greatest stream priorities respectively. Stream priorities * follow a convention where lower numbers imply greater priorities. The range of * meaningful stream priorities is given by [*greatestPriority, *leastPriority]. * If the user attempts to create a stream with a priority value that is * outside the meaningful range as specified by this API, the priority is * automatically clamped down or up to either *leastPriority or *greatestPriority * respectively. See ::cuStreamCreateWithPriority for details on creating a * priority stream.
* A NULL may be passed in for *leastPriority or *greatestPriority if the value * is not desired.
*
* This function will return '0' in both \p *leastPriority and \p *greatestPriority if * the current context's device does not support stream priorities * (see ::cuDeviceGetAttribute). * * @param leastPriority Pointer to an int in which the numerical value for least * stream priority is returned * @param greatestPriority Pointer to an int in which the numerical value for greatest * stream priority is returned * * @return CUDA_SUCCESS, CUDA_ERROR_INVALID_VALUE * * @see JCudaDriver#cuStreamCreateWithPriority * @see JCudaDriver#cuStreamGetPriority, * @see JCudaDriver#cuCtxGetDevice, * @see JCudaDriver#cuCtxSetLimit, * @see JCudaDriver#cuCtxSynchronize */ public static int cuCtxGetStreamPriorityRange(int leastPriority[], int greatestPriority[]) { return checkResult(cuCtxGetStreamPriorityRangeNative(leastPriority, greatestPriority)); } private static native int cuCtxGetStreamPriorityRangeNative(int leastPriority[], int greatestPriority[]); /** * Resets all persisting lines in cache to normal status. * * ::cuCtxResetPersistingL2Cache Resets all persisting lines in cache to normal * status. Takes effect on function return. * * @return CUDA_SUCCESS, CUDA_ERROR_NOT_SUPPORTED * * @see CUaccessPolicyWindow */ public static int cuCtxResetPersistingL2Cache() { return checkResult(cuCtxResetPersistingL2CacheNative()); } private static native int cuCtxResetPersistingL2CacheNative(); /** * Launches a CUDA function. * *
*
* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * *
CUresult cuLaunchKernel (CUfunction  f,
unsigned int  gridDimX,
unsigned int  gridDimY,
unsigned int  gridDimZ,
unsigned int  blockDimX,
unsigned int  blockDimY,
unsigned int  blockDimZ,
unsigned int  sharedMemBytes,
CUstream  hStream,
void **  kernelParams,
void **  extra 
)
*
*
*

* Invokes the kernel f on a gridDimX x * gridDimY x gridDimZ grid of blocks. Each * block contains blockDimX x blockDimY x * blockDimZ threads. *

* sharedMemBytes sets the amount of dynamic shared memory * that will be available to each thread block. *

* cuLaunchKernel() can optionally be associated to a stream by passing a * non-zero hStream argument. *

* Kernel parameters to f can be specified in one of two * ways: *

* 1) Kernel parameters can be specified via kernelParams. * If f has N parameters, then kernelParams * needs to be an array of N pointers. Each of kernelParams[0] * through kernelParams[N-1] must point to a region of memory * from which the actual kernel parameter will be copied. The number of * kernel parameters and their offsets and sizes do not need to be * specified as that information is retrieved directly from the kernel's * image. *

* 2) Kernel parameters can also be packaged by the application into a * single buffer that is passed in via the extra parameter. * This places the burden on the application of knowing each kernel * parameter's size and alignment/padding within the buffer. Here is an * example of using the extra parameter in this manner: *

*
    size_t argBufferSize;
     *     char argBuffer[256];
     *
     *     // populate argBuffer and argBufferSize
     *
     *     void *config[] = {
     *         CU_LAUNCH_PARAM_BUFFER_POINTER, argBuffer,
     *         CU_LAUNCH_PARAM_BUFFER_SIZE,    &argBufferSize,
     *         CU_LAUNCH_PARAM_END
     *     };
     *     status = cuLaunchKernel(f, gx, gy, gz, bx, by, bz, sh, s, NULL,
     * config);
     * 
*
*

* The extra parameter exists to allow cuLaunchKernel to take * additional less commonly used arguments. extra specifies * a list of names of extra settings and their corresponding values. Each * extra setting name is immediately followed by the corresponding value. * The list must be terminated with either NULL or * CU_LAUNCH_PARAM_END. *

*

    *
  • CU_LAUNCH_PARAM_END, which indicates the end of the extra * array; *
  • *
  • CU_LAUNCH_PARAM_BUFFER_POINTER, which specifies that * the next value in extra will be a pointer to a buffer * containing all the kernel parameters for launching kernel * f; *
  • *
  • CU_LAUNCH_PARAM_BUFFER_SIZE, which specifies * that the next value in extra will be a pointer to a size_t * containing the size of the buffer specified with * CU_LAUNCH_PARAM_BUFFER_POINTER; *
  • *
*

* The error CUDA_ERROR_INVALID_VALUE will be returned if kernel parameters * are specified with both kernelParams and extra * (i.e. both kernelParams and extra are * non-NULL). *

* Calling cuLaunchKernel() sets persistent function state that is the * same as function state set through the following deprecated APIs: *

* cuFuncSetBlockShape() cuFuncSetSharedSize() cuParamSetSize() * cuParamSeti() cuParamSetf() cuParamSetv() *

* When the kernel f is launched via cuLaunchKernel(), the * previous block shape, shared size and parameter info associated with * f is overwritten. *

* Note that to use cuLaunchKernel(), the kernel f must * either have been compiled with toolchain version 3.2 or later so that * it will contain kernel parameter information, or have no kernel * parameters. If either of these conditions is not met, then * cuLaunchKernel() will return CUDA_ERROR_INVALID_IMAGE. *

*

*
* * @return CUDA_SUCCESS, CUDA_ERROR_DEINITIALIZED, CUDA_ERROR_NOT_INITIALIZED, * CUDA_ERROR_INVALID_CONTEXT, CUDA_ERROR_INVALID_HANDLE, * CUDA_ERROR_INVALID_IMAGE, CUDA_ERROR_INVALID_VALUE, * CUDA_ERROR_LAUNCH_FAILED, CUDA_ERROR_LAUNCH_OUT_OF_RESOURCES, * CUDA_ERROR_LAUNCH_TIMEOUT, CUDA_ERROR_LAUNCH_INCOMPATIBLE_TEXTURING, * CUDA_ERROR_SHARED_OBJECT_INIT_FAILED * * @see JCudaDriver#cuCtxGetCacheConfig * @see JCudaDriver#cuCtxSetCacheConfig * @see JCudaDriver#cuFuncSetCacheConfig * @see JCudaDriver#cuFuncGetAttribute */ public static int cuLaunchKernel( CUfunction f, int gridDimX, int gridDimY, int gridDimZ, int blockDimX, int blockDimY, int blockDimZ, int sharedMemBytes, CUstream hStream, Pointer kernelParams, Pointer extra) { return checkResult(cuLaunchKernelNative(f, gridDimX, gridDimY, gridDimZ, blockDimX, blockDimY, blockDimZ, sharedMemBytes, hStream, kernelParams, extra)); } private static native int cuLaunchKernelNative( CUfunction f, int gridDimX, int gridDimY, int gridDimZ, int blockDimX, int blockDimY, int blockDimZ, int sharedMemBytes, CUstream hStream, Pointer kernelParams, Pointer extra); /** * Launches a CUDA function where thread blocks can cooperate and synchronize as they execute. * *
     * CUresult cuLaunchCooperativeKernel (
     *      CUfunction f,
     *      unsigned int  gridDimX,
     *      unsigned int  gridDimY,
     *      unsigned int  gridDimZ,
     *      unsigned int  blockDimX,
     *      unsigned int  blockDimY,
     *      unsigned int  blockDimZ,
     *      unsigned int  sharedMemBytes,
     *      CUstream hStream,
     *      void** kernelParams )
     * 
*
Launches a CUDA function where thread blocks can cooperate * and synchronize as they execute. *
*
*
Description
*

Invokes the kernel f on a * gridDimX x gridDimY x gridDimZ grid of * blocks. Each block contains blockDimX x blockDimY * x blockDimZ threads. *

*

sharedMemBytes sets the * amount of dynamic shared memory that will be available to each thread * block. *

*

The device on which this kernel is * invoked must have a non-zero value for the device attribute * CU_DEVICE_ATTRIBUTE_COOPERATIVE_LAUNCH. *

*

The total number of blocks launched * cannot exceed the maximum number of blocks per multiprocessor as * returned by cuOccupancyMaxActiveBlocksPerMultiprocessor (or * cuOccupancyMaxActiveBlocksPerMultiprocessorWithFlags) times the number * of multiprocessors as specified by the device attribute * CU_DEVICE_ATTRIBUTE_MULTIPROCESSOR_COUNT. *

*

The kernel cannot make use of CUDA * dynamic parallelism. *

*

Kernel parameters must be specified * via kernelParams. If f has N parameters, then kernelParams needs to be an array of N pointers. Each of kernelParams[0] through kernelParams[N-1] must point * to a region of memory from which the actual kernel parameter will be * copied. The number of kernel parameters * and their offsets and sizes do not * need to be specified as that information is retrieved directly from * the kernel's image. *

*

Calling cuLaunchCooperativeKernel() * sets persistent function state that is the same as function state set * through cuLaunchKernel API *

*

When the kernel f is * launched via cuLaunchCooperativeKernel(), the previous block shape, * shared size and parameter info associated with f is * overwritten. *

*

Note that to use * cuLaunchCooperativeKernel(), the kernel f must either have * been compiled with toolchain version 3.2 or later so that it will * contain kernel parameter information, * or have no kernel parameters. If * either of these conditions is not met, then cuLaunchCooperativeKernel() * will return CUDA_ERROR_INVALID_IMAGE. *

*
* Note: *
    *
  • *

    This function uses * standard default stream semantics. *

    *
  • *
  • *

    Note that this function * may also return error codes from previous, asynchronous launches. *

    *
  • *
*
*

*
* * @param f Kernel to launch * @param gridDimX Width of grid in blocks * @param gridDimY Height of grid in blocks * @param gridDimZ Depth of grid in blocks * @param blockDimX X dimension of each thread block * @param blockDimY Y dimension of each thread block * @param blockDimZ Z dimension of each thread block * @param sharedMemBytes Dynamic shared-memory size per thread block in bytes * @param hStream Stream identifier * @param kernelParams Array of pointers to kernel parameters * * @return CUDA_SUCCESS, CUDA_ERROR_DEINITIALIZED, CUDA_ERROR_NOT_INITIALIZED, * CUDA_ERROR_INVALID_CONTEXT, CUDA_ERROR_INVALID_HANDLE, * CUDA_ERROR_INVALID_IMAGE, CUDA_ERROR_INVALID_VALUE, * CUDA_ERROR_LAUNCH_FAILED, CUDA_ERROR_LAUNCH_OUT_OF_RESOURCES, * CUDA_ERROR_LAUNCH_TIMEOUT, CUDA_ERROR_LAUNCH_INCOMPATIBLE_TEXTURING, * CUDA_ERROR_COOPERATIVE_LAUNCH_TOO_LARGE, * CUDA_ERROR_SHARED_OBJECT_INIT_FAILED * * @see JCudaDriver#cuCtxGetCacheConfig * @see JCudaDriver#cuCtxSetCacheConfig * @see JCudaDriver#cuFuncSetCacheConfig * @see JCudaDriver#cuFuncGetAttribute * @see JCudaDriver#cuLaunchCooperativeKernelMultiDevice * @see JCudaDriver#cudaLaunchCooperativeKernel */ public static int cuLaunchCooperativeKernel( CUfunction f, int gridDimX, int gridDimY, int gridDimZ, int blockDimX, int blockDimY, int blockDimZ, int sharedMemBytes, CUstream hStream, Pointer kernelParams) { return checkResult(cuLaunchCooperativeKernelNative(f, gridDimX, gridDimY, gridDimZ, blockDimX, blockDimY, blockDimZ, sharedMemBytes, hStream, kernelParams)); } private static native int cuLaunchCooperativeKernelNative( CUfunction f, int gridDimX, int gridDimY, int gridDimZ, int blockDimX, int blockDimY, int blockDimZ, int sharedMemBytes, CUstream hStream, Pointer kernelParams); /** * Launches CUDA functions on multiple devices where thread blocks can cooperate and synchronize as they execute. * *
     * CUresult cuLaunchCooperativeKernelMultiDevice (
     *      CUDA_LAUNCH_PARAMS* launchParamsList,
     *      unsigned int  numDevices,
     *      unsigned int  flags )
     * 
*
Launches CUDA functions on multiple devices where thread * blocks can cooperate and synchronize as they execute. *
*
*
Description
*

Invokes kernels as specified in the * launchParamsList array where each element of the array * specifies all the parameters required to perform a single kernel * launch. These kernels * can cooperate and synchronize as they * execute. The size of the array is specified by numDevices. *

*

No two kernels can be launched on * the same device. All the devices targeted by this multi-device launch * must be identical. * All devices must have a non-zero value * for the device attribute * CU_DEVICE_ATTRIBUTE_COOPERATIVE_MULTI_DEVICE_LAUNCH. *

*

All kernels launched must be identical * with respect to the compiled code. Note that any __device__, __constant__ * or __managed__ * variables present in the module that * owns the kernel launched on each device, are independently instantiated * on every device. * It is the application's responsiblity * to ensure these variables are initialized and used appropriately. *

*

The size of the grids as specified * in blocks, the size of the blocks themselves and the amount of shared * memory used by each * thread block must also match across * all launched kernels. *

*

The streams used to launch these * kernels must have been created via either cuStreamCreate or * cuStreamCreateWithPriority. The NULL stream or CU_STREAM_LEGACY or * CU_STREAM_PER_THREAD cannot be used. *

*

The total number of blocks launched * per kernel cannot exceed the maximum number of blocks per multiprocessor * as returned by * cuOccupancyMaxActiveBlocksPerMultiprocessor * (or cuOccupancyMaxActiveBlocksPerMultiprocessorWithFlags) times the * number of multiprocessors as specified by the device attribute * CU_DEVICE_ATTRIBUTE_MULTIPROCESSOR_COUNT. Since the total number of * blocks launched per device has to match across all devices, the maximum * number of blocks that * can be launched per device will be * limited by the device with the least number of multiprocessors. *

*

The kernels cannot make use of CUDA * dynamic parallelism. *

*

The CUDA_LAUNCH_PARAMS structure is * defined as: *

        typedef struct CUDA_LAUNCH_PARAMS_st
     *               {
     *                   CUfunction function;
     *                   unsigned int gridDimX;
     *                   unsigned int gridDimY;
     *                   unsigned int gridDimZ;
     *                   unsigned int blockDimX;
     *                   unsigned int blockDimY;
     *                   unsigned int blockDimZ;
     *                   unsigned int sharedMemBytes;
     *                   CUstream hStream;
     *                   void **kernelParams;
     *               } CUDA_LAUNCH_PARAMS;
* where: *
    *
  • *

    CUDA_LAUNCH_PARAMS::function * specifies the kernel to be launched. All functions must be identical * with respect to the compiled code. *

    *
  • *
  • *

    CUDA_LAUNCH_PARAMS::gridDimX * is the width of the grid in blocks. This must match across all kernels * launched. *

    *
  • *
  • *

    CUDA_LAUNCH_PARAMS::gridDimY * is the height of the grid in blocks. This must match across all kernels * launched. *

    *
  • *
  • *

    CUDA_LAUNCH_PARAMS::gridDimZ * is the depth of the grid in blocks. This must match across all kernels * launched. *

    *
  • *
  • *

    CUDA_LAUNCH_PARAMS::blockDimX * is the X dimension of each thread block. This must match across all * kernels launched. *

    *
  • *
  • *

    CUDA_LAUNCH_PARAMS::blockDimX * is the Y dimension of each thread block. This must match across all * kernels launched. *

    *
  • *
  • *

    CUDA_LAUNCH_PARAMS::blockDimZ * is the Z dimension of each thread block. This must match across all * kernels launched. *

    *
  • *
  • *

    CUDA_LAUNCH_PARAMS::sharedMemBytes * is the dynamic shared-memory size per thread block in bytes. This must * match across all kernels launched. *

    *
  • *
  • *

    CUDA_LAUNCH_PARAMS::hStream * is the handle to the stream to perform the launch in. This cannot be * the NULL stream or CU_STREAM_LEGACY or CU_STREAM_PER_THREAD. The CUDA * context associated with this stream must match that associated with * CUDA_LAUNCH_PARAMS::function. *

    *
  • *
  • *

    CUDA_LAUNCH_PARAMS::kernelParams * is an array of pointers to kernel parameters. If CUDA_LAUNCH_PARAMS::function * has N parameters, then CUDA_LAUNCH_PARAMS::kernelParams needs to be an * array of N pointers. Each of CUDA_LAUNCH_PARAMS::kernelParams[0] * through CUDA_LAUNCH_PARAMS::kernelParams[N-1] must point to a region * of memory from which the actual kernel parameter will be copied. The * number of kernel parameters * and their offsets and sizes * do not need to be specified as that information is retrieved directly * from the kernel's image. *

    *
  • *
*

*

By default, the kernel won't begin * execution on any GPU until all prior work in all the specified streams * has completed. This * behavior can be overridden by * specifying the flag CUDA_COOPERATIVE_LAUNCH_MULTI_DEVICE_NO_PRE_LAUNCH_SYNC. * When this flag is specified, each kernel will only wait for prior work * in the stream corresponding to that GPU to complete * before it begins execution. *

*

Similarly, by default, any subsequent * work pushed in any of the specified streams will not begin execution * until the kernels * on all GPUs have completed. This * behavior can be overridden by specifying the flag * CUDA_COOPERATIVE_LAUNCH_MULTI_DEVICE_NO_POST_LAUNCH_SYNC. When this * flag is specified, any subsequent work pushed in any of the specified * streams will only wait for the kernel launched * on the GPU corresponding to that * stream to complete before it begins execution. *

*

Calling * cuLaunchCooperativeKernelMultiDevice() sets persistent function state * that is the same as function state set through cuLaunchKernel API when * called individually for each element in launchParamsList. *

*

When kernels are launched via * cuLaunchCooperativeKernelMultiDevice(), the previous block shape, * shared size and parameter info associated with each * CUDA_LAUNCH_PARAMS::function in launchParamsList is * overwritten. *

*

Note that to use * cuLaunchCooperativeKernelMultiDevice(), the kernels must either have * been compiled with toolchain version 3.2 or later so that it will * contain kernel parameter * information, or have no kernel * parameters. If either of these conditions is not met, then * cuLaunchCooperativeKernelMultiDevice() will return * CUDA_ERROR_INVALID_IMAGE. *

*
* Note: *
    *
  • *

    This function uses * standard default stream semantics. *

    *
  • *
  • *

    Note that this function * may also return error codes from previous, asynchronous launches. *

    *
  • *
*
*

*
* * @param launchParamsList List of launch parameters, one per device * @param numDevices Size of the launchParamsList array * @param flags Flags to control launch behavior * * @return CUDA_SUCCESS, CUDA_ERROR_DEINITIALIZED, CUDA_ERROR_NOT_INITIALIZED, * CUDA_ERROR_INVALID_CONTEXT, CUDA_ERROR_INVALID_HANDLE, * CUDA_ERROR_INVALID_IMAGE, CUDA_ERROR_INVALID_VALUE, * CUDA_ERROR_LAUNCH_FAILED, CUDA_ERROR_LAUNCH_OUT_OF_RESOURCES, * CUDA_ERROR_LAUNCH_TIMEOUT, CUDA_ERROR_LAUNCH_INCOMPATIBLE_TEXTURING, * CUDA_ERROR_COOPERATIVE_LAUNCH_TOO_LARGE, * CUDA_ERROR_SHARED_OBJECT_INIT_FAILED * * @see JCudaDriver#cuCtxGetCacheConfig * @see JCudaDriver#cuCtxSetCacheConfig * @see JCudaDriver#cuFuncSetCacheConfig * @see JCudaDriver#cuFuncGetAttribute * @see JCudaDriver#cuLaunchCooperativeKernel * @see JCudaDriver#cudaLaunchCooperativeKernelMultiDevice */ public static int cuLaunchCooperativeKernelMultiDevice(CUDA_LAUNCH_PARAMS launchParamsList[], int numDevices, int flags) { return checkResult(cuLaunchCooperativeKernelMultiDeviceNative(launchParamsList, numDevices, flags)); } private static native int cuLaunchCooperativeKernelMultiDeviceNative(CUDA_LAUNCH_PARAMS launchParamsList[], int numDevices, int flags); /** * Enqueues a host function call in a stream. * * Enqueues a host function to run in a stream. The function will be called * after currently enqueued work and will block work added after it.
*
* The host function must not make any CUDA API calls. Attempting to use a * CUDA API may result in ::CUDA_ERROR_NOT_PERMITTED, but this is not required. * The host function must not perform any synchronization that may depend on * outstanding CUDA work not mandated to run earlier. Host functions without a * mandated order (such as in independent streams) execute in undefined order * and may be serialized.
*
* For the purposes of Unified Memory, execution makes a number of guarantees: *
    *
  • The stream is considered idle for the duration of the function's * execution. Thus, for example, the function may always use memory attached * to the stream it was enqueued in.
  • *
  • The start of execution of the function has the same effect as * synchronizing an event recorded in the same stream immediately prior to * the function. It thus synchronizes streams which have been "joined" * prior to the function.
  • *
  • Adding device work to any stream does not have the effect of making * the stream active until all preceding host functions and stream callbacks * have executed. Thus, for * example, a function might use global attached memory even if work has * been added to another stream, if the work has been ordered behind the * function call with an event.
  • *
  • Completion of the function does not cause a stream to become * active except as described above. The stream will remain idle * if no device work follows the function, and will remain idle across * consecutive host functions or stream callbacks without device work in * between. Thus, for example, * stream synchronization can be done by signaling from a host function at the * end of the stream.
  • *
* * Note that, in contrast to ::cuStreamAddCallback, the function will not be * called in the event of an error in the CUDA context. * * @param hStream - Stream to enqueue function call in * @param fn - The function to call once preceding stream operations are complete * @param userData - User-specified data to be passed to the function * * @return * CUDA_SUCCESS, * CUDA_ERROR_DEINITIALIZED, * CUDA_ERROR_NOT_INITIALIZED, * CUDA_ERROR_INVALID_CONTEXT, * CUDA_ERROR_INVALID_HANDLE, * CUDA_ERROR_NOT_SUPPORTED * * @see * JCudaDriver#cuStreamCreate * JCudaDriver#cuStreamQuery * JCudaDriver#cuStreamSynchronize * JCudaDriver#cuStreamWaitEvent * JCudaDriver#cuStreamDestroy * JCudaDriver#cuMemAllocManaged * JCudaDriver#cuStreamAttachMemAsync * JCudaDriver#cuStreamAddCallback */ public static int cuLaunchHostFunc(CUstream hStream, CUhostFn fn, Object userData) { return checkResult(cuLaunchHostFuncNative(hStream, fn, userData)); } private static native int cuLaunchHostFuncNative(CUstream hStream, CUhostFn fn, Object userData); /** * Returns resource limits. * *
     * CUresult cuCtxGetLimit (
     *      size_t* pvalue,
     *      CUlimit limit )
     * 
*
*

Returns resource limits. Returns in *pvalue the current size of limit. The supported * CUlimit values are: *

    *
  • *

    CU_LIMIT_STACK_SIZE: stack size * in bytes of each GPU thread. *

    *
  • *
  • *

    CU_LIMIT_PRINTF_FIFO_SIZE: size * in bytes of the FIFO used by the printf() device system call. *

    *
  • *
  • *

    CU_LIMIT_MALLOC_HEAP_SIZE: size * in bytes of the heap used by the malloc() and free() device system * calls. *

    *
  • *
  • *

    CU_LIMIT_DEV_RUNTIME_SYNC_DEPTH: * maximum grid depth at which a thread can issue the device runtime call * cudaDeviceSynchronize() to wait on child grid launches * to complete. *

    *
  • *
  • *

    CU_LIMIT_DEV_RUNTIME_PENDING_LAUNCH_COUNT: maximum number of * outstanding device runtime launches that can be made from this * context. *

    *
  • *
  • *

    CU_LIMIT_MAX_L2_FETCH_GRANULARITY: L2 cache fetch granularity *

    *
  • *
*

*
* Note: *

Note that this * function may also return error codes from previous, asynchronous * launches. *

*
*

*
* * @param pvalue Returned size of limit * @param limit Limit to query * * @return CUDA_SUCCESS, CUDA_ERROR_INVALID_VALUE, * CUDA_ERROR_UNSUPPORTED_LIMIT * * @see JCudaDriver#cuCtxCreate * @see JCudaDriver#cuCtxDestroy * @see JCudaDriver#cuCtxGetApiVersion * @see JCudaDriver#cuCtxGetCacheConfig * @see JCudaDriver#cuCtxGetDevice * @see JCudaDriver#cuCtxPopCurrent * @see JCudaDriver#cuCtxPushCurrent * @see JCudaDriver#cuCtxSetCacheConfig * @see JCudaDriver#cuCtxSetLimit * @see JCudaDriver#cuCtxSynchronize */ public static int cuCtxGetLimit(long pvalue[], int limit) { return checkResult(cuCtxGetLimitNative(pvalue, limit)); } private static native int cuCtxGetLimitNative(long pvalue[], int limit); /** * Initialize the profiling. * *
     * CUresult cuProfilerInitialize (
     *      const char* configFile,
     *      const char* outputFile,
     *      CUoutput_mode outputMode )
     * 
*
*

Initialize the profiling. Using this * API user can initialize the CUDA profiler by specifying the configuration * file, output * file and output file format. This API is * generally used to profile different set of counters by looping the * kernel launch. * The configFile parameter can * be used to select profiling options including profiler counters. Refer * to the "Compute Command Line Profiler * User Guide" for supported profiler * options and counters. *

*

Limitation: The CUDA profiler cannot be * initialized with this API if another profiling tool is already active, * as indicated * by the CUDA_ERROR_PROFILER_DISABLED * return code. *

*

Typical usage of the profiling APIs is * as follows: *

*

for each set of counters/options * { * cuProfilerInitialize(); //Initialize * profiling, set the counters or options in the config file * ... * cuProfilerStart(); * // code to be profiled * cuProfilerStop(); * ... * cuProfilerStart(); * // code to be profiled * cuProfilerStop(); * ... * } *

*
* Note: *

Note that this * function may also return error codes from previous, asynchronous * launches. *

*
*

*
* * @param configFile Name of the config file that lists the counters/options for profiling. * @param outputFile Name of the outputFile where the profiling results will be stored. * @param outputMode outputMode, can be CU_OUT_KEY_VALUE_PAIR or CU_OUT_CSV. * * @return CUDA_SUCCESS, CUDA_ERROR_INVALID_CONTEXT, CUDA_ERROR_INVALID_VALUE, * CUDA_ERROR_PROFILER_DISABLED * * @see JCudaDriver#cuProfilerStart * @see JCudaDriver#cuProfilerStop * * @deprecated Deprecated as of CUDA 11.0 */ public static int cuProfilerInitialize(String configFile, String outputFile, int outputMode) { return checkResult(cuProfilerInitializeNative(configFile, outputFile, outputMode)); } private static native int cuProfilerInitializeNative(String configFile, String outputFile, int outputMode); /** * Enable profiling. * *
     * CUresult cuProfilerStart (
     *      void )
     * </pre>
     * <p>
     * Enable profiling. Enables profile collection by the active profiling
     * tool. If profiling is already enabled, then cuProfilerStart() has no
     * effect.
     * <p>
     * The cuProfilerStart and cuProfilerStop APIs are used to
     * programmatically control the profiling granularity by allowing
     * profiling to be done only on selective pieces of code.
     * <p>
     * Note: This function may also return error codes from previous,
     * asynchronous launches.
     *
     * @return CUDA_SUCCESS, CUDA_ERROR_INVALID_CONTEXT
     *
     * @see JCudaDriver#cuProfilerInitialize
     * @see JCudaDriver#cuProfilerStop
     */
    public static int cuProfilerStart()
    {
        int status = cuProfilerStartNative();
        return checkResult(status);
    }

    private static native int cuProfilerStartNative();

    /**
     * Disable profiling.
     *
     * <pre>
     * CUresult cuProfilerStop (
     *      void )
     * </pre>
     * <p>
     * Disable profiling. Disables profile collection by the active
     * profiling tool. If profiling is already disabled, then
     * cuProfilerStop() has no effect.
     * <p>
     * The cuProfilerStart and cuProfilerStop APIs are used to
     * programmatically control the profiling granularity by allowing
     * profiling to be done only on selective pieces of code.
     * <p>
     * Note: This function may also return error codes from previous,
     * asynchronous launches.
     *
     * @return CUDA_SUCCESS, CUDA_ERROR_INVALID_CONTEXT
     *
     * @see JCudaDriver#cuProfilerInitialize
     * @see JCudaDriver#cuProfilerStart
     */
    public static int cuProfilerStop()
    {
        int status = cuProfilerStopNative();
        return checkResult(status);
    }

    private static native int cuProfilerStopNative();

}




© 2015 - 2024 Weber Informatics LLC | Privacy Policy