// jcuda.driver.JCudaDriver (Maven / Gradle / Ivy)
/*
*
* * Copyright 2015 Skymind,Inc.
* *
* * Licensed under the Apache License, Version 2.0 (the "License");
* * you may not use this file except in compliance with the License.
* * You may obtain a copy of the License at
* *
* * http://www.apache.org/licenses/LICENSE-2.0
* *
* * Unless required by applicable law or agreed to in writing, software
* * distributed under the License is distributed on an "AS IS" BASIS,
* * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* * See the License for the specific language governing permissions and
* * limitations under the License.
*
*
*/
package jcuda.driver;
import jcuda.CudaException;
import jcuda.LibUtils;
import jcuda.LogLevel;
import jcuda.Pointer;
/**
* Java bindings for the NVidia CUDA driver API.
*
* Most comments are extracted from the CUDA online documentation
*/
public class JCudaDriver
{
/**
 * The CUDA version.
 * NOTE(review): presumably encoded as 1000 * major + 10 * minor
 * (so 7000 would mean CUDA 7.0) - confirm against the CUDA headers.
 */
public static final int CUDA_VERSION = 7000;
/**
 * If set, host memory is portable between CUDA contexts.
 * Flag for {@link JCudaDriver#cuMemHostAlloc}
 */
public static final int CU_MEMHOSTALLOC_PORTABLE = 0x01;
/**
 * If set, host memory is mapped into CUDA address space and
 * {@code JCudaDriver#cuMemHostGetDevicePointer} may be called on the
 * host pointer.
 * Flag for {@link JCudaDriver#cuMemHostAlloc}
 */
public static final int CU_MEMHOSTALLOC_DEVICEMAP = 0x02;
/**
 * If set, host memory is allocated as write-combined: fast to write,
 * faster to DMA, slow to read except via the SSE4 streaming load
 * instruction (MOVNTDQA).
 * Flag for {@link JCudaDriver#cuMemHostAlloc}
 */
public static final int CU_MEMHOSTALLOC_WRITECOMBINED = 0x04;
/**
 * If set, host memory is portable between CUDA contexts.
 * Flag for {@code cuMemHostRegister()}.
 */
public static final int CU_MEMHOSTREGISTER_PORTABLE = 0x01;
/**
 * If set, host memory is mapped into CUDA address space and
 * {@code cuMemHostGetDevicePointer()} may be called on the host pointer.
 * Flag for {@code cuMemHostRegister()}.
 */
public static final int CU_MEMHOSTREGISTER_DEVICEMAP = 0x02;
/**
 * If set, peer memory is mapped into CUDA address space and
 * {@code cuMemPeerGetDevicePointer()} may be called on the host pointer.
 * Flag for {@code cuMemPeerRegister()}.
 *
 * @deprecated This value has been added in CUDA 4.0 RC,
 * and removed in CUDA 4.0 RC2
 */
@Deprecated
public static final int CU_MEMPEERREGISTER_DEVICEMAP = 0x02;
/**
 * If set, the CUDA array is a collection of layers, where each layer is
 * either a 1D or a 2D array and the Depth member of
 * {@code CUDA_ARRAY3D_DESCRIPTOR} specifies the number of layers,
 * not the depth of a 3D array.
 */
public static final int CUDA_ARRAY3D_LAYERED = 0x01;
/**
 * If set, the CUDA array contains an array of 2D slices
 * and the Depth member of {@code CUDA_ARRAY3D_DESCRIPTOR} specifies
 * the number of slices, not the depth of a 3D array.
 *
 * @deprecated use {@link #CUDA_ARRAY3D_LAYERED}
 */
@Deprecated
public static final int CUDA_ARRAY3D_2DARRAY = 0x01;
/**
 * This flag must be set in order to bind a surface reference
 * to the CUDA array.
 */
public static final int CUDA_ARRAY3D_SURFACE_LDST = 0x02;
/**
 * If set, the CUDA array is a collection of six 2D arrays, representing
 * faces of a cube. The width of such a CUDA array must be equal to its
 * height, and Depth must be six.
 * If the {@code CUDA_ARRAY3D_LAYERED} flag is also set, then the CUDA
 * array is a collection of cubemaps and Depth must be a multiple of six.
 */
public static final int CUDA_ARRAY3D_CUBEMAP = 0x04;
/**
 * This flag must be set in order to perform texture gather operations
 * on a CUDA array.
 */
public static final int CUDA_ARRAY3D_TEXTURE_GATHER = 0x08;
/**
 * If set, this flag indicates that the CUDA array is a DEPTH_TEXTURE.
 */
public static final int CUDA_ARRAY3D_DEPTH_TEXTURE = 0x10;
/**
 * For texture references loaded into the module, use the default
 * texunit from the texture reference.
 */
public static final int CU_PARAM_TR_DEFAULT = -1;
/**
 * Override the texref format with a format inferred from the array.
 */
public static final int CU_TRSA_OVERRIDE_FORMAT = 0x01;
/**
 * Read the texture as integers rather than promoting the values
 * to floats in the range [0,1].
 */
public static final int CU_TRSF_READ_AS_INTEGER = 0x01;
/**
 * Use normalized texture coordinates in the range [0,1) instead of [0,dim).
 */
public static final int CU_TRSF_NORMALIZED_COORDINATES = 0x02;
/**
 * Perform sRGB-to-linear conversion during texture read.
 * Flag for {@code JCudaDriver#cuTexRefSetFlags()}.
 */
public static final int CU_TRSF_SRGB = 0x10;
/**
 * Specifies a stream callback does not block the stream while
 * executing. This is the default behavior.
 * Flag for {@link JCudaDriver#cuStreamAddCallback(CUstream, CUstreamCallback, Object, int)}
 *
 * @deprecated This flag was only present in CUDA 5.0.25 (release candidate)
 * and may be removed (or added again) in future releases
 */
@Deprecated
public static final int CU_STREAM_CALLBACK_NONBLOCKING = 0x00;
/**
 * If set, the stream callback blocks the stream until it is
 * done executing.
 * Flag for {@link JCudaDriver#cuStreamAddCallback(CUstream, CUstreamCallback, Object, int)}
 *
 * @deprecated This flag was only present in CUDA 5.0.25 (release candidate)
 * and may be removed (or added again) in future releases
 */
@Deprecated
public static final int CU_STREAM_CALLBACK_BLOCKING = 0x01;
/**
 * Pointer subclass that exists only so that the constant pointer values
 * CU_LAUNCH_PARAM_END, CU_LAUNCH_PARAM_BUFFER_POINTER and
 * CU_LAUNCH_PARAM_BUFFER_SIZE can be created inside this class.
 *
 * TODO: These constants could be misused: nothing prevents these
 * Pointers from being used for memory allocation. At the moment
 * there is no other way of emulating these pointer constants.
 */
private static class ConstantPointer extends Pointer {
    /**
     * Creates a constant pointer by handing the given value
     * to the {@code Pointer(long)} constructor.
     *
     * @param address The constant value of the pointer
     */
    private ConstantPointer(long address) {
        super(address);
    }
}
/**
 * End of array terminator for the {@code extra} parameter to
 * {@code cuLaunchKernel}.
 */
public static final Pointer CU_LAUNCH_PARAM_END = new ConstantPointer(0); // ((void*)0x00)
/**
 * Indicator that the next value in the {@code extra} parameter to
 * {@code cuLaunchKernel} will be a pointer to a buffer containing all
 * kernel parameters used for launching kernel {@code f}. This buffer
 * needs to honor all alignment/padding requirements of the individual
 * parameters.
 * If {@code CU_LAUNCH_PARAM_BUFFER_SIZE} is not also specified in the
 * {@code extra} array, then {@code CU_LAUNCH_PARAM_BUFFER_POINTER} will
 * have no effect.
 */
public static final Pointer CU_LAUNCH_PARAM_BUFFER_POINTER = new ConstantPointer(1); //((void*)0x01)
/**
 * Indicator that the next value in the {@code extra} parameter to
 * {@code cuLaunchKernel} will be a pointer to a size_t which contains
 * the size of the buffer specified with
 * {@code CU_LAUNCH_PARAM_BUFFER_POINTER}.
 * It is required that {@code CU_LAUNCH_PARAM_BUFFER_POINTER} also be
 * specified in the {@code extra} array if the value associated with
 * {@code CU_LAUNCH_PARAM_BUFFER_SIZE} is not zero.
 */
public static final Pointer CU_LAUNCH_PARAM_BUFFER_SIZE = new ConstantPointer(2); // ((void*)0x02)
/**
 * CUstream subclass that exists only so that the constant stream
 * values of this class can be created.
 */
private static class ConstantCUstream extends CUstream {
    /**
     * Creates a constant stream by handing the given value
     * to the {@code CUstream(long)} constructor.
     *
     * @param handle The constant value of the stream handle
     */
    private ConstantCUstream(long handle) {
        super(handle);
    }
}
/**
 * Stream handle that can be passed as a CUstream to use an implicit stream
 * with legacy synchronization behavior.
 * Created as a {@code ConstantCUstream} with the value 0x1.
 */
public static final CUstream CU_STREAM_LEGACY = new ConstantCUstream(0x1);
/**
 * Stream handle that can be passed as a CUstream to use an implicit stream
 * with per-thread synchronization behavior.
 * Created as a {@code ConstantCUstream} with the value 0x2.
 */
public static final CUstream CU_STREAM_PER_THREAD = new ConstantCUstream(0x2);
/**
 * Whether a CudaException should be thrown if a method is about
 * to return a result code that is not CUresult.CUDA_SUCCESS.
 * Initially false; written by setExceptionsEnabled and read by
 * checkResult.
 * NOTE(review): this is a plain static field without synchronization -
 * confirm whether visibility across threads matters for callers.
 */
private static boolean exceptionsEnabled = false;
// Static initializer: asks LibUtils to load the library named
// "JCudaDriver" when this class is initialized. The native methods
// declared in this class presumably live in that library.
static
{
LibUtils.loadLibrary("JCudaDriver");
}
/**
 * Private constructor to prevent instantiation. The constructor
 * body is intentionally empty.
 */
private JCudaDriver()
{
}
/**
 * Sets the log level that the JCuda driver library should use.
 *
 * Log levels that are currently supported:
 *
 * LOG_QUIET: Never print anything
 * LOG_ERROR: Print error messages
 * LOG_TRACE: Print a trace of all native function calls
 *
 * @param logLevel The log level to use.
 */
public static void setLogLevel(LogLevel logLevel) {
    // The native overload receives the level as the value of ordinal()
    final int levelOrdinal = logLevel.ordinal();
    setLogLevel(levelOrdinal);
}

/**
 * Native overload that receives the log level as an int.
 *
 * @param logLevel The ordinal of the log level
 */
private static native void setLogLevel(int logLevel);
/**
 * Switches exceptions on or off. While exceptions are disabled
 * (the default), the methods of this class only hand back the
 * CUresult error code of the underlying CUDA function. Once they
 * are enabled, a CudaException carrying a detailed error message
 * is thrown whenever a method is about to return a result code
 * other than CUresult.CUDA_SUCCESS.
 *
 * @param enabled Whether exceptions are enabled
 */
public static void setExceptionsEnabled(boolean enabled) {
    // Stored in the static flag that checkResult consults
    exceptionsEnabled = enabled;
}
/**
 * Inspects a result code that is about to be returned to the caller.
 * When exceptions have been enabled and the code differs from
 * CUresult.CUDA_SUCCESS, a CudaException is thrown whose message
 * is the string that CUresult.stringFor yields for that code.
 * In every other case the code is handed back unchanged.
 *
 * @param result The result to check
 * @return The result that was given as the parameter
 * @throws CudaException If exceptions have been enabled and
 * the given result code is not CUresult.CUDA_SUCCESS
 */
private static int checkResult(int result)
{
    // Exceptions switched off: the code is always passed through
    if (!exceptionsEnabled)
    {
        return result;
    }
    // Success never raises an exception
    if (result == CUresult.CUDA_SUCCESS)
    {
        return result;
    }
    throw new CudaException(CUresult.stringFor(result));
}
/**
 * Returns the given (address) value, adjusted to have
 * the given alignment. This function may be used to
 * align the parameters for a kernel call according
 * to their alignment requirements.
 *
 * The adjustment is computed with a bit mask
 * ({@code (value + alignment - 1) & ~(alignment - 1)}), so the
 * result is the next multiple of the alignment that is not smaller
 * than the value only when the alignment is a power of two. Other
 * alignments are not validated and yield the raw result of the
 * mask expression.
 *
 * @param value The address value
 * @param alignment The desired alignment, expected to be a power of two
 * @return The aligned address value
 * @deprecated This method was intended for a simpler
 * kernel parameter setup in earlier CUDA versions,
 * and should not be required any more. It may be
 * removed in future releases.
 */
@Deprecated
public static int align(int value, int alignment)
{
    // For a power-of-two alignment, (alignment - 1) has exactly
    // the low bits set that must be cleared after rounding up
    final int mask = alignment - 1;
    return (value + mask) & ~mask;
}
/**
 * A wrapper function for
 * {@link JCudaDriver#cuModuleLoadDataEx(CUmodule, Pointer, int, int[], Pointer)}
 * which allows passing in the options for the JIT compiler, and obtaining
 * the output of the JIT compiler via a {@link JITOptions} object.
 *
 * Like the other wrappers of this class, the result code of the native
 * call is passed through checkResult, so the setting made with
 * {@link JCudaDriver#setExceptionsEnabled(boolean)} is honored here
 * as well. The given JITOptions object is handed to the native call
 * before the result code is checked.
 *
 * Note: This method should be considered as preliminary,
 * and might change in future releases.
 *
 * @param module The module, passed on to the native call
 * @param pointer The pointer, passed on to the native call
 * @param jitOptions The JIT options, passed on to the native call
 * @return The result code of the native call
 * @throws CudaException If exceptions have been enabled and
 * the result code is not CUresult.CUDA_SUCCESS
 */
public static int cuModuleLoadDataJIT(CUmodule module, Pointer pointer, JITOptions jitOptions)
{
    // Previously the raw native result was returned, which bypassed
    // the exception handling that every other wrapper applies
    return checkResult(cuModuleLoadDataJITNative(module, pointer, jitOptions));
}
/** Native method backing cuModuleLoadDataJIT. */
private static native int cuModuleLoadDataJITNative(CUmodule module, Pointer pointer, JITOptions jitOptions);
/**
*
* Gets the string description of an error code
*
* Sets *pStr to the address of a NULL-terminated string description
* of the error code error.
* If the error code is not recognized, ::CUDA_ERROR_INVALID_VALUE
* will be returned and *pStr will be set to the NULL address.
*
*
* @param error - Error code to convert to string
* @param pStr - Address of the string pointer.
*
* @return
* ::CUDA_SUCCESS,
* ::CUDA_ERROR_INVALID_VALUE
*
* @see CUresult
*/
public static int cuGetErrorString(int error, String pStr[])
{
return checkResult(cuGetErrorStringNative(error, pStr));
}
private static native int cuGetErrorStringNative(int error, String pStr[]);
/**
*
* Gets the string representation of an error code enum name
*
* Sets *pStr to the address of a NULL-terminated string representation
* of the name of the enum error code error.
* If the error code is not recognized, ::CUDA_ERROR_INVALID_VALUE
* will be returned and *pStr will be set to the NULL address.
*
* @param error - Error code to convert to string
* @param pStr - Address of the string pointer.
*
* @return
* ::CUDA_SUCCESS,
* ::CUDA_ERROR_INVALID_VALUE
*
* @see CUresult
*/
public static int cuGetErrorName(int error, String pStr[])
{
return checkResult(cuGetErrorNameNative(error, pStr));
}
private static native int cuGetErrorNameNative(int error, String pStr[]);
/**
* Initialize the CUDA driver API.
*
*
* CUresult cuInit (
* unsigned int Flags )
*
*
* Initialize the CUDA driver API.
* Initializes the driver API and must be called before any other function
* from the driver API.
* Currently, the Flags parameter
* must be 0. If cuInit() has not been called, any function from the
* driver API will return CUDA_ERROR_NOT_INITIALIZED.
*
*
* Note:
* Note that this
* function may also return error codes from previous, asynchronous
* launches.
*
*
*
*
*
* @param Flags Initialization flag for CUDA.
*
* @return CUDA_SUCCESS, CUDA_ERROR_INVALID_VALUE,
* CUDA_ERROR_INVALID_DEVICE
*
*/
public static int cuInit(int Flags) {
    // The native result code is routed through checkResult,
    // which may throw when exceptions have been enabled
    final int status = cuInitNative(Flags);
    return checkResult(status);
}
// Native method backing cuInit
private static native int cuInitNative(int Flags);
/**
* Returns a handle to a compute device.
*
*
* CUresult cuDeviceGet (
* CUdevice* device,
* int ordinal )
*
*
* Returns a handle to a compute device.
* Returns in *device a device handle given an ordinal in the
* range [0, cuDeviceGetCount()-1].
*
*
* Note:
* Note that this
* function may also return error codes from previous, asynchronous
* launches.
*
*
*
*
*
* @param device Returned device handle
* @param ordinal Device number to get handle for
*
* @return CUDA_SUCCESS, CUDA_ERROR_DEINITIALIZED, CUDA_ERROR_NOT_INITIALIZED,
* CUDA_ERROR_INVALID_CONTEXT, CUDA_ERROR_INVALID_VALUE,
* CUDA_ERROR_INVALID_DEVICE
*
* @see JCudaDriver#cuDeviceGetAttribute
* @see JCudaDriver#cuDeviceGetCount
* @see JCudaDriver#cuDeviceGetName
* @see JCudaDriver#cuDeviceTotalMem
*/
public static int cuDeviceGet(CUdevice device, int ordinal) {
    // The native result code is routed through checkResult,
    // which may throw when exceptions have been enabled
    final int status = cuDeviceGetNative(device, ordinal);
    return checkResult(status);
}
// Native method backing cuDeviceGet
private static native int cuDeviceGetNative(CUdevice device, int ordinal);
/**
* Returns the number of compute-capable devices.
*
*
* CUresult cuDeviceGetCount (
* int* count )
*
*
* Returns the number of compute-capable
* devices. Returns in *count the number of devices with
* compute capability greater than or equal to 1.0 that are available for
* execution. If there is
* no such device, cuDeviceGetCount()
* returns 0.
*
*
* Note:
* Note that this
* function may also return error codes from previous, asynchronous
* launches.
*
*
*
*
*
* @param count Returned number of compute-capable devices
*
* @return CUDA_SUCCESS, CUDA_ERROR_DEINITIALIZED, CUDA_ERROR_NOT_INITIALIZED,
* CUDA_ERROR_INVALID_CONTEXT, CUDA_ERROR_INVALID_VALUE
*
* @see JCudaDriver#cuDeviceGetAttribute
* @see JCudaDriver#cuDeviceGetName
* @see JCudaDriver#cuDeviceGet
* @see JCudaDriver#cuDeviceTotalMem
*/
public static int cuDeviceGetCount(int count[]) {
    // The native result code is routed through checkResult,
    // which may throw when exceptions have been enabled
    final int status = cuDeviceGetCountNative(count);
    return checkResult(status);
}
// Native method backing cuDeviceGetCount
private static native int cuDeviceGetCountNative(int[] count);
/**
* Returns an identifier string for the device.
*
*
* CUresult cuDeviceGetName (
* char* name,
* int len,
* CUdevice dev )
*
*
* Returns an identifier string for the
* device. Returns an ASCII string identifying the device dev
* in the NULL-terminated string pointed to by name. len specifies the maximum length of the string that may be
* returned.
*
*
* Note:
* Note that this
* function may also return error codes from previous, asynchronous
* launches.
*
*
*
*
*
* @param name Returned identifier string for the device
* @param len Maximum length of string to store in name
* @param dev Device to get identifier string for
*
* @return CUDA_SUCCESS, CUDA_ERROR_DEINITIALIZED, CUDA_ERROR_NOT_INITIALIZED,
* CUDA_ERROR_INVALID_CONTEXT, CUDA_ERROR_INVALID_VALUE,
* CUDA_ERROR_INVALID_DEVICE
*
* @see JCudaDriver#cuDeviceGetAttribute
* @see JCudaDriver#cuDeviceGetCount
* @see JCudaDriver#cuDeviceGet
* @see JCudaDriver#cuDeviceTotalMem
*/
public static int cuDeviceGetName(byte name[], int len, CUdevice dev) {
    // The native result code is routed through checkResult,
    // which may throw when exceptions have been enabled
    final int status = cuDeviceGetNameNative(name, len, dev);
    return checkResult(status);
}
// Native method backing cuDeviceGetName
private static native int cuDeviceGetNameNative(byte[] name, int len, CUdevice dev);
/**
* Returns the compute capability of the device.
*
*
* CUresult cuDeviceComputeCapability (
* int* major,
* int* minor,
* CUdevice dev )
*
*
* Returns the compute capability of the
* device.
* Deprecated: This function was deprecated
* as of CUDA 5.0 and its functionality superseded by
* cuDeviceGetAttribute().
*
* Returns in *major and *minor the major and minor revision numbers that define the
* compute capability of the device dev.
*
*
* Note:
* Note that this
* function may also return error codes from previous, asynchronous
* launches.
*
*
*
*
*
* @param major Major revision number
* @param minor Minor revision number
* @param dev Device handle
*
* @return CUDA_SUCCESS, CUDA_ERROR_DEINITIALIZED, CUDA_ERROR_NOT_INITIALIZED,
* CUDA_ERROR_INVALID_CONTEXT, CUDA_ERROR_INVALID_VALUE,
* CUDA_ERROR_INVALID_DEVICE
*
* @see JCudaDriver#cuDeviceGetAttribute
* @see JCudaDriver#cuDeviceGetCount
* @see JCudaDriver#cuDeviceGetName
* @see JCudaDriver#cuDeviceGet
* @see JCudaDriver#cuDeviceTotalMem
*/
public static int cuDeviceComputeCapability(int major[], int minor[], CUdevice dev) {
    // The native result code is routed through checkResult,
    // which may throw when exceptions have been enabled
    final int status = cuDeviceComputeCapabilityNative(major, minor, dev);
    return checkResult(status);
}
// Native method backing cuDeviceComputeCapability
private static native int cuDeviceComputeCapabilityNative(int[] major, int[] minor, CUdevice dev);
/**
 * Calls the native cuDevicePrimaryCtxRetain method and passes its
 * result code through checkResult.
 * NOTE(review): presumably mirrors the CUDA driver API function of
 * the same name - confirm the exact semantics against the CUDA
 * documentation.
 *
 * @param pctx The context, passed on to the native call
 * @param dev The device, passed on to the native call
 * @return The result code of the native call
 */
public static int cuDevicePrimaryCtxRetain(CUcontext pctx, CUdevice dev) {
    final int status = cuDevicePrimaryCtxRetainNative(pctx, dev);
    return checkResult(status);
}
// Native method backing cuDevicePrimaryCtxRetain
private static native int cuDevicePrimaryCtxRetainNative(CUcontext pctx, CUdevice dev);
/**
 * Calls the native cuDevicePrimaryCtxRelease method and passes its
 * result code through checkResult.
 * NOTE(review): presumably mirrors the CUDA driver API function of
 * the same name - confirm the exact semantics against the CUDA
 * documentation.
 *
 * @param dev The device, passed on to the native call
 * @return The result code of the native call
 */
public static int cuDevicePrimaryCtxRelease(CUdevice dev) {
    final int status = cuDevicePrimaryCtxReleaseNative(dev);
    return checkResult(status);
}
// Native method backing cuDevicePrimaryCtxRelease
private static native int cuDevicePrimaryCtxReleaseNative(CUdevice dev);
/**
 * Calls the native cuDevicePrimaryCtxSetFlags method and passes its
 * result code through checkResult.
 * NOTE(review): presumably mirrors the CUDA driver API function of
 * the same name - confirm the exact semantics against the CUDA
 * documentation.
 *
 * @param dev The device, passed on to the native call
 * @param flags The flags, passed on to the native call
 * @return The result code of the native call
 */
public static int cuDevicePrimaryCtxSetFlags(CUdevice dev, int flags) {
    final int status = cuDevicePrimaryCtxSetFlagsNative(dev, flags);
    return checkResult(status);
}
// Native method backing cuDevicePrimaryCtxSetFlags
private static native int cuDevicePrimaryCtxSetFlagsNative(CUdevice dev, int flags);
/**
* Returns the total amount of memory on the device.
*
*
* CUresult cuDeviceTotalMem (
* size_t* bytes,
* CUdevice dev )
*
*
* Returns the total amount of memory on
* the device. Returns in *bytes the total amount of memory
* available on the device dev in bytes.
*
*
* Note:
* Note that this
* function may also return error codes from previous, asynchronous
* launches.
*
*
*
*
*
* @param bytes Returned memory available on device in bytes
* @param dev Device handle
*
* @return CUDA_SUCCESS, CUDA_ERROR_DEINITIALIZED, CUDA_ERROR_NOT_INITIALIZED,
* CUDA_ERROR_INVALID_CONTEXT, CUDA_ERROR_INVALID_VALUE,
* CUDA_ERROR_INVALID_DEVICE
*
* @see JCudaDriver#cuDeviceGetAttribute
* @see JCudaDriver#cuDeviceGetCount
* @see JCudaDriver#cuDeviceGetName
* @see JCudaDriver#cuDeviceGet
*/
public static int cuDeviceTotalMem(long bytes[], CUdevice dev) {
    // The native result code is routed through checkResult,
    // which may throw when exceptions have been enabled
    final int status = cuDeviceTotalMemNative(bytes, dev);
    return checkResult(status);
}
// Native method backing cuDeviceTotalMem
private static native int cuDeviceTotalMemNative(long[] bytes, CUdevice dev);
/**
* Returns properties for a selected device.
*
*
* CUresult cuDeviceGetProperties (
* CUdevprop* prop,
* CUdevice dev )
*
*
* Returns properties for a selected device.
* Deprecated: This function was deprecated
* as of CUDA 5.0 and replaced by cuDeviceGetAttribute().
*
* Returns in *prop the properties
* of device dev. The CUdevprop structure is defined as:
*
* typedef struct CUdevprop_st {
* int maxThreadsPerBlock;
* int maxThreadsDim[3];
* int maxGridSize[3];
* int sharedMemPerBlock;
* int totalConstantMemory;
* int SIMDWidth;
* int memPitch;
* int regsPerBlock;
* int clockRate;
* int textureAlign
* } CUdevprop;
* where:
* <ul>
* <li>maxThreadsPerBlock is the maximum number of threads per block;</li>
* <li>maxThreadsDim[3] is the maximum sizes of each dimension of a
* block;</li>
* <li>maxGridSize[3] is the maximum sizes of each dimension of a
* grid;</li>
* <li>sharedMemPerBlock is the total amount of shared memory available
* per block in bytes;</li>
* <li>totalConstantMemory is the total amount of constant memory
* available on the device in bytes;</li>
* <li>SIMDWidth is the warp size;</li>
* <li>memPitch is the maximum pitch allowed by the memory copy functions
* that involve memory regions allocated through cuMemAllocPitch();</li>
* <li>regsPerBlock is the total number of registers available per
* block;</li>
* <li>clockRate is the clock frequency in kilohertz;</li>
* <li>textureAlign is the alignment requirement; texture base addresses
* that are aligned to textureAlign bytes do not need an offset applied
* to texture fetches.</li>
* </ul>
*
*
*
*
*
* Note:
* Note that this
* function may also return error codes from previous, asynchronous
* launches.
*
*
*
*
*
* @param prop Returned properties of device
* @param dev Device to get properties for
*
* @return CUDA_SUCCESS, CUDA_ERROR_DEINITIALIZED, CUDA_ERROR_NOT_INITIALIZED,
* CUDA_ERROR_INVALID_CONTEXT, CUDA_ERROR_INVALID_VALUE,
* CUDA_ERROR_INVALID_DEVICE
*
* @see JCudaDriver#cuDeviceGetAttribute
* @see JCudaDriver#cuDeviceGetCount
* @see JCudaDriver#cuDeviceGetName
* @see JCudaDriver#cuDeviceGet
* @see JCudaDriver#cuDeviceTotalMem
*/
public static int cuDeviceGetProperties(CUdevprop prop, CUdevice dev) {
    // The native result code is routed through checkResult,
    // which may throw when exceptions have been enabled
    final int status = cuDeviceGetPropertiesNative(prop, dev);
    return checkResult(status);
}
// Native method backing cuDeviceGetProperties
private static native int cuDeviceGetPropertiesNative(CUdevprop prop, CUdevice dev);
/**
* Returns information about the device.
*
*
* CUresult cuDeviceGetAttribute (
* int* pi,
* CUdevice_attribute attrib,
* CUdevice dev )
*
*
* Returns information about the device.
* Returns in *pi the integer value of the attribute attrib on device dev. The supported attributes are:
*
* -
*
CU_DEVICE_ATTRIBUTE_MAX_THREADS_PER_BLOCK: Maximum number of threads
* per block;
*
*
* -
*
CU_DEVICE_ATTRIBUTE_MAX_BLOCK_DIM_X:
* Maximum x-dimension of a block;
*
*
* -
*
CU_DEVICE_ATTRIBUTE_MAX_BLOCK_DIM_Y:
* Maximum y-dimension of a block;
*
*
* -
*
CU_DEVICE_ATTRIBUTE_MAX_BLOCK_DIM_Z:
* Maximum z-dimension of a block;
*
*
* -
*
CU_DEVICE_ATTRIBUTE_MAX_GRID_DIM_X:
* Maximum x-dimension of a grid;
*
*
* -
*
CU_DEVICE_ATTRIBUTE_MAX_GRID_DIM_Y:
* Maximum y-dimension of a grid;
*
*
* -
*
CU_DEVICE_ATTRIBUTE_MAX_GRID_DIM_Z:
* Maximum z-dimension of a grid;
*
*
* -
*
CU_DEVICE_ATTRIBUTE_MAX_SHARED_MEMORY_PER_BLOCK: Maximum amount of
* shared memory available to a thread block in bytes; this amount is
* shared by all thread blocks simultaneously
* resident on a multiprocessor;
*
*
* -
*
CU_DEVICE_ATTRIBUTE_TOTAL_CONSTANT_MEMORY: Memory available on device
* for __constant__ variables in a CUDA C kernel in bytes;
*
*
* -
*
CU_DEVICE_ATTRIBUTE_WARP_SIZE:
* Warp size in threads;
*
*
* -
*
CU_DEVICE_ATTRIBUTE_MAX_PITCH:
* Maximum pitch in bytes allowed by the memory copy functions that
* involve memory regions allocated through cuMemAllocPitch();
*
*
* -
*
CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE1D_WIDTH: Maximum 1D texture
* width;
*
*
* -
*
CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE1D_LINEAR_WIDTH: Maximum width for
* a 1D texture bound to linear memory;
*
*
* -
*
CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE1D_MIPMAPPED_WIDTH: Maximum
* mipmapped 1D texture width;
*
*
* -
*
CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE2D_WIDTH: Maximum 2D texture
* width;
*
*
* -
*
CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE2D_HEIGHT: Maximum 2D texture
* height;
*
*
* -
*
CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE2D_LINEAR_WIDTH: Maximum width for
* a 2D texture bound to linear memory;
*
*
* -
*
CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE2D_LINEAR_HEIGHT: Maximum height
* for a 2D texture bound to linear memory;
*
*
* -
*
CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE2D_LINEAR_PITCH: Maximum pitch in
* bytes for a 2D texture bound to linear memory;
*
*
* -
*
CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE2D_MIPMAPPED_WIDTH: Maximum
* mipmapped 2D texture width;
*
*
* -
*
CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE2D_MIPMAPPED_HEIGHT: Maximum
* mipmapped 2D texture height;
*
*
* -
*
CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE3D_WIDTH: Maximum 3D texture
* width;
*
*
* -
*
CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE3D_HEIGHT: Maximum 3D texture
* height;
*
*
* -
*
CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE3D_DEPTH: Maximum 3D texture
* depth;
*
*
* -
*
CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE3D_WIDTH_ALTERNATE: Alternate
* maximum 3D texture width, 0 if no alternate maximum 3D texture size is
* supported;
*
*
* -
*
CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE3D_HEIGHT_ALTERNATE: Alternate
* maximum 3D texture height, 0 if no alternate maximum 3D texture size
* is supported;
*
*
* -
*
CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE3D_DEPTH_ALTERNATE: Alternate
* maximum 3D texture depth, 0 if no alternate maximum 3D texture size is
* supported;
*
*
* -
*
CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURECUBEMAP_WIDTH: Maximum cubemap
* texture width or height;
*
*
* -
*
CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE1D_LAYERED_WIDTH: Maximum 1D
* layered texture width;
*
*
* -
*
CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE1D_LAYERED_LAYERS: Maximum layers
* in a 1D layered texture;
*
*
* -
*
CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE2D_LAYERED_WIDTH: Maximum 2D
* layered texture width;
*
*
* -
*
CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE2D_LAYERED_HEIGHT: Maximum 2D
* layered texture height;
*
*
* -
*
CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE2D_LAYERED_LAYERS: Maximum layers
* in a 2D layered texture;
*
*
* -
*
CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURECUBEMAP_LAYERED_WIDTH: Maximum
* cubemap layered texture width or height;
*
*
* -
*
CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURECUBEMAP_LAYERED_LAYERS: Maximum
* layers in a cubemap layered texture;
*
*
* -
*
CU_DEVICE_ATTRIBUTE_MAXIMUM_SURFACE1D_WIDTH: Maximum 1D surface
* width;
*
*
* -
*
CU_DEVICE_ATTRIBUTE_MAXIMUM_SURFACE2D_WIDTH: Maximum 2D surface
* width;
*
*
* -
*
CU_DEVICE_ATTRIBUTE_MAXIMUM_SURFACE2D_HEIGHT: Maximum 2D surface
* height;
*
*
* -
*
CU_DEVICE_ATTRIBUTE_MAXIMUM_SURFACE3D_WIDTH: Maximum 3D surface
* width;
*
*
* -
*
CU_DEVICE_ATTRIBUTE_MAXIMUM_SURFACE3D_HEIGHT: Maximum 3D surface
* height;
*
*
* -
*
CU_DEVICE_ATTRIBUTE_MAXIMUM_SURFACE3D_DEPTH: Maximum 3D surface
* depth;
*
*
* -
*
CU_DEVICE_ATTRIBUTE_MAXIMUM_SURFACE1D_LAYERED_WIDTH: Maximum 1D
* layered surface width;
*
*
* -
*
CU_DEVICE_ATTRIBUTE_MAXIMUM_SURFACE1D_LAYERED_LAYERS: Maximum layers
* in a 1D layered surface;
*
*
* -
*
CU_DEVICE_ATTRIBUTE_MAXIMUM_SURFACE2D_LAYERED_WIDTH: Maximum 2D
* layered surface width;
*
*
* -
*
CU_DEVICE_ATTRIBUTE_MAXIMUM_SURFACE2D_LAYERED_HEIGHT: Maximum 2D
* layered surface height;
*
*
* -
*
CU_DEVICE_ATTRIBUTE_MAXIMUM_SURFACE2D_LAYERED_LAYERS: Maximum layers
* in a 2D layered surface;
*
*
* -
*
CU_DEVICE_ATTRIBUTE_MAXIMUM_SURFACECUBEMAP_WIDTH: Maximum cubemap
* surface width;
*
*
* -
*
CU_DEVICE_ATTRIBUTE_MAXIMUM_SURFACECUBEMAP_LAYERED_WIDTH: Maximum
* cubemap layered surface width;
*
*
* -
*
CU_DEVICE_ATTRIBUTE_MAXIMUM_SURFACECUBEMAP_LAYERED_LAYERS: Maximum
* layers in a cubemap layered surface;
*
*
* -
*
CU_DEVICE_ATTRIBUTE_MAX_REGISTERS_PER_BLOCK: Maximum number of 32-bit
* registers available to a thread block; this number is shared by all
* thread blocks simultaneously
* resident on a multiprocessor;
*
*
* -
*
CU_DEVICE_ATTRIBUTE_CLOCK_RATE:
* Typical clock frequency in kilohertz;
*
*
* -
*
CU_DEVICE_ATTRIBUTE_TEXTURE_ALIGNMENT:
* Alignment requirement; texture base addresses aligned to textureAlign
* bytes do not need an offset applied to texture fetches;
*
*
* -
*
CU_DEVICE_ATTRIBUTE_TEXTURE_PITCH_ALIGNMENT: Pitch alignment
* requirement for 2D texture references bound to pitched memory;
*
*
* -
*
CU_DEVICE_ATTRIBUTE_GPU_OVERLAP:
* 1 if the device can concurrently copy memory between host and device
* while executing a kernel, or 0 if not;
*
*
* -
*
CU_DEVICE_ATTRIBUTE_MULTIPROCESSOR_COUNT: Number of multiprocessors
* on the device;
*
*
* -
*
CU_DEVICE_ATTRIBUTE_KERNEL_EXEC_TIMEOUT:
* 1 if there is a run time limit for kernels executed on the device, or
* 0 if not;
*
*
* -
*
CU_DEVICE_ATTRIBUTE_INTEGRATED:
* 1 if the device is integrated with the memory subsystem, or 0 if not;
*
*
* -
*
CU_DEVICE_ATTRIBUTE_CAN_MAP_HOST_MEMORY:
* 1 if the device can map host memory into the CUDA address space, or 0
* if not;
*
*
* -
*
* CU_DEVICE_ATTRIBUTE_COMPUTE_MODE:
* Compute mode that device is currently in. Available modes are as
* follows:
*
* -
*
CU_COMPUTEMODE_DEFAULT:
* Default mode - Device is not restricted and can have multiple CUDA
* contexts present at a single time.
*
*
* -
*
CU_COMPUTEMODE_EXCLUSIVE:
* Compute-exclusive mode - Device can have only one CUDA context present
* on it at a time.
*
*
* -
*
CU_COMPUTEMODE_PROHIBITED:
* Compute-prohibited mode - Device is prohibited from creating new CUDA
* contexts.
*
*
* -
*
CU_COMPUTEMODE_EXCLUSIVE_PROCESS: Compute-exclusive-process mode -
* Device can have only one context used by a single process at a time.
*
*
*
*
*
* -
*
CU_DEVICE_ATTRIBUTE_CONCURRENT_KERNELS:
* 1 if the device supports executing multiple kernels within the same
* context simultaneously, or 0 if not. It is not guaranteed
* that multiple kernels will be
* resident on the device concurrently so this feature should not be
* relied upon for correctness;
*
*
* -
*
CU_DEVICE_ATTRIBUTE_ECC_ENABLED:
* 1 if error correction is enabled on the device, 0 if error correction
* is disabled or not supported by the device;
*
*
* -
*
CU_DEVICE_ATTRIBUTE_PCI_BUS_ID:
* PCI bus identifier of the device;
*
*
* -
*
CU_DEVICE_ATTRIBUTE_PCI_DEVICE_ID:
* PCI device (also known as slot) identifier of the device;
*
*
* -
*
CU_DEVICE_ATTRIBUTE_TCC_DRIVER:
* 1 if the device is using a TCC driver. TCC is only available on Tesla
* hardware running Windows Vista or later;
*
*
* -
*
CU_DEVICE_ATTRIBUTE_MEMORY_CLOCK_RATE:
* Peak memory clock frequency in kilohertz;
*
*
* -
*
CU_DEVICE_ATTRIBUTE_GLOBAL_MEMORY_BUS_WIDTH: Global memory bus width
* in bits;
*
*
* -
*
CU_DEVICE_ATTRIBUTE_L2_CACHE_SIZE:
* Size of L2 cache in bytes. 0 if the device doesn't have L2 cache;
*
*
* -
*
CU_DEVICE_ATTRIBUTE_MAX_THREADS_PER_MULTIPROCESSOR: Maximum resident
* threads per multiprocessor;
*
*
* -
*
CU_DEVICE_ATTRIBUTE_UNIFIED_ADDRESSING:
* 1 if the device shares a unified address space with the host, or 0 if
* not;
*
*
* -
*
CU_DEVICE_ATTRIBUTE_COMPUTE_CAPABILITY_MAJOR: Major compute capability
* version number;
*
*
* -
*
CU_DEVICE_ATTRIBUTE_COMPUTE_CAPABILITY_MINOR: Minor compute capability
* version number;
*
*
*
*
*
* Note:
* Note that this
* function may also return error codes from previous, asynchronous
* launches.
*
*
*
*
*
* @param pi Returned device attribute value
* @param attrib Device attribute to query
* @param dev Device handle
*
* @return CUDA_SUCCESS, CUDA_ERROR_DEINITIALIZED, CUDA_ERROR_NOT_INITIALIZED,
* CUDA_ERROR_INVALID_CONTEXT, CUDA_ERROR_INVALID_VALUE,
* CUDA_ERROR_INVALID_DEVICE
*
* @see JCudaDriver#cuDeviceGetCount
* @see JCudaDriver#cuDeviceGetName
* @see JCudaDriver#cuDeviceGet
* @see JCudaDriver#cuDeviceTotalMem
*/
public static int cuDeviceGetAttribute(int pi[], int attrib, CUdevice dev) {
    // The native result code is routed through checkResult,
    // which may throw when exceptions have been enabled
    final int status = cuDeviceGetAttributeNative(pi, attrib, dev);
    return checkResult(status);
}
// Native method backing cuDeviceGetAttribute
private static native int cuDeviceGetAttributeNative(int[] pi, int attrib, CUdevice dev);
/**
* Returns the CUDA driver version.
*
*
* CUresult cuDriverGetVersion (
* int* driverVersion )
*
*
* Returns the CUDA driver version. Returns
* in *driverVersion the version number of the installed CUDA
* driver. This function automatically returns CUDA_ERROR_INVALID_VALUE
* if the driverVersion argument is NULL.
*
*
* Note:
* Note that this
* function may also return error codes from previous, asynchronous
* launches.
*
*
*
*
*
* @param driverVersion Returns the CUDA driver version
*
* @return CUDA_SUCCESS, CUDA_ERROR_INVALID_VALUE
*
*/
public static int cuDriverGetVersion (int driverVersion[]) {
    // The native result code is routed through checkResult,
    // which may throw when exceptions have been enabled
    final int status = cuDriverGetVersionNative(driverVersion);
    return checkResult(status);
}
// Native method backing cuDriverGetVersion
private static native int cuDriverGetVersionNative(int[] driverVersion);
/**
* Create a CUDA context.
*
*
* CUresult cuCtxCreate (
* CUcontext* pctx,
* unsigned int flags,
* CUdevice dev )
*
*
* Create a CUDA context. Creates a new
* CUDA context and associates it with the calling thread. The flags parameter is described below. The context is created with
* a usage count of 1 and the caller of cuCtxCreate() must call
* cuCtxDestroy() when done using the context. If a context is already
* current to the thread, it is supplanted by the newly created context
* and may be restored by a subsequent call
* to cuCtxPopCurrent().
*
* The three LSBs of the flags
* parameter can be used to control how the OS thread, which owns the CUDA
* context at the time of an API call, interacts with
* the OS scheduler when waiting for results
* from the GPU. Only one of the scheduling flags can be set when creating
* a context.
*
*
* -
*
CU_CTX_SCHED_AUTO: The default
* value if the flags parameter is zero, uses a heuristic based
* on the number of active CUDA contexts in the process C and the number
* of logical
* processors in the system P. If
* C > P, then CUDA will yield to other OS threads when waiting for
* the GPU, otherwise CUDA will
* not yield while waiting for
* results and actively spin on the processor.
*
*
*
*
*
* -
*
CU_CTX_SCHED_SPIN: Instruct
* CUDA to actively spin when waiting for results from the GPU. This can
* decrease latency when waiting for the GPU,
* but may lower the performance
* of CPU threads if they are performing work in parallel with the CUDA
* thread.
*
*
*
*
*
* -
*
CU_CTX_SCHED_YIELD: Instruct
* CUDA to yield its thread when waiting for results from the GPU. This
* can increase latency when waiting for the
* GPU, but can increase the
* performance of CPU threads performing work in parallel with the GPU.
*
*
*
*
*
* -
*
CU_CTX_SCHED_BLOCKING_SYNC:
* Instruct CUDA to block the CPU thread on a synchronization primitive
* when waiting for the GPU to finish work.
*
*
*
*
*
* -
*
CU_CTX_BLOCKING_SYNC: Instruct
* CUDA to block the CPU thread on a synchronization primitive when
* waiting for the GPU to finish work.
*
* Deprecated:
* This flag was deprecated as of CUDA 4.0 and was replaced with
* CU_CTX_SCHED_BLOCKING_SYNC.
*
*
*
*
*
* -
*
CU_CTX_MAP_HOST: Instruct CUDA
* to support mapped pinned allocations. This flag must be set in order
* to allocate pinned host memory that is
* accessible to the GPU.
*
*
*
*
*
* -
*
CU_CTX_LMEM_RESIZE_TO_MAX:
* Instruct CUDA to not reduce local memory after resizing local memory
* for a kernel. This can prevent thrashing by local memory
* allocations when launching many
* kernels with high local memory usage at the cost of potentially
* increased memory usage.
*
*
*
*
* Context creation will fail with
* CUDA_ERROR_UNKNOWN if the compute mode of the device is
* CU_COMPUTEMODE_PROHIBITED. Similarly, context creation will also fail
* with CUDA_ERROR_UNKNOWN if the compute mode for the device is set to
* CU_COMPUTEMODE_EXCLUSIVE and there is already an active context on the
* device. The function cuDeviceGetAttribute() can be used with
* CU_DEVICE_ATTRIBUTE_COMPUTE_MODE to determine the compute mode of the
* device. The nvidia-smi tool can be used to set the compute mode for
* devices. Documentation
* for nvidia-smi can be obtained by passing
* a -h option to it.
*
*
* Note:
* Note that this
* function may also return error codes from previous, asynchronous
* launches.
*
*
*
*
*
* @param pctx Returned context handle of the new context
* @param flags Context creation flags
* @param dev Device to create context on
*
* @return CUDA_SUCCESS, CUDA_ERROR_DEINITIALIZED, CUDA_ERROR_NOT_INITIALIZED,
* CUDA_ERROR_INVALID_CONTEXT, CUDA_ERROR_INVALID_DEVICE,
* CUDA_ERROR_INVALID_VALUE, CUDA_ERROR_OUT_OF_MEMORY, CUDA_ERROR_UNKNOWN
*
* @see JCudaDriver#cuCtxDestroy
* @see JCudaDriver#cuCtxGetApiVersion
* @see JCudaDriver#cuCtxGetCacheConfig
* @see JCudaDriver#cuCtxGetDevice
* @see JCudaDriver#cuCtxGetLimit
* @see JCudaDriver#cuCtxPopCurrent
* @see JCudaDriver#cuCtxPushCurrent
* @see JCudaDriver#cuCtxSetCacheConfig
* @see JCudaDriver#cuCtxSetLimit
* @see JCudaDriver#cuCtxSynchronize
*/
public static int cuCtxCreate(CUcontext pctx, int flags, CUdevice dev)
{
    // Delegate the context creation to the JNI layer and route the
    // resulting status code through checkResult.
    final int status = cuCtxCreateNative(pctx, flags, dev);
    return checkResult(status);
}
// Native (JNI) counterpart of cuCtxCreate.
private static native int cuCtxCreateNative(CUcontext pctx, int flags, CUdevice dev);
/**
* Destroy a CUDA context.
*
*
* CUresult cuCtxDestroy (
* CUcontext ctx )
*
*
* Destroy a CUDA context. Destroys the
* CUDA context specified by ctx. The context ctx will
* be destroyed regardless of how many threads it is current to. It is
* the responsibility of the calling function to ensure
* that no API call issues using ctx while cuCtxDestroy() is executing.
*
* If ctx is current to the
* calling thread then ctx will also be popped from the current
* thread's context stack (as though cuCtxPopCurrent() were called). If
* ctx is current to other threads, then ctx will
* remain current to those threads, and attempting to access ctx
* from those threads will result in the error
* CUDA_ERROR_CONTEXT_IS_DESTROYED.
*
*
* Note:
* Note that this
* function may also return error codes from previous, asynchronous
* launches.
*
*
*
*
*
* @param ctx Context to destroy
*
* @return CUDA_SUCCESS, CUDA_ERROR_DEINITIALIZED, CUDA_ERROR_NOT_INITIALIZED,
* CUDA_ERROR_INVALID_CONTEXT, CUDA_ERROR_INVALID_VALUE
*
* @see JCudaDriver#cuCtxCreate
* @see JCudaDriver#cuCtxGetApiVersion
* @see JCudaDriver#cuCtxGetCacheConfig
* @see JCudaDriver#cuCtxGetDevice
* @see JCudaDriver#cuCtxGetLimit
* @see JCudaDriver#cuCtxPopCurrent
* @see JCudaDriver#cuCtxPushCurrent
* @see JCudaDriver#cuCtxSetCacheConfig
* @see JCudaDriver#cuCtxSetLimit
* @see JCudaDriver#cuCtxSynchronize
*/
public static int cuCtxDestroy(CUcontext ctx)
{
    // Forward to the native implementation; checkResult sees the status
    // code before the caller does.
    final int status = cuCtxDestroyNative(ctx);
    return checkResult(status);
}
// Native (JNI) counterpart of cuCtxDestroy.
private static native int cuCtxDestroyNative(CUcontext ctx);
/**
* Increment a context's usage-count.
*
*
* CUresult cuCtxAttach (
* CUcontext* pctx,
* unsigned int flags )
*
*
* Increment a context's usage-count.
* Deprecated: Note that this function is
* deprecated and should not be used.
*
* Increments the usage count of the
* context and passes back a context handle in *pctx that must
* be passed to cuCtxDetach() when the application is done with the
* context. cuCtxAttach() fails if there is no context current to the
* thread.
*
* Currently, the flags parameter
* must be 0.
*
*
* Note:
* Note that this
* function may also return error codes from previous, asynchronous
* launches.
*
*
*
*
*
* @param pctx Returned context handle of the current context
* @param flags Context attach flags (must be 0)
*
* @return CUDA_SUCCESS, CUDA_ERROR_DEINITIALIZED, CUDA_ERROR_NOT_INITIALIZED,
* CUDA_ERROR_INVALID_CONTEXT, CUDA_ERROR_INVALID_VALUE
*
* @see JCudaDriver#cuCtxCreate
* @see JCudaDriver#cuCtxDestroy
* @see JCudaDriver#cuCtxDetach
* @see JCudaDriver#cuCtxGetApiVersion
* @see JCudaDriver#cuCtxGetCacheConfig
* @see JCudaDriver#cuCtxGetDevice
* @see JCudaDriver#cuCtxGetLimit
* @see JCudaDriver#cuCtxPopCurrent
* @see JCudaDriver#cuCtxPushCurrent
* @see JCudaDriver#cuCtxSetCacheConfig
* @see JCudaDriver#cuCtxSetLimit
* @see JCudaDriver#cuCtxSynchronize
*/
public static int cuCtxAttach(CUcontext pctx, int flags)
{
    // Thin wrapper: call the JNI binding, then pass its status code
    // through checkResult.
    final int status = cuCtxAttachNative(pctx, flags);
    return checkResult(status);
}
// Native (JNI) counterpart of cuCtxAttach.
private static native int cuCtxAttachNative(CUcontext pctx, int flags);
/**
* Decrement a context's usage-count.
*
*
* CUresult cuCtxDetach (
* CUcontext ctx )
*
*
* Decrement a context's usage-count.
* Deprecated: Note that this function is
* deprecated and should not be used.
*
* Decrements the usage count of the
* context ctx, and destroys the context if the usage count goes
* to 0. The context must be a handle that was passed back by cuCtxCreate()
* or cuCtxAttach(), and must be current to the calling thread.
*
*
* Note:
* Note that this
* function may also return error codes from previous, asynchronous
* launches.
*
*
*
*
*
* @param ctx Context to destroy
*
* @return CUDA_SUCCESS, CUDA_ERROR_DEINITIALIZED, CUDA_ERROR_NOT_INITIALIZED,
* CUDA_ERROR_INVALID_CONTEXT
*
* @see JCudaDriver#cuCtxCreate
* @see JCudaDriver#cuCtxDestroy
* @see JCudaDriver#cuCtxGetApiVersion
* @see JCudaDriver#cuCtxGetCacheConfig
* @see JCudaDriver#cuCtxGetDevice
* @see JCudaDriver#cuCtxGetLimit
* @see JCudaDriver#cuCtxPopCurrent
* @see JCudaDriver#cuCtxPushCurrent
* @see JCudaDriver#cuCtxSetCacheConfig
* @see JCudaDriver#cuCtxSetLimit
* @see JCudaDriver#cuCtxSynchronize
*/
public static int cuCtxDetach(CUcontext ctx)
{
    // Thin wrapper: the status code of the native call is handed to
    // checkResult and its result is returned.
    final int status = cuCtxDetachNative(ctx);
    return checkResult(status);
}
// Native (JNI) counterpart of cuCtxDetach.
private static native int cuCtxDetachNative(CUcontext ctx);
/**
* Pushes a context on the current CPU thread.
*
*
* CUresult cuCtxPushCurrent (
* CUcontext ctx )
*
*
* Pushes a context on the current CPU
* thread. Pushes the given context ctx onto the CPU thread's
* stack of current contexts. The specified context becomes the CPU
* thread's current context, so all CUDA
* functions that operate on the current
* context are affected.
*
* The previous current context may be made
* current again by calling cuCtxDestroy() or cuCtxPopCurrent().
*
*
* Note:
* Note that this
* function may also return error codes from previous, asynchronous
* launches.
*
*
*
*
*
* @param ctx Context to push
*
* @return CUDA_SUCCESS, CUDA_ERROR_DEINITIALIZED, CUDA_ERROR_NOT_INITIALIZED,
* CUDA_ERROR_INVALID_CONTEXT, CUDA_ERROR_INVALID_VALUE
*
* @see JCudaDriver#cuCtxCreate
* @see JCudaDriver#cuCtxDestroy
* @see JCudaDriver#cuCtxGetApiVersion
* @see JCudaDriver#cuCtxGetCacheConfig
* @see JCudaDriver#cuCtxGetDevice
* @see JCudaDriver#cuCtxGetLimit
* @see JCudaDriver#cuCtxPopCurrent
* @see JCudaDriver#cuCtxSetCacheConfig
* @see JCudaDriver#cuCtxSetLimit
* @see JCudaDriver#cuCtxSynchronize
*/
public static int cuCtxPushCurrent(CUcontext ctx)
{
    // Run the native push, then let checkResult process the status code.
    final int status = cuCtxPushCurrentNative(ctx);
    return checkResult(status);
}
// Native (JNI) counterpart of cuCtxPushCurrent.
private static native int cuCtxPushCurrentNative(CUcontext ctx);
/**
* Pops the current CUDA context from the current CPU thread.
*
*
* CUresult cuCtxPopCurrent (
* CUcontext* pctx )
*
*
* Pops the current CUDA context from the
* current CPU thread. Pops the current CUDA context from the CPU thread
* and passes back
* the old context handle in *pctx.
* That context may then be made current to a different CPU thread by
* calling cuCtxPushCurrent().
*
* If a context was current to the CPU
* thread before cuCtxCreate() or cuCtxPushCurrent() was called, this
* function makes that context current to the CPU thread again.
*
*
* Note:
* Note that this
* function may also return error codes from previous, asynchronous
* launches.
*
*
*
*
*
* @param pctx Returned new context handle
*
* @return CUDA_SUCCESS, CUDA_ERROR_DEINITIALIZED, CUDA_ERROR_NOT_INITIALIZED,
* CUDA_ERROR_INVALID_CONTEXT
*
* @see JCudaDriver#cuCtxCreate
* @see JCudaDriver#cuCtxDestroy
* @see JCudaDriver#cuCtxGetApiVersion
* @see JCudaDriver#cuCtxGetCacheConfig
* @see JCudaDriver#cuCtxGetDevice
* @see JCudaDriver#cuCtxGetLimit
* @see JCudaDriver#cuCtxPushCurrent
* @see JCudaDriver#cuCtxSetCacheConfig
* @see JCudaDriver#cuCtxSetLimit
* @see JCudaDriver#cuCtxSynchronize
*/
public static int cuCtxPopCurrent(CUcontext pctx)
{
    // Run the native pop, then let checkResult process the status code.
    final int status = cuCtxPopCurrentNative(pctx);
    return checkResult(status);
}
// Native (JNI) counterpart of cuCtxPopCurrent.
private static native int cuCtxPopCurrentNative(CUcontext pctx);
/**
* Binds the specified CUDA context to the calling CPU thread.
*
*
* CUresult cuCtxSetCurrent (
* CUcontext ctx )
*
*
* Binds the specified CUDA context to the
* calling CPU thread. Binds the specified CUDA context to the calling
* CPU thread. If
* ctx is NULL then the CUDA
* context previously bound to the calling CPU thread is unbound and
* CUDA_SUCCESS is returned.
*
* If there exists a CUDA context stack on
* the calling CPU thread, this will replace the top of that stack with
* ctx. If ctx is NULL then this will be equivalent
* to popping the top of the calling CPU thread's CUDA context stack (or
* a no-op if the
* calling CPU thread's CUDA context stack
* is empty).
*
*
* Note:
* Note that this
* function may also return error codes from previous, asynchronous
* launches.
*
*
*
*
*
* @param ctx Context to bind to the calling CPU thread
*
* @return CUDA_SUCCESS, CUDA_ERROR_DEINITIALIZED, CUDA_ERROR_NOT_INITIALIZED,
* CUDA_ERROR_INVALID_CONTEXT
*
* @see JCudaDriver#cuCtxGetCurrent
* @see JCudaDriver#cuCtxCreate
* @see JCudaDriver#cuCtxDestroy
*/
public static int cuCtxSetCurrent(CUcontext ctx)
{
    // Delegate to the JNI binding; the status code is passed through
    // checkResult before being returned.
    final int status = cuCtxSetCurrentNative(ctx);
    return checkResult(status);
}
// Native (JNI) counterpart of cuCtxSetCurrent.
private static native int cuCtxSetCurrentNative(CUcontext ctx);
/**
* Returns the CUDA context bound to the calling CPU thread.
*
*
* CUresult cuCtxGetCurrent (
* CUcontext* pctx )
*
*
* Returns the CUDA context bound to the
* calling CPU thread. Returns in *pctx the CUDA context bound
* to the calling CPU thread. If no context is bound to the calling CPU
* thread then *pctx is set to NULL and CUDA_SUCCESS is
* returned.
*
*
* Note:
* Note that this
* function may also return error codes from previous, asynchronous
* launches.
*
*
*
*
*
* @param pctx Returned context handle
*
* @return CUDA_SUCCESS, CUDA_ERROR_DEINITIALIZED,
* CUDA_ERROR_NOT_INITIALIZED
*
* @see JCudaDriver#cuCtxSetCurrent
* @see JCudaDriver#cuCtxCreate
* @see JCudaDriver#cuCtxDestroy
*/
public static int cuCtxGetCurrent(CUcontext pctx)
{
    // Query via the JNI binding; the status code is passed through
    // checkResult before being returned.
    final int status = cuCtxGetCurrentNative(pctx);
    return checkResult(status);
}
// Native (JNI) counterpart of cuCtxGetCurrent.
private static native int cuCtxGetCurrentNative(CUcontext pctx);
/**
* Returns the device ID for the current context.
*
*
* CUresult cuCtxGetDevice (
* CUdevice* device )
*
*
* Returns the device ID for the current
* context. Returns in *device the ordinal of the current
* context's device.
*
*
* Note:
* Note that this
* function may also return error codes from previous, asynchronous
* launches.
*
*
*
*
*
* @param device Returned device ID for the current context
*
* @return CUDA_SUCCESS, CUDA_ERROR_DEINITIALIZED, CUDA_ERROR_NOT_INITIALIZED,
* CUDA_ERROR_INVALID_CONTEXT, CUDA_ERROR_INVALID_VALUE
*
* @see JCudaDriver#cuCtxCreate
* @see JCudaDriver#cuCtxDestroy
* @see JCudaDriver#cuCtxGetApiVersion
* @see JCudaDriver#cuCtxGetCacheConfig
* @see JCudaDriver#cuCtxGetLimit
* @see JCudaDriver#cuCtxPopCurrent
* @see JCudaDriver#cuCtxPushCurrent
* @see JCudaDriver#cuCtxSetCacheConfig
* @see JCudaDriver#cuCtxSetLimit
* @see JCudaDriver#cuCtxSynchronize
*/
public static int cuCtxGetDevice(CUdevice device)
{
    // Ask the native layer for the device, then hand the status code to
    // checkResult.
    final int status = cuCtxGetDeviceNative(device);
    return checkResult(status);
}
// Native (JNI) counterpart of cuCtxGetDevice.
private static native int cuCtxGetDeviceNative(CUdevice device);
/**
 * Returns the flags for the current context.
 *
 * Binding for the CUDA driver API function
 * CUresult cuCtxGetFlags ( unsigned int* flags ).
 *
 * @param flags Array receiving the context flags (presumably written to
 * element 0 by the native side, as for the other array out-parameters
 * of this class - confirm against the JNI implementation)
 *
 * @return The status code of the native call, after it has been passed
 * through checkResult
 *
 * @see JCudaDriver#cuCtxCreate
 * @see JCudaDriver#cuCtxGetDevice
 */
public static int cuCtxGetFlags(int flags[])
{
    // Call the JNI binding, then let checkResult process its status code.
    final int status = cuCtxGetFlagsNative(flags);
    return checkResult(status);
}
// Native (JNI) counterpart of cuCtxGetFlags.
private static native int cuCtxGetFlagsNative(int flags[]);
/**
* Block for a context's tasks to complete.
*
*
* CUresult cuCtxSynchronize (
* void )
*
*
* Block for a context's tasks to complete.
* Blocks until the device has completed all preceding requested tasks.
* cuCtxSynchronize() returns an error if one of the preceding tasks
* failed. If the context was created with the CU_CTX_SCHED_BLOCKING_SYNC
* flag, the CPU thread will block until the GPU context has finished its
* work.
*
*
* Note:
* Note that this
* function may also return error codes from previous, asynchronous
* launches.
*
*
*
*
*
*
* @return CUDA_SUCCESS, CUDA_ERROR_DEINITIALIZED, CUDA_ERROR_NOT_INITIALIZED,
* CUDA_ERROR_INVALID_CONTEXT
*
* @see JCudaDriver#cuCtxCreate
* @see JCudaDriver#cuCtxDestroy
* @see JCudaDriver#cuCtxGetApiVersion
* @see JCudaDriver#cuCtxGetCacheConfig
* @see JCudaDriver#cuCtxGetDevice
* @see JCudaDriver#cuCtxGetLimit
* @see JCudaDriver#cuCtxPopCurrent
* @see JCudaDriver#cuCtxPushCurrent
* @see JCudaDriver#cuCtxSetCacheConfig
* @see JCudaDriver#cuCtxSetLimit
*/
public static int cuCtxSynchronize()
{
    // The native call takes no arguments; its status code goes through
    // checkResult before being returned.
    final int status = cuCtxSynchronizeNative();
    return checkResult(status);
}
// Native (JNI) counterpart of cuCtxSynchronize.
private static native int cuCtxSynchronizeNative();
/**
* Loads a compute module.
*
*
* CUresult cuModuleLoad (
* CUmodule* module,
* const char* fname )
*
*
* Loads a compute module. Takes a filename
* fname and loads the corresponding module module
* into the current context. The CUDA driver API does not attempt to
* lazily allocate the resources needed by a module; if the
* memory for functions and data (constant
* and global) needed by the module cannot be allocated, cuModuleLoad()
* fails. The file should be a cubin file as output by nvcc, or a PTX file either as output by nvcc
* or handwritten, or a fatbin file as output by nvcc
* from toolchain 4.0 or later.
*
*
* Note:
* Note that this
* function may also return error codes from previous, asynchronous
* launches.
*
*
*
*
*
* @param module Returned module
* @param fname Filename of module to load
*
* @return CUDA_SUCCESS, CUDA_ERROR_DEINITIALIZED, CUDA_ERROR_NOT_INITIALIZED,
* CUDA_ERROR_INVALID_CONTEXT, CUDA_ERROR_INVALID_VALUE, CUDA_ERROR_NOT_FOUND,
* CUDA_ERROR_OUT_OF_MEMORY, CUDA_ERROR_FILE_NOT_FOUND,
* CUDA_ERROR_SHARED_OBJECT_SYMBOL_NOT_FOUND,
* CUDA_ERROR_SHARED_OBJECT_INIT_FAILED
*
* @see JCudaDriver#cuModuleGetFunction
* @see JCudaDriver#cuModuleGetGlobal
* @see JCudaDriver#cuModuleGetTexRef
* @see JCudaDriver#cuModuleLoadData
* @see JCudaDriver#cuModuleLoadDataEx
* @see JCudaDriver#cuModuleLoadFatBinary
* @see JCudaDriver#cuModuleUnload
*/
public static int cuModuleLoad(CUmodule module, String fname)
{
    // Loading is done entirely by the JNI binding; this wrapper only
    // routes the status code through checkResult.
    final int status = cuModuleLoadNative(module, fname);
    return checkResult(status);
}
// Native (JNI) counterpart of cuModuleLoad.
private static native int cuModuleLoadNative(CUmodule module, String fname);
/**
* Load a module's data.
*
*
* CUresult cuModuleLoadData (
* CUmodule* module,
* const void* image )
*
*
* Load a module's data. Takes a pointer
* image and loads the corresponding module module
* into the current context. The pointer may be obtained by mapping a
* cubin or PTX or fatbin file, passing a cubin or PTX or
* fatbin file as a NULL-terminated text
* string, or incorporating a cubin or fatbin object into the executable
* resources and
* using operating system calls such as
* Windows FindResource() to obtain the pointer.
*
*
* Note:
* Note that this
* function may also return error codes from previous, asynchronous
* launches.
*
*
*
*
*
* @param module Returned module
* @param image Module data to load
*
* @return CUDA_SUCCESS, CUDA_ERROR_DEINITIALIZED, CUDA_ERROR_NOT_INITIALIZED,
* CUDA_ERROR_INVALID_CONTEXT, CUDA_ERROR_INVALID_VALUE,
* CUDA_ERROR_OUT_OF_MEMORY, CUDA_ERROR_SHARED_OBJECT_SYMBOL_NOT_FOUND,
* CUDA_ERROR_SHARED_OBJECT_INIT_FAILED
*
* @see JCudaDriver#cuModuleGetFunction
* @see JCudaDriver#cuModuleGetGlobal
* @see JCudaDriver#cuModuleGetTexRef
* @see JCudaDriver#cuModuleLoad
* @see JCudaDriver#cuModuleLoadDataEx
* @see JCudaDriver#cuModuleLoadFatBinary
* @see JCudaDriver#cuModuleUnload
*/
public static int cuModuleLoadData(CUmodule module, byte image[])
{
    // The image bytes are passed to the JNI binding unchanged; the
    // status code is then handed to checkResult.
    final int status = cuModuleLoadDataNative(module, image);
    return checkResult(status);
}
// Native (JNI) counterpart of cuModuleLoadData.
private static native int cuModuleLoadDataNative(CUmodule module, byte image[]);
/**
* Load a module's data with options.
*
* Note: It is hardly possible to properly pass in the required
* option values for this method. Thus, the arguments here must be
* <code>numOptions=0</code>,
* <code>options=new int[0]</code> and
* <code>optionValues=Pointer.to(new int[0])</code>.
* For passing in real options, use
* {@link #cuModuleLoadDataJIT(CUmodule, Pointer, JITOptions)} instead.
*
*
* CUresult cuModuleLoadDataEx (
* CUmodule* module,
* const void* image,
* unsigned int numOptions,
* CUjit_option* options,
* void** optionValues )
*
*
* Load a module's data with options. Takes
* a pointer image and loads the corresponding module module into the current context. The pointer may be obtained by
* mapping a cubin or PTX or fatbin file, passing a cubin or PTX or
* fatbin file as a NULL-terminated text
* string, or incorporating a cubin or fatbin object into the executable
* resources and
* using operating system calls such as
* Windows FindResource() to obtain the pointer. Options are
* passed as an array via options and any corresponding
* parameters are passed in optionValues. The number of total
* options is supplied via numOptions. Any outputs will be
* returned via optionValues. Supported options are (types for
* the option values are specified in parentheses after the option name):
*
*
* -
*
CU_JIT_MAX_REGISTERS: (unsigned
* int) input specifies the maximum number of registers per thread;
*
*
* -
*
CU_JIT_THREADS_PER_BLOCK:
* (unsigned int) input specifies number of threads per block to target
* compilation for; output returns the number of threads
* the compiler actually targeted;
*
*
* -
*
CU_JIT_WALL_TIME: (float)
* output returns the float value of wall clock time, in milliseconds,
* spent compiling the PTX code;
*
*
* -
*
CU_JIT_INFO_LOG_BUFFER: (char*)
* input is a pointer to a buffer in which to print any informational log
* messages from PTX assembly (the buffer size
* is specified via option
* CU_JIT_INFO_LOG_BUFFER_SIZE_BYTES);
*
*
* -
*
CU_JIT_INFO_LOG_BUFFER_SIZE_BYTES:
* (unsigned int) input is the size in bytes of the buffer; output is the
* number of bytes filled with messages;
*
*
* -
*
CU_JIT_ERROR_LOG_BUFFER:
* (char*) input is a pointer to a buffer in which to print any error log
* messages from PTX assembly (the buffer size is specified
* via option
* CU_JIT_ERROR_LOG_BUFFER_SIZE_BYTES);
*
*
* -
*
CU_JIT_ERROR_LOG_BUFFER_SIZE_BYTES:
* (unsigned int) input is the size in bytes of the buffer; output is the
* number of bytes filled with messages;
*
*
* -
*
CU_JIT_OPTIMIZATION_LEVEL:
* (unsigned int) input is the level of optimization to apply to generated
* code (0 - 4), with 4 being the default and highest
* level;
*
*
* -
*
CU_JIT_TARGET_FROM_CUCONTEXT:
* (No option value) causes compilation target to be determined based on
* current attached context (default);
*
*
* -
*
* CU_JIT_TARGET: (unsigned int
* for enumerated type CUjit_target_enum) input is the compilation target
* based on supplied CUjit_target_enum;
* possible values are:
*
* -
*
CU_TARGET_COMPUTE_10
*
* -
*
CU_TARGET_COMPUTE_11
*
* -
*
CU_TARGET_COMPUTE_12
*
* -
*
CU_TARGET_COMPUTE_13
*
* -
*
CU_TARGET_COMPUTE_20
*
*
*
*
* -
*
* CU_JIT_FALLBACK_STRATEGY:
* (unsigned int for enumerated type CUjit_fallback_enum) chooses fallback
* strategy if matching cubin is not found; possible
* values are:
*
* -
*
CU_PREFER_PTX
*
* -
*
CU_PREFER_BINARY
*
*
*
*
*
*
*
* Note:
* Note that this
* function may also return error codes from previous, asynchronous
* launches.
*
*
*
*
*
* @param phMod Returned module
* @param p Module data to load
* @param numOptions Number of options
* @param options Options for JIT
* @param optionValues Option values for JIT
*
* @return CUDA_SUCCESS, CUDA_ERROR_DEINITIALIZED, CUDA_ERROR_NOT_INITIALIZED,
* CUDA_ERROR_INVALID_CONTEXT, CUDA_ERROR_INVALID_VALUE,
* CUDA_ERROR_OUT_OF_MEMORY, CUDA_ERROR_NO_BINARY_FOR_GPU,
* CUDA_ERROR_SHARED_OBJECT_SYMBOL_NOT_FOUND,
* CUDA_ERROR_SHARED_OBJECT_INIT_FAILED
*
* @see JCudaDriver#cuModuleGetFunction
* @see JCudaDriver#cuModuleGetGlobal
* @see JCudaDriver#cuModuleGetTexRef
* @see JCudaDriver#cuModuleLoad
* @see JCudaDriver#cuModuleLoadData
* @see JCudaDriver#cuModuleLoadFatBinary
* @see JCudaDriver#cuModuleUnload
*/
public static int cuModuleLoadDataEx (CUmodule phMod, Pointer p, int numOptions, int options[], Pointer optionValues)
{
    // All five arguments are forwarded to the JNI binding as given; the
    // status code is then processed by checkResult.
    final int status = cuModuleLoadDataExNative(phMod, p, numOptions, options, optionValues);
    return checkResult(status);
}
// Native (JNI) counterpart of cuModuleLoadDataEx.
private static native int cuModuleLoadDataExNative(CUmodule phMod, Pointer p, int numOptions, int options[], Pointer optionValues);
/**
* Load a module's data.
*
*
* CUresult cuModuleLoadFatBinary (
* CUmodule* module,
* const void* fatCubin )
*
*
* Load a module's data. Takes a pointer
* fatCubin and loads the corresponding module module
* into the current context. The pointer represents a fat binary object,
* which is a collection of different cubin and/or PTX
* files, all representing the same device
* code, but compiled and optimized for different architectures.
*
* Prior to CUDA 4.0, there was no
* documented API for constructing and using fat binary objects by
* programmers. Starting with
* CUDA 4.0, fat binary objects can be
* constructed by providing the -fatbin option to nvcc.
* More information can be found in the nvcc document.
*
*
* Note:
* Note that this
* function may also return error codes from previous, asynchronous
* launches.
*
*
*
*
*
* @param module Returned module
* @param fatCubin Fat binary to load
*
* @return CUDA_SUCCESS, CUDA_ERROR_DEINITIALIZED, CUDA_ERROR_NOT_INITIALIZED,
* CUDA_ERROR_INVALID_CONTEXT, CUDA_ERROR_INVALID_VALUE, CUDA_ERROR_NOT_FOUND,
* CUDA_ERROR_OUT_OF_MEMORY, CUDA_ERROR_NO_BINARY_FOR_GPU,
* CUDA_ERROR_SHARED_OBJECT_SYMBOL_NOT_FOUND,
* CUDA_ERROR_SHARED_OBJECT_INIT_FAILED
*
* @see JCudaDriver#cuModuleGetFunction
* @see JCudaDriver#cuModuleGetGlobal
* @see JCudaDriver#cuModuleGetTexRef
* @see JCudaDriver#cuModuleLoad
* @see JCudaDriver#cuModuleLoadData
* @see JCudaDriver#cuModuleLoadDataEx
* @see JCudaDriver#cuModuleUnload
*/
public static int cuModuleLoadFatBinary(CUmodule module, byte fatCubin[])
{
    // The fat binary bytes are passed to the JNI binding unchanged; the
    // status code is then handed to checkResult.
    final int status = cuModuleLoadFatBinaryNative(module, fatCubin);
    return checkResult(status);
}
// Native (JNI) counterpart of cuModuleLoadFatBinary.
private static native int cuModuleLoadFatBinaryNative(CUmodule module, byte fatCubin[]);
/**
* Unloads a module.
*
*
* CUresult cuModuleUnload (
* CUmodule hmod )
*
*
* Unloads a module. Unloads a module hmod from the current context.
*
*
* Note:
* Note that this
* function may also return error codes from previous, asynchronous
* launches.
*
*
*
*
*
* @param hmod Module to unload
*
* @return CUDA_SUCCESS, CUDA_ERROR_DEINITIALIZED, CUDA_ERROR_NOT_INITIALIZED,
* CUDA_ERROR_INVALID_CONTEXT, CUDA_ERROR_INVALID_VALUE
*
* @see JCudaDriver#cuModuleGetFunction
* @see JCudaDriver#cuModuleGetGlobal
* @see JCudaDriver#cuModuleGetTexRef
* @see JCudaDriver#cuModuleLoad
* @see JCudaDriver#cuModuleLoadData
* @see JCudaDriver#cuModuleLoadDataEx
* @see JCudaDriver#cuModuleLoadFatBinary
*/
public static int cuModuleUnload(CUmodule hmod)
{
    // Unload through the JNI binding, then let checkResult process the
    // status code.
    final int status = cuModuleUnloadNative(hmod);
    return checkResult(status);
}
// Native (JNI) counterpart of cuModuleUnload.
private static native int cuModuleUnloadNative(CUmodule hmod);
/**
* Returns a function handle.
*
*
* CUresult cuModuleGetFunction (
* CUfunction* hfunc,
* CUmodule hmod,
* const char* name )
*
*
* Returns a function handle. Returns in
* *hfunc the handle of the function of name name
* located in module hmod. If no function of that name exists,
* cuModuleGetFunction() returns CUDA_ERROR_NOT_FOUND.
*
*
* Note:
* Note that this
* function may also return error codes from previous, asynchronous
* launches.
*
*
*
*
*
* @param hfunc Returned function handle
* @param hmod Module to retrieve function from
* @param name Name of function to retrieve
*
* @return CUDA_SUCCESS, CUDA_ERROR_DEINITIALIZED, CUDA_ERROR_NOT_INITIALIZED,
* CUDA_ERROR_INVALID_CONTEXT, CUDA_ERROR_INVALID_VALUE,
* CUDA_ERROR_NOT_FOUND
*
* @see JCudaDriver#cuModuleGetGlobal
* @see JCudaDriver#cuModuleGetTexRef
* @see JCudaDriver#cuModuleLoad
* @see JCudaDriver#cuModuleLoadData
* @see JCudaDriver#cuModuleLoadDataEx
* @see JCudaDriver#cuModuleLoadFatBinary
* @see JCudaDriver#cuModuleUnload
*/
public static int cuModuleGetFunction(CUfunction hfunc, CUmodule hmod, String name)
{
    // The lookup itself happens in the JNI binding; this wrapper routes
    // the status code through checkResult.
    final int status = cuModuleGetFunctionNative(hfunc, hmod, name);
    return checkResult(status);
}
// Native (JNI) counterpart of cuModuleGetFunction.
private static native int cuModuleGetFunctionNative(CUfunction hfunc, CUmodule hmod, String name);
/**
* Returns a global pointer from a module.
*
*
* CUresult cuModuleGetGlobal (
* CUdeviceptr* dptr,
* size_t* bytes,
* CUmodule hmod,
* const char* name )
*
*
* Returns a global pointer from a module.
* Returns in *dptr and *bytes the base pointer and
* size of the global of name name located in module hmod. If no variable of that name exists, cuModuleGetGlobal()
* returns CUDA_ERROR_NOT_FOUND. Both parameters dptr and bytes are optional. If one of them is NULL, it is ignored.
*
*
* Note:
* Note that this
* function may also return error codes from previous, asynchronous
* launches.
*
*
*
*
*
* @param dptr Returned global device pointer
* @param bytes Returned global size in bytes
* @param hmod Module to retrieve global from
* @param name Name of global to retrieve
*
* @return CUDA_SUCCESS, CUDA_ERROR_DEINITIALIZED, CUDA_ERROR_NOT_INITIALIZED,
* CUDA_ERROR_INVALID_CONTEXT, CUDA_ERROR_INVALID_VALUE,
* CUDA_ERROR_NOT_FOUND
*
* @see JCudaDriver#cuModuleGetFunction
* @see JCudaDriver#cuModuleGetTexRef
* @see JCudaDriver#cuModuleLoad
* @see JCudaDriver#cuModuleLoadData
* @see JCudaDriver#cuModuleLoadDataEx
* @see JCudaDriver#cuModuleLoadFatBinary
* @see JCudaDriver#cuModuleUnload
*/
public static int cuModuleGetGlobal(CUdeviceptr dptr, long bytes[], CUmodule hmod, String name)
{
    // Both out-parameters (dptr, bytes) are forwarded to the JNI binding
    // as given; the status code is then processed by checkResult.
    final int status = cuModuleGetGlobalNative(dptr, bytes, hmod, name);
    return checkResult(status);
}
// Native (JNI) counterpart of cuModuleGetGlobal.
private static native int cuModuleGetGlobalNative(CUdeviceptr dptr, long bytes[], CUmodule hmod, String name);
/**
* Returns a handle to a texture reference.
*
*
* CUresult cuModuleGetTexRef (
* CUtexref* pTexRef,
* CUmodule hmod,
* const char* name )
*
*
* Returns a handle to a texture reference.
* Returns in *pTexRef the handle of the texture reference of
* name name in the module hmod. If no texture
* reference of that name exists, cuModuleGetTexRef() returns
* CUDA_ERROR_NOT_FOUND. This texture reference handle should not be
* destroyed, since it will be destroyed when the module is unloaded.
*
*
* Note:
* Note that this
* function may also return error codes from previous, asynchronous
* launches.
*
*
*
*
*
* @param pTexRef Returned texture reference
* @param hmod Module to retrieve texture reference from
* @param name Name of texture reference to retrieve
*
* @return CUDA_SUCCESS, CUDA_ERROR_DEINITIALIZED, CUDA_ERROR_NOT_INITIALIZED,
* CUDA_ERROR_INVALID_CONTEXT, CUDA_ERROR_INVALID_VALUE,
* CUDA_ERROR_NOT_FOUND
*
* @see JCudaDriver#cuModuleGetFunction
* @see JCudaDriver#cuModuleGetGlobal
* @see JCudaDriver#cuModuleGetSurfRef
* @see JCudaDriver#cuModuleLoad
* @see JCudaDriver#cuModuleLoadData
* @see JCudaDriver#cuModuleLoadDataEx
* @see JCudaDriver#cuModuleLoadFatBinary
* @see JCudaDriver#cuModuleUnload
*/
public static int cuModuleGetTexRef(CUtexref pTexRef, CUmodule hmod, String name)
{
    // Look up the texture reference via the JNI binding, then hand the
    // status code to checkResult.
    final int status = cuModuleGetTexRefNative(pTexRef, hmod, name);
    return checkResult(status);
}
// Native (JNI) counterpart of cuModuleGetTexRef.
private static native int cuModuleGetTexRefNative(CUtexref pTexRef, CUmodule hmod, String name);
/**
* Returns a handle to a surface reference.
*
*
* CUresult cuModuleGetSurfRef (
* CUsurfref* pSurfRef,
* CUmodule hmod,
* const char* name )
*
*
* Returns a handle to a surface reference.
* Returns in *pSurfRef the handle of the surface reference of
* name name in the module hmod. If no surface
* reference of that name exists, cuModuleGetSurfRef() returns
* CUDA_ERROR_NOT_FOUND.
*
*
* Note:
* Note that this
* function may also return error codes from previous, asynchronous
* launches.
*
*
*
*
*
* @param pSurfRef Returned surface reference
* @param hmod Module to retrieve surface reference from
* @param name Name of surface reference to retrieve
*
* @return CUDA_SUCCESS, CUDA_ERROR_DEINITIALIZED, CUDA_ERROR_NOT_INITIALIZED,
* CUDA_ERROR_INVALID_CONTEXT, CUDA_ERROR_INVALID_VALUE,
* CUDA_ERROR_NOT_FOUND
*
* @see JCudaDriver#cuModuleGetFunction
* @see JCudaDriver#cuModuleGetGlobal
* @see JCudaDriver#cuModuleGetTexRef
* @see JCudaDriver#cuModuleLoad
* @see JCudaDriver#cuModuleLoadData
* @see JCudaDriver#cuModuleLoadDataEx
* @see JCudaDriver#cuModuleLoadFatBinary
* @see JCudaDriver#cuModuleUnload
*/
public static int cuModuleGetSurfRef(CUsurfref pSurfRef, CUmodule hmod, String name)
{
    // Look up the surface reference via the JNI binding, then hand the
    // status code to checkResult.
    final int status = cuModuleGetSurfRefNative(pSurfRef, hmod, name);
    return checkResult(status);
}
// Native (JNI) counterpart of cuModuleGetSurfRef.
private static native int cuModuleGetSurfRefNative(CUsurfref pSurfRef, CUmodule hmod, String name);
/**
 * Creates a pending JIT linker invocation.
 *
 * Binding for the CUDA driver API function cuLinkCreate. The option
 * arrays of the C signature are represented here by a single
 * {@link JITOptions} object.
 *
 * According to the CUDA documentation, the link state obtained from a
 * successful call is owned by the caller and should eventually be
 * destroyed with cuLinkDestroy.
 *
 * @param jitOptions The JIT options handed to the native call
 * @param stateOut The link state object handed to the native call
 * (the "returned" state of the C API)
 *
 * @return The status code of the native call, after it has been passed
 * through checkResult
 *
 * @see JCudaDriver#cuLinkAddData
 * @see JCudaDriver#cuLinkAddFile
 * @see JCudaDriver#cuLinkComplete
 * @see JCudaDriver#cuLinkDestroy
 */
public static int cuLinkCreate(JITOptions jitOptions, CUlinkState stateOut)
{
    // Call the JNI binding, then let checkResult process its status code.
    final int status = cuLinkCreateNative(jitOptions, stateOut);
    return checkResult(status);
}
// Native (JNI) counterpart of cuLinkCreate.
private static native int cuLinkCreateNative(JITOptions jitOptions, CUlinkState stateOut);
/**
 * Adds an input to a pending linker invocation.
 *
 * Binding for the CUDA driver API function cuLinkAddData. The option
 * arrays of the C signature are represented here by a single
 * {@link JITOptions} object.
 *
 * @param state A pending linker action
 * @param type The type of the input data (a CUjitInputType value in
 * the CUDA API)
 * @param data The input data
 * @param size The length of the input data
 * @param name A name for this input (used in log messages according to
 * the CUDA documentation)
 * @param jitOptions The JIT options handed to the native call
 *
 * @return The status code of the native call, after it has been passed
 * through checkResult
 *
 * @see JCudaDriver#cuLinkCreate
 * @see JCudaDriver#cuLinkAddFile
 * @see JCudaDriver#cuLinkComplete
 * @see JCudaDriver#cuLinkDestroy
 */
public static int cuLinkAddData(CUlinkState state, int type, Pointer data, long size, String name, JITOptions jitOptions)
{
    // All arguments are forwarded to the JNI binding as given; the status
    // code is then processed by checkResult.
    final int status = cuLinkAddDataNative(state, type, data, size, name, jitOptions);
    return checkResult(status);
}
// Native (JNI) counterpart of cuLinkAddData.
private static native int cuLinkAddDataNative(CUlinkState state, int type, Pointer data, long size, String name, JITOptions jitOptions);
/**
* Forwards to the native binding for cuLinkAddFile and passes the returned
* status code through checkResult.
*
* NOTE(review): the semantics of the underlying call are presumably those
* of the CUDA driver API function of the same name - confirm against the
* CUDA documentation; they are not described in this file.
*
* @param state Link state handed unchanged to the native call
* @param type Integer type code handed unchanged to the native call
* (presumably a JIT input type constant - TODO confirm)
* @param path Path string handed unchanged to the native call
* @param jitOptions JIT options handed unchanged to the native call
*
* @return The value produced by checkResult for the native status code
*/
public static int cuLinkAddFile(CUlinkState state, int type, String path, JITOptions jitOptions)
{
    final int status = cuLinkAddFileNative(state, type, path, jitOptions);
    return checkResult(status);
}
// JNI entry point backing cuLinkAddFile; declared native, so it has no Java body.
private static native int cuLinkAddFileNative(CUlinkState state, int type, String path, JITOptions jitOptions);
/**
* Forwards to the native binding for cuLinkComplete and passes the returned
* status code through checkResult.
*
* NOTE(review): the semantics of the underlying call are presumably those
* of the CUDA driver API function of the same name - confirm against the
* CUDA documentation; they are not described in this file.
*
* @param state Link state handed unchanged to the native call
* @param cubinOut Pointer handed unchanged to the native call
* (presumably receives the linked image - TODO confirm)
* @param sizeOut Array handed unchanged to the native call
* (presumably receives the image size in element 0 - TODO confirm)
*
* @return The value produced by checkResult for the native status code
*/
public static int cuLinkComplete(CUlinkState state, Pointer cubinOut, long sizeOut[])
{
    final int status = cuLinkCompleteNative(state, cubinOut, sizeOut);
    return checkResult(status);
}
// JNI entry point backing cuLinkComplete; declared native, so it has no Java body.
private static native int cuLinkCompleteNative(CUlinkState state, Pointer cubinOut, long sizeOut[]);
/**
* Forwards to the native binding for cuLinkDestroy and passes the returned
* status code through checkResult.
*
* NOTE(review): the semantics of the underlying call are presumably those
* of the CUDA driver API function of the same name - confirm against the
* CUDA documentation; they are not described in this file.
*
* @param state Link state handed unchanged to the native call
*
* @return The value produced by checkResult for the native status code
*/
public static int cuLinkDestroy(CUlinkState state)
{
    final int status = cuLinkDestroyNative(state);
    return checkResult(status);
}
// JNI entry point backing cuLinkDestroy; declared native, so it has no Java body.
private static native int cuLinkDestroyNative(CUlinkState state);
/**
* Gets free and total memory.
*
*
* CUresult cuMemGetInfo (
* size_t* free,
* size_t* total )
*
*
* Gets free and total memory. Returns in
* *free and *total respectively, the free and total
* amount of memory available for allocation by the CUDA context, in
* bytes.
*
*
* Note:
* Note that this
* function may also return error codes from previous, asynchronous
* launches.
*
*
*
*
*
* @param free Returned free memory in bytes
* @param total Returned total memory in bytes
*
* @return CUDA_SUCCESS, CUDA_ERROR_DEINITIALIZED, CUDA_ERROR_NOT_INITIALIZED,
* CUDA_ERROR_INVALID_CONTEXT, CUDA_ERROR_INVALID_VALUE
*
* @see JCudaDriver#cuArray3DCreate
* @see JCudaDriver#cuArray3DGetDescriptor
* @see JCudaDriver#cuArrayCreate
* @see JCudaDriver#cuArrayDestroy
* @see JCudaDriver#cuArrayGetDescriptor
* @see JCudaDriver#cuMemAlloc
* @see JCudaDriver#cuMemAllocHost
* @see JCudaDriver#cuMemAllocPitch
* @see JCudaDriver#cuMemcpy2D
* @see JCudaDriver#cuMemcpy2DAsync
* @see JCudaDriver#cuMemcpy2DUnaligned
* @see JCudaDriver#cuMemcpy3D
* @see JCudaDriver#cuMemcpy3DAsync
* @see JCudaDriver#cuMemcpyAtoA
* @see JCudaDriver#cuMemcpyAtoD
* @see JCudaDriver#cuMemcpyAtoH
* @see JCudaDriver#cuMemcpyAtoHAsync
* @see JCudaDriver#cuMemcpyDtoA
* @see JCudaDriver#cuMemcpyDtoD
* @see JCudaDriver#cuMemcpyDtoDAsync
* @see JCudaDriver#cuMemcpyDtoH
* @see JCudaDriver#cuMemcpyDtoHAsync
* @see JCudaDriver#cuMemcpyHtoA
* @see JCudaDriver#cuMemcpyHtoAAsync
* @see JCudaDriver#cuMemcpyHtoD
* @see JCudaDriver#cuMemcpyHtoDAsync
* @see JCudaDriver#cuMemFree
* @see JCudaDriver#cuMemFreeHost
* @see JCudaDriver#cuMemGetAddressRange
* @see JCudaDriver#cuMemHostAlloc
* @see JCudaDriver#cuMemHostGetDevicePointer
* @see JCudaDriver#cuMemsetD2D8
* @see JCudaDriver#cuMemsetD2D16
* @see JCudaDriver#cuMemsetD2D32
* @see JCudaDriver#cuMemsetD8
* @see JCudaDriver#cuMemsetD16
* @see JCudaDriver#cuMemsetD32
*/
public static int cuMemGetInfo(long free[], long total[])
{
    // Both arrays are handed to the JNI binding untouched; the status it
    // returns is then filtered through checkResult before being returned.
    final int status = cuMemGetInfoNative(free, total);
    return checkResult(status);
}
// JNI entry point backing cuMemGetInfo; declared native, so it has no Java body.
private static native int cuMemGetInfoNative(long free[], long total[]);
/**
* Allocates page-locked host memory.
*
*
* CUresult cuMemHostAlloc (
* void** pp,
* size_t bytesize,
* unsigned int Flags )
*
*
* Allocates page-locked host memory.
* Allocates bytesize bytes of host memory that is page-locked
* and accessible to the device. The driver tracks the virtual memory
* ranges allocated
* with this function and automatically
* accelerates calls to functions such as cuMemcpyHtoD(). Since the memory
* can be accessed directly by the device, it can be read or written with
* much higher bandwidth than pageable
* memory obtained with functions such as
* malloc(). Allocating excessive amounts of pinned memory may degrade
* system performance,
* since it reduces the amount of memory
* available to the system for paging. As a result, this function is best
* used sparingly
* to allocate staging areas for data
* exchange between host and device.
*
* The Flags parameter enables
* different options to be specified that affect the allocation, as
* follows.
*
*
* -
*
* CU_MEMHOSTALLOC_PORTABLE: The
* memory returned by this call will be considered as pinned memory by
* all CUDA contexts, not just the one that performed
* the allocation.
*
*
*
*
*
* -
*
* CU_MEMHOSTALLOC_DEVICEMAP: Maps
* the allocation into the CUDA address space. The device pointer to the
* memory may be obtained by calling cuMemHostGetDevicePointer(). This
* feature is available only on GPUs with compute capability greater than
* or equal to 1.1.
*
*
*
*
*
* -
*
* CU_MEMHOSTALLOC_WRITECOMBINED:
* Allocates the memory as write-combined (WC). WC memory can be
* transferred across the PCI Express bus more quickly on some
* system configurations, but
* cannot be read efficiently by most CPUs. WC memory is a good option
* for buffers that will be written
* by the CPU and read by the GPU
* via mapped pinned memory or host->device transfers.
*
*
*
*
* All of these flags are orthogonal to
* one another: a developer may allocate memory that is portable, mapped
* and/or write-combined
* with no restrictions.
*
* The CUDA context must have been created
* with the CU_CTX_MAP_HOST flag in order for the CU_MEMHOSTALLOC_DEVICEMAP
* flag to have any effect.
*
* The CU_MEMHOSTALLOC_DEVICEMAP flag may
* be specified on CUDA contexts for devices that do not support mapped
* pinned memory. The failure is deferred to cuMemHostGetDevicePointer()
* because the memory may be mapped into other CUDA contexts via the
* CU_MEMHOSTALLOC_PORTABLE flag.
*
* The memory allocated by this function
* must be freed with cuMemFreeHost().
*
* Note all host memory allocated using
* cuMemHostAlloc() will automatically be immediately accessible to all
* contexts on all devices which support unified addressing (as may be
* queried
* using CU_DEVICE_ATTRIBUTE_UNIFIED_ADDRESSING).
* Unless the flag CU_MEMHOSTALLOC_WRITECOMBINED is specified, the device
* pointer that may be used to access this host memory from those contexts
* is always equal to the returned
* host pointer *pp. If the flag
* CU_MEMHOSTALLOC_WRITECOMBINED is specified, then the function
* cuMemHostGetDevicePointer() must be used to query the device pointer,
* even if the context supports unified addressing. See Unified Addressing
* for additional details.
*
*
* Note:
* Note that this
* function may also return error codes from previous, asynchronous
* launches.
*
*
*
*
*
* @param pp Returned host pointer to page-locked memory
* @param bytes Requested allocation size in bytes
* @param Flags Flags for allocation request
*
* @return CUDA_SUCCESS, CUDA_ERROR_DEINITIALIZED, CUDA_ERROR_NOT_INITIALIZED,
* CUDA_ERROR_INVALID_CONTEXT, CUDA_ERROR_INVALID_VALUE,
* CUDA_ERROR_OUT_OF_MEMORY
*
* @see JCudaDriver#cuArray3DCreate
* @see JCudaDriver#cuArray3DGetDescriptor
* @see JCudaDriver#cuArrayCreate
* @see JCudaDriver#cuArrayDestroy
* @see JCudaDriver#cuArrayGetDescriptor
* @see JCudaDriver#cuMemAlloc
* @see JCudaDriver#cuMemAllocHost
* @see JCudaDriver#cuMemAllocPitch
* @see JCudaDriver#cuMemcpy2D
* @see JCudaDriver#cuMemcpy2DAsync
* @see JCudaDriver#cuMemcpy2DUnaligned
* @see JCudaDriver#cuMemcpy3D
* @see JCudaDriver#cuMemcpy3DAsync
* @see JCudaDriver#cuMemcpyAtoA
* @see JCudaDriver#cuMemcpyAtoD
* @see JCudaDriver#cuMemcpyAtoH
* @see JCudaDriver#cuMemcpyAtoHAsync
* @see JCudaDriver#cuMemcpyDtoA
* @see JCudaDriver#cuMemcpyDtoD
* @see JCudaDriver#cuMemcpyDtoDAsync
* @see JCudaDriver#cuMemcpyDtoH
* @see JCudaDriver#cuMemcpyDtoHAsync
* @see JCudaDriver#cuMemcpyHtoA
* @see JCudaDriver#cuMemcpyHtoAAsync
* @see JCudaDriver#cuMemcpyHtoD
* @see JCudaDriver#cuMemcpyHtoDAsync
* @see JCudaDriver#cuMemFree
* @see JCudaDriver#cuMemFreeHost
* @see JCudaDriver#cuMemGetAddressRange
* @see JCudaDriver#cuMemGetInfo
* @see JCudaDriver#cuMemHostGetDevicePointer
* @see JCudaDriver#cuMemsetD2D8
* @see JCudaDriver#cuMemsetD2D16
* @see JCudaDriver#cuMemsetD2D32
* @see JCudaDriver#cuMemsetD8
* @see JCudaDriver#cuMemsetD16
* @see JCudaDriver#cuMemsetD32
*/
public static int cuMemHostAlloc(Pointer pp, long bytes, int Flags)
{
    // Delegate to the JNI binding, then let checkResult process the status code.
    final int status = cuMemHostAllocNative(pp, bytes, Flags);
    return checkResult(status);
}
// JNI entry point backing cuMemHostAlloc; declared native, so it has no Java body.
private static native int cuMemHostAllocNative(Pointer pp, long bytes, int Flags);
/**
* Passes back device pointer of mapped pinned memory.
*
*
* CUresult cuMemHostGetDevicePointer (
* CUdeviceptr* pdptr,
* void* p,
* unsigned int Flags )
*
*
* Passes back device pointer of mapped
* pinned memory. Passes back the device pointer pdptr
* corresponding to the mapped, pinned host buffer p allocated
* by cuMemHostAlloc.
*
* cuMemHostGetDevicePointer() will fail
* if the CU_MEMHOSTALLOC_DEVICEMAP flag was not specified at the time
* the memory was allocated, or if the function is called on a GPU that
* does not support
* mapped pinned memory.
*
* Flags provides for future
* releases. For now, it must be set to 0.
*
*
* Note:
* Note that this
* function may also return error codes from previous, asynchronous
* launches.
*
*
*
*
*
* @param ret Returned device pointer (pdptr in the native signature above)
* @param p Host pointer
* @param Flags Options (must be 0)
*
* @return CUDA_SUCCESS, CUDA_ERROR_DEINITIALIZED, CUDA_ERROR_NOT_INITIALIZED,
* CUDA_ERROR_INVALID_CONTEXT, CUDA_ERROR_INVALID_VALUE
*
* @see JCudaDriver#cuArray3DCreate
* @see JCudaDriver#cuArray3DGetDescriptor
* @see JCudaDriver#cuArrayCreate
* @see JCudaDriver#cuArrayDestroy
* @see JCudaDriver#cuArrayGetDescriptor
* @see JCudaDriver#cuMemAlloc
* @see JCudaDriver#cuMemAllocHost
* @see JCudaDriver#cuMemAllocPitch
* @see JCudaDriver#cuMemcpy2D
* @see JCudaDriver#cuMemcpy2DAsync
* @see JCudaDriver#cuMemcpy2DUnaligned
* @see JCudaDriver#cuMemcpy3D
* @see JCudaDriver#cuMemcpy3DAsync
* @see JCudaDriver#cuMemcpyAtoA
* @see JCudaDriver#cuMemcpyAtoD
* @see JCudaDriver#cuMemcpyAtoH
* @see JCudaDriver#cuMemcpyAtoHAsync
* @see JCudaDriver#cuMemcpyDtoA
* @see JCudaDriver#cuMemcpyDtoD
* @see JCudaDriver#cuMemcpyDtoDAsync
* @see JCudaDriver#cuMemcpyDtoH
* @see JCudaDriver#cuMemcpyDtoHAsync
* @see JCudaDriver#cuMemcpyHtoA
* @see JCudaDriver#cuMemcpyHtoAAsync
* @see JCudaDriver#cuMemcpyHtoD
* @see JCudaDriver#cuMemcpyHtoDAsync
* @see JCudaDriver#cuMemFree
* @see JCudaDriver#cuMemFreeHost
* @see JCudaDriver#cuMemGetAddressRange
* @see JCudaDriver#cuMemGetInfo
* @see JCudaDriver#cuMemHostAlloc
* @see JCudaDriver#cuMemsetD2D8
* @see JCudaDriver#cuMemsetD2D16
* @see JCudaDriver#cuMemsetD2D32
* @see JCudaDriver#cuMemsetD8
* @see JCudaDriver#cuMemsetD16
* @see JCudaDriver#cuMemsetD32
*/
public static int cuMemHostGetDevicePointer(CUdeviceptr ret, Pointer p, int Flags)
{
    // Delegate to the JNI binding, then let checkResult process the status code.
    final int status = cuMemHostGetDevicePointerNative(ret, p, Flags);
    return checkResult(status);
}
// JNI entry point backing cuMemHostGetDevicePointer; declared native, so it has no Java body.
private static native int cuMemHostGetDevicePointerNative(CUdeviceptr ret, Pointer p, int Flags);
/**
* Passes back flags that were used for a pinned allocation.
*
*
* CUresult cuMemHostGetFlags (
* unsigned int* pFlags,
* void* p )
*
*
* Passes back flags that were used for a
* pinned allocation. Passes back the flags pFlags that were
* specified when allocating the pinned host buffer p allocated
* by cuMemHostAlloc.
*
* cuMemHostGetFlags() will fail if the
* pointer does not reside in an allocation performed by cuMemAllocHost()
* or cuMemHostAlloc().
*
*
* Note:
* Note that this
* function may also return error codes from previous, asynchronous
* launches.
*
*
*
*
*
* @param pFlags Returned flags word
* @param p Host pointer
*
* @return CUDA_SUCCESS, CUDA_ERROR_DEINITIALIZED, CUDA_ERROR_NOT_INITIALIZED,
* CUDA_ERROR_INVALID_CONTEXT, CUDA_ERROR_INVALID_VALUE
*
* @see JCudaDriver#cuMemAllocHost
* @see JCudaDriver#cuMemHostAlloc
*/
public static int cuMemHostGetFlags (int pFlags[], Pointer p)
{
    // The flags array and host pointer go to the JNI binding unchanged;
    // its status code is passed on to checkResult.
    final int status = cuMemHostGetFlagsNative(pFlags, p);
    return checkResult(status);
}
// JNI entry point backing cuMemHostGetFlags; declared native, so it has no Java body.
private static native int cuMemHostGetFlagsNative(int pFlags[], Pointer p);
/**
* Returns a handle to a compute device.
*
*
* CUresult cuDeviceGetByPCIBusId (
* CUdevice* dev,
* char* pciBusId )
*
*
* Returns a handle to a compute device.
* Returns in *dev a device handle given a PCI bus ID
* string.
*
*
* Note:
* Note that this
* function may also return error codes from previous, asynchronous
* launches.
*
*
*
*
*
* @param dev Returned device handle
* @param pciBusId String in one of the following forms: [domain]:[bus]:[device].[function] [domain]:[bus]:[device] [bus]:[device].[function] where domain, bus, device, and function are all hexadecimal values
*
* @return CUDA_SUCCESS, CUDA_ERROR_DEINITIALIZED, CUDA_ERROR_NOT_INITIALIZED,
* CUDA_ERROR_INVALID_VALUE, CUDA_ERROR_INVALID_DEVICE
*
* @see JCudaDriver#cuDeviceGet
* @see JCudaDriver#cuDeviceGetAttribute
* @see JCudaDriver#cuDeviceGetPCIBusId
*/
public static int cuDeviceGetByPCIBusId(CUdevice dev, String pciBusId)
{
    // Delegate to the JNI binding, then let checkResult process the status code.
    final int status = cuDeviceGetByPCIBusIdNative(dev, pciBusId);
    return checkResult(status);
}
// JNI entry point backing cuDeviceGetByPCIBusId; declared native, so it has no Java body.
private static native int cuDeviceGetByPCIBusIdNative(CUdevice dev, String pciBusId);
/**
* Forwards to the native binding for cuMemAllocManaged and passes the
* returned status code through checkResult.
*
* NOTE(review): the semantics of the underlying call are presumably those
* of the CUDA driver API function of the same name - confirm against the
* CUDA documentation; they are not described in this file.
*
* @param dptr Device pointer object handed unchanged to the native call
* (presumably receives the allocated address - TODO confirm)
* @param bytesize Size value handed unchanged to the native call
* (presumably the requested allocation size in bytes - TODO confirm)
* @param flags Flags handed unchanged to the native call
*
* @return The value produced by checkResult for the native status code
*/
public static int cuMemAllocManaged(CUdeviceptr dptr, long bytesize, int flags)
{
    final int status = cuMemAllocManagedNative(dptr, bytesize, flags);
    return checkResult(status);
}
// JNI entry point backing cuMemAllocManaged; declared native, so it has no Java body.
private static native int cuMemAllocManagedNative(CUdeviceptr dptr, long bytesize, int flags);
/**
* Returns a PCI Bus Id string for the device.
*
*
* CUresult cuDeviceGetPCIBusId (
* char* pciBusId,
* int len,
* CUdevice dev )
*
*
* Returns a PCI Bus Id string for the
* device. Returns an ASCII string identifying the device dev
* in the NULL-terminated string pointed to by pciBusId. len specifies the maximum length of the string that may be
* returned.
*
*
* Note:
* Note that this
* function may also return error codes from previous, asynchronous
* launches.
*
*
*
*
*
* @param pciBusId Returned identifier string for the device in the following format [domain]:[bus]:[device].[function] where domain, bus, device, and function are all hexadecimal values. pciBusId should be large enough to store 13 characters including the NULL-terminator.
* @param len Maximum length of string to store in pciBusId
* @param dev Device to get identifier string for
*
* @return CUDA_SUCCESS, CUDA_ERROR_DEINITIALIZED, CUDA_ERROR_NOT_INITIALIZED,
* CUDA_ERROR_INVALID_VALUE, CUDA_ERROR_INVALID_DEVICE
*
* @see JCudaDriver#cuDeviceGet
* @see JCudaDriver#cuDeviceGetAttribute
* @see JCudaDriver#cuDeviceGetByPCIBusId
*/
public static int cuDeviceGetPCIBusId(String pciBusId[], int len, CUdevice dev)
{
    // The output array, length limit and device go to the JNI binding
    // unchanged; its status code is passed on to checkResult.
    final int status = cuDeviceGetPCIBusIdNative(pciBusId, len, dev);
    return checkResult(status);
}
// JNI entry point backing cuDeviceGetPCIBusId; declared native, so it has no Java body.
private static native int cuDeviceGetPCIBusIdNative(String pciBusId[], int len, CUdevice dev);
/**
* Gets an interprocess handle for a previously allocated event.
*
*
* CUresult cuIpcGetEventHandle (
* CUipcEventHandle* pHandle,
* CUevent event )
*
*
* Gets an interprocess handle for a
* previously allocated event. Takes as input a previously allocated
* event. This event must
* have been created with the
* CU_EVENT_INTERPROCESS and CU_EVENT_DISABLE_TIMING flags set. This
* opaque handle may be copied into other processes and opened with
* cuIpcOpenEventHandle to allow efficient hardware synchronization
* between GPU work in different processes.
*
* After the event has been opened in
* the importing process, cuEventRecord, cuEventSynchronize,
* cuStreamWaitEvent and cuEventQuery may be used in either process.
* Performing operations on the imported event after the exported event
* has been freed with cuEventDestroy will result in undefined behavior.
*
* IPC functionality is restricted to
* devices with support for unified addressing on Linux operating
* systems.
*
*
*
* @param pHandle Pointer to a user allocated CUipcEventHandle in which to return the opaque event handle
* @param event Event allocated with CU_EVENT_INTERPROCESS and CU_EVENT_DISABLE_TIMING flags.
*
* @return CUDA_SUCCESS, CUDA_ERROR_INVALID_HANDLE, CUDA_ERROR_OUT_OF_MEMORY,
* CUDA_ERROR_MAP_FAILED
*
* @see JCudaDriver#cuEventCreate
* @see JCudaDriver#cuEventDestroy
* @see JCudaDriver#cuEventSynchronize
* @see JCudaDriver#cuEventQuery
* @see JCudaDriver#cuStreamWaitEvent
* @see JCudaDriver#cuIpcOpenEventHandle
* @see JCudaDriver#cuIpcGetMemHandle
* @see JCudaDriver#cuIpcOpenMemHandle
* @see JCudaDriver#cuIpcCloseMemHandle
*/
public static int cuIpcGetEventHandle(CUipcEventHandle pHandle, CUevent event)
{
    // Delegate to the JNI binding, then let checkResult process the status code.
    final int status = cuIpcGetEventHandleNative(pHandle, event);
    return checkResult(status);
}
// JNI entry point backing cuIpcGetEventHandle; declared native, so it has no Java body.
private static native int cuIpcGetEventHandleNative(CUipcEventHandle pHandle, CUevent event);
/**
* Opens an interprocess event handle for use in the current process.
*
*
* CUresult cuIpcOpenEventHandle (
* CUevent* phEvent,
* CUipcEventHandle handle )
*
*
* Opens an interprocess event handle for
* use in the current process. Opens an interprocess event handle exported
* from another
* process with cuIpcGetEventHandle. This
* function returns a CUevent that behaves like a locally created event
* with the CU_EVENT_DISABLE_TIMING flag specified. This event must be
* freed with cuEventDestroy.
*
* Performing operations on the imported
* event after the exported event has been freed with cuEventDestroy will
* result in undefined behavior.
*
* IPC functionality is restricted to
* devices with support for unified addressing on Linux operating
* systems.
*
*
*
* @param phEvent Returns the imported event
* @param handle Interprocess handle to open
*
* @return CUDA_SUCCESS, CUDA_ERROR_INVALID_CONTEXT, CUDA_ERROR_MAP_FAILED,
* CUDA_ERROR_PEER_ACCESS_UNSUPPORTED, CUDA_ERROR_INVALID_HANDLE
*
* @see JCudaDriver#cuEventCreate
* @see JCudaDriver#cuEventDestroy
* @see JCudaDriver#cuEventSynchronize
* @see JCudaDriver#cuEventQuery
* @see JCudaDriver#cuStreamWaitEvent
* @see JCudaDriver#cuIpcGetEventHandle
* @see JCudaDriver#cuIpcGetMemHandle
* @see JCudaDriver#cuIpcOpenMemHandle
* @see JCudaDriver#cuIpcCloseMemHandle
*/
public static int cuIpcOpenEventHandle(CUevent phEvent, CUipcEventHandle handle)
{
    // Delegate to the JNI binding, then let checkResult process the status code.
    final int status = cuIpcOpenEventHandleNative(phEvent, handle);
    return checkResult(status);
}
// JNI entry point backing cuIpcOpenEventHandle; declared native, so it has no Java body.
private static native int cuIpcOpenEventHandleNative(CUevent phEvent, CUipcEventHandle handle);
/**
* Gets an interprocess memory handle for an existing device memory
* allocation.
*
*
* CUresult cuIpcGetMemHandle (
* CUipcMemHandle* pHandle,
* CUdeviceptr dptr )
*
*
* Gets an interprocess memory
* handle for an existing device memory allocation.
*
* Takes a pointer to the base of an
* existing device memory allocation created with cuMemAlloc and exports
* it for use in another process. This is a lightweight operation and may
* be called multiple times on an allocation
* without adverse effects.
*
* If a region of memory is freed with
* cuMemFree and a subsequent call to cuMemAlloc returns memory with the
* same device address, cuIpcGetMemHandle will return a unique handle for
* the new memory.
*
* IPC functionality is restricted to
* devices with support for unified addressing on Linux operating
* systems.
*
*
*
* @param pHandle Pointer to user allocated CUipcMemHandle to return the handle in.
* @param dptr Base pointer to previously allocated device memory
*
* @return CUDA_SUCCESS, CUDA_ERROR_INVALID_HANDLE, CUDA_ERROR_OUT_OF_MEMORY,
* CUDA_ERROR_MAP_FAILED
*
* @see JCudaDriver#cuMemAlloc
* @see JCudaDriver#cuMemFree
* @see JCudaDriver#cuIpcGetEventHandle
* @see JCudaDriver#cuIpcOpenEventHandle
* @see JCudaDriver#cuIpcOpenMemHandle
* @see JCudaDriver#cuIpcCloseMemHandle
*/
public static int cuIpcGetMemHandle(CUipcMemHandle pHandle, CUdeviceptr dptr)
{
    // Delegate to the JNI binding, then let checkResult process the status code.
    final int status = cuIpcGetMemHandleNative(pHandle, dptr);
    return checkResult(status);
}
// JNI entry point backing cuIpcGetMemHandle; declared native, so it has no Java body.
private static native int cuIpcGetMemHandleNative(CUipcMemHandle pHandle, CUdeviceptr dptr);
/**
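* Opens an interprocess memory handle exported from another process
* and returns a device pointer usable in the local process.
*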
*
* CUresult cuIpcOpenMemHandle (
* CUdeviceptr* pdptr,
* CUipcMemHandle handle,
* unsigned int Flags )
*
*
* Opens an interprocess memory
* handle exported from another process and returns a device pointer
* usable in the local
* process.
*
* Maps memory exported from another
* process with cuIpcGetMemHandle into the current device address space.
* For contexts on different devices cuIpcOpenMemHandle can attempt to
* enable peer access between the devices as if the user called
* cuCtxEnablePeerAccess. This behavior is controlled by the
* CU_IPC_MEM_LAZY_ENABLE_PEER_ACCESS flag. cuDeviceCanAccessPeer can
* determine if a mapping is possible.
*
* Contexts that may open CUipcMemHandles
* are restricted in the following way. CUipcMemHandles from each CUdevice
* in a given process may only be opened by one CUcontext per CUdevice
* per other process.
*
* Memory returned from cuIpcOpenMemHandle
* must be freed with cuIpcCloseMemHandle.
*
* Calling cuMemFree on an exported memory
* region before calling cuIpcCloseMemHandle in the importing context will
* result in undefined behavior.
*
* IPC functionality is restricted to
* devices with support for unified addressing on Linux operating
* systems.
*
*
*
* @param pdptr Returned device pointer
* @param handle CUipcMemHandle to open
* @param Flags Flags for this operation. Must be specified as CU_IPC_MEM_LAZY_ENABLE_PEER_ACCESS
*
* @return CUDA_SUCCESS, CUDA_ERROR_INVALID_CONTEXT, CUDA_ERROR_MAP_FAILED,
* CUDA_ERROR_INVALID_HANDLE, CUDA_ERROR_TOO_MANY_PEERS
*
* @see JCudaDriver#cuMemAlloc
* @see JCudaDriver#cuMemFree
* @see JCudaDriver#cuIpcGetEventHandle
* @see JCudaDriver#cuIpcOpenEventHandle
* @see JCudaDriver#cuIpcGetMemHandle
* @see JCudaDriver#cuIpcCloseMemHandle
* @see JCudaDriver#cuCtxEnablePeerAccess
* @see JCudaDriver#cuDeviceCanAccessPeer
*/
public static int cuIpcOpenMemHandle(CUdeviceptr pdptr, CUipcMemHandle handle, int Flags)
{
    // Delegate to the JNI binding, then let checkResult process the status code.
    final int status = cuIpcOpenMemHandleNative(pdptr, handle, Flags);
    return checkResult(status);
}
// JNI entry point backing cuIpcOpenMemHandle; declared native, so it has no Java body.
private static native int cuIpcOpenMemHandleNative(CUdeviceptr pdptr, CUipcMemHandle handle, int Flags);
/**
* Close memory mapped with cuIpcOpenMemHandle.
*
*
* CUresult cuIpcCloseMemHandle (
* CUdeviceptr dptr )
*
*
* Close memory mapped with cuIpcOpenMemHandle.
* Unmaps memory returned by cuIpcOpenMemHandle. The original allocation
* in the exporting process as well as imported mappings in other processes
* will be unaffected.
*
* Any resources used to enable peer access
* will be freed if this is the last mapping using them.
*
* IPC functionality is restricted to
* devices with support for unified addressing on Linux operating
* systems.
*
*
*
* @param dptr Device pointer returned by cuIpcOpenMemHandle
*
* @return CUDA_SUCCESS, CUDA_ERROR_INVALID_CONTEXT, CUDA_ERROR_MAP_FAILED,
* CUDA_ERROR_INVALID_HANDLE
*
* @see JCudaDriver#cuMemAlloc
* @see JCudaDriver#cuMemFree
* @see JCudaDriver#cuIpcGetEventHandle
* @see JCudaDriver#cuIpcOpenEventHandle
* @see JCudaDriver#cuIpcGetMemHandle
* @see JCudaDriver#cuIpcOpenMemHandle
*/
public static int cuIpcCloseMemHandle(CUdeviceptr dptr)
{
    // Delegate to the JNI binding, then let checkResult process the status code.
    final int status = cuIpcCloseMemHandleNative(dptr);
    return checkResult(status);
}
// JNI entry point backing cuIpcCloseMemHandle; declared native, so it has no Java body.
private static native int cuIpcCloseMemHandleNative(CUdeviceptr dptr);
/**
* Registers an existing host memory range for use by CUDA.
*
*
* CUresult cuMemHostRegister (
* void* p,
* size_t bytesize,
* unsigned int Flags )
*
*
* Registers an existing host memory range
* for use by CUDA. Page-locks the memory range specified by p
* and bytesize and maps it for the device(s) as specified by
* Flags. This memory range also is added to the same tracking
* mechanism as cuMemHostAlloc to automatically accelerate calls to
* functions such as cuMemcpyHtoD(). Since the memory can be accessed
* directly by the device, it can be read or written with much higher
* bandwidth than pageable
* memory that has not been registered.
* Page-locking excessive amounts of memory may degrade system performance,
* since it reduces
* the amount of memory available to the
* system for paging. As a result, this function is best used sparingly
* to register staging
* areas for data exchange between host and
* device.
*
* This function has limited support on
* Mac OS X. OS 10.7 or higher is required.
*
* The Flags parameter enables
* different options to be specified that affect the allocation, as
* follows.
*
*
* -
*
* CU_MEMHOSTREGISTER_PORTABLE:
* The memory returned by this call will be considered as pinned memory
* by all CUDA contexts, not just the one that performed
* the allocation.
*
*
*
*
*
* -
*
* CU_MEMHOSTREGISTER_DEVICEMAP:
* Maps the allocation into the CUDA address space. The device pointer to
* the memory may be obtained by calling cuMemHostGetDevicePointer(). This
* feature is available only on GPUs with compute capability greater than
* or equal to 1.1.
*
*
*
*
* All of these flags are orthogonal to
* one another: a developer may page-lock memory that is portable or
* mapped with no restrictions.
*
* The CUDA context must have been created
* with the CU_CTX_MAP_HOST flag in order for the CU_MEMHOSTREGISTER_DEVICEMAP
* flag to have any effect.
*
* The CU_MEMHOSTREGISTER_DEVICEMAP flag
* may be specified on CUDA contexts for devices that do not support
* mapped pinned memory. The failure is deferred to cuMemHostGetDevicePointer()
* because the memory may be mapped into other CUDA contexts via the
* CU_MEMHOSTREGISTER_PORTABLE flag.
*
* The memory page-locked by this function
* must be unregistered with cuMemHostUnregister().
*
*
* Note:
* Note that this
* function may also return error codes from previous, asynchronous
* launches.
*
*
*
*
*
* @param p Host pointer to memory to page-lock
* @param bytesize Size in bytes of the address range to page-lock
* @param Flags Flags for allocation request
*
* @return CUDA_SUCCESS, CUDA_ERROR_DEINITIALIZED, CUDA_ERROR_NOT_INITIALIZED,
* CUDA_ERROR_INVALID_CONTEXT, CUDA_ERROR_INVALID_VALUE,
* CUDA_ERROR_OUT_OF_MEMORY, CUDA_ERROR_HOST_MEMORY_ALREADY_REGISTERED
*
* @see JCudaDriver#cuMemHostUnregister
* @see JCudaDriver#cuMemHostGetFlags
* @see JCudaDriver#cuMemHostGetDevicePointer
*/
public static int cuMemHostRegister(Pointer p, long bytesize, int Flags)
{
    // Delegate to the JNI binding, then let checkResult process the status code.
    final int status = cuMemHostRegisterNative(p, bytesize, Flags);
    return checkResult(status);
}
// JNI entry point backing cuMemHostRegister; declared native, so it has no Java body.
private static native int cuMemHostRegisterNative(Pointer p, long bytesize, int Flags);
/**
* Unregisters a memory range that was registered with cuMemHostRegister.
*
*
* CUresult cuMemHostUnregister (
* void* p )
*
*
* Unregisters a memory range that was
* registered with cuMemHostRegister. Unmaps the memory range whose base
* address is specified
* by p, and makes it pageable
* again.
*
* The base address must be the same one
* specified to cuMemHostRegister().
*
*
* Note:
* Note that this
* function may also return error codes from previous, asynchronous
* launches.
*
*
*
*
*
* @param p Host pointer to memory to unregister
*
* @return CUDA_SUCCESS, CUDA_ERROR_DEINITIALIZED, CUDA_ERROR_NOT_INITIALIZED,
* CUDA_ERROR_INVALID_CONTEXT, CUDA_ERROR_INVALID_VALUE,
* CUDA_ERROR_OUT_OF_MEMORY, CUDA_ERROR_HOST_MEMORY_NOT_REGISTERED
*
* @see JCudaDriver#cuMemHostRegister
*/
public static int cuMemHostUnregister(Pointer p)
{
    // Delegate to the JNI binding, then let checkResult process the status code.
    final int status = cuMemHostUnregisterNative(p);
    return checkResult(status);
}
// JNI entry point backing cuMemHostUnregister; declared native, so it has no Java body.
private static native int cuMemHostUnregisterNative(Pointer p);
/**
* Copies memory.
*
*
* CUresult cuMemcpy (
* CUdeviceptr dst,
* CUdeviceptr src,
* size_t ByteCount )
*
*
* Copies memory. Copies data between two
* pointers. dst and src are base pointers of the
* destination and source, respectively. ByteCount specifies
* the number of bytes to copy. Note that this function infers the type
* of the transfer (host to host, host to device,
* device to device, or device to host) from
* the pointer values. This function is only allowed in contexts which
* support unified
* addressing. Note that this function is
* synchronous.
*
*
* Note:
* Note that this
* function may also return error codes from previous, asynchronous
* launches.
*
*
*
*
*
* @param dst Destination unified virtual address space pointer
* @param src Source unified virtual address space pointer
* @param ByteCount Size of memory copy in bytes
*
* @return CUDA_SUCCESS, CUDA_ERROR_DEINITIALIZED, CUDA_ERROR_NOT_INITIALIZED,
* CUDA_ERROR_INVALID_CONTEXT, CUDA_ERROR_INVALID_VALUE
*
* @see JCudaDriver#cuArray3DCreate
* @see JCudaDriver#cuArray3DGetDescriptor
* @see JCudaDriver#cuArrayCreate
* @see JCudaDriver#cuArrayDestroy
* @see JCudaDriver#cuArrayGetDescriptor
* @see JCudaDriver#cuMemAlloc
* @see JCudaDriver#cuMemAllocHost
* @see JCudaDriver#cuMemAllocPitch
* @see JCudaDriver#cuMemcpy2D
* @see JCudaDriver#cuMemcpy2DAsync
* @see JCudaDriver#cuMemcpy2DUnaligned
* @see JCudaDriver#cuMemcpy3D
* @see JCudaDriver#cuMemcpy3DAsync
* @see JCudaDriver#cuMemcpyAtoA
* @see JCudaDriver#cuMemcpyAtoD
* @see JCudaDriver#cuMemcpyAtoH
* @see JCudaDriver#cuMemcpyAtoHAsync
* @see JCudaDriver#cuMemcpyDtoA
* @see JCudaDriver#cuMemcpyDtoH
* @see JCudaDriver#cuMemcpyDtoHAsync
* @see JCudaDriver#cuMemcpyHtoA
* @see JCudaDriver#cuMemcpyHtoAAsync
* @see JCudaDriver#cuMemcpyHtoD
* @see JCudaDriver#cuMemcpyHtoDAsync
* @see JCudaDriver#cuMemFree
* @see JCudaDriver#cuMemFreeHost
* @see JCudaDriver#cuMemGetAddressRange
* @see JCudaDriver#cuMemGetInfo
* @see JCudaDriver#cuMemHostAlloc
* @see JCudaDriver#cuMemHostGetDevicePointer
* @see JCudaDriver#cuMemsetD2D8
* @see JCudaDriver#cuMemsetD2D16
* @see JCudaDriver#cuMemsetD2D32
* @see JCudaDriver#cuMemsetD8
* @see JCudaDriver#cuMemsetD16
* @see JCudaDriver#cuMemsetD32
*/
public static int cuMemcpy(CUdeviceptr dst, CUdeviceptr src, long ByteCount)
{
    // Delegate to the JNI binding, then let checkResult process the status code.
    final int status = cuMemcpyNative(dst, src, ByteCount);
    return checkResult(status);
}
// JNI entry point backing cuMemcpy; declared native, so it has no Java body.
private static native int cuMemcpyNative(CUdeviceptr dst, CUdeviceptr src, long ByteCount);
/**
* Copies device memory between two contexts.
*
*
* CUresult cuMemcpyPeer (
* CUdeviceptr dstDevice,
* CUcontext dstContext,
* CUdeviceptr srcDevice,
* CUcontext srcContext,
* size_t ByteCount )
*
*
* Copies device memory between two contexts.
* Copies from device memory in one context to device memory in another
* context.
* dstDevice is the base device
* pointer of the destination memory and dstContext is the
* destination context. srcDevice is the base device pointer of
* the source memory and srcContext is the source context. ByteCount specifies the number of bytes to copy.
*
* Note that this function is asynchronous
* with respect to the host, but serialized with respect to all pending and
* future asynchronous
* work in the current context, srcContext, and dstContext (use cuMemcpyPeerAsync to
* avoid this synchronization).
*
*
* Note:
* Note that this
* function may also return error codes from previous, asynchronous
* launches.
*
*
*
*
*
* @param dstDevice Destination device pointer
* @param dstContext Destination context
* @param srcDevice Source device pointer
* @param srcContext Source context
* @param ByteCount Size of memory copy in bytes
*
* @return CUDA_SUCCESS, CUDA_ERROR_DEINITIALIZED, CUDA_ERROR_NOT_INITIALIZED,
* CUDA_ERROR_INVALID_CONTEXT, CUDA_ERROR_INVALID_VALUE
*
* @see JCudaDriver#cuMemcpyDtoD
* @see JCudaDriver#cuMemcpy3DPeer
* @see JCudaDriver#cuMemcpyDtoDAsync
* @see JCudaDriver#cuMemcpyPeerAsync
* @see JCudaDriver#cuMemcpy3DPeerAsync
*/
public static int cuMemcpyPeer(CUdeviceptr dstDevice, CUcontext dstContext, CUdeviceptr srcDevice, CUcontext srcContext, long ByteCount)
{
    // Route the native status code through checkResult, as the other wrappers
    // in this class do (e.g. cuMemcpy, cuMemAlloc). Previously the raw status
    // was returned directly, so this call bypassed the shared result handling
    // that every sibling wrapper applies.
    final int status = cuMemcpyPeerNative(dstDevice, dstContext, srcDevice, srcContext, ByteCount);
    return checkResult(status);
}
// JNI entry point backing cuMemcpyPeer; declared native, so it has no Java body.
private static native int cuMemcpyPeerNative(CUdeviceptr dstDevice, CUcontext dstContext, CUdeviceptr srcDevice, CUcontext srcContext, long ByteCount);
/**
* Allocates device memory.
*
*
* CUresult cuMemAlloc (
* CUdeviceptr* dptr,
* size_t bytesize )
*
*
* Allocates device memory. Allocates bytesize bytes of linear memory on the device and returns in *dptr a pointer to the allocated memory. The allocated memory is
* suitably aligned for any kind of variable. The memory is not cleared.
* If bytesize is 0, cuMemAlloc()
* returns CUDA_ERROR_INVALID_VALUE.
*
*
* Note:
* Note that this
* function may also return error codes from previous, asynchronous
* launches.
*
*
*
*
*
* @param dptr Returned device pointer
* @param bytesize Requested allocation size in bytes
*
* @return CUDA_SUCCESS, CUDA_ERROR_DEINITIALIZED, CUDA_ERROR_NOT_INITIALIZED,
* CUDA_ERROR_INVALID_CONTEXT, CUDA_ERROR_INVALID_VALUE,
* CUDA_ERROR_OUT_OF_MEMORY
*
* @see JCudaDriver#cuArray3DCreate
* @see JCudaDriver#cuArray3DGetDescriptor
* @see JCudaDriver#cuArrayCreate
* @see JCudaDriver#cuArrayDestroy
* @see JCudaDriver#cuArrayGetDescriptor
* @see JCudaDriver#cuMemAllocHost
* @see JCudaDriver#cuMemAllocPitch
* @see JCudaDriver#cuMemcpy2D
* @see JCudaDriver#cuMemcpy2DAsync
* @see JCudaDriver#cuMemcpy2DUnaligned
* @see JCudaDriver#cuMemcpy3D
* @see JCudaDriver#cuMemcpy3DAsync
* @see JCudaDriver#cuMemcpyAtoA
* @see JCudaDriver#cuMemcpyAtoD
* @see JCudaDriver#cuMemcpyAtoH
* @see JCudaDriver#cuMemcpyAtoHAsync
* @see JCudaDriver#cuMemcpyDtoA
* @see JCudaDriver#cuMemcpyDtoD
* @see JCudaDriver#cuMemcpyDtoDAsync
* @see JCudaDriver#cuMemcpyDtoH
* @see JCudaDriver#cuMemcpyDtoHAsync
* @see JCudaDriver#cuMemcpyHtoA
* @see JCudaDriver#cuMemcpyHtoAAsync
* @see JCudaDriver#cuMemcpyHtoD
* @see JCudaDriver#cuMemcpyHtoDAsync
* @see JCudaDriver#cuMemFree
* @see JCudaDriver#cuMemFreeHost
* @see JCudaDriver#cuMemGetAddressRange
* @see JCudaDriver#cuMemGetInfo
* @see JCudaDriver#cuMemHostAlloc
* @see JCudaDriver#cuMemHostGetDevicePointer
* @see JCudaDriver#cuMemsetD2D8
* @see JCudaDriver#cuMemsetD2D16
* @see JCudaDriver#cuMemsetD2D32
* @see JCudaDriver#cuMemsetD8
* @see JCudaDriver#cuMemsetD16
* @see JCudaDriver#cuMemsetD32
*/
public static int cuMemAlloc(CUdeviceptr dptr, long bytesize)
{
    // Run the native binding first, then route its status code through checkResult.
    final int status = cuMemAllocNative(dptr, bytesize);
    return checkResult(status);
}

/** Native binding behind cuMemAlloc; its int status is handed to checkResult by the wrapper. */
private static native int cuMemAllocNative(CUdeviceptr dptr, long bytesize);
/**
* Allocates pitched device memory.
*
*
* CUresult cuMemAllocPitch (
* CUdeviceptr* dptr,
* size_t* pPitch,
* size_t WidthInBytes,
* size_t Height,
* unsigned int ElementSizeBytes )
*
*
* Allocates pitched device memory.
* Allocates at least WidthInBytes * Height bytes of
* linear memory on the device and returns in *dptr a pointer
* to the allocated memory. The function may pad the allocation to ensure
* that corresponding pointers in any given
* row will continue to meet the alignment
* requirements for coalescing as the address is updated from row to row.
* ElementSizeBytes specifies the size of the largest reads and
* writes that will be performed on the memory range. ElementSizeBytes may be 4, 8 or 16 (since coalesced memory
* transactions are not possible on other data sizes). If ElementSizeBytes is smaller than the actual read/write size of a
* kernel, the kernel will run correctly, but possibly at reduced speed.
* The
* pitch returned in *pPitch by
* cuMemAllocPitch() is the width in bytes of the allocation. The intended
* usage of pitch is as a separate parameter of the allocation, used to
* compute addresses within the 2D array.
* Given the row and column of an array element of type T,
* the address is computed as:
*
* T* pElement = (T*)((char*)BaseAddress + Row * Pitch) + Column;
*
* The pitch returned by cuMemAllocPitch()
* is guaranteed to work with cuMemcpy2D() under all circumstances. For
* allocations of 2D arrays, it is recommended that programmers consider
* performing pitch allocations
* using cuMemAllocPitch(). Due to alignment
* restrictions in the hardware, this is especially true if the application
* will be performing 2D memory copies
* between different regions of device
* memory (whether linear memory or CUDA arrays).
*
* The byte alignment of the pitch returned
* by cuMemAllocPitch() is guaranteed to match or exceed the alignment
* requirement for texture binding with cuTexRefSetAddress2D().
*
*
* Note:
* Note that this
* function may also return error codes from previous, asynchronous
* launches.
*
*
*
*
*
* @param dptr Returned device pointer
* @param pPitch Returned pitch of allocation in bytes
* @param WidthInBytes Requested allocation width in bytes
* @param Height Requested allocation height in rows
* @param ElementSizeBytes Size of largest reads/writes for range
*
* @return CUDA_SUCCESS, CUDA_ERROR_DEINITIALIZED, CUDA_ERROR_NOT_INITIALIZED,
* CUDA_ERROR_INVALID_CONTEXT, CUDA_ERROR_INVALID_VALUE,
* CUDA_ERROR_OUT_OF_MEMORY
*
* @see JCudaDriver#cuArray3DCreate
* @see JCudaDriver#cuArray3DGetDescriptor
* @see JCudaDriver#cuArrayCreate
* @see JCudaDriver#cuArrayDestroy
* @see JCudaDriver#cuArrayGetDescriptor
* @see JCudaDriver#cuMemAlloc
* @see JCudaDriver#cuMemAllocHost
* @see JCudaDriver#cuMemcpy2D
* @see JCudaDriver#cuMemcpy2DAsync
* @see JCudaDriver#cuMemcpy2DUnaligned
* @see JCudaDriver#cuMemcpy3D
* @see JCudaDriver#cuMemcpy3DAsync
* @see JCudaDriver#cuMemcpyAtoA
* @see JCudaDriver#cuMemcpyAtoD
* @see JCudaDriver#cuMemcpyAtoH
* @see JCudaDriver#cuMemcpyAtoHAsync
* @see JCudaDriver#cuMemcpyDtoA
* @see JCudaDriver#cuMemcpyDtoD
* @see JCudaDriver#cuMemcpyDtoDAsync
* @see JCudaDriver#cuMemcpyDtoH
* @see JCudaDriver#cuMemcpyDtoHAsync
* @see JCudaDriver#cuMemcpyHtoA
* @see JCudaDriver#cuMemcpyHtoAAsync
* @see JCudaDriver#cuMemcpyHtoD
* @see JCudaDriver#cuMemcpyHtoDAsync
* @see JCudaDriver#cuMemFree
* @see JCudaDriver#cuMemFreeHost
* @see JCudaDriver#cuMemGetAddressRange
* @see JCudaDriver#cuMemGetInfo
* @see JCudaDriver#cuMemHostAlloc
* @see JCudaDriver#cuMemHostGetDevicePointer
* @see JCudaDriver#cuMemsetD2D8
* @see JCudaDriver#cuMemsetD2D16
* @see JCudaDriver#cuMemsetD2D32
* @see JCudaDriver#cuMemsetD8
* @see JCudaDriver#cuMemsetD16
* @see JCudaDriver#cuMemsetD32
*/
public static int cuMemAllocPitch(CUdeviceptr dptr, long pPitch[], long WidthInBytes, long Height, int ElementSizeBytes)
{
    // All arguments are forwarded untouched; the status code goes through checkResult.
    final int status = cuMemAllocPitchNative(
        dptr, pPitch, WidthInBytes, Height, ElementSizeBytes);
    return checkResult(status);
}

/** Native binding behind cuMemAllocPitch; its int status is handed to checkResult by the wrapper. */
private static native int cuMemAllocPitchNative(CUdeviceptr dptr, long pPitch[], long WidthInBytes, long Height, int ElementSizeBytes);
/**
* Frees device memory.
*
*
* CUresult cuMemFree (
* CUdeviceptr dptr )
*
*
* Frees device memory. Frees the memory
* space pointed to by dptr, which must have been returned by a
* previous call to cuMemAlloc() or cuMemAllocPitch().
*
*
* Note:
* Note that this
* function may also return error codes from previous, asynchronous
* launches.
*
*
*
*
*
* @param dptr Pointer to memory to free
*
* @return CUDA_SUCCESS, CUDA_ERROR_DEINITIALIZED, CUDA_ERROR_NOT_INITIALIZED,
* CUDA_ERROR_INVALID_CONTEXT, CUDA_ERROR_INVALID_VALUE
*
* @see JCudaDriver#cuArray3DCreate
* @see JCudaDriver#cuArray3DGetDescriptor
* @see JCudaDriver#cuArrayCreate
* @see JCudaDriver#cuArrayDestroy
* @see JCudaDriver#cuArrayGetDescriptor
* @see JCudaDriver#cuMemAlloc
* @see JCudaDriver#cuMemAllocHost
* @see JCudaDriver#cuMemAllocPitch
* @see JCudaDriver#cuMemcpy2D
* @see JCudaDriver#cuMemcpy2DAsync
* @see JCudaDriver#cuMemcpy2DUnaligned
* @see JCudaDriver#cuMemcpy3D
* @see JCudaDriver#cuMemcpy3DAsync
* @see JCudaDriver#cuMemcpyAtoA
* @see JCudaDriver#cuMemcpyAtoD
* @see JCudaDriver#cuMemcpyAtoH
* @see JCudaDriver#cuMemcpyAtoHAsync
* @see JCudaDriver#cuMemcpyDtoA
* @see JCudaDriver#cuMemcpyDtoD
* @see JCudaDriver#cuMemcpyDtoDAsync
* @see JCudaDriver#cuMemcpyDtoH
* @see JCudaDriver#cuMemcpyDtoHAsync
* @see JCudaDriver#cuMemcpyHtoA
* @see JCudaDriver#cuMemcpyHtoAAsync
* @see JCudaDriver#cuMemcpyHtoD
* @see JCudaDriver#cuMemcpyHtoDAsync
* @see JCudaDriver#cuMemFreeHost
* @see JCudaDriver#cuMemGetAddressRange
* @see JCudaDriver#cuMemGetInfo
* @see JCudaDriver#cuMemHostAlloc
* @see JCudaDriver#cuMemHostGetDevicePointer
* @see JCudaDriver#cuMemsetD2D8
* @see JCudaDriver#cuMemsetD2D16
* @see JCudaDriver#cuMemsetD2D32
* @see JCudaDriver#cuMemsetD8
* @see JCudaDriver#cuMemsetD16
* @see JCudaDriver#cuMemsetD32
*/
public static int cuMemFree(CUdeviceptr dptr)
{
    // Delegate to the native binding and pass its status code to checkResult.
    final int status = cuMemFreeNative(dptr);
    return checkResult(status);
}

/** Native binding behind cuMemFree; its int status is handed to checkResult by the wrapper. */
private static native int cuMemFreeNative(CUdeviceptr dptr);
/**
* Get information on memory allocations.
*
*
* CUresult cuMemGetAddressRange (
* CUdeviceptr* pbase,
* size_t* psize,
* CUdeviceptr dptr )
*
*
* Get information on memory allocations.
* Returns the base address in *pbase and size in *psize
* of the allocation by cuMemAlloc() or cuMemAllocPitch() that contains
* the input pointer dptr. Both parameters pbase and
* psize are optional. If one of them is NULL, it is ignored.
*
*
* Note:
* Note that this
* function may also return error codes from previous, asynchronous
* launches.
*
*
*
*
*
* @param pbase Returned base address
* @param psize Returned size of device memory allocation
* @param dptr Device pointer to query
*
* @return CUDA_SUCCESS, CUDA_ERROR_DEINITIALIZED, CUDA_ERROR_NOT_INITIALIZED,
* CUDA_ERROR_INVALID_CONTEXT, CUDA_ERROR_INVALID_VALUE
*
* @see JCudaDriver#cuArray3DCreate
* @see JCudaDriver#cuArray3DGetDescriptor
* @see JCudaDriver#cuArrayCreate
* @see JCudaDriver#cuArrayDestroy
* @see JCudaDriver#cuArrayGetDescriptor
* @see JCudaDriver#cuMemAlloc
* @see JCudaDriver#cuMemAllocHost
* @see JCudaDriver#cuMemAllocPitch
* @see JCudaDriver#cuMemcpy2D
* @see JCudaDriver#cuMemcpy2DAsync
* @see JCudaDriver#cuMemcpy2DUnaligned
* @see JCudaDriver#cuMemcpy3D
* @see JCudaDriver#cuMemcpy3DAsync
* @see JCudaDriver#cuMemcpyAtoA
* @see JCudaDriver#cuMemcpyAtoD
* @see JCudaDriver#cuMemcpyAtoH
* @see JCudaDriver#cuMemcpyAtoHAsync
* @see JCudaDriver#cuMemcpyDtoA
* @see JCudaDriver#cuMemcpyDtoD
* @see JCudaDriver#cuMemcpyDtoDAsync
* @see JCudaDriver#cuMemcpyDtoH
* @see JCudaDriver#cuMemcpyDtoHAsync
* @see JCudaDriver#cuMemcpyHtoA
* @see JCudaDriver#cuMemcpyHtoAAsync
* @see JCudaDriver#cuMemcpyHtoD
* @see JCudaDriver#cuMemcpyHtoDAsync
* @see JCudaDriver#cuMemFree
* @see JCudaDriver#cuMemFreeHost
* @see JCudaDriver#cuMemGetInfo
* @see JCudaDriver#cuMemHostAlloc
* @see JCudaDriver#cuMemHostGetDevicePointer
* @see JCudaDriver#cuMemsetD2D8
* @see JCudaDriver#cuMemsetD2D16
* @see JCudaDriver#cuMemsetD2D32
* @see JCudaDriver#cuMemsetD8
* @see JCudaDriver#cuMemsetD16
* @see JCudaDriver#cuMemsetD32
*/
public static int cuMemGetAddressRange(CUdeviceptr pbase, long psize[], CUdeviceptr dptr)
{
    // No argument checks here: pbase and psize are forwarded exactly as supplied.
    final int status = cuMemGetAddressRangeNative(pbase, psize, dptr);
    return checkResult(status);
}

/** Native binding behind cuMemGetAddressRange; its int status is handed to checkResult by the wrapper. */
private static native int cuMemGetAddressRangeNative(CUdeviceptr pbase, long psize[], CUdeviceptr dptr);
/**
* Allocates page-locked host memory.
*
*
* CUresult cuMemAllocHost (
* void** pp,
* size_t bytesize )
*
*
* Allocates page-locked host memory.
* Allocates bytesize bytes of host memory that is page-locked
* and accessible to the device. The driver tracks the virtual memory
* ranges allocated
* with this function and automatically
* accelerates calls to functions such as cuMemcpy(). Since the memory
* can be accessed directly by the device, it can be read or written with
* much higher bandwidth than pageable
* memory obtained with functions such as
* malloc(). Allocating excessive amounts of memory with cuMemAllocHost()
* may degrade system performance, since it reduces the amount of memory
* available to the system for paging. As a result, this
* function is best used sparingly to
* allocate staging areas for data exchange between host and device.
*
* Note all host memory allocated using
* cuMemHostAlloc() will automatically be immediately accessible to all
* contexts on all devices which support unified addressing (as may be
* queried
* using CU_DEVICE_ATTRIBUTE_UNIFIED_ADDRESSING).
* The device pointer that may be used to access this host memory from
* those contexts is always equal to the returned host
* pointer *pp. See Unified
* Addressing for additional details.
*
*
* Note:
* Note that this
* function may also return error codes from previous, asynchronous
* launches.
*
*
*
*
*
* @param pointer Returned host pointer to page-locked memory (pp in the CUDA C API)
* @param bytesize Requested allocation size in bytes
*
* @return CUDA_SUCCESS, CUDA_ERROR_DEINITIALIZED, CUDA_ERROR_NOT_INITIALIZED,
* CUDA_ERROR_INVALID_CONTEXT, CUDA_ERROR_INVALID_VALUE,
* CUDA_ERROR_OUT_OF_MEMORY
*
* @see JCudaDriver#cuArray3DCreate
* @see JCudaDriver#cuArray3DGetDescriptor
* @see JCudaDriver#cuArrayCreate
* @see JCudaDriver#cuArrayDestroy
* @see JCudaDriver#cuArrayGetDescriptor
* @see JCudaDriver#cuMemAlloc
* @see JCudaDriver#cuMemAllocPitch
* @see JCudaDriver#cuMemcpy2D
* @see JCudaDriver#cuMemcpy2DAsync
* @see JCudaDriver#cuMemcpy2DUnaligned
* @see JCudaDriver#cuMemcpy3D
* @see JCudaDriver#cuMemcpy3DAsync
* @see JCudaDriver#cuMemcpyAtoA
* @see JCudaDriver#cuMemcpyAtoD
* @see JCudaDriver#cuMemcpyAtoH
* @see JCudaDriver#cuMemcpyAtoHAsync
* @see JCudaDriver#cuMemcpyDtoA
* @see JCudaDriver#cuMemcpyDtoD
* @see JCudaDriver#cuMemcpyDtoDAsync
* @see JCudaDriver#cuMemcpyDtoH
* @see JCudaDriver#cuMemcpyDtoHAsync
* @see JCudaDriver#cuMemcpyHtoA
* @see JCudaDriver#cuMemcpyHtoAAsync
* @see JCudaDriver#cuMemcpyHtoD
* @see JCudaDriver#cuMemcpyHtoDAsync
* @see JCudaDriver#cuMemFree
* @see JCudaDriver#cuMemFreeHost
* @see JCudaDriver#cuMemGetAddressRange
* @see JCudaDriver#cuMemGetInfo
* @see JCudaDriver#cuMemHostAlloc
* @see JCudaDriver#cuMemHostGetDevicePointer
* @see JCudaDriver#cuMemsetD2D8
* @see JCudaDriver#cuMemsetD2D16
* @see JCudaDriver#cuMemsetD2D32
* @see JCudaDriver#cuMemsetD8
* @see JCudaDriver#cuMemsetD16
* @see JCudaDriver#cuMemsetD32
*/
public static int cuMemAllocHost(Pointer pointer, long bytesize)
{
    // Delegate to the native binding and pass its status code to checkResult.
    final int status = cuMemAllocHostNative(pointer, bytesize);
    return checkResult(status);
}

/** Native binding behind cuMemAllocHost; its int status is handed to checkResult by the wrapper. */
private static native int cuMemAllocHostNative(Pointer pp, long bytesize);
/**
* Frees page-locked host memory.
*
*
* CUresult cuMemFreeHost (
* void* p )
*
*
* Frees page-locked host memory. Frees
* the memory space pointed to by p, which must have been
* returned by a previous call to cuMemAllocHost().
*
*
* Note:
* Note that this
* function may also return error codes from previous, asynchronous
* launches.
*
*
*
*
*
* @param p Pointer to memory to free
*
* @return CUDA_SUCCESS, CUDA_ERROR_DEINITIALIZED, CUDA_ERROR_NOT_INITIALIZED,
* CUDA_ERROR_INVALID_CONTEXT, CUDA_ERROR_INVALID_VALUE
*
* @see JCudaDriver#cuArray3DCreate
* @see JCudaDriver#cuArray3DGetDescriptor
* @see JCudaDriver#cuArrayCreate
* @see JCudaDriver#cuArrayDestroy
* @see JCudaDriver#cuArrayGetDescriptor
* @see JCudaDriver#cuMemAlloc
* @see JCudaDriver#cuMemAllocHost
* @see JCudaDriver#cuMemAllocPitch
* @see JCudaDriver#cuMemcpy2D
* @see JCudaDriver#cuMemcpy2DAsync
* @see JCudaDriver#cuMemcpy2DUnaligned
* @see JCudaDriver#cuMemcpy3D
* @see JCudaDriver#cuMemcpy3DAsync
* @see JCudaDriver#cuMemcpyAtoA
* @see JCudaDriver#cuMemcpyAtoD
* @see JCudaDriver#cuMemcpyAtoH
* @see JCudaDriver#cuMemcpyAtoHAsync
* @see JCudaDriver#cuMemcpyDtoA
* @see JCudaDriver#cuMemcpyDtoD
* @see JCudaDriver#cuMemcpyDtoDAsync
* @see JCudaDriver#cuMemcpyDtoH
* @see JCudaDriver#cuMemcpyDtoHAsync
* @see JCudaDriver#cuMemcpyHtoA
* @see JCudaDriver#cuMemcpyHtoAAsync
* @see JCudaDriver#cuMemcpyHtoD
* @see JCudaDriver#cuMemcpyHtoDAsync
* @see JCudaDriver#cuMemFree
* @see JCudaDriver#cuMemGetAddressRange
* @see JCudaDriver#cuMemGetInfo
* @see JCudaDriver#cuMemHostAlloc
* @see JCudaDriver#cuMemHostGetDevicePointer
* @see JCudaDriver#cuMemsetD2D8
* @see JCudaDriver#cuMemsetD2D16
* @see JCudaDriver#cuMemsetD2D32
* @see JCudaDriver#cuMemsetD8
* @see JCudaDriver#cuMemsetD16
* @see JCudaDriver#cuMemsetD32
*/
public static int cuMemFreeHost(Pointer p)
{
    // Delegate to the native binding and pass its status code to checkResult.
    final int status = cuMemFreeHostNative(p);
    return checkResult(status);
}

/** Native binding behind cuMemFreeHost; its int status is handed to checkResult by the wrapper. */
private static native int cuMemFreeHostNative(Pointer p);
/**
* Copies memory from Host to Device.
*
*
* CUresult cuMemcpyHtoD (
* CUdeviceptr dstDevice,
* const void* srcHost,
* size_t ByteCount )
*
*
* Copies memory from Host to Device.
* Copies from host memory to device memory. dstDevice and srcHost are the base addresses of the destination and source,
* respectively. ByteCount specifies the number of bytes to
* copy. Note that this function is synchronous.
*
*
* Note:
* Note that this
* function may also return error codes from previous, asynchronous
* launches.
*
*
*
*
*
* @param dstDevice Destination device pointer
* @param srcHost Source host pointer
* @param ByteCount Size of memory copy in bytes
*
* @return CUDA_SUCCESS, CUDA_ERROR_DEINITIALIZED, CUDA_ERROR_NOT_INITIALIZED,
* CUDA_ERROR_INVALID_CONTEXT, CUDA_ERROR_INVALID_VALUE
*
* @see JCudaDriver#cuArray3DCreate
* @see JCudaDriver#cuArray3DGetDescriptor
* @see JCudaDriver#cuArrayCreate
* @see JCudaDriver#cuArrayDestroy
* @see JCudaDriver#cuArrayGetDescriptor
* @see JCudaDriver#cuMemAlloc
* @see JCudaDriver#cuMemAllocHost
* @see JCudaDriver#cuMemAllocPitch
* @see JCudaDriver#cuMemcpy2D
* @see JCudaDriver#cuMemcpy2DAsync
* @see JCudaDriver#cuMemcpy2DUnaligned
* @see JCudaDriver#cuMemcpy3D
* @see JCudaDriver#cuMemcpy3DAsync
* @see JCudaDriver#cuMemcpyAtoA
* @see JCudaDriver#cuMemcpyAtoD
* @see JCudaDriver#cuMemcpyAtoH
* @see JCudaDriver#cuMemcpyAtoHAsync
* @see JCudaDriver#cuMemcpyDtoA
* @see JCudaDriver#cuMemcpyDtoD
* @see JCudaDriver#cuMemcpyDtoDAsync
* @see JCudaDriver#cuMemcpyDtoH
* @see JCudaDriver#cuMemcpyDtoHAsync
* @see JCudaDriver#cuMemcpyHtoA
* @see JCudaDriver#cuMemcpyHtoAAsync
* @see JCudaDriver#cuMemcpyHtoDAsync
* @see JCudaDriver#cuMemFree
* @see JCudaDriver#cuMemFreeHost
* @see JCudaDriver#cuMemGetAddressRange
* @see JCudaDriver#cuMemGetInfo
* @see JCudaDriver#cuMemHostAlloc
* @see JCudaDriver#cuMemHostGetDevicePointer
* @see JCudaDriver#cuMemsetD2D8
* @see JCudaDriver#cuMemsetD2D16
* @see JCudaDriver#cuMemsetD2D32
* @see JCudaDriver#cuMemsetD8
* @see JCudaDriver#cuMemsetD16
* @see JCudaDriver#cuMemsetD32
*/
public static int cuMemcpyHtoD(CUdeviceptr dstDevice, Pointer srcHost, long ByteCount)
{
    // Arguments keep the order destination, source, byte count when forwarded.
    final int status = cuMemcpyHtoDNative(dstDevice, srcHost, ByteCount);
    return checkResult(status);
}

/** Native binding behind cuMemcpyHtoD; its int status is handed to checkResult by the wrapper. */
private static native int cuMemcpyHtoDNative(CUdeviceptr dstDevice, Pointer srcHost, long ByteCount);
/**
* Copies memory from Device to Host.
*
*
* CUresult cuMemcpyDtoH (
* void* dstHost,
* CUdeviceptr srcDevice,
* size_t ByteCount )
*
*
* Copies memory from Device to Host.
* Copies from device to host memory. dstHost and srcDevice specify the base pointers of the destination and
* source, respectively. ByteCount specifies the number of bytes
* to copy. Note that this function is synchronous.
*
*
* Note:
* Note that this
* function may also return error codes from previous, asynchronous
* launches.
*
*
*
*
*
* @param dstHost Destination host pointer
* @param srcDevice Source device pointer
* @param ByteCount Size of memory copy in bytes
*
* @return CUDA_SUCCESS, CUDA_ERROR_DEINITIALIZED, CUDA_ERROR_NOT_INITIALIZED,
* CUDA_ERROR_INVALID_CONTEXT, CUDA_ERROR_INVALID_VALUE
*
* @see JCudaDriver#cuArray3DCreate
* @see JCudaDriver#cuArray3DGetDescriptor
* @see JCudaDriver#cuArrayCreate
* @see JCudaDriver#cuArrayDestroy
* @see JCudaDriver#cuArrayGetDescriptor
* @see JCudaDriver#cuMemAlloc
* @see JCudaDriver#cuMemAllocHost
* @see JCudaDriver#cuMemAllocPitch
* @see JCudaDriver#cuMemcpy2D
* @see JCudaDriver#cuMemcpy2DAsync
* @see JCudaDriver#cuMemcpy2DUnaligned
* @see JCudaDriver#cuMemcpy3D
* @see JCudaDriver#cuMemcpy3DAsync
* @see JCudaDriver#cuMemcpyAtoA
* @see JCudaDriver#cuMemcpyAtoD
* @see JCudaDriver#cuMemcpyAtoH
* @see JCudaDriver#cuMemcpyAtoHAsync
* @see JCudaDriver#cuMemcpyDtoA
* @see JCudaDriver#cuMemcpyDtoD
* @see JCudaDriver#cuMemcpyDtoDAsync
* @see JCudaDriver#cuMemcpyDtoHAsync
* @see JCudaDriver#cuMemcpyHtoA
* @see JCudaDriver#cuMemcpyHtoAAsync
* @see JCudaDriver#cuMemcpyHtoD
* @see JCudaDriver#cuMemcpyHtoDAsync
* @see JCudaDriver#cuMemFree
* @see JCudaDriver#cuMemFreeHost
* @see JCudaDriver#cuMemGetAddressRange
* @see JCudaDriver#cuMemGetInfo
* @see JCudaDriver#cuMemHostAlloc
* @see JCudaDriver#cuMemHostGetDevicePointer
* @see JCudaDriver#cuMemsetD2D8
* @see JCudaDriver#cuMemsetD2D16
* @see JCudaDriver#cuMemsetD2D32
* @see JCudaDriver#cuMemsetD8
* @see JCudaDriver#cuMemsetD16
* @see JCudaDriver#cuMemsetD32
*/
public static int cuMemcpyDtoH(Pointer dstHost, CUdeviceptr srcDevice, long ByteCount)
{
    // Arguments keep the order destination, source, byte count when forwarded.
    final int status = cuMemcpyDtoHNative(dstHost, srcDevice, ByteCount);
    return checkResult(status);
}

/** Native binding behind cuMemcpyDtoH; its int status is handed to checkResult by the wrapper. */
private static native int cuMemcpyDtoHNative(Pointer dstHost, CUdeviceptr srcDevice, long ByteCount);
/**
* Copies memory from Device to Device.
*
*
* CUresult cuMemcpyDtoD (
* CUdeviceptr dstDevice,
* CUdeviceptr srcDevice,
* size_t ByteCount )
*
*
* Copies memory from Device to Device.
* Copies from device memory to device memory. dstDevice and
* srcDevice are the base pointers of the destination and
* source, respectively. ByteCount specifies the number of bytes
* to copy. Note that this function is asynchronous.
*
*
* Note:
* Note that this
* function may also return error codes from previous, asynchronous
* launches.
*
*
*
*
*
* @param dstDevice Destination device pointer
* @param srcDevice Source device pointer
* @param ByteCount Size of memory copy in bytes
*
* @return CUDA_SUCCESS, CUDA_ERROR_DEINITIALIZED, CUDA_ERROR_NOT_INITIALIZED,
* CUDA_ERROR_INVALID_CONTEXT, CUDA_ERROR_INVALID_VALUE
*
* @see JCudaDriver#cuArray3DCreate
* @see JCudaDriver#cuArray3DGetDescriptor
* @see JCudaDriver#cuArrayCreate
* @see JCudaDriver#cuArrayDestroy
* @see JCudaDriver#cuArrayGetDescriptor
* @see JCudaDriver#cuMemAlloc
* @see JCudaDriver#cuMemAllocHost
* @see JCudaDriver#cuMemAllocPitch
* @see JCudaDriver#cuMemcpy2D
* @see JCudaDriver#cuMemcpy2DAsync
* @see JCudaDriver#cuMemcpy2DUnaligned
* @see JCudaDriver#cuMemcpy3D
* @see JCudaDriver#cuMemcpy3DAsync
* @see JCudaDriver#cuMemcpyAtoA
* @see JCudaDriver#cuMemcpyAtoD
* @see JCudaDriver#cuMemcpyAtoH
* @see JCudaDriver#cuMemcpyAtoHAsync
* @see JCudaDriver#cuMemcpyDtoA
* @see JCudaDriver#cuMemcpyDtoH
* @see JCudaDriver#cuMemcpyDtoHAsync
* @see JCudaDriver#cuMemcpyHtoA
* @see JCudaDriver#cuMemcpyHtoAAsync
* @see JCudaDriver#cuMemcpyHtoD
* @see JCudaDriver#cuMemcpyHtoDAsync
* @see JCudaDriver#cuMemFree
* @see JCudaDriver#cuMemFreeHost
* @see JCudaDriver#cuMemGetAddressRange
* @see JCudaDriver#cuMemGetInfo
* @see JCudaDriver#cuMemHostAlloc
* @see JCudaDriver#cuMemHostGetDevicePointer
* @see JCudaDriver#cuMemsetD2D8
* @see JCudaDriver#cuMemsetD2D16
* @see JCudaDriver#cuMemsetD2D32
* @see JCudaDriver#cuMemsetD8
* @see JCudaDriver#cuMemsetD16
* @see JCudaDriver#cuMemsetD32
*/
public static int cuMemcpyDtoD(CUdeviceptr dstDevice, CUdeviceptr srcDevice, long ByteCount)
{
    // Arguments keep the order destination, source, byte count when forwarded.
    final int status = cuMemcpyDtoDNative(dstDevice, srcDevice, ByteCount);
    return checkResult(status);
}

/** Native binding behind cuMemcpyDtoD; its int status is handed to checkResult by the wrapper. */
private static native int cuMemcpyDtoDNative(CUdeviceptr dstDevice, CUdeviceptr srcDevice, long ByteCount);
/**
* Copies memory from Device to Array.
*
*
* CUresult cuMemcpyDtoA (
* CUarray dstArray,
* size_t dstOffset,
* CUdeviceptr srcDevice,
* size_t ByteCount )
*
*
* Copies memory from Device to Array.
* Copies from device memory to a 1D CUDA array. dstArray and
* dstOffset specify the CUDA array handle and starting index
* of the destination data. srcDevice specifies the base pointer
* of the source. ByteCount specifies the number of bytes to
* copy.
*
*
* Note:
* Note that this
* function may also return error codes from previous, asynchronous
* launches.
*
*
*
*
*
* @param dstArray Destination array
* @param dstIndex Offset in bytes of destination array (dstOffset in the CUDA C API)
* @param srcDevice Source device pointer
* @param ByteCount Size of memory copy in bytes
*
* @return CUDA_SUCCESS, CUDA_ERROR_DEINITIALIZED, CUDA_ERROR_NOT_INITIALIZED,
* CUDA_ERROR_INVALID_CONTEXT, CUDA_ERROR_INVALID_VALUE
*
* @see JCudaDriver#cuArray3DCreate
* @see JCudaDriver#cuArray3DGetDescriptor
* @see JCudaDriver#cuArrayCreate
* @see JCudaDriver#cuArrayDestroy
* @see JCudaDriver#cuArrayGetDescriptor
* @see JCudaDriver#cuMemAlloc
* @see JCudaDriver#cuMemAllocHost
* @see JCudaDriver#cuMemAllocPitch
* @see JCudaDriver#cuMemcpy2D
* @see JCudaDriver#cuMemcpy2DAsync
* @see JCudaDriver#cuMemcpy2DUnaligned
* @see JCudaDriver#cuMemcpy3D
* @see JCudaDriver#cuMemcpy3DAsync
* @see JCudaDriver#cuMemcpyAtoA
* @see JCudaDriver#cuMemcpyAtoD
* @see JCudaDriver#cuMemcpyAtoH
* @see JCudaDriver#cuMemcpyAtoHAsync
* @see JCudaDriver#cuMemcpyDtoD
* @see JCudaDriver#cuMemcpyDtoDAsync
* @see JCudaDriver#cuMemcpyDtoH
* @see JCudaDriver#cuMemcpyDtoHAsync
* @see JCudaDriver#cuMemcpyHtoA
* @see JCudaDriver#cuMemcpyHtoAAsync
* @see JCudaDriver#cuMemcpyHtoD
* @see JCudaDriver#cuMemcpyHtoDAsync
* @see JCudaDriver#cuMemFree
* @see JCudaDriver#cuMemFreeHost
* @see JCudaDriver#cuMemGetAddressRange
* @see JCudaDriver#cuMemGetInfo
* @see JCudaDriver#cuMemHostAlloc
* @see JCudaDriver#cuMemHostGetDevicePointer
* @see JCudaDriver#cuMemsetD2D8
* @see JCudaDriver#cuMemsetD2D16
* @see JCudaDriver#cuMemsetD2D32
* @see JCudaDriver#cuMemsetD8
* @see JCudaDriver#cuMemsetD16
* @see JCudaDriver#cuMemsetD32
*/
public static int cuMemcpyDtoA(CUarray dstArray, long dstIndex, CUdeviceptr srcDevice, long ByteCount)
{
    // Forward the array handle, offset, source pointer and size unchanged.
    final int status = cuMemcpyDtoANative(dstArray, dstIndex, srcDevice, ByteCount);
    return checkResult(status);
}

/** Native binding behind cuMemcpyDtoA; its int status is handed to checkResult by the wrapper. */
private static native int cuMemcpyDtoANative(CUarray dstArray, long dstIndex, CUdeviceptr srcDevice, long ByteCount);
/**
* Copies memory from Array to Device.
*
*
* CUresult cuMemcpyAtoD (
* CUdeviceptr dstDevice,
* CUarray srcArray,
* size_t srcOffset,
* size_t ByteCount )
*
*
* Copies memory from Array to Device.
* Copies from one 1D CUDA array to device memory. dstDevice
* specifies the base pointer of the destination and must be naturally
* aligned with the CUDA array elements. srcArray and srcOffset specify the CUDA array handle and the offset in bytes
* into the array where the copy is to begin. ByteCount specifies
* the number of bytes to copy and must be evenly divisible by the array
* element size.
*
*
* Note:
* Note that this
* function may also return error codes from previous, asynchronous
* launches.
*
*
*
*
*
* @param dstDevice Destination device pointer
* @param hSrc Source array (srcArray in the CUDA C API)
* @param SrcIndex Offset in bytes of source array (srcOffset in the CUDA C API)
* @param ByteCount Size of memory copy in bytes
*
* @return CUDA_SUCCESS, CUDA_ERROR_DEINITIALIZED, CUDA_ERROR_NOT_INITIALIZED,
* CUDA_ERROR_INVALID_CONTEXT, CUDA_ERROR_INVALID_VALUE
*
* @see JCudaDriver#cuArray3DCreate
* @see JCudaDriver#cuArray3DGetDescriptor
* @see JCudaDriver#cuArrayCreate
* @see JCudaDriver#cuArrayDestroy
* @see JCudaDriver#cuArrayGetDescriptor
* @see JCudaDriver#cuMemAlloc
* @see JCudaDriver#cuMemAllocHost
* @see JCudaDriver#cuMemAllocPitch
* @see JCudaDriver#cuMemcpy2D
* @see JCudaDriver#cuMemcpy2DAsync
* @see JCudaDriver#cuMemcpy2DUnaligned
* @see JCudaDriver#cuMemcpy3D
* @see JCudaDriver#cuMemcpy3DAsync
* @see JCudaDriver#cuMemcpyAtoA
* @see JCudaDriver#cuMemcpyAtoH
* @see JCudaDriver#cuMemcpyAtoHAsync
* @see JCudaDriver#cuMemcpyDtoA
* @see JCudaDriver#cuMemcpyDtoD
* @see JCudaDriver#cuMemcpyDtoDAsync
* @see JCudaDriver#cuMemcpyDtoH
* @see JCudaDriver#cuMemcpyDtoHAsync
* @see JCudaDriver#cuMemcpyHtoA
* @see JCudaDriver#cuMemcpyHtoAAsync
* @see JCudaDriver#cuMemcpyHtoD
* @see JCudaDriver#cuMemcpyHtoDAsync
* @see JCudaDriver#cuMemFree
* @see JCudaDriver#cuMemFreeHost
* @see JCudaDriver#cuMemGetAddressRange
* @see JCudaDriver#cuMemGetInfo
* @see JCudaDriver#cuMemHostAlloc
* @see JCudaDriver#cuMemHostGetDevicePointer
* @see JCudaDriver#cuMemsetD2D8
* @see JCudaDriver#cuMemsetD2D16
* @see JCudaDriver#cuMemsetD2D32
* @see JCudaDriver#cuMemsetD8
* @see JCudaDriver#cuMemsetD16
* @see JCudaDriver#cuMemsetD32
*/
public static int cuMemcpyAtoD(CUdeviceptr dstDevice, CUarray hSrc, long SrcIndex, long ByteCount)
{
    // Forward the destination pointer, array handle, offset and size unchanged.
    final int status = cuMemcpyAtoDNative(dstDevice, hSrc, SrcIndex, ByteCount);
    return checkResult(status);
}

/** Native binding behind cuMemcpyAtoD; its int status is handed to checkResult by the wrapper. */
private static native int cuMemcpyAtoDNative(CUdeviceptr dstDevice, CUarray hSrc, long SrcIndex, long ByteCount);
/**
* Copies memory from Host to Array.
*
*
* CUresult cuMemcpyHtoA (
* CUarray dstArray,
* size_t dstOffset,
* const void* srcHost,
* size_t ByteCount )
*
*
* Copies memory from Host to Array. Copies
* from host memory to a 1D CUDA array. dstArray and dstOffset specify the CUDA array handle and starting offset in
* bytes of the destination data. pSrc specifies the base
* address of the source. ByteCount specifies the number of
* bytes to copy.
*
*
* Note:
* Note that this
* function may also return error codes from previous, asynchronous
* launches.
*
*
*
*
*
* @param dstArray Destination array
* @param dstIndex Offset in bytes of destination array (dstOffset in the CUDA C API)
* @param pSrc Source host pointer (srcHost in the CUDA C API)
* @param ByteCount Size of memory copy in bytes
*
* @return CUDA_SUCCESS, CUDA_ERROR_DEINITIALIZED, CUDA_ERROR_NOT_INITIALIZED,
* CUDA_ERROR_INVALID_CONTEXT, CUDA_ERROR_INVALID_VALUE
*
* @see JCudaDriver#cuArray3DCreate
* @see JCudaDriver#cuArray3DGetDescriptor
* @see JCudaDriver#cuArrayCreate
* @see JCudaDriver#cuArrayDestroy
* @see JCudaDriver#cuArrayGetDescriptor
* @see JCudaDriver#cuMemAlloc
* @see JCudaDriver#cuMemAllocHost
* @see JCudaDriver#cuMemAllocPitch
* @see JCudaDriver#cuMemcpy2D
* @see JCudaDriver#cuMemcpy2DAsync
* @see JCudaDriver#cuMemcpy2DUnaligned
* @see JCudaDriver#cuMemcpy3D
* @see JCudaDriver#cuMemcpy3DAsync
* @see JCudaDriver#cuMemcpyAtoA
* @see JCudaDriver#cuMemcpyAtoD
* @see JCudaDriver#cuMemcpyAtoH
* @see JCudaDriver#cuMemcpyAtoHAsync
* @see JCudaDriver#cuMemcpyDtoA
* @see JCudaDriver#cuMemcpyDtoD
* @see JCudaDriver#cuMemcpyDtoDAsync
* @see JCudaDriver#cuMemcpyDtoH
* @see JCudaDriver#cuMemcpyDtoHAsync
* @see JCudaDriver#cuMemcpyHtoAAsync
* @see JCudaDriver#cuMemcpyHtoD
* @see JCudaDriver#cuMemcpyHtoDAsync
* @see JCudaDriver#cuMemFree
* @see JCudaDriver#cuMemFreeHost
* @see JCudaDriver#cuMemGetAddressRange
* @see JCudaDriver#cuMemGetInfo
* @see JCudaDriver#cuMemHostAlloc
* @see JCudaDriver#cuMemHostGetDevicePointer
* @see JCudaDriver#cuMemsetD2D8
* @see JCudaDriver#cuMemsetD2D16
* @see JCudaDriver#cuMemsetD2D32
* @see JCudaDriver#cuMemsetD8
* @see JCudaDriver#cuMemsetD16
* @see JCudaDriver#cuMemsetD32
*/
public static int cuMemcpyHtoA(CUarray dstArray, long dstIndex, Pointer pSrc, long ByteCount)
{
    // Forward the array handle, offset, host pointer and size unchanged.
    final int status = cuMemcpyHtoANative(dstArray, dstIndex, pSrc, ByteCount);
    return checkResult(status);
}

/** Native binding behind cuMemcpyHtoA; its int status is handed to checkResult by the wrapper. */
private static native int cuMemcpyHtoANative(CUarray dstArray, long dstIndex, Pointer pSrc, long ByteCount);
/**
* Copies memory from Array to Host.
*
*
* CUresult cuMemcpyAtoH (
* void* dstHost,
* CUarray srcArray,
* size_t srcOffset,
* size_t ByteCount )
*
*
* Copies memory from Array to Host. Copies
* from one 1D CUDA array to host memory. dstHost specifies the
* base pointer of the destination. srcArray and srcOffset specify the CUDA array handle and starting offset in
* bytes of the source data. ByteCount specifies the number of
* bytes to copy.
*
*
* Note:
* Note that this
* function may also return error codes from previous, asynchronous
* launches.
*
*
*
*
*
* @param dstHost Destination host pointer
* @param srcArray Source array
* @param srcIndex Offset in bytes of source array (srcOffset in the CUDA C API)
* @param ByteCount Size of memory copy in bytes
*
* @return CUDA_SUCCESS, CUDA_ERROR_DEINITIALIZED, CUDA_ERROR_NOT_INITIALIZED,
* CUDA_ERROR_INVALID_CONTEXT, CUDA_ERROR_INVALID_VALUE
*
* @see JCudaDriver#cuArray3DCreate
* @see JCudaDriver#cuArray3DGetDescriptor
* @see JCudaDriver#cuArrayCreate
* @see JCudaDriver#cuArrayDestroy
* @see JCudaDriver#cuArrayGetDescriptor
* @see JCudaDriver#cuMemAlloc
* @see JCudaDriver#cuMemAllocHost
* @see JCudaDriver#cuMemAllocPitch
* @see JCudaDriver#cuMemcpy2D
* @see JCudaDriver#cuMemcpy2DAsync
* @see JCudaDriver#cuMemcpy2DUnaligned
* @see JCudaDriver#cuMemcpy3D
* @see JCudaDriver#cuMemcpy3DAsync
* @see JCudaDriver#cuMemcpyAtoA
* @see JCudaDriver#cuMemcpyAtoD
* @see JCudaDriver#cuMemcpyAtoHAsync
* @see JCudaDriver#cuMemcpyDtoA
* @see JCudaDriver#cuMemcpyDtoD
* @see JCudaDriver#cuMemcpyDtoDAsync
* @see JCudaDriver#cuMemcpyDtoH
* @see JCudaDriver#cuMemcpyDtoHAsync
* @see JCudaDriver#cuMemcpyHtoA
* @see JCudaDriver#cuMemcpyHtoAAsync
* @see JCudaDriver#cuMemcpyHtoD
* @see JCudaDriver#cuMemcpyHtoDAsync
* @see JCudaDriver#cuMemFree
* @see JCudaDriver#cuMemFreeHost
* @see JCudaDriver#cuMemGetAddressRange
* @see JCudaDriver#cuMemGetInfo
* @see JCudaDriver#cuMemHostAlloc
* @see JCudaDriver#cuMemHostGetDevicePointer
* @see JCudaDriver#cuMemsetD2D8
* @see JCudaDriver#cuMemsetD2D16
* @see JCudaDriver#cuMemsetD2D32
* @see JCudaDriver#cuMemsetD8
* @see JCudaDriver#cuMemsetD16
* @see JCudaDriver#cuMemsetD32
*/
public static int cuMemcpyAtoH(Pointer dstHost, CUarray srcArray, long srcIndex, long ByteCount)
{
    // Forward the host pointer, array handle, offset and size unchanged.
    final int status = cuMemcpyAtoHNative(dstHost, srcArray, srcIndex, ByteCount);
    return checkResult(status);
}

/** Native binding behind cuMemcpyAtoH; its int status is handed to checkResult by the wrapper. */
private static native int cuMemcpyAtoHNative(Pointer dstHost, CUarray srcArray, long srcIndex, long ByteCount);
/**
* Copies memory from Array to Array.
*
*
* CUresult cuMemcpyAtoA (
* CUarray dstArray,
* size_t dstOffset,
* CUarray srcArray,
* size_t srcOffset,
* size_t ByteCount )
*
*
* Copies memory from Array to Array.
* Copies from one 1D CUDA array to another. dstArray and srcArray specify the handles of the destination and source CUDA
* arrays for the copy, respectively. dstOffset and srcOffset specify the destination and source offsets in bytes
* into the CUDA arrays. ByteCount is the number of bytes to be
* copied. The elements in the CUDA arrays need not have the same
* format, but they must be the same size, and ByteCount must be
* evenly divisible by that size.
*
*
* Note:
* Note that this
* function may also return error codes from previous, asynchronous
* launches.
*
*
*
*
*
* @param dstArray Destination array
* @param dstIndex Offset in bytes of destination array (dstOffset in the CUDA C API)
* @param srcArray Source array
* @param srcIndex Offset in bytes of source array (srcOffset in the CUDA C API)
* @param ByteCount Size of memory copy in bytes
*
* @return CUDA_SUCCESS, CUDA_ERROR_DEINITIALIZED, CUDA_ERROR_NOT_INITIALIZED,
* CUDA_ERROR_INVALID_CONTEXT, CUDA_ERROR_INVALID_VALUE
*
* @see JCudaDriver#cuArray3DCreate
* @see JCudaDriver#cuArray3DGetDescriptor
* @see JCudaDriver#cuArrayCreate
* @see JCudaDriver#cuArrayDestroy
* @see JCudaDriver#cuArrayGetDescriptor
* @see JCudaDriver#cuMemAlloc
* @see JCudaDriver#cuMemAllocHost
* @see JCudaDriver#cuMemAllocPitch
* @see JCudaDriver#cuMemcpy2D
* @see JCudaDriver#cuMemcpy2DAsync
* @see JCudaDriver#cuMemcpy2DUnaligned
* @see JCudaDriver#cuMemcpy3D
* @see JCudaDriver#cuMemcpy3DAsync
* @see JCudaDriver#cuMemcpyAtoD
* @see JCudaDriver#cuMemcpyAtoH
* @see JCudaDriver#cuMemcpyAtoHAsync
* @see JCudaDriver#cuMemcpyDtoA
* @see JCudaDriver#cuMemcpyDtoD
* @see JCudaDriver#cuMemcpyDtoDAsync
* @see JCudaDriver#cuMemcpyDtoH
* @see JCudaDriver#cuMemcpyDtoHAsync
* @see JCudaDriver#cuMemcpyHtoA
* @see JCudaDriver#cuMemcpyHtoAAsync
* @see JCudaDriver#cuMemcpyHtoD
* @see JCudaDriver#cuMemcpyHtoDAsync
* @see JCudaDriver#cuMemFree
* @see JCudaDriver#cuMemFreeHost
* @see JCudaDriver#cuMemGetAddressRange
* @see JCudaDriver#cuMemGetInfo
* @see JCudaDriver#cuMemHostAlloc
* @see JCudaDriver#cuMemHostGetDevicePointer
* @see JCudaDriver#cuMemsetD2D8
* @see JCudaDriver#cuMemsetD2D16
* @see JCudaDriver#cuMemsetD2D32
* @see JCudaDriver#cuMemsetD8
* @see JCudaDriver#cuMemsetD16
* @see JCudaDriver#cuMemsetD32
*/
public static int cuMemcpyAtoA(CUarray dstArray, long dstIndex, CUarray srcArray, long srcIndex, long ByteCount)
{
    // Run the native copy first, then hand its status code to checkResult.
    final int status = cuMemcpyAtoANative(dstArray, dstIndex, srcArray, srcIndex, ByteCount);
    return checkResult(status);
}

/** Native method invoked by {@link #cuMemcpyAtoA}; arguments are forwarded unchanged. */
private static native int cuMemcpyAtoANative(
    CUarray dstArray, long dstIndex, CUarray srcArray, long srcIndex, long ByteCount);
/**
* Copies memory for 2D arrays.
*
*
* CUresult cuMemcpy2D (
* const CUDA_MEMCPY2D* pCopy )
*
*
* Copies memory for 2D arrays. Perform a
* 2D memory copy according to the parameters specified in pCopy.
* The CUDA_MEMCPY2D structure is defined as:
*
* typedef struct CUDA_MEMCPY2D_st {
* unsigned int srcXInBytes, srcY;
* CUmemorytype srcMemoryType;
* const void *srcHost;
* CUdeviceptr srcDevice;
* CUarray srcArray;
* unsigned int srcPitch;
*
* unsigned int dstXInBytes, dstY;
* CUmemorytype dstMemoryType;
* void *dstHost;
* CUdeviceptr dstDevice;
* CUarray dstArray;
* unsigned int dstPitch;
*
* unsigned int WidthInBytes;
* unsigned int Height;
* } CUDA_MEMCPY2D;
* where:
*
* -
*
srcMemoryType and dstMemoryType
* specify the type of memory of the source and destination, respectively;
* CUmemorytype_enum
* is defined as:
*
*
*
*
* typedef enum CUmemorytype_enum {
* CU_MEMORYTYPE_HOST = 0x01,
* CU_MEMORYTYPE_DEVICE = 0x02,
* CU_MEMORYTYPE_ARRAY = 0x03,
* CU_MEMORYTYPE_UNIFIED = 0x04
* } CUmemorytype;
*
* If srcMemoryType is CU_MEMORYTYPE_UNIFIED,
* srcDevice and srcPitch specify the (unified virtual address space) base
* address of the source data and the bytes per row
* to apply. srcArray is ignored. This value
* may be used only if unified addressing is supported in the calling
* context.
*
* If srcMemoryType is CU_MEMORYTYPE_HOST,
* srcHost and srcPitch specify the (host) base address of the source data
* and the bytes per row to apply. srcArray is ignored.
*
* If srcMemoryType is CU_MEMORYTYPE_DEVICE,
* srcDevice and srcPitch specify the (device) base address of the source
* data and the bytes per row to apply. srcArray is
* ignored.
*
* If srcMemoryType is CU_MEMORYTYPE_ARRAY,
* srcArray specifies the handle of the source data. srcHost, srcDevice
* and srcPitch are ignored.
*
* If dstMemoryType is CU_MEMORYTYPE_HOST,
* dstHost and dstPitch specify the (host) base address of the destination
* data and the bytes per row to apply. dstArray is
* ignored.
*
* If dstMemoryType is CU_MEMORYTYPE_UNIFIED,
* dstDevice and dstPitch specify the (unified virtual address space) base
* address of the destination data and the bytes per row
* to apply. dstArray is ignored. This value
* may be used only if unified addressing is supported in the calling
* context.
*
* If dstMemoryType is CU_MEMORYTYPE_DEVICE,
* dstDevice and dstPitch specify the (device) base address of the
* destination data and the bytes per row to apply. dstArray
* is ignored.
*
* If dstMemoryType is CU_MEMORYTYPE_ARRAY,
* dstArray specifies the handle of the destination data. dstHost,
* dstDevice and dstPitch are ignored.
*
*
* -
*
srcXInBytes and srcY specify
* the base address of the source data for the copy.
*
*
*
*
* For host pointers, the starting address
* is
*
void* Start = (void*)((char*)srcHost+srcY*srcPitch +
* srcXInBytes);
*
* For device pointers, the starting
* address is
*
CUdeviceptr Start =
* srcDevice+srcY*srcPitch+srcXInBytes;
*
* For CUDA arrays, srcXInBytes must be
* evenly divisible by the array element size.
*
*
* -
*
dstXInBytes and dstY specify
* the base address of the destination data for the copy.
*
*
*
*
* For host pointers, the base address is
*
void* dstStart = (void*)((char*)dstHost+dstY*dstPitch +
* dstXInBytes);
*
* For device pointers, the starting
* address is
*
CUdeviceptr dstStart =
* dstDevice+dstY*dstPitch+dstXInBytes;
*
* For CUDA arrays, dstXInBytes must be
* evenly divisible by the array element size.
*
*
* -
*
WidthInBytes and Height specify
* the width (in bytes) and height of the 2D copy being performed.
*
*
* -
*
If specified, srcPitch must be
* greater than or equal to WidthInBytes + srcXInBytes, and dstPitch must
* be greater than or equal
* to WidthInBytes + dstXInBytes.
*
*
*
*
* cuMemcpy2D() returns an error if any
* pitch is greater than the maximum allowed (CU_DEVICE_ATTRIBUTE_MAX_PITCH).
* cuMemAllocPitch() passes back pitches that always work with cuMemcpy2D().
* On intra-device memory copies (device to device, CUDA array to device,
* CUDA array to CUDA array), cuMemcpy2D() may fail for pitches not
* computed by cuMemAllocPitch(). cuMemcpy2DUnaligned() does not have this
* restriction, but may run significantly slower in the cases where
* cuMemcpy2D() would have returned an error code.
*
*
* Note:
* Note that this
* function may also return error codes from previous, asynchronous
* launches.
*
*
*
*
*
* @param pCopy Parameters for the memory copy
*
* @return CUDA_SUCCESS, CUDA_ERROR_DEINITIALIZED, CUDA_ERROR_NOT_INITIALIZED,
* CUDA_ERROR_INVALID_CONTEXT, CUDA_ERROR_INVALID_VALUE
*
* @see JCudaDriver#cuArray3DCreate
* @see JCudaDriver#cuArray3DGetDescriptor
* @see JCudaDriver#cuArrayCreate
* @see JCudaDriver#cuArrayDestroy
* @see JCudaDriver#cuArrayGetDescriptor
* @see JCudaDriver#cuMemAlloc
* @see JCudaDriver#cuMemAllocHost
* @see JCudaDriver#cuMemAllocPitch
* @see JCudaDriver#cuMemcpy2DAsync
* @see JCudaDriver#cuMemcpy2DUnaligned
* @see JCudaDriver#cuMemcpy3D
* @see JCudaDriver#cuMemcpy3DAsync
* @see JCudaDriver#cuMemcpyAtoA
* @see JCudaDriver#cuMemcpyAtoD
* @see JCudaDriver#cuMemcpyAtoH
* @see JCudaDriver#cuMemcpyAtoHAsync
* @see JCudaDriver#cuMemcpyDtoA
* @see JCudaDriver#cuMemcpyDtoD
* @see JCudaDriver#cuMemcpyDtoDAsync
* @see JCudaDriver#cuMemcpyDtoH
* @see JCudaDriver#cuMemcpyDtoHAsync
* @see JCudaDriver#cuMemcpyHtoA
* @see JCudaDriver#cuMemcpyHtoAAsync
* @see JCudaDriver#cuMemcpyHtoD
* @see JCudaDriver#cuMemcpyHtoDAsync
* @see JCudaDriver#cuMemFree
* @see JCudaDriver#cuMemFreeHost
* @see JCudaDriver#cuMemGetAddressRange
* @see JCudaDriver#cuMemGetInfo
* @see JCudaDriver#cuMemHostAlloc
* @see JCudaDriver#cuMemHostGetDevicePointer
* @see JCudaDriver#cuMemsetD2D8
* @see JCudaDriver#cuMemsetD2D16
* @see JCudaDriver#cuMemsetD2D32
* @see JCudaDriver#cuMemsetD8
* @see JCudaDriver#cuMemsetD16
* @see JCudaDriver#cuMemsetD32
*/
public static int cuMemcpy2D(CUDA_MEMCPY2D pCopy)
{
    // Run the native 2D copy first, then hand its status code to checkResult.
    final int status = cuMemcpy2DNative(pCopy);
    return checkResult(status);
}

/** Native method invoked by {@link #cuMemcpy2D}; the descriptor is forwarded unchanged. */
private static native int cuMemcpy2DNative(CUDA_MEMCPY2D pCopy);
/**
* Copies memory for 2D arrays.
*
*
* CUresult cuMemcpy2DUnaligned (
* const CUDA_MEMCPY2D* pCopy )
*
*
* Copies memory for 2D arrays. Perform a
* 2D memory copy according to the parameters specified in pCopy.
* The CUDA_MEMCPY2D structure is defined as:
*
* typedef struct CUDA_MEMCPY2D_st {
* unsigned int srcXInBytes, srcY;
* CUmemorytype srcMemoryType;
* const void *srcHost;
* CUdeviceptr srcDevice;
* CUarray srcArray;
* unsigned int srcPitch;
* unsigned int dstXInBytes, dstY;
* CUmemorytype dstMemoryType;
* void *dstHost;
* CUdeviceptr dstDevice;
* CUarray dstArray;
* unsigned int dstPitch;
* unsigned int WidthInBytes;
* unsigned int Height;
* } CUDA_MEMCPY2D;
* where:
*
* -
*
srcMemoryType and dstMemoryType
* specify the type of memory of the source and destination, respectively;
* CUmemorytype_enum
* is defined as:
*
*
*
*
* typedef enum CUmemorytype_enum {
* CU_MEMORYTYPE_HOST = 0x01,
* CU_MEMORYTYPE_DEVICE = 0x02,
* CU_MEMORYTYPE_ARRAY = 0x03,
* CU_MEMORYTYPE_UNIFIED = 0x04
* } CUmemorytype;
*
* If srcMemoryType is CU_MEMORYTYPE_UNIFIED,
* srcDevice and srcPitch specify the (unified virtual address space) base
* address of the source data and the bytes per row
* to apply. srcArray is ignored. This value
* may be used only if unified addressing is supported in the calling
* context.
*
* If srcMemoryType is CU_MEMORYTYPE_HOST,
* srcHost and srcPitch specify the (host) base address of the source data
* and the bytes per row to apply. srcArray is ignored.
*
* If srcMemoryType is CU_MEMORYTYPE_DEVICE,
* srcDevice and srcPitch specify the (device) base address of the source
* data and the bytes per row to apply. srcArray is
* ignored.
*
* If srcMemoryType is CU_MEMORYTYPE_ARRAY,
* srcArray specifies the handle of the source data. srcHost, srcDevice
* and srcPitch are ignored.
*
* If dstMemoryType is CU_MEMORYTYPE_UNIFIED,
* dstDevice and dstPitch specify the (unified virtual address space) base
* address of the destination data and the bytes per row
* to apply. dstArray is ignored. This value
* may be used only if unified addressing is supported in the calling
* context.
*
* If dstMemoryType is CU_MEMORYTYPE_HOST,
* dstHost and dstPitch specify the (host) base address of the destination
* data and the bytes per row to apply. dstArray is
* ignored.
*
* If dstMemoryType is CU_MEMORYTYPE_DEVICE,
* dstDevice and dstPitch specify the (device) base address of the
* destination data and the bytes per row to apply. dstArray
* is ignored.
*
* If dstMemoryType is CU_MEMORYTYPE_ARRAY,
* dstArray specifies the handle of the destination data. dstHost,
* dstDevice and dstPitch are ignored.
*
*
* -
*
srcXInBytes and srcY specify
* the base address of the source data for the copy.
*
*
*
*
* For host pointers, the starting address
* is
*
void* Start = (void*)((char*)srcHost+srcY*srcPitch +
* srcXInBytes);
*
* For device pointers, the starting
* address is
*
CUdeviceptr Start =
* srcDevice+srcY*srcPitch+srcXInBytes;
*
* For CUDA arrays, srcXInBytes must be
* evenly divisible by the array element size.
*
*
* -
*
dstXInBytes and dstY specify
* the base address of the destination data for the copy.
*
*
*
*
* For host pointers, the base address is
*
void* dstStart = (void*)((char*)dstHost+dstY*dstPitch +
* dstXInBytes);
*
* For device pointers, the starting
* address is
*
CUdeviceptr dstStart =
* dstDevice+dstY*dstPitch+dstXInBytes;
*
* For CUDA arrays, dstXInBytes must be
* evenly divisible by the array element size.
*
*
* -
*
WidthInBytes and Height specify
* the width (in bytes) and height of the 2D copy being performed.
*
*
* -
*
If specified, srcPitch must be
* greater than or equal to WidthInBytes + srcXInBytes, and dstPitch must
* be greater than or equal
* to WidthInBytes + dstXInBytes.
*
*
*
*
* cuMemcpy2D() returns an error if any
* pitch is greater than the maximum allowed (CU_DEVICE_ATTRIBUTE_MAX_PITCH).
* cuMemAllocPitch() passes back pitches that always work with cuMemcpy2D().
* On intra-device memory copies (device to device, CUDA array to device,
* CUDA array to CUDA array), cuMemcpy2D() may fail for pitches not
* computed by cuMemAllocPitch(). cuMemcpy2DUnaligned() does not have this
* restriction, but may run significantly slower in the cases where
* cuMemcpy2D() would have returned an error code.
*
*
* Note:
* Note that this
* function may also return error codes from previous, asynchronous
* launches.
*
*
*
*
*
* @param pCopy Parameters for the memory copy
*
* @return CUDA_SUCCESS, CUDA_ERROR_DEINITIALIZED, CUDA_ERROR_NOT_INITIALIZED,
* CUDA_ERROR_INVALID_CONTEXT, CUDA_ERROR_INVALID_VALUE
*
* @see JCudaDriver#cuArray3DCreate
* @see JCudaDriver#cuArray3DGetDescriptor
* @see JCudaDriver#cuArrayCreate
* @see JCudaDriver#cuArrayDestroy
* @see JCudaDriver#cuArrayGetDescriptor
* @see JCudaDriver#cuMemAlloc
* @see JCudaDriver#cuMemAllocHost
* @see JCudaDriver#cuMemAllocPitch
* @see JCudaDriver#cuMemcpy2D
* @see JCudaDriver#cuMemcpy2DAsync
* @see JCudaDriver#cuMemcpy3D
* @see JCudaDriver#cuMemcpy3DAsync
* @see JCudaDriver#cuMemcpyAtoA
* @see JCudaDriver#cuMemcpyAtoD
* @see JCudaDriver#cuMemcpyAtoH
* @see JCudaDriver#cuMemcpyAtoHAsync
* @see JCudaDriver#cuMemcpyDtoA
* @see JCudaDriver#cuMemcpyDtoD
* @see JCudaDriver#cuMemcpyDtoDAsync
* @see JCudaDriver#cuMemcpyDtoH
* @see JCudaDriver#cuMemcpyDtoHAsync
* @see JCudaDriver#cuMemcpyHtoA
* @see JCudaDriver#cuMemcpyHtoAAsync
* @see JCudaDriver#cuMemcpyHtoD
* @see JCudaDriver#cuMemcpyHtoDAsync
* @see JCudaDriver#cuMemFree
* @see JCudaDriver#cuMemFreeHost
* @see JCudaDriver#cuMemGetAddressRange
* @see JCudaDriver#cuMemGetInfo
* @see JCudaDriver#cuMemHostAlloc
* @see JCudaDriver#cuMemHostGetDevicePointer
* @see JCudaDriver#cuMemsetD2D8
* @see JCudaDriver#cuMemsetD2D16
* @see JCudaDriver#cuMemsetD2D32
* @see JCudaDriver#cuMemsetD8
* @see JCudaDriver#cuMemsetD16
* @see JCudaDriver#cuMemsetD32
*/
public static int cuMemcpy2DUnaligned(CUDA_MEMCPY2D pCopy)
{
    // Run the native unaligned 2D copy first, then hand its status code to checkResult.
    final int status = cuMemcpy2DUnalignedNative(pCopy);
    return checkResult(status);
}

/** Native method invoked by {@link #cuMemcpy2DUnaligned}; the descriptor is forwarded unchanged. */
private static native int cuMemcpy2DUnalignedNative(CUDA_MEMCPY2D pCopy);
/**
* Copies memory for 3D arrays.
*
*
* CUresult cuMemcpy3D (
* const CUDA_MEMCPY3D* pCopy )
*
*
* Copies memory for 3D arrays. Perform a
* 3D memory copy according to the parameters specified in pCopy.
* The CUDA_MEMCPY3D structure is defined as:
*
* typedef struct CUDA_MEMCPY3D_st
* {
*
* unsigned int srcXInBytes, srcY, srcZ;
* unsigned int srcLOD;
* CUmemorytype srcMemoryType;
* const void *srcHost;
* CUdeviceptr srcDevice;
* CUarray srcArray;
* unsigned int srcPitch; // ignored when src is array
* unsigned int srcHeight; // ignored when src is array;
* may be 0 if Depth==1
*
* unsigned int dstXInBytes, dstY, dstZ;
* unsigned int dstLOD;
* CUmemorytype dstMemoryType;
* void *dstHost;
* CUdeviceptr dstDevice;
* CUarray dstArray;
* unsigned int dstPitch; // ignored when dst is array
* unsigned int dstHeight; // ignored when dst is array;
* may be 0 if Depth==1
*
* unsigned int WidthInBytes;
* unsigned int Height;
* unsigned int Depth;
* } CUDA_MEMCPY3D;
* where:
*
* -
*
srcMemoryType and dstMemoryType
* specify the type of memory of the source and destination, respectively;
* CUmemorytype_enum
* is defined as:
*
*
*
*
* typedef enum CUmemorytype_enum {
* CU_MEMORYTYPE_HOST = 0x01,
* CU_MEMORYTYPE_DEVICE = 0x02,
* CU_MEMORYTYPE_ARRAY = 0x03,
* CU_MEMORYTYPE_UNIFIED = 0x04
* } CUmemorytype;
*
* If srcMemoryType is CU_MEMORYTYPE_UNIFIED,
* srcDevice and srcPitch specify the (unified virtual address space) base
* address of the source data and the bytes per row
* to apply. srcArray is ignored. This value
* may be used only if unified addressing is supported in the calling
* context.
*
* If srcMemoryType is CU_MEMORYTYPE_HOST,
* srcHost, srcPitch and srcHeight specify the (host) base address of the
* source data, the bytes per row, and the height of
* each 2D slice of the 3D array. srcArray
* is ignored.
*
* If srcMemoryType is CU_MEMORYTYPE_DEVICE,
* srcDevice, srcPitch and srcHeight specify the (device) base address of
* the source data, the bytes per row, and the height
* of each 2D slice of the 3D array. srcArray
* is ignored.
*
* If srcMemoryType is CU_MEMORYTYPE_ARRAY,
* srcArray specifies the handle of the source data. srcHost, srcDevice,
* srcPitch and srcHeight are ignored.
*
* If dstMemoryType is CU_MEMORYTYPE_UNIFIED,
* dstDevice and dstPitch specify the (unified virtual address space) base
* address of the destination data and the bytes per row
* to apply. dstArray is ignored. This value
* may be used only if unified addressing is supported in the calling
* context.
*
* If dstMemoryType is CU_MEMORYTYPE_HOST,
* dstHost, dstPitch and dstHeight specify the (host) base address of the
* destination data, the bytes per row, and the height of each
* 2D slice of the 3D array. dstArray is
* ignored.
*
* If dstMemoryType is CU_MEMORYTYPE_DEVICE,
* dstDevice, dstPitch and dstHeight specify the (device) base address of the
* destination data, the bytes per row, and the height of each
* 2D slice of the 3D array. dstArray is
* ignored.
*
* If dstMemoryType is CU_MEMORYTYPE_ARRAY,
* dstArray specifies the handle of the destination data. dstHost,
* dstDevice, dstPitch and dstHeight are ignored.
*
*
* -
*
srcXInBytes, srcY and srcZ
* specify the base address of the source data for the copy.
*
*
*
*
* For host pointers, the starting address
* is
*
void* Start = (void*)((char*)srcHost+(srcZ*srcHeight+srcY)*srcPitch
* + srcXInBytes);
*
* For device pointers, the starting
* address is
*
CUdeviceptr Start =
* srcDevice+(srcZ*srcHeight+srcY)*srcPitch+srcXInBytes;
*
* For CUDA arrays, srcXInBytes must be
* evenly divisible by the array element size.
*
*
* -
*
dstXInBytes, dstY and dstZ
* specify the base address of the destination data for the copy.
*
*
*
*
* For host pointers, the base address is
*
void* dstStart = (void*)((char*)dstHost+(dstZ*dstHeight+dstY)*dstPitch
* + dstXInBytes);
*
* For device pointers, the starting
* address is
*
CUdeviceptr dstStart =
* dstDevice+(dstZ*dstHeight+dstY)*dstPitch+dstXInBytes;
*
* For CUDA arrays, dstXInBytes must be
* evenly divisible by the array element size.
*
*
* -
*
WidthInBytes, Height and Depth
* specify the width (in bytes), height and depth of the 3D copy being
* performed.
*
*
* -
*
If specified, srcPitch must be
* greater than or equal to WidthInBytes + srcXInBytes, and dstPitch must
* be greater than or equal
* to WidthInBytes + dstXInBytes.
*
*
* -
*
If specified, srcHeight must
* be greater than or equal to Height + srcY, and dstHeight must be
* greater than or equal to Height
* + dstY.
*
*
*
*
* cuMemcpy3D() returns an error if any
* pitch is greater than the maximum allowed
* (CU_DEVICE_ATTRIBUTE_MAX_PITCH).
*
*
* The srcLOD and dstLOD members of the
* CUDA_MEMCPY3D structure must be set to 0.
*
*
* Note:
* Note that this
* function may also return error codes from previous, asynchronous
* launches.
*
*
*
*
*
* @param pCopy Parameters for the memory copy
*
* @return CUDA_SUCCESS, CUDA_ERROR_DEINITIALIZED, CUDA_ERROR_NOT_INITIALIZED,
* CUDA_ERROR_INVALID_CONTEXT, CUDA_ERROR_INVALID_VALUE
*
* @see JCudaDriver#cuArray3DCreate
* @see JCudaDriver#cuArray3DGetDescriptor
* @see JCudaDriver#cuArrayCreate
* @see JCudaDriver#cuArrayDestroy
* @see JCudaDriver#cuArrayGetDescriptor
* @see JCudaDriver#cuMemAlloc
* @see JCudaDriver#cuMemAllocHost
* @see JCudaDriver#cuMemAllocPitch
* @see JCudaDriver#cuMemcpy2D
* @see JCudaDriver#cuMemcpy2DAsync
* @see JCudaDriver#cuMemcpy2DUnaligned
* @see JCudaDriver#cuMemcpy3DAsync
* @see JCudaDriver#cuMemcpyAtoA
* @see JCudaDriver#cuMemcpyAtoD
* @see JCudaDriver#cuMemcpyAtoH
* @see JCudaDriver#cuMemcpyAtoHAsync
* @see JCudaDriver#cuMemcpyDtoA
* @see JCudaDriver#cuMemcpyDtoD
* @see JCudaDriver#cuMemcpyDtoDAsync
* @see JCudaDriver#cuMemcpyDtoH
* @see JCudaDriver#cuMemcpyDtoHAsync
* @see JCudaDriver#cuMemcpyHtoA
* @see JCudaDriver#cuMemcpyHtoAAsync
* @see JCudaDriver#cuMemcpyHtoD
* @see JCudaDriver#cuMemcpyHtoDAsync
* @see JCudaDriver#cuMemFree
* @see JCudaDriver#cuMemFreeHost
* @see JCudaDriver#cuMemGetAddressRange
* @see JCudaDriver#cuMemGetInfo
* @see JCudaDriver#cuMemHostAlloc
* @see JCudaDriver#cuMemHostGetDevicePointer
* @see JCudaDriver#cuMemsetD2D8
* @see JCudaDriver#cuMemsetD2D16
* @see JCudaDriver#cuMemsetD2D32
* @see JCudaDriver#cuMemsetD8
* @see JCudaDriver#cuMemsetD16
* @see JCudaDriver#cuMemsetD32
*/
public static int cuMemcpy3D(CUDA_MEMCPY3D pCopy)
{
    // Run the native 3D copy first, then hand its status code to checkResult.
    final int status = cuMemcpy3DNative(pCopy);
    return checkResult(status);
}

/** Native method invoked by {@link #cuMemcpy3D}; the descriptor is forwarded unchanged. */
private static native int cuMemcpy3DNative(CUDA_MEMCPY3D pCopy);
/**
* Copies memory between contexts.
*
*
* CUresult cuMemcpy3DPeer (
* const CUDA_MEMCPY3D_PEER* pCopy )
*
*
* Copies memory between contexts. Perform
* a 3D memory copy according to the parameters specified in pCopy. See the definition of the CUDA_MEMCPY3D_PEER structure
* for documentation of its parameters.
*
* Note that this function is synchronous
* with respect to the host only if the source or destination memory is
* of type CU_MEMORYTYPE_HOST. Note also that this copy is serialized with
* respect to all pending and future asynchronous work in the current
* context,
* the copy's source context, and the copy's
* destination context (use cuMemcpy3DPeerAsync to avoid this
* synchronization).
*
*
* Note:
* Note that this
* function may also return error codes from previous, asynchronous
* launches.
*
*
*
*
*
* @param pCopy Parameters for the memory copy
*
* @return CUDA_SUCCESS, CUDA_ERROR_DEINITIALIZED, CUDA_ERROR_NOT_INITIALIZED,
* CUDA_ERROR_INVALID_CONTEXT, CUDA_ERROR_INVALID_VALUE
*
* @see JCudaDriver#cuMemcpyDtoD
* @see JCudaDriver#cuMemcpyPeer
* @see JCudaDriver#cuMemcpyDtoDAsync
* @see JCudaDriver#cuMemcpyPeerAsync
* @see JCudaDriver#cuMemcpy3DPeerAsync
*/
public static int cuMemcpy3DPeer(CUDA_MEMCPY3D_PEER pCopy)
{
    // Run the native peer copy first, then hand its status code to checkResult.
    final int status = cuMemcpy3DPeerNative(pCopy);
    return checkResult(status);
}

/** Native method invoked by {@link #cuMemcpy3DPeer}; the descriptor is forwarded unchanged. */
private static native int cuMemcpy3DPeerNative(CUDA_MEMCPY3D_PEER pCopy);
/**
* Copies memory asynchronously.
*
*
* CUresult cuMemcpyAsync (
* CUdeviceptr dst,
* CUdeviceptr src,
* size_t ByteCount,
* CUstream hStream )
*
*
* Copies memory asynchronously. Copies
* data between two pointers. dst and src are base
* pointers of the destination and source, respectively. ByteCount
* specifies the number of bytes to copy. Note that this function infers
* the type of the transfer (host to host, host to device,
* device to device, or device to host) from
* the pointer values. This function is only allowed in contexts which
* support unified
* addressing. Note that this function is
* asynchronous and can optionally be associated to a stream by passing a
* non-zero hStream argument
*
*
* Note:
* Note that this
* function may also return error codes from previous, asynchronous
* launches.
*
*
*
*
*
* @param dst Destination unified virtual address space pointer
* @param src Source unified virtual address space pointer
* @param ByteCount Size of memory copy in bytes
* @param hStream Stream identifier
*
* @return CUDA_SUCCESS, CUDA_ERROR_DEINITIALIZED, CUDA_ERROR_NOT_INITIALIZED,
* CUDA_ERROR_INVALID_CONTEXT, CUDA_ERROR_INVALID_VALUE
*
* @see JCudaDriver#cuArray3DCreate
* @see JCudaDriver#cuArray3DGetDescriptor
* @see JCudaDriver#cuArrayCreate
* @see JCudaDriver#cuArrayDestroy
* @see JCudaDriver#cuArrayGetDescriptor
* @see JCudaDriver#cuMemAlloc
* @see JCudaDriver#cuMemAllocHost
* @see JCudaDriver#cuMemAllocPitch
* @see JCudaDriver#cuMemcpy2D
* @see JCudaDriver#cuMemcpy2DAsync
* @see JCudaDriver#cuMemcpy2DUnaligned
* @see JCudaDriver#cuMemcpy3D
* @see JCudaDriver#cuMemcpy3DAsync
* @see JCudaDriver#cuMemcpyAtoA
* @see JCudaDriver#cuMemcpyAtoD
* @see JCudaDriver#cuMemcpyAtoH
* @see JCudaDriver#cuMemcpyAtoHAsync
* @see JCudaDriver#cuMemcpyDtoA
* @see JCudaDriver#cuMemcpyDtoD
* @see JCudaDriver#cuMemcpyDtoH
* @see JCudaDriver#cuMemcpyDtoHAsync
* @see JCudaDriver#cuMemcpyHtoA
* @see JCudaDriver#cuMemcpyHtoAAsync
* @see JCudaDriver#cuMemcpyHtoD
* @see JCudaDriver#cuMemcpyHtoDAsync
* @see JCudaDriver#cuMemFree
* @see JCudaDriver#cuMemFreeHost
* @see JCudaDriver#cuMemGetAddressRange
* @see JCudaDriver#cuMemGetInfo
* @see JCudaDriver#cuMemHostAlloc
* @see JCudaDriver#cuMemHostGetDevicePointer
* @see JCudaDriver#cuMemsetD2D8
* @see JCudaDriver#cuMemsetD2D8Async
* @see JCudaDriver#cuMemsetD2D16
* @see JCudaDriver#cuMemsetD2D16Async
* @see JCudaDriver#cuMemsetD2D32
* @see JCudaDriver#cuMemsetD2D32Async
* @see JCudaDriver#cuMemsetD8
* @see JCudaDriver#cuMemsetD8Async
* @see JCudaDriver#cuMemsetD16
* @see JCudaDriver#cuMemsetD16Async
* @see JCudaDriver#cuMemsetD32
* @see JCudaDriver#cuMemsetD32Async
*/
public static int cuMemcpyAsync(CUdeviceptr dst, CUdeviceptr src, long ByteCount, CUstream hStream)
{
    // Call the native method first, then hand its status code to checkResult.
    final int status = cuMemcpyAsyncNative(dst, src, ByteCount, hStream);
    return checkResult(status);
}

/** Native method invoked by {@link #cuMemcpyAsync}; arguments are forwarded unchanged. */
private static native int cuMemcpyAsyncNative(
    CUdeviceptr dst, CUdeviceptr src, long ByteCount, CUstream hStream);
/**
* Copies device memory between two contexts asynchronously.
*
*
* CUresult cuMemcpyPeerAsync (
* CUdeviceptr dstDevice,
* CUcontext dstContext,
* CUdeviceptr srcDevice,
* CUcontext srcContext,
* size_t ByteCount,
* CUstream hStream )
*
*
* Copies device memory between two contexts
* asynchronously. Copies from device memory in one context to device
* memory in another
* context. dstDevice is the base
* device pointer of the destination memory and dstContext is
* the destination context. srcDevice is the base device pointer
* of the source memory and srcContext is the source context.
* ByteCount specifies the number of bytes to copy. Note that
* this function is asynchronous with respect to the host and all work in
* other
* streams in other devices.
*
*
* Note:
* Note that this
* function may also return error codes from previous, asynchronous
* launches.
*
*
*
*
*
* @param dstDevice Destination device pointer
* @param dstContext Destination context
* @param srcDevice Source device pointer
* @param srcContext Source context
* @param ByteCount Size of memory copy in bytes
* @param hStream Stream identifier
*
* @return CUDA_SUCCESS, CUDA_ERROR_DEINITIALIZED, CUDA_ERROR_NOT_INITIALIZED,
* CUDA_ERROR_INVALID_CONTEXT, CUDA_ERROR_INVALID_VALUE
*
* @see JCudaDriver#cuMemcpyDtoD
* @see JCudaDriver#cuMemcpyPeer
* @see JCudaDriver#cuMemcpy3DPeer
* @see JCudaDriver#cuMemcpyDtoDAsync
* @see JCudaDriver#cuMemcpy3DPeerAsync
*/
public static int cuMemcpyPeerAsync(CUdeviceptr dstDevice, CUcontext dstContext, CUdeviceptr srcDevice, CUcontext srcContext, long ByteCount, CUstream hStream)
{
    // Call the native method first, then hand its status code to checkResult.
    final int status = cuMemcpyPeerAsyncNative(
        dstDevice, dstContext, srcDevice, srcContext, ByteCount, hStream);
    return checkResult(status);
}

/** Native method invoked by {@link #cuMemcpyPeerAsync}; arguments are forwarded unchanged. */
private static native int cuMemcpyPeerAsyncNative(
    CUdeviceptr dstDevice, CUcontext dstContext,
    CUdeviceptr srcDevice, CUcontext srcContext,
    long ByteCount, CUstream hStream);
/**
* Copies memory from Host to Device.
*
*
* CUresult cuMemcpyHtoDAsync (
* CUdeviceptr dstDevice,
* const void* srcHost,
* size_t ByteCount,
* CUstream hStream )
*
*
* Copies memory from Host to Device.
* Copies from host memory to device memory. dstDevice and srcHost are the base addresses of the destination and source,
* respectively. ByteCount specifies the number of bytes to
* copy.
*
* cuMemcpyHtoDAsync() is asynchronous and
* can optionally be associated to a stream by passing a non-zero hStream argument. It only works on page-locked memory and returns
* an error if a pointer to pageable memory is passed as input.
*
*
* Note:
* Note that this
* function may also return error codes from previous, asynchronous
* launches.
*
*
*
*
*
* @param dstDevice Destination device pointer
* @param srcHost Source host pointer
* @param ByteCount Size of memory copy in bytes
* @param hStream Stream identifier
*
* @return CUDA_SUCCESS, CUDA_ERROR_DEINITIALIZED, CUDA_ERROR_NOT_INITIALIZED,
* CUDA_ERROR_INVALID_CONTEXT, CUDA_ERROR_INVALID_VALUE
*
* @see JCudaDriver#cuArray3DCreate
* @see JCudaDriver#cuArray3DGetDescriptor
* @see JCudaDriver#cuArrayCreate
* @see JCudaDriver#cuArrayDestroy
* @see JCudaDriver#cuArrayGetDescriptor
* @see JCudaDriver#cuMemAlloc
* @see JCudaDriver#cuMemAllocHost
* @see JCudaDriver#cuMemAllocPitch
* @see JCudaDriver#cuMemcpy2D
* @see JCudaDriver#cuMemcpy2DAsync
* @see JCudaDriver#cuMemcpy2DUnaligned
* @see JCudaDriver#cuMemcpy3D
* @see JCudaDriver#cuMemcpy3DAsync
* @see JCudaDriver#cuMemcpyAtoA
* @see JCudaDriver#cuMemcpyAtoD
* @see JCudaDriver#cuMemcpyAtoH
* @see JCudaDriver#cuMemcpyAtoHAsync
* @see JCudaDriver#cuMemcpyDtoA
* @see JCudaDriver#cuMemcpyDtoD
* @see JCudaDriver#cuMemcpyDtoDAsync
* @see JCudaDriver#cuMemcpyDtoH
* @see JCudaDriver#cuMemcpyDtoHAsync
* @see JCudaDriver#cuMemcpyHtoA
* @see JCudaDriver#cuMemcpyHtoAAsync
* @see JCudaDriver#cuMemcpyHtoD
* @see JCudaDriver#cuMemFree
* @see JCudaDriver#cuMemFreeHost
* @see JCudaDriver#cuMemGetAddressRange
* @see JCudaDriver#cuMemGetInfo
* @see JCudaDriver#cuMemHostAlloc
* @see JCudaDriver#cuMemHostGetDevicePointer
* @see JCudaDriver#cuMemsetD2D8
* @see JCudaDriver#cuMemsetD2D8Async
* @see JCudaDriver#cuMemsetD2D16
* @see JCudaDriver#cuMemsetD2D16Async
* @see JCudaDriver#cuMemsetD2D32
* @see JCudaDriver#cuMemsetD2D32Async
* @see JCudaDriver#cuMemsetD8
* @see JCudaDriver#cuMemsetD8Async
* @see JCudaDriver#cuMemsetD16
* @see JCudaDriver#cuMemsetD16Async
* @see JCudaDriver#cuMemsetD32
* @see JCudaDriver#cuMemsetD32Async
*/
public static int cuMemcpyHtoDAsync(CUdeviceptr dstDevice, Pointer srcHost, long ByteCount, CUstream hStream)
{
    // Call the native method first, then hand its status code to checkResult.
    final int status = cuMemcpyHtoDAsyncNative(dstDevice, srcHost, ByteCount, hStream);
    return checkResult(status);
}

/** Native method invoked by {@link #cuMemcpyHtoDAsync}; arguments are forwarded unchanged. */
private static native int cuMemcpyHtoDAsyncNative(
    CUdeviceptr dstDevice, Pointer srcHost, long ByteCount, CUstream hStream);
/**
* Copies memory from Device to Host.
*
*
* CUresult cuMemcpyDtoHAsync (
* void* dstHost,
* CUdeviceptr srcDevice,
* size_t ByteCount,
* CUstream hStream )
*
*
* Copies memory from Device to Host.
* Copies from device to host memory. dstHost and srcDevice specify the base pointers of the destination and
* source, respectively. ByteCount specifies the number of bytes
* to copy.
*
* cuMemcpyDtoHAsync() is asynchronous and
* can optionally be associated to a stream by passing a non-zero hStream argument. It only works on page-locked memory and returns
* an error if a pointer to pageable memory is passed as input.
*
*
* Note:
* Note that this
* function may also return error codes from previous, asynchronous
* launches.
*
*
*
*
*
* @param dstHost Destination host pointer
* @param srcDevice Source device pointer
* @param ByteCount Size of memory copy in bytes
* @param hStream Stream identifier
*
* @return CUDA_SUCCESS, CUDA_ERROR_DEINITIALIZED, CUDA_ERROR_NOT_INITIALIZED,
* CUDA_ERROR_INVALID_CONTEXT, CUDA_ERROR_INVALID_VALUE
*
* @see JCudaDriver#cuArray3DCreate
* @see JCudaDriver#cuArray3DGetDescriptor
* @see JCudaDriver#cuArrayCreate
* @see JCudaDriver#cuArrayDestroy
* @see JCudaDriver#cuArrayGetDescriptor
* @see JCudaDriver#cuMemAlloc
* @see JCudaDriver#cuMemAllocHost
* @see JCudaDriver#cuMemAllocPitch
* @see JCudaDriver#cuMemcpy2D
* @see JCudaDriver#cuMemcpy2DAsync
* @see JCudaDriver#cuMemcpy2DUnaligned
* @see JCudaDriver#cuMemcpy3D
* @see JCudaDriver#cuMemcpy3DAsync
* @see JCudaDriver#cuMemcpyAtoA
* @see JCudaDriver#cuMemcpyAtoD
* @see JCudaDriver#cuMemcpyAtoH
* @see JCudaDriver#cuMemcpyAtoHAsync
* @see JCudaDriver#cuMemcpyDtoA
* @see JCudaDriver#cuMemcpyDtoD
* @see JCudaDriver#cuMemcpyDtoDAsync
* @see JCudaDriver#cuMemcpyDtoH
* @see JCudaDriver#cuMemcpyHtoA
* @see JCudaDriver#cuMemcpyHtoAAsync
* @see JCudaDriver#cuMemcpyHtoD
* @see JCudaDriver#cuMemcpyHtoDAsync
* @see JCudaDriver#cuMemFree
* @see JCudaDriver#cuMemFreeHost
* @see JCudaDriver#cuMemGetAddressRange
* @see JCudaDriver#cuMemGetInfo
* @see JCudaDriver#cuMemHostAlloc
* @see JCudaDriver#cuMemHostGetDevicePointer
* @see JCudaDriver#cuMemsetD2D8
* @see JCudaDriver#cuMemsetD2D8Async
* @see JCudaDriver#cuMemsetD2D16
* @see JCudaDriver#cuMemsetD2D16Async
* @see JCudaDriver#cuMemsetD2D32
* @see JCudaDriver#cuMemsetD2D32Async
* @see JCudaDriver#cuMemsetD8
* @see JCudaDriver#cuMemsetD8Async
* @see JCudaDriver#cuMemsetD16
* @see JCudaDriver#cuMemsetD16Async
* @see JCudaDriver#cuMemsetD32
* @see JCudaDriver#cuMemsetD32Async
*/
public static int cuMemcpyDtoHAsync(Pointer dstHost,CUdeviceptr srcDevice, long ByteCount, CUstream hStream)
{
    // Call the native method first, then hand its status code to checkResult.
    final int status = cuMemcpyDtoHAsyncNative(dstHost, srcDevice, ByteCount, hStream);
    return checkResult(status);
}

/** Native method invoked by {@link #cuMemcpyDtoHAsync}; arguments are forwarded unchanged. */
private static native int cuMemcpyDtoHAsyncNative(
    Pointer dstHost, CUdeviceptr srcDevice, long ByteCount, CUstream hStream);
/**
* Copies memory from Device to Device.
*
*
* CUresult cuMemcpyDtoDAsync (
* CUdeviceptr dstDevice,
* CUdeviceptr srcDevice,
* size_t ByteCount,
* CUstream hStream )
*
*
* Copies memory from Device to Device.
* Copies from device memory to device memory. dstDevice and
* srcDevice are the base pointers of the destination and
* source, respectively. ByteCount specifies the number of bytes
* to copy. Note that this function is asynchronous and can optionally be
* associated to a stream
* by passing a non-zero hStream
* argument
*
*
* Note:
* Note that this
* function may also return error codes from previous, asynchronous
* launches.
*
*
*
*
*
* @param dstDevice Destination device pointer
* @param srcDevice Source device pointer
* @param ByteCount Size of memory copy in bytes
* @param hStream Stream identifier
*
* @return CUDA_SUCCESS, CUDA_ERROR_DEINITIALIZED, CUDA_ERROR_NOT_INITIALIZED,
* CUDA_ERROR_INVALID_CONTEXT, CUDA_ERROR_INVALID_VALUE
*
* @see JCudaDriver#cuArray3DCreate
* @see JCudaDriver#cuArray3DGetDescriptor
* @see JCudaDriver#cuArrayCreate
* @see JCudaDriver#cuArrayDestroy
* @see JCudaDriver#cuArrayGetDescriptor
* @see JCudaDriver#cuMemAlloc
* @see JCudaDriver#cuMemAllocHost
* @see JCudaDriver#cuMemAllocPitch
* @see JCudaDriver#cuMemcpy2D
* @see JCudaDriver#cuMemcpy2DAsync
* @see JCudaDriver#cuMemcpy2DUnaligned
* @see JCudaDriver#cuMemcpy3D
* @see JCudaDriver#cuMemcpy3DAsync
* @see JCudaDriver#cuMemcpyAtoA
* @see JCudaDriver#cuMemcpyAtoD
* @see JCudaDriver#cuMemcpyAtoH
* @see JCudaDriver#cuMemcpyAtoHAsync
* @see JCudaDriver#cuMemcpyDtoA
* @see JCudaDriver#cuMemcpyDtoD
* @see JCudaDriver#cuMemcpyDtoH
* @see JCudaDriver#cuMemcpyDtoHAsync
* @see JCudaDriver#cuMemcpyHtoA
* @see JCudaDriver#cuMemcpyHtoAAsync
* @see JCudaDriver#cuMemcpyHtoD
* @see JCudaDriver#cuMemcpyHtoDAsync
* @see JCudaDriver#cuMemFree
* @see JCudaDriver#cuMemFreeHost
* @see JCudaDriver#cuMemGetAddressRange
* @see JCudaDriver#cuMemGetInfo
* @see JCudaDriver#cuMemHostAlloc
* @see JCudaDriver#cuMemHostGetDevicePointer
* @see JCudaDriver#cuMemsetD2D8
* @see JCudaDriver#cuMemsetD2D8Async
* @see JCudaDriver#cuMemsetD2D16
* @see JCudaDriver#cuMemsetD2D16Async
* @see JCudaDriver#cuMemsetD2D32
* @see JCudaDriver#cuMemsetD2D32Async
* @see JCudaDriver#cuMemsetD8
* @see JCudaDriver#cuMemsetD8Async
* @see JCudaDriver#cuMemsetD16
* @see JCudaDriver#cuMemsetD16Async
* @see JCudaDriver#cuMemsetD32
* @see JCudaDriver#cuMemsetD32Async
*/
public static int cuMemcpyDtoDAsync(CUdeviceptr dstDevice, CUdeviceptr srcDevice, long ByteCount, CUstream hStream)
{
    // Invoke the native binding first, then hand its status code to checkResult.
    final int status = cuMemcpyDtoDAsyncNative(dstDevice, srcDevice, ByteCount, hStream);
    return checkResult(status);
}

// Native counterpart of cuMemcpyDtoDAsync; returns the raw status code.
private static native int cuMemcpyDtoDAsyncNative(CUdeviceptr dstDevice, CUdeviceptr srcDevice, long ByteCount, CUstream hStream);
/**
* Copies memory from Host to Array.
*
*
* CUresult cuMemcpyHtoAAsync (
* CUarray dstArray,
* size_t dstOffset,
* const void* srcHost,
* size_t ByteCount,
* CUstream hStream )
*
*
* Copies memory from Host to Array. Copies
* from host memory to a 1D CUDA array. dstArray and dstOffset specify the CUDA array handle and starting offset in
* bytes of the destination data. srcHost specifies the base
* address of the source. ByteCount specifies the number of
* bytes to copy.
*
* cuMemcpyHtoAAsync() is asynchronous and
* can optionally be associated to a stream by passing a non-zero hStream argument. It only works on page-locked memory and returns
* an error if a pointer to pageable memory is passed as input.
*
*
* Note:
* Note that this
* function may also return error codes from previous, asynchronous
* launches.
*
*
*
*
*
* @param dstArray Destination array
* @param dstIndex Offset in bytes of destination array
* @param pSrc Source host pointer
* @param ByteCount Size of memory copy in bytes
* @param hStream Stream identifier
*
* @return CUDA_SUCCESS, CUDA_ERROR_DEINITIALIZED, CUDA_ERROR_NOT_INITIALIZED,
* CUDA_ERROR_INVALID_CONTEXT, CUDA_ERROR_INVALID_VALUE
*
* @see JCudaDriver#cuArray3DCreate
* @see JCudaDriver#cuArray3DGetDescriptor
* @see JCudaDriver#cuArrayCreate
* @see JCudaDriver#cuArrayDestroy
* @see JCudaDriver#cuArrayGetDescriptor
* @see JCudaDriver#cuMemAlloc
* @see JCudaDriver#cuMemAllocHost
* @see JCudaDriver#cuMemAllocPitch
* @see JCudaDriver#cuMemcpy2D
* @see JCudaDriver#cuMemcpy2DAsync
* @see JCudaDriver#cuMemcpy2DUnaligned
* @see JCudaDriver#cuMemcpy3D
* @see JCudaDriver#cuMemcpy3DAsync
* @see JCudaDriver#cuMemcpyAtoA
* @see JCudaDriver#cuMemcpyAtoD
* @see JCudaDriver#cuMemcpyAtoH
* @see JCudaDriver#cuMemcpyAtoHAsync
* @see JCudaDriver#cuMemcpyDtoA
* @see JCudaDriver#cuMemcpyDtoD
* @see JCudaDriver#cuMemcpyDtoDAsync
* @see JCudaDriver#cuMemcpyDtoH
* @see JCudaDriver#cuMemcpyDtoHAsync
* @see JCudaDriver#cuMemcpyHtoA
* @see JCudaDriver#cuMemcpyHtoD
* @see JCudaDriver#cuMemcpyHtoDAsync
* @see JCudaDriver#cuMemFree
* @see JCudaDriver#cuMemFreeHost
* @see JCudaDriver#cuMemGetAddressRange
* @see JCudaDriver#cuMemGetInfo
* @see JCudaDriver#cuMemHostAlloc
* @see JCudaDriver#cuMemHostGetDevicePointer
* @see JCudaDriver#cuMemsetD2D8
* @see JCudaDriver#cuMemsetD2D8Async
* @see JCudaDriver#cuMemsetD2D16
* @see JCudaDriver#cuMemsetD2D16Async
* @see JCudaDriver#cuMemsetD2D32
* @see JCudaDriver#cuMemsetD2D32Async
* @see JCudaDriver#cuMemsetD8
* @see JCudaDriver#cuMemsetD8Async
* @see JCudaDriver#cuMemsetD16
* @see JCudaDriver#cuMemsetD16Async
* @see JCudaDriver#cuMemsetD32
* @see JCudaDriver#cuMemsetD32Async
*/
public static int cuMemcpyHtoAAsync(CUarray dstArray, long dstIndex, Pointer pSrc, long ByteCount, CUstream hStream)
{
    // Invoke the native binding first, then hand its status code to checkResult.
    final int status = cuMemcpyHtoAAsyncNative(dstArray, dstIndex, pSrc, ByteCount, hStream);
    return checkResult(status);
}

// Native counterpart of cuMemcpyHtoAAsync; returns the raw status code.
private static native int cuMemcpyHtoAAsyncNative(CUarray dstArray, long dstIndex, Pointer pSrc, long ByteCount, CUstream hStream);
/**
* Copies memory from Array to Host.
*
*
* CUresult cuMemcpyAtoHAsync (
* void* dstHost,
* CUarray srcArray,
* size_t srcOffset,
* size_t ByteCount,
* CUstream hStream )
*
*
* Copies memory from Array to Host. Copies
* from one 1D CUDA array to host memory. dstHost specifies the
* base pointer of the destination. srcArray and srcOffset specify the CUDA array handle and starting offset in
* bytes of the source data. ByteCount specifies the number of
* bytes to copy.
*
* cuMemcpyAtoHAsync() is asynchronous and
* can optionally be associated to a stream by passing a non-zero hStream argument. It only works on page-locked host memory and
* returns an error if a pointer to pageable memory is passed as input.
*
*
* Note:
* Note that this
* function may also return error codes from previous, asynchronous
* launches.
*
*
*
*
*
* @param dstHost Destination pointer
* @param srcArray Source array
* @param srcIndex Offset in bytes of source array
* @param ByteCount Size of memory copy in bytes
* @param hStream Stream identifier
*
* @return CUDA_SUCCESS, CUDA_ERROR_DEINITIALIZED, CUDA_ERROR_NOT_INITIALIZED,
* CUDA_ERROR_INVALID_CONTEXT, CUDA_ERROR_INVALID_VALUE
*
* @see JCudaDriver#cuArray3DCreate
* @see JCudaDriver#cuArray3DGetDescriptor
* @see JCudaDriver#cuArrayCreate
* @see JCudaDriver#cuArrayDestroy
* @see JCudaDriver#cuArrayGetDescriptor
* @see JCudaDriver#cuMemAlloc
* @see JCudaDriver#cuMemAllocHost
* @see JCudaDriver#cuMemAllocPitch
* @see JCudaDriver#cuMemcpy2D
* @see JCudaDriver#cuMemcpy2DAsync
* @see JCudaDriver#cuMemcpy2DUnaligned
* @see JCudaDriver#cuMemcpy3D
* @see JCudaDriver#cuMemcpy3DAsync
* @see JCudaDriver#cuMemcpyAtoA
* @see JCudaDriver#cuMemcpyAtoD
* @see JCudaDriver#cuMemcpyAtoH
* @see JCudaDriver#cuMemcpyDtoA
* @see JCudaDriver#cuMemcpyDtoD
* @see JCudaDriver#cuMemcpyDtoDAsync
* @see JCudaDriver#cuMemcpyDtoH
* @see JCudaDriver#cuMemcpyDtoHAsync
* @see JCudaDriver#cuMemcpyHtoA
* @see JCudaDriver#cuMemcpyHtoAAsync
* @see JCudaDriver#cuMemcpyHtoD
* @see JCudaDriver#cuMemcpyHtoDAsync
* @see JCudaDriver#cuMemFree
* @see JCudaDriver#cuMemFreeHost
* @see JCudaDriver#cuMemGetAddressRange
* @see JCudaDriver#cuMemGetInfo
* @see JCudaDriver#cuMemHostAlloc
* @see JCudaDriver#cuMemHostGetDevicePointer
* @see JCudaDriver#cuMemsetD2D8
* @see JCudaDriver#cuMemsetD2D8Async
* @see JCudaDriver#cuMemsetD2D16
* @see JCudaDriver#cuMemsetD2D16Async
* @see JCudaDriver#cuMemsetD2D32
* @see JCudaDriver#cuMemsetD2D32Async
* @see JCudaDriver#cuMemsetD8
* @see JCudaDriver#cuMemsetD8Async
* @see JCudaDriver#cuMemsetD16
* @see JCudaDriver#cuMemsetD16Async
* @see JCudaDriver#cuMemsetD32
* @see JCudaDriver#cuMemsetD32Async
*/
public static int cuMemcpyAtoHAsync(Pointer dstHost, CUarray srcArray, long srcIndex, long ByteCount, CUstream hStream)
{
    // Invoke the native binding first, then hand its status code to checkResult.
    final int status = cuMemcpyAtoHAsyncNative(dstHost, srcArray, srcIndex, ByteCount, hStream);
    return checkResult(status);
}

// Native counterpart of cuMemcpyAtoHAsync; returns the raw status code.
private static native int cuMemcpyAtoHAsyncNative(Pointer dstHost, CUarray srcArray, long srcIndex, long ByteCount, CUstream hStream);
/**
* Copies memory for 2D arrays.
*
*
* CUresult cuMemcpy2DAsync (
* const CUDA_MEMCPY2D* pCopy,
* CUstream hStream )
*
*
* Copies memory for 2D arrays. Perform a
* 2D memory copy according to the parameters specified in pCopy.
* The CUDA_MEMCPY2D structure is defined as:
*
* typedef struct CUDA_MEMCPY2D_st {
* unsigned int srcXInBytes, srcY;
* CUmemorytype srcMemoryType;
* const void *srcHost;
* CUdeviceptr srcDevice;
* CUarray srcArray;
* unsigned int srcPitch;
* unsigned int dstXInBytes, dstY;
* CUmemorytype dstMemoryType;
* void *dstHost;
* CUdeviceptr dstDevice;
* CUarray dstArray;
* unsigned int dstPitch;
* unsigned int WidthInBytes;
* unsigned int Height;
* } CUDA_MEMCPY2D;
* where:
*
* -
*
srcMemoryType and dstMemoryType
* specify the type of memory of the source and destination, respectively;
* CUmemorytype_enum
* is defined as:
*
*
*
*
* typedef enum CUmemorytype_enum {
* CU_MEMORYTYPE_HOST = 0x01,
* CU_MEMORYTYPE_DEVICE = 0x02,
* CU_MEMORYTYPE_ARRAY = 0x03,
* CU_MEMORYTYPE_UNIFIED = 0x04
* } CUmemorytype;
*
* If srcMemoryType is CU_MEMORYTYPE_HOST,
* srcHost and srcPitch specify the (host) base address of the source data
* and the bytes per row to apply. srcArray is ignored.
*
* If srcMemoryType is CU_MEMORYTYPE_UNIFIED,
* srcDevice and srcPitch specify the (unified virtual address space) base
* address of the source data and the bytes per row
* to apply. srcArray is ignored. This value
* may be used only if unified addressing is supported in the calling
* context.
*
* If srcMemoryType is CU_MEMORYTYPE_DEVICE,
* srcDevice and srcPitch specify the (device) base address of the source
* data and the bytes per row to apply. srcArray is
* ignored.
*
* If srcMemoryType is CU_MEMORYTYPE_ARRAY,
* srcArray specifies the handle of the source data. srcHost, srcDevice
* and srcPitch are ignored.
*
* If dstMemoryType is CU_MEMORYTYPE_UNIFIED,
* dstDevice and dstPitch specify the (unified virtual address space) base
* address of the destination data and the bytes per row
* to apply. dstArray is ignored. This value
* may be used only if unified addressing is supported in the calling
* context.
*
* If dstMemoryType is CU_MEMORYTYPE_HOST,
* dstHost and dstPitch specify the (host) base address of the destination
* data and the bytes per row to apply. dstArray is
* ignored.
*
* If dstMemoryType is CU_MEMORYTYPE_DEVICE,
* dstDevice and dstPitch specify the (device) base address of the
* destination data and the bytes per row to apply. dstArray
* is ignored.
*
* If dstMemoryType is CU_MEMORYTYPE_ARRAY,
* dstArray specifies the handle of the destination data. dstHost,
* dstDevice and dstPitch are ignored.
*
*
* -
*
srcXInBytes and srcY specify
* the base address of the source data for the copy.
*
*
*
*
* For host pointers, the starting address
* is
*
void* Start = (void*)((char*)srcHost+srcY*srcPitch +
* srcXInBytes);
*
* For device pointers, the starting
* address is
*
CUdeviceptr Start =
* srcDevice+srcY*srcPitch+srcXInBytes;
*
* For CUDA arrays, srcXInBytes must be
* evenly divisible by the array element size.
*
*
* -
*
dstXInBytes and dstY specify
* the base address of the destination data for the copy.
*
*
*
*
* For host pointers, the base address is
*
void* dstStart = (void*)((char*)dstHost+dstY*dstPitch +
* dstXInBytes);
*
* For device pointers, the starting
* address is
*
CUdeviceptr dstStart =
* dstDevice+dstY*dstPitch+dstXInBytes;
*
* For CUDA arrays, dstXInBytes must be
* evenly divisible by the array element size.
*
*
* -
*
WidthInBytes and Height specify
* the width (in bytes) and height of the 2D copy being performed.
*
*
* -
*
If specified, srcPitch must be
* greater than or equal to WidthInBytes + srcXInBytes, and dstPitch must
* be greater than or equal
* to WidthInBytes + dstXInBytes.
*
*
* -
*
If specified, srcPitch must be
* greater than or equal to WidthInBytes + srcXInBytes, and dstPitch must
* be greater than or equal
* to WidthInBytes + dstXInBytes.
*
*
* -
*
If specified, srcHeight must
* be greater than or equal to Height + srcY, and dstHeight must be
* greater than or equal to Height
* + dstY.
*
*
*
*
* cuMemcpy2D() returns an error if any
* pitch is greater than the maximum allowed (CU_DEVICE_ATTRIBUTE_MAX_PITCH).
* cuMemAllocPitch() passes back pitches that always work with cuMemcpy2D().
* On intra-device memory copies (device to device, CUDA array to device,
* CUDA array to CUDA array), cuMemcpy2D() may fail for pitches not
* computed by cuMemAllocPitch(). cuMemcpy2DUnaligned() does not have this
* restriction, but may run significantly slower in the cases where
* cuMemcpy2D() would have returned an error code.
*
* cuMemcpy2DAsync() is asynchronous and
* can optionally be associated to a stream by passing a non-zero hStream argument. It only works on page-locked host memory and
* returns an error if a pointer to pageable memory is passed as input.
*
*
* Note:
* Note that this
* function may also return error codes from previous, asynchronous
* launches.
*
*
*
*
*
* @param pCopy Parameters for the memory copy
* @param hStream Stream identifier
*
* @return CUDA_SUCCESS, CUDA_ERROR_DEINITIALIZED, CUDA_ERROR_NOT_INITIALIZED,
* CUDA_ERROR_INVALID_CONTEXT, CUDA_ERROR_INVALID_VALUE
*
* @see JCudaDriver#cuArray3DCreate
* @see JCudaDriver#cuArray3DGetDescriptor
* @see JCudaDriver#cuArrayCreate
* @see JCudaDriver#cuArrayDestroy
* @see JCudaDriver#cuArrayGetDescriptor
* @see JCudaDriver#cuMemAlloc
* @see JCudaDriver#cuMemAllocHost
* @see JCudaDriver#cuMemAllocPitch
* @see JCudaDriver#cuMemcpy2D
* @see JCudaDriver#cuMemcpy2DUnaligned
* @see JCudaDriver#cuMemcpy3D
* @see JCudaDriver#cuMemcpy3DAsync
* @see JCudaDriver#cuMemcpyAtoA
* @see JCudaDriver#cuMemcpyAtoD
* @see JCudaDriver#cuMemcpyAtoH
* @see JCudaDriver#cuMemcpyAtoHAsync
* @see JCudaDriver#cuMemcpyDtoA
* @see JCudaDriver#cuMemcpyDtoD
* @see JCudaDriver#cuMemcpyDtoDAsync
* @see JCudaDriver#cuMemcpyDtoH
* @see JCudaDriver#cuMemcpyDtoHAsync
* @see JCudaDriver#cuMemcpyHtoA
* @see JCudaDriver#cuMemcpyHtoAAsync
* @see JCudaDriver#cuMemcpyHtoD
* @see JCudaDriver#cuMemcpyHtoDAsync
* @see JCudaDriver#cuMemFree
* @see JCudaDriver#cuMemFreeHost
* @see JCudaDriver#cuMemGetAddressRange
* @see JCudaDriver#cuMemGetInfo
* @see JCudaDriver#cuMemHostAlloc
* @see JCudaDriver#cuMemHostGetDevicePointer
* @see JCudaDriver#cuMemsetD2D8
* @see JCudaDriver#cuMemsetD2D8Async
* @see JCudaDriver#cuMemsetD2D16
* @see JCudaDriver#cuMemsetD2D16Async
* @see JCudaDriver#cuMemsetD2D32
* @see JCudaDriver#cuMemsetD2D32Async
* @see JCudaDriver#cuMemsetD8
* @see JCudaDriver#cuMemsetD8Async
* @see JCudaDriver#cuMemsetD16
* @see JCudaDriver#cuMemsetD16Async
* @see JCudaDriver#cuMemsetD32
* @see JCudaDriver#cuMemsetD32Async
*/
public static int cuMemcpy2DAsync(CUDA_MEMCPY2D pCopy, CUstream hStream)
{
    // Invoke the native binding first, then hand its status code to checkResult.
    final int status = cuMemcpy2DAsyncNative(pCopy, hStream);
    return checkResult(status);
}

// Native counterpart of cuMemcpy2DAsync; returns the raw status code.
private static native int cuMemcpy2DAsyncNative(CUDA_MEMCPY2D pCopy, CUstream hStream);
/**
* Copies memory for 3D arrays.
*
*
* CUresult cuMemcpy3DAsync (
* const CUDA_MEMCPY3D* pCopy,
* CUstream hStream )
*
*
* Copies memory for 3D arrays. Perform a
* 3D memory copy according to the parameters specified in pCopy.
* The CUDA_MEMCPY3D structure is defined as:
*
* typedef struct CUDA_MEMCPY3D_st
* {
*
* unsigned int srcXInBytes, srcY, srcZ;
* unsigned int srcLOD;
* CUmemorytype srcMemoryType;
* const void *srcHost;
* CUdeviceptr srcDevice;
* CUarray srcArray;
* unsigned int srcPitch; // ignored when src is array
* unsigned int srcHeight; // ignored when src is array;
* may be 0 if Depth==1
*
* unsigned int dstXInBytes, dstY, dstZ;
* unsigned int dstLOD;
* CUmemorytype dstMemoryType;
* void *dstHost;
* CUdeviceptr dstDevice;
* CUarray dstArray;
* unsigned int dstPitch; // ignored when dst is array
* unsigned int dstHeight; // ignored when dst is array;
* may be 0 if Depth==1
*
* unsigned int WidthInBytes;
* unsigned int Height;
* unsigned int Depth;
* } CUDA_MEMCPY3D;
* where:
*
* -
*
srcMemoryType and dstMemoryType
* specify the type of memory of the source and destination, respectively;
* CUmemorytype_enum
* is defined as:
*
*
*
*
* typedef enum CUmemorytype_enum {
* CU_MEMORYTYPE_HOST = 0x01,
* CU_MEMORYTYPE_DEVICE = 0x02,
* CU_MEMORYTYPE_ARRAY = 0x03,
* CU_MEMORYTYPE_UNIFIED = 0x04
* } CUmemorytype;
*
* If srcMemoryType is CU_MEMORYTYPE_UNIFIED,
* srcDevice and srcPitch specify the (unified virtual address space) base
* address of the source data and the bytes per row
* to apply. srcArray is ignored. This value
* may be used only if unified addressing is supported in the calling
* context.
*
* If srcMemoryType is CU_MEMORYTYPE_HOST,
* srcHost, srcPitch and srcHeight specify the (host) base address of the
* source data, the bytes per row, and the height of
* each 2D slice of the 3D array. srcArray
* is ignored.
*
* If srcMemoryType is CU_MEMORYTYPE_DEVICE,
* srcDevice, srcPitch and srcHeight specify the (device) base address of
* the source data, the bytes per row, and the height
* of each 2D slice of the 3D array. srcArray
* is ignored.
*
* If srcMemoryType is CU_MEMORYTYPE_ARRAY,
* srcArray specifies the handle of the source data. srcHost, srcDevice,
* srcPitch and srcHeight are ignored.
*
* If dstMemoryType is CU_MEMORYTYPE_UNIFIED,
* dstDevice and dstPitch specify the (unified virtual address space) base
* address of the destination data and the bytes per row
* to apply. dstArray is ignored. This value
* may be used only if unified addressing is supported in the calling
* context.
*
* If dstMemoryType is CU_MEMORYTYPE_HOST,
* dstHost, dstPitch and dstHeight specify the (host) base address of the
* destination data, the bytes per row, and the height of each
* 2D slice of the 3D array. dstArray is
* ignored.
*
* If dstMemoryType is CU_MEMORYTYPE_DEVICE,
* dstDevice, dstPitch and dstHeight specify the (device) base address of the
* destination data, the bytes per row, and the height of each
* 2D slice of the 3D array. dstArray is
* ignored.
*
* If dstMemoryType is CU_MEMORYTYPE_ARRAY,
* dstArray specifies the handle of the destination data. dstHost,
* dstDevice, dstPitch and dstHeight are ignored.
*
*
* -
*
srcXInBytes, srcY and srcZ
* specify the base address of the source data for the copy.
*
*
*
*
* For host pointers, the starting address
* is
*
void* Start = (void*)((char*)srcHost+(srcZ*srcHeight+srcY)*srcPitch
* + srcXInBytes);
*
* For device pointers, the starting
* address is
*
CUdeviceptr Start =
* srcDevice+(srcZ*srcHeight+srcY)*srcPitch+srcXInBytes;
*
* For CUDA arrays, srcXInBytes must be
* evenly divisible by the array element size.
*
*
* -
*
dstXInBytes, dstY and dstZ
* specify the base address of the destination data for the copy.
*
*
*
*
* For host pointers, the base address is
*
void* dstStart = (void*)((char*)dstHost+(dstZ*dstHeight+dstY)*dstPitch
* + dstXInBytes);
*
* For device pointers, the starting
* address is
*
CUdeviceptr dstStart =
* dstDevice+(dstZ*dstHeight+dstY)*dstPitch+dstXInBytes;
*
* For CUDA arrays, dstXInBytes must be
* evenly divisible by the array element size.
*
*
* -
*
WidthInBytes, Height and Depth
* specify the width (in bytes), height and depth of the 3D copy being
* performed.
*
*
* -
*
If specified, srcPitch must be
* greater than or equal to WidthInBytes + srcXInBytes, and dstPitch must
* be greater than or equal
* to WidthInBytes + dstXInBytes.
*
*
* -
*
If specified, srcHeight must
* be greater than or equal to Height + srcY, and dstHeight must be
* greater than or equal to Height
* + dstY.
*
*
*
*
* cuMemcpy3D() returns an error if any
* pitch is greater than the maximum allowed
* (CU_DEVICE_ATTRIBUTE_MAX_PITCH).
*
* cuMemcpy3DAsync() is asynchronous and
* can optionally be associated to a stream by passing a non-zero hStream argument. It only works on page-locked host memory and
* returns an error if a pointer to pageable memory is passed as input.
*
* The srcLOD and dstLOD members of the
* CUDA_MEMCPY3D structure must be set to 0.
*
*
* Note:
* Note that this
* function may also return error codes from previous, asynchronous
* launches.
*
*
*
*
*
* @param pCopy Parameters for the memory copy
* @param hStream Stream identifier
*
* @return CUDA_SUCCESS, CUDA_ERROR_DEINITIALIZED, CUDA_ERROR_NOT_INITIALIZED,
* CUDA_ERROR_INVALID_CONTEXT, CUDA_ERROR_INVALID_VALUE
*
* @see JCudaDriver#cuArray3DCreate
* @see JCudaDriver#cuArray3DGetDescriptor
* @see JCudaDriver#cuArrayCreate
* @see JCudaDriver#cuArrayDestroy
* @see JCudaDriver#cuArrayGetDescriptor
* @see JCudaDriver#cuMemAlloc
* @see JCudaDriver#cuMemAllocHost
* @see JCudaDriver#cuMemAllocPitch
* @see JCudaDriver#cuMemcpy2D
* @see JCudaDriver#cuMemcpy2DAsync
* @see JCudaDriver#cuMemcpy2DUnaligned
* @see JCudaDriver#cuMemcpy3D
* @see JCudaDriver#cuMemcpyAtoA
* @see JCudaDriver#cuMemcpyAtoD
* @see JCudaDriver#cuMemcpyAtoH
* @see JCudaDriver#cuMemcpyAtoHAsync
* @see JCudaDriver#cuMemcpyDtoA
* @see JCudaDriver#cuMemcpyDtoD
* @see JCudaDriver#cuMemcpyDtoDAsync
* @see JCudaDriver#cuMemcpyDtoH
* @see JCudaDriver#cuMemcpyDtoHAsync
* @see JCudaDriver#cuMemcpyHtoA
* @see JCudaDriver#cuMemcpyHtoAAsync
* @see JCudaDriver#cuMemcpyHtoD
* @see JCudaDriver#cuMemcpyHtoDAsync
* @see JCudaDriver#cuMemFree
* @see JCudaDriver#cuMemFreeHost
* @see JCudaDriver#cuMemGetAddressRange
* @see JCudaDriver#cuMemGetInfo
* @see JCudaDriver#cuMemHostAlloc
* @see JCudaDriver#cuMemHostGetDevicePointer
* @see JCudaDriver#cuMemsetD2D8
* @see JCudaDriver#cuMemsetD2D8Async
* @see JCudaDriver#cuMemsetD2D16
* @see JCudaDriver#cuMemsetD2D16Async
* @see JCudaDriver#cuMemsetD2D32
* @see JCudaDriver#cuMemsetD2D32Async
* @see JCudaDriver#cuMemsetD8
* @see JCudaDriver#cuMemsetD8Async
* @see JCudaDriver#cuMemsetD16
* @see JCudaDriver#cuMemsetD16Async
* @see JCudaDriver#cuMemsetD32
* @see JCudaDriver#cuMemsetD32Async
*/
public static int cuMemcpy3DAsync(CUDA_MEMCPY3D pCopy, CUstream hStream)
{
    // Invoke the native binding first, then hand its status code to checkResult.
    final int status = cuMemcpy3DAsyncNative(pCopy, hStream);
    return checkResult(status);
}

// Native counterpart of cuMemcpy3DAsync; returns the raw status code.
private static native int cuMemcpy3DAsyncNative(CUDA_MEMCPY3D pCopy, CUstream hStream);
/**
* Copies memory between contexts asynchronously.
*
*
* CUresult cuMemcpy3DPeerAsync (
* const CUDA_MEMCPY3D_PEER* pCopy,
* CUstream hStream )
*
*
* Copies memory between contexts
* asynchronously. Perform a 3D memory copy according to the parameters
* specified in pCopy. See the definition of the CUDA_MEMCPY3D_PEER
* structure for documentation of its parameters.
*
*
* Note:
* Note that this
* function may also return error codes from previous, asynchronous
* launches.
*
*
*
*
*
* @param pCopy Parameters for the memory copy
* @param hStream Stream identifier
*
* @return CUDA_SUCCESS, CUDA_ERROR_DEINITIALIZED, CUDA_ERROR_NOT_INITIALIZED,
* CUDA_ERROR_INVALID_CONTEXT, CUDA_ERROR_INVALID_VALUE
*
* @see JCudaDriver#cuMemcpyDtoD
* @see JCudaDriver#cuMemcpyPeer
* @see JCudaDriver#cuMemcpyDtoDAsync
* @see JCudaDriver#cuMemcpyPeerAsync
* @see JCudaDriver#cuMemcpy3DPeerAsync
*/
public static int cuMemcpy3DPeerAsync(CUDA_MEMCPY3D_PEER pCopy, CUstream hStream)
{
    // Invoke the native binding first, then hand its status code to checkResult.
    final int status = cuMemcpy3DPeerAsyncNative(pCopy, hStream);
    return checkResult(status);
}

// Native counterpart of cuMemcpy3DPeerAsync; returns the raw status code.
private static native int cuMemcpy3DPeerAsyncNative(CUDA_MEMCPY3D_PEER pCopy, CUstream hStream);
/**
* Initializes device memory.
*
*
* CUresult cuMemsetD8 (
* CUdeviceptr dstDevice,
* unsigned char uc,
* size_t N )
*
*
* Initializes device memory. Sets the
* memory range of N 8-bit values to the specified value uc.
*
* Note that this function is asynchronous
* with respect to the host unless dstDevice refers to pinned
* host memory.
*
*
* Note:
* Note that this
* function may also return error codes from previous, asynchronous
* launches.
*
*
*
*
*
* @param dstDevice Destination device pointer
* @param uc Value to set
* @param N Number of elements
*
* @return CUDA_SUCCESS, CUDA_ERROR_DEINITIALIZED, CUDA_ERROR_NOT_INITIALIZED,
* CUDA_ERROR_INVALID_CONTEXT, CUDA_ERROR_INVALID_VALUE
*
* @see JCudaDriver#cuArray3DCreate
* @see JCudaDriver#cuArray3DGetDescriptor
* @see JCudaDriver#cuArrayCreate
* @see JCudaDriver#cuArrayDestroy
* @see JCudaDriver#cuArrayGetDescriptor
* @see JCudaDriver#cuMemAlloc
* @see JCudaDriver#cuMemAllocHost
* @see JCudaDriver#cuMemAllocPitch
* @see JCudaDriver#cuMemcpy2D
* @see JCudaDriver#cuMemcpy2DAsync
* @see JCudaDriver#cuMemcpy2DUnaligned
* @see JCudaDriver#cuMemcpy3D
* @see JCudaDriver#cuMemcpy3DAsync
* @see JCudaDriver#cuMemcpyAtoA
* @see JCudaDriver#cuMemcpyAtoD
* @see JCudaDriver#cuMemcpyAtoH
* @see JCudaDriver#cuMemcpyAtoHAsync
* @see JCudaDriver#cuMemcpyDtoA
* @see JCudaDriver#cuMemcpyDtoD
* @see JCudaDriver#cuMemcpyDtoDAsync
* @see JCudaDriver#cuMemcpyDtoH
* @see JCudaDriver#cuMemcpyDtoHAsync
* @see JCudaDriver#cuMemcpyHtoA
* @see JCudaDriver#cuMemcpyHtoAAsync
* @see JCudaDriver#cuMemcpyHtoD
* @see JCudaDriver#cuMemcpyHtoDAsync
* @see JCudaDriver#cuMemFree
* @see JCudaDriver#cuMemFreeHost
* @see JCudaDriver#cuMemGetAddressRange
* @see JCudaDriver#cuMemGetInfo
* @see JCudaDriver#cuMemHostAlloc
* @see JCudaDriver#cuMemHostGetDevicePointer
* @see JCudaDriver#cuMemsetD2D8
* @see JCudaDriver#cuMemsetD2D8Async
* @see JCudaDriver#cuMemsetD2D16
* @see JCudaDriver#cuMemsetD2D16Async
* @see JCudaDriver#cuMemsetD2D32
* @see JCudaDriver#cuMemsetD2D32Async
* @see JCudaDriver#cuMemsetD8Async
* @see JCudaDriver#cuMemsetD16
* @see JCudaDriver#cuMemsetD16Async
* @see JCudaDriver#cuMemsetD32
* @see JCudaDriver#cuMemsetD32Async
*/
public static int cuMemsetD8(CUdeviceptr dstDevice, byte uc, long N)
{
    // Invoke the native binding first, then hand its status code to checkResult.
    final int status = cuMemsetD8Native(dstDevice, uc, N);
    return checkResult(status);
}

// Native counterpart of cuMemsetD8; returns the raw status code.
private static native int cuMemsetD8Native(CUdeviceptr dstDevice, byte uc, long N);
/**
* Initializes device memory.
*
*
* CUresult cuMemsetD16 (
* CUdeviceptr dstDevice,
* unsigned short us,
* size_t N )
*
*
* Initializes device memory. Sets the
* memory range of N 16-bit values to the specified value us. The dstDevice pointer must be two byte aligned.
*
* Note that this function is asynchronous
* with respect to the host unless dstDevice refers to pinned
* host memory.
*
*
* Note:
* Note that this
* function may also return error codes from previous, asynchronous
* launches.
*
*
*
*
*
* @param dstDevice Destination device pointer
* @param us Value to set
* @param N Number of elements
*
* @return CUDA_SUCCESS, CUDA_ERROR_DEINITIALIZED, CUDA_ERROR_NOT_INITIALIZED,
* CUDA_ERROR_INVALID_CONTEXT, CUDA_ERROR_INVALID_VALUE
*
* @see JCudaDriver#cuArray3DCreate
* @see JCudaDriver#cuArray3DGetDescriptor
* @see JCudaDriver#cuArrayCreate
* @see JCudaDriver#cuArrayDestroy
* @see JCudaDriver#cuArrayGetDescriptor
* @see JCudaDriver#cuMemAlloc
* @see JCudaDriver#cuMemAllocHost
* @see JCudaDriver#cuMemAllocPitch
* @see JCudaDriver#cuMemcpy2D
* @see JCudaDriver#cuMemcpy2DAsync
* @see JCudaDriver#cuMemcpy2DUnaligned
* @see JCudaDriver#cuMemcpy3D
* @see JCudaDriver#cuMemcpy3DAsync
* @see JCudaDriver#cuMemcpyAtoA
* @see JCudaDriver#cuMemcpyAtoD
* @see JCudaDriver#cuMemcpyAtoH
* @see JCudaDriver#cuMemcpyAtoHAsync
* @see JCudaDriver#cuMemcpyDtoA
* @see JCudaDriver#cuMemcpyDtoD
* @see JCudaDriver#cuMemcpyDtoDAsync
* @see JCudaDriver#cuMemcpyDtoH
* @see JCudaDriver#cuMemcpyDtoHAsync
* @see JCudaDriver#cuMemcpyHtoA
* @see JCudaDriver#cuMemcpyHtoAAsync
* @see JCudaDriver#cuMemcpyHtoD
* @see JCudaDriver#cuMemcpyHtoDAsync
* @see JCudaDriver#cuMemFree
* @see JCudaDriver#cuMemFreeHost
* @see JCudaDriver#cuMemGetAddressRange
* @see JCudaDriver#cuMemGetInfo
* @see JCudaDriver#cuMemHostAlloc
* @see JCudaDriver#cuMemHostGetDevicePointer
* @see JCudaDriver#cuMemsetD2D8
* @see JCudaDriver#cuMemsetD2D8Async
* @see JCudaDriver#cuMemsetD2D16
* @see JCudaDriver#cuMemsetD2D16Async
* @see JCudaDriver#cuMemsetD2D32
* @see JCudaDriver#cuMemsetD2D32Async
* @see JCudaDriver#cuMemsetD8
* @see JCudaDriver#cuMemsetD8Async
* @see JCudaDriver#cuMemsetD16Async
* @see JCudaDriver#cuMemsetD32
* @see JCudaDriver#cuMemsetD32Async
*/
public static int cuMemsetD16(CUdeviceptr dstDevice, short us, long N)
{
    // Invoke the native binding first, then hand its status code to checkResult.
    final int status = cuMemsetD16Native(dstDevice, us, N);
    return checkResult(status);
}

// Native counterpart of cuMemsetD16; returns the raw status code.
private static native int cuMemsetD16Native(CUdeviceptr dstDevice, short us, long N);
/**
* Initializes device memory.
*
*
* CUresult cuMemsetD32 (
* CUdeviceptr dstDevice,
* unsigned int ui,
* size_t N )
*
*
* Initializes device memory. Sets the
* memory range of N 32-bit values to the specified value ui. The dstDevice pointer must be four byte aligned.
*
* Note that this function is asynchronous
* with respect to the host unless dstDevice refers to pinned
* host memory.
*
*
* Note:
* Note that this
* function may also return error codes from previous, asynchronous
* launches.
*
*
*
*
*
* @param dstDevice Destination device pointer
* @param ui Value to set
* @param N Number of elements
*
* @return CUDA_SUCCESS, CUDA_ERROR_DEINITIALIZED, CUDA_ERROR_NOT_INITIALIZED,
* CUDA_ERROR_INVALID_CONTEXT, CUDA_ERROR_INVALID_VALUE
*
* @see JCudaDriver#cuArray3DCreate
* @see JCudaDriver#cuArray3DGetDescriptor
* @see JCudaDriver#cuArrayCreate
* @see JCudaDriver#cuArrayDestroy
* @see JCudaDriver#cuArrayGetDescriptor
* @see JCudaDriver#cuMemAlloc
* @see JCudaDriver#cuMemAllocHost
* @see JCudaDriver#cuMemAllocPitch
* @see JCudaDriver#cuMemcpy2D
* @see JCudaDriver#cuMemcpy2DAsync
* @see JCudaDriver#cuMemcpy2DUnaligned
* @see JCudaDriver#cuMemcpy3D
* @see JCudaDriver#cuMemcpy3DAsync
* @see JCudaDriver#cuMemcpyAtoA
* @see JCudaDriver#cuMemcpyAtoD
* @see JCudaDriver#cuMemcpyAtoH
* @see JCudaDriver#cuMemcpyAtoHAsync
* @see JCudaDriver#cuMemcpyDtoA
* @see JCudaDriver#cuMemcpyDtoD
* @see JCudaDriver#cuMemcpyDtoDAsync
* @see JCudaDriver#cuMemcpyDtoH
* @see JCudaDriver#cuMemcpyDtoHAsync
* @see JCudaDriver#cuMemcpyHtoA
* @see JCudaDriver#cuMemcpyHtoAAsync
* @see JCudaDriver#cuMemcpyHtoD
* @see JCudaDriver#cuMemcpyHtoDAsync
* @see JCudaDriver#cuMemFree
* @see JCudaDriver#cuMemFreeHost
* @see JCudaDriver#cuMemGetAddressRange
* @see JCudaDriver#cuMemGetInfo
* @see JCudaDriver#cuMemHostAlloc
* @see JCudaDriver#cuMemHostGetDevicePointer
* @see JCudaDriver#cuMemsetD2D8
* @see JCudaDriver#cuMemsetD2D8Async
* @see JCudaDriver#cuMemsetD2D16
* @see JCudaDriver#cuMemsetD2D16Async
* @see JCudaDriver#cuMemsetD2D32
* @see JCudaDriver#cuMemsetD2D32Async
* @see JCudaDriver#cuMemsetD8
* @see JCudaDriver#cuMemsetD8Async
* @see JCudaDriver#cuMemsetD16
* @see JCudaDriver#cuMemsetD16Async
* @see JCudaDriver#cuMemsetD32Async
*/
public static int cuMemsetD32(CUdeviceptr dstDevice, int ui, long N)
{
    // Invoke the native binding first, then hand its status code to checkResult.
    final int status = cuMemsetD32Native(dstDevice, ui, N);
    return checkResult(status);
}

// Native counterpart of cuMemsetD32; returns the raw status code.
private static native int cuMemsetD32Native(CUdeviceptr dstDevice, int ui, long N);
/**
* Initializes device memory.
*
*
* CUresult cuMemsetD2D8 (
* CUdeviceptr dstDevice,
* size_t dstPitch,
* unsigned char uc,
* size_t Width,
* size_t Height )
*
*
* Initializes device memory. Sets the 2D
* memory range of Width 8-bit values to the specified value
* uc. Height specifies the number of rows to set,
* and dstPitch specifies the number of bytes between each row.
* This function performs fastest when the pitch is one that has been
* passed
* back by cuMemAllocPitch().
*
* Note that this function is asynchronous
* with respect to the host unless dstDevice refers to pinned
* host memory.
*
*
* Note:
* Note that this
* function may also return error codes from previous, asynchronous
* launches.
*
*
*
*
*
* @param dstDevice Destination device pointer
* @param dstPitch Pitch of destination device pointer
* @param uc Value to set
* @param Width Width of row
* @param Height Number of rows
*
* @return CUDA_SUCCESS, CUDA_ERROR_DEINITIALIZED, CUDA_ERROR_NOT_INITIALIZED,
* CUDA_ERROR_INVALID_CONTEXT, CUDA_ERROR_INVALID_VALUE
*
* @see JCudaDriver#cuArray3DCreate
* @see JCudaDriver#cuArray3DGetDescriptor
* @see JCudaDriver#cuArrayCreate
* @see JCudaDriver#cuArrayDestroy
* @see JCudaDriver#cuArrayGetDescriptor
* @see JCudaDriver#cuMemAlloc
* @see JCudaDriver#cuMemAllocHost
* @see JCudaDriver#cuMemAllocPitch
* @see JCudaDriver#cuMemcpy2D
* @see JCudaDriver#cuMemcpy2DAsync
* @see JCudaDriver#cuMemcpy2DUnaligned
* @see JCudaDriver#cuMemcpy3D
* @see JCudaDriver#cuMemcpy3DAsync
* @see JCudaDriver#cuMemcpyAtoA
* @see JCudaDriver#cuMemcpyAtoD
* @see JCudaDriver#cuMemcpyAtoH
* @see JCudaDriver#cuMemcpyAtoHAsync
* @see JCudaDriver#cuMemcpyDtoA
* @see JCudaDriver#cuMemcpyDtoD
* @see JCudaDriver#cuMemcpyDtoDAsync
* @see JCudaDriver#cuMemcpyDtoH
* @see JCudaDriver#cuMemcpyDtoHAsync
* @see JCudaDriver#cuMemcpyHtoA
* @see JCudaDriver#cuMemcpyHtoAAsync
* @see JCudaDriver#cuMemcpyHtoD
* @see JCudaDriver#cuMemcpyHtoDAsync
* @see JCudaDriver#cuMemFree
* @see JCudaDriver#cuMemFreeHost
* @see JCudaDriver#cuMemGetAddressRange
* @see JCudaDriver#cuMemGetInfo
* @see JCudaDriver#cuMemHostAlloc
* @see JCudaDriver#cuMemHostGetDevicePointer
* @see JCudaDriver#cuMemsetD2D8Async
* @see JCudaDriver#cuMemsetD2D16
* @see JCudaDriver#cuMemsetD2D16Async
* @see JCudaDriver#cuMemsetD2D32
* @see JCudaDriver#cuMemsetD2D32Async
* @see JCudaDriver#cuMemsetD8
* @see JCudaDriver#cuMemsetD8Async
* @see JCudaDriver#cuMemsetD16
* @see JCudaDriver#cuMemsetD16Async
* @see JCudaDriver#cuMemsetD32
* @see JCudaDriver#cuMemsetD32Async
*/
public static int cuMemsetD2D8(CUdeviceptr dstDevice, long dstPitch, byte uc, long Width, long Height)
{
    // Invoke the native binding first, then hand its status code to checkResult.
    final int status = cuMemsetD2D8Native(dstDevice, dstPitch, uc, Width, Height);
    return checkResult(status);
}

// Native counterpart of cuMemsetD2D8; returns the raw status code.
private static native int cuMemsetD2D8Native(CUdeviceptr dstDevice, long dstPitch, byte uc, long Width, long Height);
/**
* Initializes device memory.
*
*
* CUresult cuMemsetD2D16 (
* CUdeviceptr dstDevice,
* size_t dstPitch,
* unsigned short us,
* size_t Width,
* size_t Height )
*
*
* Initializes device memory. Sets the 2D
* memory range of Width 16-bit values to the specified value
* us. Height specifies the number of rows to set,
* and dstPitch specifies the number of bytes between each row.
* The dstDevice pointer and dstPitch offset must be
* two byte aligned. This function performs fastest when the pitch is one
* that has been passed back by cuMemAllocPitch().
*
* Note that this function is asynchronous
* with respect to the host unless dstDevice refers to pinned
* host memory.
*
*
* Note:
* Note that this
* function may also return error codes from previous, asynchronous
* launches.
*
*
*
*
*
* @param dstDevice Destination device pointer
* @param dstPitch Pitch of destination device pointer
* @param us Value to set
* @param Width Width of row
* @param Height Number of rows
*
* @return CUDA_SUCCESS, CUDA_ERROR_DEINITIALIZED, CUDA_ERROR_NOT_INITIALIZED,
* CUDA_ERROR_INVALID_CONTEXT, CUDA_ERROR_INVALID_VALUE
*
* @see JCudaDriver#cuArray3DCreate
* @see JCudaDriver#cuArray3DGetDescriptor
* @see JCudaDriver#cuArrayCreate
* @see JCudaDriver#cuArrayDestroy
* @see JCudaDriver#cuArrayGetDescriptor
* @see JCudaDriver#cuMemAlloc
* @see JCudaDriver#cuMemAllocHost
* @see JCudaDriver#cuMemAllocPitch
* @see JCudaDriver#cuMemcpy2D
* @see JCudaDriver#cuMemcpy2DAsync
* @see JCudaDriver#cuMemcpy2DUnaligned
* @see JCudaDriver#cuMemcpy3D
* @see JCudaDriver#cuMemcpy3DAsync
* @see JCudaDriver#cuMemcpyAtoA
* @see JCudaDriver#cuMemcpyAtoD
* @see JCudaDriver#cuMemcpyAtoH
* @see JCudaDriver#cuMemcpyAtoHAsync
* @see JCudaDriver#cuMemcpyDtoA
* @see JCudaDriver#cuMemcpyDtoD
* @see JCudaDriver#cuMemcpyDtoDAsync
* @see JCudaDriver#cuMemcpyDtoH
* @see JCudaDriver#cuMemcpyDtoHAsync
* @see JCudaDriver#cuMemcpyHtoA
* @see JCudaDriver#cuMemcpyHtoAAsync
* @see JCudaDriver#cuMemcpyHtoD
* @see JCudaDriver#cuMemcpyHtoDAsync
* @see JCudaDriver#cuMemFree
* @see JCudaDriver#cuMemFreeHost
* @see JCudaDriver#cuMemGetAddressRange
* @see JCudaDriver#cuMemGetInfo
* @see JCudaDriver#cuMemHostAlloc
* @see JCudaDriver#cuMemHostGetDevicePointer
* @see JCudaDriver#cuMemsetD2D8
* @see JCudaDriver#cuMemsetD2D8Async
* @see JCudaDriver#cuMemsetD2D16Async
* @see JCudaDriver#cuMemsetD2D32
* @see JCudaDriver#cuMemsetD2D32Async
* @see JCudaDriver#cuMemsetD8
* @see JCudaDriver#cuMemsetD8Async
* @see JCudaDriver#cuMemsetD16
* @see JCudaDriver#cuMemsetD16Async
* @see JCudaDriver#cuMemsetD32
* @see JCudaDriver#cuMemsetD32Async
*/
public static int cuMemsetD2D16(CUdeviceptr dstDevice, long dstPitch, short us, long Width, long Height)
{
    // Call the native binding first, then pass its status code through checkResult.
    final int status = cuMemsetD2D16Native(dstDevice, dstPitch, us, Width, Height);
    return checkResult(status);
}
// Native counterpart invoked by cuMemsetD2D16; receives the arguments unchanged.
private static native int cuMemsetD2D16Native(CUdeviceptr dstDevice, long dstPitch, short us, long Width, long Height);
/**
* Initializes device memory.
*
*
* CUresult cuMemsetD2D32 (
* CUdeviceptr dstDevice,
* size_t dstPitch,
* unsigned int ui,
* size_t Width,
* size_t Height )
*
*
* Initializes device memory. Sets the 2D
* memory range of Width 32-bit values to the specified value
* ui. Height specifies the number of rows to set,
* and dstPitch specifies the number of bytes between each row.
* The dstDevice pointer and dstPitch offset must be
* four byte aligned. This function performs fastest when the pitch is
* one that has been passed back by cuMemAllocPitch().
*
* Note that this function is asynchronous
* with respect to the host unless dstDevice refers to pinned
* host memory.
*
*
* Note:
* Note that this
* function may also return error codes from previous, asynchronous
* launches.
*
*
*
*
*
* @param dstDevice Destination device pointer
* @param dstPitch Pitch of destination device pointer
* @param ui Value to set
* @param Width Width of row
* @param Height Number of rows
*
* @return CUDA_SUCCESS, CUDA_ERROR_DEINITIALIZED, CUDA_ERROR_NOT_INITIALIZED,
* CUDA_ERROR_INVALID_CONTEXT, CUDA_ERROR_INVALID_VALUE
*
* @see JCudaDriver#cuArray3DCreate
* @see JCudaDriver#cuArray3DGetDescriptor
* @see JCudaDriver#cuArrayCreate
* @see JCudaDriver#cuArrayDestroy
* @see JCudaDriver#cuArrayGetDescriptor
* @see JCudaDriver#cuMemAlloc
* @see JCudaDriver#cuMemAllocHost
* @see JCudaDriver#cuMemAllocPitch
* @see JCudaDriver#cuMemcpy2D
* @see JCudaDriver#cuMemcpy2DAsync
* @see JCudaDriver#cuMemcpy2DUnaligned
* @see JCudaDriver#cuMemcpy3D
* @see JCudaDriver#cuMemcpy3DAsync
* @see JCudaDriver#cuMemcpyAtoA
* @see JCudaDriver#cuMemcpyAtoD
* @see JCudaDriver#cuMemcpyAtoH
* @see JCudaDriver#cuMemcpyAtoHAsync
* @see JCudaDriver#cuMemcpyDtoA
* @see JCudaDriver#cuMemcpyDtoD
* @see JCudaDriver#cuMemcpyDtoDAsync
* @see JCudaDriver#cuMemcpyDtoH
* @see JCudaDriver#cuMemcpyDtoHAsync
* @see JCudaDriver#cuMemcpyHtoA
* @see JCudaDriver#cuMemcpyHtoAAsync
* @see JCudaDriver#cuMemcpyHtoD
* @see JCudaDriver#cuMemcpyHtoDAsync
* @see JCudaDriver#cuMemFree
* @see JCudaDriver#cuMemFreeHost
* @see JCudaDriver#cuMemGetAddressRange
* @see JCudaDriver#cuMemGetInfo
* @see JCudaDriver#cuMemHostAlloc
* @see JCudaDriver#cuMemHostGetDevicePointer
* @see JCudaDriver#cuMemsetD2D8
* @see JCudaDriver#cuMemsetD2D8Async
* @see JCudaDriver#cuMemsetD2D16
* @see JCudaDriver#cuMemsetD2D16Async
* @see JCudaDriver#cuMemsetD2D32Async
* @see JCudaDriver#cuMemsetD8
* @see JCudaDriver#cuMemsetD8Async
* @see JCudaDriver#cuMemsetD16
* @see JCudaDriver#cuMemsetD16Async
* @see JCudaDriver#cuMemsetD32
* @see JCudaDriver#cuMemsetD32Async
*/
public static int cuMemsetD2D32(CUdeviceptr dstDevice, long dstPitch, int ui, long Width, long Height)
{
    // Call the native binding first, then pass its status code through checkResult.
    final int status = cuMemsetD2D32Native(dstDevice, dstPitch, ui, Width, Height);
    return checkResult(status);
}
// Native counterpart invoked by cuMemsetD2D32; receives the arguments unchanged.
private static native int cuMemsetD2D32Native(CUdeviceptr dstDevice, long dstPitch, int ui, long Width, long Height);
/**
* Sets device memory.
*
*
* CUresult cuMemsetD8Async (
* CUdeviceptr dstDevice,
* unsigned char uc,
* size_t N,
* CUstream hStream )
*
*
* Sets device memory. Sets the memory
* range of N 8-bit values to the specified value uc.
*
* cuMemsetD8Async() is asynchronous and
* can optionally be associated to a stream by passing a non-zero stream argument.
*
*
* Note:
* Note that this
* function may also return error codes from previous, asynchronous
* launches.
*
*
*
*
*
* @param dstDevice Destination device pointer
* @param uc Value to set
* @param N Number of elements
* @param hStream Stream identifier
*
* @return CUDA_SUCCESS, CUDA_ERROR_DEINITIALIZED, CUDA_ERROR_NOT_INITIALIZED,
* CUDA_ERROR_INVALID_CONTEXT, CUDA_ERROR_INVALID_VALUE
*
* @see JCudaDriver#cuArray3DCreate
* @see JCudaDriver#cuArray3DGetDescriptor
* @see JCudaDriver#cuArrayCreate
* @see JCudaDriver#cuArrayDestroy
* @see JCudaDriver#cuArrayGetDescriptor
* @see JCudaDriver#cuMemAlloc
* @see JCudaDriver#cuMemAllocHost
* @see JCudaDriver#cuMemAllocPitch
* @see JCudaDriver#cuMemcpy2D
* @see JCudaDriver#cuMemcpy2DAsync
* @see JCudaDriver#cuMemcpy2DUnaligned
* @see JCudaDriver#cuMemcpy3D
* @see JCudaDriver#cuMemcpy3DAsync
* @see JCudaDriver#cuMemcpyAtoA
* @see JCudaDriver#cuMemcpyAtoD
* @see JCudaDriver#cuMemcpyAtoH
* @see JCudaDriver#cuMemcpyAtoHAsync
* @see JCudaDriver#cuMemcpyDtoA
* @see JCudaDriver#cuMemcpyDtoD
* @see JCudaDriver#cuMemcpyDtoDAsync
* @see JCudaDriver#cuMemcpyDtoH
* @see JCudaDriver#cuMemcpyDtoHAsync
* @see JCudaDriver#cuMemcpyHtoA
* @see JCudaDriver#cuMemcpyHtoAAsync
* @see JCudaDriver#cuMemcpyHtoD
* @see JCudaDriver#cuMemcpyHtoDAsync
* @see JCudaDriver#cuMemFree
* @see JCudaDriver#cuMemFreeHost
* @see JCudaDriver#cuMemGetAddressRange
* @see JCudaDriver#cuMemGetInfo
* @see JCudaDriver#cuMemHostAlloc
* @see JCudaDriver#cuMemHostGetDevicePointer
* @see JCudaDriver#cuMemsetD2D8
* @see JCudaDriver#cuMemsetD2D8Async
* @see JCudaDriver#cuMemsetD2D16
* @see JCudaDriver#cuMemsetD2D16Async
* @see JCudaDriver#cuMemsetD2D32
* @see JCudaDriver#cuMemsetD2D32Async
* @see JCudaDriver#cuMemsetD8
* @see JCudaDriver#cuMemsetD16
* @see JCudaDriver#cuMemsetD16Async
* @see JCudaDriver#cuMemsetD32
* @see JCudaDriver#cuMemsetD32Async
*/
public static int cuMemsetD8Async(CUdeviceptr dstDevice, byte uc, long N, CUstream hStream)
{
    // Call the native binding first, then pass its status code through checkResult.
    final int status = cuMemsetD8AsyncNative(dstDevice, uc, N, hStream);
    return checkResult(status);
}
// Native counterpart invoked by cuMemsetD8Async; receives the arguments unchanged.
private static native int cuMemsetD8AsyncNative(CUdeviceptr dstDevice, byte uc, long N, CUstream hStream);
/**
* Sets device memory.
*
*
* CUresult cuMemsetD16Async (
* CUdeviceptr dstDevice,
* unsigned short us,
* size_t N,
* CUstream hStream )
*
*
* Sets device memory. Sets the memory
* range of N 16-bit values to the specified value us.
* The dstDevice pointer must be two byte aligned.
*
* cuMemsetD16Async() is asynchronous and
* can optionally be associated to a stream by passing a non-zero stream argument.
*
*
* Note:
* Note that this
* function may also return error codes from previous, asynchronous
* launches.
*
*
*
*
*
* @param dstDevice Destination device pointer
* @param us Value to set
* @param N Number of elements
* @param hStream Stream identifier
*
* @return CUDA_SUCCESS, CUDA_ERROR_DEINITIALIZED, CUDA_ERROR_NOT_INITIALIZED,
* CUDA_ERROR_INVALID_CONTEXT, CUDA_ERROR_INVALID_VALUE
*
* @see JCudaDriver#cuArray3DCreate
* @see JCudaDriver#cuArray3DGetDescriptor
* @see JCudaDriver#cuArrayCreate
* @see JCudaDriver#cuArrayDestroy
* @see JCudaDriver#cuArrayGetDescriptor
* @see JCudaDriver#cuMemAlloc
* @see JCudaDriver#cuMemAllocHost
* @see JCudaDriver#cuMemAllocPitch
* @see JCudaDriver#cuMemcpy2D
* @see JCudaDriver#cuMemcpy2DAsync
* @see JCudaDriver#cuMemcpy2DUnaligned
* @see JCudaDriver#cuMemcpy3D
* @see JCudaDriver#cuMemcpy3DAsync
* @see JCudaDriver#cuMemcpyAtoA
* @see JCudaDriver#cuMemcpyAtoD
* @see JCudaDriver#cuMemcpyAtoH
* @see JCudaDriver#cuMemcpyAtoHAsync
* @see JCudaDriver#cuMemcpyDtoA
* @see JCudaDriver#cuMemcpyDtoD
* @see JCudaDriver#cuMemcpyDtoDAsync
* @see JCudaDriver#cuMemcpyDtoH
* @see JCudaDriver#cuMemcpyDtoHAsync
* @see JCudaDriver#cuMemcpyHtoA
* @see JCudaDriver#cuMemcpyHtoAAsync
* @see JCudaDriver#cuMemcpyHtoD
* @see JCudaDriver#cuMemcpyHtoDAsync
* @see JCudaDriver#cuMemFree
* @see JCudaDriver#cuMemFreeHost
* @see JCudaDriver#cuMemGetAddressRange
* @see JCudaDriver#cuMemGetInfo
* @see JCudaDriver#cuMemHostAlloc
* @see JCudaDriver#cuMemHostGetDevicePointer
* @see JCudaDriver#cuMemsetD2D8
* @see JCudaDriver#cuMemsetD2D8Async
* @see JCudaDriver#cuMemsetD2D16
* @see JCudaDriver#cuMemsetD2D16Async
* @see JCudaDriver#cuMemsetD2D32
* @see JCudaDriver#cuMemsetD2D32Async
* @see JCudaDriver#cuMemsetD8
* @see JCudaDriver#cuMemsetD8Async
* @see JCudaDriver#cuMemsetD16
* @see JCudaDriver#cuMemsetD32
* @see JCudaDriver#cuMemsetD32Async
*/
public static int cuMemsetD16Async(CUdeviceptr dstDevice, short us, long N, CUstream hStream)
{
    // Call the native binding first, then pass its status code through checkResult.
    final int status = cuMemsetD16AsyncNative(dstDevice, us, N, hStream);
    return checkResult(status);
}
// Native counterpart invoked by cuMemsetD16Async; receives the arguments unchanged.
private static native int cuMemsetD16AsyncNative(CUdeviceptr dstDevice, short us, long N, CUstream hStream);
/**
* Sets device memory.
*
*
* CUresult cuMemsetD32Async (
* CUdeviceptr dstDevice,
* unsigned int ui,
* size_t N,
* CUstream hStream )
*
*
* Sets device memory. Sets the memory
* range of N 32-bit values to the specified value ui.
* The dstDevice pointer must be four byte aligned.
*
* cuMemsetD32Async() is asynchronous and
* can optionally be associated to a stream by passing a non-zero stream argument.
*
*
* Note:
* Note that this
* function may also return error codes from previous, asynchronous
* launches.
*
*
*
*
*
* @param dstDevice Destination device pointer
* @param ui Value to set
* @param N Number of elements
* @param hStream Stream identifier
*
* @return CUDA_SUCCESS, CUDA_ERROR_DEINITIALIZED, CUDA_ERROR_NOT_INITIALIZED,
* CUDA_ERROR_INVALID_CONTEXT, CUDA_ERROR_INVALID_VALUE
*
* @see JCudaDriver#cuArray3DCreate
* @see JCudaDriver#cuArray3DGetDescriptor
* @see JCudaDriver#cuArrayCreate
* @see JCudaDriver#cuArrayDestroy
* @see JCudaDriver#cuArrayGetDescriptor
* @see JCudaDriver#cuMemAlloc
* @see JCudaDriver#cuMemAllocHost
* @see JCudaDriver#cuMemAllocPitch
* @see JCudaDriver#cuMemcpy2D
* @see JCudaDriver#cuMemcpy2DAsync
* @see JCudaDriver#cuMemcpy2DUnaligned
* @see JCudaDriver#cuMemcpy3D
* @see JCudaDriver#cuMemcpy3DAsync
* @see JCudaDriver#cuMemcpyAtoA
* @see JCudaDriver#cuMemcpyAtoD
* @see JCudaDriver#cuMemcpyAtoH
* @see JCudaDriver#cuMemcpyAtoHAsync
* @see JCudaDriver#cuMemcpyDtoA
* @see JCudaDriver#cuMemcpyDtoD
* @see JCudaDriver#cuMemcpyDtoDAsync
* @see JCudaDriver#cuMemcpyDtoH
* @see JCudaDriver#cuMemcpyDtoHAsync
* @see JCudaDriver#cuMemcpyHtoA
* @see JCudaDriver#cuMemcpyHtoAAsync
* @see JCudaDriver#cuMemcpyHtoD
* @see JCudaDriver#cuMemcpyHtoDAsync
* @see JCudaDriver#cuMemFree
* @see JCudaDriver#cuMemFreeHost
* @see JCudaDriver#cuMemGetAddressRange
* @see JCudaDriver#cuMemGetInfo
* @see JCudaDriver#cuMemHostAlloc
* @see JCudaDriver#cuMemHostGetDevicePointer
* @see JCudaDriver#cuMemsetD2D8
* @see JCudaDriver#cuMemsetD2D8Async
* @see JCudaDriver#cuMemsetD2D16
* @see JCudaDriver#cuMemsetD2D16Async
* @see JCudaDriver#cuMemsetD2D32
* @see JCudaDriver#cuMemsetD2D32Async
* @see JCudaDriver#cuMemsetD8
* @see JCudaDriver#cuMemsetD8Async
* @see JCudaDriver#cuMemsetD16
* @see JCudaDriver#cuMemsetD16Async
* @see JCudaDriver#cuMemsetD32
*/
public static int cuMemsetD32Async(CUdeviceptr dstDevice, int ui, long N, CUstream hStream)
{
    // Call the native binding first, then pass its status code through checkResult.
    final int status = cuMemsetD32AsyncNative(dstDevice, ui, N, hStream);
    return checkResult(status);
}
// Native counterpart invoked by cuMemsetD32Async; receives the arguments unchanged.
private static native int cuMemsetD32AsyncNative(CUdeviceptr dstDevice, int ui, long N, CUstream hStream);
/**
* Sets device memory.
*
*
* CUresult cuMemsetD2D8Async (
* CUdeviceptr dstDevice,
* size_t dstPitch,
* unsigned char uc,
* size_t Width,
* size_t Height,
* CUstream hStream )
*
*
* Sets device memory. Sets the 2D memory
* range of Width 8-bit values to the specified value uc. Height specifies the number of rows to set, and
* dstPitch specifies the number of bytes between each row. This
* function performs fastest when the pitch is one that has been passed
* back by cuMemAllocPitch().
*
* cuMemsetD2D8Async() is asynchronous and
* can optionally be associated to a stream by passing a non-zero stream argument.
*
*
* Note:
* Note that this
* function may also return error codes from previous, asynchronous
* launches.
*
*
*
*
*
* @param dstDevice Destination device pointer
* @param dstPitch Pitch of destination device pointer
* @param uc Value to set
* @param Width Width of row
* @param Height Number of rows
* @param hStream Stream identifier
*
* @return CUDA_SUCCESS, CUDA_ERROR_DEINITIALIZED, CUDA_ERROR_NOT_INITIALIZED,
* CUDA_ERROR_INVALID_CONTEXT, CUDA_ERROR_INVALID_VALUE
*
* @see JCudaDriver#cuArray3DCreate
* @see JCudaDriver#cuArray3DGetDescriptor
* @see JCudaDriver#cuArrayCreate
* @see JCudaDriver#cuArrayDestroy
* @see JCudaDriver#cuArrayGetDescriptor
* @see JCudaDriver#cuMemAlloc
* @see JCudaDriver#cuMemAllocHost
* @see JCudaDriver#cuMemAllocPitch
* @see JCudaDriver#cuMemcpy2D
* @see JCudaDriver#cuMemcpy2DAsync
* @see JCudaDriver#cuMemcpy2DUnaligned
* @see JCudaDriver#cuMemcpy3D
* @see JCudaDriver#cuMemcpy3DAsync
* @see JCudaDriver#cuMemcpyAtoA
* @see JCudaDriver#cuMemcpyAtoD
* @see JCudaDriver#cuMemcpyAtoH
* @see JCudaDriver#cuMemcpyAtoHAsync
* @see JCudaDriver#cuMemcpyDtoA
* @see JCudaDriver#cuMemcpyDtoD
* @see JCudaDriver#cuMemcpyDtoDAsync
* @see JCudaDriver#cuMemcpyDtoH
* @see JCudaDriver#cuMemcpyDtoHAsync
* @see JCudaDriver#cuMemcpyHtoA
* @see JCudaDriver#cuMemcpyHtoAAsync
* @see JCudaDriver#cuMemcpyHtoD
* @see JCudaDriver#cuMemcpyHtoDAsync
* @see JCudaDriver#cuMemFree
* @see JCudaDriver#cuMemFreeHost
* @see JCudaDriver#cuMemGetAddressRange
* @see JCudaDriver#cuMemGetInfo
* @see JCudaDriver#cuMemHostAlloc
* @see JCudaDriver#cuMemHostGetDevicePointer
* @see JCudaDriver#cuMemsetD2D8
* @see JCudaDriver#cuMemsetD2D16
* @see JCudaDriver#cuMemsetD2D16Async
* @see JCudaDriver#cuMemsetD2D32
* @see JCudaDriver#cuMemsetD2D32Async
* @see JCudaDriver#cuMemsetD8
* @see JCudaDriver#cuMemsetD8Async
* @see JCudaDriver#cuMemsetD16
* @see JCudaDriver#cuMemsetD16Async
* @see JCudaDriver#cuMemsetD32
* @see JCudaDriver#cuMemsetD32Async
*/
public static int cuMemsetD2D8Async(CUdeviceptr dstDevice, long dstPitch, byte uc, long Width, long Height, CUstream hStream)
{
    // Call the native binding first, then pass its status code through checkResult.
    final int status = cuMemsetD2D8AsyncNative(dstDevice, dstPitch, uc, Width, Height, hStream);
    return checkResult(status);
}
// Native counterpart invoked by cuMemsetD2D8Async; receives the arguments unchanged.
private static native int cuMemsetD2D8AsyncNative(CUdeviceptr dstDevice, long dstPitch, byte uc, long Width, long Height, CUstream hStream);
/**
* Sets device memory.
*
*
* CUresult cuMemsetD2D16Async (
* CUdeviceptr dstDevice,
* size_t dstPitch,
* unsigned short us,
* size_t Width,
* size_t Height,
* CUstream hStream )
*
*
* Sets device memory. Sets the 2D memory
* range of Width 16-bit values to the specified value us. Height specifies the number of rows to set, and
* dstPitch specifies the number of bytes between each row. The
* dstDevice pointer and dstPitch offset must be two
* byte aligned. This function performs fastest when the pitch is one that
* has been passed back by cuMemAllocPitch().
*
* cuMemsetD2D16Async() is asynchronous
* and can optionally be associated to a stream by passing a non-zero stream argument.
*
*
* Note:
* Note that this
* function may also return error codes from previous, asynchronous
* launches.
*
*
*
*
*
* @param dstDevice Destination device pointer
* @param dstPitch Pitch of destination device pointer
* @param us Value to set
* @param Width Width of row
* @param Height Number of rows
* @param hStream Stream identifier
*
* @return CUDA_SUCCESS, CUDA_ERROR_DEINITIALIZED, CUDA_ERROR_NOT_INITIALIZED,
* CUDA_ERROR_INVALID_CONTEXT, CUDA_ERROR_INVALID_VALUE
*
* @see JCudaDriver#cuArray3DCreate
* @see JCudaDriver#cuArray3DGetDescriptor
* @see JCudaDriver#cuArrayCreate
* @see JCudaDriver#cuArrayDestroy
* @see JCudaDriver#cuArrayGetDescriptor
* @see JCudaDriver#cuMemAlloc
* @see JCudaDriver#cuMemAllocHost
* @see JCudaDriver#cuMemAllocPitch
* @see JCudaDriver#cuMemcpy2D
* @see JCudaDriver#cuMemcpy2DAsync
* @see JCudaDriver#cuMemcpy2DUnaligned
* @see JCudaDriver#cuMemcpy3D
* @see JCudaDriver#cuMemcpy3DAsync
* @see JCudaDriver#cuMemcpyAtoA
* @see JCudaDriver#cuMemcpyAtoD
* @see JCudaDriver#cuMemcpyAtoH
* @see JCudaDriver#cuMemcpyAtoHAsync
* @see JCudaDriver#cuMemcpyDtoA
* @see JCudaDriver#cuMemcpyDtoD
* @see JCudaDriver#cuMemcpyDtoDAsync
* @see JCudaDriver#cuMemcpyDtoH
* @see JCudaDriver#cuMemcpyDtoHAsync
* @see JCudaDriver#cuMemcpyHtoA
* @see JCudaDriver#cuMemcpyHtoAAsync
* @see JCudaDriver#cuMemcpyHtoD
* @see JCudaDriver#cuMemcpyHtoDAsync
* @see JCudaDriver#cuMemFree
* @see JCudaDriver#cuMemFreeHost
* @see JCudaDriver#cuMemGetAddressRange
* @see JCudaDriver#cuMemGetInfo
* @see JCudaDriver#cuMemHostAlloc
* @see JCudaDriver#cuMemHostGetDevicePointer
* @see JCudaDriver#cuMemsetD2D8
* @see JCudaDriver#cuMemsetD2D8Async
* @see JCudaDriver#cuMemsetD2D16
* @see JCudaDriver#cuMemsetD2D32
* @see JCudaDriver#cuMemsetD2D32Async
* @see JCudaDriver#cuMemsetD8
* @see JCudaDriver#cuMemsetD8Async
* @see JCudaDriver#cuMemsetD16
* @see JCudaDriver#cuMemsetD16Async
* @see JCudaDriver#cuMemsetD32
* @see JCudaDriver#cuMemsetD32Async
*/
public static int cuMemsetD2D16Async(CUdeviceptr dstDevice, long dstPitch, short us, long Width, long Height, CUstream hStream)
{
    // Call the native binding first, then pass its status code through checkResult.
    final int status = cuMemsetD2D16AsyncNative(dstDevice, dstPitch, us, Width, Height, hStream);
    return checkResult(status);
}
// Native counterpart invoked by cuMemsetD2D16Async; receives the arguments unchanged.
private static native int cuMemsetD2D16AsyncNative(CUdeviceptr dstDevice, long dstPitch, short us, long Width, long Height, CUstream hStream);
/**
* Sets device memory.
*
*
* CUresult cuMemsetD2D32Async (
* CUdeviceptr dstDevice,
* size_t dstPitch,
* unsigned int ui,
* size_t Width,
* size_t Height,
* CUstream hStream )
*
*
* Sets device memory. Sets the 2D memory
* range of Width 32-bit values to the specified value ui. Height specifies the number of rows to set, and
* dstPitch specifies the number of bytes between each row. The
* dstDevice pointer and dstPitch offset must be four
* byte aligned. This function performs fastest when the pitch is one that
* has been passed back by cuMemAllocPitch().
*
* cuMemsetD2D32Async() is asynchronous
* and can optionally be associated to a stream by passing a non-zero stream argument.
*
*
* Note:
* Note that this
* function may also return error codes from previous, asynchronous
* launches.
*
*
*
*
*
* @param dstDevice Destination device pointer
* @param dstPitch Pitch of destination device pointer
* @param ui Value to set
* @param Width Width of row
* @param Height Number of rows
* @param hStream Stream identifier
*
* @return CUDA_SUCCESS, CUDA_ERROR_DEINITIALIZED, CUDA_ERROR_NOT_INITIALIZED,
* CUDA_ERROR_INVALID_CONTEXT, CUDA_ERROR_INVALID_VALUE
*
* @see JCudaDriver#cuArray3DCreate
* @see JCudaDriver#cuArray3DGetDescriptor
* @see JCudaDriver#cuArrayCreate
* @see JCudaDriver#cuArrayDestroy
* @see JCudaDriver#cuArrayGetDescriptor
* @see JCudaDriver#cuMemAlloc
* @see JCudaDriver#cuMemAllocHost
* @see JCudaDriver#cuMemAllocPitch
* @see JCudaDriver#cuMemcpy2D
* @see JCudaDriver#cuMemcpy2DAsync
* @see JCudaDriver#cuMemcpy2DUnaligned
* @see JCudaDriver#cuMemcpy3D
* @see JCudaDriver#cuMemcpy3DAsync
* @see JCudaDriver#cuMemcpyAtoA
* @see JCudaDriver#cuMemcpyAtoD
* @see JCudaDriver#cuMemcpyAtoH
* @see JCudaDriver#cuMemcpyAtoHAsync
* @see JCudaDriver#cuMemcpyDtoA
* @see JCudaDriver#cuMemcpyDtoD
* @see JCudaDriver#cuMemcpyDtoDAsync
* @see JCudaDriver#cuMemcpyDtoH
* @see JCudaDriver#cuMemcpyDtoHAsync
* @see JCudaDriver#cuMemcpyHtoA
* @see JCudaDriver#cuMemcpyHtoAAsync
* @see JCudaDriver#cuMemcpyHtoD
* @see JCudaDriver#cuMemcpyHtoDAsync
* @see JCudaDriver#cuMemFree
* @see JCudaDriver#cuMemFreeHost
* @see JCudaDriver#cuMemGetAddressRange
* @see JCudaDriver#cuMemGetInfo
* @see JCudaDriver#cuMemHostAlloc
* @see JCudaDriver#cuMemHostGetDevicePointer
* @see JCudaDriver#cuMemsetD2D8
* @see JCudaDriver#cuMemsetD2D8Async
* @see JCudaDriver#cuMemsetD2D16
* @see JCudaDriver#cuMemsetD2D16Async
* @see JCudaDriver#cuMemsetD2D32
* @see JCudaDriver#cuMemsetD8
* @see JCudaDriver#cuMemsetD8Async
* @see JCudaDriver#cuMemsetD16
* @see JCudaDriver#cuMemsetD16Async
* @see JCudaDriver#cuMemsetD32
* @see JCudaDriver#cuMemsetD32Async
*/
public static int cuMemsetD2D32Async(CUdeviceptr dstDevice, long dstPitch, int ui, long Width, long Height, CUstream hStream)
{
    // Call the native binding first, then pass its status code through checkResult.
    final int status = cuMemsetD2D32AsyncNative(dstDevice, dstPitch, ui, Width, Height, hStream);
    return checkResult(status);
}
// Native counterpart invoked by cuMemsetD2D32Async; receives the arguments unchanged.
private static native int cuMemsetD2D32AsyncNative(CUdeviceptr dstDevice, long dstPitch, int ui, long Width, long Height, CUstream hStream);
/**
* Returns information about a function.
*
*
* CUresult cuFuncGetAttribute (
* int* pi,
* CUfunction_attribute attrib,
* CUfunction hfunc )
*
*
* Returns information about a function.
* Returns in *pi the integer value of the attribute attrib on the kernel given by hfunc. The supported
* attributes are:
*
* <ul>
* <li>CU_FUNC_ATTRIBUTE_MAX_THREADS_PER_BLOCK:
* The maximum number of threads per block, beyond which a launch of the
* function would fail. This number depends on both the function and the
* device on which the function is currently loaded.</li>
* <li>CU_FUNC_ATTRIBUTE_SHARED_SIZE_BYTES:
* The size in bytes of statically-allocated shared memory per block
* required by this function. This does not include dynamically-allocated
* shared memory requested by the user at runtime.</li>
* <li>CU_FUNC_ATTRIBUTE_CONST_SIZE_BYTES:
* The size in bytes of user-allocated constant memory required by this
* function.</li>
* <li>CU_FUNC_ATTRIBUTE_LOCAL_SIZE_BYTES:
* The size in bytes of local memory used by each thread of this
* function.</li>
* <li>CU_FUNC_ATTRIBUTE_NUM_REGS:
* The number of registers used by each thread of this function.</li>
* <li>CU_FUNC_ATTRIBUTE_PTX_VERSION:
* The PTX virtual architecture version for which the function was
* compiled. This value is the major PTX version * 10 + the minor PTX
* version, so a PTX version 1.3 function would return the value 13. Note
* that this may return the undefined value of 0 for cubins compiled prior
* to CUDA 3.0.</li>
* <li>CU_FUNC_ATTRIBUTE_BINARY_VERSION:
* The binary architecture version for which the function was compiled.
* This value is the major binary version * 10 + the minor binary version,
* so a binary version 1.3 function would return the value 13. Note that
* this will return a value of 10 for legacy cubins that do not have a
* properly-encoded binary architecture version.</li>
* </ul>
*
*
*
*
*
* Note:
* Note that this
* function may also return error codes from previous, asynchronous
* launches.
*
*
*
*
*
* @param pi Returned attribute value
* @param attrib Attribute requested
* @param func Function to query attribute of
*
* @return CUDA_SUCCESS, CUDA_ERROR_DEINITIALIZED, CUDA_ERROR_NOT_INITIALIZED,
* CUDA_ERROR_INVALID_CONTEXT, CUDA_ERROR_INVALID_HANDLE,
* CUDA_ERROR_INVALID_VALUE
*
* @see JCudaDriver#cuCtxGetCacheConfig
* @see JCudaDriver#cuCtxSetCacheConfig
* @see JCudaDriver#cuFuncSetCacheConfig
* @see JCudaDriver#cuLaunchKernel
*/
public static int cuFuncGetAttribute (int pi[], int attrib, CUfunction func)
{
    // Call the native binding first, then pass its status code through checkResult.
    final int status = cuFuncGetAttributeNative(pi, attrib, func);
    return checkResult(status);
}
// Native counterpart invoked by cuFuncGetAttribute; receives the arguments unchanged.
private static native int cuFuncGetAttributeNative(int pi[], int attrib, CUfunction func);
/**
* Sets the block-dimensions for the function.
*
*
* CUresult cuFuncSetBlockShape (
* CUfunction hfunc,
* int x,
* int y,
* int z )
*
*
* Sets the block-dimensions for the
* function.
* Deprecated. Specifies the x, y, and z dimensions of the thread blocks that are
* created when the kernel given by hfunc is launched.
*
*
* Note:
* Note that this
* function may also return error codes from previous, asynchronous
* launches.
*
*
*
*
*
* @param hfunc Kernel to specify dimensions of
* @param x X dimension
* @param y Y dimension
* @param z Z dimension
*
* @return CUDA_SUCCESS, CUDA_ERROR_DEINITIALIZED, CUDA_ERROR_NOT_INITIALIZED,
* CUDA_ERROR_INVALID_CONTEXT, CUDA_ERROR_INVALID_HANDLE,
* CUDA_ERROR_INVALID_VALUE
*
* @see JCudaDriver#cuFuncSetSharedSize
* @see JCudaDriver#cuFuncSetCacheConfig
* @see JCudaDriver#cuFuncGetAttribute
* @see JCudaDriver#cuParamSetSize
* @see JCudaDriver#cuParamSeti
* @see JCudaDriver#cuParamSetf
* @see JCudaDriver#cuParamSetv
* @see JCudaDriver#cuLaunch
* @see JCudaDriver#cuLaunchGrid
* @see JCudaDriver#cuLaunchGridAsync
* @see JCudaDriver#cuLaunchKernel
*/
public static int cuFuncSetBlockShape(CUfunction hfunc, int x, int y, int z)
{
    // Call the native binding first, then pass its status code through checkResult.
    final int status = cuFuncSetBlockShapeNative(hfunc, x, y, z);
    return checkResult(status);
}
// Native counterpart invoked by cuFuncSetBlockShape; receives the arguments unchanged.
private static native int cuFuncSetBlockShapeNative(CUfunction hfunc, int x, int y, int z);
/**
* Sets the dynamic shared-memory size for the function.
*
*
* CUresult cuFuncSetSharedSize (
* CUfunction hfunc,
* unsigned int bytes )
*
*
* Sets the dynamic shared-memory size for
* the function.
* Deprecated. Sets through bytes
* the amount of dynamic shared memory that will be available to each
* thread block when the kernel given by hfunc is launched.
*
*
* Note:
* Note that this
* function may also return error codes from previous, asynchronous
* launches.
*
*
*
*
*
* @param hfunc Kernel to specify dynamic shared-memory size for
* @param bytes Dynamic shared-memory size per thread block in bytes
*
* @return CUDA_SUCCESS, CUDA_ERROR_DEINITIALIZED, CUDA_ERROR_NOT_INITIALIZED,
* CUDA_ERROR_INVALID_CONTEXT, CUDA_ERROR_INVALID_HANDLE,
* CUDA_ERROR_INVALID_VALUE
*
* @see JCudaDriver#cuFuncSetBlockShape
* @see JCudaDriver#cuFuncSetCacheConfig
* @see JCudaDriver#cuFuncGetAttribute
* @see JCudaDriver#cuParamSetSize
* @see JCudaDriver#cuParamSeti
* @see JCudaDriver#cuParamSetf
* @see JCudaDriver#cuParamSetv
* @see JCudaDriver#cuLaunch
* @see JCudaDriver#cuLaunchGrid
* @see JCudaDriver#cuLaunchGridAsync
* @see JCudaDriver#cuLaunchKernel
*/
public static int cuFuncSetSharedSize(CUfunction hfunc, int bytes)
{
    // Call the native binding first, then pass its status code through checkResult.
    final int status = cuFuncSetSharedSizeNative(hfunc, bytes);
    return checkResult(status);
}
// Native counterpart invoked by cuFuncSetSharedSize; receives the arguments unchanged.
private static native int cuFuncSetSharedSizeNative(CUfunction hfunc, int bytes);
/**
* Sets the preferred cache configuration for a device function.
*
*
* CUresult cuFuncSetCacheConfig (
* CUfunction hfunc,
* CUfunc_cache config )
*
*
* Sets the preferred cache configuration
* for a device function. On devices where the L1 cache and shared memory
* use the same
* hardware resources, this sets through
* config the preferred cache configuration for the device
* function hfunc. This is only a preference. The driver will
* use the requested configuration if possible, but it is free to choose
* a different
* configuration if required to execute hfunc. Any context-wide preference set via cuCtxSetCacheConfig()
* will be overridden by this per-function setting unless the per-function
* setting is CU_FUNC_CACHE_PREFER_NONE. In that case, the current
* context-wide setting will be used.
*
* This setting does nothing on devices
* where the size of the L1 cache and shared memory are fixed.
*
* Launching a kernel with a different
* preference than the most recent preference setting may insert a
* device-side synchronization
* point.
*
* The supported cache configurations are:
*
* <ul>
* <li>CU_FUNC_CACHE_PREFER_NONE: no
* preference for shared memory or L1 (default)</li>
* <li>CU_FUNC_CACHE_PREFER_SHARED:
* prefer larger shared memory and smaller L1 cache</li>
* <li>CU_FUNC_CACHE_PREFER_L1: prefer
* larger L1 cache and smaller shared memory</li>
* <li>CU_FUNC_CACHE_PREFER_EQUAL:
* prefer equal sized L1 cache and shared memory</li>
* </ul>
*
*
*
*
*
* Note:
* Note that this
* function may also return error codes from previous, asynchronous
* launches.
*
*
*
*
*
* @param hfunc Kernel to configure cache for
* @param config Requested cache configuration
*
* @return CUDA_SUCCESS, CUDA_ERROR_INVALID_VALUE, CUDA_ERROR_DEINITIALIZED,
* CUDA_ERROR_NOT_INITIALIZED, CUDA_ERROR_INVALID_CONTEXT
*
* @see JCudaDriver#cuCtxGetCacheConfig
* @see JCudaDriver#cuCtxSetCacheConfig
* @see JCudaDriver#cuFuncGetAttribute
* @see JCudaDriver#cuLaunchKernel
*/
public static int cuFuncSetCacheConfig(CUfunction hfunc, int config)
{
    // Call the native binding first, then pass its status code through checkResult.
    final int status = cuFuncSetCacheConfigNative(hfunc, config);
    return checkResult(status);
}
// Native counterpart invoked by cuFuncSetCacheConfig; receives the arguments unchanged.
private static native int cuFuncSetCacheConfigNative(CUfunction hfunc, int config);
/**
* Sets the shared memory configuration for a device function.
*
*
* CUresult cuFuncSetSharedMemConfig (
* CUfunction hfunc,
* CUsharedconfig config )
*
*
* Sets the shared memory configuration for
* a device function. On devices with configurable shared memory banks,
* this function
* will force all subsequent launches of
* the specified device function to have the given shared memory bank size
* configuration.
* On any given launch of the function, the
* shared memory configuration of the device will be temporarily changed
* if needed to
* suit the function's preferred
* configuration. Changes in shared memory configuration between subsequent
* launches of functions,
* may introduce a device side synchronization
* point.
*
* Any per-function setting of shared
* memory bank size set via cuFuncSetSharedMemConfig will override the
* context wide setting set with cuCtxSetSharedMemConfig.
*
* Changing the shared memory bank size
* will not increase shared memory usage or affect occupancy of kernels,
* but may have major
* effects on performance. Larger bank sizes
* will allow for greater potential bandwidth to shared memory, but will
* change what
* kinds of accesses to shared memory will
* result in bank conflicts.
*
* This function will do nothing on devices
* with fixed shared memory bank size.
*
* The supported bank configurations are:
*
* <ul>
* <li>CU_SHARED_MEM_CONFIG_DEFAULT_BANK_SIZE:
* use the context's shared memory configuration when launching this
* function.</li>
* <li>CU_SHARED_MEM_CONFIG_FOUR_BYTE_BANK_SIZE: set shared memory bank width
* to be natively four bytes when launching this function.</li>
* <li>CU_SHARED_MEM_CONFIG_EIGHT_BYTE_BANK_SIZE: set shared memory bank
* width to be natively eight bytes when launching this function.</li>
* </ul>
*
*
*
*
*
* Note:
* Note that this
* function may also return error codes from previous, asynchronous
* launches.
*
*
*
*
*
* @param hfunc kernel to be given a shared memory config
* @param config requested shared memory configuration
*
* @return CUDA_SUCCESS, CUDA_ERROR_INVALID_VALUE, CUDA_ERROR_DEINITIALIZED,
* CUDA_ERROR_NOT_INITIALIZED, CUDA_ERROR_INVALID_CONTEXT
*
* @see JCudaDriver#cuCtxGetCacheConfig
* @see JCudaDriver#cuCtxSetCacheConfig
* @see JCudaDriver#cuCtxGetSharedMemConfig
* @see JCudaDriver#cuCtxSetSharedMemConfig
* @see JCudaDriver#cuFuncGetAttribute
* @see JCudaDriver#cuLaunchKernel
*/
public static int cuFuncSetSharedMemConfig(CUfunction hfunc, int config)
{
    // Run the native binding first, then route its status code through
    // checkResult; whatever checkResult returns is handed to the caller.
    final int status = cuFuncSetSharedMemConfigNative(hfunc, config);
    return checkResult(status);
}
// Native counterpart of cuFuncSetSharedMemConfig: same arguments, int status code as result.
private static native int cuFuncSetSharedMemConfigNative(CUfunction hfunc, int config);
/**
* Creates a 1D or 2D CUDA array.
*
*
* CUresult cuArrayCreate (
* CUarray* pHandle,
* const CUDA_ARRAY_DESCRIPTOR* pAllocateArray )
*
*
* Creates a 1D or 2D CUDA array. Creates
* a CUDA array according to the CUDA_ARRAY_DESCRIPTOR structure pAllocateArray and returns a handle to the new CUDA array in *pHandle. The CUDA_ARRAY_DESCRIPTOR is defined as:
*
* typedef struct {
* unsigned int Width;
* unsigned int Height;
* CUarray_format Format;
* unsigned int NumChannels;
* } CUDA_ARRAY_DESCRIPTOR;
* where:
*
* -
*
Width, and Height are the width, and height of the CUDA array (in elements);
* the CUDA array is one-dimensional if height is 0, two-dimensional
* otherwise;
*
*
* -
*
* Format specifies the format
* of the elements; CUarray_format is defined as:
*
typedef enum
* CUarray_format_enum {
* CU_AD_FORMAT_UNSIGNED_INT8 = 0x01,
* CU_AD_FORMAT_UNSIGNED_INT16 = 0x02,
* CU_AD_FORMAT_UNSIGNED_INT32 = 0x03,
* CU_AD_FORMAT_SIGNED_INT8 = 0x08,
* CU_AD_FORMAT_SIGNED_INT16 = 0x09,
* CU_AD_FORMAT_SIGNED_INT32 = 0x0a,
* CU_AD_FORMAT_HALF = 0x10,
* CU_AD_FORMAT_FLOAT = 0x20
* } CUarray_format;
*
*
* -
*
NumChannels specifies
* the number of packed components per CUDA array element; it may be 1,
* 2, or 4;
*
*
*
*
* Here are examples of CUDA array
* descriptions:
*
* Description for a CUDA array of 2048
* floats:
*
CUDA_ARRAY_DESCRIPTOR desc;
* desc.Format = CU_AD_FORMAT_FLOAT;
* desc.NumChannels = 1;
* desc.Width = 2048;
* desc.Height = 1;
*
* Description for a 64 x 64 CUDA array of
* floats:
*
CUDA_ARRAY_DESCRIPTOR desc;
* desc.Format = CU_AD_FORMAT_FLOAT;
* desc.NumChannels = 1;
* desc.Width = 64;
* desc.Height = 64;
*
* Description for a width x height CUDA array of 64-bit, 4x16-bit float16's:
*
* CUDA_ARRAY_DESCRIPTOR desc;
* desc.Format = CU_AD_FORMAT_HALF;
* desc.NumChannels = 4;
* desc.Width = width;
* desc.Height = height;
*
* Description for a width x height CUDA array of 16-bit elements, each of which is two 8-bit
* unsigned chars:
*
* CUDA_ARRAY_DESCRIPTOR desc;
* desc.Format = CU_AD_FORMAT_UNSIGNED_INT8;
* desc.NumChannels = 2;
* desc.Width = width;
* desc.Height = height;
*
*
* Note:
* Note that this
* function may also return error codes from previous, asynchronous
* launches.
*
*
*
*
*
* @param pHandle Returned array
* @param pAllocateArray Array descriptor
*
* @return CUDA_SUCCESS, CUDA_ERROR_DEINITIALIZED, CUDA_ERROR_NOT_INITIALIZED,
* CUDA_ERROR_INVALID_CONTEXT, CUDA_ERROR_INVALID_VALUE,
* CUDA_ERROR_OUT_OF_MEMORY, CUDA_ERROR_UNKNOWN
*
* @see JCudaDriver#cuArray3DCreate
* @see JCudaDriver#cuArray3DGetDescriptor
* @see JCudaDriver#cuArrayDestroy
* @see JCudaDriver#cuArrayGetDescriptor
* @see JCudaDriver#cuMemAlloc
* @see JCudaDriver#cuMemAllocHost
* @see JCudaDriver#cuMemAllocPitch
* @see JCudaDriver#cuMemcpy2D
* @see JCudaDriver#cuMemcpy2DAsync
* @see JCudaDriver#cuMemcpy2DUnaligned
* @see JCudaDriver#cuMemcpy3D
* @see JCudaDriver#cuMemcpy3DAsync
* @see JCudaDriver#cuMemcpyAtoA
* @see JCudaDriver#cuMemcpyAtoD
* @see JCudaDriver#cuMemcpyAtoH
* @see JCudaDriver#cuMemcpyAtoHAsync
* @see JCudaDriver#cuMemcpyDtoA
* @see JCudaDriver#cuMemcpyDtoD
* @see JCudaDriver#cuMemcpyDtoDAsync
* @see JCudaDriver#cuMemcpyDtoH
* @see JCudaDriver#cuMemcpyDtoHAsync
* @see JCudaDriver#cuMemcpyHtoA
* @see JCudaDriver#cuMemcpyHtoAAsync
* @see JCudaDriver#cuMemcpyHtoD
* @see JCudaDriver#cuMemcpyHtoDAsync
* @see JCudaDriver#cuMemFree
* @see JCudaDriver#cuMemFreeHost
* @see JCudaDriver#cuMemGetAddressRange
* @see JCudaDriver#cuMemGetInfo
* @see JCudaDriver#cuMemHostAlloc
* @see JCudaDriver#cuMemHostGetDevicePointer
* @see JCudaDriver#cuMemsetD2D8
* @see JCudaDriver#cuMemsetD2D16
* @see JCudaDriver#cuMemsetD2D32
* @see JCudaDriver#cuMemsetD8
* @see JCudaDriver#cuMemsetD16
* @see JCudaDriver#cuMemsetD32
*/
public static int cuArrayCreate(CUarray pHandle, CUDA_ARRAY_DESCRIPTOR pAllocateArray)
{
    // Delegate to the native binding, then pass its status code through
    // checkResult and return the outcome of that check.
    final int status = cuArrayCreateNative(pHandle, pAllocateArray);
    return checkResult(status);
}
// Native counterpart of cuArrayCreate: same arguments, int status code as result.
private static native int cuArrayCreateNative(CUarray pHandle, CUDA_ARRAY_DESCRIPTOR pAllocateArray);
/**
* Get a 1D or 2D CUDA array descriptor.
*
*
* CUresult cuArrayGetDescriptor (
* CUDA_ARRAY_DESCRIPTOR* pArrayDescriptor,
* CUarray hArray )
*
*
* Get a 1D or 2D CUDA array descriptor.
* Returns in *pArrayDescriptor a descriptor containing
* information on the format and dimensions of the CUDA array hArray. It is useful for subroutines that have been passed a CUDA
* array, but need to know the CUDA array parameters for validation
* or other purposes.
*
*
* Note:
* Note that this
* function may also return error codes from previous, asynchronous
* launches.
*
*
*
*
*
* @param pArrayDescriptor Returned array descriptor
* @param hArray Array to get descriptor of
*
* @return CUDA_SUCCESS, CUDA_ERROR_DEINITIALIZED, CUDA_ERROR_NOT_INITIALIZED,
* CUDA_ERROR_INVALID_CONTEXT, CUDA_ERROR_INVALID_VALUE,
* CUDA_ERROR_INVALID_HANDLE
*
* @see JCudaDriver#cuArray3DCreate
* @see JCudaDriver#cuArray3DGetDescriptor
* @see JCudaDriver#cuArrayCreate
* @see JCudaDriver#cuArrayDestroy
* @see JCudaDriver#cuMemAlloc
* @see JCudaDriver#cuMemAllocHost
* @see JCudaDriver#cuMemAllocPitch
* @see JCudaDriver#cuMemcpy2D
* @see JCudaDriver#cuMemcpy2DAsync
* @see JCudaDriver#cuMemcpy2DUnaligned
* @see JCudaDriver#cuMemcpy3D
* @see JCudaDriver#cuMemcpy3DAsync
* @see JCudaDriver#cuMemcpyAtoA
* @see JCudaDriver#cuMemcpyAtoD
* @see JCudaDriver#cuMemcpyAtoH
* @see JCudaDriver#cuMemcpyAtoHAsync
* @see JCudaDriver#cuMemcpyDtoA
* @see JCudaDriver#cuMemcpyDtoD
* @see JCudaDriver#cuMemcpyDtoDAsync
* @see JCudaDriver#cuMemcpyDtoH
* @see JCudaDriver#cuMemcpyDtoHAsync
* @see JCudaDriver#cuMemcpyHtoA
* @see JCudaDriver#cuMemcpyHtoAAsync
* @see JCudaDriver#cuMemcpyHtoD
* @see JCudaDriver#cuMemcpyHtoDAsync
* @see JCudaDriver#cuMemFree
* @see JCudaDriver#cuMemFreeHost
* @see JCudaDriver#cuMemGetAddressRange
* @see JCudaDriver#cuMemGetInfo
* @see JCudaDriver#cuMemHostAlloc
* @see JCudaDriver#cuMemHostGetDevicePointer
* @see JCudaDriver#cuMemsetD2D8
* @see JCudaDriver#cuMemsetD2D16
* @see JCudaDriver#cuMemsetD2D32
* @see JCudaDriver#cuMemsetD8
* @see JCudaDriver#cuMemsetD16
* @see JCudaDriver#cuMemsetD32
*/
public static int cuArrayGetDescriptor(CUDA_ARRAY_DESCRIPTOR pArrayDescriptor, CUarray hArray)
{
    // Delegate to the native binding, then pass its status code through
    // checkResult and return the outcome of that check.
    final int status = cuArrayGetDescriptorNative(pArrayDescriptor, hArray);
    return checkResult(status);
}
// Native counterpart of cuArrayGetDescriptor: same arguments, int status code as result.
private static native int cuArrayGetDescriptorNative(CUDA_ARRAY_DESCRIPTOR pArrayDescriptor, CUarray hArray);
/**
* Destroys a CUDA array.
*
*
* CUresult cuArrayDestroy (
* CUarray hArray )
*
*
* Destroys a CUDA array. Destroys the CUDA
* array hArray.
*
*
* Note:
* Note that this
* function may also return error codes from previous, asynchronous
* launches.
*
*
*
*
*
* @param hArray Array to destroy
*
* @return CUDA_SUCCESS, CUDA_ERROR_DEINITIALIZED, CUDA_ERROR_NOT_INITIALIZED,
* CUDA_ERROR_INVALID_CONTEXT, CUDA_ERROR_INVALID_HANDLE,
* CUDA_ERROR_ARRAY_IS_MAPPED
*
* @see JCudaDriver#cuArray3DCreate
* @see JCudaDriver#cuArray3DGetDescriptor
* @see JCudaDriver#cuArrayCreate
* @see JCudaDriver#cuArrayGetDescriptor
* @see JCudaDriver#cuMemAlloc
* @see JCudaDriver#cuMemAllocHost
* @see JCudaDriver#cuMemAllocPitch
* @see JCudaDriver#cuMemcpy2D
* @see JCudaDriver#cuMemcpy2DAsync
* @see JCudaDriver#cuMemcpy2DUnaligned
* @see JCudaDriver#cuMemcpy3D
* @see JCudaDriver#cuMemcpy3DAsync
* @see JCudaDriver#cuMemcpyAtoA
* @see JCudaDriver#cuMemcpyAtoD
* @see JCudaDriver#cuMemcpyAtoH
* @see JCudaDriver#cuMemcpyAtoHAsync
* @see JCudaDriver#cuMemcpyDtoA
* @see JCudaDriver#cuMemcpyDtoD
* @see JCudaDriver#cuMemcpyDtoDAsync
* @see JCudaDriver#cuMemcpyDtoH
* @see JCudaDriver#cuMemcpyDtoHAsync
* @see JCudaDriver#cuMemcpyHtoA
* @see JCudaDriver#cuMemcpyHtoAAsync
* @see JCudaDriver#cuMemcpyHtoD
* @see JCudaDriver#cuMemcpyHtoDAsync
* @see JCudaDriver#cuMemFree
* @see JCudaDriver#cuMemFreeHost
* @see JCudaDriver#cuMemGetAddressRange
* @see JCudaDriver#cuMemGetInfo
* @see JCudaDriver#cuMemHostAlloc
* @see JCudaDriver#cuMemHostGetDevicePointer
* @see JCudaDriver#cuMemsetD2D8
* @see JCudaDriver#cuMemsetD2D16
* @see JCudaDriver#cuMemsetD2D32
* @see JCudaDriver#cuMemsetD8
* @see JCudaDriver#cuMemsetD16
* @see JCudaDriver#cuMemsetD32
*/
public static int cuArrayDestroy(CUarray hArray)
{
    // Delegate to the native binding, then pass its status code through
    // checkResult and return the outcome of that check.
    final int status = cuArrayDestroyNative(hArray);
    return checkResult(status);
}
// Native counterpart of cuArrayDestroy: same argument, int status code as result.
private static native int cuArrayDestroyNative(CUarray hArray);
/**
* Creates a 3D CUDA array.
*
*
* CUresult cuArray3DCreate (
* CUarray* pHandle,
* const CUDA_ARRAY3D_DESCRIPTOR* pAllocateArray )
*
*
* Creates a 3D CUDA array. Creates a CUDA
* array according to the CUDA_ARRAY3D_DESCRIPTOR structure pAllocateArray and returns a handle to the new CUDA array in *pHandle. The CUDA_ARRAY3D_DESCRIPTOR is defined as:
*
* typedef struct {
* unsigned int Width;
* unsigned int Height;
* unsigned int Depth;
* CUarray_format Format;
* unsigned int NumChannels;
* unsigned int Flags;
* } CUDA_ARRAY3D_DESCRIPTOR;
* where:
*
* -
*
* Width, Height, and Depth are the width, height, and depth of
* the CUDA array (in elements); the following types of CUDA arrays can
* be allocated:
*
* -
*
A 1D array is allocated
* if Height and Depth extents are both zero.
*
*
* -
*
A 2D array is allocated
* if only Depth extent is zero.
*
*
* -
*
A 3D array is allocated
* if all three extents are non-zero.
*
*
* -
*
A 1D layered CUDA
* array is allocated if only Height is zero and the
* CUDA_ARRAY3D_LAYERED flag is set. Each layer is a 1D array. The number
* of layers is determined by the depth extent.
*
*
* -
*
A 2D layered CUDA
* array is allocated if all three extents are non-zero and the
* CUDA_ARRAY3D_LAYERED flag is set. Each layer is a 2D array. The number
* of layers is determined by the depth extent.
*
*
* -
*
A cubemap CUDA array
* is allocated if all three extents are non-zero and the CUDA_ARRAY3D_CUBEMAP
* flag is set. Width must be equal to Height, and
* Depth must be six. A cubemap is a special type of 2D layered
* CUDA array, where the six layers represent the six faces of a cube.
* The order of the six
* layers in memory is the same as that listed in CUarray_cubemap_face.
*
*
* -
*
A cubemap layered CUDA
* array is allocated if all three extents are non-zero, and both,
* CUDA_ARRAY3D_CUBEMAP and CUDA_ARRAY3D_LAYERED flags are set. Width must be equal to Height, and Depth must
* be a multiple of six. A cubemap layered CUDA array is a special type
* of 2D layered CUDA array that consists of a collection
* of cubemaps. The first
* six layers represent the first cubemap, the next six layers form the
* second cubemap, and so on.
*
*
*
*
*
*
*
*
* -
*
* Format specifies the format
* of the elements; CUarray_format is defined as:
*
typedef enum
* CUarray_format_enum {
* CU_AD_FORMAT_UNSIGNED_INT8 = 0x01,
* CU_AD_FORMAT_UNSIGNED_INT16 = 0x02,
* CU_AD_FORMAT_UNSIGNED_INT32 = 0x03,
* CU_AD_FORMAT_SIGNED_INT8 = 0x08,
* CU_AD_FORMAT_SIGNED_INT16 = 0x09,
* CU_AD_FORMAT_SIGNED_INT32 = 0x0a,
* CU_AD_FORMAT_HALF = 0x10,
* CU_AD_FORMAT_FLOAT = 0x20
* } CUarray_format;
*
*
*
*
*
* -
*
NumChannels specifies
* the number of packed components per CUDA array element; it may be 1,
* 2, or 4;
*
*
*
*
*
* -
*
* Flags may be set to
*
* -
*
CUDA_ARRAY3D_LAYERED
* to enable creation of layered CUDA arrays. If this flag is set, Depth specifies the number of layers, not the depth of a 3D
* array.
*
*
* -
*
CUDA_ARRAY3D_SURFACE_LDST
* to enable surface references to be bound to the CUDA array. If this
* flag is not set, cuSurfRefSetArray will fail when attempting to bind
* the CUDA array to a surface reference.
*
*
* -
*
CUDA_ARRAY3D_CUBEMAP
* to enable creation of cubemaps. If this flag is set, Width
* must be equal to Height, and Depth must be six. If
* the CUDA_ARRAY3D_LAYERED flag is also set, then Depth must
* be a multiple of six.
*
*
* -
*
CUDA_ARRAY3D_TEXTURE_GATHER
* to indicate that the CUDA array will be used for texture gather.
* Texture gather can only be performed on 2D CUDA arrays.
*
*
*
*
*
*
*
* Width, Height and
* Depth must meet certain size requirements as listed in the
* following table. All values are specified in elements. Note that for
* brevity's sake, the full name of the
* device attribute is not specified. For ex., TEXTURE1D_WIDTH refers to
* the device attribute
* CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE1D_WIDTH.
*
* Note that 2D CUDA arrays have different
* size requirements if the CUDA_ARRAY3D_TEXTURE_GATHER flag is set. Width and Height must not be greater than
* CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE2D_GATHER_WIDTH and
* CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE2D_GATHER_HEIGHT respectively, in
* that case.
*
*
*
*
*
*
* CUDA array
* type
*
*
*
* Valid extents
* that must always be met
* {(width range in
* elements), (height range), (depth range)}
*
*
*
* Valid extents
* with CUDA_ARRAY3D_SURFACE_LDST set
* {(width range in
* elements), (height range), (depth range)}
*
*
*
*
*
* 1D
*
*
* { (1,TEXTURE1D_WIDTH),
* 0, 0 }
*
*
*
* { (1,SURFACE1D_WIDTH),
* 0, 0 }
*
*
*
*
*
* 2D
*
*
* { (1,TEXTURE2D_WIDTH),
* (1,TEXTURE2D_HEIGHT), 0 }
*
*
*
* { (1,SURFACE2D_WIDTH),
* (1,SURFACE2D_HEIGHT), 0 }
*
*
*
*
*
* 3D
*
*
* { (1,TEXTURE3D_WIDTH),
* (1,TEXTURE3D_HEIGHT), (1,TEXTURE3D_DEPTH) }
* OR
* {
* (1,TEXTURE3D_WIDTH_ALTERNATE), (1,TEXTURE3D_HEIGHT_ALTERNATE),
* (1,TEXTURE3D_DEPTH_ALTERNATE) }
*
*
*
* { (1,SURFACE3D_WIDTH),
* (1,SURFACE3D_HEIGHT), (1,SURFACE3D_DEPTH) }
*
*
*
*
*
* 1D Layered
*
*
* {
* (1,TEXTURE1D_LAYERED_WIDTH), 0, (1,TEXTURE1D_LAYERED_LAYERS) }
*
*
*
* {
* (1,SURFACE1D_LAYERED_WIDTH), 0, (1,SURFACE1D_LAYERED_LAYERS) }
*
*
*
*
*
* 2D Layered
*
*
* {
* (1,TEXTURE2D_LAYERED_WIDTH), (1,TEXTURE2D_LAYERED_HEIGHT),
* (1,TEXTURE2D_LAYERED_LAYERS) }
*
*
*
* {
* (1,SURFACE2D_LAYERED_WIDTH), (1,SURFACE2D_LAYERED_HEIGHT),
* (1,SURFACE2D_LAYERED_LAYERS) }
*
*
*
*
*
* Cubemap
*
*
* { (1,TEXTURECUBEMAP_WIDTH),
* (1,TEXTURECUBEMAP_WIDTH), 6 }
*
*
*
* { (1,SURFACECUBEMAP_WIDTH),
* (1,SURFACECUBEMAP_WIDTH), 6 }
*
*
*
*
*
* Cubemap Layered
*
*
* {
* (1,TEXTURECUBEMAP_LAYERED_WIDTH), (1,TEXTURECUBEMAP_LAYERED_WIDTH),
* (1,TEXTURECUBEMAP_LAYERED_LAYERS) }
*
*
*
* {
* (1,SURFACECUBEMAP_LAYERED_WIDTH), (1,SURFACECUBEMAP_LAYERED_WIDTH),
* (1,SURFACECUBEMAP_LAYERED_LAYERS) }
*
*
*
*
*
*
*
* Here are examples of CUDA array
* descriptions:
*
* Description for a CUDA array of 2048
* floats:
*
CUDA_ARRAY3D_DESCRIPTOR desc;
* desc.Format = CU_AD_FORMAT_FLOAT;
* desc.NumChannels = 1;
* desc.Width = 2048;
* desc.Height = 0;
* desc.Depth = 0;
*
* Description for a 64 x 64 CUDA array of
* floats:
*
CUDA_ARRAY3D_DESCRIPTOR desc;
* desc.Format = CU_AD_FORMAT_FLOAT;
* desc.NumChannels = 1;
* desc.Width = 64;
* desc.Height = 64;
* desc.Depth = 0;
*
* Description for a width x height x depth CUDA array of 64-bit, 4x16-bit float16's:
*
CUDA_ARRAY3D_DESCRIPTOR desc;
* desc.Format = CU_AD_FORMAT_HALF;
* desc.NumChannels = 4;
* desc.Width = width;
* desc.Height = height;
* desc.Depth = depth;
*
*
* Note:
* Note that this
* function may also return error codes from previous, asynchronous
* launches.
*
*
*
*
*
* @param pHandle Returned array
* @param pAllocateArray 3D array descriptor
*
* @return CUDA_SUCCESS, CUDA_ERROR_DEINITIALIZED, CUDA_ERROR_NOT_INITIALIZED,
* CUDA_ERROR_INVALID_CONTEXT, CUDA_ERROR_INVALID_VALUE,
* CUDA_ERROR_OUT_OF_MEMORY, CUDA_ERROR_UNKNOWN
*
* @see JCudaDriver#cuArray3DGetDescriptor
* @see JCudaDriver#cuArrayCreate
* @see JCudaDriver#cuArrayDestroy
* @see JCudaDriver#cuArrayGetDescriptor
* @see JCudaDriver#cuMemAlloc
* @see JCudaDriver#cuMemAllocHost
* @see JCudaDriver#cuMemAllocPitch
* @see JCudaDriver#cuMemcpy2D
* @see JCudaDriver#cuMemcpy2DAsync
* @see JCudaDriver#cuMemcpy2DUnaligned
* @see JCudaDriver#cuMemcpy3D
* @see JCudaDriver#cuMemcpy3DAsync
* @see JCudaDriver#cuMemcpyAtoA
* @see JCudaDriver#cuMemcpyAtoD
* @see JCudaDriver#cuMemcpyAtoH
* @see JCudaDriver#cuMemcpyAtoHAsync
* @see JCudaDriver#cuMemcpyDtoA
* @see JCudaDriver#cuMemcpyDtoD
* @see JCudaDriver#cuMemcpyDtoDAsync
* @see JCudaDriver#cuMemcpyDtoH
* @see JCudaDriver#cuMemcpyDtoHAsync
* @see JCudaDriver#cuMemcpyHtoA
* @see JCudaDriver#cuMemcpyHtoAAsync
* @see JCudaDriver#cuMemcpyHtoD
* @see JCudaDriver#cuMemcpyHtoDAsync
* @see JCudaDriver#cuMemFree
* @see JCudaDriver#cuMemFreeHost
* @see JCudaDriver#cuMemGetAddressRange
* @see JCudaDriver#cuMemGetInfo
* @see JCudaDriver#cuMemHostAlloc
* @see JCudaDriver#cuMemHostGetDevicePointer
* @see JCudaDriver#cuMemsetD2D8
* @see JCudaDriver#cuMemsetD2D16
* @see JCudaDriver#cuMemsetD2D32
* @see JCudaDriver#cuMemsetD8
* @see JCudaDriver#cuMemsetD16
* @see JCudaDriver#cuMemsetD32
*/
public static int cuArray3DCreate(CUarray pHandle, CUDA_ARRAY3D_DESCRIPTOR pAllocateArray)
{
    // Delegate to the native binding, then pass its status code through
    // checkResult and return the outcome of that check.
    final int status = cuArray3DCreateNative(pHandle, pAllocateArray);
    return checkResult(status);
}
// Native counterpart of cuArray3DCreate: same arguments, int status code as result.
private static native int cuArray3DCreateNative(CUarray pHandle, CUDA_ARRAY3D_DESCRIPTOR pAllocateArray);
/**
* Get a 3D CUDA array descriptor.
*
*
* CUresult cuArray3DGetDescriptor (
* CUDA_ARRAY3D_DESCRIPTOR* pArrayDescriptor,
* CUarray hArray )
*
*
* Get a 3D CUDA array descriptor. Returns
* in *pArrayDescriptor a descriptor containing information on
* the format and dimensions of the CUDA array hArray. It is
* useful for subroutines that have been passed a CUDA array, but need to
* know the CUDA array parameters for validation
* or other purposes.
*
* This function may be called on 1D and
* 2D arrays, in which case the Height and/or Depth
* members of the descriptor struct will be set to 0.
*
*
* Note:
* Note that this
* function may also return error codes from previous, asynchronous
* launches.
*
*
*
*
*
* @param pArrayDescriptor Returned 3D array descriptor
* @param hArray 3D array to get descriptor of
*
* @return CUDA_SUCCESS, CUDA_ERROR_DEINITIALIZED, CUDA_ERROR_NOT_INITIALIZED,
* CUDA_ERROR_INVALID_CONTEXT, CUDA_ERROR_INVALID_VALUE,
* CUDA_ERROR_INVALID_HANDLE
*
* @see JCudaDriver#cuArray3DCreate
* @see JCudaDriver#cuArrayCreate
* @see JCudaDriver#cuArrayDestroy
* @see JCudaDriver#cuArrayGetDescriptor
* @see JCudaDriver#cuMemAlloc
* @see JCudaDriver#cuMemAllocHost
* @see JCudaDriver#cuMemAllocPitch
* @see JCudaDriver#cuMemcpy2D
* @see JCudaDriver#cuMemcpy2DAsync
* @see JCudaDriver#cuMemcpy2DUnaligned
* @see JCudaDriver#cuMemcpy3D
* @see JCudaDriver#cuMemcpy3DAsync
* @see JCudaDriver#cuMemcpyAtoA
* @see JCudaDriver#cuMemcpyAtoD
* @see JCudaDriver#cuMemcpyAtoH
* @see JCudaDriver#cuMemcpyAtoHAsync
* @see JCudaDriver#cuMemcpyDtoA
* @see JCudaDriver#cuMemcpyDtoD
* @see JCudaDriver#cuMemcpyDtoDAsync
* @see JCudaDriver#cuMemcpyDtoH
* @see JCudaDriver#cuMemcpyDtoHAsync
* @see JCudaDriver#cuMemcpyHtoA
* @see JCudaDriver#cuMemcpyHtoAAsync
* @see JCudaDriver#cuMemcpyHtoD
* @see JCudaDriver#cuMemcpyHtoDAsync
* @see JCudaDriver#cuMemFree
* @see JCudaDriver#cuMemFreeHost
* @see JCudaDriver#cuMemGetAddressRange
* @see JCudaDriver#cuMemGetInfo
* @see JCudaDriver#cuMemHostAlloc
* @see JCudaDriver#cuMemHostGetDevicePointer
* @see JCudaDriver#cuMemsetD2D8
* @see JCudaDriver#cuMemsetD2D16
* @see JCudaDriver#cuMemsetD2D32
* @see JCudaDriver#cuMemsetD8
* @see JCudaDriver#cuMemsetD16
* @see JCudaDriver#cuMemsetD32
*/
public static int cuArray3DGetDescriptor(CUDA_ARRAY3D_DESCRIPTOR pArrayDescriptor, CUarray hArray)
{
    // Delegate to the native binding, then pass its status code through
    // checkResult and return the outcome of that check.
    final int status = cuArray3DGetDescriptorNative(pArrayDescriptor, hArray);
    return checkResult(status);
}
// Native counterpart of cuArray3DGetDescriptor: same arguments, int status code as result.
private static native int cuArray3DGetDescriptorNative(CUDA_ARRAY3D_DESCRIPTOR pArrayDescriptor, CUarray hArray);
/**
* Creates a CUDA mipmapped array.
*
*
* CUresult cuMipmappedArrayCreate (
* CUmipmappedArray* pHandle,
* const CUDA_ARRAY3D_DESCRIPTOR* pMipmappedArrayDesc,
* unsigned int numMipmapLevels )
*
*
* Creates a CUDA mipmapped array. Creates
* a CUDA mipmapped array according to the CUDA_ARRAY3D_DESCRIPTOR
* structure pMipmappedArrayDesc and returns a handle to the
* new CUDA mipmapped array in *pHandle. numMipmapLevels
* specifies the number of mipmap levels to be allocated. This value is
* clamped to the range [1, 1 + floor(log2(max(width, height,
* depth)))].
*
* The CUDA_ARRAY3D_DESCRIPTOR is defined
* as:
*
* typedef struct {
* unsigned int Width;
* unsigned int Height;
* unsigned int Depth;
* CUarray_format Format;
* unsigned int NumChannels;
* unsigned int Flags;
* } CUDA_ARRAY3D_DESCRIPTOR;
* where:
*
* -
*
* Width, Height, and Depth are the width, height, and depth of
* the CUDA array (in elements); the following types of CUDA arrays can
* be allocated:
*
* -
*
A 1D mipmapped array
* is allocated if Height and Depth extents are both
* zero.
*
*
* -
*
A 2D mipmapped array
* is allocated if only Depth extent is zero.
*
*
* -
*
A 3D mipmapped array
* is allocated if all three extents are non-zero.
*
*
* -
*
A 1D layered CUDA
* mipmapped array is allocated if only Height is zero and the
* CUDA_ARRAY3D_LAYERED flag is set. Each layer is a 1D array. The number
* of layers is determined by the depth extent.
*
*
* -
*
A 2D layered CUDA
* mipmapped array is allocated if all three extents are non-zero and the
* CUDA_ARRAY3D_LAYERED flag is set. Each layer is a 2D array. The number
* of layers is determined by the depth extent.
*
*
* -
*
A cubemap CUDA
* mipmapped array is allocated if all three extents are non-zero and the
* CUDA_ARRAY3D_CUBEMAP flag is set. Width must be equal to Height, and Depth must be six. A cubemap is a special
* type of 2D layered CUDA array, where the six layers represent the six
* faces of a cube.
* The order of the six
* layers in memory is the same as that listed in CUarray_cubemap_face.
*
*
* -
*
A cubemap layered CUDA
* mipmapped array is allocated if all three extents are non-zero, and
* both, CUDA_ARRAY3D_CUBEMAP and CUDA_ARRAY3D_LAYERED flags are set. Width must be equal to Height, and Depth must
* be a multiple of six. A cubemap layered CUDA array is a special type
* of 2D layered CUDA array that consists of a collection
* of cubemaps. The first
* six layers represent the first cubemap, the next six layers form the
* second cubemap, and so on.
*
*
*
*
*
*
*
*
* -
*
* Format specifies the format
* of the elements; CUarray_format is defined as:
*
typedef enum
* CUarray_format_enum {
* CU_AD_FORMAT_UNSIGNED_INT8 = 0x01,
* CU_AD_FORMAT_UNSIGNED_INT16 = 0x02,
* CU_AD_FORMAT_UNSIGNED_INT32 = 0x03,
* CU_AD_FORMAT_SIGNED_INT8 = 0x08,
* CU_AD_FORMAT_SIGNED_INT16 = 0x09,
* CU_AD_FORMAT_SIGNED_INT32 = 0x0a,
* CU_AD_FORMAT_HALF = 0x10,
* CU_AD_FORMAT_FLOAT = 0x20
* } CUarray_format;
*
*
*
*
*
* -
*
NumChannels specifies
* the number of packed components per CUDA array element; it may be 1,
* 2, or 4;
*
*
*
*
*
* -
*
* Flags may be set to
*
* -
*
CUDA_ARRAY3D_LAYERED
* to enable creation of layered CUDA mipmapped arrays. If this flag is
* set, Depth specifies the number of layers, not the depth of
* a 3D array.
*
*
* -
*
CUDA_ARRAY3D_SURFACE_LDST
* to enable surface references to be bound to individual mipmap levels
* of the CUDA mipmapped array. If this flag is not set,
* cuSurfRefSetArray will
* fail when attempting to bind a mipmap level of the CUDA mipmapped array
* to a surface reference.
*
*
* -
*
CUDA_ARRAY3D_CUBEMAP
* to enable creation of mipmapped cubemaps. If this flag is set, Width must be equal to Height, and Depth must
* be six. If the CUDA_ARRAY3D_LAYERED flag is also set, then Depth must be a multiple of six.
*
*
* -
*
CUDA_ARRAY3D_TEXTURE_GATHER
* to indicate that the CUDA mipmapped array will be used for texture
* gather. Texture gather can only be performed on 2D CUDA
* mipmapped arrays.
*
*
*
*
*
*
*
* Width, Height and
* Depth must meet certain size requirements as listed in the
* following table. All values are specified in elements. Note that for
* brevity's sake, the full name of the
* device attribute is not specified. For ex., TEXTURE1D_MIPMAPPED_WIDTH
* refers to the device
* attribute
* CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE1D_MIPMAPPED_WIDTH.
*
*
*
*
*
*
* CUDA array
* type
*
*
*
* Valid extents
* that must always be met
* {(width range in
* elements), (height range), (depth range)}
*
*
*
*
*
* 1D
*
*
* {
* (1,TEXTURE1D_MIPMAPPED_WIDTH), 0, 0 }
*
*
*
*
*
* 2D
*
*
* {
* (1,TEXTURE2D_MIPMAPPED_WIDTH), (1,TEXTURE2D_MIPMAPPED_HEIGHT), 0 }
*
*
*
*
*
* 3D
*
*
* { (1,TEXTURE3D_WIDTH),
* (1,TEXTURE3D_HEIGHT), (1,TEXTURE3D_DEPTH) }
* OR
* {
* (1,TEXTURE3D_WIDTH_ALTERNATE), (1,TEXTURE3D_HEIGHT_ALTERNATE),
* (1,TEXTURE3D_DEPTH_ALTERNATE) }
*
*
*
*
*
* 1D Layered
*
*
* {
* (1,TEXTURE1D_LAYERED_WIDTH), 0, (1,TEXTURE1D_LAYERED_LAYERS) }
*
*
*
*
*
* 2D Layered
*
*
* {
* (1,TEXTURE2D_LAYERED_WIDTH), (1,TEXTURE2D_LAYERED_HEIGHT),
* (1,TEXTURE2D_LAYERED_LAYERS) }
*
*
*
*
*
* Cubemap
*
*
* { (1,TEXTURECUBEMAP_WIDTH),
* (1,TEXTURECUBEMAP_WIDTH), 6 }
*
*
*
*
*
* Cubemap Layered
*
*
* {
* (1,TEXTURECUBEMAP_LAYERED_WIDTH), (1,TEXTURECUBEMAP_LAYERED_WIDTH),
* (1,TEXTURECUBEMAP_LAYERED_LAYERS) }
*
*
*
*
*
*
*
*
* Note:
* Note that this
* function may also return error codes from previous, asynchronous
* launches.
*
*
*
*
*
* @param pHandle Returned mipmapped array
* @param pMipmappedArrayDesc mipmapped array descriptor
* @param numMipmapLevels Number of mipmap levels
*
* @return CUDA_SUCCESS, CUDA_ERROR_DEINITIALIZED, CUDA_ERROR_NOT_INITIALIZED,
* CUDA_ERROR_INVALID_CONTEXT, CUDA_ERROR_INVALID_VALUE,
* CUDA_ERROR_OUT_OF_MEMORY, CUDA_ERROR_UNKNOWN
*
* @see JCudaDriver#cuMipmappedArrayDestroy
* @see JCudaDriver#cuMipmappedArrayGetLevel
* @see JCudaDriver#cuArrayCreate
*/
public static int cuMipmappedArrayCreate(CUmipmappedArray pHandle, CUDA_ARRAY3D_DESCRIPTOR pMipmappedArrayDesc, int numMipmapLevels)
{
    // Delegate to the native binding, then pass its status code through
    // checkResult and return the outcome of that check.
    final int status = cuMipmappedArrayCreateNative(
        pHandle, pMipmappedArrayDesc, numMipmapLevels);
    return checkResult(status);
}
// Native counterpart of cuMipmappedArrayCreate: same arguments, int status code as result.
private static native int cuMipmappedArrayCreateNative(CUmipmappedArray pHandle, CUDA_ARRAY3D_DESCRIPTOR pMipmappedArrayDesc, int numMipmapLevels);
/**
* Gets a mipmap level of a CUDA mipmapped array.
*
*
* CUresult cuMipmappedArrayGetLevel (
* CUarray* pLevelArray,
* CUmipmappedArray hMipmappedArray,
* unsigned int level )
*
*
* Gets a mipmap level of a CUDA mipmapped
* array. Returns in *pLevelArray a CUDA array that represents
* a single mipmap level of the CUDA mipmapped array hMipmappedArray.
*
* If level is greater than the
* maximum number of levels in this mipmapped array, CUDA_ERROR_INVALID_VALUE
* is returned.
*
*
* Note:
* Note that this
* function may also return error codes from previous, asynchronous
* launches.
*
*
*
*
*
* @param pLevelArray Returned mipmap level CUDA array
* @param hMipmappedArray CUDA mipmapped array
* @param level Mipmap level
*
* @return CUDA_SUCCESS, CUDA_ERROR_DEINITIALIZED, CUDA_ERROR_NOT_INITIALIZED,
* CUDA_ERROR_INVALID_CONTEXT, CUDA_ERROR_INVALID_VALUE,
* CUDA_ERROR_INVALID_HANDLE
*
* @see JCudaDriver#cuMipmappedArrayCreate
* @see JCudaDriver#cuMipmappedArrayDestroy
* @see JCudaDriver#cuArrayCreate
*/
public static int cuMipmappedArrayGetLevel(CUarray pLevelArray, CUmipmappedArray hMipmappedArray, int level)
{
    // Delegate to the native binding, then pass its status code through
    // checkResult and return the outcome of that check.
    final int status = cuMipmappedArrayGetLevelNative(
        pLevelArray, hMipmappedArray, level);
    return checkResult(status);
}
// Native counterpart of cuMipmappedArrayGetLevel: same arguments, int status code as result.
private static native int cuMipmappedArrayGetLevelNative(CUarray pLevelArray, CUmipmappedArray hMipmappedArray, int level);
/**
* Destroys a CUDA mipmapped array.
*
*
* CUresult cuMipmappedArrayDestroy (
* CUmipmappedArray hMipmappedArray )
*
*
* Destroys a CUDA mipmapped array. Destroys
* the CUDA mipmapped array hMipmappedArray.
*
*
* Note:
* Note that this
* function may also return error codes from previous, asynchronous
* launches.
*
*
*
*
*
* @param hMipmappedArray Mipmapped array to destroy
*
* @return CUDA_SUCCESS, CUDA_ERROR_DEINITIALIZED, CUDA_ERROR_NOT_INITIALIZED,
* CUDA_ERROR_INVALID_CONTEXT, CUDA_ERROR_INVALID_HANDLE,
* CUDA_ERROR_ARRAY_IS_MAPPED
*
* @see JCudaDriver#cuMipmappedArrayCreate
* @see JCudaDriver#cuMipmappedArrayGetLevel
* @see JCudaDriver#cuArrayCreate
*/
public static int cuMipmappedArrayDestroy(CUmipmappedArray hMipmappedArray)
{
    // Delegate to the native binding, then pass its status code through
    // checkResult and return the outcome of that check.
    final int status = cuMipmappedArrayDestroyNative(hMipmappedArray);
    return checkResult(status);
}
// Native counterpart of cuMipmappedArrayDestroy: same argument, int status code as result.
private static native int cuMipmappedArrayDestroyNative(CUmipmappedArray hMipmappedArray);
/**
* Creates a texture reference.
*
*
* CUresult cuTexRefCreate (
* CUtexref* pTexRef )
*
*
* Creates a texture reference.
* Deprecated. Creates a texture reference
* and returns its handle in *pTexRef. Once created, the
* application must call cuTexRefSetArray() or cuTexRefSetAddress() to
* associate the reference with allocated memory. Other texture reference
* functions are used to specify the format and interpretation
* (addressing, filtering, etc.) to be used
* when the memory is read through this texture reference.
*
*
*
* @param pTexRef Returned texture reference
*
* @return CUDA_SUCCESS, CUDA_ERROR_DEINITIALIZED, CUDA_ERROR_NOT_INITIALIZED,
* CUDA_ERROR_INVALID_CONTEXT, CUDA_ERROR_INVALID_VALUE
*
* @see JCudaDriver#cuTexRefDestroy
*/
public static int cuTexRefCreate(CUtexref pTexRef)
{
    // Delegate to the native binding, then pass its status code through
    // checkResult and return the outcome of that check.
    final int status = cuTexRefCreateNative(pTexRef);
    return checkResult(status);
}
// Native counterpart of cuTexRefCreate: same argument, int status code as result.
private static native int cuTexRefCreateNative(CUtexref pTexRef);
/**
* Destroys a texture reference.
*
*
* CUresult cuTexRefDestroy (
* CUtexref hTexRef )
*
*
* Destroys a texture reference.
* Deprecated. Destroys the texture reference
* specified by hTexRef.
*
*
*
* @param hTexRef Texture reference to destroy
*
* @return CUDA_SUCCESS, CUDA_ERROR_DEINITIALIZED, CUDA_ERROR_NOT_INITIALIZED,
* CUDA_ERROR_INVALID_CONTEXT, CUDA_ERROR_INVALID_VALUE
*
* @see JCudaDriver#cuTexRefCreate
*/
public static int cuTexRefDestroy(CUtexref hTexRef)
{
    // Delegate to the native binding, then pass its status code through
    // checkResult and return the outcome of that check.
    final int status = cuTexRefDestroyNative(hTexRef);
    return checkResult(status);
}
// Native counterpart of cuTexRefDestroy: same argument, int status code as result.
private static native int cuTexRefDestroyNative(CUtexref hTexRef);
/**
* Binds an array as a texture reference.
*
*
* CUresult cuTexRefSetArray (
* CUtexref hTexRef,
* CUarray hArray,
* unsigned int Flags )
*
*
* Binds an array as a texture reference.
* Binds the CUDA array hArray to the texture reference hTexRef. Any previous address or CUDA array state associated with
* the texture reference is superseded by this function. Flags
* must be set to CU_TRSA_OVERRIDE_FORMAT. Any CUDA array previously bound
* to hTexRef is unbound.
*
*
*
* @param hTexRef Texture reference to bind
* @param hArray Array to bind
* @param Flags Options (must be CU_TRSA_OVERRIDE_FORMAT)
*
* @return CUDA_SUCCESS, CUDA_ERROR_DEINITIALIZED, CUDA_ERROR_NOT_INITIALIZED,
* CUDA_ERROR_INVALID_CONTEXT, CUDA_ERROR_INVALID_VALUE
*
* @see JCudaDriver#cuTexRefSetAddress
* @see JCudaDriver#cuTexRefSetAddress2D
* @see JCudaDriver#cuTexRefSetAddressMode
* @see JCudaDriver#cuTexRefSetFilterMode
* @see JCudaDriver#cuTexRefSetFlags
* @see JCudaDriver#cuTexRefSetFormat
* @see JCudaDriver#cuTexRefGetAddress
* @see JCudaDriver#cuTexRefGetAddressMode
* @see JCudaDriver#cuTexRefGetArray
* @see JCudaDriver#cuTexRefGetFilterMode
* @see JCudaDriver#cuTexRefGetFlags
* @see JCudaDriver#cuTexRefGetFormat
*/
public static int cuTexRefSetArray(CUtexref hTexRef, CUarray hArray, int Flags)
{
    // Delegate to the native binding, then pass its status code through
    // checkResult and return the outcome of that check.
    final int status = cuTexRefSetArrayNative(hTexRef, hArray, Flags);
    return checkResult(status);
}
// Native counterpart of cuTexRefSetArray: same arguments, int status code as result.
private static native int cuTexRefSetArrayNative(CUtexref hTexRef, CUarray hArray, int Flags);
/**
 * Binds a mipmapped array to a texture reference.
 *
 * <pre>
 * CUresult cuTexRefSetMipmappedArray (
 *      CUtexref hTexRef,
 *      CUmipmappedArray hMipmappedArray,
 *      unsigned int Flags )
 * </pre>
 *
 * <p>The CUDA mipmapped array {@code hMipmappedArray} is bound to the
 * texture reference {@code hTexRef}. This supersedes any previous address
 * or CUDA array state associated with the texture reference, and any CUDA
 * array previously bound to {@code hTexRef} is unbound. {@code Flags} must
 * be set to CU_TRSA_OVERRIDE_FORMAT.
 *
 * <p>The status code of the native call is passed through
 * {@code checkResult} before it is returned.
 *
 * @param hTexRef Texture reference to bind
 * @param hMipmappedArray Mipmapped array to bind
 * @param Flags Options (must be CU_TRSA_OVERRIDE_FORMAT)
 *
 * @return CUDA_SUCCESS, CUDA_ERROR_DEINITIALIZED, CUDA_ERROR_NOT_INITIALIZED,
 * CUDA_ERROR_INVALID_CONTEXT, CUDA_ERROR_INVALID_VALUE
 *
 * @see JCudaDriver#cuTexRefSetAddress
 * @see JCudaDriver#cuTexRefSetAddress2D
 * @see JCudaDriver#cuTexRefSetAddressMode
 * @see JCudaDriver#cuTexRefSetFilterMode
 * @see JCudaDriver#cuTexRefSetFlags
 * @see JCudaDriver#cuTexRefSetFormat
 * @see JCudaDriver#cuTexRefGetAddress
 * @see JCudaDriver#cuTexRefGetAddressMode
 * @see JCudaDriver#cuTexRefGetArray
 * @see JCudaDriver#cuTexRefGetFilterMode
 * @see JCudaDriver#cuTexRefGetFlags
 * @see JCudaDriver#cuTexRefGetFormat
 */
public static int cuTexRefSetMipmappedArray(CUtexref hTexRef, CUmipmappedArray hMipmappedArray, int Flags)
{
    final int status = cuTexRefSetMipmappedArrayNative(hTexRef, hMipmappedArray, Flags);
    return checkResult(status);
}
// Native method invoked by cuTexRefSetMipmappedArray.
private static native int cuTexRefSetMipmappedArrayNative(CUtexref hTexRef, CUmipmappedArray hMipmappedArray, int Flags);
/**
 * Binds an address as a texture reference.
 *
 * <pre>
 * CUresult cuTexRefSetAddress (
 *      size_t* ByteOffset,
 *      CUtexref hTexRef,
 *      CUdeviceptr dptr,
 *      size_t bytes )
 * </pre>
 *
 * <p>A linear address range is bound to the texture reference
 * {@code hTexRef}. This supersedes any previous address or CUDA array state
 * associated with the texture reference, and any memory previously bound
 * to {@code hTexRef} is unbound.
 *
 * <p>Because the hardware enforces an alignment requirement on texture
 * base addresses, cuTexRefSetAddress() passes back a byte offset in
 * {@code *ByteOffset} that must be applied to texture fetches in order to
 * read from the desired memory. This offset must be divided by the texel
 * size and passed to kernels that read from the texture, so that it can be
 * applied to the tex1Dfetch() function.
 *
 * <p>If the device memory pointer was returned from cuMemAlloc(), the
 * offset is guaranteed to be 0 and NULL may be passed as the
 * {@code ByteOffset} parameter.
 *
 * <p>The total number of elements (or texels) in the linear address range
 * cannot exceed CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE1D_LINEAR_WIDTH. The
 * number of elements is computed as (bytes / bytesPerElement), where
 * bytesPerElement is determined from the data format and number of
 * components set using cuTexRefSetFormat().
 *
 * <p>The status code of the native call is passed through
 * {@code checkResult} before it is returned.
 *
 * @param ByteOffset Returned byte offset
 * @param hTexRef Texture reference to bind
 * @param dptr Device pointer to bind
 * @param bytes Size of memory to bind in bytes
 *
 * @return CUDA_SUCCESS, CUDA_ERROR_DEINITIALIZED, CUDA_ERROR_NOT_INITIALIZED,
 * CUDA_ERROR_INVALID_CONTEXT, CUDA_ERROR_INVALID_VALUE
 *
 * @see JCudaDriver#cuTexRefSetAddress2D
 * @see JCudaDriver#cuTexRefSetAddressMode
 * @see JCudaDriver#cuTexRefSetArray
 * @see JCudaDriver#cuTexRefSetFilterMode
 * @see JCudaDriver#cuTexRefSetFlags
 * @see JCudaDriver#cuTexRefSetFormat
 * @see JCudaDriver#cuTexRefGetAddress
 * @see JCudaDriver#cuTexRefGetAddressMode
 * @see JCudaDriver#cuTexRefGetArray
 * @see JCudaDriver#cuTexRefGetFilterMode
 * @see JCudaDriver#cuTexRefGetFlags
 * @see JCudaDriver#cuTexRefGetFormat
 */
public static int cuTexRefSetAddress(long ByteOffset[], CUtexref hTexRef, CUdeviceptr dptr, long bytes)
{
    final int status = cuTexRefSetAddressNative(ByteOffset, hTexRef, dptr, bytes);
    return checkResult(status);
}
// Native method invoked by cuTexRefSetAddress.
private static native int cuTexRefSetAddressNative(long ByteOffset[], CUtexref hTexRef, CUdeviceptr dptr, long bytes);
/**
 * Sets the format for a texture reference.
 *
 * <pre>
 * CUresult cuTexRefSetFormat (
 *      CUtexref hTexRef,
 *      CUarray_format fmt,
 *      int NumPackedComponents )
 * </pre>
 *
 * <p>Specifies the format of the data to be read by the texture reference
 * {@code hTexRef}. {@code fmt} and {@code NumPackedComponents} are exactly
 * analogous to the Format and NumChannels members of the
 * CUDA_ARRAY_DESCRIPTOR structure: they specify the format of each
 * component and the number of components per array element.
 *
 * <p>The status code of the native call is passed through
 * {@code checkResult} before it is returned.
 *
 * @param hTexRef Texture reference
 * @param fmt Format to set
 * @param NumPackedComponents Number of components per array element
 *
 * @return CUDA_SUCCESS, CUDA_ERROR_DEINITIALIZED, CUDA_ERROR_NOT_INITIALIZED,
 * CUDA_ERROR_INVALID_CONTEXT, CUDA_ERROR_INVALID_VALUE
 *
 * @see JCudaDriver#cuTexRefSetAddress
 * @see JCudaDriver#cuTexRefSetAddress2D
 * @see JCudaDriver#cuTexRefSetAddressMode
 * @see JCudaDriver#cuTexRefSetArray
 * @see JCudaDriver#cuTexRefSetFilterMode
 * @see JCudaDriver#cuTexRefSetFlags
 * @see JCudaDriver#cuTexRefGetAddress
 * @see JCudaDriver#cuTexRefGetAddressMode
 * @see JCudaDriver#cuTexRefGetArray
 * @see JCudaDriver#cuTexRefGetFilterMode
 * @see JCudaDriver#cuTexRefGetFlags
 * @see JCudaDriver#cuTexRefGetFormat
 */
public static int cuTexRefSetFormat(CUtexref hTexRef, int fmt, int NumPackedComponents)
{
    final int status = cuTexRefSetFormatNative(hTexRef, fmt, NumPackedComponents);
    return checkResult(status);
}
// Native method invoked by cuTexRefSetFormat.
private static native int cuTexRefSetFormatNative(CUtexref hTexRef, int fmt, int NumPackedComponents);
/**
 * Binds an address as a 2D texture reference.
 *
 * <pre>
 * CUresult cuTexRefSetAddress2D (
 *      CUtexref hTexRef,
 *      const CUDA_ARRAY_DESCRIPTOR* desc,
 *      CUdeviceptr dptr,
 *      size_t Pitch )
 * </pre>
 *
 * <p>A linear address range is bound to the texture reference
 * {@code hTexRef}. This supersedes any previous address or CUDA array state
 * associated with the texture reference, and any memory previously bound
 * to {@code hTexRef} is unbound.
 *
 * <p>Using a tex2D() function inside a kernel requires a call to either
 * cuTexRefSetArray() to bind the corresponding texture reference to an
 * array, or cuTexRefSetAddress2D() to bind the texture reference to linear
 * memory.
 *
 * <p>Function calls to cuTexRefSetFormat() cannot follow calls to
 * cuTexRefSetAddress2D() for the same texture reference.
 *
 * <p>{@code dptr} is required to be aligned to the appropriate
 * hardware-specific texture alignment. This value can be queried using the
 * device attribute CU_DEVICE_ATTRIBUTE_TEXTURE_ALIGNMENT. If an unaligned
 * {@code dptr} is supplied, CUDA_ERROR_INVALID_VALUE is returned.
 *
 * <p>The pitch (passed here as {@code PitchInBytes}) has to be aligned to
 * the hardware-specific texture pitch alignment. This value can be queried
 * using the device attribute CU_DEVICE_ATTRIBUTE_TEXTURE_PITCH_ALIGNMENT.
 * If an unaligned pitch is supplied, CUDA_ERROR_INVALID_VALUE is returned.
 *
 * <p>Width and Height, which are specified in elements (or texels), cannot
 * exceed CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE2D_LINEAR_WIDTH and
 * CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE2D_LINEAR_HEIGHT respectively. The
 * pitch, which is specified in bytes, cannot exceed
 * CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE2D_LINEAR_PITCH.
 *
 * <p>The status code of the native call is passed through
 * {@code checkResult} before it is returned.
 *
 * @param hTexRef Texture reference to bind
 * @param desc Descriptor of CUDA array
 * @param dptr Device pointer to bind
 * @param PitchInBytes Line pitch in bytes
 *
 * @return CUDA_SUCCESS, CUDA_ERROR_DEINITIALIZED, CUDA_ERROR_NOT_INITIALIZED,
 * CUDA_ERROR_INVALID_CONTEXT, CUDA_ERROR_INVALID_VALUE
 *
 * @see JCudaDriver#cuTexRefSetAddress
 * @see JCudaDriver#cuTexRefSetAddressMode
 * @see JCudaDriver#cuTexRefSetArray
 * @see JCudaDriver#cuTexRefSetFilterMode
 * @see JCudaDriver#cuTexRefSetFlags
 * @see JCudaDriver#cuTexRefSetFormat
 * @see JCudaDriver#cuTexRefGetAddress
 * @see JCudaDriver#cuTexRefGetAddressMode
 * @see JCudaDriver#cuTexRefGetArray
 * @see JCudaDriver#cuTexRefGetFilterMode
 * @see JCudaDriver#cuTexRefGetFlags
 * @see JCudaDriver#cuTexRefGetFormat
 */
public static int cuTexRefSetAddress2D(CUtexref hTexRef, CUDA_ARRAY_DESCRIPTOR desc, CUdeviceptr dptr, long PitchInBytes)
{
    final int status = cuTexRefSetAddress2DNative(hTexRef, desc, dptr, PitchInBytes);
    return checkResult(status);
}
// Native method invoked by cuTexRefSetAddress2D.
private static native int cuTexRefSetAddress2DNative(CUtexref hTexRef, CUDA_ARRAY_DESCRIPTOR desc, CUdeviceptr dptr, long PitchInBytes);
/**
 * Sets the addressing mode for a texture reference.
 *
 * <pre>
 * CUresult cuTexRefSetAddressMode (
 *      CUtexref hTexRef,
 *      int dim,
 *      CUaddress_mode am )
 * </pre>
 *
 * <p>Specifies the addressing mode {@code am} for the given dimension
 * {@code dim} of the texture reference {@code hTexRef}. If {@code dim} is
 * zero, the addressing mode is applied to the first parameter of the
 * functions used to fetch from the texture; if {@code dim} is 1, the
 * second, and so on. CUaddress_mode is defined as:
 *
 * <pre>
 * typedef enum CUaddress_mode_enum {
 *      CU_TR_ADDRESS_MODE_WRAP = 0,
 *      CU_TR_ADDRESS_MODE_CLAMP = 1,
 *      CU_TR_ADDRESS_MODE_MIRROR = 2,
 *      CU_TR_ADDRESS_MODE_BORDER = 3
 * } CUaddress_mode;
 * </pre>
 *
 * <p>Note that this call has no effect if {@code hTexRef} is bound to
 * linear memory. Also, if the flag CU_TRSF_NORMALIZED_COORDINATES is not
 * set, the only supported address mode is CU_TR_ADDRESS_MODE_CLAMP.
 *
 * <p>The status code of the native call is passed through
 * {@code checkResult} before it is returned.
 *
 * @param hTexRef Texture reference
 * @param dim Dimension
 * @param am Addressing mode to set
 *
 * @return CUDA_SUCCESS, CUDA_ERROR_DEINITIALIZED, CUDA_ERROR_NOT_INITIALIZED,
 * CUDA_ERROR_INVALID_CONTEXT, CUDA_ERROR_INVALID_VALUE
 *
 * @see JCudaDriver#cuTexRefSetAddress
 * @see JCudaDriver#cuTexRefSetAddress2D
 * @see JCudaDriver#cuTexRefSetArray
 * @see JCudaDriver#cuTexRefSetFilterMode
 * @see JCudaDriver#cuTexRefSetFlags
 * @see JCudaDriver#cuTexRefSetFormat
 * @see JCudaDriver#cuTexRefGetAddress
 * @see JCudaDriver#cuTexRefGetAddressMode
 * @see JCudaDriver#cuTexRefGetArray
 * @see JCudaDriver#cuTexRefGetFilterMode
 * @see JCudaDriver#cuTexRefGetFlags
 * @see JCudaDriver#cuTexRefGetFormat
 */
public static int cuTexRefSetAddressMode(CUtexref hTexRef, int dim, int am)
{
    final int status = cuTexRefSetAddressModeNative(hTexRef, dim, am);
    return checkResult(status);
}
// Native method invoked by cuTexRefSetAddressMode.
private static native int cuTexRefSetAddressModeNative(CUtexref hTexRef, int dim, int am);
/**
 * Sets the filtering mode for a texture reference.
 *
 * <pre>
 * CUresult cuTexRefSetFilterMode (
 *      CUtexref hTexRef,
 *      CUfilter_mode fm )
 * </pre>
 *
 * <p>Specifies the filtering mode {@code fm} to be used when reading
 * memory through the texture reference {@code hTexRef}.
 * CUfilter_mode_enum is defined as:
 *
 * <pre>
 * typedef enum CUfilter_mode_enum {
 *      CU_TR_FILTER_MODE_POINT = 0,
 *      CU_TR_FILTER_MODE_LINEAR = 1
 * } CUfilter_mode;
 * </pre>
 *
 * <p>Note that this call has no effect if {@code hTexRef} is bound to
 * linear memory.
 *
 * <p>The status code of the native call is passed through
 * {@code checkResult} before it is returned.
 *
 * @param hTexRef Texture reference
 * @param fm Filtering mode to set
 *
 * @return CUDA_SUCCESS, CUDA_ERROR_DEINITIALIZED, CUDA_ERROR_NOT_INITIALIZED,
 * CUDA_ERROR_INVALID_CONTEXT, CUDA_ERROR_INVALID_VALUE
 *
 * @see JCudaDriver#cuTexRefSetAddress
 * @see JCudaDriver#cuTexRefSetAddress2D
 * @see JCudaDriver#cuTexRefSetAddressMode
 * @see JCudaDriver#cuTexRefSetArray
 * @see JCudaDriver#cuTexRefSetFlags
 * @see JCudaDriver#cuTexRefSetFormat
 * @see JCudaDriver#cuTexRefGetAddress
 * @see JCudaDriver#cuTexRefGetAddressMode
 * @see JCudaDriver#cuTexRefGetArray
 * @see JCudaDriver#cuTexRefGetFilterMode
 * @see JCudaDriver#cuTexRefGetFlags
 * @see JCudaDriver#cuTexRefGetFormat
 */
public static int cuTexRefSetFilterMode(CUtexref hTexRef, int fm)
{
    final int status = cuTexRefSetFilterModeNative(hTexRef, fm);
    return checkResult(status);
}
// Native method invoked by cuTexRefSetFilterMode.
private static native int cuTexRefSetFilterModeNative(CUtexref hTexRef, int fm);
/**
 * Sets the mipmap filtering mode for a texture reference.
 *
 * <pre>
 * CUresult cuTexRefSetMipmapFilterMode (
 *      CUtexref hTexRef,
 *      CUfilter_mode fm )
 * </pre>
 *
 * <p>Specifies the mipmap filtering mode {@code fm} to be used when
 * reading memory through the texture reference {@code hTexRef}.
 * CUfilter_mode_enum is defined as:
 *
 * <pre>
 * typedef enum CUfilter_mode_enum {
 *      CU_TR_FILTER_MODE_POINT = 0,
 *      CU_TR_FILTER_MODE_LINEAR = 1
 * } CUfilter_mode;
 * </pre>
 *
 * <p>Note that this call has no effect if {@code hTexRef} is not bound to
 * a mipmapped array.
 *
 * <p>The status code of the native call is passed through
 * {@code checkResult} before it is returned.
 *
 * @param hTexRef Texture reference
 * @param fm Filtering mode to set
 *
 * @return CUDA_SUCCESS, CUDA_ERROR_DEINITIALIZED, CUDA_ERROR_NOT_INITIALIZED,
 * CUDA_ERROR_INVALID_CONTEXT, CUDA_ERROR_INVALID_VALUE
 *
 * @see JCudaDriver#cuTexRefSetAddress
 * @see JCudaDriver#cuTexRefSetAddress2D
 * @see JCudaDriver#cuTexRefSetAddressMode
 * @see JCudaDriver#cuTexRefSetArray
 * @see JCudaDriver#cuTexRefSetFlags
 * @see JCudaDriver#cuTexRefSetFormat
 * @see JCudaDriver#cuTexRefGetAddress
 * @see JCudaDriver#cuTexRefGetAddressMode
 * @see JCudaDriver#cuTexRefGetArray
 * @see JCudaDriver#cuTexRefGetFilterMode
 * @see JCudaDriver#cuTexRefGetFlags
 * @see JCudaDriver#cuTexRefGetFormat
 */
public static int cuTexRefSetMipmapFilterMode(CUtexref hTexRef, int fm)
{
    final int status = cuTexRefSetMipmapFilterModeNative(hTexRef, fm);
    return checkResult(status);
}
// Native method invoked by cuTexRefSetMipmapFilterMode.
private static native int cuTexRefSetMipmapFilterModeNative(CUtexref hTexRef, int fm);
/**
 * Sets the mipmap level bias for a texture reference.
 *
 * <pre>
 * CUresult cuTexRefSetMipmapLevelBias (
 *      CUtexref hTexRef,
 *      float bias )
 * </pre>
 *
 * <p>Specifies the mipmap level bias {@code bias} to be added to the
 * specified mipmap level when reading memory through the texture reference
 * {@code hTexRef}.
 *
 * <p>Note that this call has no effect if {@code hTexRef} is not bound to
 * a mipmapped array.
 *
 * <p>The status code of the native call is passed through
 * {@code checkResult} before it is returned.
 *
 * @param hTexRef Texture reference
 * @param bias Mipmap level bias
 *
 * @return CUDA_SUCCESS, CUDA_ERROR_DEINITIALIZED, CUDA_ERROR_NOT_INITIALIZED,
 * CUDA_ERROR_INVALID_CONTEXT, CUDA_ERROR_INVALID_VALUE
 *
 * @see JCudaDriver#cuTexRefSetAddress
 * @see JCudaDriver#cuTexRefSetAddress2D
 * @see JCudaDriver#cuTexRefSetAddressMode
 * @see JCudaDriver#cuTexRefSetArray
 * @see JCudaDriver#cuTexRefSetFlags
 * @see JCudaDriver#cuTexRefSetFormat
 * @see JCudaDriver#cuTexRefGetAddress
 * @see JCudaDriver#cuTexRefGetAddressMode
 * @see JCudaDriver#cuTexRefGetArray
 * @see JCudaDriver#cuTexRefGetFilterMode
 * @see JCudaDriver#cuTexRefGetFlags
 * @see JCudaDriver#cuTexRefGetFormat
 */
public static int cuTexRefSetMipmapLevelBias(CUtexref hTexRef, float bias)
{
    final int status = cuTexRefSetMipmapLevelBiasNative(hTexRef, bias);
    return checkResult(status);
}
// Native method invoked by cuTexRefSetMipmapLevelBias.
private static native int cuTexRefSetMipmapLevelBiasNative(CUtexref hTexRef, float bias);
/**
 * Sets the mipmap min/max mipmap level clamps for a texture reference.
 *
 * <pre>
 * CUresult cuTexRefSetMipmapLevelClamp (
 *      CUtexref hTexRef,
 *      float minMipmapLevelClamp,
 *      float maxMipmapLevelClamp )
 * </pre>
 *
 * <p>Specifies the min/max mipmap level clamps,
 * {@code minMipmapLevelClamp} and {@code maxMipmapLevelClamp}
 * respectively, to be used when reading memory through the texture
 * reference {@code hTexRef}.
 *
 * <p>Note that this call has no effect if {@code hTexRef} is not bound to
 * a mipmapped array.
 *
 * <p>The status code of the native call is passed through
 * {@code checkResult} before it is returned.
 *
 * @param hTexRef Texture reference
 * @param minMipmapLevelClamp Mipmap min level clamp
 * @param maxMipmapLevelClamp Mipmap max level clamp
 *
 * @return CUDA_SUCCESS, CUDA_ERROR_DEINITIALIZED, CUDA_ERROR_NOT_INITIALIZED,
 * CUDA_ERROR_INVALID_CONTEXT, CUDA_ERROR_INVALID_VALUE
 *
 * @see JCudaDriver#cuTexRefSetAddress
 * @see JCudaDriver#cuTexRefSetAddress2D
 * @see JCudaDriver#cuTexRefSetAddressMode
 * @see JCudaDriver#cuTexRefSetArray
 * @see JCudaDriver#cuTexRefSetFlags
 * @see JCudaDriver#cuTexRefSetFormat
 * @see JCudaDriver#cuTexRefGetAddress
 * @see JCudaDriver#cuTexRefGetAddressMode
 * @see JCudaDriver#cuTexRefGetArray
 * @see JCudaDriver#cuTexRefGetFilterMode
 * @see JCudaDriver#cuTexRefGetFlags
 * @see JCudaDriver#cuTexRefGetFormat
 */
public static int cuTexRefSetMipmapLevelClamp(CUtexref hTexRef, float minMipmapLevelClamp, float maxMipmapLevelClamp)
{
    final int status = cuTexRefSetMipmapLevelClampNative(hTexRef, minMipmapLevelClamp, maxMipmapLevelClamp);
    return checkResult(status);
}
// Native method invoked by cuTexRefSetMipmapLevelClamp.
private static native int cuTexRefSetMipmapLevelClampNative(CUtexref hTexRef, float minMipmapLevelClamp, float maxMipmapLevelClamp);
/**
 * Sets the maximum anisotropy for a texture reference.
 *
 * <pre>
 * CUresult cuTexRefSetMaxAnisotropy (
 *      CUtexref hTexRef,
 *      unsigned int maxAniso )
 * </pre>
 *
 * <p>Specifies the maximum anisotropy {@code maxAniso} to be used when
 * reading memory through the texture reference {@code hTexRef}.
 *
 * <p>Note that this call has no effect if {@code hTexRef} is bound to
 * linear memory.
 *
 * <p>The status code of the native call is passed through
 * {@code checkResult} before it is returned.
 *
 * @param hTexRef Texture reference
 * @param maxAniso Maximum anisotropy
 *
 * @return CUDA_SUCCESS, CUDA_ERROR_DEINITIALIZED, CUDA_ERROR_NOT_INITIALIZED,
 * CUDA_ERROR_INVALID_CONTEXT, CUDA_ERROR_INVALID_VALUE
 *
 * @see JCudaDriver#cuTexRefSetAddress
 * @see JCudaDriver#cuTexRefSetAddress2D
 * @see JCudaDriver#cuTexRefSetAddressMode
 * @see JCudaDriver#cuTexRefSetArray
 * @see JCudaDriver#cuTexRefSetFlags
 * @see JCudaDriver#cuTexRefSetFormat
 * @see JCudaDriver#cuTexRefGetAddress
 * @see JCudaDriver#cuTexRefGetAddressMode
 * @see JCudaDriver#cuTexRefGetArray
 * @see JCudaDriver#cuTexRefGetFilterMode
 * @see JCudaDriver#cuTexRefGetFlags
 * @see JCudaDriver#cuTexRefGetFormat
 */
public static int cuTexRefSetMaxAnisotropy(CUtexref hTexRef, int maxAniso)
{
    final int status = cuTexRefSetMaxAnisotropyNative(hTexRef, maxAniso);
    return checkResult(status);
}
// Native method invoked by cuTexRefSetMaxAnisotropy.
private static native int cuTexRefSetMaxAnisotropyNative(CUtexref hTexRef, int maxAniso);
/**
 * Sets the flags for a texture reference.
 *
 * <pre>
 * CUresult cuTexRefSetFlags (
 *      CUtexref hTexRef,
 *      unsigned int Flags )
 * </pre>
 *
 * <p>Specifies optional flags via {@code Flags} to specify the behavior of
 * data returned through the texture reference {@code hTexRef}. The valid
 * flags are:
 *
 * <ul>
 *   <li>CU_TRSF_READ_AS_INTEGER, which suppresses the default behavior of
 *   having the texture promote integer data to floating point data in the
 *   range [0, 1]. Note that texture with 32-bit integer format would not
 *   be promoted, regardless of whether or not this flag is specified;</li>
 *   <li>CU_TRSF_NORMALIZED_COORDINATES, which suppresses the default
 *   behavior of having the texture coordinates range from [0, Dim) where
 *   Dim is the width or height of the CUDA array. Instead, the texture
 *   coordinates [0, 1.0) reference the entire breadth of the array
 *   dimension;</li>
 * </ul>
 *
 * <p>The status code of the native call is passed through
 * {@code checkResult} before it is returned.
 *
 * @param hTexRef Texture reference
 * @param Flags Optional flags to set
 *
 * @return CUDA_SUCCESS, CUDA_ERROR_DEINITIALIZED, CUDA_ERROR_NOT_INITIALIZED,
 * CUDA_ERROR_INVALID_CONTEXT, CUDA_ERROR_INVALID_VALUE
 *
 * @see JCudaDriver#cuTexRefSetAddress
 * @see JCudaDriver#cuTexRefSetAddress2D
 * @see JCudaDriver#cuTexRefSetAddressMode
 * @see JCudaDriver#cuTexRefSetArray
 * @see JCudaDriver#cuTexRefSetFilterMode
 * @see JCudaDriver#cuTexRefSetFormat
 * @see JCudaDriver#cuTexRefGetAddress
 * @see JCudaDriver#cuTexRefGetAddressMode
 * @see JCudaDriver#cuTexRefGetArray
 * @see JCudaDriver#cuTexRefGetFilterMode
 * @see JCudaDriver#cuTexRefGetFlags
 * @see JCudaDriver#cuTexRefGetFormat
 */
public static int cuTexRefSetFlags(CUtexref hTexRef, int Flags)
{
    final int status = cuTexRefSetFlagsNative(hTexRef, Flags);
    return checkResult(status);
}
// Native method invoked by cuTexRefSetFlags.
private static native int cuTexRefSetFlagsNative(CUtexref hTexRef, int Flags);
/**
 * Gets the address associated with a texture reference.
 *
 * <pre>
 * CUresult cuTexRefGetAddress (
 *      CUdeviceptr* pdptr,
 *      CUtexref hTexRef )
 * </pre>
 *
 * <p>Returns in {@code *pdptr} the base address bound to the texture
 * reference {@code hTexRef}, or returns CUDA_ERROR_INVALID_VALUE if the
 * texture reference is not bound to any device memory range.
 *
 * <p>The status code of the native call is passed through
 * {@code checkResult} before it is returned.
 *
 * @param pdptr Returned device address
 * @param hTexRef Texture reference
 *
 * @return CUDA_SUCCESS, CUDA_ERROR_DEINITIALIZED, CUDA_ERROR_NOT_INITIALIZED,
 * CUDA_ERROR_INVALID_CONTEXT, CUDA_ERROR_INVALID_VALUE
 *
 * @see JCudaDriver#cuTexRefSetAddress
 * @see JCudaDriver#cuTexRefSetAddress2D
 * @see JCudaDriver#cuTexRefSetAddressMode
 * @see JCudaDriver#cuTexRefSetArray
 * @see JCudaDriver#cuTexRefSetFilterMode
 * @see JCudaDriver#cuTexRefSetFlags
 * @see JCudaDriver#cuTexRefSetFormat
 * @see JCudaDriver#cuTexRefGetAddressMode
 * @see JCudaDriver#cuTexRefGetArray
 * @see JCudaDriver#cuTexRefGetFilterMode
 * @see JCudaDriver#cuTexRefGetFlags
 * @see JCudaDriver#cuTexRefGetFormat
 */
public static int cuTexRefGetAddress(CUdeviceptr pdptr, CUtexref hTexRef)
{
    final int status = cuTexRefGetAddressNative(pdptr, hTexRef);
    return checkResult(status);
}
// Native method invoked by cuTexRefGetAddress.
private static native int cuTexRefGetAddressNative(CUdeviceptr pdptr, CUtexref hTexRef);
/**
 * Gets the array bound to a texture reference.
 *
 * <pre>
 * CUresult cuTexRefGetArray (
 *      CUarray* phArray,
 *      CUtexref hTexRef )
 * </pre>
 *
 * <p>Returns in {@code *phArray} the CUDA array bound to the texture
 * reference {@code hTexRef}, or returns CUDA_ERROR_INVALID_VALUE if the
 * texture reference is not bound to any CUDA array.
 *
 * <p>The status code of the native call is passed through
 * {@code checkResult} before it is returned.
 *
 * @param phArray Returned array
 * @param hTexRef Texture reference
 *
 * @return CUDA_SUCCESS, CUDA_ERROR_DEINITIALIZED, CUDA_ERROR_NOT_INITIALIZED,
 * CUDA_ERROR_INVALID_CONTEXT, CUDA_ERROR_INVALID_VALUE
 *
 * @see JCudaDriver#cuTexRefSetAddress
 * @see JCudaDriver#cuTexRefSetAddress2D
 * @see JCudaDriver#cuTexRefSetAddressMode
 * @see JCudaDriver#cuTexRefSetArray
 * @see JCudaDriver#cuTexRefSetFilterMode
 * @see JCudaDriver#cuTexRefSetFlags
 * @see JCudaDriver#cuTexRefSetFormat
 * @see JCudaDriver#cuTexRefGetAddress
 * @see JCudaDriver#cuTexRefGetAddressMode
 * @see JCudaDriver#cuTexRefGetFilterMode
 * @see JCudaDriver#cuTexRefGetFlags
 * @see JCudaDriver#cuTexRefGetFormat
 */
public static int cuTexRefGetArray(CUarray phArray, CUtexref hTexRef)
{
    final int status = cuTexRefGetArrayNative(phArray, hTexRef);
    return checkResult(status);
}
// Native method invoked by cuTexRefGetArray.
private static native int cuTexRefGetArrayNative(CUarray phArray, CUtexref hTexRef);
/**
 * Gets the mipmapped array bound to a texture reference.
 *
 * <pre>
 * CUresult cuTexRefGetMipmappedArray (
 *      CUmipmappedArray* phMipmappedArray,
 *      CUtexref hTexRef )
 * </pre>
 *
 * <p>Returns in {@code *phMipmappedArray} the CUDA mipmapped array bound
 * to the texture reference {@code hTexRef}, or returns
 * CUDA_ERROR_INVALID_VALUE if the texture reference is not bound to any
 * CUDA mipmapped array.
 *
 * <p>The status code of the native call is passed through
 * {@code checkResult} before it is returned.
 *
 * @param phMipmappedArray Returned mipmapped array
 * @param hTexRef Texture reference
 *
 * @return CUDA_SUCCESS, CUDA_ERROR_DEINITIALIZED, CUDA_ERROR_NOT_INITIALIZED,
 * CUDA_ERROR_INVALID_CONTEXT, CUDA_ERROR_INVALID_VALUE
 *
 * @see JCudaDriver#cuTexRefSetAddress
 * @see JCudaDriver#cuTexRefSetAddress2D
 * @see JCudaDriver#cuTexRefSetAddressMode
 * @see JCudaDriver#cuTexRefSetArray
 * @see JCudaDriver#cuTexRefSetFilterMode
 * @see JCudaDriver#cuTexRefSetFlags
 * @see JCudaDriver#cuTexRefSetFormat
 * @see JCudaDriver#cuTexRefGetAddress
 * @see JCudaDriver#cuTexRefGetAddressMode
 * @see JCudaDriver#cuTexRefGetFilterMode
 * @see JCudaDriver#cuTexRefGetFlags
 * @see JCudaDriver#cuTexRefGetFormat
 */
public static int cuTexRefGetMipmappedArray(CUmipmappedArray phMipmappedArray, CUtexref hTexRef)
{
    final int status = cuTexRefGetMipmappedArrayNative(phMipmappedArray, hTexRef);
    return checkResult(status);
}
// Native method invoked by cuTexRefGetMipmappedArray.
private static native int cuTexRefGetMipmappedArrayNative(CUmipmappedArray phMipmappedArray, CUtexref hTexRef);
/**
 * Gets the addressing mode used by a texture reference.
 *
 * <pre>
 * CUresult cuTexRefGetAddressMode (
 *      CUaddress_mode* pam,
 *      CUtexref hTexRef,
 *      int dim )
 * </pre>
 *
 * <p>Returns in {@code *pam} the addressing mode corresponding to the
 * dimension {@code dim} of the texture reference {@code hTexRef}.
 * Currently, the only valid values for {@code dim} are 0 and 1.
 *
 * <p>The status code of the native call is passed through
 * {@code checkResult} before it is returned.
 *
 * @param pam Returned addressing mode
 * @param hTexRef Texture reference
 * @param dim Dimension
 *
 * @return CUDA_SUCCESS, CUDA_ERROR_DEINITIALIZED, CUDA_ERROR_NOT_INITIALIZED,
 * CUDA_ERROR_INVALID_CONTEXT, CUDA_ERROR_INVALID_VALUE
 *
 * @see JCudaDriver#cuTexRefSetAddress
 * @see JCudaDriver#cuTexRefSetAddress2D
 * @see JCudaDriver#cuTexRefSetAddressMode
 * @see JCudaDriver#cuTexRefSetArray
 * @see JCudaDriver#cuTexRefSetFilterMode
 * @see JCudaDriver#cuTexRefSetFlags
 * @see JCudaDriver#cuTexRefSetFormat
 * @see JCudaDriver#cuTexRefGetAddress
 * @see JCudaDriver#cuTexRefGetArray
 * @see JCudaDriver#cuTexRefGetFilterMode
 * @see JCudaDriver#cuTexRefGetFlags
 * @see JCudaDriver#cuTexRefGetFormat
 */
public static int cuTexRefGetAddressMode(int pam[], CUtexref hTexRef, int dim)
{
    final int status = cuTexRefGetAddressModeNative(pam, hTexRef, dim);
    return checkResult(status);
}
// Native method invoked by cuTexRefGetAddressMode.
private static native int cuTexRefGetAddressModeNative(int pam[], CUtexref hTexRef, int dim);
/**
 * Gets the filter-mode used by a texture reference.
 *
 * <pre>
 * CUresult cuTexRefGetFilterMode (
 *      CUfilter_mode* pfm,
 *      CUtexref hTexRef )
 * </pre>
 *
 * <p>Returns in {@code *pfm} the filtering mode of the texture reference
 * {@code hTexRef}.
 *
 * <p>The status code of the native call is passed through
 * {@code checkResult} before it is returned.
 *
 * @param pfm Returned filtering mode
 * @param hTexRef Texture reference
 *
 * @return CUDA_SUCCESS, CUDA_ERROR_DEINITIALIZED, CUDA_ERROR_NOT_INITIALIZED,
 * CUDA_ERROR_INVALID_CONTEXT, CUDA_ERROR_INVALID_VALUE
 *
 * @see JCudaDriver#cuTexRefSetAddress
 * @see JCudaDriver#cuTexRefSetAddress2D
 * @see JCudaDriver#cuTexRefSetAddressMode
 * @see JCudaDriver#cuTexRefSetArray
 * @see JCudaDriver#cuTexRefSetFilterMode
 * @see JCudaDriver#cuTexRefSetFlags
 * @see JCudaDriver#cuTexRefSetFormat
 * @see JCudaDriver#cuTexRefGetAddress
 * @see JCudaDriver#cuTexRefGetAddressMode
 * @see JCudaDriver#cuTexRefGetArray
 * @see JCudaDriver#cuTexRefGetFlags
 * @see JCudaDriver#cuTexRefGetFormat
 */
public static int cuTexRefGetFilterMode(int pfm[], CUtexref hTexRef)
{
    final int status = cuTexRefGetFilterModeNative(pfm, hTexRef);
    return checkResult(status);
}
// Native method invoked by cuTexRefGetFilterMode.
private static native int cuTexRefGetFilterModeNative(int pfm[], CUtexref hTexRef);
/**
 * Gets the format used by a texture reference.
 *
 * <pre>
 * CUresult cuTexRefGetFormat (
 *      CUarray_format* pFormat,
 *      int* pNumChannels,
 *      CUtexref hTexRef )
 * </pre>
 *
 * <p>Returns in {@code *pFormat} and {@code *pNumChannels} the format and
 * number of components of the CUDA array bound to the texture reference
 * {@code hTexRef}. If {@code pFormat} or {@code pNumChannels} is NULL, it
 * will be ignored.
 *
 * <p>The status code of the native call is passed through
 * {@code checkResult} before it is returned.
 *
 * @param pFormat Returned format
 * @param pNumChannels Returned number of components
 * @param hTexRef Texture reference
 *
 * @return CUDA_SUCCESS, CUDA_ERROR_DEINITIALIZED, CUDA_ERROR_NOT_INITIALIZED,
 * CUDA_ERROR_INVALID_CONTEXT, CUDA_ERROR_INVALID_VALUE
 *
 * @see JCudaDriver#cuTexRefSetAddress
 * @see JCudaDriver#cuTexRefSetAddress2D
 * @see JCudaDriver#cuTexRefSetAddressMode
 * @see JCudaDriver#cuTexRefSetArray
 * @see JCudaDriver#cuTexRefSetFilterMode
 * @see JCudaDriver#cuTexRefSetFlags
 * @see JCudaDriver#cuTexRefSetFormat
 * @see JCudaDriver#cuTexRefGetAddress
 * @see JCudaDriver#cuTexRefGetAddressMode
 * @see JCudaDriver#cuTexRefGetArray
 * @see JCudaDriver#cuTexRefGetFilterMode
 * @see JCudaDriver#cuTexRefGetFlags
 */
public static int cuTexRefGetFormat(int pFormat[], int pNumChannels[], CUtexref hTexRef)
{
    final int status = cuTexRefGetFormatNative(pFormat, pNumChannels, hTexRef);
    return checkResult(status);
}
// Native method invoked by cuTexRefGetFormat.
private static native int cuTexRefGetFormatNative(int pFormat[], int pNumChannels[], CUtexref hTexRef);
/**
 * Gets the mipmap filtering mode for a texture reference.
 *
 * <pre>
 * CUresult cuTexRefGetMipmapFilterMode (
 *      CUfilter_mode* pfm,
 *      CUtexref hTexRef )
 * </pre>
 *
 * <p>Returns in {@code pfm} the mipmap filtering mode that is used when
 * reading memory through the texture reference {@code hTexRef}.
 *
 * <p>The status code of the native call is passed through
 * {@code checkResult} before it is returned.
 *
 * @param pfm Returned mipmap filtering mode
 * @param hTexRef Texture reference
 *
 * @return CUDA_SUCCESS, CUDA_ERROR_DEINITIALIZED, CUDA_ERROR_NOT_INITIALIZED,
 * CUDA_ERROR_INVALID_CONTEXT, CUDA_ERROR_INVALID_VALUE
 *
 * @see JCudaDriver#cuTexRefSetAddress
 * @see JCudaDriver#cuTexRefSetAddress2D
 * @see JCudaDriver#cuTexRefSetAddressMode
 * @see JCudaDriver#cuTexRefSetArray
 * @see JCudaDriver#cuTexRefSetFlags
 * @see JCudaDriver#cuTexRefSetFormat
 * @see JCudaDriver#cuTexRefGetAddress
 * @see JCudaDriver#cuTexRefGetAddressMode
 * @see JCudaDriver#cuTexRefGetArray
 * @see JCudaDriver#cuTexRefGetFilterMode
 * @see JCudaDriver#cuTexRefGetFlags
 * @see JCudaDriver#cuTexRefGetFormat
 */
public static int cuTexRefGetMipmapFilterMode(int pfm[], CUtexref hTexRef)
{
    final int status = cuTexRefGetMipmapFilterModeNative(pfm, hTexRef);
    return checkResult(status);
}
// Native method invoked by cuTexRefGetMipmapFilterMode.
private static native int cuTexRefGetMipmapFilterModeNative(int pfm[], CUtexref hTexRef);
/**
 * Gets the mipmap level bias for a texture reference.
 *
 * <pre>
 * CUresult cuTexRefGetMipmapLevelBias (
 *      float* pbias,
 *      CUtexref hTexRef )
 * </pre>
 *
 * <p>Returns in {@code pbias} the mipmap level bias that is added to the
 * specified mipmap level when reading memory through the texture reference
 * {@code hTexRef}.
 *
 * <p>The status code of the native call is passed through
 * {@code checkResult} before it is returned.
 *
 * @param pbias Returned mipmap level bias
 * @param hTexRef Texture reference
 *
 * @return CUDA_SUCCESS, CUDA_ERROR_DEINITIALIZED, CUDA_ERROR_NOT_INITIALIZED,
 * CUDA_ERROR_INVALID_CONTEXT, CUDA_ERROR_INVALID_VALUE
 *
 * @see JCudaDriver#cuTexRefSetAddress
 * @see JCudaDriver#cuTexRefSetAddress2D
 * @see JCudaDriver#cuTexRefSetAddressMode
 * @see JCudaDriver#cuTexRefSetArray
 * @see JCudaDriver#cuTexRefSetFlags
 * @see JCudaDriver#cuTexRefSetFormat
 * @see JCudaDriver#cuTexRefGetAddress
 * @see JCudaDriver#cuTexRefGetAddressMode
 * @see JCudaDriver#cuTexRefGetArray
 * @see JCudaDriver#cuTexRefGetFilterMode
 * @see JCudaDriver#cuTexRefGetFlags
 * @see JCudaDriver#cuTexRefGetFormat
 */
public static int cuTexRefGetMipmapLevelBias(float pbias[], CUtexref hTexRef)
{
    final int status = cuTexRefGetMipmapLevelBiasNative(pbias, hTexRef);
    return checkResult(status);
}
// Native method invoked by cuTexRefGetMipmapLevelBias.
private static native int cuTexRefGetMipmapLevelBiasNative(float pbias[], CUtexref hTexRef);
/**
 * Gets the min/max mipmap level clamps for a texture reference.
 *
 * <pre>
 * CUresult cuTexRefGetMipmapLevelClamp (
 *      float* pminMipmapLevelClamp,
 *      float* pmaxMipmapLevelClamp,
 *      CUtexref hTexRef )
 * </pre>
 *
 * <p>Returns in {@code pminMipmapLevelClamp} and
 * {@code pmaxMipmapLevelClamp} the min/max mipmap level clamps that are
 * used when reading memory through the texture reference {@code hTexRef}.
 *
 * <p>The status code of the native call is passed through
 * {@code checkResult} before it is returned.
 *
 * @param pminMipmapLevelClamp Returned mipmap min level clamp
 * @param pmaxMipmapLevelClamp Returned mipmap max level clamp
 * @param hTexRef Texture reference
 *
 * @return CUDA_SUCCESS, CUDA_ERROR_DEINITIALIZED, CUDA_ERROR_NOT_INITIALIZED,
 * CUDA_ERROR_INVALID_CONTEXT, CUDA_ERROR_INVALID_VALUE
 *
 * @see JCudaDriver#cuTexRefSetAddress
 * @see JCudaDriver#cuTexRefSetAddress2D
 * @see JCudaDriver#cuTexRefSetAddressMode
 * @see JCudaDriver#cuTexRefSetArray
 * @see JCudaDriver#cuTexRefSetFlags
 * @see JCudaDriver#cuTexRefSetFormat
 * @see JCudaDriver#cuTexRefGetAddress
 * @see JCudaDriver#cuTexRefGetAddressMode
 * @see JCudaDriver#cuTexRefGetArray
 * @see JCudaDriver#cuTexRefGetFilterMode
 * @see JCudaDriver#cuTexRefGetFlags
 * @see JCudaDriver#cuTexRefGetFormat
 */
public static int cuTexRefGetMipmapLevelClamp(float pminMipmapLevelClamp[], float pmaxMipmapLevelClamp[], CUtexref hTexRef)
{
    final int status = cuTexRefGetMipmapLevelClampNative(pminMipmapLevelClamp, pmaxMipmapLevelClamp, hTexRef);
    return checkResult(status);
}
// Native method invoked by cuTexRefGetMipmapLevelClamp.
private static native int cuTexRefGetMipmapLevelClampNative(float pminMipmapLevelClamp[], float pmaxMipmapLevelClamp[], CUtexref hTexRef);
/**
 * Gets the maximum anisotropy for a texture reference.
 *
 * <pre>
 * CUresult cuTexRefGetMaxAnisotropy (
 *      int* pmaxAniso,
 *      CUtexref hTexRef )
 * </pre>
 *
 * <p>Returns in {@code pmaxAniso} the maximum anisotropy that is used when
 * reading memory through the texture reference {@code hTexRef}.
 *
 * <p>The status code of the native call is passed through
 * {@code checkResult} before it is returned.
 *
 * @param pmaxAniso Returned maximum anisotropy
 * @param hTexRef Texture reference
 *
 * @return CUDA_SUCCESS, CUDA_ERROR_DEINITIALIZED, CUDA_ERROR_NOT_INITIALIZED,
 * CUDA_ERROR_INVALID_CONTEXT, CUDA_ERROR_INVALID_VALUE
 *
 * @see JCudaDriver#cuTexRefSetAddress
 * @see JCudaDriver#cuTexRefSetAddress2D
 * @see JCudaDriver#cuTexRefSetAddressMode
 * @see JCudaDriver#cuTexRefSetArray
 * @see JCudaDriver#cuTexRefSetFlags
 * @see JCudaDriver#cuTexRefSetFormat
 * @see JCudaDriver#cuTexRefGetAddress
 * @see JCudaDriver#cuTexRefGetAddressMode
 * @see JCudaDriver#cuTexRefGetArray
 * @see JCudaDriver#cuTexRefGetFilterMode
 * @see JCudaDriver#cuTexRefGetFlags
 * @see JCudaDriver#cuTexRefGetFormat
 */
public static int cuTexRefGetMaxAnisotropy(int pmaxAniso[], CUtexref hTexRef)
{
    final int status = cuTexRefGetMaxAnisotropyNative(pmaxAniso, hTexRef);
    return checkResult(status);
}
// Native method invoked by cuTexRefGetMaxAnisotropy.
private static native int cuTexRefGetMaxAnisotropyNative(int pmaxAniso[], CUtexref hTexRef);
/**
 * Gets the flags used by a texture reference.
 *
 * <pre>
 * CUresult cuTexRefGetFlags (
 *      unsigned int* pFlags,
 *      CUtexref hTexRef )
 * </pre>
 *
 * <p>Returns in {@code *pFlags} the flags of the texture reference
 * {@code hTexRef}.
 *
 * <p>The status code of the native call is passed through
 * {@code checkResult} before it is returned.
 *
 * @param pFlags Returned flags
 * @param hTexRef Texture reference
 *
 * @return CUDA_SUCCESS, CUDA_ERROR_DEINITIALIZED, CUDA_ERROR_NOT_INITIALIZED,
 * CUDA_ERROR_INVALID_CONTEXT, CUDA_ERROR_INVALID_VALUE
 *
 * @see JCudaDriver#cuTexRefSetAddress
 * @see JCudaDriver#cuTexRefSetAddress2D
 * @see JCudaDriver#cuTexRefSetAddressMode
 * @see JCudaDriver#cuTexRefSetArray
 * @see JCudaDriver#cuTexRefSetFilterMode
 * @see JCudaDriver#cuTexRefSetFlags
 * @see JCudaDriver#cuTexRefSetFormat
 * @see JCudaDriver#cuTexRefGetAddress
 * @see JCudaDriver#cuTexRefGetAddressMode
 * @see JCudaDriver#cuTexRefGetArray
 * @see JCudaDriver#cuTexRefGetFilterMode
 * @see JCudaDriver#cuTexRefGetFormat
 */
public static int cuTexRefGetFlags(int pFlags[], CUtexref hTexRef)
{
    final int status = cuTexRefGetFlagsNative(pFlags, hTexRef);
    return checkResult(status);
}
// Native method invoked by cuTexRefGetFlags.
private static native int cuTexRefGetFlagsNative(int pFlags[], CUtexref hTexRef);
/**
 * Sets the CUDA array for a surface reference.
 *
 * <pre>
 * CUresult cuSurfRefSetArray (
 *      CUsurfref hSurfRef,
 *      CUarray hArray,
 *      unsigned int Flags )
 * </pre>
 *
 * Sets the CUDA array {@code hArray} to be read and written by the
 * surface reference {@code hSurfRef}. Any previous CUDA array state
 * associated with the surface reference is superseded by this function.
 * {@code Flags} must be set to 0. The CUDA_ARRAY3D_SURFACE_LDST flag must
 * have been set for the CUDA array. Any CUDA array previously bound to
 * {@code hSurfRef} is unbound.
 *
 * @param hSurfRef Surface reference handle
 * @param hArray CUDA array handle
 * @param Flags set to 0
 *
 * @return CUDA_SUCCESS, CUDA_ERROR_DEINITIALIZED, CUDA_ERROR_NOT_INITIALIZED,
 * CUDA_ERROR_INVALID_CONTEXT, CUDA_ERROR_INVALID_VALUE
 *
 * @see JCudaDriver#cuModuleGetSurfRef
 * @see JCudaDriver#cuSurfRefGetArray
 */
public static int cuSurfRefSetArray(CUsurfref hSurfRef, CUarray hArray, int Flags )
{
    // Call the JNI binding first, then hand its status code to checkResult.
    final int status = cuSurfRefSetArrayNative(hSurfRef, hArray, Flags);
    return checkResult(status);
}
// JNI entry point backing cuSurfRefSetArray.
private static native int cuSurfRefSetArrayNative(
    CUsurfref hSurfRef,
    CUarray hArray,
    int Flags);
/**
 * Passes back the CUDA array bound to a surface reference.
 *
 * <pre>
 * CUresult cuSurfRefGetArray (
 *      CUarray* phArray,
 *      CUsurfref hSurfRef )
 * </pre>
 *
 * Returns in {@code *phArray} the CUDA array bound to the surface
 * reference {@code hSurfRef}, or returns CUDA_ERROR_INVALID_VALUE if the
 * surface reference is not bound to any CUDA array.
 *
 * @param phArray Receives the CUDA array bound to the surface reference
 * @param hSurfRef Surface reference handle
 *
 * @return CUDA_SUCCESS, CUDA_ERROR_DEINITIALIZED, CUDA_ERROR_NOT_INITIALIZED,
 * CUDA_ERROR_INVALID_CONTEXT, CUDA_ERROR_INVALID_VALUE
 *
 * @see JCudaDriver#cuModuleGetSurfRef
 * @see JCudaDriver#cuSurfRefSetArray
 */
public static int cuSurfRefGetArray( CUarray phArray, CUsurfref hSurfRef )
{
    // Call the JNI binding first, then hand its status code to checkResult.
    final int status = cuSurfRefGetArrayNative(phArray, hSurfRef);
    return checkResult(status);
}
// JNI entry point backing cuSurfRefGetArray.
private static native int cuSurfRefGetArrayNative(
    CUarray phArray,
    CUsurfref hSurfRef);
/**
* Creates a texture object.
*
*
* CUresult cuTexObjectCreate (
* CUtexObject* pTexObject,
* const CUDA_RESOURCE_DESC* pResDesc,
* const CUDA_TEXTURE_DESC* pTexDesc,
* const CUDA_RESOURCE_VIEW_DESC* pResViewDesc )
*
*
* Creates a texture object. Creates a
* texture object and returns it in pTexObject. pResDesc
* describes the data to texture from. pTexDesc describes how
* the data should be sampled. pResViewDesc is an optional
* argument that specifies an alternate format for the data described by
* pResDesc, and also describes the subresource region to
* restrict access to when texturing. pResViewDesc can only be
* specified if the type of resource is a CUDA array or a CUDA mipmapped
* array.
*
* Texture objects are only supported on
* devices of compute capability 3.0 or higher.
*
* The CUDA_RESOURCE_DESC structure is
* defined as:
*
typedef struct CUDA_RESOURCE_DESC_st
* {
* CUresourcetype resType;
*
* union {
* struct {
* CUarray hArray;
* } array;
* struct {
* CUmipmappedArray hMipmappedArray;
* } mipmap;
* struct {
* CUdeviceptr devPtr;
* CUarray_format format;
* unsigned int numChannels;
* size_t sizeInBytes;
* } linear;
* struct {
* CUdeviceptr devPtr;
* CUarray_format format;
* unsigned int numChannels;
* size_t width;
* size_t height;
* size_t pitchInBytes;
* } pitch2D;
* } res;
*
* unsigned int flags;
* } CUDA_RESOURCE_DESC;
* where:
*
* -
*
* CUDA_RESOURCE_DESC::resType
* specifies the type of resource to texture from. CUresourcetype is
* defined as:
*
typedef enum CUresourcetype_enum {
* CU_RESOURCE_TYPE_ARRAY = 0x00,
* CU_RESOURCE_TYPE_MIPMAPPED_ARRAY = 0x01,
* CU_RESOURCE_TYPE_LINEAR = 0x02,
* CU_RESOURCE_TYPE_PITCH2D = 0x03
* } CUresourcetype;
*
*
*
*
* If CUDA_RESOURCE_DESC::resType is set
* to CU_RESOURCE_TYPE_ARRAY, CUDA_RESOURCE_DESC::res::array::hArray must
* be set to a valid CUDA array handle.
*
* If CUDA_RESOURCE_DESC::resType is set
* to CU_RESOURCE_TYPE_MIPMAPPED_ARRAY,
* CUDA_RESOURCE_DESC::res::mipmap::hMipmappedArray must be set to a valid
* CUDA mipmapped array handle.
*
* If CUDA_RESOURCE_DESC::resType is set
* to CU_RESOURCE_TYPE_LINEAR, CUDA_RESOURCE_DESC::res::linear::devPtr
* must be set to a valid device pointer, that is aligned to
* CU_DEVICE_ATTRIBUTE_TEXTURE_ALIGNMENT. CUDA_RESOURCE_DESC::res::linear::format
* and CUDA_RESOURCE_DESC::res::linear::numChannels describe the format
* of each component
* and the number of components per array
* element. CUDA_RESOURCE_DESC::res::linear::sizeInBytes specifies the
* size of the array
* in bytes. The total number of elements
* in the linear address range cannot exceed
* CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE1D_LINEAR_WIDTH. The number of
* elements is computed as (sizeInBytes / (sizeof(format) *
* numChannels)).
*
* If CUDA_RESOURCE_DESC::resType is set
* to CU_RESOURCE_TYPE_PITCH2D, CUDA_RESOURCE_DESC::res::pitch2D::devPtr
* must be set to a valid device pointer, that is aligned to
* CU_DEVICE_ATTRIBUTE_TEXTURE_ALIGNMENT. CUDA_RESOURCE_DESC::res::pitch2D::format
* and CUDA_RESOURCE_DESC::res::pitch2D::numChannels describe the format
* of each component
* and the number of components per array
* element. CUDA_RESOURCE_DESC::res::pitch2D::width and
* CUDA_RESOURCE_DESC::res::pitch2D::height
* specify the width and height of the array
* in elements, and cannot exceed CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE2D_LINEAR_WIDTH
* and CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE2D_LINEAR_HEIGHT respectively.
* CUDA_RESOURCE_DESC::res::pitch2D::pitchInBytes specifies the pitch
* between two rows in bytes and has to be
* aligned to
* CU_DEVICE_ATTRIBUTE_TEXTURE_PITCH_ALIGNMENT. Pitch cannot exceed
* CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE2D_LINEAR_PITCH.
*
*
* -
*
flags must be set to zero.
*
*
*
* The CUDA_TEXTURE_DESC struct is defined
* as
*
typedef struct CUDA_TEXTURE_DESC_st {
* CUaddress_mode addressMode[3];
* CUfilter_mode filterMode;
* unsigned int flags;
* unsigned int maxAnisotropy;
* CUfilter_mode mipmapFilterMode;
* float mipmapLevelBias;
* float minMipmapLevelClamp;
* float maxMipmapLevelClamp;
* } CUDA_TEXTURE_DESC;
* where
*
* -
*
* CUDA_TEXTURE_DESC::addressMode
* specifies the addressing mode for each dimension of the texture data.
* CUaddress_mode is defined as:
*
typedef enum
* CUaddress_mode_enum {
* CU_TR_ADDRESS_MODE_WRAP = 0,
* CU_TR_ADDRESS_MODE_CLAMP = 1,
* CU_TR_ADDRESS_MODE_MIRROR = 2,
* CU_TR_ADDRESS_MODE_BORDER = 3
* } CUaddress_mode;
* This is ignored if
* CUDA_RESOURCE_DESC::resType is CU_RESOURCE_TYPE_LINEAR. Also, if the
* flag, CU_TRSF_NORMALIZED_COORDINATES is not set, the only supported
* address mode is CU_TR_ADDRESS_MODE_CLAMP.
*
*
*
*
*
* -
*
* CUDA_TEXTURE_DESC::filterMode
* specifies the filtering mode to be used when fetching from the texture.
* CUfilter_mode is defined as:
*
typedef enum CUfilter_mode_enum
* {
* CU_TR_FILTER_MODE_POINT = 0,
* CU_TR_FILTER_MODE_LINEAR = 1
* } CUfilter_mode;
* This is ignored if
* CUDA_RESOURCE_DESC::resType is CU_RESOURCE_TYPE_LINEAR.
*
*
*
*
*
* -
*
* CUDA_TEXTURE_DESC::flags can
* be any combination of the following:
*
* -
*
CU_TRSF_READ_AS_INTEGER,
* which suppresses the default behavior of having the texture promote
* integer data to floating point data in the range [0,
* 1]. Note that texture
* with 32-bit integer format would not be promoted, regardless of whether
* or not this flag is specified.
*
*
* -
*
CU_TRSF_NORMALIZED_COORDINATES, which suppresses the default behavior
* of having the texture coordinates range from [0, Dim) where Dim is the
* width or height
* of the CUDA array.
* Instead, the texture coordinates [0, 1.0) reference the entire breadth
* of the array dimension; Note that
* for CUDA mipmapped
* arrays, this flag has to be set.
*
*
*
*
*
*
*
*
* -
*
CUDA_TEXTURE_DESC::maxAnisotropy
* specifies the maximum anisotropy ratio to be used when doing anisotropic
* filtering. This value will be clamped to the range
* [1,16].
*
*
*
*
*
* -
*
CUDA_TEXTURE_DESC::mipmapFilterMode
* specifies the filter mode when the calculated mipmap level lies between
* two defined mipmap levels.
*
*
*
*
*
* -
*
CUDA_TEXTURE_DESC::mipmapLevelBias
* specifies the offset to be applied to the calculated mipmap level.
*
*
*
*
*
* -
*
CUDA_TEXTURE_DESC::minMipmapLevelClamp
* specifies the lower end of the mipmap level range to clamp access to.
*
*
*
*
*
* -
*
CUDA_TEXTURE_DESC::maxMipmapLevelClamp
* specifies the upper end of the mipmap level range to clamp access to.
*
*
*
*
* The CUDA_RESOURCE_VIEW_DESC struct is
* defined as
*
typedef struct CUDA_RESOURCE_VIEW_DESC_st
* {
* CUresourceViewFormat format;
* size_t width;
* size_t height;
* size_t depth;
* unsigned int firstMipmapLevel;
* unsigned int lastMipmapLevel;
* unsigned int firstLayer;
* unsigned int lastLayer;
* } CUDA_RESOURCE_VIEW_DESC;
* where:
*
* -
*
CUDA_RESOURCE_VIEW_DESC::format
* specifies how the data contained in the CUDA array or CUDA mipmapped
* array should be interpreted. Note that this can incur
* a change in size of the texture
* data. If the resource view format is a block compressed format, then
* the underlying CUDA array
* or CUDA mipmapped array has to
* have a base of format CU_AD_FORMAT_UNSIGNED_INT32 with 2 or 4 channels,
* depending on the block compressed format. For ex., BC1 and BC4 require
* the underlying CUDA array to
* have a format of
* CU_AD_FORMAT_UNSIGNED_INT32 with 2 channels. The other BC formats
* require the underlying resource to have the same base format but with
* 4 channels.
*
*
*
*
*
* -
*
CUDA_RESOURCE_VIEW_DESC::width
* specifies the new width of the texture data. If the resource view
* format is a block compressed format, this value has to
* be 4 times the original width
* of the resource. For non block compressed formats, this value has to
* be equal to that of the
* original resource.
*
*
*
*
*
* -
*
CUDA_RESOURCE_VIEW_DESC::height
* specifies the new height of the texture data. If the resource view
* format is a block compressed format, this value has to
* be 4 times the original height
* of the resource. For non block compressed formats, this value has to
* be equal to that of the
* original resource.
*
*
*
*
*
* -
*
CUDA_RESOURCE_VIEW_DESC::depth
* specifies the new depth of the texture data. This value has to be equal
* to that of the original resource.
*
*
*
*
*
* -
*
CUDA_RESOURCE_VIEW_DESC::firstMipmapLevel specifies the most detailed
* mipmap level. This will be the new mipmap level zero. For non-mipmapped
* resources, this value
* has to be
* zero. CUDA_TEXTURE_DESC::minMipmapLevelClamp and
* CUDA_TEXTURE_DESC::maxMipmapLevelClamp will be relative to this value.
* For ex., if the firstMipmapLevel is set to 2, and a minMipmapLevelClamp
* of 1.2 is specified,
* then the actual minimum mipmap
* level clamp will be 3.2.
*
*
*
*
*
* -
*
CUDA_RESOURCE_VIEW_DESC::lastMipmapLevel
* specifies the least detailed mipmap level. For non-mipmapped resources,
* this value has to be zero.
*
*
*
*
*
* -
*
CUDA_RESOURCE_VIEW_DESC::firstLayer
* specifies the first layer index for layered textures. This will be the
* new layer zero. For non-layered resources, this value
* has to be zero.
*
*
*
*
*
* -
*
CUDA_RESOURCE_VIEW_DESC::lastLayer
* specifies the last layer index for layered textures. For non-layered
* resources, this value has to be zero.
*
*
*
*
*
*
* @param pTexObject Texture object to create
* @param pResDesc Resource descriptor
* @param pTexDesc Texture descriptor
* @param pResViewDesc Resource view descriptor
*
* @return CUDA_SUCCESS, CUDA_ERROR_DEINITIALIZED, CUDA_ERROR_NOT_INITIALIZED,
* CUDA_ERROR_INVALID_CONTEXT, CUDA_ERROR_INVALID_VALUE
*
* @see JCudaDriver#cuTexObjectDestroy
*/
public static int cuTexObjectCreate(CUtexObject pTexObject, CUDA_RESOURCE_DESC pResDesc, CUDA_TEXTURE_DESC pTexDesc, CUDA_RESOURCE_VIEW_DESC pResViewDesc)
{
    // All four arguments go to the JNI binding unchanged; its status code
    // is then handed to checkResult.
    final int status = cuTexObjectCreateNative(pTexObject, pResDesc, pTexDesc, pResViewDesc);
    return checkResult(status);
}
// JNI entry point backing cuTexObjectCreate.
private static native int cuTexObjectCreateNative(
    CUtexObject pTexObject,
    CUDA_RESOURCE_DESC pResDesc,
    CUDA_TEXTURE_DESC pTexDesc,
    CUDA_RESOURCE_VIEW_DESC pResViewDesc);
/**
 * Destroys a texture object.
 *
 * <pre>
 * CUresult cuTexObjectDestroy (
 *      CUtexObject texObject )
 * </pre>
 *
 * Destroys the texture object specified by {@code texObject}.
 *
 * @param texObject Texture object to destroy
 *
 * @return CUDA_SUCCESS, CUDA_ERROR_DEINITIALIZED, CUDA_ERROR_NOT_INITIALIZED,
 * CUDA_ERROR_INVALID_CONTEXT, CUDA_ERROR_INVALID_VALUE
 *
 * @see JCudaDriver#cuTexObjectCreate
 */
public static int cuTexObjectDestroy(CUtexObject texObject)
{
    // Call the JNI binding first, then hand its status code to checkResult.
    final int status = cuTexObjectDestroyNative(texObject);
    return checkResult(status);
}
// JNI entry point backing cuTexObjectDestroy.
private static native int cuTexObjectDestroyNative(
    CUtexObject texObject);
/**
 * Returns a texture object's resource descriptor.
 *
 * <pre>
 * CUresult cuTexObjectGetResourceDesc (
 *      CUDA_RESOURCE_DESC* pResDesc,
 *      CUtexObject texObject )
 * </pre>
 *
 * Returns the resource descriptor for the texture object specified by
 * {@code texObject}.
 *
 * @param pResDesc Resource descriptor
 * @param texObject Texture object
 *
 * @return CUDA_SUCCESS, CUDA_ERROR_DEINITIALIZED, CUDA_ERROR_NOT_INITIALIZED,
 * CUDA_ERROR_INVALID_CONTEXT, CUDA_ERROR_INVALID_VALUE
 *
 * @see JCudaDriver#cuTexObjectCreate
 */
public static int cuTexObjectGetResourceDesc(CUDA_RESOURCE_DESC pResDesc, CUtexObject texObject)
{
    // Call the JNI binding first, then hand its status code to checkResult.
    final int status = cuTexObjectGetResourceDescNative(pResDesc, texObject);
    return checkResult(status);
}
// JNI entry point backing cuTexObjectGetResourceDesc.
private static native int cuTexObjectGetResourceDescNative(
    CUDA_RESOURCE_DESC pResDesc,
    CUtexObject texObject);
/**
 * Returns a texture object's texture descriptor.
 *
 * <pre>
 * CUresult cuTexObjectGetTextureDesc (
 *      CUDA_TEXTURE_DESC* pTexDesc,
 *      CUtexObject texObject )
 * </pre>
 *
 * Returns the texture descriptor for the texture object specified by
 * {@code texObject}.
 *
 * @param pTexDesc Texture descriptor
 * @param texObject Texture object
 *
 * @return CUDA_SUCCESS, CUDA_ERROR_DEINITIALIZED, CUDA_ERROR_NOT_INITIALIZED,
 * CUDA_ERROR_INVALID_CONTEXT, CUDA_ERROR_INVALID_VALUE
 *
 * @see JCudaDriver#cuTexObjectCreate
 */
public static int cuTexObjectGetTextureDesc(CUDA_TEXTURE_DESC pTexDesc, CUtexObject texObject)
{
    // Call the JNI binding first, then hand its status code to checkResult.
    final int status = cuTexObjectGetTextureDescNative(pTexDesc, texObject);
    return checkResult(status);
}
// JNI entry point backing cuTexObjectGetTextureDesc.
private static native int cuTexObjectGetTextureDescNative(
    CUDA_TEXTURE_DESC pTexDesc,
    CUtexObject texObject);
/**
 * Returns a texture object's resource view descriptor.
 *
 * <pre>
 * CUresult cuTexObjectGetResourceViewDesc (
 *      CUDA_RESOURCE_VIEW_DESC* pResViewDesc,
 *      CUtexObject texObject )
 * </pre>
 *
 * Returns the resource view descriptor for the texture object specified
 * by {@code texObject}. If no resource view was set for
 * {@code texObject}, the CUDA_ERROR_INVALID_VALUE is returned.
 *
 * @param pResViewDesc Resource view descriptor
 * @param texObject Texture object
 *
 * @return CUDA_SUCCESS, CUDA_ERROR_DEINITIALIZED, CUDA_ERROR_NOT_INITIALIZED,
 * CUDA_ERROR_INVALID_CONTEXT, CUDA_ERROR_INVALID_VALUE
 *
 * @see JCudaDriver#cuTexObjectCreate
 */
public static int cuTexObjectGetResourceViewDesc(CUDA_RESOURCE_VIEW_DESC pResViewDesc, CUtexObject texObject)
{
    // Call the JNI binding first, then hand its status code to checkResult.
    final int status = cuTexObjectGetResourceViewDescNative(pResViewDesc, texObject);
    return checkResult(status);
}
// JNI entry point backing cuTexObjectGetResourceViewDesc.
private static native int cuTexObjectGetResourceViewDescNative(
    CUDA_RESOURCE_VIEW_DESC pResViewDesc,
    CUtexObject texObject);
/**
 * Creates a surface object.
 *
 * <pre>
 * CUresult cuSurfObjectCreate (
 *      CUsurfObject* pSurfObject,
 *      const CUDA_RESOURCE_DESC* pResDesc )
 * </pre>
 *
 * Creates a surface object and returns it in {@code pSurfObject}.
 * {@code pResDesc} describes the data to perform surface load/stores on.
 * CUDA_RESOURCE_DESC::resType must be CU_RESOURCE_TYPE_ARRAY and
 * CUDA_RESOURCE_DESC::res::array::hArray must be set to a valid CUDA
 * array handle. CUDA_RESOURCE_DESC::flags must be set to zero.
 * <p>
 * Surface objects are only supported on devices of compute capability
 * 3.0 or higher.
 *
 * @param pSurfObject Surface object to create
 * @param pResDesc Resource descriptor
 *
 * @return CUDA_SUCCESS, CUDA_ERROR_DEINITIALIZED, CUDA_ERROR_NOT_INITIALIZED,
 * CUDA_ERROR_INVALID_CONTEXT, CUDA_ERROR_INVALID_VALUE
 *
 * @see JCudaDriver#cuSurfObjectDestroy
 */
public static int cuSurfObjectCreate(CUsurfObject pSurfObject, CUDA_RESOURCE_DESC pResDesc)
{
    // Call the JNI binding first, then hand its status code to checkResult.
    final int status = cuSurfObjectCreateNative(pSurfObject, pResDesc);
    return checkResult(status);
}
// JNI entry point backing cuSurfObjectCreate.
private static native int cuSurfObjectCreateNative(
    CUsurfObject pSurfObject,
    CUDA_RESOURCE_DESC pResDesc);
/**
 * Destroys a surface object.
 *
 * <pre>
 * CUresult cuSurfObjectDestroy (
 *      CUsurfObject surfObject )
 * </pre>
 *
 * Destroys the surface object specified by {@code surfObject}.
 *
 * @param surfObject Surface object to destroy
 *
 * @return CUDA_SUCCESS, CUDA_ERROR_DEINITIALIZED, CUDA_ERROR_NOT_INITIALIZED,
 * CUDA_ERROR_INVALID_CONTEXT, CUDA_ERROR_INVALID_VALUE
 *
 * @see JCudaDriver#cuSurfObjectCreate
 */
public static int cuSurfObjectDestroy(CUsurfObject surfObject)
{
    // Call the JNI binding first, then hand its status code to checkResult.
    final int status = cuSurfObjectDestroyNative(surfObject);
    return checkResult(status);
}
// JNI entry point backing cuSurfObjectDestroy.
private static native int cuSurfObjectDestroyNative(
    CUsurfObject surfObject);
/**
 * Returns a surface object's resource descriptor.
 *
 * <pre>
 * CUresult cuSurfObjectGetResourceDesc (
 *      CUDA_RESOURCE_DESC* pResDesc,
 *      CUsurfObject surfObject )
 * </pre>
 *
 * Returns the resource descriptor for the surface object specified by
 * {@code surfObject}.
 *
 * @param pResDesc Resource descriptor
 * @param surfObject Surface object
 *
 * @return CUDA_SUCCESS, CUDA_ERROR_DEINITIALIZED, CUDA_ERROR_NOT_INITIALIZED,
 * CUDA_ERROR_INVALID_CONTEXT, CUDA_ERROR_INVALID_VALUE
 *
 * @see JCudaDriver#cuSurfObjectCreate
 */
public static int cuSurfObjectGetResourceDesc(CUDA_RESOURCE_DESC pResDesc, CUsurfObject surfObject)
{
    // Call the JNI binding first, then hand its status code to checkResult.
    final int status = cuSurfObjectGetResourceDescNative(pResDesc, surfObject);
    return checkResult(status);
}
// JNI entry point backing cuSurfObjectGetResourceDesc.
private static native int cuSurfObjectGetResourceDescNative(
    CUDA_RESOURCE_DESC pResDesc,
    CUsurfObject surfObject);
/**
 * Queries if a device may directly access a peer device's memory.
 *
 * <pre>
 * CUresult cuDeviceCanAccessPeer (
 *      int* canAccessPeer,
 *      CUdevice dev,
 *      CUdevice peerDev )
 * </pre>
 *
 * Returns in {@code *canAccessPeer} a value of 1 if contexts on
 * {@code dev} are capable of directly accessing memory from contexts on
 * {@code peerDev} and 0 otherwise. If direct access of {@code peerDev}
 * from {@code dev} is possible, then access may be enabled on two
 * specific contexts by calling cuCtxEnablePeerAccess().
 * <p>
 * Note: this function may also return error codes from previous,
 * asynchronous launches.
 *
 * @param canAccessPeer Returned access capability
 * @param dev Device from which allocations on peerDev are to be directly accessed.
 * @param peerDev Device on which the allocations to be directly accessed by dev reside.
 *
 * @return CUDA_SUCCESS, CUDA_ERROR_DEINITIALIZED, CUDA_ERROR_NOT_INITIALIZED,
 * CUDA_ERROR_INVALID_DEVICE
 *
 * @see JCudaDriver#cuCtxEnablePeerAccess
 * @see JCudaDriver#cuCtxDisablePeerAccess
 */
public static int cuDeviceCanAccessPeer(int canAccessPeer[], CUdevice dev, CUdevice peerDev)
{
    // Call the JNI binding first, then hand its status code to checkResult.
    final int status = cuDeviceCanAccessPeerNative(canAccessPeer, dev, peerDev);
    return checkResult(status);
}
// JNI entry point backing cuDeviceCanAccessPeer.
private static native int cuDeviceCanAccessPeerNative(
    int canAccessPeer[],
    CUdevice dev,
    CUdevice peerDev);
/**
 * Enables direct access to memory allocations in a peer context.
 *
 * <pre>
 * CUresult cuCtxEnablePeerAccess (
 *      CUcontext peerContext,
 *      unsigned int Flags )
 * </pre>
 *
 * If both the current context and {@code peerContext} are on devices
 * which support unified addressing (as may be queried using
 * CU_DEVICE_ATTRIBUTE_UNIFIED_ADDRESSING) and same major compute
 * capability, then on success all allocations from {@code peerContext}
 * will immediately be accessible by the current context. See Unified
 * Addressing for additional details.
 * <p>
 * Note that access granted by this call is unidirectional and that in
 * order to access memory from the current context in
 * {@code peerContext}, a separate symmetric call to
 * cuCtxEnablePeerAccess() is required.
 * <p>
 * Error conditions:
 * <ul>
 * <li>CUDA_ERROR_PEER_ACCESS_UNSUPPORTED if cuDeviceCanAccessPeer()
 * indicates that the CUdevice of the current context cannot directly
 * access memory from the CUdevice of {@code peerContext}.</li>
 * <li>CUDA_ERROR_PEER_ACCESS_ALREADY_ENABLED if direct access of
 * {@code peerContext} from the current context has already been
 * enabled.</li>
 * <li>CUDA_ERROR_TOO_MANY_PEERS if direct peer access is not possible
 * because hardware resources required for peer access have been
 * exhausted.</li>
 * <li>CUDA_ERROR_INVALID_CONTEXT if there is no current context,
 * {@code peerContext} is not a valid context, or if the current context
 * is {@code peerContext}.</li>
 * <li>CUDA_ERROR_INVALID_VALUE if {@code Flags} is not 0.</li>
 * </ul>
 * <p>
 * Note: this function may also return error codes from previous,
 * asynchronous launches.
 *
 * @param peerContext Peer context to enable direct access to from the current context
 * @param Flags Reserved for future use and must be set to 0
 *
 * @return CUDA_SUCCESS, CUDA_ERROR_DEINITIALIZED, CUDA_ERROR_NOT_INITIALIZED,
 * CUDA_ERROR_PEER_ACCESS_ALREADY_ENABLED, CUDA_ERROR_TOO_MANY_PEERS,
 * CUDA_ERROR_INVALID_CONTEXT, CUDA_ERROR_PEER_ACCESS_UNSUPPORTED,
 * CUDA_ERROR_INVALID_VALUE
 *
 * @see JCudaDriver#cuDeviceCanAccessPeer
 * @see JCudaDriver#cuCtxDisablePeerAccess
 */
public static int cuCtxEnablePeerAccess(CUcontext peerContext, int Flags)
{
    // Call the JNI binding first, then hand its status code to checkResult.
    final int status = cuCtxEnablePeerAccessNative(peerContext, Flags);
    return checkResult(status);
}
// JNI entry point backing cuCtxEnablePeerAccess.
private static native int cuCtxEnablePeerAccessNative(
    CUcontext peerContext,
    int Flags);
/**
 * Disables direct access to memory allocations in a peer context and
 * unregisters any registered allocations.
 *
 * <pre>
 * CUresult cuCtxDisablePeerAccess (
 *      CUcontext peerContext )
 * </pre>
 *
 * Error conditions:
 * <ul>
 * <li>CUDA_ERROR_PEER_ACCESS_NOT_ENABLED if direct peer access has not
 * yet been enabled from {@code peerContext} to the current context.</li>
 * <li>CUDA_ERROR_INVALID_CONTEXT if there is no current context, or if
 * {@code peerContext} is not a valid context.</li>
 * </ul>
 * <p>
 * Note: this function may also return error codes from previous,
 * asynchronous launches.
 *
 * @param peerContext Peer context to disable direct access to
 *
 * @return CUDA_SUCCESS, CUDA_ERROR_DEINITIALIZED, CUDA_ERROR_NOT_INITIALIZED,
 * CUDA_ERROR_PEER_ACCESS_NOT_ENABLED, CUDA_ERROR_INVALID_CONTEXT
 *
 * @see JCudaDriver#cuDeviceCanAccessPeer
 * @see JCudaDriver#cuCtxEnablePeerAccess
 */
public static int cuCtxDisablePeerAccess(CUcontext peerContext)
{
    // Call the JNI binding first, then hand its status code to checkResult.
    final int status = cuCtxDisablePeerAccessNative(peerContext);
    return checkResult(status);
}
// JNI entry point backing cuCtxDisablePeerAccess.
private static native int cuCtxDisablePeerAccessNative(
    CUcontext peerContext);
/**
 * Sets the parameter size for the function.
 *
 * <pre>
 * CUresult cuParamSetSize (
 *      CUfunction hfunc,
 *      unsigned int numbytes )
 * </pre>
 *
 * Marked as deprecated in the CUDA documentation.
 * <p>
 * Sets through {@code numbytes} the total size in bytes needed by the
 * function parameters of the kernel corresponding to {@code hfunc}.
 * <p>
 * Note: this function may also return error codes from previous,
 * asynchronous launches.
 *
 * @param hfunc Kernel to set parameter size for
 * @param numbytes Size of parameter list in bytes
 *
 * @return CUDA_SUCCESS, CUDA_ERROR_DEINITIALIZED, CUDA_ERROR_NOT_INITIALIZED,
 * CUDA_ERROR_INVALID_CONTEXT, CUDA_ERROR_INVALID_VALUE
 *
 * @see JCudaDriver#cuFuncSetBlockShape
 * @see JCudaDriver#cuFuncSetSharedSize
 * @see JCudaDriver#cuFuncGetAttribute
 * @see JCudaDriver#cuParamSetf
 * @see JCudaDriver#cuParamSeti
 * @see JCudaDriver#cuParamSetv
 * @see JCudaDriver#cuLaunch
 * @see JCudaDriver#cuLaunchGrid
 * @see JCudaDriver#cuLaunchGridAsync
 * @see JCudaDriver#cuLaunchKernel
 */
public static int cuParamSetSize(CUfunction hfunc, int numbytes)
{
    // Call the JNI binding first, then hand its status code to checkResult.
    final int status = cuParamSetSizeNative(hfunc, numbytes);
    return checkResult(status);
}
// JNI entry point backing cuParamSetSize.
private static native int cuParamSetSizeNative(
    CUfunction hfunc,
    int numbytes);
/**
 * Adds an integer parameter to the function's argument list.
 *
 * <pre>
 * CUresult cuParamSeti (
 *      CUfunction hfunc,
 *      int offset,
 *      unsigned int value )
 * </pre>
 *
 * Marked as deprecated in the CUDA documentation.
 * <p>
 * Sets an integer parameter that will be specified the next time the
 * kernel corresponding to {@code hfunc} will be invoked. {@code offset}
 * is a byte offset.
 * <p>
 * Note: this function may also return error codes from previous,
 * asynchronous launches.
 *
 * @param hfunc Kernel to add parameter to
 * @param offset Offset to add parameter to argument list
 * @param value Value of parameter
 *
 * @return CUDA_SUCCESS, CUDA_ERROR_DEINITIALIZED, CUDA_ERROR_NOT_INITIALIZED,
 * CUDA_ERROR_INVALID_CONTEXT, CUDA_ERROR_INVALID_VALUE
 *
 * @see JCudaDriver#cuFuncSetBlockShape
 * @see JCudaDriver#cuFuncSetSharedSize
 * @see JCudaDriver#cuFuncGetAttribute
 * @see JCudaDriver#cuParamSetSize
 * @see JCudaDriver#cuParamSetf
 * @see JCudaDriver#cuParamSetv
 * @see JCudaDriver#cuLaunch
 * @see JCudaDriver#cuLaunchGrid
 * @see JCudaDriver#cuLaunchGridAsync
 * @see JCudaDriver#cuLaunchKernel
 */
public static int cuParamSeti(CUfunction hfunc, int offset, int value)
{
    // Call the JNI binding first, then hand its status code to checkResult.
    final int status = cuParamSetiNative(hfunc, offset, value);
    return checkResult(status);
}
// JNI entry point backing cuParamSeti.
private static native int cuParamSetiNative(
    CUfunction hfunc,
    int offset,
    int value);
/**
 * Adds a floating-point parameter to the function's argument list.
 *
 * <pre>
 * CUresult cuParamSetf (
 *      CUfunction hfunc,
 *      int offset,
 *      float value )
 * </pre>
 *
 * Marked as deprecated in the CUDA documentation.
 * <p>
 * Sets a floating-point parameter that will be specified the next time
 * the kernel corresponding to {@code hfunc} will be invoked.
 * {@code offset} is a byte offset.
 * <p>
 * Note: this function may also return error codes from previous,
 * asynchronous launches.
 *
 * @param hfunc Kernel to add parameter to
 * @param offset Offset to add parameter to argument list
 * @param value Value of parameter
 *
 * @return CUDA_SUCCESS, CUDA_ERROR_DEINITIALIZED, CUDA_ERROR_NOT_INITIALIZED,
 * CUDA_ERROR_INVALID_CONTEXT, CUDA_ERROR_INVALID_VALUE
 *
 * @see JCudaDriver#cuFuncSetBlockShape
 * @see JCudaDriver#cuFuncSetSharedSize
 * @see JCudaDriver#cuFuncGetAttribute
 * @see JCudaDriver#cuParamSetSize
 * @see JCudaDriver#cuParamSeti
 * @see JCudaDriver#cuParamSetv
 * @see JCudaDriver#cuLaunch
 * @see JCudaDriver#cuLaunchGrid
 * @see JCudaDriver#cuLaunchGridAsync
 * @see JCudaDriver#cuLaunchKernel
 */
public static int cuParamSetf(CUfunction hfunc, int offset, float value)
{
    // Call the JNI binding first, then hand its status code to checkResult.
    final int status = cuParamSetfNative(hfunc, offset, value);
    return checkResult(status);
}
// JNI entry point backing cuParamSetf.
private static native int cuParamSetfNative(
    CUfunction hfunc,
    int offset,
    float value);
/**
 * Adds arbitrary data to the function's argument list.
 *
 * <pre>
 * CUresult cuParamSetv (
 *      CUfunction hfunc,
 *      int offset,
 *      void* ptr,
 *      unsigned int numbytes )
 * </pre>
 *
 * Marked as deprecated in the CUDA documentation.
 * <p>
 * Copies an arbitrary amount of data (specified in {@code numbytes})
 * from {@code ptr} into the parameter space of the kernel corresponding
 * to {@code hfunc}. {@code offset} is a byte offset.
 * <p>
 * Note: this function may also return error codes from previous,
 * asynchronous launches.
 *
 * @param hfunc Kernel to add data to
 * @param offset Offset to add data to argument list
 * @param ptr Pointer to arbitrary data
 * @param numbytes Size of data to copy in bytes
 *
 * @return CUDA_SUCCESS, CUDA_ERROR_DEINITIALIZED, CUDA_ERROR_NOT_INITIALIZED,
 * CUDA_ERROR_INVALID_CONTEXT, CUDA_ERROR_INVALID_VALUE
 *
 * @see JCudaDriver#cuFuncSetBlockShape
 * @see JCudaDriver#cuFuncSetSharedSize
 * @see JCudaDriver#cuFuncGetAttribute
 * @see JCudaDriver#cuParamSetSize
 * @see JCudaDriver#cuParamSetf
 * @see JCudaDriver#cuParamSeti
 * @see JCudaDriver#cuLaunch
 * @see JCudaDriver#cuLaunchGrid
 * @see JCudaDriver#cuLaunchGridAsync
 * @see JCudaDriver#cuLaunchKernel
 */
public static int cuParamSetv(CUfunction hfunc, int offset, Pointer ptr, int numbytes)
{
    // Call the JNI binding first, then hand its status code to checkResult.
    final int status = cuParamSetvNative(hfunc, offset, ptr, numbytes);
    return checkResult(status);
}
// JNI entry point backing cuParamSetv.
private static native int cuParamSetvNative(
    CUfunction hfunc,
    int offset,
    Pointer ptr,
    int numbytes);
/**
 * Adds a texture-reference to the function's argument list.
 *
 * <pre>
 * CUresult cuParamSetTexRef (
 *      CUfunction hfunc,
 *      int texunit,
 *      CUtexref hTexRef )
 * </pre>
 *
 * Marked as deprecated in the CUDA documentation.
 * <p>
 * Makes the CUDA array or linear memory bound to the texture reference
 * {@code hTexRef} available to a device program as a texture. In this
 * version of CUDA, the texture-reference must be obtained via
 * cuModuleGetTexRef() and the {@code texunit} parameter must be set to
 * CU_PARAM_TR_DEFAULT.
 * <p>
 * Note: this function may also return error codes from previous,
 * asynchronous launches.
 *
 * @param hfunc Kernel to add texture-reference to
 * @param texunit Texture unit (must be CU_PARAM_TR_DEFAULT)
 * @param hTexRef Texture-reference to add to argument list
 *
 * @return CUDA_SUCCESS, CUDA_ERROR_DEINITIALIZED, CUDA_ERROR_NOT_INITIALIZED,
 * CUDA_ERROR_INVALID_CONTEXT, CUDA_ERROR_INVALID_VALUE
 */
public static int cuParamSetTexRef(CUfunction hfunc, int texunit, CUtexref hTexRef)
{
    // Call the JNI binding first, then hand its status code to checkResult.
    final int status = cuParamSetTexRefNative(hfunc, texunit, hTexRef);
    return checkResult(status);
}
// JNI entry point backing cuParamSetTexRef.
private static native int cuParamSetTexRefNative(
    CUfunction hfunc,
    int texunit,
    CUtexref hTexRef);
/**
 * Returns occupancy of a function.
 * <p>
 * Returns in {@code *numBlocks} the number of the maximum active blocks
 * per streaming multiprocessor.
 * <p>
 * The CUDA header text for this function also carries the Doxygen marker
 * {@code notefnerr}; presumably this is the usual remark that error codes
 * from previous, asynchronous launches may be returned as well - TODO
 * confirm against the CUDA documentation.
 *
 * @param numBlocks Returned occupancy
 * @param func Kernel for which occupancy is calculated
 * @param blockSize Block size the kernel is intended to be launched with
 * @param dynamicSMemSize Per-block dynamic shared memory usage intended, in bytes
 *
 * @return CUDA_SUCCESS, CUDA_ERROR_DEINITIALIZED, CUDA_ERROR_NOT_INITIALIZED,
 * CUDA_ERROR_INVALID_CONTEXT, CUDA_ERROR_INVALID_VALUE, CUDA_ERROR_UNKNOWN
 */
public static int cuOccupancyMaxActiveBlocksPerMultiprocessor(int numBlocks[], CUfunction func, int blockSize, long dynamicSMemSize)
{
    // Call the JNI binding first, then hand its status code to checkResult.
    final int status = cuOccupancyMaxActiveBlocksPerMultiprocessorNative(
        numBlocks, func, blockSize, dynamicSMemSize);
    return checkResult(status);
}
// JNI entry point backing cuOccupancyMaxActiveBlocksPerMultiprocessor.
private static native int cuOccupancyMaxActiveBlocksPerMultiprocessorNative(
    int numBlocks[],
    CUfunction func,
    int blockSize,
    long dynamicSMemSize);
/**
 * Variant of {@link #cuOccupancyMaxActiveBlocksPerMultiprocessor} that
 * additionally hands a {@code flags} value to the native call.
 *
 * @param numBlocks Passed to the native function; presumably receives the
 *        occupancy as in the variant without flags - TODO confirm
 * @param func Passed to the native function; presumably the kernel the
 *        occupancy is calculated for - TODO confirm
 * @param blockSize Passed to the native function unchanged
 * @param dynamicSMemSize Passed to the native function unchanged
 * @param flags Passed to the native function unchanged; the accepted
 *        values are not visible here - see the CUDA documentation
 *
 * @return The status code of the native call, after it went through
 *         checkResult
 */
public static int cuOccupancyMaxActiveBlocksPerMultiprocessorWithFlags(int numBlocks[], CUfunction func, int blockSize, long dynamicSMemSize, int flags)
{
    // Call the JNI binding first, then hand its status code to checkResult.
    final int status = cuOccupancyMaxActiveBlocksPerMultiprocessorWithFlagsNative(
        numBlocks, func, blockSize, dynamicSMemSize, flags);
    return checkResult(status);
}
// JNI entry point backing cuOccupancyMaxActiveBlocksPerMultiprocessorWithFlags.
private static native int cuOccupancyMaxActiveBlocksPerMultiprocessorWithFlagsNative(
    int numBlocks[],
    CUfunction func,
    int blockSize,
    long dynamicSMemSize,
    int flags);
/**
*
* \brief Suggest a launch configuration with reasonable occupancy
*
* Returns in \p *blockSize a reasonable block size that can achieve
* the maximum occupancy (or, the maximum number of active warps with
* the fewest blocks per multiprocessor), and in \p *minGridSize the
* minimum grid size to achieve the maximum occupancy.
*
* If \p blockSizeLimit is 0, the configurator will use the maximum
* block size permitted by the device / function instead.
*
* If per-block dynamic shared memory allocation is not needed, the
* user should leave both \p blockSizeToDynamicSMemSize and \p
* dynamicSMemSize as 0.
*
* If per-block dynamic shared memory allocation is needed, then if
* the dynamic shared memory size is constant regardless of block
* size, the size should be passed through \p dynamicSMemSize, and \p
* blockSizeToDynamicSMemSize should be NULL.
*
* Otherwise, if the per-block dynamic shared memory size varies with
* different block sizes, the user needs to provide a unary function
* through \p blockSizeToDynamicSMemSize that computes the dynamic
* shared memory needed by \p func for any given block size. \p
* dynamicSMemSize is ignored. An example signature is:
*
* \code
* // Take block size, returns dynamic shared memory needed
* size_t blockToSmem(int blockSize);
* \endcode
*
* \param minGridSize - Returned minimum grid size needed to achieve the maximum occupancy
* \param blockSize - Returned maximum block size that can achieve the maximum occupancy
* \param func - Kernel for which launch configuration is calulated
* \param blockSizeToDynamicSMemSize - A function that calculates how much per-block dynamic shared memory \p func uses based on the block size
* \param dynamicSMemSize - Dynamic shared memory usage intended, in bytes
* \param blockSizeLimit - The maximum block size \p func is designed to handle
*
* \return
* ::CUDA_SUCCESS,
* ::CUDA_ERROR_DEINITIALIZED,
* ::CUDA_ERROR_NOT_INITIALIZED,
* ::CUDA_ERROR_INVALID_CONTEXT,
* ::CUDA_ERROR_INVALID_VALUE,
* ::CUDA_ERROR_UNKNOWN
* \notefnerr
*
*/
public static int cuOccupancyMaxPotentialBlockSize(int minGridSize[], int blockSize[], CUfunction func, CUoccupancyB2DSize blockSizeToDynamicSMemSize, long dynamicSMemSize, int blockSizeLimit)
{
    // The callback relies on state kept on the native side, so all
    // calls are serialized on OCCUPANCY_LOCK. Both the native call and
    // the checkResult call happen while the lock is held.
    synchronized (OCCUPANCY_LOCK)
    {
        final int status = cuOccupancyMaxPotentialBlockSizeNative(
            minGridSize, blockSize, func, blockSizeToDynamicSMemSize,
            dynamicSMemSize, blockSizeLimit);
        return checkResult(status);
    }
}
// Native counterpart of cuOccupancyMaxPotentialBlockSize; receives the same arguments.
private static native int cuOccupancyMaxPotentialBlockSizeNative(int minGridSize[], int blockSize[], CUfunction func, CUoccupancyB2DSize blockSizeToDynamicSMemSize, long dynamicSMemSize, int blockSizeLimit);
/**
* Variant of {@link JCudaDriver#cuOccupancyMaxPotentialBlockSize} that
* takes an additional <code>flags</code> argument, which is forwarded
* unchanged to the native implementation.
*
* @param minGridSize Array passed to the native call for the minimum grid size
* @param blockSize Array passed to the native call for the block size
* @param func Kernel for which the launch configuration is requested
* @param blockSizeToDynamicSMemSize Callback mapping a block size to a dynamic shared memory size
* @param dynamicSMemSize Dynamic shared memory usage intended, in bytes
* @param blockSizeLimit The maximum block size func is designed to handle
* @param flags Flags forwarded to the native call
*
* @return The value that checkResult yields for the native status code
*/
public static int cuOccupancyMaxPotentialBlockSizeWithFlags(int minGridSize[], int blockSize[], CUfunction func, CUoccupancyB2DSize blockSizeToDynamicSMemSize, long dynamicSMemSize, int blockSizeLimit, int flags)
{
    // As with the flag-less variant, the callback relies on native-side
    // state, so the call is serialized on OCCUPANCY_LOCK. checkResult
    // is evaluated while the lock is still held.
    synchronized (OCCUPANCY_LOCK)
    {
        final int status = cuOccupancyMaxPotentialBlockSizeWithFlagsNative(
            minGridSize, blockSize, func, blockSizeToDynamicSMemSize,
            dynamicSMemSize, blockSizeLimit, flags);
        return checkResult(status);
    }
}
// Native counterpart of cuOccupancyMaxPotentialBlockSizeWithFlags; receives the same arguments.
private static native int cuOccupancyMaxPotentialBlockSizeWithFlagsNative(int minGridSize[], int blockSize[], CUfunction func, CUoccupancyB2DSize blockSizeToDynamicSMemSize, long dynamicSMemSize, int blockSizeLimit, int flags);
/**
* Monitor object on which cuOccupancyMaxPotentialBlockSize and
* cuOccupancyMaxPotentialBlockSizeWithFlags synchronize their native
* calls, because the block-size callback involves state on the
* native side.
*/
private static final Object OCCUPANCY_LOCK = new Object();
/**
* Launches a CUDA function.
*
*
* CUresult cuLaunch (
* CUfunction f )
*
*
* Launches a CUDA function.
* Deprecated. Invokes the kernel f
* on a 1 x 1 x 1 grid of blocks. The block contains the number of threads
* specified by a previous call to cuFuncSetBlockShape().
*
*
* Note:
* Note that this
* function may also return error codes from previous, asynchronous
* launches.
*
*
*
*
*
* @param f Kernel to launch
*
* @return CUDA_SUCCESS, CUDA_ERROR_DEINITIALIZED, CUDA_ERROR_NOT_INITIALIZED,
* CUDA_ERROR_INVALID_CONTEXT, CUDA_ERROR_INVALID_VALUE,
* CUDA_ERROR_LAUNCH_FAILED, CUDA_ERROR_LAUNCH_OUT_OF_RESOURCES,
* CUDA_ERROR_LAUNCH_TIMEOUT, CUDA_ERROR_LAUNCH_INCOMPATIBLE_TEXTURING,
* CUDA_ERROR_SHARED_OBJECT_INIT_FAILED
*
* @see JCudaDriver#cuFuncSetBlockShape
* @see JCudaDriver#cuFuncSetSharedSize
* @see JCudaDriver#cuFuncGetAttribute
* @see JCudaDriver#cuParamSetSize
* @see JCudaDriver#cuParamSetf
* @see JCudaDriver#cuParamSeti
* @see JCudaDriver#cuParamSetv
* @see JCudaDriver#cuLaunchGrid
* @see JCudaDriver#cuLaunchGridAsync
* @see JCudaDriver#cuLaunchKernel
*/
public static int cuLaunch(CUfunction f)
{
    // Run the native launch first, then let checkResult process the
    // status code it reported.
    final int status = cuLaunchNative(f);
    return checkResult(status);
}
// Native counterpart of cuLaunch.
private static native int cuLaunchNative(CUfunction f);
/**
* Launches a CUDA function.
*
*
* CUresult cuLaunchGrid (
* CUfunction f,
* int grid_width,
* int grid_height )
*
*
* Launches a CUDA function.
* Deprecated. Invokes the kernel f
* on a grid_width x grid_height grid of blocks. Each
* block contains the number of threads specified by a previous call to
* cuFuncSetBlockShape().
*
*
* Note:
* Note that this
* function may also return error codes from previous, asynchronous
* launches.
*
*
*
*
*
* @param f Kernel to launch
* @param grid_width Width of grid in blocks
* @param grid_height Height of grid in blocks
*
* @return CUDA_SUCCESS, CUDA_ERROR_DEINITIALIZED, CUDA_ERROR_NOT_INITIALIZED,
* CUDA_ERROR_INVALID_CONTEXT, CUDA_ERROR_INVALID_VALUE,
* CUDA_ERROR_LAUNCH_FAILED, CUDA_ERROR_LAUNCH_OUT_OF_RESOURCES,
* CUDA_ERROR_LAUNCH_TIMEOUT, CUDA_ERROR_LAUNCH_INCOMPATIBLE_TEXTURING,
* CUDA_ERROR_SHARED_OBJECT_INIT_FAILED
*
* @see JCudaDriver#cuFuncSetBlockShape
* @see JCudaDriver#cuFuncSetSharedSize
* @see JCudaDriver#cuFuncGetAttribute
* @see JCudaDriver#cuParamSetSize
* @see JCudaDriver#cuParamSetf
* @see JCudaDriver#cuParamSeti
* @see JCudaDriver#cuParamSetv
* @see JCudaDriver#cuLaunch
* @see JCudaDriver#cuLaunchGridAsync
* @see JCudaDriver#cuLaunchKernel
*/
public static int cuLaunchGrid(CUfunction f, int grid_width, int grid_height)
{
    // Pass the kernel and the grid dimensions to the native side and
    // route the resulting status code through checkResult.
    final int status = cuLaunchGridNative(f, grid_width, grid_height);
    return checkResult(status);
}
// Native counterpart of cuLaunchGrid.
private static native int cuLaunchGridNative(CUfunction f, int grid_width, int grid_height);
/**
* Launches a CUDA function.
*
*
* CUresult cuLaunchGridAsync (
* CUfunction f,
* int grid_width,
* int grid_height,
* CUstream hStream )
*
*
* Launches a CUDA function.
* Deprecated. Invokes the kernel f
* on a grid_width x grid_height grid of blocks. Each
* block contains the number of threads specified by a previous call to
* cuFuncSetBlockShape().
*
* cuLaunchGridAsync() can optionally be
* associated to a stream by passing a non-zero hStream
* argument.
*
*
* Note:
* Note that this
* function may also return error codes from previous, asynchronous
* launches.
*
*
*
*
*
* @param f Kernel to launch
* @param grid_width Width of grid in blocks
* @param grid_height Height of grid in blocks
* @param hStream Stream identifier
*
* @return CUDA_SUCCESS, CUDA_ERROR_DEINITIALIZED, CUDA_ERROR_NOT_INITIALIZED,
* CUDA_ERROR_INVALID_CONTEXT, CUDA_ERROR_INVALID_HANDLE,
* CUDA_ERROR_INVALID_VALUE, CUDA_ERROR_LAUNCH_FAILED,
* CUDA_ERROR_LAUNCH_OUT_OF_RESOURCES, CUDA_ERROR_LAUNCH_TIMEOUT,
* CUDA_ERROR_LAUNCH_INCOMPATIBLE_TEXTURING,
* CUDA_ERROR_SHARED_OBJECT_INIT_FAILED
*
* @see JCudaDriver#cuFuncSetBlockShape
* @see JCudaDriver#cuFuncSetSharedSize
* @see JCudaDriver#cuFuncGetAttribute
* @see JCudaDriver#cuParamSetSize
* @see JCudaDriver#cuParamSetf
* @see JCudaDriver#cuParamSeti
* @see JCudaDriver#cuParamSetv
* @see JCudaDriver#cuLaunch
* @see JCudaDriver#cuLaunchGrid
* @see JCudaDriver#cuLaunchKernel
*/
public static int cuLaunchGridAsync(CUfunction f, int grid_width, int grid_height, CUstream hStream)
{
    // Same as cuLaunchGrid, with the stream handle passed along to the
    // native call as well.
    final int status = cuLaunchGridAsyncNative(f, grid_width, grid_height, hStream);
    return checkResult(status);
}
// Native counterpart of cuLaunchGridAsync.
private static native int cuLaunchGridAsyncNative(CUfunction f, int grid_width, int grid_height, CUstream hStream);
/**
* Creates an event.
*
*
* CUresult cuEventCreate (
* CUevent* phEvent,
* unsigned int Flags )
*
*
* Creates an event. Creates an event
* *phEvent with the flags specified via Flags. Valid flags
* include:
*
* -
*
CU_EVENT_DEFAULT: Default event
* creation flag.
*
*
* -
*
CU_EVENT_BLOCKING_SYNC:
* Specifies that the created event should use blocking synchronization.
* A CPU thread that uses cuEventSynchronize() to wait on an event created
* with this flag will block until the event has actually been recorded.
*
*
* -
*
CU_EVENT_DISABLE_TIMING:
* Specifies that the created event does not need to record timing data.
* Events created with this flag specified and the CU_EVENT_BLOCKING_SYNC
* flag not specified will provide the best performance when used with
* cuStreamWaitEvent() and cuEventQuery().
*
*
* -
*
CU_EVENT_INTERPROCESS: Specifies
* that the created event may be used as an interprocess event by
* cuIpcGetEventHandle(). CU_EVENT_INTERPROCESS must be specified along
* with CU_EVENT_DISABLE_TIMING.
*
*
*
*
*
* Note:
* Note that this
* function may also return error codes from previous, asynchronous
* launches.
*
*
*
*
*
* @param phEvent Returns newly created event
* @param Flags Event creation flags
*
* @return CUDA_SUCCESS, CUDA_ERROR_DEINITIALIZED, CUDA_ERROR_NOT_INITIALIZED,
* CUDA_ERROR_INVALID_CONTEXT, CUDA_ERROR_INVALID_VALUE,
* CUDA_ERROR_OUT_OF_MEMORY
*
* @see JCudaDriver#cuEventRecord
* @see JCudaDriver#cuEventQuery
* @see JCudaDriver#cuEventSynchronize
* @see JCudaDriver#cuEventDestroy
* @see JCudaDriver#cuEventElapsedTime
*/
public static int cuEventCreate(CUevent phEvent, int Flags)
{
    // phEvent is handed to the native side together with the creation
    // flags; the status code is then processed by checkResult.
    final int status = cuEventCreateNative(phEvent, Flags);
    return checkResult(status);
}
// Native counterpart of cuEventCreate.
private static native int cuEventCreateNative(CUevent phEvent, int Flags);
/**
* Records an event.
*
*
* CUresult cuEventRecord (
* CUevent hEvent,
* CUstream hStream )
*
*
* Records an event. Records an event. If
* hStream is non-zero, the event is recorded after all preceding
* operations in hStream have been completed; otherwise, it is
* recorded after all preceding operations in the CUDA context have been
* completed. Since this
* operation is asynchronous, cuEventQuery()
* and/or cuEventSynchronize() must be used to determine when the event
* has actually been recorded.
*
* If cuEventRecord() has previously been
* called on hEvent, then this call will overwrite any existing
* state in hEvent. Any subsequent calls which examine the
* status of hEvent will only examine the completion of this
* most recent call to cuEventRecord().
*
* It is necessary that hEvent
* and hStream be created on the same context.
*
*
* Note:
* Note that this
* function may also return error codes from previous, asynchronous
* launches.
*
*
*
*
*
* @param hEvent Event to record
* @param hStream Stream to record event for
*
* @return CUDA_SUCCESS, CUDA_ERROR_DEINITIALIZED, CUDA_ERROR_NOT_INITIALIZED,
* CUDA_ERROR_INVALID_CONTEXT, CUDA_ERROR_INVALID_HANDLE,
* CUDA_ERROR_INVALID_VALUE
*
* @see JCudaDriver#cuEventCreate
* @see JCudaDriver#cuEventQuery
* @see JCudaDriver#cuEventSynchronize
* @see JCudaDriver#cuStreamWaitEvent
* @see JCudaDriver#cuEventDestroy
* @see JCudaDriver#cuEventElapsedTime
*/
public static int cuEventRecord(CUevent hEvent, CUstream hStream)
{
    // Forward the event and stream handles to the native call and
    // return what checkResult yields for its status code.
    final int status = cuEventRecordNative(hEvent, hStream);
    return checkResult(status);
}
// Native counterpart of cuEventRecord.
private static native int cuEventRecordNative(CUevent hEvent, CUstream hStream);
/**
* Queries an event's status.
*
*
* CUresult cuEventQuery (
* CUevent hEvent )
*
*
* Queries an event's status. Query the
* status of all device work preceding the most recent call to
* cuEventRecord() (in the appropriate compute streams, as specified by
* the arguments to cuEventRecord()).
*
* If this work has successfully been
* completed by the device, or if cuEventRecord() has not been called on
* hEvent, then CUDA_SUCCESS is returned. If this work has not
* yet been completed by the device then CUDA_ERROR_NOT_READY is
* returned.
*
*
* Note:
* Note that this
* function may also return error codes from previous, asynchronous
* launches.
*
*
*
*
*
* @param hEvent Event to query
*
* @return CUDA_SUCCESS, CUDA_ERROR_DEINITIALIZED, CUDA_ERROR_NOT_INITIALIZED,
* CUDA_ERROR_INVALID_HANDLE, CUDA_ERROR_INVALID_VALUE,
* CUDA_ERROR_NOT_READY
*
* @see JCudaDriver#cuEventCreate
* @see JCudaDriver#cuEventRecord
* @see JCudaDriver#cuEventSynchronize
* @see JCudaDriver#cuEventDestroy
* @see JCudaDriver#cuEventElapsedTime
*/
public static int cuEventQuery(CUevent hEvent)
{
    // The native query's status code goes through checkResult like
    // that of every other call in this class.
    final int status = cuEventQueryNative(hEvent);
    return checkResult(status);
}
// Native counterpart of cuEventQuery.
private static native int cuEventQueryNative(CUevent hEvent);
/**
* Waits for an event to complete.
*
*
* CUresult cuEventSynchronize (
* CUevent hEvent )
*
*
* Waits for an event to complete. Wait
* until the completion of all device work preceding the most recent call
* to cuEventRecord() (in the appropriate compute streams, as specified
* by the arguments to cuEventRecord()).
*
* If cuEventRecord() has not been called
* on hEvent, CUDA_SUCCESS is returned immediately.
*
* Waiting for an event that was created
* with the CU_EVENT_BLOCKING_SYNC flag will cause the calling CPU thread
* to block until the event has been completed by the device. If the
* CU_EVENT_BLOCKING_SYNC flag has not been set, then the CPU thread will
* busy-wait until the event has been completed by the device.
*
*
* Note:
* Note that this
* function may also return error codes from previous, asynchronous
* launches.
*
*
*
*
*
* @param hEvent Event to wait for
*
* @return CUDA_SUCCESS, CUDA_ERROR_DEINITIALIZED, CUDA_ERROR_NOT_INITIALIZED,
* CUDA_ERROR_INVALID_CONTEXT, CUDA_ERROR_INVALID_HANDLE
*
* @see JCudaDriver#cuEventCreate
* @see JCudaDriver#cuEventRecord
* @see JCudaDriver#cuEventQuery
* @see JCudaDriver#cuEventDestroy
* @see JCudaDriver#cuEventElapsedTime
*/
public static int cuEventSynchronize(CUevent hEvent)
{
    // Delegate the wait to the native implementation, then process
    // its status code with checkResult.
    final int status = cuEventSynchronizeNative(hEvent);
    return checkResult(status);
}
// Native counterpart of cuEventSynchronize.
private static native int cuEventSynchronizeNative(CUevent hEvent);
/**
* Destroys an event.
*
*
* CUresult cuEventDestroy (
* CUevent hEvent )
*
*
* Destroys an event. Destroys the event
* specified by hEvent.
*
* In case hEvent has been
* recorded but has not yet been completed when cuEventDestroy() is
* called, the function will return immediately and the resources
* associated with hEvent will be released automatically once
* the device has completed hEvent.
*
*
* Note:
* Note that this
* function may also return error codes from previous, asynchronous
* launches.
*
*
*
*
*
* @param hEvent Event to destroy
*
* @return CUDA_SUCCESS, CUDA_ERROR_DEINITIALIZED, CUDA_ERROR_NOT_INITIALIZED,
* CUDA_ERROR_INVALID_CONTEXT, CUDA_ERROR_INVALID_HANDLE
*
* @see JCudaDriver#cuEventCreate
* @see JCudaDriver#cuEventRecord
* @see JCudaDriver#cuEventQuery
* @see JCudaDriver#cuEventSynchronize
* @see JCudaDriver#cuEventElapsedTime
*/
public static int cuEventDestroy(CUevent hEvent)
{
    // Hand the event to the native destroy call and return the value
    // checkResult produces for the reported status code.
    final int status = cuEventDestroyNative(hEvent);
    return checkResult(status);
}
// Native counterpart of cuEventDestroy.
private static native int cuEventDestroyNative(CUevent hEvent);
/**
* Computes the elapsed time between two events.
*
*
* CUresult cuEventElapsedTime (
* float* pMilliseconds,
* CUevent hStart,
* CUevent hEnd )
*
*
* Computes the elapsed time between two
* events. Computes the elapsed time between two events (in milliseconds
* with a resolution
* of around 0.5 microseconds).
*
* If either event was last recorded in a
* non-NULL stream, the resulting time may be greater than expected (even
* if both used
* the same stream handle). This happens
* because the cuEventRecord() operation takes place asynchronously and
* there is no guarantee that the measured latency is actually just
* between the two
* events. Any number of other different
* stream operations could execute in between the two measured events,
* thus altering the
* timing in a significant way.
*
* If cuEventRecord() has not been called
* on either event then CUDA_ERROR_INVALID_HANDLE is returned. If
* cuEventRecord() has been called on both events but one or both of them
* has not yet been completed (that is, cuEventQuery() would return
* CUDA_ERROR_NOT_READY on at least one of the events), CUDA_ERROR_NOT_READY
* is returned. If either event was created with the CU_EVENT_DISABLE_TIMING
* flag, then this function will return CUDA_ERROR_INVALID_HANDLE.
*
*
* Note:
* Note that this
* function may also return error codes from previous, asynchronous
* launches.
*
*
*
*
*
* @param pMilliseconds Time between hStart and hEnd in ms
* @param hStart Starting event
* @param hEnd Ending event
*
* @return CUDA_SUCCESS, CUDA_ERROR_DEINITIALIZED, CUDA_ERROR_NOT_INITIALIZED,
* CUDA_ERROR_INVALID_CONTEXT, CUDA_ERROR_INVALID_HANDLE,
* CUDA_ERROR_NOT_READY
*
* @see JCudaDriver#cuEventCreate
* @see JCudaDriver#cuEventRecord
* @see JCudaDriver#cuEventQuery
* @see JCudaDriver#cuEventSynchronize
* @see JCudaDriver#cuEventDestroy
*/
public static int cuEventElapsedTime(float pMilliseconds[], CUevent hStart, CUevent hEnd)
{
    // pMilliseconds is the output array handed to the native side;
    // this wrapper only deals with the status code.
    final int status = cuEventElapsedTimeNative(pMilliseconds, hStart, hEnd);
    return checkResult(status);
}
// Native counterpart of cuEventElapsedTime.
private static native int cuEventElapsedTimeNative(float pMilliseconds[], CUevent hStart, CUevent hEnd);
/**
* Returns information about a pointer.
*
*
* CUresult cuPointerGetAttribute (
* void* data,
* CUpointer_attribute attribute,
* CUdeviceptr ptr )
*
*
* Returns information about a pointer.
* The supported attributes are:
*
*
* -
*
CU_POINTER_ATTRIBUTE_CONTEXT:
*
*
*
*
* Returns in *data the CUcontext
* in which ptr was allocated or registered. The type of data must be CUcontext *.
*
* If ptr was not allocated by,
* mapped by, or registered with a CUcontext which uses unified virtual
* addressing then CUDA_ERROR_INVALID_VALUE is returned.
*
*
* -
*
CU_POINTER_ATTRIBUTE_MEMORY_TYPE:
*
*
*
*
* Returns in *data the physical
* memory type of the memory that ptr addresses as a CUmemorytype
* enumerated value. The type of data must be unsigned int.
*
* If ptr addresses device memory
* then *data is set to CU_MEMORYTYPE_DEVICE. The particular
* CUdevice on which the memory resides is the CUdevice of the CUcontext
* returned by the CU_POINTER_ATTRIBUTE_CONTEXT attribute of ptr.
*
* If ptr addresses host memory
* then *data is set to CU_MEMORYTYPE_HOST.
*
* If ptr was not allocated by,
* mapped by, or registered with a CUcontext which uses unified virtual
* addressing then CUDA_ERROR_INVALID_VALUE is returned.
*
* If the current CUcontext does not
* support unified virtual addressing then CUDA_ERROR_INVALID_CONTEXT is
* returned.
*
*
* -
*
CU_POINTER_ATTRIBUTE_DEVICE_POINTER:
*
*
*
*
* Returns in *data the device
* pointer value through which ptr may be accessed by kernels
* running in the current CUcontext. The type of data must be
* CUdeviceptr *.
*
* If there exists no device pointer value
* through which kernels running in the current CUcontext may access ptr then CUDA_ERROR_INVALID_VALUE is returned.
*
* If there is no current CUcontext then
* CUDA_ERROR_INVALID_CONTEXT is returned.
*
* Except in the exceptional disjoint
* addressing cases discussed below, the value returned in *data
* will equal the input value ptr.
*
*
* -
*
CU_POINTER_ATTRIBUTE_HOST_POINTER:
*
*
*
*
* Returns in *data the host
* pointer value through which ptr may be accessed by the
* host program. The type of data must be void **. If there
* exists no host pointer value through which the host program may directly
* access ptr then CUDA_ERROR_INVALID_VALUE is returned.
*
* Except in the exceptional disjoint
* addressing cases discussed below, the value returned in *data
* will equal the input value ptr.
*
*
* -
*
CU_POINTER_ATTRIBUTE_P2P_TOKENS:
*
*
*
*
* Returns in *data two tokens
* for use with the nv-p2p.h Linux kernel interface. data must
* be a struct of type CUDA_POINTER_ATTRIBUTE_P2P_TOKENS.
*
* ptr must be a pointer to
* memory obtained from cuMemAlloc(). Note that p2pToken and vaSpaceToken
* are only valid for the lifetime of the source allocation. A subsequent
* allocation at
* the same address may return completely
* different tokens.
*
*
* Note that for most allocations in the
* unified virtual address space the host and device pointer for accessing
* the allocation
* will be the same. The exceptions to this
* are
*
* -
*
user memory registered using
* cuMemHostRegister
*
*
* -
*
host memory allocated using
* cuMemHostAlloc with the CU_MEMHOSTALLOC_WRITECOMBINED flag For these
* types of allocation there will exist separate, disjoint host and device
* addresses for accessing the allocation.
* In particular
*
*
* -
*
The host address will correspond
* to an invalid unmapped device address (which will result in an exception
* if accessed from
* the device)
*
*
* -
*
The device address will
* correspond to an invalid unmapped host address (which will result in
* an exception if accessed from
* the host). For these types of
* allocations, querying CU_POINTER_ATTRIBUTE_HOST_POINTER and
* CU_POINTER_ATTRIBUTE_DEVICE_POINTER may be used to retrieve the host
* and device addresses from either address.
*
*
*
*
*
* Note:
* Note that this
* function may also return error codes from previous, asynchronous
* launches.
*
*
*
*
*
* @param data Returned pointer attribute value
* @param attribute Pointer attribute to query
* @param ptr Pointer
*
* @return CUDA_SUCCESS, CUDA_ERROR_DEINITIALIZED, CUDA_ERROR_NOT_INITIALIZED,
* CUDA_ERROR_INVALID_CONTEXT, CUDA_ERROR_INVALID_VALUE,
* CUDA_ERROR_INVALID_DEVICE
*
* @see JCudaDriver#cuMemAlloc
* @see JCudaDriver#cuMemFree
* @see JCudaDriver#cuMemAllocHost
* @see JCudaDriver#cuMemFreeHost
* @see JCudaDriver#cuMemHostAlloc
* @see JCudaDriver#cuMemHostRegister
* @see JCudaDriver#cuMemHostUnregister
*/
public static int cuPointerGetAttribute(Pointer data, int attribute, CUdeviceptr ptr)
{
    // Pass the output pointer, the attribute id and the queried
    // pointer to the native call; checkResult handles the status code.
    final int status = cuPointerGetAttributeNative(data, attribute, ptr);
    return checkResult(status);
}
// Native counterpart of cuPointerGetAttribute.
private static native int cuPointerGetAttributeNative(Pointer data, int attribute, CUdeviceptr ptr);
/**
* Sets an attribute on a previously allocated memory region.
*
*
* CUresult cuPointerSetAttribute (
* const void* value,
* CUpointer_attribute attribute,
* CUdeviceptr ptr )
*
*
* Java binding for the CUDA driver function of the same name. The
* arguments are passed to the native implementation and the status
* code it returns is processed by checkResult.
*
* @param value Pointer to memory containing the value to be set
* @param attribute Pointer attribute to set
* @param ptr Pointer to the memory region whose attribute is set
*
* @return The value that checkResult yields for the native status code
*
* @see JCudaDriver#cuPointerGetAttribute
* @see JCudaDriver#cuPointerGetAttributes
*/
public static int cuPointerSetAttribute(Pointer value, int attribute, CUdeviceptr ptr)
{
    // This must call the *Native method. The previous code called
    // cuPointerSetAttribute itself, which recursed without end and
    // never reached the driver.
    return checkResult(cuPointerSetAttributeNative(value, attribute, ptr));
}
// Native counterpart of cuPointerSetAttribute.
private static native int cuPointerSetAttributeNative(Pointer value, int attribute, CUdeviceptr ptr);
/**
* Java binding for the CUDA driver function cuPointerGetAttributes.
* Passes all arguments to the native implementation and processes the
* status code it returns with checkResult.
*
* @param numAttributes Number of attributes, as passed to the native call
* @param attributes Array of attribute identifiers
* @param data Pointer passed to the native call for the attribute values
* @param ptr Pointer to query
*
* @return The value that checkResult yields for the native status code
*
* @see JCudaDriver#cuPointerGetAttribute
*/
public static int cuPointerGetAttributes(int numAttributes, int attributes[], Pointer data, CUdeviceptr ptr)
{
    final int status = cuPointerGetAttributesNative(numAttributes, attributes, data, ptr);
    return checkResult(status);
}
// Native counterpart of cuPointerGetAttributes.
private static native int cuPointerGetAttributesNative(int numAttributes, int attributes[], Pointer data, CUdeviceptr ptr);
/**
* Create a stream.
*
*
* CUresult cuStreamCreate (
* CUstream* phStream,
* unsigned int Flags )
*
*
* Create a stream. Creates a stream and
* returns a handle in phStream. The Flags argument
* determines behaviors of the stream. Valid values for Flags
* are:
*
* -
*
CU_STREAM_DEFAULT: Default
* stream creation flag.
*
*
* -
*
CU_STREAM_NON_BLOCKING:
* Specifies that work running in the created stream may run concurrently
* with work in stream 0 (the NULL stream), and that
* the created stream should
* perform no implicit synchronization with stream 0.
*
*
*
*
*
* Note:
* Note that this
* function may also return error codes from previous, asynchronous
* launches.
*
*
*
*
*
* @param phStream Returned newly created stream
* @param Flags Parameters for stream creation
*
* @return CUDA_SUCCESS, CUDA_ERROR_DEINITIALIZED, CUDA_ERROR_NOT_INITIALIZED,
* CUDA_ERROR_INVALID_CONTEXT, CUDA_ERROR_INVALID_VALUE,
* CUDA_ERROR_OUT_OF_MEMORY
*
* @see JCudaDriver#cuStreamDestroy
* @see JCudaDriver#cuStreamWaitEvent
* @see JCudaDriver#cuStreamQuery
* @see JCudaDriver#cuStreamSynchronize
* @see JCudaDriver#cuStreamAddCallback
*/
public static int cuStreamCreate(CUstream phStream, int Flags)
{
    // phStream and the creation flags go to the native side; the
    // status code is then processed by checkResult.
    final int status = cuStreamCreateNative(phStream, Flags);
    return checkResult(status);
}
// Native counterpart of cuStreamCreate.
private static native int cuStreamCreateNative(CUstream phStream, int Flags);
/**
* Java binding for the CUDA driver function cuStreamCreateWithPriority.
* Passes the stream object, the flags and the priority to the native
* implementation and processes the status code it returns with
* checkResult.
*
* @param phStream Stream object passed to the native call
* @param flags Flags for stream creation
* @param priority Priority value forwarded to the native call
*
* @return The value that checkResult yields for the native status code
*
* @see JCudaDriver#cuStreamCreate
* @see JCudaDriver#cuStreamGetPriority
*/
public static int cuStreamCreateWithPriority(CUstream phStream, int flags, int priority)
{
    final int status = cuStreamCreateWithPriorityNative(phStream, flags, priority);
    return checkResult(status);
}
// Native counterpart of cuStreamCreateWithPriority.
private static native int cuStreamCreateWithPriorityNative(CUstream phStream, int flags, int priority);
/**
* Java binding for the CUDA driver function cuStreamGetPriority.
* Passes the stream handle and the <code>priority</code> array to the
* native implementation and processes the status code it returns with
* checkResult.
*
* @param hStream Stream handle passed to the native call
* @param priority Array passed to the native call for the priority value
*
* @return The value that checkResult yields for the native status code
*
* @see JCudaDriver#cuStreamCreateWithPriority
* @see JCudaDriver#cuStreamGetFlags
*/
public static int cuStreamGetPriority(CUstream hStream, int priority[])
{
    final int status = cuStreamGetPriorityNative(hStream, priority);
    return checkResult(status);
}
// Native counterpart of cuStreamGetPriority.
private static native int cuStreamGetPriorityNative(CUstream hStream, int priority[]);
/**
* Java binding for the CUDA driver function cuStreamGetFlags.
* Passes the stream handle and the <code>flags</code> array to the
* native implementation and processes the status code it returns with
* checkResult.
*
* @param hStream Stream handle passed to the native call
* @param flags Array passed to the native call for the flags value
*
* @return The value that checkResult yields for the native status code
*
* @see JCudaDriver#cuStreamCreate
* @see JCudaDriver#cuStreamGetPriority
*/
public static int cuStreamGetFlags(CUstream hStream, int flags[])
{
    final int status = cuStreamGetFlagsNative(hStream, flags);
    return checkResult(status);
}
// Native counterpart of cuStreamGetFlags.
private static native int cuStreamGetFlagsNative(CUstream hStream, int flags[]);
/**
* Make a compute stream wait on an event.
*
*
* CUresult cuStreamWaitEvent (
* CUstream hStream,
* CUevent hEvent,
* unsigned int Flags )
*
*
* Make a compute stream wait on an event.
* Makes all future work submitted to hStream wait until hEvent reports completion before beginning execution. This
* synchronization will be performed efficiently on the device. The event
* hEvent may be from a different
* context than hStream, in which case this function will
* perform cross-device synchronization.
*
* The stream hStream will wait
* only for the completion of the most recent host call to cuEventRecord()
* on hEvent. Once this call has returned, any functions
* (including cuEventRecord() and cuEventDestroy()) may be called on hEvent again, and subsequent calls will not have any effect on
* hStream.
*
* If hStream is 0 (the NULL
* stream) any future work submitted in any stream will wait for hEvent to complete before beginning execution. This effectively
* creates a barrier for all future work submitted to the context.
*
* If cuEventRecord() has not been called
* on hEvent, this call acts as if the record has already
* completed, and so is a functional no-op.
*
*
* Note:
* Note that this
* function may also return error codes from previous, asynchronous
* launches.
*
*
*
*
*
* @param hStream Stream to wait
* @param hEvent Event to wait on (may not be NULL)
* @param Flags Parameters for the operation (must be 0)
*
* @return CUDA_SUCCESS, CUDA_ERROR_DEINITIALIZED, CUDA_ERROR_NOT_INITIALIZED,
* CUDA_ERROR_INVALID_CONTEXT, CUDA_ERROR_INVALID_HANDLE
*
* @see JCudaDriver#cuStreamCreate
* @see JCudaDriver#cuEventRecord
* @see JCudaDriver#cuStreamQuery
* @see JCudaDriver#cuStreamSynchronize
* @see JCudaDriver#cuStreamAddCallback
* @see JCudaDriver#cuStreamDestroy
*/
public static int cuStreamWaitEvent(CUstream hStream, CUevent hEvent, int Flags)
{
    // Stream, event and flags are forwarded as given; only the status
    // code is handled here, via checkResult.
    final int status = cuStreamWaitEventNative(hStream, hEvent, Flags);
    return checkResult(status);
}
// Native counterpart of cuStreamWaitEvent.
private static native int cuStreamWaitEventNative(CUstream hStream, CUevent hEvent, int Flags);
/**
* Add a callback to a compute stream.
*
*
* CUresult cuStreamAddCallback (
* CUstream hStream,
* CUstreamCallback callback,
* void* userData,
* unsigned int flags )
*
*
* Add a callback to a compute stream. Adds
* a callback to be called on the host after all currently enqueued items
* in the stream
* have completed. For each cuStreamAddCallback
* call, the callback will be executed exactly once. The callback will
* block later
* work in the stream until it is finished.
*
* The callback may be passed CUDA_SUCCESS
* or an error code. In the event of a device error, all subsequently
* executed callbacks will receive an appropriate CUresult.
*
* Callbacks must not make any CUDA API
* calls. Attempting to use a CUDA API will result in CUDA_ERROR_NOT_PERMITTED.
* Callbacks must not perform any synchronization that may depend on
* outstanding device work or other callbacks that are not
* mandated to run earlier. Callbacks
* without a mandated order (in independent streams) execute in undefined
* order and may be
* serialized.
*
* This API requires compute capability
* 1.1 or greater. See cuDeviceGetAttribute or cuDeviceGetProperties to
* query compute capability. Attempting to use this API with earlier
* compute versions will return CUDA_ERROR_NOT_SUPPORTED.
*
*
* Note:
* Note that this
* function may also return error codes from previous, asynchronous
* launches.
*
*
*
*
*
* @param hStream Stream to add callback to
* @param callback The function to call once preceding stream operations are complete
* @param userData User specified data to be passed to the callback function
* @param flags Reserved for future use, must be 0
*
* @return CUDA_SUCCESS, CUDA_ERROR_DEINITIALIZED, CUDA_ERROR_NOT_INITIALIZED,
* CUDA_ERROR_INVALID_CONTEXT, CUDA_ERROR_INVALID_HANDLE,
* CUDA_ERROR_NOT_SUPPORTED
*
* @see JCudaDriver#cuStreamCreate
* @see JCudaDriver#cuStreamQuery
* @see JCudaDriver#cuStreamSynchronize
* @see JCudaDriver#cuStreamWaitEvent
* @see JCudaDriver#cuStreamDestroy
*/
public static int cuStreamAddCallback(CUstream hStream, CUstreamCallback callback, Object userData, int flags)
{
    // The callback object and the user data are passed to the native
    // side as they are; checkResult processes the status code.
    final int status = cuStreamAddCallbackNative(hStream, callback, userData, flags);
    return checkResult(status);
}
// Native counterpart of cuStreamAddCallback.
private static native int cuStreamAddCallbackNative(CUstream hStream, CUstreamCallback callback, Object userData, int flags);
/**
* Java binding for the CUDA driver function cuStreamAttachMemAsync.
* Passes the stream handle, the device pointer, the length and the
* flags to the native implementation and processes the status code it
* returns with checkResult.
*
* @param hStream Stream handle passed to the native call
* @param dptr Device pointer passed to the native call
* @param length Length value forwarded to the native call
* @param flags Flags forwarded to the native call
*
* @return The value that checkResult yields for the native status code
*/
public static int cuStreamAttachMemAsync(CUstream hStream, CUdeviceptr dptr, long length, int flags)
{
    final int status = cuStreamAttachMemAsyncNative(hStream, dptr, length, flags);
    return checkResult(status);
}
// Native counterpart of cuStreamAttachMemAsync.
private static native int cuStreamAttachMemAsyncNative(CUstream hStream, CUdeviceptr dptr, long length, int flags);
/**
* Determine status of a compute stream.
*
*
* CUresult cuStreamQuery (
* CUstream hStream )
*
*
* Determine status of a compute stream.
* Returns CUDA_SUCCESS if all operations in the stream specified by hStream have completed, or CUDA_ERROR_NOT_READY if not.
*
*
* Note:
* Note that this
* function may also return error codes from previous, asynchronous
* launches.
*
*
*
*
*
* @param hStream Stream to query status of
*
* @return CUDA_SUCCESS, CUDA_ERROR_DEINITIALIZED, CUDA_ERROR_NOT_INITIALIZED,
* CUDA_ERROR_INVALID_CONTEXT, CUDA_ERROR_INVALID_HANDLE,
* CUDA_ERROR_NOT_READY
*
* @see JCudaDriver#cuStreamCreate
* @see JCudaDriver#cuStreamWaitEvent
* @see JCudaDriver#cuStreamDestroy
* @see JCudaDriver#cuStreamSynchronize
* @see JCudaDriver#cuStreamAddCallback
*/
public static int cuStreamQuery(CUstream hStream)
{
    // The native query's status code goes through checkResult before
    // it is returned.
    final int status = cuStreamQueryNative(hStream);
    return checkResult(status);
}
// Native counterpart of cuStreamQuery.
private static native int cuStreamQueryNative(CUstream hStream);
/**
* Wait until a stream's tasks are completed.
*
*
* CUresult cuStreamSynchronize (
* CUstream hStream )
*
*
* Wait until a stream's tasks are completed.
* Waits until the device has completed all operations in the stream
* specified by
* hStream. If the context was
* created with the CU_CTX_SCHED_BLOCKING_SYNC flag, the CPU thread will
* block until the stream is finished with all of its tasks.
*
*
* Note:
* Note that this
* function may also return error codes from previous, asynchronous
* launches.
*
*
*
*
*
* @param hStream Stream to wait for
*
* @return CUDA_SUCCESS, CUDA_ERROR_DEINITIALIZED, CUDA_ERROR_NOT_INITIALIZED,
* CUDA_ERROR_INVALID_CONTEXT, CUDA_ERROR_INVALID_HANDLE
*
* @see JCudaDriver#cuStreamCreate
* @see JCudaDriver#cuStreamDestroy
* @see JCudaDriver#cuStreamWaitEvent
* @see JCudaDriver#cuStreamQuery
* @see JCudaDriver#cuStreamAddCallback
*/
public static int cuStreamSynchronize(CUstream hStream)
{
    // Delegate the wait to the native implementation, then process
    // its status code with checkResult.
    final int status = cuStreamSynchronizeNative(hStream);
    return checkResult(status);
}
// Native counterpart of cuStreamSynchronize.
private static native int cuStreamSynchronizeNative(CUstream hStream);
/**
* Destroys a stream.
*
*
* CUresult cuStreamDestroy (
* CUstream hStream )
*
*
* Destroys a stream. Destroys the stream
* specified by hStream.
*
* In case the device is still doing work
* in the stream hStream when cuStreamDestroy() is called, the
* function will return immediately and the resources associated with hStream will be released automatically once the device has
* completed all work in hStream.
*
*
* Note:
* Note that this
* function may also return error codes from previous, asynchronous
* launches.
*
*
*
*
*
* @param hStream Stream to destroy
*
* @return CUDA_SUCCESS, CUDA_ERROR_DEINITIALIZED, CUDA_ERROR_NOT_INITIALIZED,
* CUDA_ERROR_INVALID_CONTEXT, CUDA_ERROR_INVALID_VALUE
*
* @see JCudaDriver#cuStreamCreate
* @see JCudaDriver#cuStreamWaitEvent
* @see JCudaDriver#cuStreamQuery
* @see JCudaDriver#cuStreamSynchronize
* @see JCudaDriver#cuStreamAddCallback
*/
public static int cuStreamDestroy(CUstream hStream)
{
    // Hand the stream to the native destroy call and return the value
    // checkResult produces for the reported status code.
    final int status = cuStreamDestroyNative(hStream);
    return checkResult(status);
}
// Native counterpart of cuStreamDestroy.
private static native int cuStreamDestroyNative(CUstream hStream);
/**
* Initializes OpenGL interoperability.
*
*
* CUresult cuGLInit (
* void )
*
*
* Initializes OpenGL interoperability.
* Deprecated. This function is
* deprecated as of Cuda 3.0. Initializes OpenGL interoperability.
* This function is deprecated and calling it is no longer required. It
* may fail if the
* needed OpenGL driver facilities are
* not available.
*
*
* Note:
* Note that
* this function may also return error codes from previous, asynchronous
* launches.
*
*
*
*
*
*
* @return CUDA_SUCCESS, CUDA_ERROR_DEINITIALIZED, CUDA_ERROR_NOT_INITIALIZED,
* CUDA_ERROR_INVALID_CONTEXT, CUDA_ERROR_UNKNOWN
*
* @see JCudaDriver#cuGLMapBufferObject
* @see JCudaDriver#cuGLRegisterBufferObject
* @see JCudaDriver#cuGLUnmapBufferObject
* @see JCudaDriver#cuGLUnregisterBufferObject
* @see JCudaDriver#cuGLMapBufferObjectAsync
* @see JCudaDriver#cuGLUnmapBufferObjectAsync
* @see JCudaDriver#cuGLSetBufferObjectMapFlags
*/
public static int cuGLInit()
{
    // No arguments to forward: call the native initializer and let
    // checkResult process its status code.
    final int status = cuGLInitNative();
    return checkResult(status);
}
// Native counterpart of cuGLInit.
private static native int cuGLInitNative();
/**
* Create a CUDA context for interoperability with OpenGL.
*
*
* CUresult cuGLCtxCreate (
* CUcontext* pCtx,
* unsigned int Flags,
* CUdevice device )
*
*
* Create a CUDA context for
* interoperability with OpenGL.
* Deprecated. This function is
* deprecated as of Cuda 5.0. This function is deprecated and should
* no longer be used. It is no longer necessary to associate a CUDA
* context with an OpenGL
* context in order to achieve maximum
* interoperability performance.
*
*
* Note:
* Note that
* this function may also return error codes from previous, asynchronous
* launches.
*
*
*
*
*
* @param pCtx Returned CUDA context
* @param Flags Options for CUDA context creation
* @param device Device on which to create the context
*
* @return CUDA_SUCCESS, CUDA_ERROR_DEINITIALIZED, CUDA_ERROR_NOT_INITIALIZED,
* CUDA_ERROR_INVALID_CONTEXT, CUDA_ERROR_INVALID_VALUE,
* CUDA_ERROR_OUT_OF_MEMORY
*
* @see JCudaDriver#cuCtxCreate
* @see JCudaDriver#cuGLInit
* @see JCudaDriver#cuGLMapBufferObject
* @see JCudaDriver#cuGLRegisterBufferObject
* @see JCudaDriver#cuGLUnmapBufferObject
* @see JCudaDriver#cuGLUnregisterBufferObject
* @see JCudaDriver#cuGLMapBufferObjectAsync
* @see JCudaDriver#cuGLUnmapBufferObjectAsync
* @see JCudaDriver#cuGLSetBufferObjectMapFlags
*/
public static int cuGLCtxCreate(CUcontext pCtx, int Flags, CUdevice device)
{
    // Forward all arguments unchanged to the native call, then pass
    // the returned code through checkResult.
    final int status = cuGLCtxCreateNative(pCtx, Flags, device);
    return checkResult(status);
}
// Native method invoked by cuGLCtxCreate; declared here, implemented outside this Java source.
private static native int cuGLCtxCreateNative(CUcontext pCtx, int Flags, CUdevice device);
/**
* Gets the CUDA devices associated with the current OpenGL context.
*
*
* CUresult cuGLGetDevices (
* unsigned int* pCudaDeviceCount,
* CUdevice* pCudaDevices,
* unsigned int cudaDeviceCount,
* CUGLDeviceList deviceList )
*
*
* Gets the CUDA devices associated with
* the current OpenGL context. Returns in *pCudaDeviceCount
* the number of CUDA-compatible devices corresponding to the current
* OpenGL context. Also returns in *pCudaDevices at most
* cudaDeviceCount of the CUDA-compatible devices corresponding to the
* current OpenGL context. If any of the GPUs being
* used by the current OpenGL context are
* not CUDA capable then the call will return CUDA_ERROR_NO_DEVICE.
*
* The deviceList argument may
* be any of the following:
*
* -
*
CU_GL_DEVICE_LIST_ALL: Query
* all devices used by the current OpenGL context.
*
*
* -
*
CU_GL_DEVICE_LIST_CURRENT_FRAME:
* Query the devices used by the current OpenGL context to render the
* current frame (in SLI).
*
*
* -
*
CU_GL_DEVICE_LIST_NEXT_FRAME:
* Query the devices used by the current OpenGL context to render the next
* frame (in SLI). Note that this is a prediction,
* it can't be guaranteed that this
* is correct in all cases.
*
*
*
*
*
* Note:
* Note that this
* function may also return error codes from previous, asynchronous
* launches.
*
*
*
*
*
* @param pCudaDeviceCount Returned number of CUDA devices.
* @param pCudaDevices Returned CUDA devices.
* @param cudaDeviceCount The size of the output device array pCudaDevices.
* @param CUGLDeviceList_deviceList The set of devices to return (the deviceList argument described above).
*
* @return CUDA_SUCCESS, CUDA_ERROR_NO_DEVICE,
* CUDA_ERROR_INVALID_VALUE, CUDA_ERROR_INVALID_CONTEXT
*
*/
public static int cuGLGetDevices(
    int pCudaDeviceCount[],
    CUdevice pCudaDevices[],
    int cudaDeviceCount,
    int CUGLDeviceList_deviceList)
{
    // Forward all arguments unchanged to the native call, then pass
    // the returned code through checkResult.
    final int status = cuGLGetDevicesNative(
        pCudaDeviceCount, pCudaDevices, cudaDeviceCount, CUGLDeviceList_deviceList);
    return checkResult(status);
}
// Native method invoked by cuGLGetDevices; declared here, implemented outside this Java source.
private static native int cuGLGetDevicesNative(
    int pCudaDeviceCount[],
    CUdevice pCudaDevices[],
    int cudaDeviceCount,
    int CUGLDeviceList_deviceList);
/**
* Registers an OpenGL buffer object.
*
*
* CUresult cuGraphicsGLRegisterBuffer (
* CUgraphicsResource* pCudaResource,
* GLuint buffer,
* unsigned int Flags )
*
*
* Registers an OpenGL buffer object.
* Registers the buffer object specified by buffer for access
* by CUDA. A handle to the registered object is returned as pCudaResource. The register flags Flags specify the
* intended usage, as follows:
*
*
* -
*
CU_GRAPHICS_REGISTER_FLAGS_NONE:
* Specifies no hints about how this resource will be used. It is therefore
* assumed that this
* resource will be read from and
* written to by CUDA. This is the default value.
*
*
* -
*
CU_GRAPHICS_REGISTER_FLAGS_READ_ONLY:
* Specifies that CUDA will not write to this resource.
*
*
* -
*
CU_GRAPHICS_REGISTER_FLAGS_WRITE_DISCARD: Specifies that CUDA will
* not read from this resource and will write over the entire
* contents of the resource, so
* none of the data previously stored in the resource will be preserved.
*
*
*
*
*
* Note:
* Note that this
* function may also return error codes from previous, asynchronous
* launches.
*
*
*
*
*
* @param pCudaResource Pointer to the returned object handle
* @param buffer name of buffer object to be registered
* @param Flags Register flags
*
* @return CUDA_SUCCESS, CUDA_ERROR_INVALID_HANDLE, CUDA_ERROR_ALREADY_MAPPED,
* CUDA_ERROR_INVALID_CONTEXT
*
* @see JCudaDriver#cuGraphicsUnregisterResource
* @see JCudaDriver#cuGraphicsMapResources
* @see JCudaDriver#cuGraphicsResourceGetMappedPointer
*/
public static int cuGraphicsGLRegisterBuffer(CUgraphicsResource pCudaResource, int buffer, int Flags)
{
    // Forward all arguments unchanged to the native call, then pass
    // the returned code through checkResult.
    final int status = cuGraphicsGLRegisterBufferNative(pCudaResource, buffer, Flags);
    return checkResult(status);
}
// Native method invoked by cuGraphicsGLRegisterBuffer; declared here, implemented outside this Java source.
private static native int cuGraphicsGLRegisterBufferNative(CUgraphicsResource pCudaResource, int buffer, int Flags);
/**
* Register an OpenGL texture or renderbuffer object.
*
*
* CUresult cuGraphicsGLRegisterImage (
* CUgraphicsResource* pCudaResource,
* GLuint image,
* GLenum target,
* unsigned int Flags )
*
*
* Register an OpenGL texture or renderbuffer
* object. Registers the texture or renderbuffer object specified by image for access by CUDA. A handle to the registered object is
* returned as pCudaResource.
*
* target must match the type of
* the object, and must be one of GL_TEXTURE_2D, GL_TEXTURE_RECTANGLE,
* GL_TEXTURE_CUBE_MAP, GL_TEXTURE_3D,
* GL_TEXTURE_2D_ARRAY, or GL_RENDERBUFFER.
*
* The register flags Flags
* specify the intended usage, as follows:
*
*
* -
*
CU_GRAPHICS_REGISTER_FLAGS_NONE:
* Specifies no hints about how this resource will be used. It is therefore
* assumed that this
* resource will be read from and
* written to by CUDA. This is the default value.
*
*
* -
*
CU_GRAPHICS_REGISTER_FLAGS_READ_ONLY:
* Specifies that CUDA will not write to this resource.
*
*
* -
*
CU_GRAPHICS_REGISTER_FLAGS_WRITE_DISCARD: Specifies that CUDA will
* not read from this resource and will write over the entire
* contents of the resource, so
* none of the data previously stored in the resource will be preserved.
*
*
* -
*
CU_GRAPHICS_REGISTER_FLAGS_SURFACE_LDST:
* Specifies that CUDA will bind this resource to a surface
* reference.
*
*
* -
*
CU_GRAPHICS_REGISTER_FLAGS_TEXTURE_GATHER: Specifies that CUDA will
* perform texture gather operations on this resource.
*
*
*
*
* The following image formats are
* supported. For brevity's sake, the list is abbreviated. For ex., {GL_R,
* GL_RG} X {8, 16} would
* expand to the following 4 formats {GL_R8,
* GL_R16, GL_RG8, GL_RG16} :
*
* -
*
GL_RED, GL_RG, GL_RGBA,
* GL_LUMINANCE, GL_ALPHA, GL_LUMINANCE_ALPHA, GL_INTENSITY
*
*
* -
*
{GL_R, GL_RG, GL_RGBA} X {8,
* 16, 16F, 32F, 8UI, 16UI, 32UI, 8I, 16I, 32I}
*
*
* -
*
{GL_LUMINANCE, GL_ALPHA,
* GL_LUMINANCE_ALPHA, GL_INTENSITY} X {8, 16, 16F_ARB, 32F_ARB, 8UI_EXT,
* 16UI_EXT, 32UI_EXT, 8I_EXT,
* 16I_EXT, 32I_EXT}
*
*
*
*
* The following image classes are currently
* disallowed:
*
* -
*
Textures with borders
*
* -
*
Multisampled renderbuffers
*
*
*
*
* Note:
* Note that this
* function may also return error codes from previous, asynchronous
* launches.
*
*
*
*
*
* @param pCudaResource Pointer to the returned object handle
* @param image name of texture or renderbuffer object to be registered
* @param target Identifies the type of object specified by image
* @param Flags Register flags
*
* @return CUDA_SUCCESS, CUDA_ERROR_INVALID_HANDLE, CUDA_ERROR_ALREADY_MAPPED,
* CUDA_ERROR_INVALID_CONTEXT
*
* @see JCudaDriver#cuGraphicsUnregisterResource
* @see JCudaDriver#cuGraphicsMapResources
* @see JCudaDriver#cuGraphicsSubResourceGetMappedArray
*/
public static int cuGraphicsGLRegisterImage(CUgraphicsResource pCudaResource, int image, int target, int Flags)
{
    // Forward all arguments unchanged to the native call, then pass
    // the returned code through checkResult.
    final int status = cuGraphicsGLRegisterImageNative(pCudaResource, image, target, Flags);
    return checkResult(status);
}
// Native method invoked by cuGraphicsGLRegisterImage; declared here, implemented outside this Java source.
private static native int cuGraphicsGLRegisterImageNative(CUgraphicsResource pCudaResource, int image, int target, int Flags);
/**
* Registers an OpenGL buffer object.
*
*
* CUresult cuGLRegisterBufferObject (
* GLuint buffer )
*
*
* Registers an OpenGL buffer object.
* Deprecated: This function is
* deprecated as of CUDA 3.0. Registers the buffer object specified
* by buffer for access by CUDA. This function must be called
* before CUDA can map the buffer object. There must be a valid OpenGL
* context
* bound to the current thread when this
* function is called, and the buffer name is resolved by that context.
*
*
* Note:
* Note that
* this function may also return error codes from previous, asynchronous
* launches.
*
*
*
*
*
* @param bufferobj The name of the buffer object to register (referred to as buffer above).
*
* @return CUDA_SUCCESS, CUDA_ERROR_DEINITIALIZED, CUDA_ERROR_NOT_INITIALIZED,
* CUDA_ERROR_INVALID_CONTEXT, CUDA_ERROR_ALREADY_MAPPED
*
* @see JCudaDriver#cuGraphicsGLRegisterBuffer
*/
public static int cuGLRegisterBufferObject(int bufferobj)
{
    // Run the native call first, then pass its return code through checkResult.
    final int status = cuGLRegisterBufferObjectNative(bufferobj);
    return checkResult(status);
}
// Native method invoked by cuGLRegisterBufferObject; declared here, implemented outside this Java source.
private static native int cuGLRegisterBufferObjectNative(int bufferobj);
/**
* Maps an OpenGL buffer object.
*
*
* CUresult cuGLMapBufferObject (
* CUdeviceptr* dptr,
* size_t* size,
* GLuint buffer )
*
*
* Maps an OpenGL buffer object.
* Deprecated: This function is
* deprecated as of CUDA 3.0. Maps the buffer object specified by
* buffer into the address space of the current CUDA context
* and returns in *dptr and *size the base pointer
* and size of the resulting mapping.
*
* There must be a valid OpenGL context
* bound to the current thread when this function is called. This must be
* the same context,
* or a member of the same shareGroup,
* as the context that was bound when the buffer was registered.
*
* All streams in the current CUDA
* context are synchronized with the current GL context.
*
*
* Note:
* Note that
* this function may also return error codes from previous, asynchronous
* launches.
*
*
*
*
*
* @param dptr Returned mapped base pointer
* @param size Returned size of mapping
* @param bufferobj The name of the buffer object to map (referred to as buffer above).
*
* @return CUDA_SUCCESS, CUDA_ERROR_DEINITIALIZED, CUDA_ERROR_NOT_INITIALIZED,
* CUDA_ERROR_INVALID_CONTEXT, CUDA_ERROR_INVALID_VALUE,
* CUDA_ERROR_MAP_FAILED
*
* @see JCudaDriver#cuGraphicsMapResources
*/
public static int cuGLMapBufferObject(CUdeviceptr dptr, long size[], int bufferobj)
{
    // Forward all arguments unchanged to the native call, then pass
    // the returned code through checkResult.
    final int status = cuGLMapBufferObjectNative(dptr, size, bufferobj);
    return checkResult(status);
}
// Native method invoked by cuGLMapBufferObject; declared here, implemented outside this Java source.
private static native int cuGLMapBufferObjectNative(CUdeviceptr dptr, long size[], int bufferobj);
/**
* Unmaps an OpenGL buffer object.
*
*
* CUresult cuGLUnmapBufferObject (
* GLuint buffer )
*
*
* Unmaps an OpenGL buffer object.
* Deprecated: This function is
* deprecated as of CUDA 3.0. Unmaps the buffer object specified by
* buffer for access by CUDA.
*
* There must be a valid OpenGL context
* bound to the current thread when this function is called. This must be
* the same context,
* or a member of the same shareGroup,
* as the context that was bound when the buffer was registered.
*
* All streams in the current CUDA
* context are synchronized with the current GL context.
*
*
* Note:
* Note that
* this function may also return error codes from previous, asynchronous
* launches.
*
*
*
*
*
* @param bufferobj Buffer object to unmap (referred to as buffer above).
*
* @return CUDA_SUCCESS, CUDA_ERROR_DEINITIALIZED, CUDA_ERROR_NOT_INITIALIZED,
* CUDA_ERROR_INVALID_CONTEXT, CUDA_ERROR_INVALID_VALUE
*
* @see JCudaDriver#cuGraphicsUnmapResources
*/
public static int cuGLUnmapBufferObject(int bufferobj)
{
    // Run the native call first, then pass its return code through checkResult.
    final int status = cuGLUnmapBufferObjectNative(bufferobj);
    return checkResult(status);
}
// Native method invoked by cuGLUnmapBufferObject; declared here, implemented outside this Java source.
private static native int cuGLUnmapBufferObjectNative(int bufferobj);
/**
* Unregister an OpenGL buffer object.
*
*
* CUresult cuGLUnregisterBufferObject (
* GLuint buffer )
*
*
* Unregister an OpenGL buffer object.
* Deprecated: This function is
* deprecated as of CUDA 3.0. Unregisters the buffer object specified
* by buffer. This releases any resources associated with the
* registered buffer. After this call, the buffer may no longer be mapped
* for
* access by CUDA.
*
* There must be a valid OpenGL context
* bound to the current thread when this function is called. This must be
* the same context,
* or a member of the same shareGroup,
* as the context that was bound when the buffer was registered.
*
*
* Note:
* Note that
* this function may also return error codes from previous, asynchronous
* launches.
*
*
*
*
*
* @param bufferobj Name of the buffer object to unregister (referred to as buffer above).
*
* @return CUDA_SUCCESS, CUDA_ERROR_DEINITIALIZED, CUDA_ERROR_NOT_INITIALIZED,
* CUDA_ERROR_INVALID_CONTEXT, CUDA_ERROR_INVALID_VALUE
*
* @see JCudaDriver#cuGraphicsUnregisterResource
*/
public static int cuGLUnregisterBufferObject(int bufferobj)
{
    // Run the native call first, then pass its return code through checkResult.
    final int status = cuGLUnregisterBufferObjectNative(bufferobj);
    return checkResult(status);
}
// Native method invoked by cuGLUnregisterBufferObject; declared here, implemented outside this Java source.
private static native int cuGLUnregisterBufferObjectNative(int bufferobj);
/**
* Set the map flags for an OpenGL buffer object.
*
*
* CUresult cuGLSetBufferObjectMapFlags (
* GLuint buffer,
* unsigned int Flags )
*
*
* Set the map flags for an OpenGL buffer
* object.
* Deprecated: This function is
* deprecated as of CUDA 3.0. Sets the map flags for the buffer
* object specified by buffer.
*
* Changes to Flags will take
* effect the next time buffer is mapped. The Flags
* argument may be any of the following:
*
* -
*
CU_GL_MAP_RESOURCE_FLAGS_NONE:
* Specifies no hints about how this resource will be used. It is therefore
* assumed that this
* resource will be read from
* and written to by CUDA kernels. This is the default value.
*
*
* -
*
CU_GL_MAP_RESOURCE_FLAGS_READ_ONLY:
* Specifies that CUDA kernels which access this resource will not write
* to this resource.
*
*
* -
*
CU_GL_MAP_RESOURCE_FLAGS_WRITE_DISCARD: Specifies that CUDA kernels
* which access this resource will not read from this resource
* and will write over the
* entire contents of the resource, so none of the data previously stored
* in the resource will be preserved.
*
*
*
*
* If buffer has not been
* registered for use with CUDA, then CUDA_ERROR_INVALID_HANDLE is
* returned. If buffer is presently mapped for access by CUDA,
* then CUDA_ERROR_ALREADY_MAPPED is returned.
*
* There must be a valid OpenGL context
* bound to the current thread when this function is called. This must be
* the same context,
* or a member of the same shareGroup,
* as the context that was bound when the buffer was registered.
*
*
* Note:
* Note that
* this function may also return error codes from previous, asynchronous
* launches.
*
*
*
*
*
* @param buffer Buffer object whose map flags are to be set
* @param Flags Map flags
*
* @return CUDA_SUCCESS, CUDA_ERROR_NOT_INITIALIZED, CUDA_ERROR_INVALID_HANDLE,
* CUDA_ERROR_ALREADY_MAPPED, CUDA_ERROR_INVALID_CONTEXT
*
* @see JCudaDriver#cuGraphicsResourceSetMapFlags
*/
public static int cuGLSetBufferObjectMapFlags(int buffer, int Flags)
{
    // Forward both arguments unchanged to the native call, then pass
    // the returned code through checkResult.
    final int status = cuGLSetBufferObjectMapFlagsNative(buffer, Flags);
    return checkResult(status);
}
// Native method invoked by cuGLSetBufferObjectMapFlags; declared here, implemented outside this Java source.
private static native int cuGLSetBufferObjectMapFlagsNative(int buffer, int Flags);
/**
* Maps an OpenGL buffer object.
*
*
* CUresult cuGLMapBufferObjectAsync (
* CUdeviceptr* dptr,
* size_t* size,
* GLuint buffer,
* CUstream hStream )
*
*
* Maps an OpenGL buffer object.
* Deprecated: This function is
* deprecated as of CUDA 3.0. Maps the buffer object specified by
* buffer into the address space of the current CUDA context
* and returns in *dptr and *size the base pointer
* and size of the resulting mapping.
*
* There must be a valid OpenGL context
* bound to the current thread when this function is called. This must be
* the same context,
* or a member of the same shareGroup,
* as the context that was bound when the buffer was registered.
*
* Stream hStream in the
* current CUDA context is synchronized with the current GL context.
*
*
* Note:
* Note that
* this function may also return error codes from previous, asynchronous
* launches.
*
*
*
*
*
* @param dptr Returned mapped base pointer
* @param size Returned size of mapping
* @param buffer The name of the buffer object to map
* @param hStream Stream to synchronize
*
* @return CUDA_SUCCESS, CUDA_ERROR_DEINITIALIZED, CUDA_ERROR_NOT_INITIALIZED,
* CUDA_ERROR_INVALID_CONTEXT, CUDA_ERROR_INVALID_VALUE,
* CUDA_ERROR_MAP_FAILED
*
* @see JCudaDriver#cuGraphicsMapResources
*/
public static int cuGLMapBufferObjectAsync(CUdeviceptr dptr, long size[], int buffer, CUstream hStream)
{
    // Forward all arguments unchanged to the native call, then pass
    // the returned code through checkResult.
    final int status = cuGLMapBufferObjectAsyncNative(dptr, size, buffer, hStream);
    return checkResult(status);
}
// Native method invoked by cuGLMapBufferObjectAsync; declared here, implemented outside this Java source.
private static native int cuGLMapBufferObjectAsyncNative(CUdeviceptr dptr, long size[], int buffer, CUstream hStream);
/**
* Unmaps an OpenGL buffer object.
*
*
* CUresult cuGLUnmapBufferObjectAsync (
* GLuint buffer,
* CUstream hStream )
*
*
* Unmaps an OpenGL buffer object.
* Deprecated: This function is
* deprecated as of CUDA 3.0. Unmaps the buffer object specified by
* buffer for access by CUDA.
*
* There must be a valid OpenGL context
* bound to the current thread when this function is called. This must be
* the same context,
* or a member of the same shareGroup,
* as the context that was bound when the buffer was registered.
*
* Stream hStream in the
* current CUDA context is synchronized with the current GL context.
*
*
* Note:
* Note that
* this function may also return error codes from previous, asynchronous
* launches.
*
*
*
*
*
* @param buffer Name of the buffer object to unmap
* @param hStream Stream to synchronize
*
* @return CUDA_SUCCESS, CUDA_ERROR_DEINITIALIZED, CUDA_ERROR_NOT_INITIALIZED,
* CUDA_ERROR_INVALID_CONTEXT, CUDA_ERROR_INVALID_VALUE
*
* @see JCudaDriver#cuGraphicsUnmapResources
*/
public static int cuGLUnmapBufferObjectAsync(int buffer, CUstream hStream)
{
    // Forward both arguments unchanged to the native call, then pass
    // the returned code through checkResult.
    final int status = cuGLUnmapBufferObjectAsyncNative(buffer, hStream);
    return checkResult(status);
}
// Native method invoked by cuGLUnmapBufferObjectAsync; declared here, implemented outside this Java source.
private static native int cuGLUnmapBufferObjectAsyncNative(int buffer, CUstream hStream);
/**
* Unregisters a graphics resource for access by CUDA.
*
*
* CUresult cuGraphicsUnregisterResource (
* CUgraphicsResource resource )
*
*
* Unregisters a graphics resource for
* access by CUDA. Unregisters the graphics resource resource
* so it is not accessible by CUDA unless registered again.
*
* If resource is invalid then
* CUDA_ERROR_INVALID_HANDLE is returned.
*
*
* Note:
* Note that this
* function may also return error codes from previous, asynchronous
* launches.
*
*
*
*
*
* @param resource Resource to unregister
*
* @return CUDA_SUCCESS, CUDA_ERROR_DEINITIALIZED, CUDA_ERROR_NOT_INITIALIZED,
* CUDA_ERROR_INVALID_CONTEXT, CUDA_ERROR_INVALID_HANDLE,
* CUDA_ERROR_UNKNOWN
*
* @see JCudaDriver#cuGraphicsGLRegisterBuffer
* @see JCudaDriver#cuGraphicsGLRegisterImage
*/
public static int cuGraphicsUnregisterResource(CUgraphicsResource resource)
{
    // Run the native call first, then pass its return code through checkResult.
    final int status = cuGraphicsUnregisterResourceNative(resource);
    return checkResult(status);
}
// Native method invoked by cuGraphicsUnregisterResource; declared here, implemented outside this Java source.
private static native int cuGraphicsUnregisterResourceNative(CUgraphicsResource resource);
/**
* Get an array through which to access a subresource of a mapped graphics resource.
*
*
* CUresult cuGraphicsSubResourceGetMappedArray (
* CUarray* pArray,
* CUgraphicsResource resource,
* unsigned int arrayIndex,
* unsigned int mipLevel )
*
*
* Get an array through which to access a
* subresource of a mapped graphics resource. Returns in *pArray
* an array through which the subresource of the mapped graphics resource
* resource which corresponds to array index arrayIndex
* and mipmap level mipLevel may be accessed. The value set in
* *pArray may change every time that resource is
* mapped.
*
* If resource is not a texture
* then it cannot be accessed via an array and CUDA_ERROR_NOT_MAPPED_AS_ARRAY
* is returned. If arrayIndex is not a valid array index for
* resource then CUDA_ERROR_INVALID_VALUE is returned. If mipLevel is not a valid mipmap level for resource then
* CUDA_ERROR_INVALID_VALUE is returned. If resource is not
* mapped then CUDA_ERROR_NOT_MAPPED is returned.
*
*
* Note:
* Note that this
* function may also return error codes from previous, asynchronous
* launches.
*
*
*
*
*
* @param pArray Returned array through which a subresource of resource may be accessed
* @param resource Mapped resource to access
* @param arrayIndex Array index for array textures or cubemap face index as defined by CUarray_cubemap_face for cubemap textures for the subresource to access
* @param mipLevel Mipmap level for the subresource to access
*
* @return CUDA_SUCCESS, CUDA_ERROR_DEINITIALIZED, CUDA_ERROR_NOT_INITIALIZED,
* CUDA_ERROR_INVALID_CONTEXT, CUDA_ERROR_INVALID_VALUE,
* CUDA_ERROR_INVALID_HANDLE,
* CUDA_ERROR_NOT_MAPPED, CUDA_ERROR_NOT_MAPPED_AS_ARRAY
*
* @see JCudaDriver#cuGraphicsResourceGetMappedPointer
*/
public static int cuGraphicsSubResourceGetMappedArray(
    CUarray pArray,
    CUgraphicsResource resource,
    int arrayIndex,
    int mipLevel)
{
    // Forward all arguments unchanged to the native call, then pass
    // the returned code through checkResult.
    final int status = cuGraphicsSubResourceGetMappedArrayNative(pArray, resource, arrayIndex, mipLevel);
    return checkResult(status);
}
// Native method invoked by cuGraphicsSubResourceGetMappedArray; declared here, implemented outside this Java source.
private static native int cuGraphicsSubResourceGetMappedArrayNative(
    CUarray pArray,
    CUgraphicsResource resource,
    int arrayIndex,
    int mipLevel);
/**
* Get a mipmapped array through which to access a mapped graphics resource.
*
*
* CUresult cuGraphicsResourceGetMappedMipmappedArray (
* CUmipmappedArray* pMipmappedArray,
* CUgraphicsResource resource )
*
*
* Get a mipmapped array through which to
* access a mapped graphics resource. Returns in *pMipmappedArray
* a mipmapped array through which the mapped graphics resource resource may be accessed. The value set in *pMipmappedArray may change
* every time that resource is mapped.
*
* If resource is not a texture
* then it cannot be accessed via a mipmapped array and
* CUDA_ERROR_NOT_MAPPED_AS_ARRAY is returned. If resource is
* not mapped then CUDA_ERROR_NOT_MAPPED is returned.
*
*
* Note:
* Note that this
* function may also return error codes from previous, asynchronous
* launches.
*
*
*
*
*
* @param pMipmappedArray Returned mipmapped array through which resource may be accessed
* @param resource Mapped resource to access
*
* @return CUDA_SUCCESS, CUDA_ERROR_DEINITIALIZED, CUDA_ERROR_NOT_INITIALIZED,
* CUDA_ERROR_INVALID_CONTEXT, CUDA_ERROR_INVALID_VALUE,
* CUDA_ERROR_INVALID_HANDLE,
* CUDA_ERROR_NOT_MAPPED, CUDA_ERROR_NOT_MAPPED_AS_ARRAY
*
* @see JCudaDriver#cuGraphicsResourceGetMappedPointer
*/
public static int cuGraphicsResourceGetMappedMipmappedArray(
    CUmipmappedArray pMipmappedArray,
    CUgraphicsResource resource)
{
    // Forward both arguments unchanged to the native call, then pass
    // the returned code through checkResult.
    final int status = cuGraphicsResourceGetMappedMipmappedArrayNative(pMipmappedArray, resource);
    return checkResult(status);
}
// Native method invoked by cuGraphicsResourceGetMappedMipmappedArray; declared here, implemented outside this Java source.
private static native int cuGraphicsResourceGetMappedMipmappedArrayNative(
    CUmipmappedArray pMipmappedArray,
    CUgraphicsResource resource);
/**
* Get a device pointer through which to access a mapped graphics resource.
*
*
* CUresult cuGraphicsResourceGetMappedPointer (
* CUdeviceptr* pDevPtr,
* size_t* pSize,
* CUgraphicsResource resource )
*
*
* Get a device pointer through which to
* access a mapped graphics resource. Returns in *pDevPtr a
* pointer through which the mapped graphics resource resource
* may be accessed. Returns in pSize the size of the memory in
* bytes which may be accessed from that pointer. The value set in pDevPtr may change every time that resource is
* mapped.
*
* If resource is not a buffer
* then it cannot be accessed via a pointer and CUDA_ERROR_NOT_MAPPED_AS_POINTER
* is returned. If resource is not mapped then CUDA_ERROR_NOT_MAPPED
* is returned.
*
*
* Note:
* Note that this
* function may also return error codes from previous, asynchronous
* launches.
*
*
*
*
*
* @param pDevPtr Returned pointer through which resource may be accessed
* @param pSize Returned size of the buffer accessible starting at *pDevPtr
* @param resource Mapped resource to access
*
* @return CUDA_SUCCESS, CUDA_ERROR_DEINITIALIZED, CUDA_ERROR_NOT_INITIALIZED,
* CUDA_ERROR_INVALID_CONTEXT, CUDA_ERROR_INVALID_VALUE,
* CUDA_ERROR_INVALID_HANDLE,
* CUDA_ERROR_NOT_MAPPED, CUDA_ERROR_NOT_MAPPED_AS_POINTER
*
* @see JCudaDriver#cuGraphicsMapResources
* @see JCudaDriver#cuGraphicsSubResourceGetMappedArray
*/
public static int cuGraphicsResourceGetMappedPointer(CUdeviceptr pDevPtr, long pSize[], CUgraphicsResource resource)
{
    // Forward all arguments unchanged to the native call, then pass
    // the returned code through checkResult.
    final int status = cuGraphicsResourceGetMappedPointerNative(pDevPtr, pSize, resource);
    return checkResult(status);
}
// Native method invoked by cuGraphicsResourceGetMappedPointer; declared here, implemented outside this Java source.
private static native int cuGraphicsResourceGetMappedPointerNative(CUdeviceptr pDevPtr, long pSize[], CUgraphicsResource resource);
/**
* Set usage flags for mapping a graphics resource.
*
*
* CUresult cuGraphicsResourceSetMapFlags (
* CUgraphicsResource resource,
* unsigned int flags )
*
*
* Set usage flags for mapping a graphics
* resource. Set flags for mapping the graphics resource resource.
*
* Changes to flags will take
* effect the next time resource is mapped. The flags
* argument may be any of the following:
*
*
* -
*
CU_GRAPHICS_MAP_RESOURCE_FLAGS_NONE:
* Specifies no hints about how this resource will be used. It is therefore
* assumed that
* this resource will be read from
* and written to by CUDA kernels. This is the default value.
*
*
* -
*
CU_GRAPHICS_MAP_RESOURCE_FLAGS_READONLY:
* Specifies that CUDA kernels which access this resource will not write
* to this resource.
*
*
* -
*
CU_GRAPHICS_MAP_RESOURCE_FLAGS_WRITEDISCARD: Specifies that CUDA
* kernels which access this resource will not read from this
* resource and will write over
* the entire contents of the resource, so none of the data previously
* stored in the resource will
* be preserved.
*
*
*
*
* If resource is presently
* mapped for access by CUDA then CUDA_ERROR_ALREADY_MAPPED is returned.
* If flags is not one of the above values then
* CUDA_ERROR_INVALID_VALUE is returned.
*
*
* Note:
* Note that this
* function may also return error codes from previous, asynchronous
* launches.
*
*
*
*
*
* @param resource Registered resource to set flags for
* @param flags Parameters for resource mapping
*
* @return CUDA_SUCCESS, CUDA_ERROR_DEINITIALIZED, CUDA_ERROR_NOT_INITIALIZED,
* CUDA_ERROR_INVALID_CONTEXT, CUDA_ERROR_INVALID_VALUE,
* CUDA_ERROR_INVALID_HANDLE, CUDA_ERROR_ALREADY_MAPPED
*
* @see JCudaDriver#cuGraphicsMapResources
*/
public static int cuGraphicsResourceSetMapFlags(CUgraphicsResource resource, int flags)
{
    // Forward both arguments unchanged to the native call, then pass
    // the returned code through checkResult.
    final int status = cuGraphicsResourceSetMapFlagsNative(resource, flags);
    return checkResult(status);
}
// Native method invoked by cuGraphicsResourceSetMapFlags; declared here, implemented outside this Java source.
private static native int cuGraphicsResourceSetMapFlagsNative(CUgraphicsResource resource, int flags);
/**
* Map graphics resources for access by CUDA.
*
*
* CUresult cuGraphicsMapResources (
* unsigned int count,
* CUgraphicsResource* resources,
* CUstream hStream )
*
*
* Map graphics resources for access by
* CUDA. Maps the count graphics resources in resources
* for access by CUDA.
*
* The resources in resources
* may be accessed by CUDA until they are unmapped. The graphics API from
* which resources were registered should not access any
* resources while they are mapped by CUDA. If an application does so,
* the results are
* undefined.
*
* This function provides the synchronization
* guarantee that any graphics calls issued before cuGraphicsMapResources()
* will complete before any subsequent CUDA work issued in stream
* begins.
*
* If resources includes any
* duplicate entries then CUDA_ERROR_INVALID_HANDLE is returned. If any
* of resources are presently mapped for access by CUDA then
* CUDA_ERROR_ALREADY_MAPPED is returned.
*
*
* Note:
* Note that this
* function may also return error codes from previous, asynchronous
* launches.
*
*
*
*
*
* @param count Number of resources to map
* @param resources Resources to map for CUDA usage
* @param hStream Stream with which to synchronize
*
* @return CUDA_SUCCESS, CUDA_ERROR_DEINITIALIZED, CUDA_ERROR_NOT_INITIALIZED,
* CUDA_ERROR_INVALID_CONTEXT, CUDA_ERROR_INVALID_HANDLE,
* CUDA_ERROR_ALREADY_MAPPED, CUDA_ERROR_UNKNOWN
*
* @see JCudaDriver#cuGraphicsResourceGetMappedPointer
* @see JCudaDriver#cuGraphicsSubResourceGetMappedArray
* @see JCudaDriver#cuGraphicsUnmapResources
*/
public static int cuGraphicsMapResources(int count, CUgraphicsResource resources[], CUstream hStream)
{
    // Forward all arguments unchanged to the native call, then pass
    // the returned code through checkResult.
    final int status = cuGraphicsMapResourcesNative(count, resources, hStream);
    return checkResult(status);
}
// Native method invoked by cuGraphicsMapResources; declared here, implemented outside this Java source.
private static native int cuGraphicsMapResourcesNative(int count, CUgraphicsResource resources[], CUstream hStream);
/**
* Unmap graphics resources.
*
*
* CUresult cuGraphicsUnmapResources (
* unsigned int count,
* CUgraphicsResource* resources,
* CUstream hStream )
*
*
* Unmap graphics resources. Unmaps the
* count graphics resources in resources.
*
* Once unmapped, the resources in resources may not be accessed by CUDA until they are mapped
* again.
*
* This function provides the synchronization
* guarantee that any CUDA work issued in stream before
* cuGraphicsUnmapResources() will complete before any subsequently issued
* graphics work begins.
*
* If resources includes any
* duplicate entries then CUDA_ERROR_INVALID_HANDLE is returned. If any
* of resources are not presently mapped for access by CUDA then
* CUDA_ERROR_NOT_MAPPED is returned.
*
*
* Note:
* Note that this
* function may also return error codes from previous, asynchronous
* launches.
*
*
*
*
*
* @param count Number of resources to unmap
* @param resources Resources to unmap
* @param hStream Stream with which to synchronize
*
* @return CUDA_SUCCESS, CUDA_ERROR_DEINITIALIZED, CUDA_ERROR_NOT_INITIALIZED,
* CUDA_ERROR_INVALID_CONTEXT, CUDA_ERROR_INVALID_HANDLE,
* CUDA_ERROR_NOT_MAPPED, CUDA_ERROR_UNKNOWN
*
* @see JCudaDriver#cuGraphicsMapResources
*/
public static int cuGraphicsUnmapResources(int count, CUgraphicsResource resources[], CUstream hStream)
{
    // Forward all arguments unchanged to the native call, then pass
    // the returned code through checkResult.
    final int status = cuGraphicsUnmapResourcesNative(count, resources, hStream);
    return checkResult(status);
}
// Native method invoked by cuGraphicsUnmapResources; declared here, implemented outside this Java source.
private static native int cuGraphicsUnmapResourcesNative(int count, CUgraphicsResource resources[], CUstream hStream);
/**
* Set resource limits.
*
*
* CUresult cuCtxSetLimit (
* CUlimit limit,
* size_t value )
*
*
* Set resource limits. Setting limit to value is a request by the application to
* update the current limit maintained by the context. The driver is free
* to modify the requested
* value to meet h/w requirements (this
* could be clamping to minimum or maximum values, rounding up to nearest
* element size,
* etc). The application can use
* cuCtxGetLimit() to find out exactly what the limit has been set to.
*
* Setting each CUlimit has its own specific
* restrictions, so each is discussed here.
*
*
* -
*
CU_LIMIT_STACK_SIZE controls
* the stack size in bytes of each GPU thread. This limit is only
* applicable to devices of compute capability 2.0 and
* higher. Attempting to set this
* limit on devices of compute capability less than 2.0 will result in
* the error CUDA_ERROR_UNSUPPORTED_LIMIT being returned.
*
*
*
*
*
* -
*
CU_LIMIT_PRINTF_FIFO_SIZE
* controls the size in bytes of the FIFO used by the printf() device
* system call. Setting CU_LIMIT_PRINTF_FIFO_SIZE must be performed before
* launching any kernel that uses the printf() device system call,
* otherwise CUDA_ERROR_INVALID_VALUE will be returned. This limit is only
* applicable to devices of compute capability 2.0 and higher. Attempting
* to set this limit
* on devices of compute capability
* less than 2.0 will result in the error CUDA_ERROR_UNSUPPORTED_LIMIT
* being returned.
*
*
*
*
*
* -
*
CU_LIMIT_MALLOC_HEAP_SIZE
* controls the size in bytes of the heap used by the malloc() and free()
* device system calls. Setting CU_LIMIT_MALLOC_HEAP_SIZE must be performed
* before launching any kernel that uses the malloc() or free() device
* system calls, otherwise CUDA_ERROR_INVALID_VALUE will be returned. This
* limit is only applicable to devices of compute capability 2.0 and
* higher. Attempting to set this limit
* on devices of compute capability
* less than 2.0 will result in the error CUDA_ERROR_UNSUPPORTED_LIMIT
* being returned.
*
*
*
*
*
* -
*
CU_LIMIT_DEV_RUNTIME_SYNC_DEPTH
* controls the maximum nesting depth of a grid at which a thread can
* safely call cudaDeviceSynchronize(). Setting this limit
* must be performed before any
* launch of a kernel that uses the device runtime and calls
* cudaDeviceSynchronize() above the default
* sync depth, two levels of grids.
* Calls to cudaDeviceSynchronize() will fail with error code
* cudaErrorSyncDepthExceeded if
* the limitation is violated. This
* limit can be set smaller than the default or up to the maximum launch
* depth of 24. When setting
* this limit, keep in mind that
* additional levels of sync depth require the driver to reserve large
* amounts of device memory
* which can no longer be used for
* user allocations. If these reservations of device memory fail,
* cuCtxSetLimit will return CUDA_ERROR_OUT_OF_MEMORY, and the limit can
* be reset to a lower value. This limit is only applicable to devices of
* compute capability 3.5 and higher.
* Attempting to set this limit on
* devices of compute capability less than 3.5 will result in the error
* CUDA_ERROR_UNSUPPORTED_LIMIT being returned.
*
*
*
*
*
* -
*
CU_LIMIT_DEV_RUNTIME_PENDING_LAUNCH_COUNT controls the maximum number
* of outstanding device runtime launches that can be made from the
* current context. A grid is outstanding
* from the point of launch up
* until the grid is known to have been completed. Device runtime launches
* which violate this limitation
* fail and return
* cudaErrorLaunchPendingCountExceeded when cudaGetLastError() is called
* after launch. If more pending launches
* than the default (2048 launches)
* are needed for a module using the device runtime, this limit can be
* increased. Keep in mind
* that being able to sustain
* additional pending launches will require the driver to reserve larger
* amounts of device memory
* upfront which can no longer be
* used for allocations. If these reservations fail, cuCtxSetLimit will
* return CUDA_ERROR_OUT_OF_MEMORY, and the limit can be reset to a lower
* value. This limit is only applicable to devices of compute capability
* 3.5 and higher.
* Attempting to set this limit on
* devices of compute capability less than 3.5 will result in the error
* CUDA_ERROR_UNSUPPORTED_LIMIT being returned.
*
*
*
*
*
* Note:
* Note that this
* function may also return error codes from previous, asynchronous
* launches.
*
*
*
*
*
* @param limit Limit to set
* @param value Size of limit
*
* @return CUDA_SUCCESS, CUDA_ERROR_INVALID_VALUE, CUDA_ERROR_UNSUPPORTED_LIMIT,
* CUDA_ERROR_OUT_OF_MEMORY
*
* @see JCudaDriver#cuCtxCreate
* @see JCudaDriver#cuCtxDestroy
* @see JCudaDriver#cuCtxGetApiVersion
* @see JCudaDriver#cuCtxGetCacheConfig
* @see JCudaDriver#cuCtxGetDevice
* @see JCudaDriver#cuCtxGetLimit
* @see JCudaDriver#cuCtxPopCurrent
* @see JCudaDriver#cuCtxPushCurrent
* @see JCudaDriver#cuCtxSetCacheConfig
* @see JCudaDriver#cuCtxSynchronize
*/
public static int cuCtxSetLimit(int limit, long value)
{
    // Invoke the native binding first, then hand its status code to checkResult.
    final int status = cuCtxSetLimitNative(limit, value);
    return checkResult(status);
}
// Native counterpart of cuCtxSetLimit; its implementation lives outside of Java.
private static native int cuCtxSetLimitNative(int limit, long value);
/**
* Returns the preferred cache configuration for the current context.
*
*
* CUresult cuCtxGetCacheConfig (
* CUfunc_cache* pconfig )
*
*
* Returns the preferred cache configuration
* for the current context. On devices where the L1 cache and shared
* memory use the
* same hardware resources, this function
* returns through pconfig the preferred cache configuration
* for the current context. This is only a preference. The driver will
* use the requested configuration
* if possible, but it is free to choose a
* different configuration if required to execute functions.
*
* This will return a pconfig of
* CU_FUNC_CACHE_PREFER_NONE on devices where the size of the L1 cache
* and shared memory are fixed.
*
* The supported cache configurations are:
*
* -
*
CU_FUNC_CACHE_PREFER_NONE: no
* preference for shared memory or L1 (default)
*
*
* -
*
CU_FUNC_CACHE_PREFER_SHARED:
* prefer larger shared memory and smaller L1 cache
*
*
* -
*
CU_FUNC_CACHE_PREFER_L1: prefer
* larger L1 cache and smaller shared memory
*
*
* -
*
CU_FUNC_CACHE_PREFER_EQUAL:
* prefer equal sized L1 cache and shared memory
*
*
*
*
*
* Note:
* Note that this
* function may also return error codes from previous, asynchronous
* launches.
*
*
*
*
*
* @param pconfig Returned cache configuration
*
* @return CUDA_SUCCESS, CUDA_ERROR_DEINITIALIZED, CUDA_ERROR_NOT_INITIALIZED,
* CUDA_ERROR_INVALID_CONTEXT, CUDA_ERROR_INVALID_VALUE
*
* @see JCudaDriver#cuCtxCreate
* @see JCudaDriver#cuCtxDestroy
* @see JCudaDriver#cuCtxGetApiVersion
* @see JCudaDriver#cuCtxGetDevice
* @see JCudaDriver#cuCtxGetLimit
* @see JCudaDriver#cuCtxPopCurrent
* @see JCudaDriver#cuCtxPushCurrent
* @see JCudaDriver#cuCtxSetCacheConfig
* @see JCudaDriver#cuCtxSetLimit
* @see JCudaDriver#cuCtxSynchronize
* @see JCudaDriver#cuFuncSetCacheConfig
*/
public static int cuCtxGetCacheConfig(int pconfig[])
{
    // The native call receives the caller's array; its status code is then
    // passed through checkResult before being returned.
    final int status = cuCtxGetCacheConfigNative(pconfig);
    return checkResult(status);
}
// Native counterpart of cuCtxGetCacheConfig; its implementation lives outside of Java.
private static native int cuCtxGetCacheConfigNative(int[] pconfig);
/**
* Sets the preferred cache configuration for the current context.
*
*
* CUresult cuCtxSetCacheConfig (
* CUfunc_cache config )
*
*
* Sets the preferred cache configuration
* for the current context. On devices where the L1 cache and shared
* memory use the same
* hardware resources, this sets through
* config the preferred cache configuration for the current
* context. This is only a preference. The driver will use the requested
* configuration
* if possible, but it is free to choose a
* different configuration if required to execute the function. Any
* function preference
* set via cuFuncSetCacheConfig() will be
* preferred over this context-wide setting. Setting the context-wide
* cache configuration to CU_FUNC_CACHE_PREFER_NONE will cause subsequent
* kernel launches to prefer to not change the cache configuration unless
* required to launch the kernel.
*
* This setting does nothing on devices
* where the size of the L1 cache and shared memory are fixed.
*
* Launching a kernel with a different
* preference than the most recent preference setting may insert a
* device-side synchronization
* point.
*
* The supported cache configurations are:
*
* -
*
CU_FUNC_CACHE_PREFER_NONE: no
* preference for shared memory or L1 (default)
*
*
* -
*
CU_FUNC_CACHE_PREFER_SHARED:
* prefer larger shared memory and smaller L1 cache
*
*
* -
*
CU_FUNC_CACHE_PREFER_L1: prefer
* larger L1 cache and smaller shared memory
*
*
* -
*
CU_FUNC_CACHE_PREFER_EQUAL:
* prefer equal sized L1 cache and shared memory
*
*
*
*
*
* Note:
* Note that this
* function may also return error codes from previous, asynchronous
* launches.
*
*
*
*
*
* @param config Requested cache configuration
*
* @return CUDA_SUCCESS, CUDA_ERROR_DEINITIALIZED, CUDA_ERROR_NOT_INITIALIZED,
* CUDA_ERROR_INVALID_CONTEXT, CUDA_ERROR_INVALID_VALUE
*
* @see JCudaDriver#cuCtxCreate
* @see JCudaDriver#cuCtxDestroy
* @see JCudaDriver#cuCtxGetApiVersion
* @see JCudaDriver#cuCtxGetCacheConfig
* @see JCudaDriver#cuCtxGetDevice
* @see JCudaDriver#cuCtxGetLimit
* @see JCudaDriver#cuCtxPopCurrent
* @see JCudaDriver#cuCtxPushCurrent
* @see JCudaDriver#cuCtxSetLimit
* @see JCudaDriver#cuCtxSynchronize
* @see JCudaDriver#cuFuncSetCacheConfig
*/
public static int cuCtxSetCacheConfig(int config)
{
    // Forward the requested configuration to the native binding and route
    // the resulting status code through checkResult.
    final int status = cuCtxSetCacheConfigNative(config);
    return checkResult(status);
}
// Native counterpart of cuCtxSetCacheConfig; its implementation lives outside of Java.
private static native int cuCtxSetCacheConfigNative(int config);
/**
* Returns the current shared memory configuration for the current context.
*
*
* CUresult cuCtxGetSharedMemConfig (
* CUsharedconfig* pConfig )
*
*
* Returns the current shared memory
* configuration for the current context. This function will return in
* pConfig the current size of shared memory banks in the
* current context. On devices with configurable shared memory banks,
* cuCtxSetSharedMemConfig can be used to change this setting, so that
* all subsequent kernel launches will by default use the new bank size.
* When cuCtxGetSharedMemConfig is called on devices without configurable
* shared memory, it will return the fixed bank size of the hardware.
*
* The returned bank configurations can be
* either:
*
* -
*
CU_SHARED_MEM_CONFIG_FOUR_BYTE_BANK_SIZE: shared memory bank width is
* four bytes.
*
*
* -
*
CU_SHARED_MEM_CONFIG_EIGHT_BYTE_BANK_SIZE: shared memory bank width
* is eight bytes.
*
*
*
*
*
* Note:
* Note that this
* function may also return error codes from previous, asynchronous
* launches.
*
*
*
*
*
* @param pConfig returned shared memory configuration
*
* @return CUDA_SUCCESS, CUDA_ERROR_DEINITIALIZED, CUDA_ERROR_NOT_INITIALIZED,
* CUDA_ERROR_INVALID_CONTEXT, CUDA_ERROR_INVALID_VALUE
*
* @see JCudaDriver#cuCtxCreate
* @see JCudaDriver#cuCtxDestroy
* @see JCudaDriver#cuCtxGetApiVersion
* @see JCudaDriver#cuCtxGetCacheConfig
* @see JCudaDriver#cuCtxGetDevice
* @see JCudaDriver#cuCtxGetLimit
* @see JCudaDriver#cuCtxPopCurrent
* @see JCudaDriver#cuCtxPushCurrent
* @see JCudaDriver#cuCtxSetLimit
* @see JCudaDriver#cuCtxSynchronize
* @see JCudaDriver#cuCtxSetSharedMemConfig
* @see JCudaDriver#cuFuncSetCacheConfig
*/
public static int cuCtxGetSharedMemConfig(int pConfig[])
{
    // Must delegate to the *Native binding: calling cuCtxGetSharedMemConfig
    // from here would recurse endlessly and end in a StackOverflowError
    // without ever reaching the driver.
    return checkResult(cuCtxGetSharedMemConfigNative(pConfig));
}
// Native counterpart of cuCtxGetSharedMemConfig; its implementation lives outside of Java.
private static native int cuCtxGetSharedMemConfigNative(int pConfig[]);
/**
* Sets the shared memory configuration for the current context.
*
*
* CUresult cuCtxSetSharedMemConfig (
* CUsharedconfig config )
*
*
* Sets the shared memory configuration for
* the current context. On devices with configurable shared memory banks,
* this function
* will set the context's shared memory bank
* size which is used for subsequent kernel launches.
*
* Changing the shared memory configuration
* between launches may insert a device side synchronization point between
* those launches.
*
* Changing the shared memory bank size
* will not increase shared memory usage or affect occupancy of kernels,
* but may have major
* effects on performance. Larger bank sizes
* will allow for greater potential bandwidth to shared memory, but will
* change what
* kinds of accesses to shared memory will
* result in bank conflicts.
*
* This function will do nothing on devices
* with fixed shared memory bank size.
*
* The supported bank configurations are:
*
* -
*
CU_SHARED_MEM_CONFIG_DEFAULT_BANK_SIZE:
* set bank width to the default initial setting (currently, four bytes).
*
*
* -
*
CU_SHARED_MEM_CONFIG_FOUR_BYTE_BANK_SIZE: set shared memory bank width
* to be natively four bytes.
*
*
* -
*
CU_SHARED_MEM_CONFIG_EIGHT_BYTE_BANK_SIZE: set shared memory bank
* width to be natively eight bytes.
*
*
*
*
*
* Note:
* Note that this
* function may also return error codes from previous, asynchronous
* launches.
*
*
*
*
*
* @param config requested shared memory configuration
*
* @return CUDA_SUCCESS, CUDA_ERROR_DEINITIALIZED, CUDA_ERROR_NOT_INITIALIZED,
* CUDA_ERROR_INVALID_CONTEXT, CUDA_ERROR_INVALID_VALUE
*
* @see JCudaDriver#cuCtxCreate
* @see JCudaDriver#cuCtxDestroy
* @see JCudaDriver#cuCtxGetApiVersion
* @see JCudaDriver#cuCtxGetCacheConfig
* @see JCudaDriver#cuCtxGetDevice
* @see JCudaDriver#cuCtxGetLimit
* @see JCudaDriver#cuCtxPopCurrent
* @see JCudaDriver#cuCtxPushCurrent
* @see JCudaDriver#cuCtxSetLimit
* @see JCudaDriver#cuCtxSynchronize
* @see JCudaDriver#cuCtxGetSharedMemConfig
* @see JCudaDriver#cuFuncSetCacheConfig
*/
public static int cuCtxSetSharedMemConfig(int config)
{
    // Pass the requested configuration to the native binding, then let
    // checkResult process the returned status code.
    final int status = cuCtxSetSharedMemConfigNative(config);
    return checkResult(status);
}
// Native counterpart of cuCtxSetSharedMemConfig; its implementation lives outside of Java.
private static native int cuCtxSetSharedMemConfigNative(int config);
/**
* Gets the context's API version.
*
*
* CUresult cuCtxGetApiVersion (
* CUcontext ctx,
* unsigned int* version )
*
*
* Gets the context's API version. Returns
* a version number in version corresponding to the capabilities
* of the context (e.g. 3010 or 3020), which library developers can use
* to direct callers
* to a specific API version. If ctx is NULL, returns the API version used to create the currently
* bound context.
*
* Note that new API versions are only
* introduced when context capabilities are changed that break binary
* compatibility, so the
* API version and driver version may be
* different. For example, it is valid for the API version to be 3020
* while the driver
* version is 4020.
*
*
* Note:
* Note that this
* function may also return error codes from previous, asynchronous
* launches.
*
*
*
*
*
* @param ctx Context to check
* @param version Pointer to version
*
* @return CUDA_SUCCESS, CUDA_ERROR_DEINITIALIZED, CUDA_ERROR_NOT_INITIALIZED,
* CUDA_ERROR_INVALID_CONTEXT, CUDA_ERROR_UNKNOWN
*
* @see JCudaDriver#cuCtxCreate
* @see JCudaDriver#cuCtxDestroy
* @see JCudaDriver#cuCtxGetDevice
* @see JCudaDriver#cuCtxGetLimit
* @see JCudaDriver#cuCtxPopCurrent
* @see JCudaDriver#cuCtxPushCurrent
* @see JCudaDriver#cuCtxSetCacheConfig
* @see JCudaDriver#cuCtxSetLimit
* @see JCudaDriver#cuCtxSynchronize
*/
public static int cuCtxGetApiVersion(CUcontext ctx, int version[])
{
    // Both arguments go to the native binding unchanged; the status code it
    // returns is passed through checkResult.
    final int status = cuCtxGetApiVersionNative(ctx, version);
    return checkResult(status);
}
// Native counterpart of cuCtxGetApiVersion; its implementation lives outside of Java.
private static native int cuCtxGetApiVersionNative(CUcontext ctx, int version[]);
/**
* Returns numerical values that correspond to the least and
* greatest stream priorities.
*
* Returns in *leastPriority and *greatestPriority the numerical values that correspond
* to the least and greatest stream priorities respectively. Stream priorities
* follow a convention where lower numbers imply greater priorities. The range of
* meaningful stream priorities is given by [*greatestPriority, *leastPriority].
* If the user attempts to create a stream with a priority value that is
* outside the meaningful range as specified by this API, the priority is
* automatically clamped down or up to either *leastPriority or *greatestPriority
* respectively. See ::cuStreamCreateWithPriority for details on creating a
* priority stream.
* A NULL may be passed in for *leastPriority or *greatestPriority if the value
* is not desired.
*
* This function will return '0' in both *leastPriority and *greatestPriority if
* the current context's device does not support stream priorities
* (see ::cuDeviceGetAttribute).
*
* @param leastPriority Pointer to an int in which the numerical value for least
* stream priority is returned
* @param greatestPriority Pointer to an int in which the numerical value for greatest
* stream priority is returned
*
* @return CUDA_SUCCESS, CUDA_ERROR_INVALID_VALUE
*
* @see JCudaDriver#cuStreamCreateWithPriority
* @see JCudaDriver#cuStreamGetPriority
* @see JCudaDriver#cuCtxGetDevice
* @see JCudaDriver#cuCtxSetLimit
* @see JCudaDriver#cuCtxSynchronize
*/
public static int cuCtxGetStreamPriorityRange(int leastPriority[], int greatestPriority[])
{
    // The two arrays are handed to the native binding as given; its status
    // code is then routed through checkResult.
    final int status =
        cuCtxGetStreamPriorityRangeNative(leastPriority, greatestPriority);
    return checkResult(status);
}
// Native counterpart of cuCtxGetStreamPriorityRange; its implementation lives outside of Java.
private static native int cuCtxGetStreamPriorityRangeNative(int leastPriority[], int greatestPriority[]);
/**
* Launches a CUDA function.
*
*
*
*
*
* CUresult cuLaunchKernel (
* CUfunction f,
* unsigned int gridDimX,
* unsigned int gridDimY,
* unsigned int gridDimZ,
* unsigned int blockDimX,
* unsigned int blockDimY,
* unsigned int blockDimZ,
* unsigned int sharedMemBytes,
* CUstream hStream,
* void** kernelParams,
* void** extra )
*
*
*
*
*
*
*
*
* Invokes the kernel f on a gridDimX x gridDimY x gridDimZ grid of blocks.
* Each block contains blockDimX x blockDimY x blockDimZ threads.
*
* sharedMemBytes
sets the amount of dynamic shared memory
* that will be available to each thread block.
*
* cuLaunchKernel() can optionally be associated to a stream by passing a
* non-zero hStream
argument.
*
* Kernel parameters to f
can be specified in one of two
* ways:
*
* 1) Kernel parameters can be specified via kernelParams
.
* If f
has N parameters, then kernelParams
* needs to be an array of N pointers. Each of kernelParams
[0]
* through kernelParams
[N-1] must point to a region of memory
* from which the actual kernel parameter will be copied. The number of
* kernel parameters and their offsets and sizes do not need to be
* specified as that information is retrieved directly from the kernel's
* image.
*
* 2) Kernel parameters can also be packaged by the application into a
* single buffer that is passed in via the extra
parameter.
* This places the burden on the application of knowing each kernel
* parameter's size and alignment/padding within the buffer. Here is an
* example of using the extra
parameter in this manner:
*
* size_t argBufferSize;
* char argBuffer[256];
*
* // populate argBuffer and argBufferSize
*
* void *config[] = {
* CU_LAUNCH_PARAM_BUFFER_POINTER, argBuffer,
* CU_LAUNCH_PARAM_BUFFER_SIZE, &argBufferSize,
* CU_LAUNCH_PARAM_END
* };
* status = cuLaunchKernel(f, gx, gy, gz, bx, by, bz, sh, s, NULL,
* config);
*
*
*
* The extra
parameter exists to allow cuLaunchKernel to take
* additional less commonly used arguments. extra
specifies
* a list of names of extra settings and their corresponding values. Each
* extra setting name is immediately followed by the corresponding value.
* The list must be terminated with either NULL or
* CU_LAUNCH_PARAM_END.
*
*
* - CU_LAUNCH_PARAM_END, which indicates the end of the
extra
* array;
*
* - CU_LAUNCH_PARAM_BUFFER_POINTER, which specifies that
* the next value in
extra
will be a pointer to a buffer
* containing all the kernel parameters for launching kernel
* f
;
*
* - CU_LAUNCH_PARAM_BUFFER_SIZE, which specifies
* that the next value in
extra
will be a pointer to a size_t
* containing the size of the buffer specified with
* CU_LAUNCH_PARAM_BUFFER_POINTER;
*
*
*
* The error CUDA_ERROR_INVALID_VALUE will be returned if kernel parameters
* are specified with both kernelParams
and extra
* (i.e. both kernelParams
and extra
are
* non-NULL).
*
* Calling cuLaunchKernel() sets persistent function state that is the
* same as function state set through the following deprecated APIs:
*
* cuFuncSetBlockShape() cuFuncSetSharedSize() cuParamSetSize()
* cuParamSeti() cuParamSetf() cuParamSetv()
*
* When the kernel f
is launched via cuLaunchKernel(), the
* previous block shape, shared size and parameter info associated with
* f
is overwritten.
*
* Note that to use cuLaunchKernel(), the kernel f
must
* either have been compiled with toolchain version 3.2 or later so that
* it will contain kernel parameter information, or have no kernel
* parameters. If either of these conditions is not met, then
* cuLaunchKernel() will return CUDA_ERROR_INVALID_IMAGE.
*
*
*
*
* @return CUDA_SUCCESS, CUDA_ERROR_DEINITIALIZED, CUDA_ERROR_NOT_INITIALIZED,
* CUDA_ERROR_INVALID_CONTEXT, CUDA_ERROR_INVALID_HANDLE,
* CUDA_ERROR_INVALID_IMAGE, CUDA_ERROR_INVALID_VALUE,
* CUDA_ERROR_LAUNCH_FAILED, CUDA_ERROR_LAUNCH_OUT_OF_RESOURCES,
* CUDA_ERROR_LAUNCH_TIMEOUT, CUDA_ERROR_LAUNCH_INCOMPATIBLE_TEXTURING,
* CUDA_ERROR_SHARED_OBJECT_INIT_FAILED
*
* @see JCudaDriver#cuCtxGetCacheConfig
* @see JCudaDriver#cuCtxSetCacheConfig
* @see JCudaDriver#cuFuncSetCacheConfig
* @see JCudaDriver#cuFuncGetAttribute
*/
public static int cuLaunchKernel(
    CUfunction f,
    int gridDimX,
    int gridDimY,
    int gridDimZ,
    int blockDimX,
    int blockDimY,
    int blockDimZ,
    int sharedMemBytes,
    CUstream hStream,
    Pointer kernelParams,
    Pointer extra)
{
    // Every argument is forwarded to the native binding in declaration order;
    // the status code it returns is passed through checkResult.
    final int status = cuLaunchKernelNative(
        f,
        gridDimX, gridDimY, gridDimZ,
        blockDimX, blockDimY, blockDimZ,
        sharedMemBytes,
        hStream,
        kernelParams,
        extra);
    return checkResult(status);
}
// Native counterpart of cuLaunchKernel; its implementation lives outside of Java.
private static native int cuLaunchKernelNative(
    CUfunction f,
    int gridDimX,
    int gridDimY,
    int gridDimZ,
    int blockDimX,
    int blockDimY,
    int blockDimZ,
    int sharedMemBytes,
    CUstream hStream,
    Pointer kernelParams,
    Pointer extra);
/**
* Returns resource limits.
*
*
* CUresult cuCtxGetLimit (
* size_t* pvalue,
* CUlimit limit )
*
*
* Returns resource limits. Returns in *pvalue the current size of limit. The supported
* CUlimit values are:
*
* -
*
CU_LIMIT_STACK_SIZE: stack size
* in bytes of each GPU thread.
*
*
* -
*
CU_LIMIT_PRINTF_FIFO_SIZE: size
* in bytes of the FIFO used by the printf() device system call.
*
*
* -
*
CU_LIMIT_MALLOC_HEAP_SIZE: size
* in bytes of the heap used by the malloc() and free() device system
* calls.
*
*
* -
*
CU_LIMIT_DEV_RUNTIME_SYNC_DEPTH:
* maximum grid depth at which a thread can issue the device runtime call
* cudaDeviceSynchronize() to wait on child grid launches
* to complete.
*
*
* -
*
CU_LIMIT_DEV_RUNTIME_PENDING_LAUNCH_COUNT: maximum number of
* outstanding device runtime launches that can be made from this
* context.
*
*
*
*
*
* Note:
* Note that this
* function may also return error codes from previous, asynchronous
* launches.
*
*
*
*
*
* @param pvalue Returned size of limit
* @param limit Limit to query
*
* @return CUDA_SUCCESS, CUDA_ERROR_INVALID_VALUE,
* CUDA_ERROR_UNSUPPORTED_LIMIT
*
* @see JCudaDriver#cuCtxCreate
* @see JCudaDriver#cuCtxDestroy
* @see JCudaDriver#cuCtxGetApiVersion
* @see JCudaDriver#cuCtxGetCacheConfig
* @see JCudaDriver#cuCtxGetDevice
* @see JCudaDriver#cuCtxPopCurrent
* @see JCudaDriver#cuCtxPushCurrent
* @see JCudaDriver#cuCtxSetCacheConfig
* @see JCudaDriver#cuCtxSetLimit
* @see JCudaDriver#cuCtxSynchronize
*/
public static int cuCtxGetLimit(long pvalue[], int limit)
{
    // Call the native binding with the caller's output array and limit id,
    // then pass the status code through checkResult.
    final int status = cuCtxGetLimitNative(pvalue, limit);
    return checkResult(status);
}
// Native counterpart of cuCtxGetLimit; its implementation lives outside of Java.
private static native int cuCtxGetLimitNative(long pvalue[], int limit);
/**
* Initialize the profiling.
*
*
* CUresult cuProfilerInitialize (
* const char* configFile,
* const char* outputFile,
* CUoutput_mode outputMode )
*
*
* Initialize the profiling. Using this
* API user can initialize the CUDA profiler by specifying the configuration
* file, output
* file and output file format. This API is
* generally used to profile different sets of counters by looping the
* kernel launch.
* The configFile parameter can
* be used to select profiling options including profiler counters. Refer
* to the "Compute Command Line Profiler
* User Guide" for supported profiler
* options and counters.
*
* Limitation: The CUDA profiler cannot be
* initialized with this API if another profiling tool is already active,
* as indicated
* by the CUDA_ERROR_PROFILER_DISABLED
* return code.
*
* Typical usage of the profiling APIs is
* as follows:
*
* for each set of counters/options
* {
* cuProfilerInitialize(); //Initialize
* profiling, set the counters or options in the config file
* ...
* cuProfilerStart();
* // code to be profiled
* cuProfilerStop();
* ...
* cuProfilerStart();
* // code to be profiled
* cuProfilerStop();
* ...
* }
*
*
* Note:
* Note that this
* function may also return error codes from previous, asynchronous
* launches.
*
*
*
*
*
* @param configFile Name of the config file that lists the counters/options for profiling.
* @param outputFile Name of the outputFile where the profiling results will be stored.
* @param outputMode outputMode, can be CU_OUT_KEY_VALUE_PAIR or CU_OUT_CSV.
*
* @return CUDA_SUCCESS, CUDA_ERROR_INVALID_CONTEXT, CUDA_ERROR_INVALID_VALUE,
* CUDA_ERROR_PROFILER_DISABLED
*
* @see JCudaDriver#cuProfilerStart
* @see JCudaDriver#cuProfilerStop
*/
public static int cuProfilerInitialize(String configFile, String outputFile, int outputMode)
{
    // The file names and output mode are forwarded unchanged to the native
    // binding; its status code is then routed through checkResult.
    final int status =
        cuProfilerInitializeNative(configFile, outputFile, outputMode);
    return checkResult(status);
}
// Native counterpart of cuProfilerInitialize; its implementation lives outside of Java.
private static native int cuProfilerInitializeNative(String configFile, String outputFile, int outputMode);
/**
* Enable profiling.
*
*
* CUresult cuProfilerStart (
* void )
*
*
* Enable profiling. Enables profile
* collection by the active profiling tool. If profiling is already
* enabled, then cuProfilerStart() has no effect.
*
* cuProfilerStart and cuProfilerStop APIs
* are used to programmatically control the profiling granularity by
* allowing profiling
* to be done only on selective pieces of
* code.
*
*
* Note:
* Note that this
* function may also return error codes from previous, asynchronous
* launches.
*
*
*
*
*
*
* @return CUDA_SUCCESS, CUDA_ERROR_INVALID_CONTEXT
*
* @see JCudaDriver#cuProfilerInitialize
* @see JCudaDriver#cuProfilerStop
*/
public static int cuProfilerStart()
{
    // Run the native binding and pass its status code through checkResult.
    final int status = cuProfilerStartNative();
    return checkResult(status);
}
// Native counterpart of cuProfilerStart; its implementation lives outside of Java.
private static native int cuProfilerStartNative();
/**
* Disable profiling.
*
*
* CUresult cuProfilerStop (
* void )
*
*
* Disable profiling. Disables profile
* collection by the active profiling tool. If profiling is already
* disabled, then cuProfilerStop() has no effect.
*
* cuProfilerStart and cuProfilerStop APIs
* are used to programmatically control the profiling granularity by
* allowing profiling
* to be done only on selective pieces of
* code.
*
*
* Note:
* Note that this
* function may also return error codes from previous, asynchronous
* launches.
*
*
*
*
*
*
* @return CUDA_SUCCESS, CUDA_ERROR_INVALID_CONTEXT
*
* @see JCudaDriver#cuProfilerInitialize
* @see JCudaDriver#cuProfilerStart
*/
public static int cuProfilerStop()
{
    // Run the native binding and pass its status code through checkResult.
    final int status = cuProfilerStopNative();
    return checkResult(status);
}
// Native counterpart of cuProfilerStop; its implementation lives outside of Java.
private static native int cuProfilerStopNative();
}