/*
* Copyright LWJGL. All rights reserved.
* License terms: https://www.lwjgl.org/license
* MACHINE GENERATED FILE, DO NOT EDIT
*/
package org.lwjgl.cuda;
import javax.annotation.*;
import java.nio.*;
import org.lwjgl.*;
import org.lwjgl.system.*;
import static org.lwjgl.system.APIUtil.*;
import static org.lwjgl.system.Checks.*;
import static org.lwjgl.system.JNI.*;
import static org.lwjgl.system.MemoryStack.*;
import static org.lwjgl.system.MemoryUtil.*;
import static org.lwjgl.system.Pointer.*;
import org.lwjgl.system.libffi.*;
import static org.lwjgl.cuda.CUDA.*;
import static org.lwjgl.system.libffi.LibFFI.*;
/**
* Contains bindings to the CUDA Driver API.
*
* Functionality up to CUDA version 3.2, which is the minimum version compatible with the LWJGL bindings, is guaranteed to be available. Functions
* introduced after CUDA 3.2 may or may not be available, depending on the CUDA version available at runtime.
*/
public class CU {
private static final SharedLibrary NVCUDA = Library.loadNative(CU.class, "org.lwjgl.cuda", Configuration.CUDA_LIBRARY_NAME, "nvcuda");
/** Contains the function pointers loaded from the NVCUDA {@link SharedLibrary}. */
public static final class Functions {
private Functions() {}
/** Function address. */
public static final long
GetErrorString = apiGetFunctionAddress(NVCUDA, "cuGetErrorString"),
GetErrorName = apiGetFunctionAddress(NVCUDA, "cuGetErrorName"),
Init = apiGetFunctionAddress(NVCUDA, "cuInit"),
DriverGetVersion = apiGetFunctionAddress(NVCUDA, "cuDriverGetVersion"),
DeviceGet = apiGetFunctionAddress(NVCUDA, "cuDeviceGet"),
DeviceGetCount = apiGetFunctionAddress(NVCUDA, "cuDeviceGetCount"),
DeviceGetName = apiGetFunctionAddress(NVCUDA, "cuDeviceGetName"),
DeviceGetUuid = NVCUDA.getFunctionAddress("cuDeviceGetUuid"),
DeviceGetUuid_v2 = NVCUDA.getFunctionAddress("cuDeviceGetUuid_v2"),
DeviceGetLuid = NVCUDA.getFunctionAddress("cuDeviceGetLuid"),
DeviceTotalMem = apiGetFunctionAddress(NVCUDA, __CUDA_API_VERSION("cuDeviceTotalMem", 2)),
DeviceGetTexture1DLinearMaxWidth = NVCUDA.getFunctionAddress("cuDeviceGetTexture1DLinearMaxWidth"),
DeviceGetAttribute = apiGetFunctionAddress(NVCUDA, "cuDeviceGetAttribute"),
DeviceGetNvSciSyncAttributes = NVCUDA.getFunctionAddress("cuDeviceGetNvSciSyncAttributes"),
DeviceSetMemPool = NVCUDA.getFunctionAddress("cuDeviceSetMemPool"),
DeviceGetMemPool = NVCUDA.getFunctionAddress("cuDeviceGetMemPool"),
DeviceGetDefaultMemPool = NVCUDA.getFunctionAddress("cuDeviceGetDefaultMemPool"),
FlushGPUDirectRDMAWrites = NVCUDA.getFunctionAddress("cuFlushGPUDirectRDMAWrites"),
DeviceGetProperties = apiGetFunctionAddress(NVCUDA, "cuDeviceGetProperties"),
DeviceComputeCapability = apiGetFunctionAddress(NVCUDA, "cuDeviceComputeCapability"),
DevicePrimaryCtxRetain = NVCUDA.getFunctionAddress("cuDevicePrimaryCtxRetain"),
DevicePrimaryCtxRelease = NVCUDA.getFunctionAddress(__CUDA_API_VERSION("cuDevicePrimaryCtxRelease", 2)),
DevicePrimaryCtxSetFlags = NVCUDA.getFunctionAddress(__CUDA_API_VERSION("cuDevicePrimaryCtxSetFlags", 2)),
DevicePrimaryCtxGetState = NVCUDA.getFunctionAddress("cuDevicePrimaryCtxGetState"),
DevicePrimaryCtxReset = NVCUDA.getFunctionAddress(__CUDA_API_VERSION("cuDevicePrimaryCtxReset", 2)),
DeviceGetExecAffinitySupport = NVCUDA.getFunctionAddress("cuDeviceGetExecAffinitySupport"),
CtxCreate = apiGetFunctionAddress(NVCUDA, __CUDA_API_VERSION("cuCtxCreate", 2)),
CtxCreate_v3 = NVCUDA.getFunctionAddress("cuCtxCreate_v3"),
CtxDestroy = NVCUDA.getFunctionAddress(__CUDA_API_VERSION("cuCtxDestroy", 2)),
CtxPushCurrent = NVCUDA.getFunctionAddress(__CUDA_API_VERSION("cuCtxPushCurrent", 2)),
CtxPopCurrent = NVCUDA.getFunctionAddress(__CUDA_API_VERSION("cuCtxPopCurrent", 2)),
CtxSetCurrent = NVCUDA.getFunctionAddress("cuCtxSetCurrent"),
CtxGetCurrent = NVCUDA.getFunctionAddress("cuCtxGetCurrent"),
CtxGetDevice = apiGetFunctionAddress(NVCUDA, "cuCtxGetDevice"),
CtxGetFlags = NVCUDA.getFunctionAddress("cuCtxGetFlags"),
CtxSynchronize = apiGetFunctionAddress(NVCUDA, "cuCtxSynchronize"),
CtxSetLimit = apiGetFunctionAddress(NVCUDA, "cuCtxSetLimit"),
CtxGetLimit = apiGetFunctionAddress(NVCUDA, "cuCtxGetLimit"),
CtxGetCacheConfig = apiGetFunctionAddress(NVCUDA, "cuCtxGetCacheConfig"),
CtxSetCacheConfig = apiGetFunctionAddress(NVCUDA, "cuCtxSetCacheConfig"),
CtxGetSharedMemConfig = NVCUDA.getFunctionAddress("cuCtxGetSharedMemConfig"),
CtxSetSharedMemConfig = NVCUDA.getFunctionAddress("cuCtxSetSharedMemConfig"),
CtxGetApiVersion = apiGetFunctionAddress(NVCUDA, "cuCtxGetApiVersion"),
CtxGetStreamPriorityRange = apiGetFunctionAddress(NVCUDA, "cuCtxGetStreamPriorityRange"),
CtxResetPersistingL2Cache = NVCUDA.getFunctionAddress("cuCtxResetPersistingL2Cache"),
CtxGetExecAffinity = NVCUDA.getFunctionAddress("cuCtxGetExecAffinity"),
CtxAttach = apiGetFunctionAddress(NVCUDA, "cuCtxAttach"),
CtxDetach = apiGetFunctionAddress(NVCUDA, "cuCtxDetach"),
ModuleLoad = apiGetFunctionAddress(NVCUDA, "cuModuleLoad"),
ModuleLoadData = apiGetFunctionAddress(NVCUDA, "cuModuleLoadData"),
ModuleLoadDataEx = apiGetFunctionAddress(NVCUDA, "cuModuleLoadDataEx"),
ModuleLoadFatBinary = apiGetFunctionAddress(NVCUDA, "cuModuleLoadFatBinary"),
ModuleUnload = apiGetFunctionAddress(NVCUDA, "cuModuleUnload"),
ModuleGetFunction = apiGetFunctionAddress(NVCUDA, "cuModuleGetFunction"),
ModuleGetGlobal = apiGetFunctionAddress(NVCUDA, __CUDA_API_VERSION("cuModuleGetGlobal", 2)),
ModuleGetTexRef = apiGetFunctionAddress(NVCUDA, "cuModuleGetTexRef"),
ModuleGetSurfRef = apiGetFunctionAddress(NVCUDA, "cuModuleGetSurfRef"),
LinkCreate = NVCUDA.getFunctionAddress(__CUDA_API_VERSION("cuLinkCreate", 2)),
LinkAddData = NVCUDA.getFunctionAddress(__CUDA_API_VERSION("cuLinkAddData", 2)),
LinkAddFile = NVCUDA.getFunctionAddress(__CUDA_API_VERSION("cuLinkAddFile", 2)),
LinkComplete = NVCUDA.getFunctionAddress("cuLinkComplete"),
LinkDestroy = NVCUDA.getFunctionAddress("cuLinkDestroy"),
MemGetInfo = apiGetFunctionAddress(NVCUDA, __CUDA_API_VERSION("cuMemGetInfo", 2)),
MemAlloc = apiGetFunctionAddress(NVCUDA, __CUDA_API_VERSION("cuMemAlloc", 2)),
MemAllocPitch = apiGetFunctionAddress(NVCUDA, __CUDA_API_VERSION("cuMemAllocPitch", 2)),
MemFree = apiGetFunctionAddress(NVCUDA, __CUDA_API_VERSION("cuMemFree", 2)),
MemGetAddressRange = apiGetFunctionAddress(NVCUDA, __CUDA_API_VERSION("cuMemGetAddressRange", 2)),
MemAllocHost = apiGetFunctionAddress(NVCUDA, __CUDA_API_VERSION("cuMemAllocHost", 2)),
MemFreeHost = apiGetFunctionAddress(NVCUDA, "cuMemFreeHost"),
MemHostAlloc = apiGetFunctionAddress(NVCUDA, "cuMemHostAlloc"),
MemHostGetDevicePointer = apiGetFunctionAddress(NVCUDA, __CUDA_API_VERSION("cuMemHostGetDevicePointer", 2)),
MemHostGetFlags = apiGetFunctionAddress(NVCUDA, "cuMemHostGetFlags"),
MemAllocManaged = NVCUDA.getFunctionAddress("cuMemAllocManaged"),
DeviceGetByPCIBusId = NVCUDA.getFunctionAddress("cuDeviceGetByPCIBusId"),
DeviceGetPCIBusId = NVCUDA.getFunctionAddress("cuDeviceGetPCIBusId"),
IpcGetEventHandle = NVCUDA.getFunctionAddress("cuIpcGetEventHandle"),
IpcOpenEventHandle$Address = NVCUDA.getFunctionAddress("cuIpcOpenEventHandle"),
IpcGetMemHandle = NVCUDA.getFunctionAddress("cuIpcGetMemHandle"),
IpcOpenMemHandle$Address = NVCUDA.getFunctionAddress(__CUDA_API_VERSION("cuIpcOpenMemHandle", 2)),
IpcCloseMemHandle = NVCUDA.getFunctionAddress("cuIpcCloseMemHandle"),
MemHostRegister = NVCUDA.getFunctionAddress(__CUDA_API_VERSION("cuMemHostRegister", 2)),
MemHostUnregister = NVCUDA.getFunctionAddress("cuMemHostUnregister"),
Memcpy = NVCUDA.getFunctionAddress(__CUDA_API_PTDS("cuMemcpy")),
MemcpyPeer = NVCUDA.getFunctionAddress(__CUDA_API_PTDS("cuMemcpyPeer")),
MemcpyHtoD = apiGetFunctionAddress(NVCUDA, __CUDA_API_PTDS(__CUDA_API_VERSION("cuMemcpyHtoD", 2))),
MemcpyDtoH = apiGetFunctionAddress(NVCUDA, __CUDA_API_PTDS(__CUDA_API_VERSION("cuMemcpyDtoH", 2))),
MemcpyDtoD = apiGetFunctionAddress(NVCUDA, __CUDA_API_PTDS(__CUDA_API_VERSION("cuMemcpyDtoD", 2))),
MemcpyDtoA = apiGetFunctionAddress(NVCUDA, __CUDA_API_PTDS(__CUDA_API_VERSION("cuMemcpyDtoA", 2))),
MemcpyAtoD = apiGetFunctionAddress(NVCUDA, __CUDA_API_PTDS(__CUDA_API_VERSION("cuMemcpyAtoD", 2))),
MemcpyHtoA = apiGetFunctionAddress(NVCUDA, __CUDA_API_PTDS(__CUDA_API_VERSION("cuMemcpyHtoA", 2))),
MemcpyAtoH = apiGetFunctionAddress(NVCUDA, __CUDA_API_PTDS(__CUDA_API_VERSION("cuMemcpyAtoH", 2))),
MemcpyAtoA = apiGetFunctionAddress(NVCUDA, __CUDA_API_PTDS(__CUDA_API_VERSION("cuMemcpyAtoA", 2))),
Memcpy2D = apiGetFunctionAddress(NVCUDA, __CUDA_API_PTDS(__CUDA_API_VERSION("cuMemcpy2D", 2))),
Memcpy2DUnaligned = apiGetFunctionAddress(NVCUDA, __CUDA_API_PTDS(__CUDA_API_VERSION("cuMemcpy2DUnaligned", 2))),
Memcpy3D = apiGetFunctionAddress(NVCUDA, __CUDA_API_PTDS(__CUDA_API_VERSION("cuMemcpy3D", 2))),
Memcpy3DPeer = NVCUDA.getFunctionAddress(__CUDA_API_PTDS("cuMemcpy3DPeer")),
MemcpyAsync = NVCUDA.getFunctionAddress(__CUDA_API_PTSZ("cuMemcpyAsync")),
MemcpyPeerAsync = NVCUDA.getFunctionAddress(__CUDA_API_PTSZ("cuMemcpyPeerAsync")),
MemcpyHtoDAsync = apiGetFunctionAddress(NVCUDA, __CUDA_API_PTSZ(__CUDA_API_VERSION("cuMemcpyHtoDAsync", 2))),
MemcpyDtoHAsync = apiGetFunctionAddress(NVCUDA, __CUDA_API_PTSZ(__CUDA_API_VERSION("cuMemcpyDtoHAsync", 2))),
MemcpyDtoDAsync = apiGetFunctionAddress(NVCUDA, __CUDA_API_PTSZ(__CUDA_API_VERSION("cuMemcpyDtoDAsync", 2))),
MemcpyHtoAAsync = apiGetFunctionAddress(NVCUDA, __CUDA_API_PTSZ(__CUDA_API_VERSION("cuMemcpyHtoAAsync", 2))),
MemcpyAtoHAsync = apiGetFunctionAddress(NVCUDA, __CUDA_API_PTSZ(__CUDA_API_VERSION("cuMemcpyAtoHAsync", 2))),
Memcpy2DAsync = apiGetFunctionAddress(NVCUDA, __CUDA_API_PTSZ(__CUDA_API_VERSION("cuMemcpy2DAsync", 2))),
Memcpy3DAsync = apiGetFunctionAddress(NVCUDA, __CUDA_API_PTSZ(__CUDA_API_VERSION("cuMemcpy3DAsync", 2))),
Memcpy3DPeerAsync = NVCUDA.getFunctionAddress(__CUDA_API_PTSZ("cuMemcpy3DPeerAsync")),
MemsetD8 = apiGetFunctionAddress(NVCUDA, __CUDA_API_PTDS(__CUDA_API_VERSION("cuMemsetD8", 2))),
MemsetD16 = apiGetFunctionAddress(NVCUDA, __CUDA_API_PTDS(__CUDA_API_VERSION("cuMemsetD16", 2))),
MemsetD32 = apiGetFunctionAddress(NVCUDA, __CUDA_API_PTDS(__CUDA_API_VERSION("cuMemsetD32", 2))),
MemsetD2D8 = apiGetFunctionAddress(NVCUDA, __CUDA_API_PTDS(__CUDA_API_VERSION("cuMemsetD2D8", 2))),
MemsetD2D16 = apiGetFunctionAddress(NVCUDA, __CUDA_API_PTDS(__CUDA_API_VERSION("cuMemsetD2D16", 2))),
MemsetD2D32 = apiGetFunctionAddress(NVCUDA, __CUDA_API_PTDS(__CUDA_API_VERSION("cuMemsetD2D32", 2))),
MemsetD8Async = apiGetFunctionAddress(NVCUDA, __CUDA_API_PTSZ("cuMemsetD8Async")),
MemsetD16Async = apiGetFunctionAddress(NVCUDA, __CUDA_API_PTSZ("cuMemsetD16Async")),
MemsetD32Async = apiGetFunctionAddress(NVCUDA, __CUDA_API_PTSZ("cuMemsetD32Async")),
MemsetD2D8Async = apiGetFunctionAddress(NVCUDA, __CUDA_API_PTSZ("cuMemsetD2D8Async")),
MemsetD2D16Async = apiGetFunctionAddress(NVCUDA, __CUDA_API_PTSZ("cuMemsetD2D16Async")),
MemsetD2D32Async = apiGetFunctionAddress(NVCUDA, __CUDA_API_PTSZ("cuMemsetD2D32Async")),
ArrayCreate = apiGetFunctionAddress(NVCUDA, __CUDA_API_VERSION("cuArrayCreate", 2)),
ArrayGetDescriptor = apiGetFunctionAddress(NVCUDA, __CUDA_API_VERSION("cuArrayGetDescriptor", 2)),
ArrayGetSparseProperties = NVCUDA.getFunctionAddress("cuArrayGetSparseProperties"),
MipmappedArrayGetSparseProperties = NVCUDA.getFunctionAddress("cuMipmappedArrayGetSparseProperties"),
ArrayGetPlane = NVCUDA.getFunctionAddress("cuArrayGetPlane"),
ArrayDestroy = apiGetFunctionAddress(NVCUDA, "cuArrayDestroy"),
Array3DCreate = apiGetFunctionAddress(NVCUDA, __CUDA_API_VERSION("cuArray3DCreate", 2)),
Array3DGetDescriptor = apiGetFunctionAddress(NVCUDA, __CUDA_API_VERSION("cuArray3DGetDescriptor", 2)),
MipmappedArrayCreate = NVCUDA.getFunctionAddress("cuMipmappedArrayCreate"),
MipmappedArrayGetLevel = NVCUDA.getFunctionAddress("cuMipmappedArrayGetLevel"),
MipmappedArrayDestroy = NVCUDA.getFunctionAddress("cuMipmappedArrayDestroy"),
MemAddressReserve = NVCUDA.getFunctionAddress("cuMemAddressReserve"),
MemAddressFree = NVCUDA.getFunctionAddress("cuMemAddressFree"),
MemCreate = NVCUDA.getFunctionAddress("cuMemCreate"),
MemRelease = NVCUDA.getFunctionAddress("cuMemRelease"),
MemMap = NVCUDA.getFunctionAddress("cuMemMap"),
MemMapArrayAsync = NVCUDA.getFunctionAddress(__CUDA_API_PTSZ("cuMemMapArrayAsync")),
MemUnmap = NVCUDA.getFunctionAddress("cuMemUnmap"),
MemSetAccess = NVCUDA.getFunctionAddress("cuMemSetAccess"),
MemGetAccess = NVCUDA.getFunctionAddress("cuMemGetAccess"),
MemExportToShareableHandle = NVCUDA.getFunctionAddress("cuMemExportToShareableHandle"),
MemImportFromShareableHandle = NVCUDA.getFunctionAddress("cuMemImportFromShareableHandle"),
MemGetAllocationGranularity = NVCUDA.getFunctionAddress("cuMemGetAllocationGranularity"),
MemGetAllocationPropertiesFromHandle = NVCUDA.getFunctionAddress("cuMemGetAllocationPropertiesFromHandle"),
MemRetainAllocationHandle = NVCUDA.getFunctionAddress("cuMemRetainAllocationHandle"),
MemFreeAsync = NVCUDA.getFunctionAddress(__CUDA_API_PTSZ("cuMemFreeAsync")),
MemAllocAsync = NVCUDA.getFunctionAddress(__CUDA_API_PTSZ("cuMemAllocAsync")),
MemPoolTrimTo = NVCUDA.getFunctionAddress("cuMemPoolTrimTo"),
MemPoolSetAttribute = NVCUDA.getFunctionAddress("cuMemPoolSetAttribute"),
MemPoolGetAttribute = NVCUDA.getFunctionAddress("cuMemPoolGetAttribute"),
MemPoolSetAccess = NVCUDA.getFunctionAddress("cuMemPoolSetAccess"),
MemPoolGetAccess = NVCUDA.getFunctionAddress("cuMemPoolGetAccess"),
MemPoolCreate = NVCUDA.getFunctionAddress("cuMemPoolCreate"),
MemPoolDestroy = NVCUDA.getFunctionAddress("cuMemPoolDestroy"),
MemAllocFromPoolAsync = NVCUDA.getFunctionAddress(__CUDA_API_PTSZ("cuMemAllocFromPoolAsync")),
MemPoolExportToShareableHandle = NVCUDA.getFunctionAddress("cuMemPoolExportToShareableHandle"),
MemPoolImportFromShareableHandle = NVCUDA.getFunctionAddress("cuMemPoolImportFromShareableHandle"),
MemPoolExportPointer = NVCUDA.getFunctionAddress("cuMemPoolExportPointer"),
MemPoolImportPointer = NVCUDA.getFunctionAddress("cuMemPoolImportPointer"),
PointerGetAttribute = NVCUDA.getFunctionAddress("cuPointerGetAttribute"),
MemPrefetchAsync = NVCUDA.getFunctionAddress(__CUDA_API_PTSZ("cuMemPrefetchAsync")),
MemAdvise = NVCUDA.getFunctionAddress("cuMemAdvise"),
MemRangeGetAttribute = NVCUDA.getFunctionAddress("cuMemRangeGetAttribute"),
MemRangeGetAttributes = NVCUDA.getFunctionAddress("cuMemRangeGetAttributes"),
PointerSetAttribute = NVCUDA.getFunctionAddress("cuPointerSetAttribute"),
PointerGetAttributes = NVCUDA.getFunctionAddress("cuPointerGetAttributes"),
StreamCreate = apiGetFunctionAddress(NVCUDA, "cuStreamCreate"),
StreamCreateWithPriority = apiGetFunctionAddress(NVCUDA, "cuStreamCreateWithPriority"),
StreamGetPriority = apiGetFunctionAddress(NVCUDA, __CUDA_API_PTSZ("cuStreamGetPriority")),
StreamGetFlags = apiGetFunctionAddress(NVCUDA, __CUDA_API_PTSZ("cuStreamGetFlags")),
StreamGetCtx = NVCUDA.getFunctionAddress(__CUDA_API_PTSZ("cuStreamGetCtx")),
StreamWaitEvent = apiGetFunctionAddress(NVCUDA, __CUDA_API_PTSZ("cuStreamWaitEvent")),
StreamAddCallback = NVCUDA.getFunctionAddress(__CUDA_API_PTSZ("cuStreamAddCallback")),
StreamBeginCapture = NVCUDA.getFunctionAddress(__CUDA_API_PTSZ("cuStreamBeginCapture")),
StreamBeginCapture_v2 = NVCUDA.getFunctionAddress(__CUDA_API_PTSZ("cuStreamBeginCapture_v2")),
ThreadExchangeStreamCaptureMode = NVCUDA.getFunctionAddress("cuThreadExchangeStreamCaptureMode"),
StreamEndCapture = NVCUDA.getFunctionAddress(__CUDA_API_PTSZ("cuStreamEndCapture")),
StreamIsCapturing = NVCUDA.getFunctionAddress(__CUDA_API_PTSZ("cuStreamIsCapturing")),
StreamGetCaptureInfo = NVCUDA.getFunctionAddress(__CUDA_API_PTSZ("cuStreamGetCaptureInfo")),
StreamGetCaptureInfo_v2 = NVCUDA.getFunctionAddress(__CUDA_API_PTSZ("cuStreamGetCaptureInfo_v2")),
StreamUpdateCaptureDependencies = NVCUDA.getFunctionAddress(__CUDA_API_PTSZ("cuStreamUpdateCaptureDependencies")),
StreamAttachMemAsync = NVCUDA.getFunctionAddress(__CUDA_API_PTSZ("cuStreamAttachMemAsync")),
StreamQuery = apiGetFunctionAddress(NVCUDA, __CUDA_API_PTSZ("cuStreamQuery")),
StreamSynchronize = apiGetFunctionAddress(NVCUDA, __CUDA_API_PTSZ("cuStreamSynchronize")),
StreamDestroy = NVCUDA.getFunctionAddress(__CUDA_API_VERSION("cuStreamDestroy", 2)),
StreamCopyAttributes = NVCUDA.getFunctionAddress(__CUDA_API_PTSZ("cuStreamCopyAttributes")),
StreamGetAttribute = NVCUDA.getFunctionAddress(__CUDA_API_PTSZ("cuStreamGetAttribute")),
StreamSetAttribute = NVCUDA.getFunctionAddress(__CUDA_API_PTSZ("cuStreamSetAttribute")),
EventCreate = apiGetFunctionAddress(NVCUDA, "cuEventCreate"),
EventRecord = apiGetFunctionAddress(NVCUDA, __CUDA_API_PTSZ("cuEventRecord")),
EventRecordWithFlags = NVCUDA.getFunctionAddress(__CUDA_API_PTSZ("cuEventRecordWithFlags")),
EventQuery = apiGetFunctionAddress(NVCUDA, "cuEventQuery"),
EventSynchronize = apiGetFunctionAddress(NVCUDA, "cuEventSynchronize"),
EventDestroy = NVCUDA.getFunctionAddress(__CUDA_API_VERSION("cuEventDestroy", 2)),
EventElapsedTime = apiGetFunctionAddress(NVCUDA, "cuEventElapsedTime"),
ImportExternalMemory = NVCUDA.getFunctionAddress("cuImportExternalMemory"),
ExternalMemoryGetMappedBuffer = NVCUDA.getFunctionAddress("cuExternalMemoryGetMappedBuffer"),
ExternalMemoryGetMappedMipmappedArray = NVCUDA.getFunctionAddress("cuExternalMemoryGetMappedMipmappedArray"),
DestroyExternalMemory = NVCUDA.getFunctionAddress("cuDestroyExternalMemory"),
ImportExternalSemaphore = NVCUDA.getFunctionAddress("cuImportExternalSemaphore"),
SignalExternalSemaphoresAsync = NVCUDA.getFunctionAddress(__CUDA_API_PTSZ("cuSignalExternalSemaphoresAsync")),
WaitExternalSemaphoresAsync = NVCUDA.getFunctionAddress(__CUDA_API_PTSZ("cuWaitExternalSemaphoresAsync")),
DestroyExternalSemaphore = NVCUDA.getFunctionAddress("cuDestroyExternalSemaphore"),
StreamWaitValue32 = NVCUDA.getFunctionAddress(__CUDA_API_PTSZ("cuStreamWaitValue32")),
StreamWaitValue64 = NVCUDA.getFunctionAddress(__CUDA_API_PTSZ("cuStreamWaitValue64")),
StreamWriteValue32 = NVCUDA.getFunctionAddress(__CUDA_API_PTSZ("cuStreamWriteValue32")),
StreamWriteValue64 = NVCUDA.getFunctionAddress("cuStreamWriteValue64"),
StreamBatchMemOp = NVCUDA.getFunctionAddress(__CUDA_API_PTSZ("cuStreamBatchMemOp")),
FuncGetAttribute = apiGetFunctionAddress(NVCUDA, "cuFuncGetAttribute"),
FuncSetAttribute = NVCUDA.getFunctionAddress("cuFuncSetAttribute"),
FuncSetCacheConfig = apiGetFunctionAddress(NVCUDA, "cuFuncSetCacheConfig"),
FuncSetSharedMemConfig = NVCUDA.getFunctionAddress("cuFuncSetSharedMemConfig"),
FuncGetModule = NVCUDA.getFunctionAddress("cuFuncGetModule"),
LaunchKernel = NVCUDA.getFunctionAddress(__CUDA_API_PTSZ("cuLaunchKernel")),
LaunchCooperativeKernel = NVCUDA.getFunctionAddress(__CUDA_API_PTSZ("cuLaunchCooperativeKernel")),
LaunchCooperativeKernelMultiDevice = NVCUDA.getFunctionAddress("cuLaunchCooperativeKernelMultiDevice"),
LaunchHostFunc = NVCUDA.getFunctionAddress(__CUDA_API_PTSZ("cuLaunchHostFunc")),
FuncSetBlockShape = apiGetFunctionAddress(NVCUDA, "cuFuncSetBlockShape"),
FuncSetSharedSize = apiGetFunctionAddress(NVCUDA, "cuFuncSetSharedSize"),
ParamSetSize = apiGetFunctionAddress(NVCUDA, "cuParamSetSize"),
ParamSeti = apiGetFunctionAddress(NVCUDA, "cuParamSeti"),
ParamSetf = apiGetFunctionAddress(NVCUDA, "cuParamSetf"),
ParamSetv = apiGetFunctionAddress(NVCUDA, "cuParamSetv"),
Launch = apiGetFunctionAddress(NVCUDA, "cuLaunch"),
LaunchGrid = apiGetFunctionAddress(NVCUDA, "cuLaunchGrid"),
LaunchGridAsync = apiGetFunctionAddress(NVCUDA, "cuLaunchGridAsync"),
ParamSetTexRef = apiGetFunctionAddress(NVCUDA, "cuParamSetTexRef"),
GraphCreate = NVCUDA.getFunctionAddress("cuGraphCreate"),
GraphAddKernelNode = NVCUDA.getFunctionAddress("cuGraphAddKernelNode"),
GraphKernelNodeGetParams = NVCUDA.getFunctionAddress("cuGraphKernelNodeGetParams"),
GraphKernelNodeSetParams = NVCUDA.getFunctionAddress("cuGraphKernelNodeSetParams"),
GraphAddMemcpyNode = NVCUDA.getFunctionAddress("cuGraphAddMemcpyNode"),
GraphMemcpyNodeGetParams = NVCUDA.getFunctionAddress("cuGraphMemcpyNodeGetParams"),
GraphMemcpyNodeSetParams = NVCUDA.getFunctionAddress("cuGraphMemcpyNodeSetParams"),
GraphAddMemsetNode = NVCUDA.getFunctionAddress("cuGraphAddMemsetNode"),
GraphMemsetNodeGetParams = NVCUDA.getFunctionAddress("cuGraphMemsetNodeGetParams"),
GraphMemsetNodeSetParams = NVCUDA.getFunctionAddress("cuGraphMemsetNodeSetParams"),
GraphAddHostNode = NVCUDA.getFunctionAddress("cuGraphAddHostNode"),
GraphHostNodeGetParams = NVCUDA.getFunctionAddress("cuGraphHostNodeGetParams"),
GraphHostNodeSetParams = NVCUDA.getFunctionAddress("cuGraphHostNodeSetParams"),
GraphAddChildGraphNode = NVCUDA.getFunctionAddress("cuGraphAddChildGraphNode"),
GraphChildGraphNodeGetGraph = NVCUDA.getFunctionAddress("cuGraphChildGraphNodeGetGraph"),
GraphAddEmptyNode = NVCUDA.getFunctionAddress("cuGraphAddEmptyNode"),
GraphAddEventRecordNode = NVCUDA.getFunctionAddress("cuGraphAddEventRecordNode"),
GraphEventRecordNodeGetEvent = NVCUDA.getFunctionAddress("cuGraphEventRecordNodeGetEvent"),
GraphEventRecordNodeSetEvent = NVCUDA.getFunctionAddress("cuGraphEventRecordNodeSetEvent"),
GraphAddEventWaitNode = NVCUDA.getFunctionAddress("cuGraphAddEventWaitNode"),
GraphEventWaitNodeGetEvent = NVCUDA.getFunctionAddress("cuGraphEventWaitNodeGetEvent"),
GraphEventWaitNodeSetEvent = NVCUDA.getFunctionAddress("cuGraphEventWaitNodeSetEvent"),
GraphAddExternalSemaphoresSignalNode = NVCUDA.getFunctionAddress("cuGraphAddExternalSemaphoresSignalNode"),
GraphExternalSemaphoresSignalNodeGetParams = NVCUDA.getFunctionAddress("cuGraphExternalSemaphoresSignalNodeGetParams"),
GraphExternalSemaphoresSignalNodeSetParams = NVCUDA.getFunctionAddress("cuGraphExternalSemaphoresSignalNodeSetParams"),
GraphAddExternalSemaphoresWaitNode = NVCUDA.getFunctionAddress("cuGraphAddExternalSemaphoresWaitNode"),
GraphExternalSemaphoresWaitNodeGetParams = NVCUDA.getFunctionAddress("cuGraphExternalSemaphoresWaitNodeGetParams"),
GraphExternalSemaphoresWaitNodeSetParams = NVCUDA.getFunctionAddress("cuGraphExternalSemaphoresWaitNodeSetParams"),
GraphAddMemAllocNode = NVCUDA.getFunctionAddress("cuGraphAddMemAllocNode"),
GraphMemAllocNodeGetParams = NVCUDA.getFunctionAddress("cuGraphMemAllocNodeGetParams"),
GraphAddMemFreeNode = NVCUDA.getFunctionAddress("cuGraphAddMemFreeNode"),
GraphMemFreeNodeGetParams = NVCUDA.getFunctionAddress("cuGraphMemFreeNodeGetParams"),
DeviceGraphMemTrim = NVCUDA.getFunctionAddress("cuDeviceGraphMemTrim"),
DeviceGetGraphMemAttribute = NVCUDA.getFunctionAddress("cuDeviceGetGraphMemAttribute"),
DeviceSetGraphMemAttribute = NVCUDA.getFunctionAddress("cuDeviceSetGraphMemAttribute"),
GraphClone = NVCUDA.getFunctionAddress("cuGraphClone"),
GraphNodeFindInClone = NVCUDA.getFunctionAddress("cuGraphNodeFindInClone"),
GraphNodeGetType = NVCUDA.getFunctionAddress("cuGraphNodeGetType"),
GraphGetNodes = NVCUDA.getFunctionAddress("cuGraphGetNodes"),
GraphGetRootNodes = NVCUDA.getFunctionAddress("cuGraphGetRootNodes"),
GraphGetEdges = NVCUDA.getFunctionAddress("cuGraphGetEdges"),
GraphNodeGetDependencies = NVCUDA.getFunctionAddress("cuGraphNodeGetDependencies"),
GraphNodeGetDependentNodes = NVCUDA.getFunctionAddress("cuGraphNodeGetDependentNodes"),
GraphAddDependencies = NVCUDA.getFunctionAddress("cuGraphAddDependencies"),
GraphRemoveDependencies = NVCUDA.getFunctionAddress("cuGraphRemoveDependencies"),
GraphDestroyNode = NVCUDA.getFunctionAddress("cuGraphDestroyNode"),
GraphInstantiate = NVCUDA.getFunctionAddress(__CUDA_API_VERSION("cuGraphInstantiate", 2)),
GraphInstantiateWithFlags = NVCUDA.getFunctionAddress("cuGraphInstantiateWithFlags"),
GraphExecKernelNodeSetParams = NVCUDA.getFunctionAddress("cuGraphExecKernelNodeSetParams"),
GraphExecMemcpyNodeSetParams = NVCUDA.getFunctionAddress("cuGraphExecMemcpyNodeSetParams"),
GraphExecMemsetNodeSetParams = NVCUDA.getFunctionAddress("cuGraphExecMemsetNodeSetParams"),
GraphExecHostNodeSetParams = NVCUDA.getFunctionAddress("cuGraphExecHostNodeSetParams"),
GraphExecChildGraphNodeSetParams = NVCUDA.getFunctionAddress("cuGraphExecChildGraphNodeSetParams"),
GraphExecEventRecordNodeSetEvent = NVCUDA.getFunctionAddress("cuGraphExecEventRecordNodeSetEvent"),
GraphExecEventWaitNodeSetEvent = NVCUDA.getFunctionAddress("cuGraphExecEventWaitNodeSetEvent"),
GraphExecExternalSemaphoresSignalNodeSetParams = NVCUDA.getFunctionAddress("cuGraphExecExternalSemaphoresSignalNodeSetParams"),
GraphExecExternalSemaphoresWaitNodeSetParams = NVCUDA.getFunctionAddress("cuGraphExecExternalSemaphoresWaitNodeSetParams"),
GraphUpload = NVCUDA.getFunctionAddress(__CUDA_API_PTSZ("cuGraphUpload")),
GraphLaunch = NVCUDA.getFunctionAddress(__CUDA_API_PTSZ("cuGraphLaunch")),
GraphExecDestroy = NVCUDA.getFunctionAddress("cuGraphExecDestroy"),
GraphDestroy = NVCUDA.getFunctionAddress("cuGraphDestroy"),
GraphExecUpdate = NVCUDA.getFunctionAddress("cuGraphExecUpdate"),
GraphKernelNodeCopyAttributes = NVCUDA.getFunctionAddress("cuGraphKernelNodeCopyAttributes"),
GraphKernelNodeGetAttribute = NVCUDA.getFunctionAddress("cuGraphKernelNodeGetAttribute"),
GraphKernelNodeSetAttribute = NVCUDA.getFunctionAddress("cuGraphKernelNodeSetAttribute"),
GraphDebugDotPrint = NVCUDA.getFunctionAddress("cuGraphDebugDotPrint"),
UserObjectCreate = NVCUDA.getFunctionAddress("cuUserObjectCreate"),
UserObjectRetain = NVCUDA.getFunctionAddress("cuUserObjectRetain"),
UserObjectRelease = NVCUDA.getFunctionAddress("cuUserObjectRelease"),
GraphRetainUserObject = NVCUDA.getFunctionAddress("cuGraphRetainUserObject"),
GraphReleaseUserObject = NVCUDA.getFunctionAddress("cuGraphReleaseUserObject"),
OccupancyMaxActiveBlocksPerMultiprocessor = NVCUDA.getFunctionAddress("cuOccupancyMaxActiveBlocksPerMultiprocessor"),
OccupancyMaxActiveBlocksPerMultiprocessorWithFlags = NVCUDA.getFunctionAddress("cuOccupancyMaxActiveBlocksPerMultiprocessorWithFlags"),
OccupancyMaxPotentialBlockSize = NVCUDA.getFunctionAddress("cuOccupancyMaxPotentialBlockSize"),
OccupancyMaxPotentialBlockSizeWithFlags = NVCUDA.getFunctionAddress("cuOccupancyMaxPotentialBlockSizeWithFlags"),
OccupancyAvailableDynamicSMemPerBlock = NVCUDA.getFunctionAddress("cuOccupancyAvailableDynamicSMemPerBlock"),
TexRefSetArray = apiGetFunctionAddress(NVCUDA, "cuTexRefSetArray"),
TexRefSetMipmappedArray = apiGetFunctionAddress(NVCUDA, "cuTexRefSetMipmappedArray"),
TexRefSetAddress = apiGetFunctionAddress(NVCUDA, __CUDA_API_VERSION("cuTexRefSetAddress", 2)),
TexRefSetAddress2D = apiGetFunctionAddress(NVCUDA, __CUDA_API_VERSION("cuTexRefSetAddress2D", 3)),
TexRefSetFormat = apiGetFunctionAddress(NVCUDA, "cuTexRefSetFormat"),
TexRefSetAddressMode = apiGetFunctionAddress(NVCUDA, "cuTexRefSetAddressMode"),
TexRefSetFilterMode = apiGetFunctionAddress(NVCUDA, "cuTexRefSetFilterMode"),
TexRefSetMipmapFilterMode = apiGetFunctionAddress(NVCUDA, "cuTexRefSetMipmapFilterMode"),
TexRefSetMipmapLevelBias = apiGetFunctionAddress(NVCUDA, "cuTexRefSetMipmapLevelBias"),
TexRefSetMipmapLevelClamp = apiGetFunctionAddress(NVCUDA, "cuTexRefSetMipmapLevelClamp"),
TexRefSetMaxAnisotropy = apiGetFunctionAddress(NVCUDA, "cuTexRefSetMaxAnisotropy"),
TexRefSetBorderColor = apiGetFunctionAddress(NVCUDA, "cuTexRefSetBorderColor"),
TexRefSetFlags = apiGetFunctionAddress(NVCUDA, "cuTexRefSetFlags"),
TexRefGetAddress = apiGetFunctionAddress(NVCUDA, __CUDA_API_VERSION("cuTexRefGetAddress", 2)),
TexRefGetArray = apiGetFunctionAddress(NVCUDA, "cuTexRefGetArray"),
TexRefGetMipmappedArray = apiGetFunctionAddress(NVCUDA, "cuTexRefGetMipmappedArray"),
TexRefGetAddressMode = apiGetFunctionAddress(NVCUDA, "cuTexRefGetAddressMode"),
TexRefGetFilterMode = apiGetFunctionAddress(NVCUDA, "cuTexRefGetFilterMode"),
TexRefGetFormat = apiGetFunctionAddress(NVCUDA, "cuTexRefGetFormat"),
TexRefGetMipmapFilterMode = apiGetFunctionAddress(NVCUDA, "cuTexRefGetMipmapFilterMode"),
TexRefGetMipmapLevelBias = apiGetFunctionAddress(NVCUDA, "cuTexRefGetMipmapLevelBias"),
TexRefGetMipmapLevelClamp = apiGetFunctionAddress(NVCUDA, "cuTexRefGetMipmapLevelClamp"),
TexRefGetMaxAnisotropy = apiGetFunctionAddress(NVCUDA, "cuTexRefGetMaxAnisotropy"),
TexRefGetBorderColor = apiGetFunctionAddress(NVCUDA, "cuTexRefGetBorderColor"),
TexRefGetFlags = apiGetFunctionAddress(NVCUDA, "cuTexRefGetFlags"),
TexRefCreate = apiGetFunctionAddress(NVCUDA, "cuTexRefCreate"),
TexRefDestroy = apiGetFunctionAddress(NVCUDA, "cuTexRefDestroy"),
SurfRefSetArray = apiGetFunctionAddress(NVCUDA, "cuSurfRefSetArray"),
SurfRefGetArray = apiGetFunctionAddress(NVCUDA, "cuSurfRefGetArray"),
TexObjectCreate = NVCUDA.getFunctionAddress("cuTexObjectCreate"),
TexObjectDestroy = NVCUDA.getFunctionAddress("cuTexObjectDestroy"),
TexObjectGetResourceDesc = NVCUDA.getFunctionAddress("cuTexObjectGetResourceDesc"),
TexObjectGetTextureDesc = NVCUDA.getFunctionAddress("cuTexObjectGetTextureDesc"),
TexObjectGetResourceViewDesc = NVCUDA.getFunctionAddress("cuTexObjectGetResourceViewDesc"),
SurfObjectCreate = NVCUDA.getFunctionAddress("cuSurfObjectCreate"),
SurfObjectDestroy = NVCUDA.getFunctionAddress("cuSurfObjectDestroy"),
SurfObjectGetResourceDesc = NVCUDA.getFunctionAddress("cuSurfObjectGetResourceDesc"),
DeviceCanAccessPeer = NVCUDA.getFunctionAddress("cuDeviceCanAccessPeer"),
CtxEnablePeerAccess = NVCUDA.getFunctionAddress("cuCtxEnablePeerAccess"),
CtxDisablePeerAccess = NVCUDA.getFunctionAddress("cuCtxDisablePeerAccess"),
DeviceGetP2PAttribute = NVCUDA.getFunctionAddress("cuDeviceGetP2PAttribute"),
GraphicsUnregisterResource = apiGetFunctionAddress(NVCUDA, "cuGraphicsUnregisterResource"),
GraphicsSubResourceGetMappedArray = apiGetFunctionAddress(NVCUDA, "cuGraphicsSubResourceGetMappedArray"),
GraphicsResourceGetMappedMipmappedArray = NVCUDA.getFunctionAddress("cuGraphicsResourceGetMappedMipmappedArray"),
GraphicsResourceGetMappedPointer = apiGetFunctionAddress(NVCUDA, __CUDA_API_VERSION("cuGraphicsResourceGetMappedPointer", 2)),
GraphicsResourceSetMapFlags = apiGetFunctionAddress(NVCUDA, __CUDA_API_VERSION("cuGraphicsResourceSetMapFlags", 2)),
GraphicsMapResources = apiGetFunctionAddress(NVCUDA, __CUDA_API_PTSZ("cuGraphicsMapResources")),
GraphicsUnmapResources = apiGetFunctionAddress(NVCUDA, __CUDA_API_PTSZ("cuGraphicsUnmapResources")),
GetProcAddress = NVCUDA.getFunctionAddress("cuGetProcAddress"),
GetExportTable = apiGetFunctionAddress(NVCUDA, "cuGetExportTable");
}
/** Returns the NVCUDA {@link SharedLibrary}. */
public static SharedLibrary getLibrary() {
return NVCUDA;
}
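/*
 * Availability sketch (illustrative, not part of the generated file): pointers loaded via
 * apiGetFunctionAddress are required and fail at load time if absent, while those loaded via
 * SharedLibrary#getFunctionAddress resolve to NULL when the installed driver predates them,
 * so callers can probe optional functionality before use:
 *
 *     if (CU.Functions.DeviceGetUuid == org.lwjgl.system.MemoryUtil.NULL) {
 *         // driver predates cuDeviceGetUuid (CUDA 9.2); fall back to another identifier
 *     }
 */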
/** CUDA IPC handle size. */
public static final int CU_IPC_HANDLE_SIZE = 64;
/**
* CUDA Ipc Mem Flags. ({@code CUipcMem_flags})
*
* Enum values:
*
*
* - {@link #CU_IPC_MEM_LAZY_ENABLE_PEER_ACCESS IPC_MEM_LAZY_ENABLE_PEER_ACCESS} - Automatically enable peer access between remote devices as needed
*
*/
public static final int CU_IPC_MEM_LAZY_ENABLE_PEER_ACCESS = 0x1;
/**
* CUDA Mem Attach Flags. ({@code CUmemAttach_flags})
*
* Enum values:
*
*
* - {@link #CU_MEM_ATTACH_GLOBAL MEM_ATTACH_GLOBAL} - Memory can be accessed by any stream on any device
* - {@link #CU_MEM_ATTACH_HOST MEM_ATTACH_HOST} - Memory cannot be accessed by any stream on any device
* - {@link #CU_MEM_ATTACH_SINGLE MEM_ATTACH_SINGLE} - Memory can only be accessed by a single stream on the associated device
*
*/
public static final int
CU_MEM_ATTACH_GLOBAL = 0x1,
CU_MEM_ATTACH_HOST = 0x2,
CU_MEM_ATTACH_SINGLE = 0x4;
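/*
 * Hedged usage sketch, assuming the generated wrapper cuMemAllocManaged(PointerBuffer, long, int)
 * defined later in this class: the attach flags select the initial visibility of a managed
 * allocation.
 *
 *     try (MemoryStack stack = stackPush()) {
 *         PointerBuffer dptr = stack.mallocPointer(1);
 *         int err = cuMemAllocManaged(dptr, bytes, CU_MEM_ATTACH_GLOBAL); // visible to any stream
 *         // check err against CUDA_SUCCESS before dereferencing dptr.get(0)
 *     }
 */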
/**
* Context creation flags. ({@code CUctx_flags})
*
* Enum values:
*
*
* - {@link #CU_CTX_SCHED_AUTO CTX_SCHED_AUTO} - Automatic scheduling
* - {@link #CU_CTX_SCHED_SPIN CTX_SCHED_SPIN} - Set spin as default scheduling
* - {@link #CU_CTX_SCHED_YIELD CTX_SCHED_YIELD} - Set yield as default scheduling
* - {@link #CU_CTX_SCHED_BLOCKING_SYNC CTX_SCHED_BLOCKING_SYNC} - Set blocking synchronization as default scheduling
* - {@link #CU_CTX_BLOCKING_SYNC CTX_BLOCKING_SYNC} - Set blocking synchronization as default scheduling. This flag was deprecated as of CUDA 4.0 and was replaced with {@link #CU_CTX_SCHED_BLOCKING_SYNC CTX_SCHED_BLOCKING_SYNC}.
* - {@link #CU_CTX_SCHED_MASK CTX_SCHED_MASK}
* - {@link #CU_CTX_MAP_HOST CTX_MAP_HOST} -
* This flag was deprecated as of CUDA 11.0 and it no longer has any effect.
*
* All contexts as of CUDA 3.2 behave as though the flag is enabled.
*
* - {@link #CU_CTX_LMEM_RESIZE_TO_MAX CTX_LMEM_RESIZE_TO_MAX} - Keep local memory allocation after launch
* - {@link #CU_CTX_FLAGS_MASK CTX_FLAGS_MASK}
*
*/
public static final int
CU_CTX_SCHED_AUTO = 0x0,
CU_CTX_SCHED_SPIN = 0x1,
CU_CTX_SCHED_YIELD = 0x2,
CU_CTX_SCHED_BLOCKING_SYNC = 0x4,
CU_CTX_BLOCKING_SYNC = 0x4,
CU_CTX_SCHED_MASK = 0x7,
CU_CTX_MAP_HOST = 0x8,
CU_CTX_LMEM_RESIZE_TO_MAX = 0x10,
CU_CTX_FLAGS_MASK = 0x1F;
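/*
 * Hedged usage sketch, assuming the generated wrappers cuInit(int), cuDeviceGet(IntBuffer, int)
 * and cuCtxCreate(PointerBuffer, int, int) defined later in this class: scheduling flags are
 * OR-ed into the flags argument at context creation.
 *
 *     try (MemoryStack stack = stackPush()) {
 *         IntBuffer     pdev = stack.mallocInt(1);
 *         PointerBuffer pctx = stack.mallocPointer(1);
 *         cuInit(0);
 *         cuDeviceGet(pdev, 0);
 *         cuCtxCreate(pctx, CU_CTX_SCHED_BLOCKING_SYNC, pdev.get(0)); // block host thread on sync
 *     }
 */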
/**
* Stream creation flags. ({@code CUstream_flags})
*
* Enum values:
*
*
* - {@link #CU_STREAM_DEFAULT STREAM_DEFAULT} - Default stream flag
* - {@link #CU_STREAM_NON_BLOCKING STREAM_NON_BLOCKING} - Stream does not synchronize with stream 0 (the {@code NULL} stream)
*
*/
public static final int
CU_STREAM_DEFAULT = 0x0,
CU_STREAM_NON_BLOCKING = 0x1;
/**
* Legacy stream handle.
*
* Stream handle that can be passed as a {@code CUstream} to use an implicit stream with legacy synchronization behavior.
*/
public static final long CU_STREAM_LEGACY = 0x1L;
/**
* Per-thread stream handle.
*
* Stream handle that can be passed as a {@code CUstream} to use an implicit stream with per-thread synchronization behavior.
*/
public static final long CU_STREAM_PER_THREAD = 0x2L;
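/*
 * Hedged usage sketch, assuming the generated wrapper cuMemcpyHtoDAsync(long, ByteBuffer, long)
 * defined later in this class: either special handle can be passed wherever a CUstream is
 * expected.
 *
 *     cuMemcpyHtoDAsync(dptr, hostBuffer, CU_STREAM_LEGACY);     // legacy NULL-stream semantics
 *     cuMemcpyHtoDAsync(dptr, hostBuffer, CU_STREAM_PER_THREAD); // per-thread default stream
 */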
/**
* Event creation flags. ({@code CUevent_flags})
*
* Enum values:
*
*
* - {@link #CU_EVENT_DEFAULT EVENT_DEFAULT} - Default event flag
* - {@link #CU_EVENT_BLOCKING_SYNC EVENT_BLOCKING_SYNC} - Event uses blocking synchronization
* - {@link #CU_EVENT_DISABLE_TIMING EVENT_DISABLE_TIMING} - Event will not record timing data
* - {@link #CU_EVENT_INTERPROCESS EVENT_INTERPROCESS} - Event is suitable for interprocess use. {@link #CU_EVENT_DISABLE_TIMING EVENT_DISABLE_TIMING} must be set
*
*/
public static final int
CU_EVENT_DEFAULT = 0x0,
CU_EVENT_BLOCKING_SYNC = 0x1,
CU_EVENT_DISABLE_TIMING = 0x2,
CU_EVENT_INTERPROCESS = 0x4;
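/*
 * Hedged usage sketch, assuming the generated wrapper cuEventCreate(PointerBuffer, int) defined
 * later in this class: an event intended for interprocess use must also disable timing, as noted
 * above.
 *
 *     try (MemoryStack stack = stackPush()) {
 *         PointerBuffer phEvent = stack.mallocPointer(1);
 *         cuEventCreate(phEvent, CU_EVENT_DISABLE_TIMING | CU_EVENT_INTERPROCESS);
 *         long event = phEvent.get(0); // share via cuIpcGetEventHandle
 *     }
 */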
/**
* Event record flags. ({@code CUevent_record_flags})
*
* Enum values:
*
*
* - {@link #CU_EVENT_RECORD_DEFAULT EVENT_RECORD_DEFAULT} - Default event record flag
* - {@link #CU_EVENT_RECORD_EXTERNAL EVENT_RECORD_EXTERNAL} -
* When using stream capture, create an event record node instead of the default behavior.
*
* This flag is invalid when used outside of capture.
*/
public static final int
CU_EVENT_RECORD_DEFAULT = 0x0,
CU_EVENT_RECORD_EXTERNAL = 0x1;
/**
* Event wait flags. ({@code CUevent_wait_flags})
*
* Enum values:
*
*
* - {@link #CU_EVENT_WAIT_DEFAULT EVENT_WAIT_DEFAULT} - Default event wait flag
* - {@link #CU_EVENT_WAIT_EXTERNAL EVENT_WAIT_EXTERNAL} -
* When using stream capture, create an event wait node instead of the default behavior.
*
* This flag is invalid when used outside of capture.
*/
public static final int
CU_EVENT_WAIT_DEFAULT = 0x0,
CU_EVENT_WAIT_EXTERNAL = 0x1;
/**
* Flags for {@link #cuStreamWaitValue32 StreamWaitValue32} and {@link #cuStreamWaitValue64 StreamWaitValue64}. ({@code CUstreamWaitValue_flags})
*
* Enum values:
*
*
* - {@link #CU_STREAM_WAIT_VALUE_GEQ STREAM_WAIT_VALUE_GEQ} -
* Wait until {@code (int32_t)(*addr - value) >= 0} (or {@code int64_t} for 64 bit values). Note this is a cyclic comparison which ignores
* wraparound. (Default behavior.)
*
* - {@link #CU_STREAM_WAIT_VALUE_EQ STREAM_WAIT_VALUE_EQ} - Wait until {@code *addr == value}.
* - {@link #CU_STREAM_WAIT_VALUE_AND STREAM_WAIT_VALUE_AND} - Wait until {@code (*addr & value) != 0}.
* - {@link #CU_STREAM_WAIT_VALUE_NOR STREAM_WAIT_VALUE_NOR} -
* Wait until {@code ~(*addr | value) != 0}. Support for this operation can be queried with {@link #cuDeviceGetAttribute DeviceGetAttribute} and
* {@link #CU_DEVICE_ATTRIBUTE_CAN_USE_STREAM_WAIT_VALUE_NOR DEVICE_ATTRIBUTE_CAN_USE_STREAM_WAIT_VALUE_NOR}.
*
* - {@link #CU_STREAM_WAIT_VALUE_FLUSH STREAM_WAIT_VALUE_FLUSH} -
* Follow the wait operation with a flush of outstanding remote writes.
*
* This means that, if a remote write operation is guaranteed to have reached the device before the wait can be satisfied, that write is guaranteed to
* be visible to downstream device work. The device is permitted to reorder remote writes internally. For example, this flag would be required if two
* remote writes arrive in a defined order, the wait is satisfied by the second write, and downstream work needs to observe the first write.
*
* Support for this operation is restricted to selected platforms and can be queried with {@code CU_DEVICE_ATTRIBUTE_CAN_USE_WAIT_VALUE_FLUSH}.
*
*
*/
public static final int
CU_STREAM_WAIT_VALUE_GEQ = 0x0,
CU_STREAM_WAIT_VALUE_EQ = 0x1,
CU_STREAM_WAIT_VALUE_AND = 0x2,
CU_STREAM_WAIT_VALUE_NOR = 0x3,
CU_STREAM_WAIT_VALUE_FLUSH = 1<<30;
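/*
 * Hedged usage sketch, assuming the generated wrapper cuStreamWaitValue32(long, long, int, int)
 * defined later in this class: the enqueued wait stalls work submitted to the stream after it
 * until the 32-bit word at a device address satisfies the chosen comparison.
 *
 *     // stall the stream until *addr >= target (cyclic GEQ comparison, the default):
 *     cuStreamWaitValue32(stream, addr, target, CU_STREAM_WAIT_VALUE_GEQ);
 *
 * NOR support should first be queried through cuDeviceGetAttribute with
 * CU_DEVICE_ATTRIBUTE_CAN_USE_STREAM_WAIT_VALUE_NOR.
 */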
/**
* Flags for {@link #cuStreamWriteValue32 StreamWriteValue32}. ({@code CUstreamWriteValue_flags})
*
* Enum values:
*
*
* - {@link #CU_STREAM_WRITE_VALUE_DEFAULT STREAM_WRITE_VALUE_DEFAULT} - Default behavior
* - {@link #CU_STREAM_WRITE_VALUE_NO_MEMORY_BARRIER STREAM_WRITE_VALUE_NO_MEMORY_BARRIER} -
* Permits the write to be reordered with writes which were issued before it, as a performance optimization.
*
* Normally, {@link #cuStreamWriteValue32 StreamWriteValue32} will provide a memory fence before the write, which has similar semantics to {@code __threadfence_system()} but is
* scoped to the stream rather than a CUDA thread.
*
*
*/
public static final int
CU_STREAM_WRITE_VALUE_DEFAULT = 0x0,
CU_STREAM_WRITE_VALUE_NO_MEMORY_BARRIER = 0x1;
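/*
 * Hedged usage sketch, assuming the generated wrapper cuStreamWriteValue32(long, long, int, int)
 * defined later in this class; pairs with the wait sketch above for device-side signaling.
 *
 *     cuStreamWriteValue32(stream, addr, 42, CU_STREAM_WRITE_VALUE_DEFAULT);           // fenced
 *     cuStreamWriteValue32(stream, addr, 42, CU_STREAM_WRITE_VALUE_NO_MEMORY_BARRIER); // relaxed
 */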
/**
* Operations for {@link #cuStreamBatchMemOp StreamBatchMemOp}. ({@code CUstreamBatchMemOpType})
*
* Enum values:
*
*
* - {@link #CU_STREAM_MEM_OP_WAIT_VALUE_32 STREAM_MEM_OP_WAIT_VALUE_32} - Represents a {@link #cuStreamWaitValue32 StreamWaitValue32} operation
* - {@link #CU_STREAM_MEM_OP_WRITE_VALUE_32 STREAM_MEM_OP_WRITE_VALUE_32} - Represents a {@link #cuStreamWriteValue32 StreamWriteValue32} operation
* - {@link #CU_STREAM_MEM_OP_WAIT_VALUE_64 STREAM_MEM_OP_WAIT_VALUE_64} - Represents a {@link #cuStreamWaitValue64 StreamWaitValue64} operation
* - {@link #CU_STREAM_MEM_OP_WRITE_VALUE_64 STREAM_MEM_OP_WRITE_VALUE_64} - Represents a {@link #cuStreamWriteValue64 StreamWriteValue64} operation
* - {@link #CU_STREAM_MEM_OP_FLUSH_REMOTE_WRITES STREAM_MEM_OP_FLUSH_REMOTE_WRITES} - This has the same effect as {@link #CU_STREAM_WAIT_VALUE_FLUSH STREAM_WAIT_VALUE_FLUSH}, but as a standalone operation.
*
*/
public static final int
CU_STREAM_MEM_OP_WAIT_VALUE_32 = 0x1,
CU_STREAM_MEM_OP_WRITE_VALUE_32 = 0x2,
CU_STREAM_MEM_OP_WAIT_VALUE_64 = 0x4,
CU_STREAM_MEM_OP_WRITE_VALUE_64 = 0x5,
CU_STREAM_MEM_OP_FLUSH_REMOTE_WRITES = 0x3;
/**
* Occupancy calculator flag. ({@code CUoccupancy_flags})
*
* Enum values:
*
*
* - {@link #CU_OCCUPANCY_DEFAULT OCCUPANCY_DEFAULT} - Default behavior
* - {@link #CU_OCCUPANCY_DISABLE_CACHING_OVERRIDE OCCUPANCY_DISABLE_CACHING_OVERRIDE} - Assume global caching is enabled and cannot be automatically turned off
*
*/
public static final int
CU_OCCUPANCY_DEFAULT = 0x0,
CU_OCCUPANCY_DISABLE_CACHING_OVERRIDE = 0x1;
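/*
 * Hedged usage sketch, assuming the generated wrapper
 * cuOccupancyMaxActiveBlocksPerMultiprocessorWithFlags(IntBuffer, long, int, long, int) defined
 * later in this class:
 *
 *     try (MemoryStack stack = stackPush()) {
 *         IntBuffer numBlocks = stack.mallocInt(1);
 *         // blockSize=256, no dynamic shared memory, ignore any caching override:
 *         cuOccupancyMaxActiveBlocksPerMultiprocessorWithFlags(
 *             numBlocks, function, 256, 0, CU_OCCUPANCY_DISABLE_CACHING_OVERRIDE);
 *     }
 */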
/**
* Flags for {@link #cuStreamUpdateCaptureDependencies StreamUpdateCaptureDependencies}. ({@code CUstreamUpdateCaptureDependencies_flags})
*
* Enum values:
*
*
* - {@link #CU_STREAM_ADD_CAPTURE_DEPENDENCIES STREAM_ADD_CAPTURE_DEPENDENCIES} - Add new nodes to the dependency set
* - {@link #CU_STREAM_SET_CAPTURE_DEPENDENCIES STREAM_SET_CAPTURE_DEPENDENCIES} - Replace the dependency set with the new nodes
*
*/
public static final int
CU_STREAM_ADD_CAPTURE_DEPENDENCIES = 0x0,
CU_STREAM_SET_CAPTURE_DEPENDENCIES = 0x1;
/**
* Array formats. ({@code CUarray_format})
*
* Enum values:
*
*
* - {@link #CU_AD_FORMAT_UNSIGNED_INT8 AD_FORMAT_UNSIGNED_INT8} - Unsigned 8-bit integers
* - {@link #CU_AD_FORMAT_UNSIGNED_INT16 AD_FORMAT_UNSIGNED_INT16} - Unsigned 16-bit integers
* - {@link #CU_AD_FORMAT_UNSIGNED_INT32 AD_FORMAT_UNSIGNED_INT32} - Unsigned 32-bit integers
* - {@link #CU_AD_FORMAT_SIGNED_INT8 AD_FORMAT_SIGNED_INT8} - Signed 8-bit integers
* - {@link #CU_AD_FORMAT_SIGNED_INT16 AD_FORMAT_SIGNED_INT16} - Signed 16-bit integers
* - {@link #CU_AD_FORMAT_SIGNED_INT32 AD_FORMAT_SIGNED_INT32} - Signed 32-bit integers
* - {@link #CU_AD_FORMAT_HALF AD_FORMAT_HALF} - 16-bit floating point
* - {@link #CU_AD_FORMAT_FLOAT AD_FORMAT_FLOAT} - 32-bit floating point
* - {@link #CU_AD_FORMAT_NV12 AD_FORMAT_NV12} - 8-bit YUV planar format, with 4:2:0 sampling
* - {@link #CU_AD_FORMAT_UNORM_INT8X1 AD_FORMAT_UNORM_INT8X1} - 1 channel unsigned 8-bit normalized integer
* - {@link #CU_AD_FORMAT_UNORM_INT8X2 AD_FORMAT_UNORM_INT8X2} - 2 channel unsigned 8-bit normalized integer
* - {@link #CU_AD_FORMAT_UNORM_INT8X4 AD_FORMAT_UNORM_INT8X4} - 4 channel unsigned 8-bit normalized integer
* - {@link #CU_AD_FORMAT_UNORM_INT16X1 AD_FORMAT_UNORM_INT16X1} - 1 channel unsigned 16-bit normalized integer
* - {@link #CU_AD_FORMAT_UNORM_INT16X2 AD_FORMAT_UNORM_INT16X2} - 2 channel unsigned 16-bit normalized integer
* - {@link #CU_AD_FORMAT_UNORM_INT16X4 AD_FORMAT_UNORM_INT16X4} - 4 channel unsigned 16-bit normalized integer
* - {@link #CU_AD_FORMAT_SNORM_INT8X1 AD_FORMAT_SNORM_INT8X1} - 1 channel signed 8-bit normalized integer
* - {@link #CU_AD_FORMAT_SNORM_INT8X2 AD_FORMAT_SNORM_INT8X2} - 2 channel signed 8-bit normalized integer
* - {@link #CU_AD_FORMAT_SNORM_INT8X4 AD_FORMAT_SNORM_INT8X4} - 4 channel signed 8-bit normalized integer
* - {@link #CU_AD_FORMAT_SNORM_INT16X1 AD_FORMAT_SNORM_INT16X1} - 1 channel signed 16-bit normalized integer
* - {@link #CU_AD_FORMAT_SNORM_INT16X2 AD_FORMAT_SNORM_INT16X2} - 2 channel signed 16-bit normalized integer
* - {@link #CU_AD_FORMAT_SNORM_INT16X4 AD_FORMAT_SNORM_INT16X4} - 4 channel signed 16-bit normalized integer
* - {@link #CU_AD_FORMAT_BC1_UNORM AD_FORMAT_BC1_UNORM} - 4 channel unsigned normalized block-compressed (BC1 compression) format
* - {@link #CU_AD_FORMAT_BC1_UNORM_SRGB AD_FORMAT_BC1_UNORM_SRGB} - 4 channel unsigned normalized block-compressed (BC1 compression) format with sRGB encoding
* - {@link #CU_AD_FORMAT_BC2_UNORM AD_FORMAT_BC2_UNORM} - 4 channel unsigned normalized block-compressed (BC2 compression) format
* - {@link #CU_AD_FORMAT_BC2_UNORM_SRGB AD_FORMAT_BC2_UNORM_SRGB} - 4 channel unsigned normalized block-compressed (BC2 compression) format with sRGB encoding
* - {@link #CU_AD_FORMAT_BC3_UNORM AD_FORMAT_BC3_UNORM} - 4 channel unsigned normalized block-compressed (BC3 compression) format
* - {@link #CU_AD_FORMAT_BC3_UNORM_SRGB AD_FORMAT_BC3_UNORM_SRGB} - 4 channel unsigned normalized block-compressed (BC3 compression) format with sRGB encoding
* - {@link #CU_AD_FORMAT_BC4_UNORM AD_FORMAT_BC4_UNORM} - 1 channel unsigned normalized block-compressed (BC4 compression) format
* - {@link #CU_AD_FORMAT_BC4_SNORM AD_FORMAT_BC4_SNORM} - 1 channel signed normalized block-compressed (BC4 compression) format
* - {@link #CU_AD_FORMAT_BC5_UNORM AD_FORMAT_BC5_UNORM} - 2 channel unsigned normalized block-compressed (BC5 compression) format
* - {@link #CU_AD_FORMAT_BC5_SNORM AD_FORMAT_BC5_SNORM} - 2 channel signed normalized block-compressed (BC5 compression) format
* - {@link #CU_AD_FORMAT_BC6H_UF16 AD_FORMAT_BC6H_UF16} - 3 channel unsigned half-float block-compressed (BC6H compression) format
* - {@link #CU_AD_FORMAT_BC6H_SF16 AD_FORMAT_BC6H_SF16} - 3 channel signed half-float block-compressed (BC6H compression) format
* - {@link #CU_AD_FORMAT_BC7_UNORM AD_FORMAT_BC7_UNORM} - 4 channel unsigned normalized block-compressed (BC7 compression) format
* - {@link #CU_AD_FORMAT_BC7_UNORM_SRGB AD_FORMAT_BC7_UNORM_SRGB} - 4 channel unsigned normalized block-compressed (BC7 compression) format with sRGB encoding
*
*/
public static final int
CU_AD_FORMAT_UNSIGNED_INT8 = 0x01,
CU_AD_FORMAT_UNSIGNED_INT16 = 0x02,
CU_AD_FORMAT_UNSIGNED_INT32 = 0x03,
CU_AD_FORMAT_SIGNED_INT8 = 0x08,
CU_AD_FORMAT_SIGNED_INT16 = 0x09,
CU_AD_FORMAT_SIGNED_INT32 = 0x0a,
CU_AD_FORMAT_HALF = 0x10,
CU_AD_FORMAT_FLOAT = 0x20,
CU_AD_FORMAT_NV12 = 0xb0,
CU_AD_FORMAT_UNORM_INT8X1 = 0xc0,
CU_AD_FORMAT_UNORM_INT8X2 = 0xc1,
CU_AD_FORMAT_UNORM_INT8X4 = 0xc2,
CU_AD_FORMAT_UNORM_INT16X1 = 0xc3,
CU_AD_FORMAT_UNORM_INT16X2 = 0xc4,
CU_AD_FORMAT_UNORM_INT16X4 = 0xc5,
CU_AD_FORMAT_SNORM_INT8X1 = 0xc6,
CU_AD_FORMAT_SNORM_INT8X2 = 0xc7,
CU_AD_FORMAT_SNORM_INT8X4 = 0xc8,
CU_AD_FORMAT_SNORM_INT16X1 = 0xc9,
CU_AD_FORMAT_SNORM_INT16X2 = 0xca,
CU_AD_FORMAT_SNORM_INT16X4 = 0xcb,
CU_AD_FORMAT_BC1_UNORM = 0x91,
CU_AD_FORMAT_BC1_UNORM_SRGB = 0x92,
CU_AD_FORMAT_BC2_UNORM = 0x93,
CU_AD_FORMAT_BC2_UNORM_SRGB = 0x94,
CU_AD_FORMAT_BC3_UNORM = 0x95,
CU_AD_FORMAT_BC3_UNORM_SRGB = 0x96,
CU_AD_FORMAT_BC4_UNORM = 0x97,
CU_AD_FORMAT_BC4_SNORM = 0x98,
CU_AD_FORMAT_BC5_UNORM = 0x99,
CU_AD_FORMAT_BC5_SNORM = 0x9a,
CU_AD_FORMAT_BC6H_UF16 = 0x9b,
CU_AD_FORMAT_BC6H_SF16 = 0x9c,
CU_AD_FORMAT_BC7_UNORM = 0x9d,
CU_AD_FORMAT_BC7_UNORM_SRGB = 0x9e;
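/*
 * Hedged usage sketch, assuming the CUDA_ARRAY_DESCRIPTOR struct class generated elsewhere in
 * these bindings and the wrapper cuArrayCreate(PointerBuffer, CUDA_ARRAY_DESCRIPTOR): the formats
 * above populate the descriptor's Format member.
 *
 *     try (MemoryStack stack = stackPush()) {
 *         CUDA_ARRAY_DESCRIPTOR desc = CUDA_ARRAY_DESCRIPTOR.calloc(stack)
 *             .Width(width)
 *             .Height(height)
 *             .Format(CU_AD_FORMAT_FLOAT) // 32-bit floating point texels
 *             .NumChannels(1);
 *         PointerBuffer pArray = stack.mallocPointer(1);
 *         cuArrayCreate(pArray, desc);
 *     }
 */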
/**
* Texture reference addressing modes. ({@code CUaddress_mode})
*
* Enum values:
*
*
* - {@link #CU_TR_ADDRESS_MODE_WRAP TR_ADDRESS_MODE_WRAP} - Wrapping address mode
* - {@link #CU_TR_ADDRESS_MODE_CLAMP TR_ADDRESS_MODE_CLAMP} - Clamp to edge address mode
* - {@link #CU_TR_ADDRESS_MODE_MIRROR TR_ADDRESS_MODE_MIRROR} - Mirror address mode
* - {@link #CU_TR_ADDRESS_MODE_BORDER TR_ADDRESS_MODE_BORDER} - Border address mode
*
*/
public static final int
CU_TR_ADDRESS_MODE_WRAP = 0x0,
CU_TR_ADDRESS_MODE_CLAMP = 0x1,
CU_TR_ADDRESS_MODE_MIRROR = 0x2,
CU_TR_ADDRESS_MODE_BORDER = 0x3;
/**
* Texture reference filtering modes. ({@code CUfilter_mode})
*
* Enum values:
*
*
* - {@link #CU_TR_FILTER_MODE_POINT TR_FILTER_MODE_POINT} - Point filter mode
* - {@link #CU_TR_FILTER_MODE_LINEAR TR_FILTER_MODE_LINEAR} - Linear filter mode
*
*/
public static final int
CU_TR_FILTER_MODE_POINT = 0x0,
CU_TR_FILTER_MODE_LINEAR = 0x1;
/**
* Device properties. ({@code CUdevice_attribute})
*
* Enum values:
*
*
* - {@link #CU_DEVICE_ATTRIBUTE_MAX_THREADS_PER_BLOCK DEVICE_ATTRIBUTE_MAX_THREADS_PER_BLOCK} - Maximum number of threads per block
* - {@link #CU_DEVICE_ATTRIBUTE_MAX_BLOCK_DIM_X DEVICE_ATTRIBUTE_MAX_BLOCK_DIM_X} - Maximum block dimension X
* - {@link #CU_DEVICE_ATTRIBUTE_MAX_BLOCK_DIM_Y DEVICE_ATTRIBUTE_MAX_BLOCK_DIM_Y} - Maximum block dimension Y
* - {@link #CU_DEVICE_ATTRIBUTE_MAX_BLOCK_DIM_Z DEVICE_ATTRIBUTE_MAX_BLOCK_DIM_Z} - Maximum block dimension Z
* - {@link #CU_DEVICE_ATTRIBUTE_MAX_GRID_DIM_X DEVICE_ATTRIBUTE_MAX_GRID_DIM_X} - Maximum grid dimension X
* - {@link #CU_DEVICE_ATTRIBUTE_MAX_GRID_DIM_Y DEVICE_ATTRIBUTE_MAX_GRID_DIM_Y} - Maximum grid dimension Y
* - {@link #CU_DEVICE_ATTRIBUTE_MAX_GRID_DIM_Z DEVICE_ATTRIBUTE_MAX_GRID_DIM_Z} - Maximum grid dimension Z
* - {@link #CU_DEVICE_ATTRIBUTE_MAX_SHARED_MEMORY_PER_BLOCK DEVICE_ATTRIBUTE_MAX_SHARED_MEMORY_PER_BLOCK} - Maximum shared memory available per block in bytes
* - {@link #CU_DEVICE_ATTRIBUTE_SHARED_MEMORY_PER_BLOCK DEVICE_ATTRIBUTE_SHARED_MEMORY_PER_BLOCK} - Deprecated, use {@link #CU_DEVICE_ATTRIBUTE_MAX_SHARED_MEMORY_PER_BLOCK DEVICE_ATTRIBUTE_MAX_SHARED_MEMORY_PER_BLOCK}
* - {@link #CU_DEVICE_ATTRIBUTE_TOTAL_CONSTANT_MEMORY DEVICE_ATTRIBUTE_TOTAL_CONSTANT_MEMORY} - Memory available on device for __constant__ variables in a CUDA C kernel in bytes
* - {@link #CU_DEVICE_ATTRIBUTE_WARP_SIZE DEVICE_ATTRIBUTE_WARP_SIZE} - Warp size in threads
* - {@link #CU_DEVICE_ATTRIBUTE_MAX_PITCH DEVICE_ATTRIBUTE_MAX_PITCH} - Maximum pitch in bytes allowed by memory copies
* - {@link #CU_DEVICE_ATTRIBUTE_MAX_REGISTERS_PER_BLOCK DEVICE_ATTRIBUTE_MAX_REGISTERS_PER_BLOCK} - Maximum number of 32-bit registers available per block
* - {@link #CU_DEVICE_ATTRIBUTE_REGISTERS_PER_BLOCK DEVICE_ATTRIBUTE_REGISTERS_PER_BLOCK} - Deprecated, use {@link #CU_DEVICE_ATTRIBUTE_MAX_REGISTERS_PER_BLOCK DEVICE_ATTRIBUTE_MAX_REGISTERS_PER_BLOCK}
* - {@link #CU_DEVICE_ATTRIBUTE_CLOCK_RATE DEVICE_ATTRIBUTE_CLOCK_RATE} - Typical clock frequency in kilohertz
* - {@link #CU_DEVICE_ATTRIBUTE_TEXTURE_ALIGNMENT DEVICE_ATTRIBUTE_TEXTURE_ALIGNMENT} - Alignment requirement for textures
* - {@link #CU_DEVICE_ATTRIBUTE_GPU_OVERLAP DEVICE_ATTRIBUTE_GPU_OVERLAP} - Device can possibly copy memory and execute a kernel concurrently. Deprecated. Use instead {@link #CU_DEVICE_ATTRIBUTE_ASYNC_ENGINE_COUNT DEVICE_ATTRIBUTE_ASYNC_ENGINE_COUNT}.
* - {@link #CU_DEVICE_ATTRIBUTE_MULTIPROCESSOR_COUNT DEVICE_ATTRIBUTE_MULTIPROCESSOR_COUNT} - Number of multiprocessors on device
* - {@link #CU_DEVICE_ATTRIBUTE_KERNEL_EXEC_TIMEOUT DEVICE_ATTRIBUTE_KERNEL_EXEC_TIMEOUT} - Specifies whether there is a run time limit on kernels
* - {@link #CU_DEVICE_ATTRIBUTE_INTEGRATED DEVICE_ATTRIBUTE_INTEGRATED} - Device is integrated with host memory
* - {@link #CU_DEVICE_ATTRIBUTE_CAN_MAP_HOST_MEMORY DEVICE_ATTRIBUTE_CAN_MAP_HOST_MEMORY} - Device can map host memory into CUDA address space
* - {@link #CU_DEVICE_ATTRIBUTE_COMPUTE_MODE DEVICE_ATTRIBUTE_COMPUTE_MODE} - Compute mode (See {@code CUcomputemode} for details)
* - {@link #CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE1D_WIDTH DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE1D_WIDTH} - Maximum 1D texture width
* - {@link #CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE2D_WIDTH DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE2D_WIDTH} - Maximum 2D texture width
* - {@link #CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE2D_HEIGHT DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE2D_HEIGHT} - Maximum 2D texture height
* - {@link #CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE3D_WIDTH DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE3D_WIDTH} - Maximum 3D texture width
* - {@link #CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE3D_HEIGHT DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE3D_HEIGHT} - Maximum 3D texture height
* - {@link #CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE3D_DEPTH DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE3D_DEPTH} - Maximum 3D texture depth
* - {@link #CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE2D_LAYERED_WIDTH DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE2D_LAYERED_WIDTH} - Maximum 2D layered texture width
* - {@link #CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE2D_LAYERED_HEIGHT DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE2D_LAYERED_HEIGHT} - Maximum 2D layered texture height
* - {@link #CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE2D_LAYERED_LAYERS DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE2D_LAYERED_LAYERS} - Maximum layers in a 2D layered texture
* - {@link #CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE2D_ARRAY_WIDTH DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE2D_ARRAY_WIDTH} - Deprecated, use {@link #CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE2D_LAYERED_WIDTH DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE2D_LAYERED_WIDTH}
* - {@link #CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE2D_ARRAY_HEIGHT DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE2D_ARRAY_HEIGHT} - Deprecated, use {@link #CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE2D_LAYERED_HEIGHT DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE2D_LAYERED_HEIGHT}
* - {@link #CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE2D_ARRAY_NUMSLICES DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE2D_ARRAY_NUMSLICES} - Deprecated, use {@link #CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE2D_LAYERED_LAYERS DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE2D_LAYERED_LAYERS}
* - {@link #CU_DEVICE_ATTRIBUTE_SURFACE_ALIGNMENT DEVICE_ATTRIBUTE_SURFACE_ALIGNMENT} - Alignment requirement for surfaces
* - {@link #CU_DEVICE_ATTRIBUTE_CONCURRENT_KERNELS DEVICE_ATTRIBUTE_CONCURRENT_KERNELS} - Device can possibly execute multiple kernels concurrently
* - {@link #CU_DEVICE_ATTRIBUTE_ECC_ENABLED DEVICE_ATTRIBUTE_ECC_ENABLED} - Device has ECC support enabled
* - {@link #CU_DEVICE_ATTRIBUTE_PCI_BUS_ID DEVICE_ATTRIBUTE_PCI_BUS_ID} - PCI bus ID of the device
* - {@link #CU_DEVICE_ATTRIBUTE_PCI_DEVICE_ID DEVICE_ATTRIBUTE_PCI_DEVICE_ID} - PCI device ID of the device
* - {@link #CU_DEVICE_ATTRIBUTE_TCC_DRIVER DEVICE_ATTRIBUTE_TCC_DRIVER} - Device is using TCC driver model
* - {@link #CU_DEVICE_ATTRIBUTE_MEMORY_CLOCK_RATE DEVICE_ATTRIBUTE_MEMORY_CLOCK_RATE} - Peak memory clock frequency in kilohertz
* - {@link #CU_DEVICE_ATTRIBUTE_GLOBAL_MEMORY_BUS_WIDTH DEVICE_ATTRIBUTE_GLOBAL_MEMORY_BUS_WIDTH} - Global memory bus width in bits
* - {@link #CU_DEVICE_ATTRIBUTE_L2_CACHE_SIZE DEVICE_ATTRIBUTE_L2_CACHE_SIZE} - Size of L2 cache in bytes
* - {@link #CU_DEVICE_ATTRIBUTE_MAX_THREADS_PER_MULTIPROCESSOR DEVICE_ATTRIBUTE_MAX_THREADS_PER_MULTIPROCESSOR} - Maximum resident threads per multiprocessor
* - {@link #CU_DEVICE_ATTRIBUTE_ASYNC_ENGINE_COUNT DEVICE_ATTRIBUTE_ASYNC_ENGINE_COUNT} - Number of asynchronous engines
* - {@link #CU_DEVICE_ATTRIBUTE_UNIFIED_ADDRESSING DEVICE_ATTRIBUTE_UNIFIED_ADDRESSING} - Device shares a unified address space with the host
* - {@link #CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE1D_LAYERED_WIDTH DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE1D_LAYERED_WIDTH} - Maximum 1D layered texture width
* - {@link #CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE1D_LAYERED_LAYERS DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE1D_LAYERED_LAYERS} - Maximum layers in a 1D layered texture
* - {@link #CU_DEVICE_ATTRIBUTE_CAN_TEX2D_GATHER DEVICE_ATTRIBUTE_CAN_TEX2D_GATHER} - Deprecated, do not use.
* - {@link #CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE2D_GATHER_WIDTH DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE2D_GATHER_WIDTH} - Maximum 2D texture width if {@link #CUDA_ARRAY3D_TEXTURE_GATHER} is set
* - {@link #CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE2D_GATHER_HEIGHT DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE2D_GATHER_HEIGHT} - Maximum 2D texture height if {@link #CUDA_ARRAY3D_TEXTURE_GATHER} is set
* - {@link #CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE3D_WIDTH_ALTERNATE DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE3D_WIDTH_ALTERNATE} - Alternate maximum 3D texture width
* - {@link #CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE3D_HEIGHT_ALTERNATE DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE3D_HEIGHT_ALTERNATE} - Alternate maximum 3D texture height
* - {@link #CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE3D_DEPTH_ALTERNATE DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE3D_DEPTH_ALTERNATE} - Alternate maximum 3D texture depth
* - {@link #CU_DEVICE_ATTRIBUTE_PCI_DOMAIN_ID DEVICE_ATTRIBUTE_PCI_DOMAIN_ID} - PCI domain ID of the device
* - {@link #CU_DEVICE_ATTRIBUTE_TEXTURE_PITCH_ALIGNMENT DEVICE_ATTRIBUTE_TEXTURE_PITCH_ALIGNMENT} - Pitch alignment requirement for textures
* - {@link #CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURECUBEMAP_WIDTH DEVICE_ATTRIBUTE_MAXIMUM_TEXTURECUBEMAP_WIDTH} - Maximum cubemap texture width/height
* - {@link #CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURECUBEMAP_LAYERED_WIDTH DEVICE_ATTRIBUTE_MAXIMUM_TEXTURECUBEMAP_LAYERED_WIDTH} - Maximum cubemap layered texture width/height
* - {@link #CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURECUBEMAP_LAYERED_LAYERS DEVICE_ATTRIBUTE_MAXIMUM_TEXTURECUBEMAP_LAYERED_LAYERS} - Maximum layers in a cubemap layered texture
* - {@link #CU_DEVICE_ATTRIBUTE_MAXIMUM_SURFACE1D_WIDTH DEVICE_ATTRIBUTE_MAXIMUM_SURFACE1D_WIDTH} - Maximum 1D surface width
* - {@link #CU_DEVICE_ATTRIBUTE_MAXIMUM_SURFACE2D_WIDTH DEVICE_ATTRIBUTE_MAXIMUM_SURFACE2D_WIDTH} - Maximum 2D surface width
* - {@link #CU_DEVICE_ATTRIBUTE_MAXIMUM_SURFACE2D_HEIGHT DEVICE_ATTRIBUTE_MAXIMUM_SURFACE2D_HEIGHT} - Maximum 2D surface height
* - {@link #CU_DEVICE_ATTRIBUTE_MAXIMUM_SURFACE3D_WIDTH DEVICE_ATTRIBUTE_MAXIMUM_SURFACE3D_WIDTH} - Maximum 3D surface width
* - {@link #CU_DEVICE_ATTRIBUTE_MAXIMUM_SURFACE3D_HEIGHT DEVICE_ATTRIBUTE_MAXIMUM_SURFACE3D_HEIGHT} - Maximum 3D surface height
* - {@link #CU_DEVICE_ATTRIBUTE_MAXIMUM_SURFACE3D_DEPTH DEVICE_ATTRIBUTE_MAXIMUM_SURFACE3D_DEPTH} - Maximum 3D surface depth
* - {@link #CU_DEVICE_ATTRIBUTE_MAXIMUM_SURFACE1D_LAYERED_WIDTH DEVICE_ATTRIBUTE_MAXIMUM_SURFACE1D_LAYERED_WIDTH} - Maximum 1D layered surface width
* - {@link #CU_DEVICE_ATTRIBUTE_MAXIMUM_SURFACE1D_LAYERED_LAYERS DEVICE_ATTRIBUTE_MAXIMUM_SURFACE1D_LAYERED_LAYERS} - Maximum layers in a 1D layered surface
* - {@link #CU_DEVICE_ATTRIBUTE_MAXIMUM_SURFACE2D_LAYERED_WIDTH DEVICE_ATTRIBUTE_MAXIMUM_SURFACE2D_LAYERED_WIDTH} - Maximum 2D layered surface width
* - {@link #CU_DEVICE_ATTRIBUTE_MAXIMUM_SURFACE2D_LAYERED_HEIGHT DEVICE_ATTRIBUTE_MAXIMUM_SURFACE2D_LAYERED_HEIGHT} - Maximum 2D layered surface height
* - {@link #CU_DEVICE_ATTRIBUTE_MAXIMUM_SURFACE2D_LAYERED_LAYERS DEVICE_ATTRIBUTE_MAXIMUM_SURFACE2D_LAYERED_LAYERS} - Maximum layers in a 2D layered surface
* - {@link #CU_DEVICE_ATTRIBUTE_MAXIMUM_SURFACECUBEMAP_WIDTH DEVICE_ATTRIBUTE_MAXIMUM_SURFACECUBEMAP_WIDTH} - Maximum cubemap surface width
* - {@link #CU_DEVICE_ATTRIBUTE_MAXIMUM_SURFACECUBEMAP_LAYERED_WIDTH DEVICE_ATTRIBUTE_MAXIMUM_SURFACECUBEMAP_LAYERED_WIDTH} - Maximum cubemap layered surface width
* - {@link #CU_DEVICE_ATTRIBUTE_MAXIMUM_SURFACECUBEMAP_LAYERED_LAYERS DEVICE_ATTRIBUTE_MAXIMUM_SURFACECUBEMAP_LAYERED_LAYERS} - Maximum layers in a cubemap layered surface
* - {@link #CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE1D_LINEAR_WIDTH DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE1D_LINEAR_WIDTH} - Deprecated, do not use. Use {@code cudaDeviceGetTexture1DLinearMaxWidth()} or {@link #cuDeviceGetTexture1DLinearMaxWidth DeviceGetTexture1DLinearMaxWidth} instead.
* - {@link #CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE2D_LINEAR_WIDTH DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE2D_LINEAR_WIDTH} - Maximum 2D linear texture width
* - {@link #CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE2D_LINEAR_HEIGHT DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE2D_LINEAR_HEIGHT} - Maximum 2D linear texture height
* - {@link #CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE2D_LINEAR_PITCH DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE2D_LINEAR_PITCH} - Maximum 2D linear texture pitch in bytes
* - {@link #CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE2D_MIPMAPPED_WIDTH DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE2D_MIPMAPPED_WIDTH} - Maximum mipmapped 2D texture width
* - {@link #CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE2D_MIPMAPPED_HEIGHT DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE2D_MIPMAPPED_HEIGHT} - Maximum mipmapped 2D texture height
* - {@link #CU_DEVICE_ATTRIBUTE_COMPUTE_CAPABILITY_MAJOR DEVICE_ATTRIBUTE_COMPUTE_CAPABILITY_MAJOR} - Major compute capability version number
* - {@link #CU_DEVICE_ATTRIBUTE_COMPUTE_CAPABILITY_MINOR DEVICE_ATTRIBUTE_COMPUTE_CAPABILITY_MINOR} - Minor compute capability version number
* - {@link #CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE1D_MIPMAPPED_WIDTH DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE1D_MIPMAPPED_WIDTH} - Maximum mipmapped 1D texture width
* - {@link #CU_DEVICE_ATTRIBUTE_STREAM_PRIORITIES_SUPPORTED DEVICE_ATTRIBUTE_STREAM_PRIORITIES_SUPPORTED} - Device supports stream priorities
* - {@link #CU_DEVICE_ATTRIBUTE_GLOBAL_L1_CACHE_SUPPORTED DEVICE_ATTRIBUTE_GLOBAL_L1_CACHE_SUPPORTED} - Device supports caching globals in L1
* - {@link #CU_DEVICE_ATTRIBUTE_LOCAL_L1_CACHE_SUPPORTED DEVICE_ATTRIBUTE_LOCAL_L1_CACHE_SUPPORTED} - Device supports caching locals in L1
* - {@link #CU_DEVICE_ATTRIBUTE_MAX_SHARED_MEMORY_PER_MULTIPROCESSOR DEVICE_ATTRIBUTE_MAX_SHARED_MEMORY_PER_MULTIPROCESSOR} - Maximum shared memory available per multiprocessor in bytes
* - {@link #CU_DEVICE_ATTRIBUTE_MAX_REGISTERS_PER_MULTIPROCESSOR DEVICE_ATTRIBUTE_MAX_REGISTERS_PER_MULTIPROCESSOR} - Maximum number of 32-bit registers available per multiprocessor
* - {@link #CU_DEVICE_ATTRIBUTE_MANAGED_MEMORY DEVICE_ATTRIBUTE_MANAGED_MEMORY} - Device can allocate managed memory on this system
* - {@link #CU_DEVICE_ATTRIBUTE_MULTI_GPU_BOARD DEVICE_ATTRIBUTE_MULTI_GPU_BOARD} - Device is on a multi-GPU board
* - {@link #CU_DEVICE_ATTRIBUTE_MULTI_GPU_BOARD_GROUP_ID DEVICE_ATTRIBUTE_MULTI_GPU_BOARD_GROUP_ID} - Unique id for a group of devices on the same multi-GPU board
* - {@link #CU_DEVICE_ATTRIBUTE_HOST_NATIVE_ATOMIC_SUPPORTED DEVICE_ATTRIBUTE_HOST_NATIVE_ATOMIC_SUPPORTED} -
* Link between the device and the host supports native atomic operations (this is a placeholder attribute, and is not supported on any current
* hardware)
*
* - {@link #CU_DEVICE_ATTRIBUTE_SINGLE_TO_DOUBLE_PRECISION_PERF_RATIO DEVICE_ATTRIBUTE_SINGLE_TO_DOUBLE_PRECISION_PERF_RATIO} - Ratio of single precision performance (in floating-point operations per second) to double precision performance
* - {@link #CU_DEVICE_ATTRIBUTE_PAGEABLE_MEMORY_ACCESS DEVICE_ATTRIBUTE_PAGEABLE_MEMORY_ACCESS} - Device supports coherently accessing pageable memory without calling cudaHostRegister on it
* - {@link #CU_DEVICE_ATTRIBUTE_CONCURRENT_MANAGED_ACCESS DEVICE_ATTRIBUTE_CONCURRENT_MANAGED_ACCESS} - Device can coherently access managed memory concurrently with the CPU
* - {@link #CU_DEVICE_ATTRIBUTE_COMPUTE_PREEMPTION_SUPPORTED DEVICE_ATTRIBUTE_COMPUTE_PREEMPTION_SUPPORTED} - Device supports compute preemption.
* - {@link #CU_DEVICE_ATTRIBUTE_CAN_USE_HOST_POINTER_FOR_REGISTERED_MEM DEVICE_ATTRIBUTE_CAN_USE_HOST_POINTER_FOR_REGISTERED_MEM} - Device can access host registered memory at the same virtual address as the CPU
* - {@link #CU_DEVICE_ATTRIBUTE_CAN_USE_STREAM_MEM_OPS DEVICE_ATTRIBUTE_CAN_USE_STREAM_MEM_OPS} - {@link #cuStreamBatchMemOp StreamBatchMemOp} and related APIs are supported.
* - {@link #CU_DEVICE_ATTRIBUTE_CAN_USE_64_BIT_STREAM_MEM_OPS DEVICE_ATTRIBUTE_CAN_USE_64_BIT_STREAM_MEM_OPS} - 64-bit operations are supported in {@link #cuStreamBatchMemOp StreamBatchMemOp} and related APIs.
* - {@link #CU_DEVICE_ATTRIBUTE_CAN_USE_STREAM_WAIT_VALUE_NOR DEVICE_ATTRIBUTE_CAN_USE_STREAM_WAIT_VALUE_NOR} - {@link #CU_STREAM_WAIT_VALUE_NOR STREAM_WAIT_VALUE_NOR} is supported.
* - {@link #CU_DEVICE_ATTRIBUTE_COOPERATIVE_LAUNCH DEVICE_ATTRIBUTE_COOPERATIVE_LAUNCH} - Device supports launching cooperative kernels via {@link #cuLaunchCooperativeKernel LaunchCooperativeKernel}
* - {@link #CU_DEVICE_ATTRIBUTE_COOPERATIVE_MULTI_DEVICE_LAUNCH DEVICE_ATTRIBUTE_COOPERATIVE_MULTI_DEVICE_LAUNCH} - Deprecated, {@link #cuLaunchCooperativeKernelMultiDevice LaunchCooperativeKernelMultiDevice} is deprecated.
* - {@link #CU_DEVICE_ATTRIBUTE_MAX_SHARED_MEMORY_PER_BLOCK_OPTIN DEVICE_ATTRIBUTE_MAX_SHARED_MEMORY_PER_BLOCK_OPTIN} - Maximum optin shared memory per block
* - {@link #CU_DEVICE_ATTRIBUTE_CAN_FLUSH_REMOTE_WRITES DEVICE_ATTRIBUTE_CAN_FLUSH_REMOTE_WRITES} -
* The {@link #CU_STREAM_WAIT_VALUE_FLUSH STREAM_WAIT_VALUE_FLUSH} flag and the {@link #CU_STREAM_MEM_OP_FLUSH_REMOTE_WRITES STREAM_MEM_OP_FLUSH_REMOTE_WRITES} MemOp are supported on the device. See {@code CUDA_MEMOP} for
* additional details.
*
* - {@link #CU_DEVICE_ATTRIBUTE_HOST_REGISTER_SUPPORTED DEVICE_ATTRIBUTE_HOST_REGISTER_SUPPORTED} - Device supports host memory registration via {@code cudaHostRegister()}.
* - {@link #CU_DEVICE_ATTRIBUTE_PAGEABLE_MEMORY_ACCESS_USES_HOST_PAGE_TABLES DEVICE_ATTRIBUTE_PAGEABLE_MEMORY_ACCESS_USES_HOST_PAGE_TABLES} - Device accesses pageable memory via the host's page tables.
* - {@link #CU_DEVICE_ATTRIBUTE_DIRECT_MANAGED_MEM_ACCESS_FROM_HOST DEVICE_ATTRIBUTE_DIRECT_MANAGED_MEM_ACCESS_FROM_HOST} - The host can directly access managed memory on the device without migration.
 * - {@link #CU_DEVICE_ATTRIBUTE_VIRTUAL_ADDRESS_MANAGEMENT_SUPPORTED DEVICE_ATTRIBUTE_VIRTUAL_ADDRESS_MANAGEMENT_SUPPORTED} - Deprecated, use {@link #CU_DEVICE_ATTRIBUTE_VIRTUAL_MEMORY_MANAGEMENT_SUPPORTED DEVICE_ATTRIBUTE_VIRTUAL_MEMORY_MANAGEMENT_SUPPORTED}
* - {@link #CU_DEVICE_ATTRIBUTE_VIRTUAL_MEMORY_MANAGEMENT_SUPPORTED DEVICE_ATTRIBUTE_VIRTUAL_MEMORY_MANAGEMENT_SUPPORTED} - Device supports virtual memory management APIs like {@link #cuMemAddressReserve MemAddressReserve}, {@link #cuMemCreate MemCreate}, {@link #cuMemMap MemMap} and related APIs
* - {@link #CU_DEVICE_ATTRIBUTE_HANDLE_TYPE_POSIX_FILE_DESCRIPTOR_SUPPORTED DEVICE_ATTRIBUTE_HANDLE_TYPE_POSIX_FILE_DESCRIPTOR_SUPPORTED} - Device supports exporting memory to a posix file descriptor with {@link #cuMemExportToShareableHandle MemExportToShareableHandle}, if requested via {@link #cuMemCreate MemCreate}
* - {@link #CU_DEVICE_ATTRIBUTE_HANDLE_TYPE_WIN32_HANDLE_SUPPORTED DEVICE_ATTRIBUTE_HANDLE_TYPE_WIN32_HANDLE_SUPPORTED} - Device supports exporting memory to a Win32 NT handle with {@link #cuMemExportToShareableHandle MemExportToShareableHandle}, if requested via {@link #cuMemCreate MemCreate}
* - {@link #CU_DEVICE_ATTRIBUTE_HANDLE_TYPE_WIN32_KMT_HANDLE_SUPPORTED DEVICE_ATTRIBUTE_HANDLE_TYPE_WIN32_KMT_HANDLE_SUPPORTED} - Device supports exporting memory to a Win32 KMT handle with {@link #cuMemExportToShareableHandle MemExportToShareableHandle}, if requested via {@link #cuMemCreate MemCreate}
* - {@link #CU_DEVICE_ATTRIBUTE_MAX_BLOCKS_PER_MULTIPROCESSOR DEVICE_ATTRIBUTE_MAX_BLOCKS_PER_MULTIPROCESSOR} - Maximum number of blocks per multiprocessor
* - {@link #CU_DEVICE_ATTRIBUTE_GENERIC_COMPRESSION_SUPPORTED DEVICE_ATTRIBUTE_GENERIC_COMPRESSION_SUPPORTED} - Device supports compression of memory
* - {@link #CU_DEVICE_ATTRIBUTE_MAX_PERSISTING_L2_CACHE_SIZE DEVICE_ATTRIBUTE_MAX_PERSISTING_L2_CACHE_SIZE} - Maximum L2 persisting lines capacity setting in bytes.
 * - {@link #CU_DEVICE_ATTRIBUTE_MAX_ACCESS_POLICY_WINDOW_SIZE DEVICE_ATTRIBUTE_MAX_ACCESS_POLICY_WINDOW_SIZE} - Maximum value of {@link CUaccessPolicyWindow}{@code ::num_bytes}.
* - {@link #CU_DEVICE_ATTRIBUTE_GPU_DIRECT_RDMA_WITH_CUDA_VMM_SUPPORTED DEVICE_ATTRIBUTE_GPU_DIRECT_RDMA_WITH_CUDA_VMM_SUPPORTED} - Device supports specifying the GPUDirect RDMA flag with {@link #cuMemCreate MemCreate}
* - {@link #CU_DEVICE_ATTRIBUTE_RESERVED_SHARED_MEMORY_PER_BLOCK DEVICE_ATTRIBUTE_RESERVED_SHARED_MEMORY_PER_BLOCK} - Shared memory reserved by CUDA driver per block in bytes
* - {@link #CU_DEVICE_ATTRIBUTE_SPARSE_CUDA_ARRAY_SUPPORTED DEVICE_ATTRIBUTE_SPARSE_CUDA_ARRAY_SUPPORTED} - Device supports sparse CUDA arrays and sparse CUDA mipmapped arrays
* - {@link #CU_DEVICE_ATTRIBUTE_READ_ONLY_HOST_REGISTER_SUPPORTED DEVICE_ATTRIBUTE_READ_ONLY_HOST_REGISTER_SUPPORTED} - Device supports using the {@link #cuMemHostRegister MemHostRegister} flag {@link #CU_MEMHOSTREGISTER_READ_ONLY MEMHOSTREGISTER_READ_ONLY} to register memory that must be mapped as read-only to the GPU
* - {@link #CU_DEVICE_ATTRIBUTE_TIMELINE_SEMAPHORE_INTEROP_SUPPORTED DEVICE_ATTRIBUTE_TIMELINE_SEMAPHORE_INTEROP_SUPPORTED} - External timeline semaphore interop is supported on the device
* - {@link #CU_DEVICE_ATTRIBUTE_MEMORY_POOLS_SUPPORTED DEVICE_ATTRIBUTE_MEMORY_POOLS_SUPPORTED} - Device supports using the {@link #cuMemAllocAsync MemAllocAsync} and {@code cuMemPool*} family of APIs
 * - {@link #CU_DEVICE_ATTRIBUTE_GPU_DIRECT_RDMA_SUPPORTED DEVICE_ATTRIBUTE_GPU_DIRECT_RDMA_SUPPORTED} - Device supports GPUDirect RDMA APIs, like {@code nvidia_p2p_get_pages} (see https://docs.nvidia.com/cuda/gpudirect-rdma for more information)
* - {@link #CU_DEVICE_ATTRIBUTE_GPU_DIRECT_RDMA_FLUSH_WRITES_OPTIONS DEVICE_ATTRIBUTE_GPU_DIRECT_RDMA_FLUSH_WRITES_OPTIONS} -
* The returned attribute shall be interpreted as a bitmask, where the individual bits are described by the {@code CUflushGPUDirectRDMAWritesOptions}
* enum
*
* - {@link #CU_DEVICE_ATTRIBUTE_GPU_DIRECT_RDMA_WRITES_ORDERING DEVICE_ATTRIBUTE_GPU_DIRECT_RDMA_WRITES_ORDERING} -
* GPUDirect RDMA writes to the device do not need to be flushed for consumers within the scope indicated by the returned attribute. See
* {@code CUGPUDirectRDMAWritesOrdering} for the numerical values returned here.
*
* - {@link #CU_DEVICE_ATTRIBUTE_MEMPOOL_SUPPORTED_HANDLE_TYPES DEVICE_ATTRIBUTE_MEMPOOL_SUPPORTED_HANDLE_TYPES} - Handle types supported with mempool based IPC
*
*/
public static final int
CU_DEVICE_ATTRIBUTE_MAX_THREADS_PER_BLOCK = 1,
CU_DEVICE_ATTRIBUTE_MAX_BLOCK_DIM_X = 2,
CU_DEVICE_ATTRIBUTE_MAX_BLOCK_DIM_Y = 3,
CU_DEVICE_ATTRIBUTE_MAX_BLOCK_DIM_Z = 4,
CU_DEVICE_ATTRIBUTE_MAX_GRID_DIM_X = 5,
CU_DEVICE_ATTRIBUTE_MAX_GRID_DIM_Y = 6,
CU_DEVICE_ATTRIBUTE_MAX_GRID_DIM_Z = 7,
CU_DEVICE_ATTRIBUTE_MAX_SHARED_MEMORY_PER_BLOCK = 8,
CU_DEVICE_ATTRIBUTE_SHARED_MEMORY_PER_BLOCK = 8,
CU_DEVICE_ATTRIBUTE_TOTAL_CONSTANT_MEMORY = 9,
CU_DEVICE_ATTRIBUTE_WARP_SIZE = 10,
CU_DEVICE_ATTRIBUTE_MAX_PITCH = 11,
CU_DEVICE_ATTRIBUTE_MAX_REGISTERS_PER_BLOCK = 12,
CU_DEVICE_ATTRIBUTE_REGISTERS_PER_BLOCK = 12,
CU_DEVICE_ATTRIBUTE_CLOCK_RATE = 13,
CU_DEVICE_ATTRIBUTE_TEXTURE_ALIGNMENT = 14,
CU_DEVICE_ATTRIBUTE_GPU_OVERLAP = 15,
CU_DEVICE_ATTRIBUTE_MULTIPROCESSOR_COUNT = 16,
CU_DEVICE_ATTRIBUTE_KERNEL_EXEC_TIMEOUT = 17,
CU_DEVICE_ATTRIBUTE_INTEGRATED = 18,
CU_DEVICE_ATTRIBUTE_CAN_MAP_HOST_MEMORY = 19,
CU_DEVICE_ATTRIBUTE_COMPUTE_MODE = 20,
CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE1D_WIDTH = 21,
CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE2D_WIDTH = 22,
CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE2D_HEIGHT = 23,
CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE3D_WIDTH = 24,
CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE3D_HEIGHT = 25,
CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE3D_DEPTH = 26,
CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE2D_LAYERED_WIDTH = 27,
CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE2D_LAYERED_HEIGHT = 28,
CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE2D_LAYERED_LAYERS = 29,
CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE2D_ARRAY_WIDTH = 27,
CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE2D_ARRAY_HEIGHT = 28,
CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE2D_ARRAY_NUMSLICES = 29,
CU_DEVICE_ATTRIBUTE_SURFACE_ALIGNMENT = 30,
CU_DEVICE_ATTRIBUTE_CONCURRENT_KERNELS = 31,
CU_DEVICE_ATTRIBUTE_ECC_ENABLED = 32,
CU_DEVICE_ATTRIBUTE_PCI_BUS_ID = 33,
CU_DEVICE_ATTRIBUTE_PCI_DEVICE_ID = 34,
CU_DEVICE_ATTRIBUTE_TCC_DRIVER = 35,
CU_DEVICE_ATTRIBUTE_MEMORY_CLOCK_RATE = 36,
CU_DEVICE_ATTRIBUTE_GLOBAL_MEMORY_BUS_WIDTH = 37,
CU_DEVICE_ATTRIBUTE_L2_CACHE_SIZE = 38,
CU_DEVICE_ATTRIBUTE_MAX_THREADS_PER_MULTIPROCESSOR = 39,
CU_DEVICE_ATTRIBUTE_ASYNC_ENGINE_COUNT = 40,
CU_DEVICE_ATTRIBUTE_UNIFIED_ADDRESSING = 41,
CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE1D_LAYERED_WIDTH = 42,
CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE1D_LAYERED_LAYERS = 43,
CU_DEVICE_ATTRIBUTE_CAN_TEX2D_GATHER = 44,
CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE2D_GATHER_WIDTH = 45,
CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE2D_GATHER_HEIGHT = 46,
CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE3D_WIDTH_ALTERNATE = 47,
CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE3D_HEIGHT_ALTERNATE = 48,
CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE3D_DEPTH_ALTERNATE = 49,
CU_DEVICE_ATTRIBUTE_PCI_DOMAIN_ID = 50,
CU_DEVICE_ATTRIBUTE_TEXTURE_PITCH_ALIGNMENT = 51,
CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURECUBEMAP_WIDTH = 52,
CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURECUBEMAP_LAYERED_WIDTH = 53,
CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURECUBEMAP_LAYERED_LAYERS = 54,
CU_DEVICE_ATTRIBUTE_MAXIMUM_SURFACE1D_WIDTH = 55,
CU_DEVICE_ATTRIBUTE_MAXIMUM_SURFACE2D_WIDTH = 56,
CU_DEVICE_ATTRIBUTE_MAXIMUM_SURFACE2D_HEIGHT = 57,
CU_DEVICE_ATTRIBUTE_MAXIMUM_SURFACE3D_WIDTH = 58,
CU_DEVICE_ATTRIBUTE_MAXIMUM_SURFACE3D_HEIGHT = 59,
CU_DEVICE_ATTRIBUTE_MAXIMUM_SURFACE3D_DEPTH = 60,
CU_DEVICE_ATTRIBUTE_MAXIMUM_SURFACE1D_LAYERED_WIDTH = 61,
CU_DEVICE_ATTRIBUTE_MAXIMUM_SURFACE1D_LAYERED_LAYERS = 62,
CU_DEVICE_ATTRIBUTE_MAXIMUM_SURFACE2D_LAYERED_WIDTH = 63,
CU_DEVICE_ATTRIBUTE_MAXIMUM_SURFACE2D_LAYERED_HEIGHT = 64,
CU_DEVICE_ATTRIBUTE_MAXIMUM_SURFACE2D_LAYERED_LAYERS = 65,
CU_DEVICE_ATTRIBUTE_MAXIMUM_SURFACECUBEMAP_WIDTH = 66,
CU_DEVICE_ATTRIBUTE_MAXIMUM_SURFACECUBEMAP_LAYERED_WIDTH = 67,
CU_DEVICE_ATTRIBUTE_MAXIMUM_SURFACECUBEMAP_LAYERED_LAYERS = 68,
CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE1D_LINEAR_WIDTH = 69,
CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE2D_LINEAR_WIDTH = 70,
CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE2D_LINEAR_HEIGHT = 71,
CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE2D_LINEAR_PITCH = 72,
CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE2D_MIPMAPPED_WIDTH = 73,
CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE2D_MIPMAPPED_HEIGHT = 74,
CU_DEVICE_ATTRIBUTE_COMPUTE_CAPABILITY_MAJOR = 75,
CU_DEVICE_ATTRIBUTE_COMPUTE_CAPABILITY_MINOR = 76,
CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE1D_MIPMAPPED_WIDTH = 77,
CU_DEVICE_ATTRIBUTE_STREAM_PRIORITIES_SUPPORTED = 78,
CU_DEVICE_ATTRIBUTE_GLOBAL_L1_CACHE_SUPPORTED = 79,
CU_DEVICE_ATTRIBUTE_LOCAL_L1_CACHE_SUPPORTED = 80,
CU_DEVICE_ATTRIBUTE_MAX_SHARED_MEMORY_PER_MULTIPROCESSOR = 81,
CU_DEVICE_ATTRIBUTE_MAX_REGISTERS_PER_MULTIPROCESSOR = 82,
CU_DEVICE_ATTRIBUTE_MANAGED_MEMORY = 83,
CU_DEVICE_ATTRIBUTE_MULTI_GPU_BOARD = 84,
CU_DEVICE_ATTRIBUTE_MULTI_GPU_BOARD_GROUP_ID = 85,
CU_DEVICE_ATTRIBUTE_HOST_NATIVE_ATOMIC_SUPPORTED = 86,
CU_DEVICE_ATTRIBUTE_SINGLE_TO_DOUBLE_PRECISION_PERF_RATIO = 87,
CU_DEVICE_ATTRIBUTE_PAGEABLE_MEMORY_ACCESS = 88,
CU_DEVICE_ATTRIBUTE_CONCURRENT_MANAGED_ACCESS = 89,
CU_DEVICE_ATTRIBUTE_COMPUTE_PREEMPTION_SUPPORTED = 90,
CU_DEVICE_ATTRIBUTE_CAN_USE_HOST_POINTER_FOR_REGISTERED_MEM = 91,
CU_DEVICE_ATTRIBUTE_CAN_USE_STREAM_MEM_OPS = 92,
CU_DEVICE_ATTRIBUTE_CAN_USE_64_BIT_STREAM_MEM_OPS = 93,
CU_DEVICE_ATTRIBUTE_CAN_USE_STREAM_WAIT_VALUE_NOR = 94,
CU_DEVICE_ATTRIBUTE_COOPERATIVE_LAUNCH = 95,
CU_DEVICE_ATTRIBUTE_COOPERATIVE_MULTI_DEVICE_LAUNCH = 96,
CU_DEVICE_ATTRIBUTE_MAX_SHARED_MEMORY_PER_BLOCK_OPTIN = 97,
CU_DEVICE_ATTRIBUTE_CAN_FLUSH_REMOTE_WRITES = 98,
CU_DEVICE_ATTRIBUTE_HOST_REGISTER_SUPPORTED = 99,
CU_DEVICE_ATTRIBUTE_PAGEABLE_MEMORY_ACCESS_USES_HOST_PAGE_TABLES = 100,
CU_DEVICE_ATTRIBUTE_DIRECT_MANAGED_MEM_ACCESS_FROM_HOST = 101,
CU_DEVICE_ATTRIBUTE_VIRTUAL_ADDRESS_MANAGEMENT_SUPPORTED = 102,
CU_DEVICE_ATTRIBUTE_VIRTUAL_MEMORY_MANAGEMENT_SUPPORTED = 102,
CU_DEVICE_ATTRIBUTE_HANDLE_TYPE_POSIX_FILE_DESCRIPTOR_SUPPORTED = 103,
CU_DEVICE_ATTRIBUTE_HANDLE_TYPE_WIN32_HANDLE_SUPPORTED = 104,
CU_DEVICE_ATTRIBUTE_HANDLE_TYPE_WIN32_KMT_HANDLE_SUPPORTED = 105,
CU_DEVICE_ATTRIBUTE_MAX_BLOCKS_PER_MULTIPROCESSOR = 106,
CU_DEVICE_ATTRIBUTE_GENERIC_COMPRESSION_SUPPORTED = 107,
CU_DEVICE_ATTRIBUTE_MAX_PERSISTING_L2_CACHE_SIZE = 108,
CU_DEVICE_ATTRIBUTE_MAX_ACCESS_POLICY_WINDOW_SIZE = 109,
CU_DEVICE_ATTRIBUTE_GPU_DIRECT_RDMA_WITH_CUDA_VMM_SUPPORTED = 110,
CU_DEVICE_ATTRIBUTE_RESERVED_SHARED_MEMORY_PER_BLOCK = 111,
CU_DEVICE_ATTRIBUTE_SPARSE_CUDA_ARRAY_SUPPORTED = 112,
CU_DEVICE_ATTRIBUTE_READ_ONLY_HOST_REGISTER_SUPPORTED = 113,
CU_DEVICE_ATTRIBUTE_TIMELINE_SEMAPHORE_INTEROP_SUPPORTED = 114,
CU_DEVICE_ATTRIBUTE_MEMORY_POOLS_SUPPORTED = 115,
CU_DEVICE_ATTRIBUTE_GPU_DIRECT_RDMA_SUPPORTED = 116,
CU_DEVICE_ATTRIBUTE_GPU_DIRECT_RDMA_FLUSH_WRITES_OPTIONS = 117,
CU_DEVICE_ATTRIBUTE_GPU_DIRECT_RDMA_WRITES_ORDERING = 118,
CU_DEVICE_ATTRIBUTE_MEMPOOL_SUPPORTED_HANDLE_TYPES = 119;
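/*
 * A minimal usage sketch for the attributes above, assuming cuInit(0) has already succeeded
 * and that "device" holds a valid ordinal previously obtained via cuDeviceGet (both names
 * are illustrative, not part of these bindings):
 *
 *     try (MemoryStack stack = stackPush()) {
 *         IntBuffer pi = stack.mallocInt(1);
 *         // the queried value is written into the single-element buffer
 *         if (cuDeviceGetAttribute(pi, CU_DEVICE_ATTRIBUTE_MAX_THREADS_PER_BLOCK, device) == CUDA_SUCCESS) {
 *             int maxThreadsPerBlock = pi.get(0);
 *         }
 *     }
 */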
/**
* Pointer information. ({@code CUpointer_attribute})
*
* Enum values:
*
*
* - {@link #CU_POINTER_ATTRIBUTE_CONTEXT POINTER_ATTRIBUTE_CONTEXT} - The {@code CUcontext} on which a pointer was allocated or registered
* - {@link #CU_POINTER_ATTRIBUTE_MEMORY_TYPE POINTER_ATTRIBUTE_MEMORY_TYPE} - The {@code CUmemorytype} describing the physical location of a pointer
* - {@link #CU_POINTER_ATTRIBUTE_DEVICE_POINTER POINTER_ATTRIBUTE_DEVICE_POINTER} - The address at which a pointer's memory may be accessed on the device
* - {@link #CU_POINTER_ATTRIBUTE_HOST_POINTER POINTER_ATTRIBUTE_HOST_POINTER} - The address at which a pointer's memory may be accessed on the host
* - {@link #CU_POINTER_ATTRIBUTE_P2P_TOKENS POINTER_ATTRIBUTE_P2P_TOKENS} - A pair of tokens for use with the {@code nv-p2p.h} Linux kernel interface
* - {@link #CU_POINTER_ATTRIBUTE_SYNC_MEMOPS POINTER_ATTRIBUTE_SYNC_MEMOPS} - Synchronize every synchronous memory operation initiated on this region
* - {@link #CU_POINTER_ATTRIBUTE_BUFFER_ID POINTER_ATTRIBUTE_BUFFER_ID} - A process-wide unique ID for an allocated memory region
* - {@link #CU_POINTER_ATTRIBUTE_IS_MANAGED POINTER_ATTRIBUTE_IS_MANAGED} - Indicates if the pointer points to managed memory
* - {@link #CU_POINTER_ATTRIBUTE_DEVICE_ORDINAL POINTER_ATTRIBUTE_DEVICE_ORDINAL} - A device ordinal of a device on which a pointer was allocated or registered
* - {@link #CU_POINTER_ATTRIBUTE_IS_LEGACY_CUDA_IPC_CAPABLE POINTER_ATTRIBUTE_IS_LEGACY_CUDA_IPC_CAPABLE} - 1 if this pointer maps to an allocation that is suitable for {@code cudaIpcGetMemHandle()}, 0 otherwise
* - {@link #CU_POINTER_ATTRIBUTE_RANGE_START_ADDR POINTER_ATTRIBUTE_RANGE_START_ADDR} - Starting address for this requested pointer
* - {@link #CU_POINTER_ATTRIBUTE_RANGE_SIZE POINTER_ATTRIBUTE_RANGE_SIZE} - Size of the address range for this requested pointer
* - {@link #CU_POINTER_ATTRIBUTE_MAPPED POINTER_ATTRIBUTE_MAPPED} - 1 if this pointer is in a valid address range that is mapped to a backing allocation, 0 otherwise
* - {@link #CU_POINTER_ATTRIBUTE_ALLOWED_HANDLE_TYPES POINTER_ATTRIBUTE_ALLOWED_HANDLE_TYPES} - Bitmask of allowed {@code CUmemAllocationHandleType} for this allocation
* - {@link #CU_POINTER_ATTRIBUTE_IS_GPU_DIRECT_RDMA_CAPABLE POINTER_ATTRIBUTE_IS_GPU_DIRECT_RDMA_CAPABLE} - 1 if the memory this pointer is referencing can be used with the GPUDirect RDMA API
* - {@link #CU_POINTER_ATTRIBUTE_ACCESS_FLAGS POINTER_ATTRIBUTE_ACCESS_FLAGS} - Returns the access flags the device associated with the current context has on the corresponding memory referenced by the pointer given
 * - {@link #CU_POINTER_ATTRIBUTE_MEMPOOL_HANDLE POINTER_ATTRIBUTE_MEMPOOL_HANDLE} - Returns the {@code mempool} handle for the allocation if it was allocated from a {@code mempool}. Otherwise returns {@code NULL}.
*
*/
public static final int
CU_POINTER_ATTRIBUTE_CONTEXT = 1,
CU_POINTER_ATTRIBUTE_MEMORY_TYPE = 2,
CU_POINTER_ATTRIBUTE_DEVICE_POINTER = 3,
CU_POINTER_ATTRIBUTE_HOST_POINTER = 4,
CU_POINTER_ATTRIBUTE_P2P_TOKENS = 5,
CU_POINTER_ATTRIBUTE_SYNC_MEMOPS = 6,
CU_POINTER_ATTRIBUTE_BUFFER_ID = 7,
CU_POINTER_ATTRIBUTE_IS_MANAGED = 8,
CU_POINTER_ATTRIBUTE_DEVICE_ORDINAL = 9,
CU_POINTER_ATTRIBUTE_IS_LEGACY_CUDA_IPC_CAPABLE = 10,
CU_POINTER_ATTRIBUTE_RANGE_START_ADDR = 11,
CU_POINTER_ATTRIBUTE_RANGE_SIZE = 12,
CU_POINTER_ATTRIBUTE_MAPPED = 13,
CU_POINTER_ATTRIBUTE_ALLOWED_HANDLE_TYPES = 14,
CU_POINTER_ATTRIBUTE_IS_GPU_DIRECT_RDMA_CAPABLE = 15,
CU_POINTER_ATTRIBUTE_ACCESS_FLAGS = 16,
CU_POINTER_ATTRIBUTE_MEMPOOL_HANDLE = 17;
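/*
 * A sketch of querying one of the attributes above, assuming "ptr" is a valid CUdeviceptr
 * and using the ByteBuffer overload of cuPointerGetAttribute; the memory type is returned
 * as a 32-bit value (see CUmemorytype further below):
 *
 *     try (MemoryStack stack = stackPush()) {
 *         ByteBuffer data = stack.malloc(4);
 *         if (cuPointerGetAttribute(data, CU_POINTER_ATTRIBUTE_MEMORY_TYPE, ptr) == CUDA_SUCCESS) {
 *             boolean onDevice = data.getInt(0) == CU_MEMORYTYPE_DEVICE;
 *         }
 *     }
 */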
/**
* Function properties. ({@code CUfunction_attribute})
*
* Enum values:
*
*
* - {@link #CU_FUNC_ATTRIBUTE_MAX_THREADS_PER_BLOCK FUNC_ATTRIBUTE_MAX_THREADS_PER_BLOCK} -
* The maximum number of threads per block, beyond which a launch of the function would fail. This number depends on both the function and the device
* on which the function is currently loaded.
*
* - {@link #CU_FUNC_ATTRIBUTE_SHARED_SIZE_BYTES FUNC_ATTRIBUTE_SHARED_SIZE_BYTES} -
* The size in bytes of statically-allocated shared memory required by this function. This does not include dynamically-allocated shared memory
* requested by the user at runtime.
*
* - {@link #CU_FUNC_ATTRIBUTE_CONST_SIZE_BYTES FUNC_ATTRIBUTE_CONST_SIZE_BYTES} - The size in bytes of user-allocated constant memory required by this function.
* - {@link #CU_FUNC_ATTRIBUTE_LOCAL_SIZE_BYTES FUNC_ATTRIBUTE_LOCAL_SIZE_BYTES} - The size in bytes of local memory used by each thread of this function.
* - {@link #CU_FUNC_ATTRIBUTE_NUM_REGS FUNC_ATTRIBUTE_NUM_REGS} - The number of registers used by each thread of this function.
* - {@link #CU_FUNC_ATTRIBUTE_PTX_VERSION FUNC_ATTRIBUTE_PTX_VERSION} -
* The PTX virtual architecture version for which the function was compiled.
*
*
 * This value is {@code major PTX version * 10 + minor PTX version}, so a PTX version 1.3 function would return the value 13. Note that this
* may return the undefined value of 0 for cubins compiled prior to CUDA 3.0.
*
* - {@link #CU_FUNC_ATTRIBUTE_BINARY_VERSION FUNC_ATTRIBUTE_BINARY_VERSION} -
* The binary architecture version for which the function was compiled.
*
*
 * This value is {@code major binary version * 10 + minor binary version}, so a binary version 1.3 function would return the value 13. Note
* that this will return a value of 10 for legacy cubins that do not have a properly-encoded binary architecture version.
*
 * - {@link #CU_FUNC_ATTRIBUTE_CACHE_MODE_CA FUNC_ATTRIBUTE_CACHE_MODE_CA} - Indicates whether the function has been compiled with the user-specified option {@code "-Xptxas --dlcm=ca"} set.
* - {@link #CU_FUNC_ATTRIBUTE_MAX_DYNAMIC_SHARED_SIZE_BYTES FUNC_ATTRIBUTE_MAX_DYNAMIC_SHARED_SIZE_BYTES} -
* The maximum size in bytes of dynamically-allocated shared memory that can be used by this function.
*
*
 * If the user-specified dynamic shared memory size is larger than this value, the launch will fail.
*
* - {@link #CU_FUNC_ATTRIBUTE_PREFERRED_SHARED_MEMORY_CARVEOUT FUNC_ATTRIBUTE_PREFERRED_SHARED_MEMORY_CARVEOUT} -
* On devices where the L1 cache and shared memory use the same hardware resources, this sets the shared memory carveout preference, in percent of the total shared memory. Refer to {@link #CU_DEVICE_ATTRIBUTE_MAX_SHARED_MEMORY_PER_MULTIPROCESSOR DEVICE_ATTRIBUTE_MAX_SHARED_MEMORY_PER_MULTIPROCESSOR}.
*
*
 * This is only a hint, and the driver can choose a different ratio if required to execute the function.
*
*
*/
public static final int
CU_FUNC_ATTRIBUTE_MAX_THREADS_PER_BLOCK = 0x0,
CU_FUNC_ATTRIBUTE_SHARED_SIZE_BYTES = 0x1,
CU_FUNC_ATTRIBUTE_CONST_SIZE_BYTES = 0x2,
CU_FUNC_ATTRIBUTE_LOCAL_SIZE_BYTES = 0x3,
CU_FUNC_ATTRIBUTE_NUM_REGS = 0x4,
CU_FUNC_ATTRIBUTE_PTX_VERSION = 0x5,
CU_FUNC_ATTRIBUTE_BINARY_VERSION = 0x6,
CU_FUNC_ATTRIBUTE_CACHE_MODE_CA = 0x7,
CU_FUNC_ATTRIBUTE_MAX_DYNAMIC_SHARED_SIZE_BYTES = 0x8,
CU_FUNC_ATTRIBUTE_PREFERRED_SHARED_MEMORY_CARVEOUT = 0x9;
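/*
 * A sketch of reading and writing function attributes, assuming "function" is a valid
 * CUfunction handle obtained via cuModuleGetFunction:
 *
 *     try (MemoryStack stack = stackPush()) {
 *         IntBuffer pi = stack.mallocInt(1);
 *         cuFuncGetAttribute(pi, CU_FUNC_ATTRIBUTE_NUM_REGS, function); // registers per thread
 *         // opt in to a larger dynamic shared memory allocation (64 KiB here, illustrative)
 *         cuFuncSetAttribute(function, CU_FUNC_ATTRIBUTE_MAX_DYNAMIC_SHARED_SIZE_BYTES, 64 * 1024);
 *     }
 */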
/**
* Function cache configurations. ({@code CUfunc_cache})
*
* Enum values:
*
*
* - {@link #CU_FUNC_CACHE_PREFER_NONE FUNC_CACHE_PREFER_NONE} - no preference for shared memory or L1 (default)
* - {@link #CU_FUNC_CACHE_PREFER_SHARED FUNC_CACHE_PREFER_SHARED} - prefer larger shared memory and smaller L1 cache
* - {@link #CU_FUNC_CACHE_PREFER_L1 FUNC_CACHE_PREFER_L1} - prefer larger L1 cache and smaller shared memory
* - {@link #CU_FUNC_CACHE_PREFER_EQUAL FUNC_CACHE_PREFER_EQUAL} - prefer equal sized L1 cache and shared memory
*
*/
public static final int
CU_FUNC_CACHE_PREFER_NONE = 0x0,
CU_FUNC_CACHE_PREFER_SHARED = 0x1,
CU_FUNC_CACHE_PREFER_L1 = 0x2,
CU_FUNC_CACHE_PREFER_EQUAL = 0x3;
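/*
 * A sketch of applying a cache preference to a specific kernel, assuming "function" is a
 * valid CUfunction handle; this is a hint the driver is free to ignore:
 *
 *     cuFuncSetCacheConfig(function, CU_FUNC_CACHE_PREFER_L1);
 */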
/**
* Shared memory configurations. ({@code CUsharedconfig})
*
* Enum values:
*
*
* - {@link #CU_SHARED_MEM_CONFIG_DEFAULT_BANK_SIZE SHARED_MEM_CONFIG_DEFAULT_BANK_SIZE} - set default shared memory bank size
* - {@link #CU_SHARED_MEM_CONFIG_FOUR_BYTE_BANK_SIZE SHARED_MEM_CONFIG_FOUR_BYTE_BANK_SIZE} - set shared memory bank width to four bytes
* - {@link #CU_SHARED_MEM_CONFIG_EIGHT_BYTE_BANK_SIZE SHARED_MEM_CONFIG_EIGHT_BYTE_BANK_SIZE} - set shared memory bank width to eight bytes
*
*/
public static final int
CU_SHARED_MEM_CONFIG_DEFAULT_BANK_SIZE = 0x0,
CU_SHARED_MEM_CONFIG_FOUR_BYTE_BANK_SIZE = 0x1,
CU_SHARED_MEM_CONFIG_EIGHT_BYTE_BANK_SIZE = 0x2;
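/*
 * A sketch of switching the current context to eight-byte shared memory banks, which can
 * reduce bank conflicts for kernels that keep double-precision data in shared memory:
 *
 *     cuCtxSetSharedMemConfig(CU_SHARED_MEM_CONFIG_EIGHT_BYTE_BANK_SIZE);
 */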
/**
* Shared memory carveout configurations. ({@code CUshared_carveout})
*
* These may be passed to {@link #cuFuncSetAttribute FuncSetAttribute}.
*
* Enum values:
*
*
* - {@link #CU_SHAREDMEM_CARVEOUT_DEFAULT SHAREDMEM_CARVEOUT_DEFAULT} - no preference for shared memory or L1 (default)
* - {@link #CU_SHAREDMEM_CARVEOUT_MAX_SHARED SHAREDMEM_CARVEOUT_MAX_SHARED} - prefer maximum available shared memory, minimum L1 cache
* - {@link #CU_SHAREDMEM_CARVEOUT_MAX_L1 SHAREDMEM_CARVEOUT_MAX_L1} - prefer maximum available L1 cache, minimum shared memory
*
*/
public static final int
CU_SHAREDMEM_CARVEOUT_DEFAULT = 0xFFFFFFFF,
CU_SHAREDMEM_CARVEOUT_MAX_SHARED = 0x64,
CU_SHAREDMEM_CARVEOUT_MAX_L1 = 0x0;
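/*
 * A sketch of requesting the maximum shared memory carveout for a kernel, assuming
 * "function" is a valid CUfunction handle; the value is a percentage hint, which is why
 * SHAREDMEM_CARVEOUT_MAX_SHARED is 0x64 (100) and SHAREDMEM_CARVEOUT_MAX_L1 is 0:
 *
 *     cuFuncSetAttribute(function, CU_FUNC_ATTRIBUTE_PREFERRED_SHARED_MEMORY_CARVEOUT, CU_SHAREDMEM_CARVEOUT_MAX_SHARED);
 */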
/**
* Memory types. ({@code CUmemorytype})
*
* Enum values:
*
*
* - {@link #CU_MEMORYTYPE_HOST MEMORYTYPE_HOST} - Host memory
* - {@link #CU_MEMORYTYPE_DEVICE MEMORYTYPE_DEVICE} - Device memory
* - {@link #CU_MEMORYTYPE_ARRAY MEMORYTYPE_ARRAY} - Array memory
* - {@link #CU_MEMORYTYPE_UNIFIED MEMORYTYPE_UNIFIED} - Unified device or host memory
*
*/
public static final int
CU_MEMORYTYPE_HOST = 0x1,
CU_MEMORYTYPE_DEVICE = 0x2,
CU_MEMORYTYPE_ARRAY = 0x3,
CU_MEMORYTYPE_UNIFIED = 0x4;
/**
* Compute Modes. ({@code CUcomputemode})
*
* Enum values:
*
*
* - {@link #CU_COMPUTEMODE_DEFAULT COMPUTEMODE_DEFAULT} - Default compute mode (Multiple contexts allowed per device)
* - {@link #CU_COMPUTEMODE_PROHIBITED COMPUTEMODE_PROHIBITED} - Compute-prohibited mode (No contexts can be created on this device at this time)
* - {@link #CU_COMPUTEMODE_EXCLUSIVE_PROCESS COMPUTEMODE_EXCLUSIVE_PROCESS} - Compute-exclusive-process mode (Only one context used by a single process can be present on this device at a time)
*
*/
public static final int
CU_COMPUTEMODE_DEFAULT = 0x0,
CU_COMPUTEMODE_PROHIBITED = 0x2,
CU_COMPUTEMODE_EXCLUSIVE_PROCESS = 0x3;
/**
* Memory advise values. ({@code CUmem_advise})
*
* Enum values:
*
*
 * - {@link #CU_MEM_ADVISE_SET_READ_MOSTLY MEM_ADVISE_SET_READ_MOSTLY} - Data will mostly be read and only occasionally be written to
* - {@link #CU_MEM_ADVISE_UNSET_READ_MOSTLY MEM_ADVISE_UNSET_READ_MOSTLY} - Undo the effect of {@link #CU_MEM_ADVISE_SET_READ_MOSTLY MEM_ADVISE_SET_READ_MOSTLY}
* - {@link #CU_MEM_ADVISE_SET_PREFERRED_LOCATION MEM_ADVISE_SET_PREFERRED_LOCATION} - Set the preferred location for the data as the specified device
* - {@link #CU_MEM_ADVISE_UNSET_PREFERRED_LOCATION MEM_ADVISE_UNSET_PREFERRED_LOCATION} - Clear the preferred location for the data
* - {@link #CU_MEM_ADVISE_SET_ACCESSED_BY MEM_ADVISE_SET_ACCESSED_BY} - Data will be accessed by the specified device, so prevent page faults as much as possible
* - {@link #CU_MEM_ADVISE_UNSET_ACCESSED_BY MEM_ADVISE_UNSET_ACCESSED_BY} - Let the Unified Memory subsystem decide on the page faulting policy for the specified device
*
*/
public static final int
CU_MEM_ADVISE_SET_READ_MOSTLY = 0x1,
CU_MEM_ADVISE_UNSET_READ_MOSTLY = 0x2,
CU_MEM_ADVISE_SET_PREFERRED_LOCATION = 0x3,
CU_MEM_ADVISE_UNSET_PREFERRED_LOCATION = 0x4,
CU_MEM_ADVISE_SET_ACCESSED_BY = 0x5,
CU_MEM_ADVISE_UNSET_ACCESSED_BY = 0x6;
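/*
 * A sketch of advising the Unified Memory subsystem about access patterns, assuming "ptr"
 * and "bytes" describe a managed range (e.g. from cuMemAllocManaged) and "device" is the
 * target device ordinal:
 *
 *     cuMemAdvise(ptr, bytes, CU_MEM_ADVISE_SET_READ_MOSTLY, device);
 *     // ... read-heavy phase ...
 *     cuMemAdvise(ptr, bytes, CU_MEM_ADVISE_UNSET_READ_MOSTLY, device);
 */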
/**
* ({@code CUmem_range_attribute})
*
* Enum values:
*
*
 * - {@link #CU_MEM_RANGE_ATTRIBUTE_READ_MOSTLY MEM_RANGE_ATTRIBUTE_READ_MOSTLY} - Whether the range will mostly be read and only occasionally be written to
* - {@link #CU_MEM_RANGE_ATTRIBUTE_PREFERRED_LOCATION MEM_RANGE_ATTRIBUTE_PREFERRED_LOCATION} - The preferred location of the range
* - {@link #CU_MEM_RANGE_ATTRIBUTE_ACCESSED_BY MEM_RANGE_ATTRIBUTE_ACCESSED_BY} - Memory range has {@link #CU_MEM_ADVISE_SET_ACCESSED_BY MEM_ADVISE_SET_ACCESSED_BY} set for specified device
* - {@link #CU_MEM_RANGE_ATTRIBUTE_LAST_PREFETCH_LOCATION MEM_RANGE_ATTRIBUTE_LAST_PREFETCH_LOCATION} - The last location to which the range was prefetched
*
*/
public static final int
CU_MEM_RANGE_ATTRIBUTE_READ_MOSTLY = 0x1,
CU_MEM_RANGE_ATTRIBUTE_PREFERRED_LOCATION = 0x2,
CU_MEM_RANGE_ATTRIBUTE_ACCESSED_BY = 0x3,
CU_MEM_RANGE_ATTRIBUTE_LAST_PREFETCH_LOCATION = 0x4;
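/*
 * A sketch of querying one of the attributes above, assuming "ptr" and "bytes" describe a
 * managed range and using the ByteBuffer overload of cuMemRangeGetAttribute; READ_MOSTLY
 * is returned as a 32-bit flag:
 *
 *     try (MemoryStack stack = stackPush()) {
 *         ByteBuffer data = stack.malloc(4);
 *         if (cuMemRangeGetAttribute(data, CU_MEM_RANGE_ATTRIBUTE_READ_MOSTLY, ptr, bytes) == CUDA_SUCCESS) {
 *             boolean readMostly = data.getInt(0) != 0;
 *         }
 *     }
 */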
/**
* Online compiler and linker options. ({@code CUjit_option})
*
* Enum values:
*
*
* - {@link #CU_JIT_MAX_REGISTERS JIT_MAX_REGISTERS} -
* Max number of registers that a thread may use.
*
*
 * Option type: {@code unsigned int}. Applies to: compiler only
*
* - {@link #CU_JIT_THREADS_PER_BLOCK JIT_THREADS_PER_BLOCK} -
* IN: Specifies minimum number of threads per block to target compilation for
*
*
 * OUT: Returns the number of threads the compiler actually targeted.
 *
 * This restricts the resource utilization of the compiler (e.g. max registers) such that a block with the given number of threads should be able to
* launch based on register limitations. Note, this option does not currently take into account any other resource limitations, such as shared memory
* utilization.
*
* Cannot be combined with {@link #CU_JIT_TARGET JIT_TARGET}. Option type: {@code unsigned int}. Applies to: compiler only
*
* - {@link #CU_JIT_WALL_TIME JIT_WALL_TIME} -
* Overwrites the option value with the total wall clock time, in milliseconds, spent in the compiler and linker.
*
*
 * Option type: {@code float}. Applies to: compiler and linker
*
* - {@link #CU_JIT_INFO_LOG_BUFFER JIT_INFO_LOG_BUFFER} -
* Pointer to a buffer in which to print any log messages that are informational in nature (the buffer size is specified via option
* {@link #CU_JIT_INFO_LOG_BUFFER_SIZE_BYTES JIT_INFO_LOG_BUFFER_SIZE_BYTES}).
*
*
 * Option type: {@code char *}. Applies to: compiler and linker
*
* - {@link #CU_JIT_INFO_LOG_BUFFER_SIZE_BYTES JIT_INFO_LOG_BUFFER_SIZE_BYTES} -
* IN: Log buffer size in bytes. Log messages will be capped at this size (including null terminator).
*
*
 * OUT: Amount of log buffer filled with messages.
*
* Option type: {@code unsigned int}. Applies to: compiler and linker
*
* - {@link #CU_JIT_ERROR_LOG_BUFFER JIT_ERROR_LOG_BUFFER} -
* Pointer to a buffer in which to print any log messages that reflect errors (the buffer size is specified via option
* {@link #CU_JIT_ERROR_LOG_BUFFER_SIZE_BYTES JIT_ERROR_LOG_BUFFER_SIZE_BYTES}).
*
*
 * Option type: {@code char *}. Applies to: compiler and linker
*
* - {@link #CU_JIT_ERROR_LOG_BUFFER_SIZE_BYTES JIT_ERROR_LOG_BUFFER_SIZE_BYTES} -
* IN: Log buffer size in bytes. Log messages will be capped at this size (including null terminator).
*
*
 * OUT: Amount of log buffer filled with messages.
*
* Option type: {@code unsigned int}. Applies to: compiler and linker
*
* - {@link #CU_JIT_OPTIMIZATION_LEVEL JIT_OPTIMIZATION_LEVEL} -
* Level of optimizations to apply to generated code (0 - 4), with 4 being the default and highest level of optimizations.
*
*
 * Option type: {@code unsigned int}. Applies to: compiler only
*
* - {@link #CU_JIT_TARGET_FROM_CUCONTEXT JIT_TARGET_FROM_CUCONTEXT} -
* No option value required. Determines the target based on the current attached context (default).
*
*
 * Option type: No option value needed. Applies to: compiler and linker
*
* - {@link #CU_JIT_TARGET JIT_TARGET} -
* Target is chosen based on supplied {@code CUjit_target}. Cannot be combined with {@link #CU_JIT_THREADS_PER_BLOCK JIT_THREADS_PER_BLOCK}.
*
*
 * Option type: {@code unsigned int} for enumerated type {@code CUjit_target}. Applies to: compiler and linker
*
* - {@link #CU_JIT_FALLBACK_STRATEGY JIT_FALLBACK_STRATEGY} -
* Specifies choice of fallback strategy if matching cubin is not found.
*
*
 * Choice is based on supplied {@code CUjit_fallback}. This option cannot be used with {@code cuLink*} APIs as the linker requires exact matches.
*
* Option type: {@code unsigned int} for enumerated type {@code CUjit_fallback}. Applies to: compiler only
*
* - {@link #CU_JIT_GENERATE_DEBUG_INFO JIT_GENERATE_DEBUG_INFO} -
* Specifies whether to create debug information in output (-g) (0: false, default).
*
*
 * Option type: {@code int}. Applies to: compiler and linker
*
* - {@link #CU_JIT_LOG_VERBOSE JIT_LOG_VERBOSE} -
* Generate verbose log messages (0: false, default).
*
*
 * Option type: {@code int}. Applies to: compiler and linker
*
* - {@link #CU_JIT_GENERATE_LINE_INFO JIT_GENERATE_LINE_INFO} -
* Generate line number information (-lineinfo) (0: false, default).
*
*
 * Option type: {@code int}. Applies to: compiler only
*
* - {@link #CU_JIT_CACHE_MODE JIT_CACHE_MODE} -
* Specifies whether to enable caching explicitly (-dlcm). Choice is based on supplied {@code CUjit_cacheMode_enum}.
*
*
 * Option type: {@code unsigned int} for enumerated type {@code CUjit_cacheMode_enum}. Applies to: compiler only
*
* - {@link #CU_JIT_NEW_SM3X_OPT JIT_NEW_SM3X_OPT} - Used for internal purposes only, in this version of CUDA.
* - {@link #CU_JIT_FAST_COMPILE JIT_FAST_COMPILE} - Used for internal purposes only, in this version of CUDA.
* - {@link #CU_JIT_GLOBAL_SYMBOL_NAMES JIT_GLOBAL_SYMBOL_NAMES} -
 * Array of device symbol names that will be relocated to the corresponding host addresses stored in {@link #CU_JIT_GLOBAL_SYMBOL_ADDRESSES JIT_GLOBAL_SYMBOL_ADDRESSES}.
*
*
 * Must contain {@link #CU_JIT_GLOBAL_SYMBOL_COUNT JIT_GLOBAL_SYMBOL_COUNT} entries. When loading a device module, the driver will relocate all encountered unresolved symbols to the host
 * addresses. It is only allowed to register symbols that correspond to unresolved global variables. It is illegal to register the same device symbol
 * at multiple addresses.
*
* Option type: {@code const char **}. Applies to: dynamic linker only
*
* - {@link #CU_JIT_GLOBAL_SYMBOL_ADDRESSES JIT_GLOBAL_SYMBOL_ADDRESSES} -
* Array of host addresses that will be used to relocate corresponding device symbols stored in {@link #CU_JIT_GLOBAL_SYMBOL_NAMES JIT_GLOBAL_SYMBOL_NAMES}.
*
*
 * Must contain {@link #CU_JIT_GLOBAL_SYMBOL_COUNT JIT_GLOBAL_SYMBOL_COUNT} entries.
*
* Option type: {@code void **}. Applies to: dynamic linker only
*
* - {@link #CU_JIT_GLOBAL_SYMBOL_COUNT JIT_GLOBAL_SYMBOL_COUNT} -
* Number of entries in {@link #CU_JIT_GLOBAL_SYMBOL_NAMES JIT_GLOBAL_SYMBOL_NAMES} and {@link #CU_JIT_GLOBAL_SYMBOL_ADDRESSES JIT_GLOBAL_SYMBOL_ADDRESSES} arrays.
*
*
 * Option type: {@code unsigned int}. Applies to: dynamic linker only
*
* - {@link #CU_JIT_LTO JIT_LTO} -
* Enable link-time optimization (-dlto) for device code (0: false, default)
*
*
 * Option type: {@code int}. Applies to: compiler and linker
*
* - {@link #CU_JIT_FTZ JIT_FTZ} -
* Control single-precision denormals (-ftz) support (0: false, default).
*
*
* - 1 : flushes denormal values to zero
* - 0 : preserves denormal values
*
*
* Option type: {@code int}. Applies to: link-time optimization specified with {@link #CU_JIT_LTO JIT_LTO}
*
* - {@link #CU_JIT_PREC_DIV JIT_PREC_DIV} -
* Control single-precision floating-point division and reciprocals (-prec-div) support (1: true, default).
*
*
* - 1 : Enables the IEEE round-to-nearest mode
* - 0 : Enables the fast approximation mode
*
*
* Option type: {@code int}. Applies to: link-time optimization specified with {@link #CU_JIT_LTO JIT_LTO}
*
* - {@link #CU_JIT_PREC_SQRT JIT_PREC_SQRT} -
* Control single-precision floating-point square root (-prec-sqrt) support (1: true, default).
*
*
* - 1 : Enables the IEEE round-to-nearest mode
* - 0 : Enables the fast approximation mode
*
*
* Option type: {@code int}. Applies to: link-time optimization specified with {@link #CU_JIT_LTO JIT_LTO}
*
* - {@link #CU_JIT_FMA JIT_FMA} -
* Enable/Disable the contraction of floating-point multiplies and adds/subtracts into floating-point multiply-add (-fma) operations (1: Enable,
* default; 0: Disable).
*
*
 * Option type: {@code int}. Applies to: link-time optimization specified with {@link #CU_JIT_LTO JIT_LTO}
*
 * - {@link #CU_JIT_NUM_OPTIONS JIT_NUM_OPTIONS} - The number of {@code CUjit_option} values; a count sentinel rather than an option itself.
*
*
*/
public static final int
CU_JIT_MAX_REGISTERS = 0x0,
CU_JIT_THREADS_PER_BLOCK = 0x1,
CU_JIT_WALL_TIME = 0x2,
CU_JIT_INFO_LOG_BUFFER = 0x3,
CU_JIT_INFO_LOG_BUFFER_SIZE_BYTES = 0x4,
CU_JIT_ERROR_LOG_BUFFER = 0x5,
CU_JIT_ERROR_LOG_BUFFER_SIZE_BYTES = 0x6,
CU_JIT_OPTIMIZATION_LEVEL = 0x7,
CU_JIT_TARGET_FROM_CUCONTEXT = 0x8,
CU_JIT_TARGET = 0x9,
CU_JIT_FALLBACK_STRATEGY = 0xA,
CU_JIT_GENERATE_DEBUG_INFO = 0xB,
CU_JIT_LOG_VERBOSE = 0xC,
CU_JIT_GENERATE_LINE_INFO = 0xD,
CU_JIT_CACHE_MODE = 0xE,
CU_JIT_NEW_SM3X_OPT = 0xF,
CU_JIT_FAST_COMPILE = 0x10,
CU_JIT_GLOBAL_SYMBOL_NAMES = 0x11,
CU_JIT_GLOBAL_SYMBOL_ADDRESSES = 0x12,
CU_JIT_GLOBAL_SYMBOL_COUNT = 0x13,
CU_JIT_LTO = 0x14,
CU_JIT_FTZ = 0x15,
CU_JIT_PREC_DIV = 0x16,
CU_JIT_PREC_SQRT = 0x17,
CU_JIT_FMA = 0x18,
CU_JIT_NUM_OPTIONS = 0x19;
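/*
 * A sketch of passing JIT options to cuModuleLoadDataEx, assuming "ptx" is a null-terminated
 * buffer holding PTX source; options and their values are parallel arrays, and because the
 * native value array is void** an integer value such as the log buffer size is stored as a
 * pointer-sized integer:
 *
 *     try (MemoryStack stack = stackPush()) {
 *         ByteBuffer    infoLog      = stack.malloc(4096);
 *         IntBuffer     options      = stack.ints(CU_JIT_INFO_LOG_BUFFER, CU_JIT_INFO_LOG_BUFFER_SIZE_BYTES);
 *         PointerBuffer optionValues = stack.mallocPointer(2);
 *         optionValues.put(0, memAddress(infoLog)); // where the compiler writes its log
 *         optionValues.put(1, infoLog.remaining()); // capacity of that buffer, in bytes
 *         PointerBuffer pModule = stack.mallocPointer(1);
 *         if (cuModuleLoadDataEx(pModule, ptx, options, optionValues) == CUDA_SUCCESS) {
 *             long module = pModule.get(0);
 *         }
 *     }
 */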
/**
* Online compilation targets. ({@code CUjit_target})
*
* Enum values:
*
*
* - {@link #CU_TARGET_COMPUTE_20 TARGET_COMPUTE_20} - Compute device class 2.0
* - {@link #CU_TARGET_COMPUTE_21 TARGET_COMPUTE_21} - Compute device class 2.1
* - {@link #CU_TARGET_COMPUTE_30 TARGET_COMPUTE_30} - Compute device class 3.0
* - {@link #CU_TARGET_COMPUTE_32 TARGET_COMPUTE_32} - Compute device class 3.2
* - {@link #CU_TARGET_COMPUTE_35 TARGET_COMPUTE_35} - Compute device class 3.5
* - {@link #CU_TARGET_COMPUTE_37 TARGET_COMPUTE_37} - Compute device class 3.7
* - {@link #CU_TARGET_COMPUTE_50 TARGET_COMPUTE_50} - Compute device class 5.0
* - {@link #CU_TARGET_COMPUTE_52 TARGET_COMPUTE_52} - Compute device class 5.2
* - {@link #CU_TARGET_COMPUTE_53 TARGET_COMPUTE_53} - Compute device class 5.3
* - {@link #CU_TARGET_COMPUTE_60 TARGET_COMPUTE_60} - Compute device class 6.0.
* - {@link #CU_TARGET_COMPUTE_61 TARGET_COMPUTE_61} - Compute device class 6.1.
* - {@link #CU_TARGET_COMPUTE_62 TARGET_COMPUTE_62} - Compute device class 6.2.
* - {@link #CU_TARGET_COMPUTE_70 TARGET_COMPUTE_70} - Compute device class 7.0.
* - {@link #CU_TARGET_COMPUTE_72 TARGET_COMPUTE_72} - Compute device class 7.2.
* - {@link #CU_TARGET_COMPUTE_75 TARGET_COMPUTE_75} - Compute device class 7.5.
* - {@link #CU_TARGET_COMPUTE_80 TARGET_COMPUTE_80} - Compute device class 8.0.
* - {@link #CU_TARGET_COMPUTE_86 TARGET_COMPUTE_86} - Compute device class 8.6.
*
*/
public static final int
CU_TARGET_COMPUTE_20 = 20,
CU_TARGET_COMPUTE_21 = 21,
CU_TARGET_COMPUTE_30 = 30,
CU_TARGET_COMPUTE_32 = 32,
CU_TARGET_COMPUTE_35 = 35,
CU_TARGET_COMPUTE_37 = 37,
CU_TARGET_COMPUTE_50 = 50,
CU_TARGET_COMPUTE_52 = 52,
CU_TARGET_COMPUTE_53 = 53,
CU_TARGET_COMPUTE_60 = 60,
CU_TARGET_COMPUTE_61 = 61,
CU_TARGET_COMPUTE_62 = 62,
CU_TARGET_COMPUTE_70 = 70,
CU_TARGET_COMPUTE_72 = 72,
CU_TARGET_COMPUTE_75 = 75,
CU_TARGET_COMPUTE_80 = 80,
CU_TARGET_COMPUTE_86 = 86;
/**
* Cubin matching fallback strategies. ({@code CUjit_fallback})
*
* Enum values:
*
*
 * - {@link #CU_PREFER_PTX PREFER_PTX} - Prefer to compile PTX if exact binary match not found
* - {@link #CU_PREFER_BINARY PREFER_BINARY} - Prefer to fall back to compatible binary code if exact match not found
*
*/
public static final int
CU_PREFER_PTX = 0x0,
CU_PREFER_BINARY = 0x1;
/**
* Caching modes for {@code dlcm}. ({@code CUjit_cacheMode})
*
* Enum values:
*
*
* - {@link #CU_JIT_CACHE_OPTION_NONE JIT_CACHE_OPTION_NONE} - Compile with no -dlcm flag specified
* - {@link #CU_JIT_CACHE_OPTION_CG JIT_CACHE_OPTION_CG} - Compile with L1 cache disabled
* - {@link #CU_JIT_CACHE_OPTION_CA JIT_CACHE_OPTION_CA} - Compile with L1 cache enabled
*
*/
public static final int
CU_JIT_CACHE_OPTION_NONE = 0x0,
CU_JIT_CACHE_OPTION_CG = 0x1,
CU_JIT_CACHE_OPTION_CA = 0x2;
/**
* Device code formats. ({@code CUjitInputType})
*
* Enum values:
*
*
* - {@link #CU_JIT_INPUT_CUBIN JIT_INPUT_CUBIN} -
* Compiled device-class-specific device code
*
*
 * Applicable options: none
*
* - {@link #CU_JIT_INPUT_PTX JIT_INPUT_PTX} -
* PTX source code.
*
*
 * Applicable options: PTX compiler options
*
* - {@link #CU_JIT_INPUT_FATBINARY JIT_INPUT_FATBINARY} -
* Bundle of multiple cubins and/or PTX of some device code.
*
*
 * Applicable options: PTX compiler options, {@link #CU_JIT_FALLBACK_STRATEGY JIT_FALLBACK_STRATEGY}
*
* - {@link #CU_JIT_INPUT_OBJECT JIT_INPUT_OBJECT} -
* Host object with embedded device code.
*
*
 * Applicable options: PTX compiler options, {@link #CU_JIT_FALLBACK_STRATEGY JIT_FALLBACK_STRATEGY}
*
* - {@link #CU_JIT_INPUT_LIBRARY JIT_INPUT_LIBRARY} -
* Archive of host objects with embedded device code.
*
*
 * Applicable options: PTX compiler options, {@link #CU_JIT_FALLBACK_STRATEGY JIT_FALLBACK_STRATEGY}
*
* - {@link #CU_JIT_INPUT_NVVM JIT_INPUT_NVVM} -
* High-level intermediate code for link-time optimization.
*
*
 * Applicable options: NVVM compiler options, PTX compiler options
*
*
*/
public static final int
CU_JIT_INPUT_CUBIN = 0,
CU_JIT_INPUT_PTX = 1,
CU_JIT_INPUT_FATBINARY = 2,
CU_JIT_INPUT_OBJECT = 3,
CU_JIT_INPUT_LIBRARY = 4,
CU_JIT_INPUT_NVVM = 5;
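/*
 * A sketch of feeding one of the input types above to the linker, assuming "ptx" holds
 * null-terminated PTX source and that the ByteBuffer overloads of the cuLink* functions
 * are used (option arrays are omitted by passing null):
 *
 *     try (MemoryStack stack = stackPush()) {
 *         PointerBuffer pState = stack.mallocPointer(1);
 *         if (cuLinkCreate(null, null, pState) == CUDA_SUCCESS) {
 *             long state = pState.get(0);
 *             cuLinkAddData(state, CU_JIT_INPUT_PTX, ptx, stack.ASCII("my_kernels"), null, null);
 *             PointerBuffer cubin = stack.mallocPointer(1);
 *             PointerBuffer size  = stack.mallocPointer(1);
 *             cuLinkComplete(state, cubin, size); // cubin/size remain valid until cuLinkDestroy
 *             cuLinkDestroy(state);
 *         }
 *     }
 */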
/**
* Flags to register a graphics resource. ({@code CUgraphicsRegisterFlags})
*
* Enum values:
*
*
* - {@link #CU_GRAPHICS_REGISTER_FLAGS_NONE GRAPHICS_REGISTER_FLAGS_NONE}
* - {@link #CU_GRAPHICS_REGISTER_FLAGS_READ_ONLY GRAPHICS_REGISTER_FLAGS_READ_ONLY}
* - {@link #CU_GRAPHICS_REGISTER_FLAGS_WRITE_DISCARD GRAPHICS_REGISTER_FLAGS_WRITE_DISCARD}
* - {@link #CU_GRAPHICS_REGISTER_FLAGS_SURFACE_LDST GRAPHICS_REGISTER_FLAGS_SURFACE_LDST}
* - {@link #CU_GRAPHICS_REGISTER_FLAGS_TEXTURE_GATHER GRAPHICS_REGISTER_FLAGS_TEXTURE_GATHER}
*
*/
public static final int
CU_GRAPHICS_REGISTER_FLAGS_NONE = 0x0,
CU_GRAPHICS_REGISTER_FLAGS_READ_ONLY = 0x1,
CU_GRAPHICS_REGISTER_FLAGS_WRITE_DISCARD = 0x2,
CU_GRAPHICS_REGISTER_FLAGS_SURFACE_LDST = 0x4,
CU_GRAPHICS_REGISTER_FLAGS_TEXTURE_GATHER = 0x8;
/**
* Flags for mapping and unmapping interop resources. ({@code CUgraphicsMapResourceFlags})
*
* Enum values:
*
*
* - {@link #CU_GRAPHICS_MAP_RESOURCE_FLAGS_NONE GRAPHICS_MAP_RESOURCE_FLAGS_NONE}
* - {@link #CU_GRAPHICS_MAP_RESOURCE_FLAGS_READ_ONLY GRAPHICS_MAP_RESOURCE_FLAGS_READ_ONLY}
* - {@link #CU_GRAPHICS_MAP_RESOURCE_FLAGS_WRITE_DISCARD GRAPHICS_MAP_RESOURCE_FLAGS_WRITE_DISCARD}
*
*/
public static final int
CU_GRAPHICS_MAP_RESOURCE_FLAGS_NONE = 0x0,
CU_GRAPHICS_MAP_RESOURCE_FLAGS_READ_ONLY = 0x1,
CU_GRAPHICS_MAP_RESOURCE_FLAGS_WRITE_DISCARD = 0x2;
/**
* Array indices for cube faces. ({@code CUarray_cubemap_face})
*
* Enum values:
*
*
* - {@link #CU_CUBEMAP_FACE_POSITIVE_X CUBEMAP_FACE_POSITIVE_X} - Positive X face of cubemap
* - {@link #CU_CUBEMAP_FACE_NEGATIVE_X CUBEMAP_FACE_NEGATIVE_X} - Negative X face of cubemap
* - {@link #CU_CUBEMAP_FACE_POSITIVE_Y CUBEMAP_FACE_POSITIVE_Y} - Positive Y face of cubemap
* - {@link #CU_CUBEMAP_FACE_NEGATIVE_Y CUBEMAP_FACE_NEGATIVE_Y} - Negative Y face of cubemap
* - {@link #CU_CUBEMAP_FACE_POSITIVE_Z CUBEMAP_FACE_POSITIVE_Z} - Positive Z face of cubemap
* - {@link #CU_CUBEMAP_FACE_NEGATIVE_Z CUBEMAP_FACE_NEGATIVE_Z} - Negative Z face of cubemap
*
*/
public static final int
CU_CUBEMAP_FACE_POSITIVE_X = 0x0,
CU_CUBEMAP_FACE_NEGATIVE_X = 0x1,
CU_CUBEMAP_FACE_POSITIVE_Y = 0x2,
CU_CUBEMAP_FACE_NEGATIVE_Y = 0x3,
CU_CUBEMAP_FACE_POSITIVE_Z = 0x4,
CU_CUBEMAP_FACE_NEGATIVE_Z = 0x5;
/**
* Limits. ({@code CUlimit})
*
* Enum values:
*
*
* - {@link #CU_LIMIT_STACK_SIZE LIMIT_STACK_SIZE} - GPU thread stack size
* - {@link #CU_LIMIT_PRINTF_FIFO_SIZE LIMIT_PRINTF_FIFO_SIZE} - GPU printf FIFO size
* - {@link #CU_LIMIT_MALLOC_HEAP_SIZE LIMIT_MALLOC_HEAP_SIZE} - GPU malloc heap size
* - {@link #CU_LIMIT_DEV_RUNTIME_SYNC_DEPTH LIMIT_DEV_RUNTIME_SYNC_DEPTH} - GPU device runtime launch synchronize depth
* - {@link #CU_LIMIT_DEV_RUNTIME_PENDING_LAUNCH_COUNT LIMIT_DEV_RUNTIME_PENDING_LAUNCH_COUNT} - GPU device runtime pending launch count
 * - {@link #CU_LIMIT_MAX_L2_FETCH_GRANULARITY LIMIT_MAX_L2_FETCH_GRANULARITY} - A value between 0 and 128 that indicates the maximum fetch granularity of L2 (in bytes). This is a hint.
* - {@link #CU_LIMIT_PERSISTING_L2_CACHE_SIZE LIMIT_PERSISTING_L2_CACHE_SIZE} - A size in bytes for L2 persisting lines cache size
*
*/
public static final int
CU_LIMIT_STACK_SIZE = 0x00,
CU_LIMIT_PRINTF_FIFO_SIZE = 0x01,
CU_LIMIT_MALLOC_HEAP_SIZE = 0x02,
CU_LIMIT_DEV_RUNTIME_SYNC_DEPTH = 0x03,
CU_LIMIT_DEV_RUNTIME_PENDING_LAUNCH_COUNT = 0x04,
CU_LIMIT_MAX_L2_FETCH_GRANULARITY = 0x05,
CU_LIMIT_PERSISTING_L2_CACHE_SIZE = 0x06;
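/*
 * A sketch of adjusting and reading back a context limit, here growing the device-side
 * printf FIFO before launching kernels that print (the 4 MiB figure is illustrative):
 *
 *     cuCtxSetLimit(CU_LIMIT_PRINTF_FIFO_SIZE, 4 * 1024 * 1024);
 *     try (MemoryStack stack = stackPush()) {
 *         PointerBuffer pValue = stack.mallocPointer(1);
 *         cuCtxGetLimit(pValue, CU_LIMIT_PRINTF_FIFO_SIZE);
 *         long fifoSize = pValue.get(0); // the driver may have clamped the request
 *     }
 */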
/**
* Resource types. ({@code CUresourcetype})
*
* Enum values:
*
*
 * - {@link #CU_RESOURCE_TYPE_ARRAY RESOURCE_TYPE_ARRAY} - Array resource
* - {@link #CU_RESOURCE_TYPE_MIPMAPPED_ARRAY RESOURCE_TYPE_MIPMAPPED_ARRAY} - Mipmapped array resource
* - {@link #CU_RESOURCE_TYPE_LINEAR RESOURCE_TYPE_LINEAR} - Linear resource
* - {@link #CU_RESOURCE_TYPE_PITCH2D RESOURCE_TYPE_PITCH2D} - Pitch 2D resource
*
*/
public static final int
CU_RESOURCE_TYPE_ARRAY = 0x0,
CU_RESOURCE_TYPE_MIPMAPPED_ARRAY = 0x1,
CU_RESOURCE_TYPE_LINEAR = 0x2,
CU_RESOURCE_TYPE_PITCH2D = 0x3;
/**
* Specifies performance hint with {@link CUaccessPolicyWindow} for {@code hitProp} and {@code missProp} members. ({@code CUaccessProperty})
*
* Enum values:
*
*
* - {@link #CU_ACCESS_PROPERTY_NORMAL ACCESS_PROPERTY_NORMAL} - Normal cache persistence.
 * - {@link #CU_ACCESS_PROPERTY_STREAMING ACCESS_PROPERTY_STREAMING} - Streaming access is less likely to persist in the cache.
* - {@link #CU_ACCESS_PROPERTY_PERSISTING ACCESS_PROPERTY_PERSISTING} - Persisting access is more likely to persist in cache.
*
*/
public static final int
CU_ACCESS_PROPERTY_NORMAL = 0,
CU_ACCESS_PROPERTY_STREAMING = 1,
CU_ACCESS_PROPERTY_PERSISTING = 2;
/**
* Graph node types. ({@code CUgraphNodeType})
*
* Enum values:
*
*
* - {@link #CU_GRAPH_NODE_TYPE_KERNEL GRAPH_NODE_TYPE_KERNEL} - GPU kernel node
* - {@link #CU_GRAPH_NODE_TYPE_MEMCPY GRAPH_NODE_TYPE_MEMCPY} - Memcpy node
* - {@link #CU_GRAPH_NODE_TYPE_MEMSET GRAPH_NODE_TYPE_MEMSET} - Memset node
* - {@link #CU_GRAPH_NODE_TYPE_HOST GRAPH_NODE_TYPE_HOST} - Host (executable) node
* - {@link #CU_GRAPH_NODE_TYPE_GRAPH GRAPH_NODE_TYPE_GRAPH} - Node which executes an embedded graph
* - {@link #CU_GRAPH_NODE_TYPE_EMPTY GRAPH_NODE_TYPE_EMPTY} - Empty (no-op) node
* - {@link #CU_GRAPH_NODE_TYPE_WAIT_EVENT GRAPH_NODE_TYPE_WAIT_EVENT} - External event wait node
* - {@link #CU_GRAPH_NODE_TYPE_EVENT_RECORD GRAPH_NODE_TYPE_EVENT_RECORD} - External event record node
* - {@link #CU_GRAPH_NODE_TYPE_EXT_SEMAS_SIGNAL GRAPH_NODE_TYPE_EXT_SEMAS_SIGNAL} - External semaphore signal node
* - {@link #CU_GRAPH_NODE_TYPE_EXT_SEMAS_WAIT GRAPH_NODE_TYPE_EXT_SEMAS_WAIT} - External semaphore wait node
* - {@link #CU_GRAPH_NODE_TYPE_MEM_ALLOC GRAPH_NODE_TYPE_MEM_ALLOC} - Memory Allocation Node
* - {@link #CU_GRAPH_NODE_TYPE_MEM_FREE GRAPH_NODE_TYPE_MEM_FREE} - Memory Free Node
*
*/
public static final int
CU_GRAPH_NODE_TYPE_KERNEL = 0,
CU_GRAPH_NODE_TYPE_MEMCPY = 1,
CU_GRAPH_NODE_TYPE_MEMSET = 2,
CU_GRAPH_NODE_TYPE_HOST = 3,
CU_GRAPH_NODE_TYPE_GRAPH = 4,
CU_GRAPH_NODE_TYPE_EMPTY = 5,
CU_GRAPH_NODE_TYPE_WAIT_EVENT = 6,
CU_GRAPH_NODE_TYPE_EVENT_RECORD = 7,
CU_GRAPH_NODE_TYPE_EXT_SEMAS_SIGNAL = 8,
CU_GRAPH_NODE_TYPE_EXT_SEMAS_WAIT = 9,
CU_GRAPH_NODE_TYPE_MEM_ALLOC = 10,
CU_GRAPH_NODE_TYPE_MEM_FREE = 11;
/**
* {@code CUsynchronizationPolicy}
*
* Enum values:
*
*
* - {@link #CU_SYNC_POLICY_AUTO SYNC_POLICY_AUTO}
* - {@link #CU_SYNC_POLICY_SPIN SYNC_POLICY_SPIN}
* - {@link #CU_SYNC_POLICY_YIELD SYNC_POLICY_YIELD}
* - {@link #CU_SYNC_POLICY_BLOCKING_SYNC SYNC_POLICY_BLOCKING_SYNC}
*
*/
public static final int
CU_SYNC_POLICY_AUTO = 1,
CU_SYNC_POLICY_SPIN = 2,
CU_SYNC_POLICY_YIELD = 3,
CU_SYNC_POLICY_BLOCKING_SYNC = 4;
/**
* Graph kernel node Attributes ({@code CUkernelNodeAttrID})
*
* Enum values:
*
*
 * - {@link #CU_KERNEL_NODE_ATTRIBUTE_ACCESS_POLICY_WINDOW KERNEL_NODE_ATTRIBUTE_ACCESS_POLICY_WINDOW} - Identifier for {@link CUkernelNodeAttrValue}{@code ::accessPolicyWindow}.
* - {@link #CU_KERNEL_NODE_ATTRIBUTE_COOPERATIVE KERNEL_NODE_ATTRIBUTE_COOPERATIVE} - Allows a kernel node to be cooperative (see {@link #cuLaunchCooperativeKernel LaunchCooperativeKernel}).
*
*/
public static final int
CU_KERNEL_NODE_ATTRIBUTE_ACCESS_POLICY_WINDOW = 1,
CU_KERNEL_NODE_ATTRIBUTE_COOPERATIVE = 2;
/**
* Possible stream capture statuses returned by {@link #cuStreamIsCapturing StreamIsCapturing}. ({@code CUstreamCaptureStatus})
*
* Enum values:
*
*
* - {@link #CU_STREAM_CAPTURE_STATUS_NONE STREAM_CAPTURE_STATUS_NONE} - Stream is not capturing
* - {@link #CU_STREAM_CAPTURE_STATUS_ACTIVE STREAM_CAPTURE_STATUS_ACTIVE} - Stream is actively capturing
* - {@link #CU_STREAM_CAPTURE_STATUS_INVALIDATED STREAM_CAPTURE_STATUS_INVALIDATED} - Stream is part of a capture sequence that has been invalidated, but not terminated
*
*/
public static final int
CU_STREAM_CAPTURE_STATUS_NONE = 0x0,
CU_STREAM_CAPTURE_STATUS_ACTIVE = 0x1,
CU_STREAM_CAPTURE_STATUS_INVALIDATED = 0x2;
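/*
 * A sketch of checking the capture status before queueing work, assuming "stream" is a
 * valid CUstream handle:
 *
 *     try (MemoryStack stack = stackPush()) {
 *         IntBuffer status = stack.mallocInt(1);
 *         if (cuStreamIsCapturing(stream, status) == CUDA_SUCCESS) {
 *             boolean capturing = status.get(0) == CU_STREAM_CAPTURE_STATUS_ACTIVE;
 *         }
 *     }
 */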
/**
* Possible modes for stream capture thread interactions. ({@code CUstreamCaptureMode})
*
* For more details see {@link #cuStreamBeginCapture StreamBeginCapture} and {@link #cuThreadExchangeStreamCaptureMode ThreadExchangeStreamCaptureMode}
*
* Enum values:
*
*
* - {@link #CU_STREAM_CAPTURE_MODE_GLOBAL STREAM_CAPTURE_MODE_GLOBAL}
* - {@link #CU_STREAM_CAPTURE_MODE_THREAD_LOCAL STREAM_CAPTURE_MODE_THREAD_LOCAL}
* - {@link #CU_STREAM_CAPTURE_MODE_RELAXED STREAM_CAPTURE_MODE_RELAXED}
*
*/
public static final int
CU_STREAM_CAPTURE_MODE_GLOBAL = 0,
CU_STREAM_CAPTURE_MODE_THREAD_LOCAL = 1,
CU_STREAM_CAPTURE_MODE_RELAXED = 2;
/**
* Stream Attributes ({@code CUstreamAttrID})
*
* Enum values:
*
*
 * - {@link #CU_STREAM_ATTRIBUTE_ACCESS_POLICY_WINDOW STREAM_ATTRIBUTE_ACCESS_POLICY_WINDOW} - Identifier for {@link CUstreamAttrValue}{@code ::accessPolicyWindow}.
* - {@link #CU_STREAM_ATTRIBUTE_SYNCHRONIZATION_POLICY STREAM_ATTRIBUTE_SYNCHRONIZATION_POLICY} - {@code CUsynchronizationPolicy} for work queued up in this stream
*
*/
public static final int
CU_STREAM_ATTRIBUTE_ACCESS_POLICY_WINDOW = 1,
CU_STREAM_ATTRIBUTE_SYNCHRONIZATION_POLICY = 3;
/**
* Flags to specify search options. For more details see {@link #cuGetProcAddress GetProcAddress}. ({@code CUdriverProcAddress_flags})
*
* Enum values:
*
*
* - {@link #CU_GET_PROC_ADDRESS_DEFAULT GET_PROC_ADDRESS_DEFAULT} - Default search mode for driver symbols.
* - {@link #CU_GET_PROC_ADDRESS_LEGACY_STREAM GET_PROC_ADDRESS_LEGACY_STREAM} - Search for legacy versions of driver symbols.
* - {@link #CU_GET_PROC_ADDRESS_PER_THREAD_DEFAULT_STREAM GET_PROC_ADDRESS_PER_THREAD_DEFAULT_STREAM} - Search for per-thread versions of driver symbols.
*
*/
public static final int
CU_GET_PROC_ADDRESS_DEFAULT = 0,
CU_GET_PROC_ADDRESS_LEGACY_STREAM = 1 << 0,
CU_GET_PROC_ADDRESS_PER_THREAD_DEFAULT_STREAM = 1 << 1;
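/*
 * A sketch of resolving a driver entry point with these flags, assuming a CUDA 11.3+
 * driver at runtime (11030 is the encoded version) and the CharSequence overload of
 * cuGetProcAddress:
 *
 *     try (MemoryStack stack = stackPush()) {
 *         PointerBuffer pfn = stack.mallocPointer(1);
 *         if (cuGetProcAddress("cuMemcpyAsync", pfn, 11030, CU_GET_PROC_ADDRESS_PER_THREAD_DEFAULT_STREAM) == CUDA_SUCCESS) {
 *             long fnMemcpyAsync = pfn.get(0); // invokable via org.lwjgl.system.JNI
 *         }
 *     }
 */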
/**
* Execution Affinity Types
*
* ({@code CUexecAffinityType})
*
* Enum values:
*
*
* - {@link #CU_EXEC_AFFINITY_TYPE_SM_COUNT EXEC_AFFINITY_TYPE_SM_COUNT} - Create a context with limited SMs.
* - {@link #CU_EXEC_AFFINITY_TYPE_MAX EXEC_AFFINITY_TYPE_MAX}
*
*/
public static final int
CU_EXEC_AFFINITY_TYPE_SM_COUNT = 0,
CU_EXEC_AFFINITY_TYPE_MAX = 1;
/**
* Error codes. ({@code CUresult})
*
* Enum values:
*
*
* - {@link #CUDA_SUCCESS CUDA_SUCCESS} -
* The API call returned with no errors.
*
*
 * In the case of query calls, this also means that the operation being queried is complete (see {@link #cuEventQuery EventQuery} and {@link #cuStreamQuery StreamQuery}).
*
* - {@link #CUDA_ERROR_INVALID_VALUE CUDA_ERROR_INVALID_VALUE} - This indicates that one or more of the parameters passed to the API call is not within an acceptable range of values.
* - {@link #CUDA_ERROR_OUT_OF_MEMORY CUDA_ERROR_OUT_OF_MEMORY} - The API call failed because it was unable to allocate enough memory to perform the requested operation.
* - {@link #CUDA_ERROR_NOT_INITIALIZED CUDA_ERROR_NOT_INITIALIZED} - This indicates that the CUDA driver has not been initialized with {@link #cuInit Init} or that initialization has failed.
* - {@link #CUDA_ERROR_DEINITIALIZED CUDA_ERROR_DEINITIALIZED} - This indicates that the CUDA driver is in the process of shutting down.
* - {@link #CUDA_ERROR_PROFILER_DISABLED CUDA_ERROR_PROFILER_DISABLED} -
 * This indicates that the profiler is not initialized for this run. This can happen when the application is running with external profiling tools like
 * the Visual Profiler.
*
* - {@link #CUDA_ERROR_PROFILER_NOT_INITIALIZED CUDA_ERROR_PROFILER_NOT_INITIALIZED} -
* Deprecated: This error return is deprecated as of CUDA 5.0. It is no longer an error to attempt to enable/disable the profiling via
* {@link CUDAProfiler#cuProfilerStart ProfilerStart} or {@link CUDAProfiler#cuProfilerStop ProfilerStop} without initialization.
*
* - {@link #CUDA_ERROR_PROFILER_ALREADY_STARTED CUDA_ERROR_PROFILER_ALREADY_STARTED} -
* Deprecated: This error return is deprecated as of CUDA 5.0. It is no longer an error to call {@link CUDAProfiler#cuProfilerStart ProfilerStart} when profiling is already enabled.
*
* - {@link #CUDA_ERROR_PROFILER_ALREADY_STOPPED CUDA_ERROR_PROFILER_ALREADY_STOPPED} -
* Deprecated: This error return is deprecated as of CUDA 5.0. It is no longer an error to call {@link CUDAProfiler#cuProfilerStop ProfilerStop} when profiling is already disabled.
*
* - {@link #CUDA_ERROR_STUB_LIBRARY CUDA_ERROR_STUB_LIBRARY} -
 * This indicates that the CUDA driver that the application has loaded is a stub library. Applications that run with the stub rather than a real
 * driver loaded will result in the CUDA API returning this error.
*
* - {@link #CUDA_ERROR_NO_DEVICE CUDA_ERROR_NO_DEVICE} - This indicates that no CUDA-capable devices were detected by the installed CUDA driver.
* - {@link #CUDA_ERROR_INVALID_DEVICE CUDA_ERROR_INVALID_DEVICE} -
* This indicates that the device ordinal supplied by the user does not correspond to a valid CUDA device or that the action requested is invalid for
* the specified device.
*
* - {@link #CUDA_ERROR_DEVICE_NOT_LICENSED CUDA_ERROR_DEVICE_NOT_LICENSED} - This error indicates that the Grid license is not applied.
* - {@link #CUDA_ERROR_INVALID_IMAGE CUDA_ERROR_INVALID_IMAGE} - This indicates that the device kernel image is invalid. This can also indicate an invalid CUDA module.
* - {@link #CUDA_ERROR_INVALID_CONTEXT CUDA_ERROR_INVALID_CONTEXT} -
* This most frequently indicates that there is no context bound to the current thread. This can also be returned if the context passed to an API call
* is not a valid handle (such as a context that has had {@link #cuCtxDestroy CtxDestroy} invoked on it). This can also be returned if a user mixes different API
* versions (i.e. 3010 context with 3020 API calls). See {@link #cuCtxGetApiVersion CtxGetApiVersion} for more details.
*
* - {@link #CUDA_ERROR_CONTEXT_ALREADY_CURRENT CUDA_ERROR_CONTEXT_ALREADY_CURRENT} -
 * This indicates that the context being supplied as a parameter to the API call was already the active context. Deprecated: This error return is
* deprecated as of CUDA 3.2. It is no longer an error to attempt to push the active context via {@link #cuCtxPushCurrent CtxPushCurrent}.
*
* - {@link #CUDA_ERROR_MAP_FAILED CUDA_ERROR_MAP_FAILED} - This indicates that a map or register operation has failed.
* - {@link #CUDA_ERROR_UNMAP_FAILED CUDA_ERROR_UNMAP_FAILED} - This indicates that an unmap or unregister operation has failed.
* - {@link #CUDA_ERROR_ARRAY_IS_MAPPED CUDA_ERROR_ARRAY_IS_MAPPED} - This indicates that the specified array is currently mapped and thus cannot be destroyed.
* - {@link #CUDA_ERROR_ALREADY_MAPPED CUDA_ERROR_ALREADY_MAPPED} - This indicates that the resource is already mapped.
* - {@link #CUDA_ERROR_NO_BINARY_FOR_GPU CUDA_ERROR_NO_BINARY_FOR_GPU} -
* This indicates that there is no kernel image available that is suitable for the device. This can occur when a user specifies code generation
* options for a particular CUDA source file that do not include the corresponding device configuration.
*
* - {@link #CUDA_ERROR_ALREADY_ACQUIRED CUDA_ERROR_ALREADY_ACQUIRED} - This indicates that a resource has already been acquired.
* - {@link #CUDA_ERROR_NOT_MAPPED CUDA_ERROR_NOT_MAPPED} - This indicates that a resource is not mapped.
* - {@link #CUDA_ERROR_NOT_MAPPED_AS_ARRAY CUDA_ERROR_NOT_MAPPED_AS_ARRAY} - This indicates that a mapped resource is not available for access as an array.
* - {@link #CUDA_ERROR_NOT_MAPPED_AS_POINTER CUDA_ERROR_NOT_MAPPED_AS_POINTER} - This indicates that a mapped resource is not available for access as a pointer.
* - {@link #CUDA_ERROR_ECC_UNCORRECTABLE CUDA_ERROR_ECC_UNCORRECTABLE} - This indicates that an uncorrectable ECC error was detected during execution.
* - {@link #CUDA_ERROR_UNSUPPORTED_LIMIT CUDA_ERROR_UNSUPPORTED_LIMIT} - This indicates that the {@code CUlimit} passed to the API call is not supported by the active device.
* - {@link #CUDA_ERROR_CONTEXT_ALREADY_IN_USE CUDA_ERROR_CONTEXT_ALREADY_IN_USE} -
* This indicates that the {@code CUcontext} passed to the API call can only be bound to a single CPU thread at a time but is already bound to a CPU thread.
*
* - {@link #CUDA_ERROR_PEER_ACCESS_UNSUPPORTED CUDA_ERROR_PEER_ACCESS_UNSUPPORTED} - This indicates that peer access is not supported across the given devices.
* - {@link #CUDA_ERROR_INVALID_PTX CUDA_ERROR_INVALID_PTX} - This indicates that a PTX JIT compilation failed.
* - {@link #CUDA_ERROR_INVALID_GRAPHICS_CONTEXT CUDA_ERROR_INVALID_GRAPHICS_CONTEXT} - This indicates an error with OpenGL or DirectX context.
* - {@link #CUDA_ERROR_NVLINK_UNCORRECTABLE CUDA_ERROR_NVLINK_UNCORRECTABLE} - This indicates that an uncorrectable NVLink error was detected during the execution.
* - {@link #CUDA_ERROR_JIT_COMPILER_NOT_FOUND CUDA_ERROR_JIT_COMPILER_NOT_FOUND} - This indicates that the PTX JIT compiler library was not found.
* - {@link #CUDA_ERROR_UNSUPPORTED_PTX_VERSION CUDA_ERROR_UNSUPPORTED_PTX_VERSION} - This indicates that the provided PTX was compiled with an unsupported toolchain.
* - {@link #CUDA_ERROR_JIT_COMPILATION_DISABLED CUDA_ERROR_JIT_COMPILATION_DISABLED} - This indicates that the PTX JIT compilation was disabled.
* - {@link #CUDA_ERROR_UNSUPPORTED_EXEC_AFFINITY CUDA_ERROR_UNSUPPORTED_EXEC_AFFINITY} - This indicates that the {@code CUexecAffinityType} passed to the API call is not supported by the active device.
* - {@link #CUDA_ERROR_INVALID_SOURCE CUDA_ERROR_INVALID_SOURCE} - This indicates that the device kernel source is invalid. This includes compilation/linker errors encountered in device code or user error.
* - {@link #CUDA_ERROR_FILE_NOT_FOUND CUDA_ERROR_FILE_NOT_FOUND} - This indicates that the file specified was not found.
* - {@link #CUDA_ERROR_SHARED_OBJECT_SYMBOL_NOT_FOUND CUDA_ERROR_SHARED_OBJECT_SYMBOL_NOT_FOUND} - This indicates that a link to a shared object failed to resolve.
* - {@link #CUDA_ERROR_SHARED_OBJECT_INIT_FAILED CUDA_ERROR_SHARED_OBJECT_INIT_FAILED} - This indicates that initialization of a shared object failed.
* - {@link #CUDA_ERROR_OPERATING_SYSTEM CUDA_ERROR_OPERATING_SYSTEM} - This indicates that an OS call failed.
* - {@link #CUDA_ERROR_INVALID_HANDLE CUDA_ERROR_INVALID_HANDLE} -
* This indicates that a resource handle passed to the API call was not valid. Resource handles are opaque types like {@code CUstream} and
* {@code CUevent}.
*
* - {@link #CUDA_ERROR_ILLEGAL_STATE CUDA_ERROR_ILLEGAL_STATE} - This indicates that a resource required by the API call is not in a valid state to perform the requested operation.
* - {@link #CUDA_ERROR_NOT_FOUND CUDA_ERROR_NOT_FOUND} -
* This indicates that a named symbol was not found. Examples of symbols are global/constant variable names, driver function names, texture names, and
* surface names.
*
* - {@link #CUDA_ERROR_NOT_READY CUDA_ERROR_NOT_READY} -
* This indicates that asynchronous operations issued previously have not completed yet. This result is not actually an error, but must be indicated
* differently than {@link #CUDA_SUCCESS} (which indicates completion). Calls that may return this value include {@link #cuEventQuery EventQuery} and {@link #cuStreamQuery StreamQuery}.
*
* - {@link #CUDA_ERROR_ILLEGAL_ADDRESS CUDA_ERROR_ILLEGAL_ADDRESS} -
* While executing a kernel, the device encountered a load or store instruction on an invalid memory address. This leaves the process in an
* inconsistent state and any further CUDA work will return the same error. To continue using CUDA, the process must be terminated and relaunched.
*
* - {@link #CUDA_ERROR_LAUNCH_OUT_OF_RESOURCES CUDA_ERROR_LAUNCH_OUT_OF_RESOURCES} -
* This indicates that a launch did not occur because it did not have appropriate resources. This error usually indicates that the user has attempted
* to pass too many arguments to the device kernel, or the kernel launch specifies too many threads for the kernel's register count. Passing arguments
* of the wrong size (i.e. a 64-bit pointer when a 32-bit int is expected) is equivalent to passing too many arguments and can also result in this
* error.
*
* - {@link #CUDA_ERROR_LAUNCH_TIMEOUT CUDA_ERROR_LAUNCH_TIMEOUT} -
* This indicates that the device kernel took too long to execute. This can only occur if timeouts are enabled - see the device attribute
* {@link #CU_DEVICE_ATTRIBUTE_KERNEL_EXEC_TIMEOUT DEVICE_ATTRIBUTE_KERNEL_EXEC_TIMEOUT} for more information. This leaves the process in an inconsistent state and any further CUDA work will
* return the same error. To continue using CUDA, the process must be terminated and relaunched.
*
* - {@link #CUDA_ERROR_LAUNCH_INCOMPATIBLE_TEXTURING CUDA_ERROR_LAUNCH_INCOMPATIBLE_TEXTURING} - This error indicates a kernel launch that uses an incompatible texturing mode.
* - {@link #CUDA_ERROR_PEER_ACCESS_ALREADY_ENABLED CUDA_ERROR_PEER_ACCESS_ALREADY_ENABLED} -
* This error indicates that a call to {@link #cuCtxEnablePeerAccess CtxEnablePeerAccess} is trying to re-enable peer access to a context which has already had peer access to
* it enabled.
*
* - {@link #CUDA_ERROR_PEER_ACCESS_NOT_ENABLED CUDA_ERROR_PEER_ACCESS_NOT_ENABLED} -
* This error indicates that {@link #cuCtxDisablePeerAccess CtxDisablePeerAccess} is trying to disable peer access which has not been enabled yet via {@link #cuCtxEnablePeerAccess CtxEnablePeerAccess}.
*
* - {@link #CUDA_ERROR_PRIMARY_CONTEXT_ACTIVE CUDA_ERROR_PRIMARY_CONTEXT_ACTIVE} - This error indicates that the primary context for the specified device has already been initialized.
* - {@link #CUDA_ERROR_CONTEXT_IS_DESTROYED CUDA_ERROR_CONTEXT_IS_DESTROYED} -
* This error indicates that the context current to the calling thread has been destroyed using {@link #cuCtxDestroy CtxDestroy}, or is a primary context which has not
* yet been initialized.
*
* - {@link #CUDA_ERROR_ASSERT CUDA_ERROR_ASSERT} -
* A device-side assert triggered during kernel execution. The context cannot be used anymore, and must be destroyed. All existing device memory
* allocations from this context are invalid and must be reconstructed if the program is to continue using CUDA.
*
* - {@link #CUDA_ERROR_TOO_MANY_PEERS CUDA_ERROR_TOO_MANY_PEERS} -
* This error indicates that the hardware resources required to enable peer access have been exhausted for one or more of the devices passed to
* {@link #cuCtxEnablePeerAccess CtxEnablePeerAccess}.
*
* - {@link #CUDA_ERROR_HOST_MEMORY_ALREADY_REGISTERED CUDA_ERROR_HOST_MEMORY_ALREADY_REGISTERED} - This error indicates that the memory range passed to {@link #cuMemHostRegister MemHostRegister} has already been registered.
* - {@link #CUDA_ERROR_HOST_MEMORY_NOT_REGISTERED CUDA_ERROR_HOST_MEMORY_NOT_REGISTERED} - This error indicates that the pointer passed to {@link #cuMemHostUnregister MemHostUnregister} does not correspond to any currently registered memory region.
* - {@link #CUDA_ERROR_HARDWARE_STACK_ERROR CUDA_ERROR_HARDWARE_STACK_ERROR} -
* While executing a kernel, the device encountered a stack error. This can be due to stack corruption or exceeding the stack size limit. This leaves
* the process in an inconsistent state and any further CUDA work will return the same error. To continue using CUDA, the process must be terminated
* and relaunched.
*
* - {@link #CUDA_ERROR_ILLEGAL_INSTRUCTION CUDA_ERROR_ILLEGAL_INSTRUCTION} -
* While executing a kernel, the device encountered an illegal instruction. This leaves the process in an inconsistent state and any further CUDA work
* will return the same error. To continue using CUDA, the process must be terminated and relaunched.
*
* - {@link #CUDA_ERROR_MISALIGNED_ADDRESS CUDA_ERROR_MISALIGNED_ADDRESS} -
* While executing a kernel, the device encountered a load or store instruction on a memory address which is not aligned. This leaves the process in
* an inconsistent state and any further CUDA work will return the same error. To continue using CUDA, the process must be terminated and relaunched.
*
* - {@link #CUDA_ERROR_INVALID_ADDRESS_SPACE CUDA_ERROR_INVALID_ADDRESS_SPACE} -
* While executing a kernel, the device encountered an instruction which can only operate on memory locations in certain address spaces (global,
* shared, or local), but was supplied a memory address not belonging to an allowed address space. This leaves the process in an inconsistent state
* and any further CUDA work will return the same error. To continue using CUDA, the process must be terminated and relaunched.
*
* - {@link #CUDA_ERROR_INVALID_PC CUDA_ERROR_INVALID_PC} -
* While executing a kernel, the device program counter wrapped its address space. This leaves the process in an inconsistent state and any further
* CUDA work will return the same error. To continue using CUDA, the process must be terminated and relaunched.
*
* - {@link #CUDA_ERROR_LAUNCH_FAILED CUDA_ERROR_LAUNCH_FAILED} -
* An exception occurred on the device while executing a kernel. Common causes include dereferencing an invalid device pointer and accessing out of
* bounds shared memory. Less common cases can be system specific - more information about these cases can be found in the system specific user guide.
* This leaves the process in an inconsistent state and any further CUDA work will return the same error. To continue using CUDA, the process must be
* terminated and relaunched.
*
* - {@link #CUDA_ERROR_COOPERATIVE_LAUNCH_TOO_LARGE CUDA_ERROR_COOPERATIVE_LAUNCH_TOO_LARGE} -
* This error indicates that the number of blocks launched per grid for a kernel that was launched via either {@link #cuLaunchCooperativeKernel LaunchCooperativeKernel} or
* {@link #cuLaunchCooperativeKernelMultiDevice LaunchCooperativeKernelMultiDevice} exceeds the maximum number of blocks as allowed by {@link #cuOccupancyMaxActiveBlocksPerMultiprocessor OccupancyMaxActiveBlocksPerMultiprocessor} or
* {@link #cuOccupancyMaxActiveBlocksPerMultiprocessorWithFlags OccupancyMaxActiveBlocksPerMultiprocessorWithFlags} times the number of multiprocessors as specified by the device attribute
* {@link #CU_DEVICE_ATTRIBUTE_MULTIPROCESSOR_COUNT DEVICE_ATTRIBUTE_MULTIPROCESSOR_COUNT}.
*
* - {@link #CUDA_ERROR_NOT_PERMITTED CUDA_ERROR_NOT_PERMITTED} - This error indicates that the attempted operation is not permitted.
* - {@link #CUDA_ERROR_NOT_SUPPORTED CUDA_ERROR_NOT_SUPPORTED} - This error indicates that the attempted operation is not supported on the current system or device.
* - {@link #CUDA_ERROR_SYSTEM_NOT_READY CUDA_ERROR_SYSTEM_NOT_READY} -
* This error indicates that the system is not yet ready to start any CUDA work. To continue using CUDA, verify the system configuration is in a valid
* state and all required driver daemons are actively running. More information about this error can be found in the system specific user guide.
*
* - {@link #CUDA_ERROR_SYSTEM_DRIVER_MISMATCH CUDA_ERROR_SYSTEM_DRIVER_MISMATCH} -
* This error indicates that there is a mismatch between the versions of the display driver and the CUDA driver. Refer to the compatibility
* documentation for supported versions.
*
* - {@link #CUDA_ERROR_COMPAT_NOT_SUPPORTED_ON_DEVICE CUDA_ERROR_COMPAT_NOT_SUPPORTED_ON_DEVICE} -
* This error indicates that the system was upgraded to run with forward compatibility but the visible hardware detected by CUDA does not support this
* configuration. Refer to the compatibility documentation for the supported hardware matrix or ensure that only supported hardware is visible during
* initialization via the {@code CUDA_VISIBLE_DEVICES} environment variable.
*
* - {@link #CUDA_ERROR_MPS_CONNECTION_FAILED CUDA_ERROR_MPS_CONNECTION_FAILED} - This error indicates that the MPS client failed to connect to the MPS control daemon or the MPS server.
* - {@link #CUDA_ERROR_MPS_RPC_FAILURE CUDA_ERROR_MPS_RPC_FAILURE} - This error indicates that the remote procedure call between the MPS server and the MPS client failed.
* - {@link #CUDA_ERROR_MPS_SERVER_NOT_READY CUDA_ERROR_MPS_SERVER_NOT_READY} -
* This error indicates that the MPS server is not ready to accept new MPS client requests. This error can be returned when the MPS server is in the
* process of recovering from a fatal failure.
*
* - {@link #CUDA_ERROR_MPS_MAX_CLIENTS_REACHED CUDA_ERROR_MPS_MAX_CLIENTS_REACHED} - This error indicates that the hardware resources required to create MPS client have been exhausted.
* - {@link #CUDA_ERROR_MPS_MAX_CONNECTIONS_REACHED CUDA_ERROR_MPS_MAX_CONNECTIONS_REACHED} - This error indicates that the hardware resources required to support device connections have been exhausted.
* - {@link #CUDA_ERROR_STREAM_CAPTURE_UNSUPPORTED CUDA_ERROR_STREAM_CAPTURE_UNSUPPORTED} - This error indicates that the operation is not permitted when the stream is capturing.
* - {@link #CUDA_ERROR_STREAM_CAPTURE_INVALIDATED CUDA_ERROR_STREAM_CAPTURE_INVALIDATED} - This error indicates that the current capture sequence on the stream has been invalidated due to a previous error.
* - {@link #CUDA_ERROR_STREAM_CAPTURE_MERGE CUDA_ERROR_STREAM_CAPTURE_MERGE} - This error indicates that the operation would have resulted in a merge of two independent capture sequences.
* - {@link #CUDA_ERROR_STREAM_CAPTURE_UNMATCHED CUDA_ERROR_STREAM_CAPTURE_UNMATCHED} - This error indicates that the capture was not initiated in this stream.
* - {@link #CUDA_ERROR_STREAM_CAPTURE_UNJOINED CUDA_ERROR_STREAM_CAPTURE_UNJOINED} - This error indicates that the capture sequence contains a fork that was not joined to the primary stream.
* - {@link #CUDA_ERROR_STREAM_CAPTURE_ISOLATION CUDA_ERROR_STREAM_CAPTURE_ISOLATION} -
* This error indicates that a dependency would have been created which crosses the capture sequence boundary. Only implicit in-stream ordering
* dependencies are allowed to cross the boundary.
*
* - {@link #CUDA_ERROR_STREAM_CAPTURE_IMPLICIT CUDA_ERROR_STREAM_CAPTURE_IMPLICIT} - This error indicates a disallowed implicit dependency on a current capture sequence from {@code cudaStreamLegacy}.
* - {@link #CUDA_ERROR_CAPTURED_EVENT CUDA_ERROR_CAPTURED_EVENT} - This error indicates that the operation is not permitted on an event which was last recorded in a capturing stream.
* - {@link #CUDA_ERROR_STREAM_CAPTURE_WRONG_THREAD CUDA_ERROR_STREAM_CAPTURE_WRONG_THREAD} -
* A stream capture sequence not initiated with the {@link #CU_STREAM_CAPTURE_MODE_RELAXED STREAM_CAPTURE_MODE_RELAXED} argument to {@link #cuStreamBeginCapture StreamBeginCapture} was passed to {@link #cuStreamEndCapture StreamEndCapture}
* in a different thread.
*
* - {@link #CUDA_ERROR_TIMEOUT CUDA_ERROR_TIMEOUT} - This error indicates that the timeout specified for the wait operation has lapsed.
* - {@link #CUDA_ERROR_GRAPH_EXEC_UPDATE_FAILURE CUDA_ERROR_GRAPH_EXEC_UPDATE_FAILURE} -
* This error indicates that the graph update was not performed because it included changes which violated constraints specific to instantiated graph
* update.
*
* - {@link #CUDA_ERROR_EXTERNAL_DEVICE CUDA_ERROR_EXTERNAL_DEVICE} -
* This indicates that an async error has occurred in a device outside of CUDA. If CUDA was waiting for an external device's signal before consuming
* shared data, the external device signaled an error indicating that the data is not valid for consumption. This leaves the process in an
* inconsistent state and any further CUDA work will return the same error. To continue using CUDA, the process must be terminated and relaunched.
*
* - {@link #CUDA_ERROR_UNKNOWN CUDA_ERROR_UNKNOWN} - This indicates that an unknown internal error has occurred.
*
*/
public static final int
CUDA_SUCCESS = 0,
CUDA_ERROR_INVALID_VALUE = 1,
CUDA_ERROR_OUT_OF_MEMORY = 2,
CUDA_ERROR_NOT_INITIALIZED = 3,
CUDA_ERROR_DEINITIALIZED = 4,
CUDA_ERROR_PROFILER_DISABLED = 5,
CUDA_ERROR_PROFILER_NOT_INITIALIZED = 6,
CUDA_ERROR_PROFILER_ALREADY_STARTED = 7,
CUDA_ERROR_PROFILER_ALREADY_STOPPED = 8,
CUDA_ERROR_STUB_LIBRARY = 34,
CUDA_ERROR_NO_DEVICE = 100,
CUDA_ERROR_INVALID_DEVICE = 101,
CUDA_ERROR_DEVICE_NOT_LICENSED = 102,
CUDA_ERROR_INVALID_IMAGE = 200,
CUDA_ERROR_INVALID_CONTEXT = 201,
CUDA_ERROR_CONTEXT_ALREADY_CURRENT = 202,
CUDA_ERROR_MAP_FAILED = 205,
CUDA_ERROR_UNMAP_FAILED = 206,
CUDA_ERROR_ARRAY_IS_MAPPED = 207,
CUDA_ERROR_ALREADY_MAPPED = 208,
CUDA_ERROR_NO_BINARY_FOR_GPU = 209,
CUDA_ERROR_ALREADY_ACQUIRED = 210,
CUDA_ERROR_NOT_MAPPED = 211,
CUDA_ERROR_NOT_MAPPED_AS_ARRAY = 212,
CUDA_ERROR_NOT_MAPPED_AS_POINTER = 213,
CUDA_ERROR_ECC_UNCORRECTABLE = 214,
CUDA_ERROR_UNSUPPORTED_LIMIT = 215,
CUDA_ERROR_CONTEXT_ALREADY_IN_USE = 216,
CUDA_ERROR_PEER_ACCESS_UNSUPPORTED = 217,
CUDA_ERROR_INVALID_PTX = 218,
CUDA_ERROR_INVALID_GRAPHICS_CONTEXT = 219,
CUDA_ERROR_NVLINK_UNCORRECTABLE = 220,
CUDA_ERROR_JIT_COMPILER_NOT_FOUND = 221,
CUDA_ERROR_UNSUPPORTED_PTX_VERSION = 222,
CUDA_ERROR_JIT_COMPILATION_DISABLED = 223,
CUDA_ERROR_UNSUPPORTED_EXEC_AFFINITY = 224,
CUDA_ERROR_INVALID_SOURCE = 300,
CUDA_ERROR_FILE_NOT_FOUND = 301,
CUDA_ERROR_SHARED_OBJECT_SYMBOL_NOT_FOUND = 302,
CUDA_ERROR_SHARED_OBJECT_INIT_FAILED = 303,
CUDA_ERROR_OPERATING_SYSTEM = 304,
CUDA_ERROR_INVALID_HANDLE = 400,
CUDA_ERROR_ILLEGAL_STATE = 401,
CUDA_ERROR_NOT_FOUND = 500,
CUDA_ERROR_NOT_READY = 600,
CUDA_ERROR_ILLEGAL_ADDRESS = 700,
CUDA_ERROR_LAUNCH_OUT_OF_RESOURCES = 701,
CUDA_ERROR_LAUNCH_TIMEOUT = 702,
CUDA_ERROR_LAUNCH_INCOMPATIBLE_TEXTURING = 703,
CUDA_ERROR_PEER_ACCESS_ALREADY_ENABLED = 704,
CUDA_ERROR_PEER_ACCESS_NOT_ENABLED = 705,
CUDA_ERROR_PRIMARY_CONTEXT_ACTIVE = 708,
CUDA_ERROR_CONTEXT_IS_DESTROYED = 709,
CUDA_ERROR_ASSERT = 710,
CUDA_ERROR_TOO_MANY_PEERS = 711,
CUDA_ERROR_HOST_MEMORY_ALREADY_REGISTERED = 712,
CUDA_ERROR_HOST_MEMORY_NOT_REGISTERED = 713,
CUDA_ERROR_HARDWARE_STACK_ERROR = 714,
CUDA_ERROR_ILLEGAL_INSTRUCTION = 715,
CUDA_ERROR_MISALIGNED_ADDRESS = 716,
CUDA_ERROR_INVALID_ADDRESS_SPACE = 717,
CUDA_ERROR_INVALID_PC = 718,
CUDA_ERROR_LAUNCH_FAILED = 719,
CUDA_ERROR_COOPERATIVE_LAUNCH_TOO_LARGE = 720,
CUDA_ERROR_NOT_PERMITTED = 800,
CUDA_ERROR_NOT_SUPPORTED = 801,
CUDA_ERROR_SYSTEM_NOT_READY = 802,
CUDA_ERROR_SYSTEM_DRIVER_MISMATCH = 803,
CUDA_ERROR_COMPAT_NOT_SUPPORTED_ON_DEVICE = 804,
CUDA_ERROR_MPS_CONNECTION_FAILED = 805,
CUDA_ERROR_MPS_RPC_FAILURE = 806,
CUDA_ERROR_MPS_SERVER_NOT_READY = 807,
CUDA_ERROR_MPS_MAX_CLIENTS_REACHED = 808,
CUDA_ERROR_MPS_MAX_CONNECTIONS_REACHED = 809,
CUDA_ERROR_STREAM_CAPTURE_UNSUPPORTED = 900,
CUDA_ERROR_STREAM_CAPTURE_INVALIDATED = 901,
CUDA_ERROR_STREAM_CAPTURE_MERGE = 902,
CUDA_ERROR_STREAM_CAPTURE_UNMATCHED = 903,
CUDA_ERROR_STREAM_CAPTURE_UNJOINED = 904,
CUDA_ERROR_STREAM_CAPTURE_ISOLATION = 905,
CUDA_ERROR_STREAM_CAPTURE_IMPLICIT = 906,
CUDA_ERROR_CAPTURED_EVENT = 907,
CUDA_ERROR_STREAM_CAPTURE_WRONG_THREAD = 908,
CUDA_ERROR_TIMEOUT = 909,
CUDA_ERROR_GRAPH_EXEC_UPDATE_FAILURE = 910,
CUDA_ERROR_EXTERNAL_DEVICE = 911,
CUDA_ERROR_UNKNOWN = 999;
/**
* P2P Attributes. ({@code CUdevice_P2PAttribute})
*
* Enum values:
*
*
* - {@link #CU_DEVICE_P2P_ATTRIBUTE_PERFORMANCE_RANK DEVICE_P2P_ATTRIBUTE_PERFORMANCE_RANK} - A relative value indicating the performance of the link between two devices
* - {@link #CU_DEVICE_P2P_ATTRIBUTE_ACCESS_SUPPORTED DEVICE_P2P_ATTRIBUTE_ACCESS_SUPPORTED} - P2P access is enabled
* - {@link #CU_DEVICE_P2P_ATTRIBUTE_NATIVE_ATOMIC_SUPPORTED DEVICE_P2P_ATTRIBUTE_NATIVE_ATOMIC_SUPPORTED} - Atomic operations over the link are supported
* - {@link #CU_DEVICE_P2P_ATTRIBUTE_ACCESS_ACCESS_SUPPORTED DEVICE_P2P_ATTRIBUTE_ACCESS_ACCESS_SUPPORTED} - Deprecated, use {@link #CU_DEVICE_P2P_ATTRIBUTE_CUDA_ARRAY_ACCESS_SUPPORTED DEVICE_P2P_ATTRIBUTE_CUDA_ARRAY_ACCESS_SUPPORTED} instead
* - {@link #CU_DEVICE_P2P_ATTRIBUTE_CUDA_ARRAY_ACCESS_SUPPORTED DEVICE_P2P_ATTRIBUTE_CUDA_ARRAY_ACCESS_SUPPORTED} - Accessing CUDA arrays over the link supported
*
*/
public static final int
CU_DEVICE_P2P_ATTRIBUTE_PERFORMANCE_RANK = 0x1,
CU_DEVICE_P2P_ATTRIBUTE_ACCESS_SUPPORTED = 0x2,
CU_DEVICE_P2P_ATTRIBUTE_NATIVE_ATOMIC_SUPPORTED = 0x3,
CU_DEVICE_P2P_ATTRIBUTE_ACCESS_ACCESS_SUPPORTED = 0x4,
CU_DEVICE_P2P_ATTRIBUTE_CUDA_ARRAY_ACCESS_SUPPORTED = 0x4;
/**
* Flags for {@link #cuMemHostAlloc MemHostAlloc}.
*
* Enum values:
*
*
* - {@link #CU_MEMHOSTALLOC_PORTABLE MEMHOSTALLOC_PORTABLE} - If set, host memory is portable between CUDA contexts.
* - {@link #CU_MEMHOSTALLOC_DEVICEMAP MEMHOSTALLOC_DEVICEMAP} - If set, host memory is mapped into CUDA address space and {@link #cuMemHostGetDevicePointer MemHostGetDevicePointer} may be called on the host pointer.
* - {@link #CU_MEMHOSTALLOC_WRITECOMBINED MEMHOSTALLOC_WRITECOMBINED} -
* If set, host memory is allocated as write-combined - fast to write, faster to DMA, slow to read except via SSE4 streaming load instruction
* ({@code MOVNTDQA}).
*
*
*/
public static final int
CU_MEMHOSTALLOC_PORTABLE = 0x1,
CU_MEMHOSTALLOC_DEVICEMAP = 0x2,
CU_MEMHOSTALLOC_WRITECOMBINED = 0x4;
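// Illustrative sketch (not part of the generated bindings): allocating page-locked host memory
// that is portable across contexts and mapped into the CUDA address space. Assumes the
// (PointerBuffer, long, int) overload of cuMemHostAlloc declared later in this class; verify
// against the actual binding before use.
//
// try (MemoryStack stack = stackPush()) {
//     PointerBuffer pp = stack.mallocPointer(1);
//     int err = cuMemHostAlloc(pp, 4096, CU_MEMHOSTALLOC_PORTABLE | CU_MEMHOSTALLOC_DEVICEMAP);
//     if (err == CUDA_SUCCESS) {
//         long hostPtr = pp.get(0); // page-locked host pointer; release with cuMemFreeHost
//     }
// }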
/**
* Flags for {@link #cuMemHostRegister MemHostRegister}.
*
* Enum values:
*
*
* - {@link #CU_MEMHOSTREGISTER_PORTABLE MEMHOSTREGISTER_PORTABLE} - If set, host memory is portable between CUDA contexts.
* - {@link #CU_MEMHOSTREGISTER_DEVICEMAP MEMHOSTREGISTER_DEVICEMAP} - If set, host memory is mapped into CUDA address space and {@link #cuMemHostGetDevicePointer MemHostGetDevicePointer} may be called on the host pointer.
* - {@link #CU_MEMHOSTREGISTER_IOMEMORY MEMHOSTREGISTER_IOMEMORY} -
* If set, the passed memory pointer is treated as pointing to some memory-mapped I/O space, e.g. belonging to a third-party PCIe device.
*
*
On Windows the flag is a no-op. On Linux that memory is marked as non cache-coherent for the GPU and is expected to be physically contiguous.
* It may return {@link #CUDA_ERROR_NOT_PERMITTED} if run as an unprivileged user, and {@link #CUDA_ERROR_NOT_SUPPORTED} on older Linux kernel versions. On all other
* platforms, it is not supported and {@link #CUDA_ERROR_NOT_SUPPORTED} is returned.
*
* - {@link #CU_MEMHOSTREGISTER_READ_ONLY MEMHOSTREGISTER_READ_ONLY} -
* If set, the passed memory pointer is treated as pointing to memory that is considered read-only by the device.
*
*
On platforms without {@link #CU_DEVICE_ATTRIBUTE_PAGEABLE_MEMORY_ACCESS_USES_HOST_PAGE_TABLES DEVICE_ATTRIBUTE_PAGEABLE_MEMORY_ACCESS_USES_HOST_PAGE_TABLES}, this flag is required in order to register memory mapped to
* the CPU as read-only. Support for the use of this flag can be queried from the device attribute
* {@link #CU_DEVICE_ATTRIBUTE_READ_ONLY_HOST_REGISTER_SUPPORTED DEVICE_ATTRIBUTE_READ_ONLY_HOST_REGISTER_SUPPORTED}. Using this flag with a current context associated with a device that does not have this
* attribute set will cause {@link #cuMemHostRegister MemHostRegister} to error with {@link #CUDA_ERROR_NOT_SUPPORTED}.
*
*
*/
public static final int
CU_MEMHOSTREGISTER_PORTABLE = 0x01,
CU_MEMHOSTREGISTER_DEVICEMAP = 0x02,
CU_MEMHOSTREGISTER_IOMEMORY = 0x04,
CU_MEMHOSTREGISTER_READ_ONLY = 0x08;
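// Illustrative sketch: registering an existing direct buffer as read-only for the device, with a
// fallback when the device lacks support. cuDeviceGetAttribute is used as declared later in this
// class; the (ByteBuffer, int) overload of cuMemHostRegister is an assumption, verify the binding.
//
// try (MemoryStack stack = stackPush()) {
//     IntBuffer pi = stack.mallocInt(1);
//     cuDeviceGetAttribute(pi, CU_DEVICE_ATTRIBUTE_READ_ONLY_HOST_REGISTER_SUPPORTED, device);
//     int flags = pi.get(0) != 0 ? CU_MEMHOSTREGISTER_READ_ONLY : 0;
//     cuMemHostRegister(hostBuffer, flags); // hostBuffer: a direct ByteBuffer owned by the caller
// }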
/** Indicates that the layered sparse CUDA array or CUDA mipmapped array has a single mip tail region for all layers. */
public static final int CU_ARRAY_SPARSE_PROPERTIES_SINGLE_MIPTAIL = 0x1;
/**
* Resource view format. ({@code CUresourceViewFormat})
*
* Enum values:
*
*
* - {@link #CU_RES_VIEW_FORMAT_NONE RES_VIEW_FORMAT_NONE} - No resource view format (use underlying resource format)
* - {@link #CU_RES_VIEW_FORMAT_UINT_1X8 RES_VIEW_FORMAT_UINT_1X8} - 1 channel unsigned 8-bit integers
* - {@link #CU_RES_VIEW_FORMAT_UINT_2X8 RES_VIEW_FORMAT_UINT_2X8} - 2 channel unsigned 8-bit integers
* - {@link #CU_RES_VIEW_FORMAT_UINT_4X8 RES_VIEW_FORMAT_UINT_4X8} - 4 channel unsigned 8-bit integers
* - {@link #CU_RES_VIEW_FORMAT_SINT_1X8 RES_VIEW_FORMAT_SINT_1X8} - 1 channel signed 8-bit integers
* - {@link #CU_RES_VIEW_FORMAT_SINT_2X8 RES_VIEW_FORMAT_SINT_2X8} - 2 channel signed 8-bit integers
* - {@link #CU_RES_VIEW_FORMAT_SINT_4X8 RES_VIEW_FORMAT_SINT_4X8} - 4 channel signed 8-bit integers
* - {@link #CU_RES_VIEW_FORMAT_UINT_1X16 RES_VIEW_FORMAT_UINT_1X16} - 1 channel unsigned 16-bit integers
* - {@link #CU_RES_VIEW_FORMAT_UINT_2X16 RES_VIEW_FORMAT_UINT_2X16} - 2 channel unsigned 16-bit integers
* - {@link #CU_RES_VIEW_FORMAT_UINT_4X16 RES_VIEW_FORMAT_UINT_4X16} - 4 channel unsigned 16-bit integers
* - {@link #CU_RES_VIEW_FORMAT_SINT_1X16 RES_VIEW_FORMAT_SINT_1X16} - 1 channel signed 16-bit integers
* - {@link #CU_RES_VIEW_FORMAT_SINT_2X16 RES_VIEW_FORMAT_SINT_2X16} - 2 channel signed 16-bit integers
* - {@link #CU_RES_VIEW_FORMAT_SINT_4X16 RES_VIEW_FORMAT_SINT_4X16} - 4 channel signed 16-bit integers
* - {@link #CU_RES_VIEW_FORMAT_UINT_1X32 RES_VIEW_FORMAT_UINT_1X32} - 1 channel unsigned 32-bit integers
* - {@link #CU_RES_VIEW_FORMAT_UINT_2X32 RES_VIEW_FORMAT_UINT_2X32} - 2 channel unsigned 32-bit integers
* - {@link #CU_RES_VIEW_FORMAT_UINT_4X32 RES_VIEW_FORMAT_UINT_4X32} - 4 channel unsigned 32-bit integers
* - {@link #CU_RES_VIEW_FORMAT_SINT_1X32 RES_VIEW_FORMAT_SINT_1X32} - 1 channel signed 32-bit integers
* - {@link #CU_RES_VIEW_FORMAT_SINT_2X32 RES_VIEW_FORMAT_SINT_2X32} - 2 channel signed 32-bit integers
* - {@link #CU_RES_VIEW_FORMAT_SINT_4X32 RES_VIEW_FORMAT_SINT_4X32} - 4 channel signed 32-bit integers
* - {@link #CU_RES_VIEW_FORMAT_FLOAT_1X16 RES_VIEW_FORMAT_FLOAT_1X16} - 1 channel 16-bit floating point
* - {@link #CU_RES_VIEW_FORMAT_FLOAT_2X16 RES_VIEW_FORMAT_FLOAT_2X16} - 2 channel 16-bit floating point
* - {@link #CU_RES_VIEW_FORMAT_FLOAT_4X16 RES_VIEW_FORMAT_FLOAT_4X16} - 4 channel 16-bit floating point
* - {@link #CU_RES_VIEW_FORMAT_FLOAT_1X32 RES_VIEW_FORMAT_FLOAT_1X32} - 1 channel 32-bit floating point
* - {@link #CU_RES_VIEW_FORMAT_FLOAT_2X32 RES_VIEW_FORMAT_FLOAT_2X32} - 2 channel 32-bit floating point
* - {@link #CU_RES_VIEW_FORMAT_FLOAT_4X32 RES_VIEW_FORMAT_FLOAT_4X32} - 4 channel 32-bit floating point
* - {@link #CU_RES_VIEW_FORMAT_UNSIGNED_BC1 RES_VIEW_FORMAT_UNSIGNED_BC1} - Block compressed 1
* - {@link #CU_RES_VIEW_FORMAT_UNSIGNED_BC2 RES_VIEW_FORMAT_UNSIGNED_BC2} - Block compressed 2
* - {@link #CU_RES_VIEW_FORMAT_UNSIGNED_BC3 RES_VIEW_FORMAT_UNSIGNED_BC3} - Block compressed 3
* - {@link #CU_RES_VIEW_FORMAT_UNSIGNED_BC4 RES_VIEW_FORMAT_UNSIGNED_BC4} - Block compressed 4 unsigned
* - {@link #CU_RES_VIEW_FORMAT_SIGNED_BC4 RES_VIEW_FORMAT_SIGNED_BC4} - Block compressed 4 signed
* - {@link #CU_RES_VIEW_FORMAT_UNSIGNED_BC5 RES_VIEW_FORMAT_UNSIGNED_BC5} - Block compressed 5 unsigned
* - {@link #CU_RES_VIEW_FORMAT_SIGNED_BC5 RES_VIEW_FORMAT_SIGNED_BC5} - Block compressed 5 signed
* - {@link #CU_RES_VIEW_FORMAT_UNSIGNED_BC6H RES_VIEW_FORMAT_UNSIGNED_BC6H} - Block compressed 6 unsigned half-float
* - {@link #CU_RES_VIEW_FORMAT_SIGNED_BC6H RES_VIEW_FORMAT_SIGNED_BC6H} - Block compressed 6 signed half-float
* - {@link #CU_RES_VIEW_FORMAT_UNSIGNED_BC7 RES_VIEW_FORMAT_UNSIGNED_BC7} - Block compressed 7
*
*/
public static final int
CU_RES_VIEW_FORMAT_NONE = 0x0,
CU_RES_VIEW_FORMAT_UINT_1X8 = 0x1,
CU_RES_VIEW_FORMAT_UINT_2X8 = 0x2,
CU_RES_VIEW_FORMAT_UINT_4X8 = 0x3,
CU_RES_VIEW_FORMAT_SINT_1X8 = 0x4,
CU_RES_VIEW_FORMAT_SINT_2X8 = 0x5,
CU_RES_VIEW_FORMAT_SINT_4X8 = 0x6,
CU_RES_VIEW_FORMAT_UINT_1X16 = 0x7,
CU_RES_VIEW_FORMAT_UINT_2X16 = 0x8,
CU_RES_VIEW_FORMAT_UINT_4X16 = 0x9,
CU_RES_VIEW_FORMAT_SINT_1X16 = 0xA,
CU_RES_VIEW_FORMAT_SINT_2X16 = 0xB,
CU_RES_VIEW_FORMAT_SINT_4X16 = 0xC,
CU_RES_VIEW_FORMAT_UINT_1X32 = 0xD,
CU_RES_VIEW_FORMAT_UINT_2X32 = 0xE,
CU_RES_VIEW_FORMAT_UINT_4X32 = 0xF,
CU_RES_VIEW_FORMAT_SINT_1X32 = 0x10,
CU_RES_VIEW_FORMAT_SINT_2X32 = 0x11,
CU_RES_VIEW_FORMAT_SINT_4X32 = 0x12,
CU_RES_VIEW_FORMAT_FLOAT_1X16 = 0x13,
CU_RES_VIEW_FORMAT_FLOAT_2X16 = 0x14,
CU_RES_VIEW_FORMAT_FLOAT_4X16 = 0x15,
CU_RES_VIEW_FORMAT_FLOAT_1X32 = 0x16,
CU_RES_VIEW_FORMAT_FLOAT_2X32 = 0x17,
CU_RES_VIEW_FORMAT_FLOAT_4X32 = 0x18,
CU_RES_VIEW_FORMAT_UNSIGNED_BC1 = 0x19,
CU_RES_VIEW_FORMAT_UNSIGNED_BC2 = 0x1A,
CU_RES_VIEW_FORMAT_UNSIGNED_BC3 = 0x1B,
CU_RES_VIEW_FORMAT_UNSIGNED_BC4 = 0x1C,
CU_RES_VIEW_FORMAT_SIGNED_BC4 = 0x1D,
CU_RES_VIEW_FORMAT_UNSIGNED_BC5 = 0x1E,
CU_RES_VIEW_FORMAT_SIGNED_BC5 = 0x1F,
CU_RES_VIEW_FORMAT_UNSIGNED_BC6H = 0x20,
CU_RES_VIEW_FORMAT_SIGNED_BC6H = 0x21,
CU_RES_VIEW_FORMAT_UNSIGNED_BC7 = 0x22;
/**
* Access flags that specify the level of access the current context's device has on the memory referenced. ({@code CUDA_POINTER_ATTRIBUTE_ACCESS_FLAGS})
*
* Enum values:
*
*
* - {@link #CU_POINTER_ATTRIBUTE_ACCESS_FLAG_NONE POINTER_ATTRIBUTE_ACCESS_FLAG_NONE} -
* No access, meaning the device cannot access this memory at all, thus must be staged through accessible memory in order to complete certain
* operations
*
* - {@link #CU_POINTER_ATTRIBUTE_ACCESS_FLAG_READ POINTER_ATTRIBUTE_ACCESS_FLAG_READ} - Read-only access, meaning writes to this memory are considered invalid accesses and thus return an error.
* - {@link #CU_POINTER_ATTRIBUTE_ACCESS_FLAG_READWRITE POINTER_ATTRIBUTE_ACCESS_FLAG_READWRITE} - Read-write access, the device has full read-write access to the memory
*
*/
public static final int
CU_POINTER_ATTRIBUTE_ACCESS_FLAG_NONE = 0x0,
CU_POINTER_ATTRIBUTE_ACCESS_FLAG_READ = 0x1,
CU_POINTER_ATTRIBUTE_ACCESS_FLAG_READWRITE = 0x3;
/**
* External memory handle types. ({@code CUexternalMemoryHandleType})
*
* Enum values:
*
*
* - {@link #CU_EXTERNAL_MEMORY_HANDLE_TYPE_OPAQUE_FD EXTERNAL_MEMORY_HANDLE_TYPE_OPAQUE_FD} - Handle is an opaque file descriptor
* - {@link #CU_EXTERNAL_MEMORY_HANDLE_TYPE_OPAQUE_WIN32 EXTERNAL_MEMORY_HANDLE_TYPE_OPAQUE_WIN32} - Handle is an opaque shared NT handle
* - {@link #CU_EXTERNAL_MEMORY_HANDLE_TYPE_OPAQUE_WIN32_KMT EXTERNAL_MEMORY_HANDLE_TYPE_OPAQUE_WIN32_KMT} - Handle is an opaque, globally shared handle
* - {@link #CU_EXTERNAL_MEMORY_HANDLE_TYPE_D3D12_HEAP EXTERNAL_MEMORY_HANDLE_TYPE_D3D12_HEAP} - Handle is a D3D12 heap object
* - {@link #CU_EXTERNAL_MEMORY_HANDLE_TYPE_D3D12_RESOURCE EXTERNAL_MEMORY_HANDLE_TYPE_D3D12_RESOURCE} - Handle is a D3D12 committed resource
* - {@link #CU_EXTERNAL_MEMORY_HANDLE_TYPE_D3D11_RESOURCE EXTERNAL_MEMORY_HANDLE_TYPE_D3D11_RESOURCE} - Handle is a shared NT handle to a D3D11 resource
* - {@link #CU_EXTERNAL_MEMORY_HANDLE_TYPE_D3D11_RESOURCE_KMT EXTERNAL_MEMORY_HANDLE_TYPE_D3D11_RESOURCE_KMT} - Handle is a globally shared handle to a D3D11 resource
* - {@link #CU_EXTERNAL_MEMORY_HANDLE_TYPE_NVSCIBUF EXTERNAL_MEMORY_HANDLE_TYPE_NVSCIBUF} - Handle is an NvSciBuf object
*
*/
public static final int
CU_EXTERNAL_MEMORY_HANDLE_TYPE_OPAQUE_FD = 1,
CU_EXTERNAL_MEMORY_HANDLE_TYPE_OPAQUE_WIN32 = 2,
CU_EXTERNAL_MEMORY_HANDLE_TYPE_OPAQUE_WIN32_KMT = 3,
CU_EXTERNAL_MEMORY_HANDLE_TYPE_D3D12_HEAP = 4,
CU_EXTERNAL_MEMORY_HANDLE_TYPE_D3D12_RESOURCE = 5,
CU_EXTERNAL_MEMORY_HANDLE_TYPE_D3D11_RESOURCE = 6,
CU_EXTERNAL_MEMORY_HANDLE_TYPE_D3D11_RESOURCE_KMT = 7,
CU_EXTERNAL_MEMORY_HANDLE_TYPE_NVSCIBUF = 8;
/** Indicates that the external memory object is a dedicated resource. */
public static final int CUDA_EXTERNAL_MEMORY_DEDICATED = 0x1;
/**
* When the {@code flags} parameter of {@link CUDA_EXTERNAL_SEMAPHORE_SIGNAL_PARAMS} contains this flag, it indicates that signaling an external semaphore
* object should skip performing appropriate memory synchronization operations over all the external memory objects that are imported as
* {@link #CU_EXTERNAL_MEMORY_HANDLE_TYPE_NVSCIBUF EXTERNAL_MEMORY_HANDLE_TYPE_NVSCIBUF}, which otherwise are performed by default to ensure data coherency with other importers of the same
* {@code NvSciBuf} memory objects.
*/
public static final int CUDA_EXTERNAL_SEMAPHORE_SIGNAL_SKIP_NVSCIBUF_MEMSYNC = 0x01;
/**
* When the {@code flags} parameter of {@link CUDA_EXTERNAL_SEMAPHORE_WAIT_PARAMS} contains this flag, it indicates that waiting on an external
* semaphore object should skip performing appropriate memory synchronization operations over all the external memory objects that are imported as
* {@link #CU_EXTERNAL_MEMORY_HANDLE_TYPE_NVSCIBUF EXTERNAL_MEMORY_HANDLE_TYPE_NVSCIBUF}, which otherwise are performed by default to ensure data coherency with other importers of the same
* {@code NvSciBuf} memory objects.
*/
public static final int CUDA_EXTERNAL_SEMAPHORE_WAIT_SKIP_NVSCIBUF_MEMSYNC = 0x02;
/**
* When {@code flags} of {@link #cuDeviceGetNvSciSyncAttributes DeviceGetNvSciSyncAttributes} is set to this, it indicates that the application needs signaler-specific
* {@code NvSciSyncAttr} to be filled by {@code cuDeviceGetNvSciSyncAttributes}.
*/
public static final int CUDA_NVSCISYNC_ATTR_SIGNAL = 0x1;
/**
* When {@code flags} of {@link #cuDeviceGetNvSciSyncAttributes DeviceGetNvSciSyncAttributes} is set to this, it indicates that the application needs waiter-specific {@code NvSciSyncAttr} to be
* filled by {@code cuDeviceGetNvSciSyncAttributes}.
*/
public static final int CUDA_NVSCISYNC_ATTR_WAIT = 0x2;
/**
* External semaphore handle types. ({@code CUexternalSemaphoreHandleType})
*
* Enum values:
*
*
* - {@link #CU_EXTERNAL_SEMAPHORE_HANDLE_TYPE_OPAQUE_FD EXTERNAL_SEMAPHORE_HANDLE_TYPE_OPAQUE_FD} - Handle is an opaque file descriptor
* - {@link #CU_EXTERNAL_SEMAPHORE_HANDLE_TYPE_OPAQUE_WIN32 EXTERNAL_SEMAPHORE_HANDLE_TYPE_OPAQUE_WIN32} - Handle is an opaque shared NT handle
* - {@link #CU_EXTERNAL_SEMAPHORE_HANDLE_TYPE_OPAQUE_WIN32_KMT EXTERNAL_SEMAPHORE_HANDLE_TYPE_OPAQUE_WIN32_KMT} - Handle is an opaque, globally shared handle
* - {@link #CU_EXTERNAL_SEMAPHORE_HANDLE_TYPE_D3D12_FENCE EXTERNAL_SEMAPHORE_HANDLE_TYPE_D3D12_FENCE} - Handle is a shared NT handle referencing a D3D12 fence object
* - {@link #CU_EXTERNAL_SEMAPHORE_HANDLE_TYPE_D3D11_FENCE EXTERNAL_SEMAPHORE_HANDLE_TYPE_D3D11_FENCE} - Handle is a shared NT handle referencing a D3D11 fence object
* - {@link #CU_EXTERNAL_SEMAPHORE_HANDLE_TYPE_NVSCISYNC EXTERNAL_SEMAPHORE_HANDLE_TYPE_NVSCISYNC} - Opaque handle to NvSciSync Object
* - {@link #CU_EXTERNAL_SEMAPHORE_HANDLE_TYPE_D3D11_KEYED_MUTEX EXTERNAL_SEMAPHORE_HANDLE_TYPE_D3D11_KEYED_MUTEX} - Handle is a shared NT handle referencing a D3D11 keyed mutex object
* - {@link #CU_EXTERNAL_SEMAPHORE_HANDLE_TYPE_D3D11_KEYED_MUTEX_KMT EXTERNAL_SEMAPHORE_HANDLE_TYPE_D3D11_KEYED_MUTEX_KMT} - Handle is a globally shared handle referencing a D3D11 keyed mutex object
* - {@link #CU_EXTERNAL_SEMAPHORE_HANDLE_TYPE_TIMELINE_SEMAPHORE_FD EXTERNAL_SEMAPHORE_HANDLE_TYPE_TIMELINE_SEMAPHORE_FD} - Handle is an opaque file descriptor referencing a timeline semaphore
* - {@link #CU_EXTERNAL_SEMAPHORE_HANDLE_TYPE_TIMELINE_SEMAPHORE_WIN32 EXTERNAL_SEMAPHORE_HANDLE_TYPE_TIMELINE_SEMAPHORE_WIN32} - Handle is an opaque shared NT handle referencing a timeline semaphore
*
*/
public static final int
CU_EXTERNAL_SEMAPHORE_HANDLE_TYPE_OPAQUE_FD = 1,
CU_EXTERNAL_SEMAPHORE_HANDLE_TYPE_OPAQUE_WIN32 = 2,
CU_EXTERNAL_SEMAPHORE_HANDLE_TYPE_OPAQUE_WIN32_KMT = 3,
CU_EXTERNAL_SEMAPHORE_HANDLE_TYPE_D3D12_FENCE = 4,
CU_EXTERNAL_SEMAPHORE_HANDLE_TYPE_D3D11_FENCE = 5,
CU_EXTERNAL_SEMAPHORE_HANDLE_TYPE_NVSCISYNC = 6,
CU_EXTERNAL_SEMAPHORE_HANDLE_TYPE_D3D11_KEYED_MUTEX = 7,
CU_EXTERNAL_SEMAPHORE_HANDLE_TYPE_D3D11_KEYED_MUTEX_KMT = 8,
CU_EXTERNAL_SEMAPHORE_HANDLE_TYPE_TIMELINE_SEMAPHORE_FD = 9,
CU_EXTERNAL_SEMAPHORE_HANDLE_TYPE_TIMELINE_SEMAPHORE_WIN32 = 10;
/**
* Flags for specifying particular handle types. ({@code CUmemAllocationHandleType})
*
* Enum values:
*
*
* - {@link #CU_MEM_HANDLE_TYPE_NONE MEM_HANDLE_TYPE_NONE} - Does not allow any export mechanism.
* - {@link #CU_MEM_HANDLE_TYPE_POSIX_FILE_DESCRIPTOR MEM_HANDLE_TYPE_POSIX_FILE_DESCRIPTOR} - Allows a file descriptor to be used for exporting. Permitted only on POSIX systems. ({@code int})
* - {@link #CU_MEM_HANDLE_TYPE_WIN32 MEM_HANDLE_TYPE_WIN32} - Allows a Win32 NT handle to be used for exporting. ({@code HANDLE})
* - {@link #CU_MEM_HANDLE_TYPE_WIN32_KMT MEM_HANDLE_TYPE_WIN32_KMT} - Allows a Win32 KMT handle to be used for exporting. ({@code D3DKMT_HANDLE})
*
*/
public static final int
CU_MEM_HANDLE_TYPE_NONE = 0x0,
CU_MEM_HANDLE_TYPE_POSIX_FILE_DESCRIPTOR = 0x1,
CU_MEM_HANDLE_TYPE_WIN32 = 0x2,
CU_MEM_HANDLE_TYPE_WIN32_KMT = 0x4;
/**
* Specifies the memory protection flags for mapping. ({@code CUmemAccess_flags})
*
* Enum values:
*
*
* - {@link #CU_MEM_ACCESS_FLAGS_PROT_NONE MEM_ACCESS_FLAGS_PROT_NONE} - Default, make the address range not accessible
* - {@link #CU_MEM_ACCESS_FLAGS_PROT_READ MEM_ACCESS_FLAGS_PROT_READ} - Make the address range read accessible
* - {@link #CU_MEM_ACCESS_FLAGS_PROT_READWRITE MEM_ACCESS_FLAGS_PROT_READWRITE} - Make the address range read-write accessible
*
*/
public static final int
CU_MEM_ACCESS_FLAGS_PROT_NONE = 0x0,
CU_MEM_ACCESS_FLAGS_PROT_READ = 0x1,
CU_MEM_ACCESS_FLAGS_PROT_READWRITE = 0x3;
/**
* Specifies the type of location. ({@code CUmemLocationType})
*
* Enum values:
*
*
* - {@link #CU_MEM_LOCATION_TYPE_INVALID MEM_LOCATION_TYPE_INVALID}
* - {@link #CU_MEM_LOCATION_TYPE_DEVICE MEM_LOCATION_TYPE_DEVICE} - Location is a device location, thus id is a device ordinal
*
*/
public static final int
CU_MEM_LOCATION_TYPE_INVALID = 0x0,
CU_MEM_LOCATION_TYPE_DEVICE = 0x1;
/**
* Defines the allocation types available. ({@code CUmemAllocationType})
*
* Enum values:
*
*
* - {@link #CU_MEM_ALLOCATION_TYPE_INVALID MEM_ALLOCATION_TYPE_INVALID}
* - {@link #CU_MEM_ALLOCATION_TYPE_PINNED MEM_ALLOCATION_TYPE_PINNED} - This allocation type is 'pinned', i.e. cannot migrate from its current location while the application is actively using it
*
*/
public static final int
CU_MEM_ALLOCATION_TYPE_INVALID = 0x0,
CU_MEM_ALLOCATION_TYPE_PINNED = 0x1;
/**
* Flag for requesting different optimal and required granularities for an allocation. ({@code CUmemAllocationGranularity_flags})
*
* Enum values:
*
*
* - {@link #CU_MEM_ALLOC_GRANULARITY_MINIMUM MEM_ALLOC_GRANULARITY_MINIMUM} - Minimum required granularity for allocation
* - {@link #CU_MEM_ALLOC_GRANULARITY_RECOMMENDED MEM_ALLOC_GRANULARITY_RECOMMENDED} - Recommended granularity for allocation for best performance
*
*/
public static final int
CU_MEM_ALLOC_GRANULARITY_MINIMUM = 0x0,
CU_MEM_ALLOC_GRANULARITY_RECOMMENDED = 0x1;
/**
* Sparse subresource types. ({@code CUarraySparseSubresourceType})
*
* Enum values:
*
*
* - {@link #CU_ARRAY_SPARSE_SUBRESOURCE_TYPE_SPARSE_LEVEL ARRAY_SPARSE_SUBRESOURCE_TYPE_SPARSE_LEVEL}
* - {@link #CU_ARRAY_SPARSE_SUBRESOURCE_TYPE_MIPTAIL ARRAY_SPARSE_SUBRESOURCE_TYPE_MIPTAIL}
*
*/
public static final int
CU_ARRAY_SPARSE_SUBRESOURCE_TYPE_SPARSE_LEVEL = 0,
CU_ARRAY_SPARSE_SUBRESOURCE_TYPE_MIPTAIL = 1;
/**
* Memory operation types. ({@code CUmemOperationType})
*
* Enum values:
*
*
* - {@link #CU_MEM_OPERATION_TYPE_MAP MEM_OPERATION_TYPE_MAP}
* - {@link #CU_MEM_OPERATION_TYPE_UNMAP MEM_OPERATION_TYPE_UNMAP}
*
*/
public static final int
CU_MEM_OPERATION_TYPE_MAP = 1,
CU_MEM_OPERATION_TYPE_UNMAP = 2;
/** Memory handle types ({@code CUmemHandleType}) */
public static final int CU_MEM_HANDLE_TYPE_GENERIC = 0;
/**
* Specifies compression attribute for an allocation. ({@code CUmemAllocationCompType})
*
* Enum values:
*
*
* - {@link #CU_MEM_ALLOCATION_COMP_NONE MEM_ALLOCATION_COMP_NONE} - Allocating non-compressible memory
* - {@link #CU_MEM_ALLOCATION_COMP_GENERIC MEM_ALLOCATION_COMP_GENERIC} - Allocating compressible memory
*
*/
public static final int
CU_MEM_ALLOCATION_COMP_NONE = 0x0,
CU_MEM_ALLOCATION_COMP_GENERIC = 0x1;
/** This flag, if set, indicates that the memory will be used as a tile pool. */
public static final int CU_MEM_CREATE_USAGE_TILE_POOL = 0x1;
/**
* {@code CUgraphExecUpdateResult}
*
* Enum values:
*
*
* - {@link #CU_GRAPH_EXEC_UPDATE_SUCCESS GRAPH_EXEC_UPDATE_SUCCESS} - The update succeeded
* - {@link #CU_GRAPH_EXEC_UPDATE_ERROR GRAPH_EXEC_UPDATE_ERROR} - The update failed for an unexpected reason which is described in the return value of the function
* - {@link #CU_GRAPH_EXEC_UPDATE_ERROR_TOPOLOGY_CHANGED GRAPH_EXEC_UPDATE_ERROR_TOPOLOGY_CHANGED} - The update failed because the topology changed
* - {@link #CU_GRAPH_EXEC_UPDATE_ERROR_NODE_TYPE_CHANGED GRAPH_EXEC_UPDATE_ERROR_NODE_TYPE_CHANGED} - The update failed because a node type changed
* - {@link #CU_GRAPH_EXEC_UPDATE_ERROR_FUNCTION_CHANGED GRAPH_EXEC_UPDATE_ERROR_FUNCTION_CHANGED} - The update failed because the function of a kernel node changed (CUDA driver <11.2)
* - {@link #CU_GRAPH_EXEC_UPDATE_ERROR_PARAMETERS_CHANGED GRAPH_EXEC_UPDATE_ERROR_PARAMETERS_CHANGED} - The update failed because the parameters changed in a way that is not supported
* - {@link #CU_GRAPH_EXEC_UPDATE_ERROR_NOT_SUPPORTED GRAPH_EXEC_UPDATE_ERROR_NOT_SUPPORTED} - The update failed because something about the node is not supported
* - {@link #CU_GRAPH_EXEC_UPDATE_ERROR_UNSUPPORTED_FUNCTION_CHANGE GRAPH_EXEC_UPDATE_ERROR_UNSUPPORTED_FUNCTION_CHANGE} - The update failed because the function of a kernel node changed in an unsupported way
*
*/
public static final int
CU_GRAPH_EXEC_UPDATE_SUCCESS = 0x0,
CU_GRAPH_EXEC_UPDATE_ERROR = 0x1,
CU_GRAPH_EXEC_UPDATE_ERROR_TOPOLOGY_CHANGED = 0x2,
CU_GRAPH_EXEC_UPDATE_ERROR_NODE_TYPE_CHANGED = 0x3,
CU_GRAPH_EXEC_UPDATE_ERROR_FUNCTION_CHANGED = 0x4,
CU_GRAPH_EXEC_UPDATE_ERROR_PARAMETERS_CHANGED = 0x5,
CU_GRAPH_EXEC_UPDATE_ERROR_NOT_SUPPORTED = 0x6,
CU_GRAPH_EXEC_UPDATE_ERROR_UNSUPPORTED_FUNCTION_CHANGE = 0x7;
/**
* CUDA memory pool attributes ({@code CUmemPool_attribute})
*
* Enum values:
*
*
* - {@link #CU_MEMPOOL_ATTR_REUSE_FOLLOW_EVENT_DEPENDENCIES MEMPOOL_ATTR_REUSE_FOLLOW_EVENT_DEPENDENCIES} -
* Allow {@link #cuMemAllocAsync MemAllocAsync} to use memory asynchronously freed in other streams, as long as a stream ordering dependency of the allocating stream on
* the free action exists. CUDA events and null stream interactions can create the required stream ordered dependencies.
*
*
(value type = {@code int}, default enabled)
*
* - {@link #CU_MEMPOOL_ATTR_REUSE_ALLOW_OPPORTUNISTIC MEMPOOL_ATTR_REUSE_ALLOW_OPPORTUNISTIC} - Allow reuse of already completed frees when there is no dependency between the free and allocation. (value type = {@code int}, default enabled)
* - {@link #CU_MEMPOOL_ATTR_REUSE_ALLOW_INTERNAL_DEPENDENCIES MEMPOOL_ATTR_REUSE_ALLOW_INTERNAL_DEPENDENCIES} -
* Allow {@link #cuMemAllocAsync MemAllocAsync} to insert new stream dependencies in order to establish the stream ordering required to reuse a piece of memory released by
* {@link #cuMemFreeAsync MemFreeAsync}.
*
*
(value type = {@code int}, default enabled).
*
* - {@link #CU_MEMPOOL_ATTR_RELEASE_THRESHOLD MEMPOOL_ATTR_RELEASE_THRESHOLD} -
* Amount of reserved memory in bytes to hold onto before trying to release memory back to the OS.
*
*
When more than the release threshold bytes of memory are held by the memory pool, the allocator will try to release memory back to the OS on the
* next call to stream, event or context synchronize.
*
* (value type = {@code cuuint64_t}, default 0)
*
* - {@link #CU_MEMPOOL_ATTR_RESERVED_MEM_CURRENT MEMPOOL_ATTR_RESERVED_MEM_CURRENT} - Amount of backing memory currently allocated for the mempool. (value type = {@code cuuint64_t})
* - {@link #CU_MEMPOOL_ATTR_RESERVED_MEM_HIGH MEMPOOL_ATTR_RESERVED_MEM_HIGH} -
* High watermark of backing memory allocated for the {@code mempool} since the last time it was reset. High watermark can only be reset to zero.
*
*
(value type = {@code cuuint64_t})
*
* - {@link #CU_MEMPOOL_ATTR_USED_MEM_CURRENT MEMPOOL_ATTR_USED_MEM_CURRENT} - Amount of memory from the pool that is currently in use by the application (value type = {@code cuuint64_t}).
* - {@link #CU_MEMPOOL_ATTR_USED_MEM_HIGH MEMPOOL_ATTR_USED_MEM_HIGH} -
* High watermark of the amount of memory from the pool that was in use by the application since the last time it was reset. High watermark can only
* be reset to zero.
*
*
(value type = {@code cuuint64_t})
*
*
*/
public static final int
CU_MEMPOOL_ATTR_REUSE_FOLLOW_EVENT_DEPENDENCIES = 1,
CU_MEMPOOL_ATTR_REUSE_ALLOW_OPPORTUNISTIC = 2,
CU_MEMPOOL_ATTR_REUSE_ALLOW_INTERNAL_DEPENDENCIES = 3,
CU_MEMPOOL_ATTR_RELEASE_THRESHOLD = 4,
CU_MEMPOOL_ATTR_RESERVED_MEM_CURRENT = 5,
CU_MEMPOOL_ATTR_RESERVED_MEM_HIGH = 6,
CU_MEMPOOL_ATTR_USED_MEM_CURRENT = 7,
CU_MEMPOOL_ATTR_USED_MEM_HIGH = 8;
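// Illustrative sketch: raising the release threshold so the pool caches up to 64 MiB of freed
// memory instead of returning it to the OS at every synchronize. Assumes LWJGL exposes
// cuMemPoolSetAttribute with a (long, int, LongBuffer) overload and that "pool" was obtained
// from cuDeviceGetDefaultMemPool; verify both against the actual bindings.
//
// try (MemoryStack stack = stackPush()) {
//     LongBuffer threshold = stack.longs(64L * 1024 * 1024);
//     cuMemPoolSetAttribute(pool, CU_MEMPOOL_ATTR_RELEASE_THRESHOLD, threshold);
// }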
/**
* {@code CUgraphMem_attribute}
*
* Enum values:
*
*
* - {@link #CU_GRAPH_MEM_ATTR_USED_MEM_CURRENT GRAPH_MEM_ATTR_USED_MEM_CURRENT} - Amount of memory, in bytes, currently associated with graphs. (value type = {@code cuuint64_t})
* - {@link #CU_GRAPH_MEM_ATTR_USED_MEM_HIGH GRAPH_MEM_ATTR_USED_MEM_HIGH} -
* High watermark of memory, in bytes, associated with graphs since the last time it was reset. High watermark can only be reset to zero.
*
*
(value type = {@code cuuint64_t})
*
* - {@link #CU_GRAPH_MEM_ATTR_RESERVED_MEM_CURRENT GRAPH_MEM_ATTR_RESERVED_MEM_CURRENT} - Amount of memory, in bytes, currently allocated for use by the CUDA graphs asynchronous allocator. (value type = {@code cuuint64_t})
* - {@link #CU_GRAPH_MEM_ATTR_RESERVED_MEM_HIGH GRAPH_MEM_ATTR_RESERVED_MEM_HIGH} - High watermark of memory, in bytes, allocated for use by the CUDA graphs asynchronous allocator. (value type = {@code cuuint64_t})
*
*/
public static final int
CU_GRAPH_MEM_ATTR_USED_MEM_CURRENT = 0,
CU_GRAPH_MEM_ATTR_USED_MEM_HIGH = 1,
CU_GRAPH_MEM_ATTR_RESERVED_MEM_CURRENT = 2,
CU_GRAPH_MEM_ATTR_RESERVED_MEM_HIGH = 3;
/**
* Enum values:
*
*
* - {@link #CU_CUDA_COOPERATIVE_LAUNCH_MULTI_DEVICE_NO_PRE_LAUNCH_SYNC CUDA_COOPERATIVE_LAUNCH_MULTI_DEVICE_NO_PRE_LAUNCH_SYNC} -
* If set, each kernel launched as part of {@link #cuLaunchCooperativeKernelMultiDevice LaunchCooperativeKernelMultiDevice} only waits for prior work in the stream corresponding to that GPU to
* complete before the kernel begins execution.
*
* - {@link #CU_CUDA_COOPERATIVE_LAUNCH_MULTI_DEVICE_NO_POST_LAUNCH_SYNC CUDA_COOPERATIVE_LAUNCH_MULTI_DEVICE_NO_POST_LAUNCH_SYNC} -
* If set, any subsequent work pushed in a stream that participated in a call to {@link #cuLaunchCooperativeKernelMultiDevice LaunchCooperativeKernelMultiDevice} will only wait for the kernel
* launched on the GPU corresponding to that stream to complete before it begins execution.
*
*
*/
public static final int
CU_CUDA_COOPERATIVE_LAUNCH_MULTI_DEVICE_NO_PRE_LAUNCH_SYNC = 0x1,
CU_CUDA_COOPERATIVE_LAUNCH_MULTI_DEVICE_NO_POST_LAUNCH_SYNC = 0x2;
/**
* Enum values:
*
*
* - {@link #CUDA_ARRAY3D_LAYERED CUDA_ARRAY3D_LAYERED} -
* If set, the CUDA array is a collection of layers, where each layer is either a 1D or a 2D array and the Depth member of {@link CUDA_ARRAY3D_DESCRIPTOR}
* specifies the number of layers, not the depth of a 3D array.
*
* - {@link #CUDA_ARRAY3D_2DARRAY CUDA_ARRAY3D_2DARRAY} - Deprecated, use {@link #CUDA_ARRAY3D_LAYERED}.
* - {@link #CUDA_ARRAY3D_SURFACE_LDST CUDA_ARRAY3D_SURFACE_LDST} - This flag must be set in order to bind a surface reference to the CUDA array.
* - {@link #CUDA_ARRAY3D_CUBEMAP CUDA_ARRAY3D_CUBEMAP} -
* If set, the CUDA array is a collection of six 2D arrays, representing faces of a cube. The width of such a CUDA array must be equal to its height,
* and Depth must be six. If {@link #CUDA_ARRAY3D_LAYERED} flag is also set, then the CUDA array is a collection of cubemaps and Depth must be a multiple of
* six.
*
* - {@link #CUDA_ARRAY3D_TEXTURE_GATHER CUDA_ARRAY3D_TEXTURE_GATHER} - This flag must be set in order to perform texture gather operations on a CUDA array.
* - {@link #CUDA_ARRAY3D_DEPTH_TEXTURE CUDA_ARRAY3D_DEPTH_TEXTURE} - This flag, if set, indicates that the CUDA array is a DEPTH_TEXTURE.
* - {@link #CUDA_ARRAY3D_COLOR_ATTACHMENT CUDA_ARRAY3D_COLOR_ATTACHMENT} - This flag indicates that the CUDA array may be bound as a color target in an external graphics API.
* - {@link #CUDA_ARRAY3D_SPARSE CUDA_ARRAY3D_SPARSE} - This flag, if set, indicates that the CUDA array or CUDA mipmapped array is a sparse CUDA array or CUDA mipmapped array, respectively
*
*/
public static final int
CUDA_ARRAY3D_LAYERED = 0x01,
CUDA_ARRAY3D_2DARRAY = 0x01,
CUDA_ARRAY3D_SURFACE_LDST = 0x02,
CUDA_ARRAY3D_CUBEMAP = 0x04,
CUDA_ARRAY3D_TEXTURE_GATHER = 0x08,
CUDA_ARRAY3D_DEPTH_TEXTURE = 0x10,
CUDA_ARRAY3D_COLOR_ATTACHMENT = 0x20,
CUDA_ARRAY3D_SPARSE = 0x40;
/**
* Flag for {@link #cuTexRefSetArray TexRefSetArray}.
*
* Enum values:
*
*
* - {@link #CU_TRSA_OVERRIDE_FORMAT TRSA_OVERRIDE_FORMAT} - Override the {@code texref} format with a format inferred from the array.
*
*/
public static final int CU_TRSA_OVERRIDE_FORMAT = 0x1;
/**
* Flag for {@link #cuTexRefSetFlags TexRefSetFlags}.
*
* Enum values:
*
*
* - {@link #CU_TRSF_READ_AS_INTEGER TRSF_READ_AS_INTEGER} - Read the texture as integers rather than promoting the values to floats in the range {@code [0,1]}.
* - {@link #CU_TRSF_NORMALIZED_COORDINATES TRSF_NORMALIZED_COORDINATES} - Use normalized texture coordinates in the range {@code [0,1)} instead of {@code [0,dim)}.
* - {@link #CU_TRSF_SRGB TRSF_SRGB} - Perform {@code sRGB->linear} conversion during texture read.
* - {@link #CU_TRSF_DISABLE_TRILINEAR_OPTIMIZATION TRSF_DISABLE_TRILINEAR_OPTIMIZATION} - Disable any trilinear filtering optimizations.
*
*/
public static final int
CU_TRSF_READ_AS_INTEGER = 0x01,
CU_TRSF_NORMALIZED_COORDINATES = 0x02,
CU_TRSF_SRGB = 0x10,
CU_TRSF_DISABLE_TRILINEAR_OPTIMIZATION = 0x20;
/** End of array terminator for the {@code extra} parameter to {@link #cuLaunchKernel LaunchKernel}. */
public static final long CU_LAUNCH_PARAM_END = 0x0L;
/**
* Indicator that the next value in the {@code extra} parameter to {@link #cuLaunchKernel LaunchKernel} will be a pointer to a buffer containing all kernel parameters used for
* launching kernel {@code f}.
*
* This buffer needs to honor all alignment/padding requirements of the individual parameters. If {@link #CU_LAUNCH_PARAM_BUFFER_SIZE LAUNCH_PARAM_BUFFER_SIZE} is not also specified in the
* {@code extra} array, then {@link #CU_LAUNCH_PARAM_BUFFER_POINTER LAUNCH_PARAM_BUFFER_POINTER} will have no effect.
*/
public static final long CU_LAUNCH_PARAM_BUFFER_POINTER = 0x1L;
/**
* Indicator that the next value in the {@code extra} parameter to {@link #cuLaunchKernel LaunchKernel} will be a pointer to a {@code size_t} which contains the size of the
* buffer specified with {@link #CU_LAUNCH_PARAM_BUFFER_POINTER LAUNCH_PARAM_BUFFER_POINTER}.
*
* It is required that {@code CU_LAUNCH_PARAM_BUFFER_POINTER} also be specified in the {@code extra} array if the value associated with
* {@code CU_LAUNCH_PARAM_BUFFER_SIZE} is not zero.
*/
public static final long CU_LAUNCH_PARAM_BUFFER_SIZE = 0x2L;
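// Illustrative sketch: packing kernel arguments into the "extra" array instead of kernelParams.
// The packed buffer must honor each parameter's alignment; the layout below (one 8-byte
// CUdeviceptr followed by one 4-byte int) is a hypothetical kernel signature, and the
// PointerBuffer form of "extra" assumes the cuLaunchKernel binding declared later in this class.
//
// try (MemoryStack stack = stackPush()) {
//     ByteBuffer params = stack.malloc(12);
//     params.putLong(0, devicePtr).putInt(8, n);
//     PointerBuffer size = stack.pointers(params.remaining()); // a size_t holding the buffer size
//     PointerBuffer extra = stack.mallocPointer(5)
//         .put(CU_LAUNCH_PARAM_BUFFER_POINTER).put(memAddress(params))
//         .put(CU_LAUNCH_PARAM_BUFFER_SIZE).put(memAddress(size))
//         .put(CU_LAUNCH_PARAM_END)
//         .flip();
//     // pass "extra" as the last argument to cuLaunchKernel, with kernelParams set to null
// }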
/** For texture references loaded into the module, use default texunit from texture reference. */
public static final int CU_PARAM_TR_DEFAULT = -1;
/** Device that represents the CPU. */
public static final int CU_DEVICE_CPU = -1;
/** Device that represents an invalid device. */
public static final int CU_DEVICE_INVALID = -2;
/**
* Bitmasks for {@link #CU_DEVICE_ATTRIBUTE_GPU_DIRECT_RDMA_FLUSH_WRITES_OPTIONS DEVICE_ATTRIBUTE_GPU_DIRECT_RDMA_FLUSH_WRITES_OPTIONS}. ({@code CUflushGPUDirectRDMAWritesOptions})
*
* Enum values:
*
*
* - {@link #CU_FLUSH_GPU_DIRECT_RDMA_WRITES_OPTION_HOST FLUSH_GPU_DIRECT_RDMA_WRITES_OPTION_HOST} - {@link #cuFlushGPUDirectRDMAWrites FlushGPUDirectRDMAWrites} and its CUDA Runtime API counterpart are supported on the device.
* - {@link #CU_FLUSH_GPU_DIRECT_RDMA_WRITES_OPTION_MEMOPS FLUSH_GPU_DIRECT_RDMA_WRITES_OPTION_MEMOPS} - The {@link #CU_STREAM_WAIT_VALUE_FLUSH STREAM_WAIT_VALUE_FLUSH} flag and the {@link #CU_STREAM_MEM_OP_FLUSH_REMOTE_WRITES STREAM_MEM_OP_FLUSH_REMOTE_WRITES} {@code MemOp} are supported on the device.
*
*/
public static final int
CU_FLUSH_GPU_DIRECT_RDMA_WRITES_OPTION_HOST = 1<<0,
CU_FLUSH_GPU_DIRECT_RDMA_WRITES_OPTION_MEMOPS = 1<<1;
/**
* Platform native ordering for GPUDirect RDMA writes. ({@code CUGPUDirectRDMAWritesOrdering})
*
* Enum values:
*
*
* - {@link #CU_GPU_DIRECT_RDMA_WRITES_ORDERING_NONE GPU_DIRECT_RDMA_WRITES_ORDERING_NONE} - The device does not natively support ordering of remote writes. {@link #cuFlushGPUDirectRDMAWrites FlushGPUDirectRDMAWrites} can be leveraged if supported.
* - {@link #CU_GPU_DIRECT_RDMA_WRITES_ORDERING_OWNER GPU_DIRECT_RDMA_WRITES_ORDERING_OWNER} - Natively, the device can consistently consume remote writes, although other CUDA devices may not.
* - {@link #CU_GPU_DIRECT_RDMA_WRITES_ORDERING_ALL_DEVICES GPU_DIRECT_RDMA_WRITES_ORDERING_ALL_DEVICES} - Any CUDA device in the system can consistently consume remote writes to this device.
*
*/
public static final int
CU_GPU_DIRECT_RDMA_WRITES_ORDERING_NONE = 0,
CU_GPU_DIRECT_RDMA_WRITES_ORDERING_OWNER = 100,
CU_GPU_DIRECT_RDMA_WRITES_ORDERING_ALL_DEVICES = 200;
/**
* The scopes for {@link #cuFlushGPUDirectRDMAWrites FlushGPUDirectRDMAWrites} ({@code CUflushGPUDirectRDMAWritesScope})
*
* Enum values:
*
*
* - {@link #CU_FLUSH_GPU_DIRECT_RDMA_WRITES_TO_OWNER FLUSH_GPU_DIRECT_RDMA_WRITES_TO_OWNER} - Blocks until remote writes are visible to the CUDA device context owning the data.
* - {@link #CU_FLUSH_GPU_DIRECT_RDMA_WRITES_TO_ALL_DEVICES FLUSH_GPU_DIRECT_RDMA_WRITES_TO_ALL_DEVICES} - Blocks until remote writes are visible to all CUDA device contexts.
*
*/
public static final int
CU_FLUSH_GPU_DIRECT_RDMA_WRITES_TO_OWNER = 100,
CU_FLUSH_GPU_DIRECT_RDMA_WRITES_TO_ALL_DEVICES = 200;
/**
* The targets for {@link #cuFlushGPUDirectRDMAWrites FlushGPUDirectRDMAWrites} ({@code CUflushGPUDirectRDMAWritesTarget})
*
* Enum values:
*
*
* - {@link #CU_FLUSH_GPU_DIRECT_RDMA_WRITES_TARGET_CURRENT_CTX FLUSH_GPU_DIRECT_RDMA_WRITES_TARGET_CURRENT_CTX} - Sets the target for {@code cuFlushGPUDirectRDMAWrites()} to the currently active CUDA device context.
*
*/
public static final int CU_FLUSH_GPU_DIRECT_RDMA_WRITES_TARGET_CURRENT_CTX = 0;
/**
* The additional write options for {@link #cuGraphDebugDotPrint GraphDebugDotPrint} ({@code CUgraphDebugDot_flags})
*
* Enum values:
*
*
* - {@link #CU_GRAPH_DEBUG_DOT_FLAGS_VERBOSE GRAPH_DEBUG_DOT_FLAGS_VERBOSE} - Output all debug data as if every debug flag is enabled
* - {@link #CU_GRAPH_DEBUG_DOT_FLAGS_RUNTIME_TYPES GRAPH_DEBUG_DOT_FLAGS_RUNTIME_TYPES} - Use CUDA Runtime structures for output
* - {@link #CU_GRAPH_DEBUG_DOT_FLAGS_KERNEL_NODE_PARAMS GRAPH_DEBUG_DOT_FLAGS_KERNEL_NODE_PARAMS} - Adds {@link CUDA_KERNEL_NODE_PARAMS} values to output
* - {@link #CU_GRAPH_DEBUG_DOT_FLAGS_MEMCPY_NODE_PARAMS GRAPH_DEBUG_DOT_FLAGS_MEMCPY_NODE_PARAMS} - Adds {@link CUDA_MEMCPY3D} values to output
* - {@link #CU_GRAPH_DEBUG_DOT_FLAGS_MEMSET_NODE_PARAMS GRAPH_DEBUG_DOT_FLAGS_MEMSET_NODE_PARAMS} - Adds {@link CUDA_MEMSET_NODE_PARAMS} values to output
* - {@link #CU_GRAPH_DEBUG_DOT_FLAGS_HOST_NODE_PARAMS GRAPH_DEBUG_DOT_FLAGS_HOST_NODE_PARAMS} - Adds {@link CUDA_HOST_NODE_PARAMS} values to output
* - {@link #CU_GRAPH_DEBUG_DOT_FLAGS_EVENT_NODE_PARAMS GRAPH_DEBUG_DOT_FLAGS_EVENT_NODE_PARAMS} - Adds {@code CUevent} handle from record and wait nodes to output
* - {@link #CU_GRAPH_DEBUG_DOT_FLAGS_EXT_SEMAS_SIGNAL_NODE_PARAMS GRAPH_DEBUG_DOT_FLAGS_EXT_SEMAS_SIGNAL_NODE_PARAMS} - Adds {@link CUDA_EXT_SEM_SIGNAL_NODE_PARAMS} values to output
* - {@link #CU_GRAPH_DEBUG_DOT_FLAGS_EXT_SEMAS_WAIT_NODE_PARAMS GRAPH_DEBUG_DOT_FLAGS_EXT_SEMAS_WAIT_NODE_PARAMS} - Adds {@link CUDA_EXT_SEM_WAIT_NODE_PARAMS} values to output
* - {@link #CU_GRAPH_DEBUG_DOT_FLAGS_KERNEL_NODE_ATTRIBUTES GRAPH_DEBUG_DOT_FLAGS_KERNEL_NODE_ATTRIBUTES} - Adds {@code CUkernelNodeAttrValue} values to output
* - {@link #CU_GRAPH_DEBUG_DOT_FLAGS_HANDLES GRAPH_DEBUG_DOT_FLAGS_HANDLES} - Adds node handles and every kernel function handle to output
* - {@link #CU_GRAPH_DEBUG_DOT_FLAGS_MEM_ALLOC_NODE_PARAMS GRAPH_DEBUG_DOT_FLAGS_MEM_ALLOC_NODE_PARAMS} - Adds memory alloc node parameters to output
* - {@link #CU_GRAPH_DEBUG_DOT_FLAGS_MEM_FREE_NODE_PARAMS GRAPH_DEBUG_DOT_FLAGS_MEM_FREE_NODE_PARAMS} - Adds memory free node parameters to output
*
*/
public static final int
CU_GRAPH_DEBUG_DOT_FLAGS_VERBOSE = 1<<0,
CU_GRAPH_DEBUG_DOT_FLAGS_RUNTIME_TYPES = 1<<1,
CU_GRAPH_DEBUG_DOT_FLAGS_KERNEL_NODE_PARAMS = 1<<2,
CU_GRAPH_DEBUG_DOT_FLAGS_MEMCPY_NODE_PARAMS = 1<<3,
CU_GRAPH_DEBUG_DOT_FLAGS_MEMSET_NODE_PARAMS = 1<<4,
CU_GRAPH_DEBUG_DOT_FLAGS_HOST_NODE_PARAMS = 1<<5,
CU_GRAPH_DEBUG_DOT_FLAGS_EVENT_NODE_PARAMS = 1<<6,
CU_GRAPH_DEBUG_DOT_FLAGS_EXT_SEMAS_SIGNAL_NODE_PARAMS = 1<<7,
CU_GRAPH_DEBUG_DOT_FLAGS_EXT_SEMAS_WAIT_NODE_PARAMS = 1<<8,
CU_GRAPH_DEBUG_DOT_FLAGS_KERNEL_NODE_ATTRIBUTES = 1<<9,
CU_GRAPH_DEBUG_DOT_FLAGS_HANDLES = 1<<10,
CU_GRAPH_DEBUG_DOT_FLAGS_MEM_ALLOC_NODE_PARAMS = 1<<11,
CU_GRAPH_DEBUG_DOT_FLAGS_MEM_FREE_NODE_PARAMS = 1<<12;
/**
* Flags for user objects for graphs. ({@code CUuserObject_flags})
*
* Enum values:
*
*
* - {@link #CU_USER_OBJECT_NO_DESTRUCTOR_SYNC USER_OBJECT_NO_DESTRUCTOR_SYNC} - Indicates the destructor execution is not synchronized by any CUDA handle.
*
*/
public static final int CU_USER_OBJECT_NO_DESTRUCTOR_SYNC = 1;
/**
* Flags for retaining user object references for graphs. ({@code CUuserObjectRetain_flags})
*
* Enum values:
*
*
* - {@link #CU_GRAPH_USER_OBJECT_MOVE GRAPH_USER_OBJECT_MOVE} - Transfer references from the caller rather than creating new references.
*
*/
public static final int CU_GRAPH_USER_OBJECT_MOVE = 1;
/**
* Flags for instantiating a graph. ({@code CUgraphInstantiate_flags})
*
* Enum values:
*
*
* - {@link #CUDA_GRAPH_INSTANTIATE_FLAG_AUTO_FREE_ON_LAUNCH CUDA_GRAPH_INSTANTIATE_FLAG_AUTO_FREE_ON_LAUNCH} - Automatically free memory allocated in a graph before relaunching.
*
*/
public static final int CUDA_GRAPH_INSTANTIATE_FLAG_AUTO_FREE_ON_LAUNCH = 1;
protected CU() {
throw new UnsupportedOperationException();
}
// --- [ cuGetErrorString ] ---
/** Unsafe version of: {@link #cuGetErrorString GetErrorString} */
public static int ncuGetErrorString(int error, long pStr) {
long __functionAddress = Functions.GetErrorString;
return callPI(error, pStr, __functionAddress);
}
/**
* Gets the string description of an error code.
*
* Sets {@code *pStr} to the address of a NULL-terminated string description of the error code {@code error}. If the error code is not recognized,
* {@link #CUDA_ERROR_INVALID_VALUE} will be returned and {@code *pStr} will be set to the {@code NULL} address.
*
* @param error error code to convert to string
* @param pStr address of the string pointer
*/
@NativeType("CUresult")
public static int cuGetErrorString(@NativeType("CUresult") int error, @NativeType("char const **") PointerBuffer pStr) {
if (CHECKS) {
check(pStr, 1);
}
return ncuGetErrorString(error, memAddress(pStr));
}
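/**
 * Usage sketch (illustrative; not part of the generated bindings): resolving a {@code CUresult} to a human-readable message with a stack-allocated
 * pointer buffer. The method name is hypothetical; it assumes the driver has already been initialized with {@link #cuInit Init}.
 */
private static String exampleDescribeError(int err) {
    try (MemoryStack stack = stackPush()) {
        PointerBuffer pStr = stack.mallocPointer(1);
        // cuGetErrorString stores the address of a driver-owned, NUL-terminated string in *pStr
        if (cuGetErrorString(err, pStr) == CUDA_SUCCESS && pStr.get(0) != NULL) {
            return memUTF8(pStr.get(0));
        }
        return "unknown CUDA error " + err;
    }
}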
// --- [ cuGetErrorName ] ---
/** Unsafe version of: {@link #cuGetErrorName GetErrorName} */
public static int ncuGetErrorName(int error, long pStr) {
long __functionAddress = Functions.GetErrorName;
return callPI(error, pStr, __functionAddress);
}
/**
* Gets the string representation of an error code enum name.
*
* Sets {@code *pStr} to the address of a NULL-terminated string representation of the name of the enum error code {@code error}. If the error code is not
* recognized, {@link #CUDA_ERROR_INVALID_VALUE} will be returned and {@code *pStr} will be set to the {@code NULL} address.
*
* @param error error code to convert to string
* @param pStr address of the string pointer
*/
@NativeType("CUresult")
public static int cuGetErrorName(@NativeType("CUresult") int error, @NativeType("char const **") PointerBuffer pStr) {
if (CHECKS) {
check(pStr, 1);
}
return ncuGetErrorName(error, memAddress(pStr));
}
// --- [ cuInit ] ---
/**
* Initialize the CUDA driver API.
*
* Initializes the driver API and must be called before any other function from the driver API. Currently, the {@code Flags} parameter must be 0. If
* {@code cuInit()} has not been called, any function from the driver API will return {@link #CUDA_ERROR_NOT_INITIALIZED}.
*
* @param Flags initialization flag for CUDA
*/
@NativeType("CUresult")
public static int cuInit(@NativeType("unsigned int") int Flags) {
long __functionAddress = Functions.Init;
return callI(Flags, __functionAddress);
}
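/**
 * Usage sketch (illustrative; not part of the generated bindings): the driver must be initialized exactly once, with {@code Flags = 0}, before any other
 * driver API call is made.
 */
private static void exampleInit() {
    int result = cuInit(0);
    if (result != CUDA_SUCCESS) {
        throw new IllegalStateException("cuInit failed with CUresult " + result);
    }
}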
// --- [ cuDriverGetVersion ] ---
/** Unsafe version of: {@link #cuDriverGetVersion DriverGetVersion} */
public static int ncuDriverGetVersion(long driverVersion) {
long __functionAddress = Functions.DriverGetVersion;
return callPI(driverVersion, __functionAddress);
}
/**
* Returns the latest CUDA version supported by driver.
*
* Returns in {@code *driverVersion} the version of CUDA supported by the driver. The version is returned as ({@code 1000 × major + 10 × minor}). For
* example, CUDA 9.2 would be represented by 9020.
*
* This function automatically returns {@link #CUDA_ERROR_INVALID_VALUE} if {@code driverVersion} is {@code NULL}.
*
* @param driverVersion returns the CUDA driver version
*/
@NativeType("CUresult")
public static int cuDriverGetVersion(@NativeType("int *") IntBuffer driverVersion) {
if (CHECKS) {
check(driverVersion, 1);
}
return ncuDriverGetVersion(memAddress(driverVersion));
}
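/**
 * Usage sketch (illustrative; not part of the generated bindings): decoding the packed driver version. For example, {@code 11040} decodes to
 * major 11, minor 4.
 */
private static void examplePrintDriverVersion() {
    try (MemoryStack stack = stackPush()) {
        IntBuffer pVersion = stack.mallocInt(1);
        if (cuDriverGetVersion(pVersion) == CUDA_SUCCESS) {
            int v = pVersion.get(0);
            // version = 1000 * major + 10 * minor
            System.out.println("CUDA driver " + (v / 1000) + "." + (v % 1000 / 10));
        }
    }
}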
// --- [ cuDeviceGet ] ---
/** Unsafe version of: {@link #cuDeviceGet DeviceGet} */
public static int ncuDeviceGet(long device, int ordinal) {
long __functionAddress = Functions.DeviceGet;
return callPI(device, ordinal, __functionAddress);
}
/**
* Returns a handle to a compute device.
*
* Returns in {@code *device} a device handle given an ordinal in the range {@code [0, cuDeviceGetCount()-1]}.
*
* @param device returned device handle
* @param ordinal device number to get handle for
*/
@NativeType("CUresult")
public static int cuDeviceGet(@NativeType("CUdevice *") IntBuffer device, int ordinal) {
if (CHECKS) {
check(device, 1);
}
return ncuDeviceGet(memAddress(device), ordinal);
}
// --- [ cuDeviceGetCount ] ---
/** Unsafe version of: {@link #cuDeviceGetCount DeviceGetCount} */
public static int ncuDeviceGetCount(long count) {
long __functionAddress = Functions.DeviceGetCount;
return callPI(count, __functionAddress);
}
/**
* Returns the number of compute-capable devices.
*
* Returns in {@code *count} the number of devices with compute capability greater than or equal to 2.0 that are available for execution. If there are no
* such devices, {@code cuDeviceGetCount()} returns 0.
*
* @param count returned number of compute-capable devices
*/
@NativeType("CUresult")
public static int cuDeviceGetCount(@NativeType("int *") IntBuffer count) {
if (CHECKS) {
check(count, 1);
}
return ncuDeviceGetCount(memAddress(count));
}
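/**
 * Usage sketch (illustrative; not part of the generated bindings): enumerating all device handles with {@link #cuDeviceGetCount DeviceGetCount} and
 * {@link #cuDeviceGet DeviceGet}. Assumes {@link #cuInit Init} has succeeded.
 */
private static int[] exampleEnumerateDevices() {
    try (MemoryStack stack = stackPush()) {
        IntBuffer pCount = stack.mallocInt(1);
        if (cuDeviceGetCount(pCount) != CUDA_SUCCESS) {
            return new int[0];
        }
        int[] devices = new int[pCount.get(0)];
        IntBuffer pDevice = stack.mallocInt(1);
        for (int i = 0; i < devices.length; i++) {
            // ordinals are in the range [0, cuDeviceGetCount() - 1]
            cuDeviceGet(pDevice, i);
            devices[i] = pDevice.get(0);
        }
        return devices;
    }
}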
// --- [ cuDeviceGetName ] ---
/**
* Unsafe version of: {@link #cuDeviceGetName DeviceGetName}
*
* @param len maximum length of string to store in {@code name}
*/
public static int ncuDeviceGetName(long name, int len, int dev) {
long __functionAddress = Functions.DeviceGetName;
return callPI(name, len, dev, __functionAddress);
}
/**
* Returns an identifier string for the device.
*
* Returns an ASCII string identifying the device {@code dev} in the NULL-terminated string pointed to by {@code name}. {@code len} specifies the maximum
* length of the string that may be returned.
*
* @param name returned identifier string for the device
* @param dev device to get identifier string for
*/
@NativeType("CUresult")
public static int cuDeviceGetName(@NativeType("char *") ByteBuffer name, @NativeType("CUdevice") int dev) {
return ncuDeviceGetName(memAddress(name), name.remaining(), dev);
}
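/**
 * Usage sketch (illustrative; not part of the generated bindings): reading the device name into a stack buffer and decoding up to the NUL terminator.
 * The 256-byte capacity is an arbitrary choice; the driver truncates the string to the buffer size.
 */
private static String exampleDeviceName(int dev) {
    try (MemoryStack stack = stackPush()) {
        ByteBuffer name = stack.malloc(256);
        cuDeviceGetName(name, dev);
        // decode the NUL-terminated ASCII string at the buffer's address
        return memASCII(memAddress(name));
    }
}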
// --- [ cuDeviceGetUuid ] ---
/** Unsafe version of: {@link #cuDeviceGetUuid DeviceGetUuid} */
public static int ncuDeviceGetUuid(long uuid, int dev) {
long __functionAddress = Functions.DeviceGetUuid;
if (CHECKS) {
check(__functionAddress);
}
return callPI(uuid, dev, __functionAddress);
}
/**
* Return a UUID for the device.
*
* Note that there is a later version of this API, {@link #cuDeviceGetUuid_v2 DeviceGetUuid_v2}. It will supplant this version in 12.0; this version is retained for minor
* version compatibility.
*
* Returns 16 octets identifying the device {@code dev} in the structure pointed to by {@code uuid}.
*
* @param uuid returned UUID
* @param dev device to get identifier string for
*/
@NativeType("CUresult")
public static int cuDeviceGetUuid(@NativeType("CUuuid *") CUuuid uuid, @NativeType("CUdevice") int dev) {
return ncuDeviceGetUuid(uuid.address(), dev);
}
// --- [ cuDeviceGetUuid_v2 ] ---
/** Unsafe version of: {@link #cuDeviceGetUuid_v2 DeviceGetUuid_v2} */
public static int ncuDeviceGetUuid_v2(long uuid, int dev) {
long __functionAddress = Functions.DeviceGetUuid_v2;
if (CHECKS) {
check(__functionAddress);
}
return callPI(uuid, dev, __functionAddress);
}
/**
* Return a UUID for the device (11.4+).
*
* Returns 16 octets identifying the device {@code dev} in the structure pointed to by {@code uuid}. If the device is in MIG mode, returns its MIG UUID,
* which uniquely identifies the subscribed MIG compute instance.
*
* @param uuid returned UUID
* @param dev device to get identifier string for
*/
@NativeType("CUresult")
public static int cuDeviceGetUuid_v2(@NativeType("CUuuid *") CUuuid uuid, @NativeType("CUdevice") int dev) {
return ncuDeviceGetUuid_v2(uuid.address(), dev);
}
// --- [ cuDeviceGetLuid ] ---
/** Unsafe version of: {@link #cuDeviceGetLuid DeviceGetLuid} */
public static int ncuDeviceGetLuid(long luid, long deviceNodeMask, int dev) {
long __functionAddress = Functions.DeviceGetLuid;
if (CHECKS) {
check(__functionAddress);
}
return callPPI(luid, deviceNodeMask, dev, __functionAddress);
}
/**
* Return an LUID and device node mask for the device.
*
* Return identifying information ({@code luid} and {@code deviceNodeMask}) to allow matching the device with graphics APIs.
*
* @param luid returned LUID
* @param deviceNodeMask returned device node mask
* @param dev device to get identifier string for
*/
@NativeType("CUresult")
public static int cuDeviceGetLuid(@NativeType("char *") ByteBuffer luid, @NativeType("unsigned int *") IntBuffer deviceNodeMask, @NativeType("CUdevice") int dev) {
if (CHECKS) {
check(deviceNodeMask, 1);
}
return ncuDeviceGetLuid(memAddress(luid), memAddress(deviceNodeMask), dev);
}
// --- [ cuDeviceTotalMem ] ---
/** Unsafe version of: {@link #cuDeviceTotalMem DeviceTotalMem} */
public static int ncuDeviceTotalMem(long bytes, int dev) {
long __functionAddress = Functions.DeviceTotalMem;
return callPI(bytes, dev, __functionAddress);
}
/**
* Returns the total amount of memory on the device.
*
* Returns in {@code *bytes} the total amount of memory available on the device {@code dev} in bytes.
*
* @param bytes returned memory available on device in bytes
* @param dev device handle
*/
@NativeType("CUresult")
public static int cuDeviceTotalMem(@NativeType("size_t *") PointerBuffer bytes, @NativeType("CUdevice") int dev) {
if (CHECKS) {
check(bytes, 1);
}
return ncuDeviceTotalMem(memAddress(bytes), dev);
}
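/**
 * Usage sketch (illustrative; not part of the generated bindings): querying total device memory. A {@code PointerBuffer} is used because the native
 * out-parameter is a {@code size_t}.
 */
private static long exampleTotalMem(int dev) {
    try (MemoryStack stack = stackPush()) {
        PointerBuffer bytes = stack.mallocPointer(1);
        return cuDeviceTotalMem(bytes, dev) == CUDA_SUCCESS ? bytes.get(0) : -1L;
    }
}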
// --- [ cuDeviceGetTexture1DLinearMaxWidth ] ---
/** Unsafe version of: {@link #cuDeviceGetTexture1DLinearMaxWidth DeviceGetTexture1DLinearMaxWidth} */
public static int ncuDeviceGetTexture1DLinearMaxWidth(long maxWidthInElements, int format, int numChannels, int dev) {
long __functionAddress = Functions.DeviceGetTexture1DLinearMaxWidth;
if (CHECKS) {
check(__functionAddress);
}
return callPI(maxWidthInElements, format, numChannels, dev, __functionAddress);
}
/**
* Returns the maximum number of elements allocatable in a 1D linear texture for a given texture element size.
*
* Returns in {@code maxWidthInElements} the maximum number of texture elements allocatable in a 1D linear texture for given {@code format} and {@code
* numChannels}.
*
* @param maxWidthInElements returned maximum number of texture elements allocatable for given {@code format} and {@code numChannels}
* @param format texture format
* @param numChannels number of channels per texture element
* @param dev device handle
*/
@NativeType("CUresult")
public static int cuDeviceGetTexture1DLinearMaxWidth(@NativeType("size_t *") PointerBuffer maxWidthInElements, @NativeType("CUarray_format") int format, @NativeType("unsigned int") int numChannels, @NativeType("CUdevice") int dev) {
if (CHECKS) {
check(maxWidthInElements, 1);
}
return ncuDeviceGetTexture1DLinearMaxWidth(memAddress(maxWidthInElements), format, numChannels, dev);
}
// --- [ cuDeviceGetAttribute ] ---
/** Unsafe version of: {@link #cuDeviceGetAttribute DeviceGetAttribute} */
public static int ncuDeviceGetAttribute(long pi, int attrib, int dev) {
long __functionAddress = Functions.DeviceGetAttribute;
return callPI(pi, attrib, dev, __functionAddress);
}
/**
* Returns information about the device.
*
* Returns in {@code *pi} the integer value of the attribute {@code attrib} on device {@code dev}. The supported attributes are the {@code CUdevice_attribute} values.
*
* @param pi returned device attribute value
* @param attrib device attribute to query
* @param dev device handle
*/
@NativeType("CUresult")
public static int cuDeviceGetAttribute(@NativeType("int *") IntBuffer pi, @NativeType("CUdevice_attribute") int attrib, @NativeType("CUdevice") int dev) {
if (CHECKS) {
check(pi, 1);
}
return ncuDeviceGetAttribute(memAddress(pi), attrib, dev);
}
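/**
 * Usage sketch (illustrative; not part of the generated bindings): querying a single integer attribute, here the multiprocessor count.
 */
private static int exampleSmCount(int dev) {
    try (MemoryStack stack = stackPush()) {
        IntBuffer pi = stack.mallocInt(1);
        cuDeviceGetAttribute(pi, CU_DEVICE_ATTRIBUTE_MULTIPROCESSOR_COUNT, dev);
        return pi.get(0);
    }
}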
// --- [ cuDeviceGetNvSciSyncAttributes ] ---
/** Unsafe version of: {@link #cuDeviceGetNvSciSyncAttributes DeviceGetNvSciSyncAttributes} */
public static int ncuDeviceGetNvSciSyncAttributes(long nvSciSyncAttrList, int dev, int flags) {
long __functionAddress = Functions.DeviceGetNvSciSyncAttributes;
if (CHECKS) {
check(__functionAddress);
}
return callPI(nvSciSyncAttrList, dev, flags, __functionAddress);
}
/**
* Return {@code NvSciSync} attributes that this device can support.
*
* Returns in {@code nvSciSyncAttrList} the properties of {@code NvSciSync} that this CUDA device, {@code dev}, can support. The returned {@code
* nvSciSyncAttrList} can be used to create an {@code NvSciSync} object that matches this device's capabilities.
*
* If the {@code NvSciSyncAttrKey_RequiredPerm} field in {@code nvSciSyncAttrList} is already set, this API will return {@link #CUDA_ERROR_INVALID_VALUE}.
*
* The application should set {@code nvSciSyncAttrList} to a valid {@code NvSciSyncAttrList}, failing which this API will return
* {@link #CUDA_ERROR_INVALID_HANDLE}.
*
* The {@code flags} parameter controls how the application intends to use the {@code NvSciSync} created from the {@code nvSciSyncAttrList}. The valid flags are:
*
*
* - {@link #CUDA_NVSCISYNC_ATTR_SIGNAL}, specifies that the application intends to signal an {@code NvSciSync} on this CUDA device.
* - {@link #CUDA_NVSCISYNC_ATTR_WAIT}, specifies that the application intends to wait on an {@code NvSciSync} on this CUDA device.
*
*
* At least one of these flags must be set, failing which the API returns {@link #CUDA_ERROR_INVALID_VALUE}. The two flags are orthogonal: a developer may
* set both, which allows setting both wait- and signal-specific attributes in the same {@code nvSciSyncAttrList}.
*
* @param nvSciSyncAttrList return NvSciSync attributes supported
* @param dev valid Cuda Device to get {@code NvSciSync} attributes for
* @param flags flags describing {@code NvSciSync} usage
*/
@NativeType("CUresult")
public static int cuDeviceGetNvSciSyncAttributes(@NativeType("void *") ByteBuffer nvSciSyncAttrList, @NativeType("CUdevice") int dev, int flags) {
return ncuDeviceGetNvSciSyncAttributes(memAddress(nvSciSyncAttrList), dev, flags);
}
// --- [ cuDeviceSetMemPool ] ---
/**
* Sets the current memory pool of a device.
*
* The memory pool must be local to the specified device. {@link #cuMemAllocAsync MemAllocAsync} allocates from the current mempool of the provided stream's device. By default,
* a device's current memory pool is its default memory pool.
*
* Note
*
* Use {@link #cuMemAllocFromPoolAsync MemAllocFromPoolAsync} to specify asynchronous allocations from a device different than the one the stream runs on.
*/
@NativeType("CUresult")
public static int cuDeviceSetMemPool(@NativeType("CUdevice") int dev, @NativeType("CUmemoryPool") long pool) {
long __functionAddress = Functions.DeviceSetMemPool;
if (CHECKS) {
check(__functionAddress);
check(pool);
}
return callPI(dev, pool, __functionAddress);
}
// --- [ cuDeviceGetMemPool ] ---
/** Unsafe version of: {@link #cuDeviceGetMemPool DeviceGetMemPool} */
public static int ncuDeviceGetMemPool(long pool, int dev) {
long __functionAddress = Functions.DeviceGetMemPool;
if (CHECKS) {
check(__functionAddress);
}
return callPI(pool, dev, __functionAddress);
}
/**
* Gets the current mempool for a device.
*
* Returns the last pool provided to {@link #cuDeviceSetMemPool DeviceSetMemPool} for this device, or the device's default memory pool if {@link #cuDeviceSetMemPool DeviceSetMemPool} has never been
* called.
*/
@NativeType("CUresult")
public static int cuDeviceGetMemPool(@NativeType("CUmemoryPool *") PointerBuffer pool, @NativeType("CUdevice") int dev) {
if (CHECKS) {
check(pool, 1);
}
return ncuDeviceGetMemPool(memAddress(pool), dev);
}
// --- [ cuDeviceGetDefaultMemPool ] ---
/** Unsafe version of: {@link #cuDeviceGetDefaultMemPool DeviceGetDefaultMemPool} */
public static int ncuDeviceGetDefaultMemPool(long pool_out, int dev) {
long __functionAddress = Functions.DeviceGetDefaultMemPool;
if (CHECKS) {
check(__functionAddress);
}
return callPI(pool_out, dev, __functionAddress);
}
/**
* Returns the default mempool of a device.
*
* The default mempool of a device contains device memory from that device.
*/
@NativeType("CUresult")
public static int cuDeviceGetDefaultMemPool(@NativeType("CUmemoryPool *") PointerBuffer pool_out, @NativeType("CUdevice") int dev) {
if (CHECKS) {
check(pool_out, 1);
}
return ncuDeviceGetDefaultMemPool(memAddress(pool_out), dev);
}
// --- [ cuFlushGPUDirectRDMAWrites ] ---
/**
* Blocks until remote writes are visible to the specified scope.
*
* Blocks until GPUDirect RDMA writes to the target context via mappings created through APIs like {@code nvidia_p2p_get_pages} (see
* https://docs.nvidia.com/cuda/gpudirect-rdma for more information) are visible to the specified scope.
*
* If the scope equals or lies within the scope indicated by {@link #CU_DEVICE_ATTRIBUTE_GPU_DIRECT_RDMA_WRITES_ORDERING DEVICE_ATTRIBUTE_GPU_DIRECT_RDMA_WRITES_ORDERING}, the call will be a no-op and can be safely
* omitted for performance. This can be determined by comparing the numerical values between the two enums, with smaller scopes having smaller values.
*
* Users may query support for this API via {@link #CU_DEVICE_ATTRIBUTE_GPU_DIRECT_RDMA_FLUSH_WRITES_OPTIONS DEVICE_ATTRIBUTE_GPU_DIRECT_RDMA_FLUSH_WRITES_OPTIONS}.
*
* @param target the target of the operation, see {@code CUflushGPUDirectRDMAWritesTarget}
* @param scope the scope of the operation, see {@code CUflushGPUDirectRDMAWritesScope}
*/
@NativeType("CUresult")
public static int cuFlushGPUDirectRDMAWrites(@NativeType("CUflushGPUDirectRDMAWritesTarget") int target, @NativeType("CUflushGPUDirectRDMAWritesScope") int scope) {
long __functionAddress = Functions.FlushGPUDirectRDMAWrites;
if (CHECKS) {
check(__functionAddress);
}
return callI(target, scope, __functionAddress);
}
// --- [ cuDeviceGetProperties ] ---
/** Unsafe version of: {@link #cuDeviceGetProperties DeviceGetProperties} */
public static int ncuDeviceGetProperties(long prop, int dev) {
long __functionAddress = Functions.DeviceGetProperties;
return callPI(prop, dev, __functionAddress);
}
/**
* Returns properties for a selected device.
*
* Deprecated: This function was deprecated as of CUDA 5.0 and replaced by {@link #cuDeviceGetAttribute DeviceGetAttribute}.
*
* Returns in {@code *prop} the properties of device {@code dev}.
*
* @param prop returned properties of device
* @param dev device to get properties for
*/
@NativeType("CUresult")
public static int cuDeviceGetProperties(@NativeType("CUdevprop *") CUdevprop prop, @NativeType("CUdevice") int dev) {
return ncuDeviceGetProperties(prop.address(), dev);
}
// --- [ cuDeviceComputeCapability ] ---
/** Unsafe version of: {@link #cuDeviceComputeCapability DeviceComputeCapability} */
public static int ncuDeviceComputeCapability(long major, long minor, int dev) {
long __functionAddress = Functions.DeviceComputeCapability;
return callPPI(major, minor, dev, __functionAddress);
}
/**
* Returns the compute capability of the device.
*
* Deprecated: This function was deprecated as of CUDA 5.0 and its functionality superseded by {@link #cuDeviceGetAttribute DeviceGetAttribute}.
*
* Returns in {@code *major} and {@code *minor} the major and minor revision numbers that define the compute capability of the device {@code dev}.
*
* @param major major revision number
* @param minor minor revision number
* @param dev device handle
*/
@NativeType("CUresult")
public static int cuDeviceComputeCapability(@NativeType("int *") IntBuffer major, @NativeType("int *") IntBuffer minor, @NativeType("CUdevice") int dev) {
if (CHECKS) {
check(major, 1);
check(minor, 1);
}
return ncuDeviceComputeCapability(memAddress(major), memAddress(minor), dev);
}
// --- [ cuDevicePrimaryCtxRetain ] ---
/** Unsafe version of: {@link #cuDevicePrimaryCtxRetain DevicePrimaryCtxRetain} */
public static int ncuDevicePrimaryCtxRetain(long pctx, int dev) {
long __functionAddress = Functions.DevicePrimaryCtxRetain;
if (CHECKS) {
check(__functionAddress);
}
return callPI(pctx, dev, __functionAddress);
}
/**
* Retain the primary context on the GPU.
*
* Retains the primary context on the device. Once the user successfully retains the primary context, the primary context will be active and available to
* the user until the user releases it with {@link #cuDevicePrimaryCtxRelease DevicePrimaryCtxRelease} or resets it with {@link #cuDevicePrimaryCtxReset DevicePrimaryCtxReset}. Unlike {@link #cuCtxCreate CtxCreate} the newly retained
* context is not pushed onto the stack.
*
* Retaining the primary context for the first time will fail with {@link #CUDA_ERROR_UNKNOWN} if the compute mode of the device is {@link #CU_COMPUTEMODE_PROHIBITED COMPUTEMODE_PROHIBITED}. The
* function {@link #cuDeviceGetAttribute DeviceGetAttribute} can be used with {@link #CU_DEVICE_ATTRIBUTE_COMPUTE_MODE DEVICE_ATTRIBUTE_COMPUTE_MODE} to determine the compute mode of the device. The nvidia-smi tool
* can be used to set the compute mode for devices. Documentation for nvidia-smi can be obtained by passing a -h option to it.
*
* Please note that the primary context always supports pinned allocations. Other flags can be specified by {@link #cuDevicePrimaryCtxSetFlags DevicePrimaryCtxSetFlags}.
*
* @param pctx returned context handle of the new context
* @param dev device for which primary context is requested
*/
@NativeType("CUresult")
public static int cuDevicePrimaryCtxRetain(@NativeType("CUcontext *") PointerBuffer pctx, @NativeType("CUdevice") int dev) {
if (CHECKS) {
check(pctx, 1);
}
return ncuDevicePrimaryCtxRetain(memAddress(pctx), dev);
}
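/**
 * Usage sketch (illustrative; not part of the generated bindings): the retain/release pattern for the primary context. Every successful retain must be
 * balanced by a {@link #cuDevicePrimaryCtxRelease DevicePrimaryCtxRelease}.
 */
private static void exampleWithPrimaryContext(int dev) {
    try (MemoryStack stack = stackPush()) {
        PointerBuffer pctx = stack.mallocPointer(1);
        if (cuDevicePrimaryCtxRetain(pctx, dev) != CUDA_SUCCESS) {
            return;
        }
        try {
            // the retained context is NOT pushed onto the stack; make it current explicitly
            cuCtxSetCurrent(pctx.get(0));
            // ... work with the context ...
        } finally {
            cuDevicePrimaryCtxRelease(dev);
        }
    }
}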
// --- [ cuDevicePrimaryCtxRelease ] ---
/**
* Release the primary context on the GPU.
*
* Releases the primary context on the device. A retained context should always be released once the user is done using it. The context is
* automatically reset once the last reference to it is released. This behavior is different when the primary context was retained by the CUDA runtime
* from CUDA 4.0 and earlier, in which case the primary context always remains active.
*
* Releasing a primary context that has not been previously retained will fail with {@link #CUDA_ERROR_INVALID_CONTEXT}.
*
* Please note that unlike {@link #cuCtxDestroy CtxDestroy} this method does not pop the context from the stack under any circumstances.
*
* @param dev device for which the primary context is released
*/
@NativeType("CUresult")
public static int cuDevicePrimaryCtxRelease(@NativeType("CUdevice") int dev) {
long __functionAddress = Functions.DevicePrimaryCtxRelease;
if (CHECKS) {
check(__functionAddress);
}
return callI(dev, __functionAddress);
}
// --- [ cuDevicePrimaryCtxSetFlags ] ---
/**
* Set flags for the primary context.
*
* Sets the flags for the primary context on the device, overwriting previously set ones.
*
* The three LSBs of the {@code flags} parameter can be used to control how the OS thread, which owns the CUDA context at the time of an API call,
* interacts with the OS scheduler when waiting for results from the GPU. Only one of the scheduling flags can be set when creating a context:
*
*
* - {@link #CU_CTX_SCHED_SPIN CTX_SCHED_SPIN}: Instruct CUDA to actively spin when waiting for results from the GPU. This can decrease latency when waiting for the GPU, but
* may lower the performance of CPU threads if they are performing work in parallel with the CUDA thread.
* - {@link #CU_CTX_SCHED_YIELD CTX_SCHED_YIELD}: Instruct CUDA to yield its thread when waiting for results from the GPU. This can increase latency when waiting for the GPU,
* but can increase the performance of CPU threads performing work in parallel with the GPU.
* - {@link #CU_CTX_SCHED_BLOCKING_SYNC CTX_SCHED_BLOCKING_SYNC}: Instruct CUDA to block the CPU thread on a synchronization primitive when waiting for the GPU to finish work.
* - {@link #CU_CTX_BLOCKING_SYNC CTX_BLOCKING_SYNC}: Instruct CUDA to block the CPU thread on a synchronization primitive when waiting for the GPU to finish work.
*
*
* Deprecated: This flag was deprecated as of CUDA 4.0 and was replaced with {@link #CU_CTX_SCHED_BLOCKING_SYNC CTX_SCHED_BLOCKING_SYNC}.
* - {@link #CU_CTX_SCHED_AUTO CTX_SCHED_AUTO}: The default value if the {@code flags} parameter is zero, uses a heuristic based on the number of active CUDA contexts in the
* process C and the number of logical processors in the system P. If C > P, then CUDA will yield to other OS
* threads when waiting for the GPU ({@link #CU_CTX_SCHED_YIELD CTX_SCHED_YIELD}), otherwise CUDA will not yield while waiting for results and actively spin on the processor
* ({@link #CU_CTX_SCHED_SPIN CTX_SCHED_SPIN}). Additionally, on Tegra devices, {@link #CU_CTX_SCHED_AUTO CTX_SCHED_AUTO} uses a heuristic based on the power profile of the platform and may choose
* {@link #CU_CTX_SCHED_BLOCKING_SYNC CTX_SCHED_BLOCKING_SYNC} for low-powered devices.
* - {@link #CU_CTX_LMEM_RESIZE_TO_MAX CTX_LMEM_RESIZE_TO_MAX}: Instruct CUDA to not reduce local memory after resizing local memory for a kernel. This can prevent thrashing by local
* memory allocations when launching many kernels with high local memory usage at the cost of potentially increased memory usage.
*
*
* Deprecated: This flag is deprecated and the behavior enabled by this flag is now the default and cannot be disabled.
*
*
* @param dev device for which the primary context flags are set
* @param flags new flags for the device
*/
@NativeType("CUresult")
public static int cuDevicePrimaryCtxSetFlags(@NativeType("CUdevice") int dev, @NativeType("unsigned int") int flags) {
long __functionAddress = Functions.DevicePrimaryCtxSetFlags;
if (CHECKS) {
check(__functionAddress);
}
return callI(dev, flags, __functionAddress);
}
// --- [ cuDevicePrimaryCtxGetState ] ---
/** Unsafe version of: {@link #cuDevicePrimaryCtxGetState DevicePrimaryCtxGetState} */
public static int ncuDevicePrimaryCtxGetState(int dev, long flags, long active) {
long __functionAddress = Functions.DevicePrimaryCtxGetState;
if (CHECKS) {
check(__functionAddress);
}
return callPPI(dev, flags, active, __functionAddress);
}
/**
* Get the state of the primary context.
*
* Returns in {@code *flags} the flags for the primary context of {@code dev}, and in {@code *active} whether it is active. See
* {@link #cuDevicePrimaryCtxSetFlags DevicePrimaryCtxSetFlags} for flag values.
*
* @param dev device to get primary context flags for
* @param flags pointer to store flags
* @param active pointer to store context state; 0 = inactive, 1 = active
*/
@NativeType("CUresult")
public static int cuDevicePrimaryCtxGetState(@NativeType("CUdevice") int dev, @NativeType("unsigned int *") IntBuffer flags, @NativeType("int *") IntBuffer active) {
if (CHECKS) {
check(flags, 1);
check(active, 1);
}
return ncuDevicePrimaryCtxGetState(dev, memAddress(flags), memAddress(active));
}
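/**
 * Usage sketch (illustrative; not part of the generated bindings): checking whether the primary context of a device is already active before
 * retaining it.
 */
private static boolean examplePrimaryCtxActive(int dev) {
    try (MemoryStack stack = stackPush()) {
        IntBuffer flags  = stack.mallocInt(1);
        IntBuffer active = stack.mallocInt(1);
        return cuDevicePrimaryCtxGetState(dev, flags, active) == CUDA_SUCCESS && active.get(0) == 1;
    }
}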
// --- [ cuDevicePrimaryCtxReset ] ---
/**
* Destroy all allocations and reset all state on the primary context.
*
* Explicitly destroys and cleans up all resources associated with the current device in the current process.
*
* Note that it is the responsibility of the calling function to ensure that no other module in the process is using the device any more. For that reason
* it is recommended to use {@link #cuDevicePrimaryCtxRelease DevicePrimaryCtxRelease} in most cases. However, it is safe for other modules to call {@code cuDevicePrimaryCtxRelease()} even
* after resetting the device. Resetting the primary context does not release it; an application that has retained the primary context should explicitly
* release its usage.
*
* @param dev device for which primary context is destroyed
*/
@NativeType("CUresult")
public static int cuDevicePrimaryCtxReset(@NativeType("CUdevice") int dev) {
long __functionAddress = Functions.DevicePrimaryCtxReset;
if (CHECKS) {
check(__functionAddress);
}
return callI(dev, __functionAddress);
}
// --- [ cuDeviceGetExecAffinitySupport ] ---
/** Unsafe version of: {@link #cuDeviceGetExecAffinitySupport DeviceGetExecAffinitySupport} */
public static int ncuDeviceGetExecAffinitySupport(long pi, int type, int dev) {
long __functionAddress = Functions.DeviceGetExecAffinitySupport;
if (CHECKS) {
check(__functionAddress);
}
return callPI(pi, type, dev, __functionAddress);
}
/**
* Returns information about the execution affinity support of the device.
*
* Returns in {@code *pi} whether execution affinity type {@code type} is supported by device {@code dev}. The supported types are:
*
*
* - {@link #CU_EXEC_AFFINITY_TYPE_SM_COUNT EXEC_AFFINITY_TYPE_SM_COUNT}: 1 if context with limited SMs is supported by the device, or 0 if not;
*
*
* @param pi 1 if the execution affinity type {@code type} is supported by the device, or 0 if not
* @param type execution affinity type to query
* @param dev device handle
*/
@NativeType("CUresult")
public static int cuDeviceGetExecAffinitySupport(@NativeType("int *") IntBuffer pi, @NativeType("CUexecAffinityType") int type, @NativeType("CUdevice") int dev) {
if (CHECKS) {
check(pi, 1);
}
return ncuDeviceGetExecAffinitySupport(memAddress(pi), type, dev);
}
// --- [ cuCtxCreate ] ---
/** Unsafe version of: {@link #cuCtxCreate CtxCreate} */
public static int ncuCtxCreate(long pctx, int flags, int dev) {
long __functionAddress = Functions.CtxCreate;
return callPI(pctx, flags, dev, __functionAddress);
}
/**
* Create a CUDA context.
*
* Note
*
* In most cases it is recommended to use {@link #cuDevicePrimaryCtxRetain DevicePrimaryCtxRetain}.
*
* Creates a new CUDA context and associates it with the calling thread. The {@code flags} parameter is described below. The context is created with a
* usage count of 1 and the caller of {@code cuCtxCreate()} must call {@link #cuCtxDestroy CtxDestroy} when done using the context. If a context is already current to the
* thread, it is supplanted by the newly created context and may be restored by a subsequent call to {@link #cuCtxPopCurrent CtxPopCurrent}.
*
* The three LSBs of the {@code flags} parameter can be used to control how the OS thread, which owns the CUDA context at the time of an API call,
* interacts with the OS scheduler when waiting for results from the GPU. Only one of the scheduling flags can be set when creating a context:
*
*
* - {@link #CU_CTX_SCHED_SPIN CTX_SCHED_SPIN}: Instruct CUDA to actively spin when waiting for results from the GPU. This can decrease latency when waiting for the GPU, but may
* lower the performance of CPU threads if they are performing work in parallel with the CUDA thread.
* - {@link #CU_CTX_SCHED_YIELD CTX_SCHED_YIELD}: Instruct CUDA to yield its thread when waiting for results from the GPU. This can increase latency when waiting for the GPU, but
* can increase the performance of CPU threads performing work in parallel with the GPU.
* - {@link #CU_CTX_SCHED_BLOCKING_SYNC CTX_SCHED_BLOCKING_SYNC}: Instruct CUDA to block the CPU thread on a synchronization primitive when waiting for the GPU to finish work.
* - {@link #CU_CTX_BLOCKING_SYNC CTX_BLOCKING_SYNC}: Instruct CUDA to block the CPU thread on a synchronization primitive when waiting for the GPU to finish work.
*
*
* Deprecated: This flag was deprecated as of CUDA 4.0 and was replaced with {@link #CU_CTX_SCHED_BLOCKING_SYNC CTX_SCHED_BLOCKING_SYNC}.
* - {@link #CU_CTX_SCHED_AUTO CTX_SCHED_AUTO}: The default value if the {@code flags} parameter is zero, uses a heuristic based on the number of active CUDA contexts in the
* process C and the number of logical processors in the system P. If C > P, then CUDA will yield to other OS
* threads when waiting for the GPU ({@link #CU_CTX_SCHED_YIELD CTX_SCHED_YIELD}), otherwise CUDA will not yield while waiting for results and actively spin on the processor
* ({@link #CU_CTX_SCHED_SPIN CTX_SCHED_SPIN}). Additionally, on Tegra devices, {@link #CU_CTX_SCHED_AUTO CTX_SCHED_AUTO} uses a heuristic based on the power profile of the platform and may choose
* {@link #CU_CTX_SCHED_BLOCKING_SYNC CTX_SCHED_BLOCKING_SYNC} for low-powered devices.
* - {@link #CU_CTX_MAP_HOST CTX_MAP_HOST}: Instruct CUDA to support mapped pinned allocations. This flag must be set in order to allocate pinned host memory that is
* accessible to the GPU.
* - {@link #CU_CTX_LMEM_RESIZE_TO_MAX CTX_LMEM_RESIZE_TO_MAX}: Instruct CUDA to not reduce local memory after resizing local memory for a kernel. This can prevent thrashing by local
* memory allocations when launching many kernels with high local memory usage at the cost of potentially increased memory usage.
*
*
* Deprecated: This flag is deprecated and the behavior enabled by this flag is now the default and cannot be disabled. Instead, the
* per-thread stack size can be controlled with {@link #cuCtxSetLimit CtxSetLimit}.
*
*
* Context creation will fail with {@link #CUDA_ERROR_UNKNOWN} if the compute mode of the device is {@link #CU_COMPUTEMODE_PROHIBITED COMPUTEMODE_PROHIBITED}. The function {@link #cuDeviceGetAttribute DeviceGetAttribute}
* can be used with {@link #CU_DEVICE_ATTRIBUTE_COMPUTE_MODE DEVICE_ATTRIBUTE_COMPUTE_MODE} to determine the compute mode of the device. The nvidia-smi tool can be used to set the compute
* mode for devices. Documentation for nvidia-smi can be obtained by passing a -h option to it.
*
* @param pctx returned context handle of the new context
* @param flags context creation flags
* @param dev device to create context on
*/
@NativeType("CUresult")
public static int cuCtxCreate(@NativeType("CUcontext *") PointerBuffer pctx, @NativeType("unsigned int") int flags, @NativeType("CUdevice") int dev) {
if (CHECKS) {
check(pctx, 1);
}
return ncuCtxCreate(memAddress(pctx), flags, dev);
}
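/**
 * Usage sketch (illustrative; not part of the generated bindings): creating a context with blocking synchronization and destroying it when done. For
 * most applications, {@link #cuDevicePrimaryCtxRetain DevicePrimaryCtxRetain} is preferable, as noted above.
 */
private static void exampleCreateContext(int dev) {
    try (MemoryStack stack = stackPush()) {
        PointerBuffer pctx = stack.mallocPointer(1);
        if (cuCtxCreate(pctx, CU_CTX_SCHED_BLOCKING_SYNC, dev) != CUDA_SUCCESS) {
            return;
        }
        long ctx = pctx.get(0);
        try {
            // the new context is now current to this thread
            // ... work with the context ...
        } finally {
            cuCtxDestroy(ctx);
        }
    }
}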
// --- [ cuCtxCreate_v3 ] ---
/**
* Unsafe version of: {@link #cuCtxCreate_v3 CtxCreate_v3}
*
* @param numParams number of execution affinity parameters
*/
public static int ncuCtxCreate_v3(long pctx, long paramsArray, int numParams, int flags, int dev) {
long __functionAddress = Functions.CtxCreate_v3;
if (CHECKS) {
check(__functionAddress);
}
return callPPI(pctx, paramsArray, numParams, flags, dev, __functionAddress);
}
/**
* Create a CUDA context with execution affinity.
*
* Creates a new CUDA context with execution affinity and associates it with the calling thread. The {@code paramsArray} and {@code flags} parameter are
* described below. The context is created with a usage count of 1 and the caller of {@link #cuCtxCreate CtxCreate} must call {@link #cuCtxDestroy CtxDestroy} when done using the
* context. If a context is already current to the thread, it is supplanted by the newly created context and may be restored by a subsequent call to
* {@link #cuCtxPopCurrent CtxPopCurrent}.
*
* The type and the amount of execution resource the context can use is limited by {@code paramsArray} and {@code numParams}. The {@code paramsArray} is
* an array of {@code CUexecAffinityParam} and the {@code numParams} describes the size of the array. If two {@code CUexecAffinityParam} in the array have
* the same type, the latter execution affinity parameter overrides the former execution affinity parameter. The supported execution affinity types are:
*
*
* - {@link #CU_EXEC_AFFINITY_TYPE_SM_COUNT EXEC_AFFINITY_TYPE_SM_COUNT} limits the portion of SMs that the context can use. The portion of SMs is specified as the number of SMs via
* {@link CUexecAffinitySmCount}. This limit will be internally rounded up to the next hardware-supported amount. Hence, it is imperative to query the
* actual execution affinity of the context via {@link #cuCtxGetExecAffinity CtxGetExecAffinity} after context creation. Currently, this attribute is only supported under
* Volta+ MPS.
*
*
* The three LSBs of the {@code flags} parameter can be used to control how the OS thread, which owns the CUDA context at the time of an API call,
* interacts with the OS scheduler when waiting for results from the GPU. Only one of the scheduling flags can be set when creating a context:
*
*
* - {@link #CU_CTX_SCHED_SPIN CTX_SCHED_SPIN}: Instruct CUDA to actively spin when waiting for results from the GPU. This can decrease latency when waiting for the GPU, but may
* lower the performance of CPU threads if they are performing work in parallel with the CUDA thread.
* - {@link #CU_CTX_SCHED_YIELD CTX_SCHED_YIELD}: Instruct CUDA to yield its thread when waiting for results from the GPU. This can increase latency when waiting for the GPU, but
* can increase the performance of CPU threads performing work in parallel with the GPU.
* - {@link #CU_CTX_SCHED_BLOCKING_SYNC CTX_SCHED_BLOCKING_SYNC}: Instruct CUDA to block the CPU thread on a synchronization primitive when waiting for the GPU to finish work.
* - {@link #CU_CTX_BLOCKING_SYNC CTX_BLOCKING_SYNC}: Instruct CUDA to block the CPU thread on a synchronization primitive when waiting for the GPU to finish work.
*
*
* Deprecated: This flag was deprecated as of CUDA 4.0 and was replaced with {@link #CU_CTX_SCHED_BLOCKING_SYNC CTX_SCHED_BLOCKING_SYNC}.
* - {@link #CU_CTX_SCHED_AUTO CTX_SCHED_AUTO}: The default value if the {@code flags} parameter is zero, uses a heuristic based on the number of active CUDA contexts in the
* process C and the number of logical processors in the system P. If C > P, then CUDA will yield to other OS
* threads when waiting for the GPU ({@link #CU_CTX_SCHED_YIELD CTX_SCHED_YIELD}), otherwise CUDA will not yield while waiting for results and actively spin on the processor
* ({@link #CU_CTX_SCHED_SPIN CTX_SCHED_SPIN}). Additionally, on Tegra devices, {@link #CU_CTX_SCHED_AUTO CTX_SCHED_AUTO} uses a heuristic based on the power profile of the platform and may choose
* {@link #CU_CTX_SCHED_BLOCKING_SYNC CTX_SCHED_BLOCKING_SYNC} for low-powered devices.
* - {@link #CU_CTX_MAP_HOST CTX_MAP_HOST}: Instruct CUDA to support mapped pinned allocations. This flag must be set in order to allocate pinned host memory that is accessible
* to the GPU.
* - {@link #CU_CTX_LMEM_RESIZE_TO_MAX CTX_LMEM_RESIZE_TO_MAX}: Instruct CUDA to not reduce local memory after resizing local memory for a kernel. This can prevent thrashing by local
* memory allocations when launching many kernels with high local memory usage at the cost of potentially increased memory usage.
*
*
* Deprecated: This flag is deprecated and the behavior enabled by this flag is now the default and cannot be disabled. Instead, the
* per-thread stack size can be controlled with {@link #cuCtxSetLimit CtxSetLimit}.
*
*
* Context creation will fail with {@link #CUDA_ERROR_UNKNOWN} if the compute mode of the device is {@link #CU_COMPUTEMODE_PROHIBITED COMPUTEMODE_PROHIBITED}. The function {@link #cuDeviceGetAttribute DeviceGetAttribute}
* can be used with {@link #CU_DEVICE_ATTRIBUTE_COMPUTE_MODE DEVICE_ATTRIBUTE_COMPUTE_MODE} to determine the compute mode of the device. The nvidia-smi tool can be used to set the compute
* mode for devices. Documentation for nvidia-smi can be obtained by passing a -h option to it.
*
* @param pctx returned context handle of the new context
* @param paramsArray execution affinity parameters
* @param flags context creation flags
* @param dev device to create context on
*/
@NativeType("CUresult")
public static int cuCtxCreate_v3(@NativeType("CUcontext *") PointerBuffer pctx, @NativeType("CUexecAffinityParam *") CUexecAffinityParam.Buffer paramsArray, @NativeType("unsigned int") int flags, @NativeType("CUdevice") int dev) {
if (CHECKS) {
check(pctx, 1);
}
return ncuCtxCreate_v3(memAddress(pctx), paramsArray.address(), paramsArray.remaining(), flags, dev);
}
// --- [ cuCtxDestroy ] ---
/**
* Destroy a CUDA context.
*
* Destroys the CUDA context specified by {@code ctx}. The context {@code ctx} will be destroyed regardless of how many threads it is current to. It is
* the responsibility of the calling function to ensure that no API calls are issued using {@code ctx} while {@code cuCtxDestroy()} is executing.
*
* Destroys and cleans up all resources associated with the context. It is the caller's responsibility to ensure that the context or its resources are not
* accessed or passed in subsequent API calls; doing so will result in undefined behavior. These resources include CUDA types such as {@code CUmodule},
* {@code CUfunction}, {@code CUstream}, {@code CUevent}, {@code CUarray}, {@code CUmipmappedArray}, {@code CUtexObject}, {@code CUsurfObject},
* {@code CUtexref}, {@code CUsurfref}, {@code CUgraphicsResource}, {@code CUlinkState}, {@code CUexternalMemory} and {@code CUexternalSemaphore}.
*
* If {@code ctx} is current to the calling thread then {@code ctx} will also be popped from the current thread's context stack (as though
* {@link #cuCtxPopCurrent CtxPopCurrent} were called). If {@code ctx} is current to other threads, then {@code ctx} will remain current to those threads, and attempting to
* access {@code ctx} from those threads will result in the error {@link #CUDA_ERROR_CONTEXT_IS_DESTROYED}.
*
* @param ctx context to destroy
*/
@NativeType("CUresult")
public static int cuCtxDestroy(@NativeType("CUcontext") long ctx) {
long __functionAddress = Functions.CtxDestroy;
if (CHECKS) {
check(__functionAddress);
check(ctx);
}
return callPI(ctx, __functionAddress);
}
// --- [ cuCtxPushCurrent ] ---
/**
* Pushes a context on the current CPU thread.
*
* Pushes the given context {@code ctx} onto the CPU thread's stack of current contexts. The specified context becomes the CPU thread's current context,
* so all CUDA functions that operate on the current context are affected.
*
* The previous current context may be made current again by calling {@link #cuCtxDestroy CtxDestroy} or {@link #cuCtxPopCurrent CtxPopCurrent}.
*
* @param ctx context to push
*/
@NativeType("CUresult")
public static int cuCtxPushCurrent(@NativeType("CUcontext") long ctx) {
long __functionAddress = Functions.CtxPushCurrent;
if (CHECKS) {
check(__functionAddress);
check(ctx);
}
return callPI(ctx, __functionAddress);
}
// --- [ cuCtxPopCurrent ] ---
/** Unsafe version of: {@link #cuCtxPopCurrent CtxPopCurrent} */
public static int ncuCtxPopCurrent(long pctx) {
long __functionAddress = Functions.CtxPopCurrent;
if (CHECKS) {
check(__functionAddress);
}
return callPI(pctx, __functionAddress);
}
/**
* Pops the current CUDA context from the current CPU thread.
*
* Pops the current CUDA context from the CPU thread and passes back the old context handle in {@code *pctx}. That context may then be made current to a
* different CPU thread by calling {@link #cuCtxPushCurrent CtxPushCurrent}.
*
* If a context was current to the CPU thread before {@link #cuCtxCreate CtxCreate} or {@link #cuCtxPushCurrent CtxPushCurrent} was called, this function makes that context current to the CPU
* thread again.
*
* @param pctx returned new context handle
*/
@NativeType("CUresult")
public static int cuCtxPopCurrent(@NativeType("CUcontext *") PointerBuffer pctx) {
if (CHECKS) {
check(pctx, 1);
}
return ncuCtxPopCurrent(memAddress(pctx));
}
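/**
 * Usage sketch (illustrative; not part of the generated bindings): temporarily making a context current with the push/pop pair, restoring whatever
 * context was current before.
 */
private static void exampleWithContext(long ctx) {
    cuCtxPushCurrent(ctx);
    try {
        // ... calls here operate on ctx ...
    } finally {
        try (MemoryStack stack = stackPush()) {
            PointerBuffer popped = stack.mallocPointer(1);
            cuCtxPopCurrent(popped); // popped.get(0) == ctx
        }
    }
}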
// --- [ cuCtxSetCurrent ] ---
/**
* Binds the specified CUDA context to the calling CPU thread.
*
* Binds the specified CUDA context to the calling CPU thread. If {@code ctx} is {@code NULL} then the CUDA context previously bound to the calling CPU thread is
* unbound and {@link #CUDA_SUCCESS} is returned.
*
* If there exists a CUDA context stack on the calling CPU thread, this will replace the top of that stack with {@code ctx}. If {@code ctx} is {@code NULL} then
* this will be equivalent to popping the top of the calling CPU thread's CUDA context stack (or a no-op if the calling CPU thread's CUDA context stack is
* empty).
*
* @param ctx context to bind to the calling CPU thread
*/
@NativeType("CUresult")
public static int cuCtxSetCurrent(@NativeType("CUcontext") long ctx) {
long __functionAddress = Functions.CtxSetCurrent;
if (CHECKS) {
check(__functionAddress);
check(ctx);
}
return callPI(ctx, __functionAddress);
}
// --- [ cuCtxGetCurrent ] ---
/** Unsafe version of: {@link #cuCtxGetCurrent CtxGetCurrent} */
public static int ncuCtxGetCurrent(long pctx) {
long __functionAddress = Functions.CtxGetCurrent;
if (CHECKS) {
check(__functionAddress);
}
return callPI(pctx, __functionAddress);
}
/**
* Returns the CUDA context bound to the calling CPU thread.
*
* Returns in {@code *pctx} the CUDA context bound to the calling CPU thread. If no context is bound to the calling CPU thread then {@code *pctx} is set
* to {@code NULL} and {@link #CUDA_SUCCESS} is returned.
*
* @param pctx returned context handle
*/
@NativeType("CUresult")
public static int cuCtxGetCurrent(@NativeType("CUcontext *") PointerBuffer pctx) {
if (CHECKS) {
check(pctx, 1);
}
return ncuCtxGetCurrent(memAddress(pctx));
}
// --- [ cuCtxGetDevice ] ---
/** Unsafe version of: {@link #cuCtxGetDevice CtxGetDevice} */
public static int ncuCtxGetDevice(long device) {
long __functionAddress = Functions.CtxGetDevice;
return callPI(device, __functionAddress);
}
/**
* Returns the device ID for the current context.
*
* Returns in {@code *device} the ordinal of the current context's device.
*
* @param device returned device ID for the current context
*/
@NativeType("CUresult")
public static int cuCtxGetDevice(@NativeType("CUdevice *") IntBuffer device) {
if (CHECKS) {
check(device, 1);
}
return ncuCtxGetDevice(memAddress(device));
}
// --- [ cuCtxGetFlags ] ---
/** Unsafe version of: {@link #cuCtxGetFlags CtxGetFlags} */
public static int ncuCtxGetFlags(long flags) {
long __functionAddress = Functions.CtxGetFlags;
if (CHECKS) {
check(__functionAddress);
}
return callPI(flags, __functionAddress);
}
/**
* Returns the flags for the current context.
*
* Returns in {@code *flags} the flags of the current context. See {@link #cuCtxCreate CtxCreate} for flag values.
*
* @param flags pointer to store flags of current context
*/
@NativeType("CUresult")
public static int cuCtxGetFlags(@NativeType("unsigned int *") IntBuffer flags) {
if (CHECKS) {
check(flags, 1);
}
return ncuCtxGetFlags(memAddress(flags));
}
// --- [ cuCtxSynchronize ] ---
/**
* Block for a context's tasks to complete.
*
* Blocks until the device has completed all preceding requested tasks. {@code cuCtxSynchronize()} returns an error if one of the preceding tasks failed.
* If the context was created with the {@link #CU_CTX_SCHED_BLOCKING_SYNC CTX_SCHED_BLOCKING_SYNC} flag, the CPU thread will block until the GPU context has finished its work.
*/
@NativeType("CUresult")
public static int cuCtxSynchronize() {
long __functionAddress = Functions.CtxSynchronize;
return callI(__functionAddress);
}
// --- [ cuCtxSetLimit ] ---
/**
* Set resource limits.
*
* Setting {@code limit} to {@code value} is a request by the application to update the current limit maintained by the context. The driver is free to
* modify the requested value to meet h/w requirements (this could be clamping to minimum or maximum values, rounding up to nearest element size, etc).
* The application can use {@link #cuCtxGetLimit CtxGetLimit} to find out exactly what the limit has been set to.
*
* Setting each {@code CUlimit} has its own specific restrictions, so each is discussed here.
*
*
* - {@link #CU_LIMIT_STACK_SIZE LIMIT_STACK_SIZE} controls the stack size in bytes of each GPU thread. The driver automatically increases the per-thread stack size for each kernel
* launch as needed. This size isn't reset back to the original value after each launch. Setting this value will take effect immediately, and if
* necessary, the device will block until all preceding requested tasks are complete.
* - {@link #CU_LIMIT_PRINTF_FIFO_SIZE LIMIT_PRINTF_FIFO_SIZE} controls the size in bytes of the FIFO used by the {@code printf()} device system call. Setting {@link #CU_LIMIT_PRINTF_FIFO_SIZE LIMIT_PRINTF_FIFO_SIZE}
* must be performed before launching any kernel that uses the {@code printf()} device system call, otherwise {@link #CUDA_ERROR_INVALID_VALUE} will be
* returned.
* - {@link #CU_LIMIT_MALLOC_HEAP_SIZE LIMIT_MALLOC_HEAP_SIZE} controls the size in bytes of the heap used by the {@code malloc()} and {@code free()} device system calls. Setting
* {@code CU_LIMIT_MALLOC_HEAP_SIZE} must be performed before launching any kernel that uses the {@code malloc()} or {@code free()} device system
* calls, otherwise {@link #CUDA_ERROR_INVALID_VALUE} will be returned.
* - {@link #CU_LIMIT_DEV_RUNTIME_SYNC_DEPTH LIMIT_DEV_RUNTIME_SYNC_DEPTH} controls the maximum nesting depth of a grid at which a thread can safely call {@code cudaDeviceSynchronize()}.
* Setting this limit must be performed before any launch of a kernel that uses the device runtime and calls {@code cudaDeviceSynchronize()} above the
* default sync depth, two levels of grids. Calls to {@code cudaDeviceSynchronize()} will fail with error code {@code cudaErrorSyncDepthExceeded} if
* the limitation is violated. This limit can be set smaller than the default or up to the maximum launch depth of 24. When setting this limit, keep in
* mind that additional levels of sync depth require the driver to reserve large amounts of device memory which can no longer be used for user
* allocations. If these reservations of device memory fail, {@code cuCtxSetLimit()} will return {@link #CUDA_ERROR_OUT_OF_MEMORY}, and the limit can be reset
* to a lower value. This limit is only applicable to devices of compute capability 3.5 and higher. Attempting to set this limit on devices of compute
* capability less than 3.5 will result in the error {@link #CUDA_ERROR_UNSUPPORTED_LIMIT} being returned.
* - {@link #CU_LIMIT_DEV_RUNTIME_PENDING_LAUNCH_COUNT LIMIT_DEV_RUNTIME_PENDING_LAUNCH_COUNT} controls the maximum number of outstanding device runtime launches that can be made from the current
* context. A grid is outstanding from the point of launch up until the grid is known to have been completed. Device runtime launches which violate
* this limitation fail and return {@code cudaErrorLaunchPendingCountExceeded} when {@code cudaGetLastError()} is called after launch. If more pending
* launches than the default (2048 launches) are needed for a module using the device runtime, this limit can be increased. Keep in mind that being
* able to sustain additional pending launches will require the driver to reserve larger amounts of device memory upfront which can no longer be used
* for allocations. If these reservations fail, {@code cuCtxSetLimit()} will return {@link #CUDA_ERROR_OUT_OF_MEMORY}, and the limit can be reset to a lower
* value. This limit is only applicable to devices of compute capability 3.5 and higher. Attempting to set this limit on devices of compute capability
* less than 3.5 will result in the error {@link #CUDA_ERROR_UNSUPPORTED_LIMIT} being returned.
* - {@link #CU_LIMIT_MAX_L2_FETCH_GRANULARITY LIMIT_MAX_L2_FETCH_GRANULARITY} controls the L2 cache fetch granularity. Values can range from 0B to 128B. This is purely a performance hint and it
* can be ignored or clamped depending on the platform.
* - {@link #CU_LIMIT_PERSISTING_L2_CACHE_SIZE LIMIT_PERSISTING_L2_CACHE_SIZE} controls the size in bytes available for the persisting L2 cache. This is purely a performance hint and it can be ignored or
* clamped depending on the platform.
*
*
* @param limit limit to set
* @param value size of limit
*/
@NativeType("CUresult")
public static int cuCtxSetLimit(@NativeType("CUlimit") int limit, @NativeType("size_t") long value) {
long __functionAddress = Functions.CtxSetLimit;
return callPI(limit, value, __functionAddress);
}
// --- [ cuCtxGetLimit ] ---
/** Unsafe version of: {@link #cuCtxGetLimit CtxGetLimit} */
public static int ncuCtxGetLimit(long pvalue, int limit) {
long __functionAddress = Functions.CtxGetLimit;
return callPI(pvalue, limit, __functionAddress);
}
/**
* Returns resource limits.
*
* Returns in {@code *pvalue} the current size of {@code limit}.
*
* @param pvalue returned size of limit
* @param limit limit to query
*/
@NativeType("CUresult")
public static int cuCtxGetLimit(@NativeType("size_t *") PointerBuffer pvalue, @NativeType("CUlimit") int limit) {
if (CHECKS) {
check(pvalue, 1);
}
return ncuCtxGetLimit(memAddress(pvalue), limit);
}
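/**
 * Usage sketch (illustrative; not part of the generated bindings): requesting a larger per-thread stack and reading back the value the driver actually
 * applied, since the driver may clamp or round the requested value.
 */
private static long exampleSetStackSize(long requestedBytes) {
    try (MemoryStack stack = stackPush()) {
        cuCtxSetLimit(CU_LIMIT_STACK_SIZE, requestedBytes);
        PointerBuffer pvalue = stack.mallocPointer(1);
        cuCtxGetLimit(pvalue, CU_LIMIT_STACK_SIZE);
        return pvalue.get(0);
    }
}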
// --- [ cuCtxGetCacheConfig ] ---
/** Unsafe version of: {@link #cuCtxGetCacheConfig CtxGetCacheConfig} */
public static int ncuCtxGetCacheConfig(long pconfig) {
long __functionAddress = Functions.CtxGetCacheConfig;
return callPI(pconfig, __functionAddress);
}
/**
* Returns the preferred cache configuration for the current context.
*
* On devices where the L1 cache and shared memory use the same hardware resources, this function returns through {@code pconfig} the preferred cache
* configuration for the current context. This is only a preference. The driver will use the requested configuration if possible, but it is free to choose
* a different configuration if required to execute functions.
*
* This will return a {@code pconfig} of {@link #CU_FUNC_CACHE_PREFER_NONE FUNC_CACHE_PREFER_NONE} on devices where the size of the L1 cache and shared memory are fixed.
*
* @param pconfig returned cache configuration
*/
@NativeType("CUresult")
public static int cuCtxGetCacheConfig(@NativeType("CUfunc_cache *") IntBuffer pconfig) {
if (CHECKS) {
check(pconfig, 1);
}
return ncuCtxGetCacheConfig(memAddress(pconfig));
}
// --- [ cuCtxSetCacheConfig ] ---
/**
* Sets the preferred cache configuration for the current context.
*
* On devices where the L1 cache and shared memory use the same hardware resources, this sets through {@code config} the preferred cache configuration for
* the current context. This is only a preference. The driver will use the requested configuration if possible, but it is free to choose a different
* configuration if required to execute the function. Any function preference set via {@code cuFuncSetCacheConfig()} will be preferred over this
* context-wide setting. Setting the context-wide cache configuration to {@link #CU_FUNC_CACHE_PREFER_NONE FUNC_CACHE_PREFER_NONE} will cause subsequent kernel launches to prefer to not
* change the cache configuration unless required to launch the kernel.
*
* This setting does nothing on devices where the size of the L1 cache and shared memory are fixed.
*
* Launching a kernel with a different preference than the most recent preference setting may insert a device-side synchronization point.
*
* @param config requested cache configuration
*/
@NativeType("CUresult")
public static int cuCtxSetCacheConfig(@NativeType("CUfunc_cache") int config) {
long __functionAddress = Functions.CtxSetCacheConfig;
return callI(config, __functionAddress);
}
// --- [ cuCtxGetSharedMemConfig ] ---
/** Unsafe version of: {@link #cuCtxGetSharedMemConfig CtxGetSharedMemConfig} */
public static int ncuCtxGetSharedMemConfig(long pConfig) {
long __functionAddress = Functions.CtxGetSharedMemConfig;
if (CHECKS) {
check(__functionAddress);
}
return callPI(pConfig, __functionAddress);
}
/**
* Returns the current shared memory configuration for the current context.
*
* This function will return in {@code pConfig} the current size of shared memory banks in the current context. On devices with configurable shared memory
* banks, {@link #cuCtxSetSharedMemConfig CtxSetSharedMemConfig} can be used to change this setting, so that all subsequent kernel launches will by default use the new bank size.
* When {@code cuCtxGetSharedMemConfig} is called on devices without configurable shared memory, it will return the fixed bank size of the hardware.
*
* @param pConfig returned shared memory configuration
*/
@NativeType("CUresult")
public static int cuCtxGetSharedMemConfig(@NativeType("CUsharedconfig *") IntBuffer pConfig) {
if (CHECKS) {
check(pConfig, 1);
}
return ncuCtxGetSharedMemConfig(memAddress(pConfig));
}
// --- [ cuCtxSetSharedMemConfig ] ---
/**
* Sets the shared memory configuration for the current context.
*
* On devices with configurable shared memory banks, this function will set the context's shared memory bank size which is used for subsequent kernel
* launches.
*
* Changing the shared memory configuration between launches may insert a device-side synchronization point between those launches.
*
* Changing the shared memory bank size will not increase shared memory usage or affect occupancy of kernels, but may have major effects on performance.
* Larger bank sizes will allow for greater potential bandwidth to shared memory, but will change what kinds of accesses to shared memory will result in
* bank conflicts.
*
* This function will do nothing on devices with fixed shared memory bank size.
*
* @param config requested shared memory configuration
*/
@NativeType("CUresult")
public static int cuCtxSetSharedMemConfig(@NativeType("CUsharedconfig") int config) {
long __functionAddress = Functions.CtxSetSharedMemConfig;
if (CHECKS) {
check(__functionAddress);
}
return callI(config, __functionAddress);
}
// --- [ cuCtxGetApiVersion ] ---
/** Unsafe version of: {@link #cuCtxGetApiVersion CtxGetApiVersion} */
public static int ncuCtxGetApiVersion(long ctx, long version) {
long __functionAddress = Functions.CtxGetApiVersion;
return callPPI(ctx, version, __functionAddress);
}
/**
* Gets the context's API version.
*
* Returns a version number in {@code version} corresponding to the capabilities of the context (e.g. 3010 or 3020), which library developers can use to
* direct callers to a specific API version. If {@code ctx} is {@code NULL}, returns the API version used to create the currently bound context.
*
* Note that new API versions are only introduced when context capabilities are changed that break binary compatibility, so the API version and driver
* version may be different. For example, it is valid for the API version to be 3020 while the driver version is 4020.
*
* @param ctx context to check
* @param version pointer to version
*/
@NativeType("CUresult")
public static int cuCtxGetApiVersion(@NativeType("CUcontext") long ctx, @NativeType("unsigned int *") IntBuffer version) {
if (CHECKS) {
check(version, 1);
}
return ncuCtxGetApiVersion(ctx, memAddress(version));
}
// --- [ cuCtxGetStreamPriorityRange ] ---
/** Unsafe version of: {@link #cuCtxGetStreamPriorityRange CtxGetStreamPriorityRange} */
public static int ncuCtxGetStreamPriorityRange(long leastPriority, long greatestPriority) {
long __functionAddress = Functions.CtxGetStreamPriorityRange;
return callPPI(leastPriority, greatestPriority, __functionAddress);
}
/**
* Returns numerical values that correspond to the least and greatest stream priorities.
*
* Returns in {@code *leastPriority} and {@code *greatestPriority} the numerical values that correspond to the least and greatest stream priorities
* respectively. Stream priorities follow a convention where lower numbers imply greater priorities. The range of meaningful stream priorities is given by
* [{@code *greatestPriority}, {@code *leastPriority}]. If the user attempts to create a stream with a priority value that is outside the meaningful
* range as specified by this API, the priority is automatically clamped down or up to either {@code *leastPriority} or {@code *greatestPriority},
* respectively. See {@link #cuStreamCreateWithPriority StreamCreateWithPriority} for details on creating a priority stream. {@code NULL} may be passed in
* for {@code *leastPriority} or {@code *greatestPriority} if the value is not desired.
*
* This function will return {@code 0} in both {@code *leastPriority} and {@code *greatestPriority} if the current context's device does not support
* stream priorities (see {@link #cuDeviceGetAttribute DeviceGetAttribute}).
*
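* For illustration, a minimal sketch that creates a stream at the greatest available priority (assumes a current context, error handling elided):
*
* try (MemoryStack stack = stackPush()) {
*     IntBuffer least    = stack.mallocInt(1);
*     IntBuffer greatest = stack.mallocInt(1);
*     cuCtxGetStreamPriorityRange(least, greatest);
*     PointerBuffer pStream = stack.mallocPointer(1);
*     cuStreamCreateWithPriority(pStream, CU_STREAM_NON_BLOCKING, greatest.get(0));
* }
*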
* @param leastPriority pointer to an int in which the numerical value for least stream priority is returned
* @param greatestPriority pointer to an int in which the numerical value for greatest stream priority is returned
*/
@NativeType("CUresult")
public static int cuCtxGetStreamPriorityRange(@Nullable @NativeType("int *") IntBuffer leastPriority, @Nullable @NativeType("int *") IntBuffer greatestPriority) {
if (CHECKS) {
checkSafe(leastPriority, 1);
checkSafe(greatestPriority, 1);
}
return ncuCtxGetStreamPriorityRange(memAddressSafe(leastPriority), memAddressSafe(greatestPriority));
}
// --- [ cuCtxResetPersistingL2Cache ] ---
/**
* Resets all persisting lines in cache to normal status.
*
* Takes effect on function return.
*/
@NativeType("CUresult")
public static int cuCtxResetPersistingL2Cache() {
long __functionAddress = Functions.CtxResetPersistingL2Cache;
if (CHECKS) {
check(__functionAddress);
}
return callI(__functionAddress);
}
// --- [ cuCtxGetExecAffinity ] ---
/** Unsafe version of: {@link #cuCtxGetExecAffinity CtxGetExecAffinity} */
public static int ncuCtxGetExecAffinity(long pExecAffinity, int type) {
long __functionAddress = Functions.CtxGetExecAffinity;
if (CHECKS) {
check(__functionAddress);
}
return callPI(pExecAffinity, type, __functionAddress);
}
/**
* Returns the execution affinity setting for the current context.
*
* Returns in {@code *pExecAffinity} the current value of {@code type}.
*
* @param pExecAffinity returned execution affinity
* @param type execution affinity type to query
*/
@NativeType("CUresult")
public static int cuCtxGetExecAffinity(@NativeType("CUexecAffinityParam *") CUexecAffinityParam.Buffer pExecAffinity, @NativeType("CUexecAffinityType") int type) {
if (CHECKS) {
check(pExecAffinity, 1);
}
return ncuCtxGetExecAffinity(pExecAffinity.address(), type);
}
// --- [ cuCtxAttach ] ---
/** Unsafe version of: {@link #cuCtxAttach CtxAttach} */
public static int ncuCtxAttach(long pctx, int flags) {
long __functionAddress = Functions.CtxAttach;
return callPI(pctx, flags, __functionAddress);
}
/**
* Increment a context's usage-count.
*
* Deprecated: Note that this function is deprecated and should not be used.
*
* Increments the usage count of the context and passes back a context handle in {@code *pctx} that must be passed to {@link #cuCtxDetach CtxDetach} when the application
* is done with the context. {@code cuCtxAttach()} fails if there is no context current to the thread.
*
* Currently, the {@code flags} parameter must be 0.
*
* @param pctx returned context handle of the current context
* @param flags context attach flags (must be 0)
*/
@NativeType("CUresult")
public static int cuCtxAttach(@NativeType("CUcontext *") PointerBuffer pctx, @NativeType("unsigned int") int flags) {
if (CHECKS) {
check(pctx, 1);
}
return ncuCtxAttach(memAddress(pctx), flags);
}
// --- [ cuCtxDetach ] ---
/**
* Decrement a context's usage-count.
*
* Deprecated: Note that this function is deprecated and should not be used.
*
* Decrements the usage count of the context {@code ctx}, and destroys the context if the usage count goes to 0. The context must be a handle that was
* passed back by {@link #cuCtxCreate CtxCreate} or {@link #cuCtxAttach CtxAttach}, and must be current to the calling thread.
*
* @param ctx context to destroy
*/
@NativeType("CUresult")
public static int cuCtxDetach(@NativeType("CUcontext") long ctx) {
long __functionAddress = Functions.CtxDetach;
if (CHECKS) {
check(ctx);
}
return callPI(ctx, __functionAddress);
}
// --- [ cuModuleLoad ] ---
/** Unsafe version of: {@link #cuModuleLoad ModuleLoad} */
public static int ncuModuleLoad(long module, long fname) {
long __functionAddress = Functions.ModuleLoad;
return callPPI(module, fname, __functionAddress);
}
/**
* Loads a compute module.
*
* Takes a filename {@code fname} and loads the corresponding module {@code module} into the current context. The CUDA driver API does not attempt to
* lazily allocate the resources needed by a module; if the memory for functions and data (constant and global) needed by the module cannot be allocated,
* {@code cuModuleLoad()} fails. The file should be a cubin file as output by nvcc, or a PTX file either as output by nvcc
* or handwritten, or a fatbin file as output by nvcc from toolchain 4.0 or later.
*
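* For illustration, a minimal sketch that loads a PTX file and looks up a kernel (the file name and kernel name are hypothetical; error handling elided):
*
* try (MemoryStack stack = stackPush()) {
*     PointerBuffer pModule = stack.mallocPointer(1);
*     cuModuleLoad(pModule, "kernels.ptx");
*     PointerBuffer pFunction = stack.mallocPointer(1);
*     cuModuleGetFunction(pFunction, pModule.get(0), "saxpy");
* }
*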
* @param module returned module
* @param fname filename of module to load
*/
@NativeType("CUresult")
public static int cuModuleLoad(@NativeType("CUmodule *") PointerBuffer module, @NativeType("char const *") ByteBuffer fname) {
if (CHECKS) {
check(module, 1);
checkNT1(fname);
}
return ncuModuleLoad(memAddress(module), memAddress(fname));
}
/**
* Loads a compute module.
*
* Takes a filename {@code fname} and loads the corresponding module {@code module} into the current context. The CUDA driver API does not attempt to
* lazily allocate the resources needed by a module; if the memory for functions and data (constant and global) needed by the module cannot be allocated,
* {@code cuModuleLoad()} fails. The file should be a cubin file as output by nvcc, or a PTX file either as output by nvcc
* or handwritten, or a fatbin file as output by nvcc from toolchain 4.0 or later.
*
* @param module returned module
* @param fname filename of module to load
*/
@NativeType("CUresult")
public static int cuModuleLoad(@NativeType("CUmodule *") PointerBuffer module, @NativeType("char const *") CharSequence fname) {
if (CHECKS) {
check(module, 1);
}
MemoryStack stack = stackGet(); int stackPointer = stack.getPointer();
try {
stack.nUTF8(fname, true);
long fnameEncoded = stack.getPointerAddress();
return ncuModuleLoad(memAddress(module), fnameEncoded);
} finally {
stack.setPointer(stackPointer);
}
}
// --- [ cuModuleLoadData ] ---
/** Unsafe version of: {@link #cuModuleLoadData ModuleLoadData} */
public static int ncuModuleLoadData(long module, long image) {
long __functionAddress = Functions.ModuleLoadData;
return callPPI(module, image, __functionAddress);
}
/**
* Load a module's data.
*
* Takes a pointer {@code image} and loads the corresponding module {@code module} into the current context. The pointer may be obtained by mapping a
* cubin or PTX or fatbin file, passing a cubin or PTX or fatbin file as a NULL-terminated text
* string, or incorporating a cubin or fatbin object into the executable resources and using operating system calls such as Windows
* {@code FindResource()} to obtain the pointer.
*
* @param module returned module
* @param image module data to load
*/
@NativeType("CUresult")
public static int cuModuleLoadData(@NativeType("CUmodule *") PointerBuffer module, @NativeType("void const *") ByteBuffer image) {
if (CHECKS) {
check(module, 1);
}
return ncuModuleLoadData(memAddress(module), memAddress(image));
}
// --- [ cuModuleLoadDataEx ] ---
/**
* Unsafe version of: {@link #cuModuleLoadDataEx ModuleLoadDataEx}
*
* @param numOptions number of options
*/
public static int ncuModuleLoadDataEx(long module, long image, int numOptions, long options, long optionValues) {
long __functionAddress = Functions.ModuleLoadDataEx;
return callPPPPI(module, image, numOptions, options, optionValues, __functionAddress);
}
/**
* Load a module's data with options.
*
* Takes a pointer {@code image} and loads the corresponding module {@code module} into the current context. The pointer may be obtained by mapping a
* cubin or PTX or fatbin file, passing a cubin or PTX or fatbin file as a NULL-terminated text
* string, or incorporating a cubin or fatbin object into the executable resources and using operating system calls such as Windows
* {@code FindResource()} to obtain the pointer. Options are passed as an array via {@code options} and any corresponding parameters are passed in {@code
* optionValues}. The number of total options is supplied via {@code numOptions}. Any outputs will be returned via {@code optionValues}.
*
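* For illustration, a minimal sketch that captures the JIT error log while loading an assumed {@code image} buffer holding NULL-terminated PTX:
*
* try (MemoryStack stack = stackPush()) {
*     ByteBuffer    errorLog     = stack.malloc(4096);
*     IntBuffer     options      = stack.ints(CU_JIT_ERROR_LOG_BUFFER, CU_JIT_ERROR_LOG_BUFFER_SIZE_BYTES);
*     PointerBuffer optionValues = stack.mallocPointer(2)
*         .put(0, memAddress(errorLog))   // where the driver writes the log
*         .put(1, errorLog.capacity());   // IN: buffer size, OUT: bytes filled
*     PointerBuffer pModule = stack.mallocPointer(1);
*     if (cuModuleLoadDataEx(pModule, image, options, optionValues) != CUDA_SUCCESS) {
*         System.err.println(memASCII(memAddress(errorLog)));
*     }
* }
*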
* @param module returned module
* @param image module data to load
* @param options options for JIT
* @param optionValues option values for JIT
*/
@NativeType("CUresult")
public static int cuModuleLoadDataEx(@NativeType("CUmodule *") PointerBuffer module, @NativeType("void const *") ByteBuffer image, @Nullable @NativeType("CUjit_option *") IntBuffer options, @Nullable @NativeType("void **") PointerBuffer optionValues) {
if (CHECKS) {
check(module, 1);
checkSafe(optionValues, remainingSafe(options));
}
return ncuModuleLoadDataEx(memAddress(module), memAddress(image), remainingSafe(options), memAddressSafe(options), memAddressSafe(optionValues));
}
// --- [ cuModuleLoadFatBinary ] ---
/** Unsafe version of: {@link #cuModuleLoadFatBinary ModuleLoadFatBinary} */
public static int ncuModuleLoadFatBinary(long module, long fatCubin) {
long __functionAddress = Functions.ModuleLoadFatBinary;
return callPPI(module, fatCubin, __functionAddress);
}
/**
* Load a module's data.
*
* Takes a pointer {@code fatCubin} and loads the corresponding module {@code module} into the current context. The pointer represents a fat binary
* object, which is a collection of different cubin and/or PTX files, all representing the same device code, but compiled and optimized
* for different architectures.
*
* Prior to CUDA 4.0, there was no documented API for constructing and using fat binary objects by programmers. Starting with CUDA 4.0, fat binary objects
* can be constructed by providing the -fatbin option to nvcc. More information can be found in the nvcc documentation.
*
* @param module returned module
* @param fatCubin fat binary to load
*/
@NativeType("CUresult")
public static int cuModuleLoadFatBinary(@NativeType("CUmodule *") PointerBuffer module, @NativeType("void const *") ByteBuffer fatCubin) {
if (CHECKS) {
check(module, 1);
}
return ncuModuleLoadFatBinary(memAddress(module), memAddress(fatCubin));
}
// --- [ cuModuleUnload ] ---
/**
* Unloads a module.
*
* Unloads a module {@code hmod} from the current context.
*
* @param hmod module to unload
*/
@NativeType("CUresult")
public static int cuModuleUnload(@NativeType("CUmodule") long hmod) {
long __functionAddress = Functions.ModuleUnload;
if (CHECKS) {
check(hmod);
}
return callPI(hmod, __functionAddress);
}
// --- [ cuModuleGetFunction ] ---
/** Unsafe version of: {@link #cuModuleGetFunction ModuleGetFunction} */
public static int ncuModuleGetFunction(long hfunc, long hmod, long name) {
long __functionAddress = Functions.ModuleGetFunction;
if (CHECKS) {
check(hmod);
}
return callPPPI(hfunc, hmod, name, __functionAddress);
}
/**
* Returns a function handle.
*
* Returns in {@code *hfunc} the handle of the function of name {@code name} located in module {@code hmod}. If no function of that name exists,
* {@code cuModuleGetFunction()} returns {@link #CUDA_ERROR_NOT_FOUND}.
*
* @param hfunc returned function handle
* @param hmod module to retrieve function from
* @param name name of function to retrieve
*/
@NativeType("CUresult")
public static int cuModuleGetFunction(@NativeType("CUfunction *") PointerBuffer hfunc, @NativeType("CUmodule") long hmod, @NativeType("char const *") ByteBuffer name) {
if (CHECKS) {
check(hfunc, 1);
checkNT1(name);
}
return ncuModuleGetFunction(memAddress(hfunc), hmod, memAddress(name));
}
/**
* Returns a function handle.
*
* Returns in {@code *hfunc} the handle of the function of name {@code name} located in module {@code hmod}. If no function of that name exists,
* {@code cuModuleGetFunction()} returns {@link #CUDA_ERROR_NOT_FOUND}.
*
* @param hfunc returned function handle
* @param hmod module to retrieve function from
* @param name name of function to retrieve
*/
@NativeType("CUresult")
public static int cuModuleGetFunction(@NativeType("CUfunction *") PointerBuffer hfunc, @NativeType("CUmodule") long hmod, @NativeType("char const *") CharSequence name) {
if (CHECKS) {
check(hfunc, 1);
}
MemoryStack stack = stackGet(); int stackPointer = stack.getPointer();
try {
stack.nUTF8(name, true);
long nameEncoded = stack.getPointerAddress();
return ncuModuleGetFunction(memAddress(hfunc), hmod, nameEncoded);
} finally {
stack.setPointer(stackPointer);
}
}
// --- [ cuModuleGetGlobal ] ---
/** Unsafe version of: {@link #cuModuleGetGlobal ModuleGetGlobal} */
public static int ncuModuleGetGlobal(long dptr, long bytes, long hmod, long name) {
long __functionAddress = Functions.ModuleGetGlobal;
if (CHECKS) {
check(hmod);
}
return callPPPPI(dptr, bytes, hmod, name, __functionAddress);
}
/**
* Returns a global pointer from a module.
*
* Returns in {@code *dptr} and {@code *bytes} the base pointer and size of the global of name {@code name} located in module {@code hmod}. If no variable
* of that name exists, {@code cuModuleGetGlobal()} returns {@link #CUDA_ERROR_NOT_FOUND}. Both parameters {@code dptr} and {@code bytes} are optional. If one of
* them is {@code NULL}, it is ignored.
*
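* For illustration, a minimal sketch that zeroes a global named {@code "counter"} in an assumed module handle {@code module} (error handling elided):
*
* try (MemoryStack stack = stackPush()) {
*     PointerBuffer pPtr  = stack.mallocPointer(1);
*     PointerBuffer pSize = stack.mallocPointer(1);
*     cuModuleGetGlobal(pPtr, pSize, module, "counter");
*     cuMemsetD8(pPtr.get(0), (byte)0, pSize.get(0)); // zero the global's entire range
* }
*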
* @param dptr returned global device pointer
* @param bytes returned global size in bytes
* @param hmod module to retrieve global from
* @param name name of global to retrieve
*/
@NativeType("CUresult")
public static int cuModuleGetGlobal(@Nullable @NativeType("CUdeviceptr *") PointerBuffer dptr, @Nullable @NativeType("size_t *") PointerBuffer bytes, @NativeType("CUmodule") long hmod, @NativeType("char const *") ByteBuffer name) {
if (CHECKS) {
checkSafe(dptr, 1);
checkSafe(bytes, 1);
checkNT1(name);
}
return ncuModuleGetGlobal(memAddressSafe(dptr), memAddressSafe(bytes), hmod, memAddress(name));
}
/**
* Returns a global pointer from a module.
*
* Returns in {@code *dptr} and {@code *bytes} the base pointer and size of the global of name {@code name} located in module {@code hmod}. If no variable
* of that name exists, {@code cuModuleGetGlobal()} returns {@link #CUDA_ERROR_NOT_FOUND}. Both parameters {@code dptr} and {@code bytes} are optional. If one of
* them is {@code NULL}, it is ignored.
*
* @param dptr returned global device pointer
* @param bytes returned global size in bytes
* @param hmod module to retrieve global from
* @param name name of global to retrieve
*/
@NativeType("CUresult")
public static int cuModuleGetGlobal(@Nullable @NativeType("CUdeviceptr *") PointerBuffer dptr, @Nullable @NativeType("size_t *") PointerBuffer bytes, @NativeType("CUmodule") long hmod, @NativeType("char const *") CharSequence name) {
if (CHECKS) {
checkSafe(dptr, 1);
checkSafe(bytes, 1);
}
MemoryStack stack = stackGet(); int stackPointer = stack.getPointer();
try {
stack.nUTF8(name, true);
long nameEncoded = stack.getPointerAddress();
return ncuModuleGetGlobal(memAddressSafe(dptr), memAddressSafe(bytes), hmod, nameEncoded);
} finally {
stack.setPointer(stackPointer);
}
}
// --- [ cuModuleGetTexRef ] ---
/** Unsafe version of: {@link #cuModuleGetTexRef ModuleGetTexRef} */
public static int ncuModuleGetTexRef(long pTexRef, long hmod, long name) {
long __functionAddress = Functions.ModuleGetTexRef;
if (CHECKS) {
check(hmod);
}
return callPPPI(pTexRef, hmod, name, __functionAddress);
}
/**
* Returns a handle to a texture reference.
*
* Returns in {@code *pTexRef} the handle of the texture reference of name {@code name} in the module {@code hmod}. If no texture reference of that name
* exists, {@code cuModuleGetTexRef()} returns {@link #CUDA_ERROR_NOT_FOUND}. This texture reference handle should not be destroyed, since it will be destroyed
* when the module is unloaded.
*
* @param pTexRef returned texture reference
* @param hmod module to retrieve texture reference from
* @param name name of texture reference to retrieve
*/
@NativeType("CUresult")
public static int cuModuleGetTexRef(@NativeType("CUtexref *") PointerBuffer pTexRef, @NativeType("CUmodule") long hmod, @NativeType("char const *") ByteBuffer name) {
if (CHECKS) {
check(pTexRef, 1);
checkNT1(name);
}
return ncuModuleGetTexRef(memAddress(pTexRef), hmod, memAddress(name));
}
/**
* Returns a handle to a texture reference.
*
* Returns in {@code *pTexRef} the handle of the texture reference of name {@code name} in the module {@code hmod}. If no texture reference of that name
* exists, {@code cuModuleGetTexRef()} returns {@link #CUDA_ERROR_NOT_FOUND}. This texture reference handle should not be destroyed, since it will be destroyed
* when the module is unloaded.
*
* @param pTexRef returned texture reference
* @param hmod module to retrieve texture reference from
* @param name name of texture reference to retrieve
*/
@NativeType("CUresult")
public static int cuModuleGetTexRef(@NativeType("CUtexref *") PointerBuffer pTexRef, @NativeType("CUmodule") long hmod, @NativeType("char const *") CharSequence name) {
if (CHECKS) {
check(pTexRef, 1);
}
MemoryStack stack = stackGet(); int stackPointer = stack.getPointer();
try {
stack.nUTF8(name, true);
long nameEncoded = stack.getPointerAddress();
return ncuModuleGetTexRef(memAddress(pTexRef), hmod, nameEncoded);
} finally {
stack.setPointer(stackPointer);
}
}
// --- [ cuModuleGetSurfRef ] ---
/** Unsafe version of: {@link #cuModuleGetSurfRef ModuleGetSurfRef} */
public static int ncuModuleGetSurfRef(long pSurfRef, long hmod, long name) {
long __functionAddress = Functions.ModuleGetSurfRef;
if (CHECKS) {
check(hmod);
}
return callPPPI(pSurfRef, hmod, name, __functionAddress);
}
/**
* Returns a handle to a surface reference.
*
* Returns in {@code *pSurfRef} the handle of the surface reference of name {@code name} in the module {@code hmod}. If no surface reference of that name
* exists, {@code cuModuleGetSurfRef()} returns {@link #CUDA_ERROR_NOT_FOUND}.
*
* @param pSurfRef returned surface reference
* @param hmod module to retrieve surface reference from
* @param name name of surface reference to retrieve
*/
@NativeType("CUresult")
public static int cuModuleGetSurfRef(@NativeType("CUsurfref *") PointerBuffer pSurfRef, @NativeType("CUmodule") long hmod, @NativeType("char const *") ByteBuffer name) {
if (CHECKS) {
check(pSurfRef, 1);
checkNT1(name);
}
return ncuModuleGetSurfRef(memAddress(pSurfRef), hmod, memAddress(name));
}
/**
* Returns a handle to a surface reference.
*
* Returns in {@code *pSurfRef} the handle of the surface reference of name {@code name} in the module {@code hmod}. If no surface reference of that name
* exists, {@code cuModuleGetSurfRef()} returns {@link #CUDA_ERROR_NOT_FOUND}.
*
* @param pSurfRef returned surface reference
* @param hmod module to retrieve surface reference from
* @param name name of surface reference to retrieve
*/
@NativeType("CUresult")
public static int cuModuleGetSurfRef(@NativeType("CUsurfref *") PointerBuffer pSurfRef, @NativeType("CUmodule") long hmod, @NativeType("char const *") CharSequence name) {
if (CHECKS) {
check(pSurfRef, 1);
}
MemoryStack stack = stackGet(); int stackPointer = stack.getPointer();
try {
stack.nUTF8(name, true);
long nameEncoded = stack.getPointerAddress();
return ncuModuleGetSurfRef(memAddress(pSurfRef), hmod, nameEncoded);
} finally {
stack.setPointer(stackPointer);
}
}
// --- [ cuLinkCreate ] ---
/**
* Unsafe version of: {@link #cuLinkCreate LinkCreate}
*
* @param numOptions size of options arrays
*/
public static int ncuLinkCreate(int numOptions, long options, long optionValues, long stateOut) {
long __functionAddress = Functions.LinkCreate;
if (CHECKS) {
check(__functionAddress);
}
return callPPPI(numOptions, options, optionValues, stateOut, __functionAddress);
}
/**
* Creates a pending JIT linker invocation.
*
* If the call is successful, the caller owns the returned {@code CUlinkState}, which should eventually be destroyed with {@link #cuLinkDestroy LinkDestroy}. The device code
* machine size (32 or 64 bit) will match the calling application.
*
* Both linker and compiler options may be specified. Compiler options will be applied to inputs to this linker action which must be compiled from PTX.
* The options {@link #CU_JIT_WALL_TIME JIT_WALL_TIME}, {@link #CU_JIT_INFO_LOG_BUFFER_SIZE_BYTES JIT_INFO_LOG_BUFFER_SIZE_BYTES}, and {@link #CU_JIT_ERROR_LOG_BUFFER_SIZE_BYTES JIT_ERROR_LOG_BUFFER_SIZE_BYTES} will accumulate data until the {@code CUlinkState} is
* destroyed.
*
* {@code optionValues} must remain valid for the life of the {@code CUlinkState} if output options are used. No other references to inputs are maintained
* after this call returns.
*
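* For illustration, a minimal sketch of a complete link action over an assumed {@code ptx} buffer holding NULL-terminated PTX (no options, error handling elided):
*
* try (MemoryStack stack = stackPush()) {
*     PointerBuffer pState = stack.mallocPointer(1);
*     cuLinkCreate(stack.mallocInt(0), stack.mallocPointer(0), pState);
*     long state = pState.get(0);
*     cuLinkAddData(state, CU_JIT_INPUT_PTX, ptx, "my.ptx", stack.mallocInt(0), stack.mallocPointer(0));
*     PointerBuffer pCubin = stack.mallocPointer(1);
*     PointerBuffer pSize  = stack.mallocPointer(1);
*     cuLinkComplete(state, pCubin, pSize);
*     PointerBuffer pModule = stack.mallocPointer(1);
*     cuModuleLoadData(pModule, memByteBuffer(pCubin.get(0), (int)pSize.get(0)));
*     cuLinkDestroy(state); // the cubin is owned by state, so load it before destroying
* }
*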
* @param options array of linker and compiler options
* @param optionValues array of option values, each cast to void *
* @param stateOut on success, this will contain a {@code CUlinkState} to specify and complete this action
*/
@NativeType("CUresult")
public static int cuLinkCreate(@NativeType("CUjit_option *") IntBuffer options, @NativeType("void **") PointerBuffer optionValues, @NativeType("CUlinkState *") PointerBuffer stateOut) {
if (CHECKS) {
check(optionValues, options.remaining());
check(stateOut, 1);
}
return ncuLinkCreate(options.remaining(), memAddress(options), memAddress(optionValues), memAddress(stateOut));
}
// --- [ cuLinkAddData ] ---
/**
* Unsafe version of: {@link #cuLinkAddData LinkAddData}
*
* @param size the length of the input data
* @param numOptions size of options
*/
public static int ncuLinkAddData(long state, int type, long data, long size, long name, int numOptions, long options, long optionValues) {
long __functionAddress = Functions.LinkAddData;
if (CHECKS) {
check(__functionAddress);
check(state);
}
return callPPPPPPI(state, type, data, size, name, numOptions, options, optionValues, __functionAddress);
}
/**
* Add an input to a pending linker invocation.
*
* Ownership of {@code data} is retained by the caller. No reference is retained to any inputs after this call returns.
*
* This method accepts only compiler options, which are used if the data must be compiled from PTX, and does not accept any of {@link #CU_JIT_WALL_TIME JIT_WALL_TIME},
* {@link #CU_JIT_INFO_LOG_BUFFER JIT_INFO_LOG_BUFFER}, {@link #CU_JIT_ERROR_LOG_BUFFER JIT_ERROR_LOG_BUFFER}, {@link #CU_JIT_TARGET_FROM_CUCONTEXT JIT_TARGET_FROM_CUCONTEXT}, or {@link #CU_JIT_TARGET JIT_TARGET}.
*
* @param state a pending linker action
* @param type the type of the input data
* @param data the input data. PTX must be NULL-terminated.
* @param name an optional name for this input in log messages
* @param options options to be applied only for this input (overrides options from {@link #cuLinkCreate LinkCreate})
* @param optionValues array of option values, each cast to void *
*/
@NativeType("CUresult")
public static int cuLinkAddData(@NativeType("CUlinkState") long state, @NativeType("CUjitInputType") int type, @NativeType("void *") ByteBuffer data, @NativeType("char const *") ByteBuffer name, @NativeType("CUjit_option *") IntBuffer options, @NativeType("void **") PointerBuffer optionValues) {
if (CHECKS) {
checkNT1(name);
check(optionValues, options.remaining());
}
return ncuLinkAddData(state, type, memAddress(data), data.remaining(), memAddress(name), options.remaining(), memAddress(options), memAddress(optionValues));
}
/**
* Add an input to a pending linker invocation.
*
* Ownership of {@code data} is retained by the caller. No reference is retained to any inputs after this call returns.
*
* This method accepts only compiler options, which are used if the data must be compiled from PTX, and does not accept any of {@link #CU_JIT_WALL_TIME JIT_WALL_TIME},
* {@link #CU_JIT_INFO_LOG_BUFFER JIT_INFO_LOG_BUFFER}, {@link #CU_JIT_ERROR_LOG_BUFFER JIT_ERROR_LOG_BUFFER}, {@link #CU_JIT_TARGET_FROM_CUCONTEXT JIT_TARGET_FROM_CUCONTEXT}, or {@link #CU_JIT_TARGET JIT_TARGET}.
*
* @param state a pending linker action
* @param type the type of the input data
* @param data the input data. PTX must be NULL-terminated.
* @param name an optional name for this input in log messages
* @param options options to be applied only for this input (overrides options from {@link #cuLinkCreate LinkCreate})
* @param optionValues array of option values, each cast to void *
*/
@NativeType("CUresult")
public static int cuLinkAddData(@NativeType("CUlinkState") long state, @NativeType("CUjitInputType") int type, @NativeType("void *") ByteBuffer data, @NativeType("char const *") CharSequence name, @NativeType("CUjit_option *") IntBuffer options, @NativeType("void **") PointerBuffer optionValues) {
if (CHECKS) {
check(optionValues, options.remaining());
}
MemoryStack stack = stackGet(); int stackPointer = stack.getPointer();
try {
stack.nUTF8(name, true);
long nameEncoded = stack.getPointerAddress();
return ncuLinkAddData(state, type, memAddress(data), data.remaining(), nameEncoded, options.remaining(), memAddress(options), memAddress(optionValues));
} finally {
stack.setPointer(stackPointer);
}
}
// --- [ cuLinkAddFile ] ---
/**
* Unsafe version of: {@link #cuLinkAddFile LinkAddFile}
*
* @param numOptions size of options
*/
public static int ncuLinkAddFile(long state, int type, long path, int numOptions, long options, long optionValues) {
long __functionAddress = Functions.LinkAddFile;
if (CHECKS) {
check(__functionAddress);
check(state);
}
return callPPPPI(state, type, path, numOptions, options, optionValues, __functionAddress);
}
/**
* Add a file input to a pending linker invocation.
*
* No reference is retained to any inputs after this call returns.
*
* This method accepts only compiler options, which are used if the input must be compiled from PTX, and does not accept any of {@link #CU_JIT_WALL_TIME JIT_WALL_TIME},
* {@link #CU_JIT_INFO_LOG_BUFFER JIT_INFO_LOG_BUFFER}, {@link #CU_JIT_ERROR_LOG_BUFFER JIT_ERROR_LOG_BUFFER}, {@link #CU_JIT_TARGET_FROM_CUCONTEXT JIT_TARGET_FROM_CUCONTEXT}, or {@link #CU_JIT_TARGET JIT_TARGET}.
*
* This method is equivalent to invoking {@link #cuLinkAddData LinkAddData} on the contents of the file.
*
* @param state a pending linker action
* @param type the type of the input data
* @param path path to the input file
* @param options options to be applied only for this input (overrides options from {@link #cuLinkCreate LinkCreate})
* @param optionValues array of option values, each cast to void *
*/
@NativeType("CUresult")
public static int cuLinkAddFile(@NativeType("CUlinkState") long state, @NativeType("CUjitInputType") int type, @NativeType("char const *") ByteBuffer path, @NativeType("CUjit_option *") IntBuffer options, @NativeType("void **") PointerBuffer optionValues) {
if (CHECKS) {
checkNT1(path);
check(optionValues, options.remaining());
}
return ncuLinkAddFile(state, type, memAddress(path), options.remaining(), memAddress(options), memAddress(optionValues));
}
/**
* Add a file input to a pending linker invocation.
*
* No reference is retained to any inputs after this call returns.
*
* This method accepts only compiler options, which are used if the input must be compiled from PTX, and does not accept any of {@link #CU_JIT_WALL_TIME JIT_WALL_TIME},
* {@link #CU_JIT_INFO_LOG_BUFFER JIT_INFO_LOG_BUFFER}, {@link #CU_JIT_ERROR_LOG_BUFFER JIT_ERROR_LOG_BUFFER}, {@link #CU_JIT_TARGET_FROM_CUCONTEXT JIT_TARGET_FROM_CUCONTEXT}, or {@link #CU_JIT_TARGET JIT_TARGET}.
*
* This method is equivalent to invoking {@link #cuLinkAddData LinkAddData} on the contents of the file.
*
* @param state a pending linker action
* @param type the type of the input data
* @param path path to the input file
* @param options options to be applied only for this input (overrides options from {@link #cuLinkCreate LinkCreate})
* @param optionValues array of option values, each cast to void *
*/
@NativeType("CUresult")
public static int cuLinkAddFile(@NativeType("CUlinkState") long state, @NativeType("CUjitInputType") int type, @NativeType("char const *") CharSequence path, @NativeType("CUjit_option *") IntBuffer options, @NativeType("void **") PointerBuffer optionValues) {
if (CHECKS) {
check(optionValues, options.remaining());
}
MemoryStack stack = stackGet(); int stackPointer = stack.getPointer();
try {
stack.nUTF8(path, true);
long pathEncoded = stack.getPointerAddress();
return ncuLinkAddFile(state, type, pathEncoded, options.remaining(), memAddress(options), memAddress(optionValues));
} finally {
stack.setPointer(stackPointer);
}
}
// --- [ cuLinkComplete ] ---
/** Unsafe version of: {@link #cuLinkComplete LinkComplete} */
public static int ncuLinkComplete(long state, long cubinOut, long sizeOut) {
long __functionAddress = Functions.LinkComplete;
if (CHECKS) {
check(__functionAddress);
check(state);
}
return callPPPI(state, cubinOut, sizeOut, __functionAddress);
}
/**
* Complete a pending linker invocation.
*
* Completes the pending linker action and returns the cubin image for the linked device code, which can be used with {@link #cuModuleLoadData ModuleLoadData}. The cubin is
* owned by {@code state}, so it should be loaded before {@code state} is destroyed via {@link #cuLinkDestroy LinkDestroy}. This call does not destroy {@code state}.
*
* @param state a pending linker invocation
* @param cubinOut on success, this will point to the output image
* @param sizeOut optional parameter to receive the size of the generated image
*/
@NativeType("CUresult")
public static int cuLinkComplete(@NativeType("CUlinkState") long state, @NativeType("void **") PointerBuffer cubinOut, @NativeType("size_t *") PointerBuffer sizeOut) {
if (CHECKS) {
check(cubinOut, 1);
check(sizeOut, 1);
}
return ncuLinkComplete(state, memAddress(cubinOut), memAddress(sizeOut));
}
// --- [ cuLinkDestroy ] ---
/**
* Destroys state for a JIT linker invocation.
*
* @param state state object for the linker invocation
*/
@NativeType("CUresult")
public static int cuLinkDestroy(@NativeType("CUlinkState") long state) {
long __functionAddress = Functions.LinkDestroy;
if (CHECKS) {
check(__functionAddress);
check(state);
}
return callPI(state, __functionAddress);
}
// --- [ cuMemGetInfo ] ---
/** Unsafe version of: {@link #cuMemGetInfo MemGetInfo} */
public static int ncuMemGetInfo(long free, long total) {
long __functionAddress = Functions.MemGetInfo;
return callPPI(free, total, __functionAddress);
}
/**
* Gets free and total memory.
*
* Returns in {@code *total} the total amount of memory available to the current context. Returns in {@code *free} the amount of memory on the device
* that is free according to the OS. CUDA is not guaranteed to be able to allocate all of the memory that the OS reports as free.
*
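* For illustration, a minimal sketch (assumes a current context, error handling elided):
*
* try (MemoryStack stack = stackPush()) {
*     PointerBuffer free  = stack.mallocPointer(1);
*     PointerBuffer total = stack.mallocPointer(1);
*     cuMemGetInfo(free, total);
*     System.out.println(free.get(0) + " / " + total.get(0) + " bytes free");
* }
*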
* @param free returned free memory in bytes
* @param total returned total memory in bytes
*/
@NativeType("CUresult")
public static int cuMemGetInfo(@NativeType("size_t *") PointerBuffer free, @NativeType("size_t *") PointerBuffer total) {
if (CHECKS) {
check(free, 1);
check(total, 1);
}
return ncuMemGetInfo(memAddress(free), memAddress(total));
}
// --- [ cuMemAlloc ] ---
/** Unsafe version of: {@link #cuMemAlloc MemAlloc} */
public static int ncuMemAlloc(long dptr, long bytesize) {
long __functionAddress = Functions.MemAlloc;
return callPPI(dptr, bytesize, __functionAddress);
}
/**
* Allocates device memory.
*
* Allocates {@code bytesize} bytes of linear memory on the device and returns in {@code *dptr} a pointer to the allocated memory. The allocated memory is
* suitably aligned for any kind of variable. The memory is not cleared. If {@code bytesize} is 0, {@code cuMemAlloc()} returns {@link #CUDA_ERROR_INVALID_VALUE}.
*
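* For illustration, a minimal sketch that allocates a buffer, uploads host data and frees it (the 1 MiB size is arbitrary, {@code hostData} is an assumed {@code ByteBuffer}; error handling elided):
*
* try (MemoryStack stack = stackPush()) {
*     PointerBuffer pDptr = stack.mallocPointer(1);
*     cuMemAlloc(pDptr, 1 << 20);
*     long dptr = pDptr.get(0);
*     cuMemcpyHtoD(dptr, hostData); // upload hostData.remaining() bytes
*     cuMemFree(dptr);
* }
*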
* @param dptr returned device pointer
* @param bytesize requested allocation size in bytes
*/
@NativeType("CUresult")
public static int cuMemAlloc(@NativeType("CUdeviceptr *") PointerBuffer dptr, @NativeType("size_t") long bytesize) {
if (CHECKS) {
check(dptr, 1);
}
return ncuMemAlloc(memAddress(dptr), bytesize);
}
// --- [ cuMemAllocPitch ] ---
/** Unsafe version of: {@link #cuMemAllocPitch MemAllocPitch} */
public static int ncuMemAllocPitch(long dptr, long pPitch, long WidthInBytes, long Height, int ElementSizeBytes) {
long __functionAddress = Functions.MemAllocPitch;
return callPPPPI(dptr, pPitch, WidthInBytes, Height, ElementSizeBytes, __functionAddress);
}
/**
* Allocates pitched device memory.
*
* Allocates at least {@code WidthInBytes} * {@code Height} bytes of linear memory on the device and returns in {@code *dptr} a pointer to the allocated
* memory. The function may pad the allocation to ensure that corresponding pointers in any given row will continue to meet the alignment requirements for
* coalescing as the address is updated from row to row. {@code ElementSizeBytes} specifies the size of the largest reads and writes that will be
* performed on the memory range. {@code ElementSizeBytes} may be 4, 8 or 16 (since coalesced memory transactions are not possible on other data sizes).
* If {@code ElementSizeBytes} is smaller than the actual read/write size of a kernel, the kernel will run correctly, but possibly at reduced speed. The
* pitch returned in {@code *pPitch} by {@code cuMemAllocPitch()} is the width in bytes of the allocation. The intended usage of pitch is as a separate
* parameter of the allocation, used to compute addresses within the 2D array. Given the row and column of an array element of type T, the address
* is computed as:
*
* T* pElement = (T*)((char*)BaseAddress + Row * Pitch) + Column;
*
* The pitch returned by {@code cuMemAllocPitch()} is guaranteed to work with {@link #cuMemcpy2D Memcpy2D} under all circumstances. For allocations of 2D arrays, it is
* recommended that programmers consider performing pitch allocations using {@code cuMemAllocPitch()}. Due to alignment restrictions in the hardware, this
* is especially true if the application will be performing 2D memory copies between different regions of device memory (whether linear memory or CUDA
* arrays).
*
* The byte alignment of the pitch returned by {@code cuMemAllocPitch()} is guaranteed to match or exceed the alignment requirement for texture binding
* with {@link #cuTexRefSetAddress2D TexRefSetAddress2D}.
*
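* For illustration, a minimal sketch that allocates a 1024x768 array of 4-byte elements and computes a row address (error handling elided):
*
* try (MemoryStack stack = stackPush()) {
*     PointerBuffer pDptr  = stack.mallocPointer(1);
*     PointerBuffer pPitch = stack.mallocPointer(1);
*     cuMemAllocPitch(pDptr, pPitch, 1024 * 4, 768, 4);
*     long row100 = pDptr.get(0) + 100 * pPitch.get(0); // BaseAddress + Row * Pitch
* }
*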
* @param dptr returned device pointer
* @param pPitch returned pitch of allocation in bytes
* @param WidthInBytes requested allocation width in bytes
* @param Height requested allocation height in rows
* @param ElementSizeBytes size of largest reads/writes for range
*/
@NativeType("CUresult")
public static int cuMemAllocPitch(@NativeType("CUdeviceptr *") PointerBuffer dptr, @NativeType("size_t *") PointerBuffer pPitch, @NativeType("size_t") long WidthInBytes, @NativeType("size_t") long Height, @NativeType("unsigned int") int ElementSizeBytes) {
if (CHECKS) {
check(dptr, 1);
check(pPitch, 1);
}
return ncuMemAllocPitch(memAddress(dptr), memAddress(pPitch), WidthInBytes, Height, ElementSizeBytes);
}
// --- [ cuMemFree ] ---
/**
* Frees device memory.
*
* Frees the memory space pointed to by {@code dptr}, which must have been returned by a previous call to {@link #cuMemAlloc MemAlloc} or {@link #cuMemAllocPitch MemAllocPitch}.
*
* @param dptr pointer to memory to free
*/
@NativeType("CUresult")
public static int cuMemFree(@NativeType("CUdeviceptr") long dptr) {
long __functionAddress = Functions.MemFree;
if (CHECKS) {
check(dptr);
}
return callPI(dptr, __functionAddress);
}
// --- [ cuMemGetAddressRange ] ---
/** Unsafe version of: {@link #cuMemGetAddressRange MemGetAddressRange} */
public static int ncuMemGetAddressRange(long pbase, long psize, long dptr) {
long __functionAddress = Functions.MemGetAddressRange;
if (CHECKS) {
check(dptr);
}
return callPPPI(pbase, psize, dptr, __functionAddress);
}
/**
* Get information on memory allocations.
*
* Returns the base address in {@code *pbase} and size in {@code *psize} of the allocation by {@link #cuMemAlloc MemAlloc} or {@link #cuMemAllocPitch MemAllocPitch} that contains the input
* pointer {@code dptr}. Both parameters {@code pbase} and {@code psize} are optional. If one of them is {@code NULL}, it is ignored.
*
* @param pbase returned base address
* @param psize returned size of device memory allocation
* @param dptr device pointer to query
*/
@NativeType("CUresult")
public static int cuMemGetAddressRange(@Nullable @NativeType("CUdeviceptr *") PointerBuffer pbase, @Nullable @NativeType("size_t *") PointerBuffer psize, @NativeType("CUdeviceptr") long dptr) {
if (CHECKS) {
checkSafe(pbase, 1);
checkSafe(psize, 1);
}
return ncuMemGetAddressRange(memAddressSafe(pbase), memAddressSafe(psize), dptr);
}
// --- [ cuMemAllocHost ] ---
/** Unsafe version of: {@link #cuMemAllocHost MemAllocHost} */
public static int ncuMemAllocHost(long pp, long bytesize) {
long __functionAddress = Functions.MemAllocHost;
return callPPI(pp, bytesize, __functionAddress);
}
/**
* Allocates page-locked host memory.
*
* Allocates {@code bytesize} bytes of host memory that is page-locked and accessible to the device. The driver tracks the virtual memory ranges allocated
* with this function and automatically accelerates calls to functions such as {@link #cuMemcpy Memcpy}. Since the memory can be accessed directly by the device, it can
* be read or written with much higher bandwidth than pageable memory obtained with functions such as {@code malloc()}. Allocating excessive amounts of
* memory with {@code cuMemAllocHost()} may degrade system performance, since it reduces the amount of memory available to the system for paging. As a
* result, this function is best used sparingly to allocate staging areas for data exchange between host and device.
*
* Note that all host memory allocated using {@code cuMemAllocHost()} will automatically be immediately accessible to all contexts on all devices which support
* unified addressing (as may be queried using {@link #CU_DEVICE_ATTRIBUTE_UNIFIED_ADDRESSING DEVICE_ATTRIBUTE_UNIFIED_ADDRESSING}). The device pointer that may be used to access this host memory from
* those contexts is always equal to the returned host pointer {@code *pp}. See {@code CUDA_UNIFIED} for additional details.
*
* @param pp returned host pointer to page-locked memory
* @param bytesize requested allocation size in bytes
*/
@NativeType("CUresult")
public static int cuMemAllocHost(@NativeType("void **") PointerBuffer pp, @NativeType("size_t") long bytesize) {
if (CHECKS) {
check(pp, 1);
}
return ncuMemAllocHost(memAddress(pp), bytesize);
}
// --- [ cuMemFreeHost ] ---
/** Unsafe version of: {@link #cuMemFreeHost MemFreeHost} */
public static int ncuMemFreeHost(long p) {
long __functionAddress = Functions.MemFreeHost;
return callPI(p, __functionAddress);
}
/**
* Frees page-locked host memory.
*
* Frees the memory space pointed to by {@code p}, which must have been returned by a previous call to {@link #cuMemAllocHost MemAllocHost}.
*
* @param p pointer to memory to free
*/
@NativeType("CUresult")
public static int cuMemFreeHost(@NativeType("void *") ByteBuffer p) {
return ncuMemFreeHost(memAddress(p));
}
// --- [ cuMemHostAlloc ] ---
/** Unsafe version of: {@link #cuMemHostAlloc MemHostAlloc} */
public static int ncuMemHostAlloc(long pp, long bytesize, int Flags) {
long __functionAddress = Functions.MemHostAlloc;
return callPPI(pp, bytesize, Flags, __functionAddress);
}
/**
* Allocates page-locked host memory.
*
* Allocates {@code bytesize} bytes of host memory that is page-locked and accessible to the device. The driver tracks the virtual memory ranges allocated
* with this function and automatically accelerates calls to functions such as {@link #cuMemcpyHtoD MemcpyHtoD}. Since the memory can be accessed directly by the device,
* it can be read or written with much higher bandwidth than pageable memory obtained with functions such as {@code malloc()}. Allocating excessive
* amounts of pinned memory may degrade system performance, since it reduces the amount of memory available to the system for paging. As a result, this
* function is best used sparingly to allocate staging areas for data exchange between host and device.
*
* The {@code Flags} parameter enables different options to be specified that affect the allocation, as follows:
*
* - {@link #CU_MEMHOSTALLOC_PORTABLE MEMHOSTALLOC_PORTABLE}: The memory returned by this call will be considered as pinned memory by all CUDA contexts, not just the one that performed
* the allocation.
* - {@link #CU_MEMHOSTALLOC_DEVICEMAP MEMHOSTALLOC_DEVICEMAP}: Maps the allocation into the CUDA address space. The device pointer to the memory may be obtained by calling
* {@link #cuMemHostGetDevicePointer MemHostGetDevicePointer}.
* - {@link #CU_MEMHOSTALLOC_WRITECOMBINED MEMHOSTALLOC_WRITECOMBINED}: Allocates the memory as write-combined (WC). WC memory can be transferred across the PCI Express bus more quickly on
* some system configurations, but cannot be read efficiently by most CPUs. WC memory is a good option for buffers that will be written by the CPU and
* read by the GPU via mapped pinned memory or host->device transfers.
*
* All of these flags are orthogonal to one another: a developer may allocate memory that is portable, mapped and/or write-combined with no restrictions.
*
* The {@link #CU_MEMHOSTALLOC_DEVICEMAP MEMHOSTALLOC_DEVICEMAP} flag may be specified on CUDA contexts for devices that do not support mapped pinned memory. The failure is deferred to
* {@link #cuMemHostGetDevicePointer MemHostGetDevicePointer} because the memory may be mapped into other CUDA contexts via the {@link #CU_MEMHOSTALLOC_PORTABLE MEMHOSTALLOC_PORTABLE} flag.
*
* The memory allocated by this function must be freed with {@link #cuMemFreeHost MemFreeHost}.
*
* Note all host memory allocated using {@code cuMemHostAlloc()} will automatically be immediately accessible to all contexts on all devices which support
* unified addressing (as may be queried using {@link #CU_DEVICE_ATTRIBUTE_UNIFIED_ADDRESSING DEVICE_ATTRIBUTE_UNIFIED_ADDRESSING}). Unless the flag {@link #CU_MEMHOSTALLOC_WRITECOMBINED MEMHOSTALLOC_WRITECOMBINED} is specified, the device
* pointer that may be used to access this host memory from those contexts is always equal to the returned host pointer {@code *pp}. If the flag
* {@link #CU_MEMHOSTALLOC_WRITECOMBINED MEMHOSTALLOC_WRITECOMBINED} is specified, then the function {@link #cuMemHostGetDevicePointer MemHostGetDevicePointer} must be used to query the device pointer, even if the context
* supports unified addressing. See {@code CUDA_UNIFIED} for additional details.
*
* @param pp returned host pointer to page-locked memory
* @param bytesize requested allocation size in bytes
* @param Flags flags for allocation request
*/
@NativeType("CUresult")
public static int cuMemHostAlloc(@NativeType("void **") PointerBuffer pp, @NativeType("size_t") long bytesize, @NativeType("unsigned int") int Flags) {
if (CHECKS) {
check(pp, 1);
}
return ncuMemHostAlloc(memAddress(pp), bytesize, Flags);
}
// --- [ cuMemHostGetDevicePointer ] ---
/** Unsafe version of: {@link #cuMemHostGetDevicePointer MemHostGetDevicePointer} */
public static int ncuMemHostGetDevicePointer(long pdptr, long p, int Flags) {
long __functionAddress = Functions.MemHostGetDevicePointer;
return callPPI(pdptr, p, Flags, __functionAddress);
}
/**
* Passes back device pointer of mapped pinned memory.
*
* Passes back the device pointer {@code pdptr} corresponding to the mapped, pinned host buffer {@code p} allocated by {@link #cuMemHostAlloc MemHostAlloc}.
*
* {@code cuMemHostGetDevicePointer()} will fail if the {@link #CU_MEMHOSTALLOC_DEVICEMAP MEMHOSTALLOC_DEVICEMAP} flag was not specified at the time the memory was allocated, or if the
* function is called on a GPU that does not support mapped pinned memory.
*
* For devices that have a non-zero value for the device attribute {@link #CU_DEVICE_ATTRIBUTE_CAN_USE_HOST_POINTER_FOR_REGISTERED_MEM DEVICE_ATTRIBUTE_CAN_USE_HOST_POINTER_FOR_REGISTERED_MEM}, the memory can also be
* accessed from the device using the host pointer {@code p}. The device pointer returned by {@code cuMemHostGetDevicePointer()} may or may not match the
* original host pointer {@code p} and depends on the devices visible to the application. If all devices visible to the application have a non-zero value
* for the device attribute, the device pointer returned by {@code cuMemHostGetDevicePointer()} will match the original pointer {@code p}. If any device
* visible to the application has a zero value for the device attribute, the device pointer returned by {@code cuMemHostGetDevicePointer()} will not match
* the original host pointer {@code p}, but it will be suitable for use on all devices provided Unified Virtual Addressing is enabled. In such systems, it
* is valid to access the memory using either pointer on devices that have a non-zero value for the device attribute. Note however that such devices
* should access the memory using only one of the two pointers and not both.
*
* The {@code Flags} parameter is provided for future releases. For now, it must be set to 0.
*
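* For illustration, a minimal sketch pairing this function with {@link #cuMemHostAlloc MemHostAlloc} (the 4 KiB size is arbitrary; error handling elided):
*
* try (MemoryStack stack = stackPush()) {
*     PointerBuffer pp = stack.mallocPointer(1);
*     cuMemHostAlloc(pp, 4096, CU_MEMHOSTALLOC_DEVICEMAP);
*     ByteBuffer host = memByteBuffer(pp.get(0), 4096);
*     PointerBuffer pdptr = stack.mallocPointer(1);
*     cuMemHostGetDevicePointer(pdptr, host, 0);
*     // ... use pdptr.get(0) in kernels, then:
*     cuMemFreeHost(host);
* }
*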
* @param pdptr returned device pointer
* @param p host pointer
* @param Flags options (must be 0)
*/
@NativeType("CUresult")
public static int cuMemHostGetDevicePointer(@NativeType("CUdeviceptr *") PointerBuffer pdptr, @NativeType("void *") ByteBuffer p, @NativeType("unsigned int") int Flags) {
if (CHECKS) {
check(pdptr, 1);
}
return ncuMemHostGetDevicePointer(memAddress(pdptr), memAddress(p), Flags);
}
// --- [ cuMemHostGetFlags ] ---
/** Unsafe version of: {@link #cuMemHostGetFlags MemHostGetFlags} */
public static int ncuMemHostGetFlags(long pFlags, long p) {
long __functionAddress = Functions.MemHostGetFlags;
return callPPI(pFlags, p, __functionAddress);
}
/**
* Passes back flags that were used for a pinned allocation.
*
* Passes back the flags {@code pFlags} that were specified when allocating the pinned host buffer {@code p} allocated by {@link #cuMemHostAlloc MemHostAlloc}.
*
* {@code cuMemHostGetFlags()} will fail if the pointer does not reside in an allocation performed by {@link #cuMemAllocHost MemAllocHost} or {@code cuMemHostAlloc()}.
*
* @param pFlags returned flags word
* @param p host pointer
*/
@NativeType("CUresult")
public static int cuMemHostGetFlags(@NativeType("unsigned int *") IntBuffer pFlags, @NativeType("void *") ByteBuffer p) {
if (CHECKS) {
check(pFlags, 1);
}
return ncuMemHostGetFlags(memAddress(pFlags), memAddress(p));
}
// --- [ cuMemAllocManaged ] ---
/** Unsafe version of: {@link #cuMemAllocManaged MemAllocManaged} */
public static int ncuMemAllocManaged(long dptr, long bytesize, int flags) {
long __functionAddress = Functions.MemAllocManaged;
if (CHECKS) {
check(__functionAddress);
}
return callPPI(dptr, bytesize, flags, __functionAddress);
}
/**
* Allocates memory that will be automatically managed by the Unified Memory system.
*
* Allocates {@code bytesize} bytes of managed memory on the device and returns in {@code *dptr} a pointer to the allocated memory. If the device doesn't
* support allocating managed memory, {@link #CUDA_ERROR_NOT_SUPPORTED} is returned. Support for managed memory can be queried using the device attribute
* {@link #CU_DEVICE_ATTRIBUTE_MANAGED_MEMORY DEVICE_ATTRIBUTE_MANAGED_MEMORY}. The allocated memory is suitably aligned for any kind of variable. The memory is not cleared. If {@code bytesize}
* is 0, {@link #cuMemAllocManaged MemAllocManaged} returns {@link #CUDA_ERROR_INVALID_VALUE}. The pointer is valid on the CPU and on all GPUs in the system that support managed memory.
* All accesses to this pointer must obey the Unified Memory programming model.
*
* {@code flags} specifies the default stream association for this allocation. {@code flags} must be one of {@link #CU_MEM_ATTACH_GLOBAL MEM_ATTACH_GLOBAL} or {@link #CU_MEM_ATTACH_HOST MEM_ATTACH_HOST}. If
* {@link #CU_MEM_ATTACH_GLOBAL MEM_ATTACH_GLOBAL} is specified, then this memory is accessible from any stream on any device. If {@link #CU_MEM_ATTACH_HOST MEM_ATTACH_HOST} is specified, then the allocation
* should not be accessed from devices that have a zero value for the device attribute {@link #CU_DEVICE_ATTRIBUTE_CONCURRENT_MANAGED_ACCESS DEVICE_ATTRIBUTE_CONCURRENT_MANAGED_ACCESS}; an explicit call to
* {@link #cuStreamAttachMemAsync StreamAttachMemAsync} will be required to enable access on such devices.
*
* If the association is later changed via {@link #cuStreamAttachMemAsync StreamAttachMemAsync} to a single stream, the default association as specified during {@link #cuMemAllocManaged MemAllocManaged} is
* restored when that stream is destroyed. For __managed__ variables, the default association is always {@link #CU_MEM_ATTACH_GLOBAL MEM_ATTACH_GLOBAL}. Note that destroying a stream
* is an asynchronous operation, and as a result, the change to default association won't happen until all work in the stream has completed.
*
* Memory allocated with {@link #cuMemAllocManaged MemAllocManaged} should be released with {@link #cuMemFree MemFree}.
*
* Device memory oversubscription is possible for GPUs that have a non-zero value for the device attribute {@link #CU_DEVICE_ATTRIBUTE_CONCURRENT_MANAGED_ACCESS DEVICE_ATTRIBUTE_CONCURRENT_MANAGED_ACCESS}.
* Managed memory on such GPUs may be evicted from device memory to host memory at any time by the Unified Memory driver in order to make room for other
* allocations.
*
* In a multi-GPU system where all GPUs have a non-zero value for the device attribute {@link #CU_DEVICE_ATTRIBUTE_CONCURRENT_MANAGED_ACCESS DEVICE_ATTRIBUTE_CONCURRENT_MANAGED_ACCESS}, managed memory may
* not be populated when this API returns and instead may be populated on access. In such systems, managed memory can migrate to any processor's memory at
* any time. The Unified Memory driver will employ heuristics to maintain data locality and prevent excessive page faults to the extent possible. The
* application can also guide the driver about memory usage patterns via {@link #cuMemAdvise MemAdvise}. The application can also explicitly migrate memory to a desired
* processor's memory via {@link #cuMemPrefetchAsync MemPrefetchAsync}.
*
* In a multi-GPU system where all of the GPUs have a zero value for the device attribute {@link #CU_DEVICE_ATTRIBUTE_CONCURRENT_MANAGED_ACCESS DEVICE_ATTRIBUTE_CONCURRENT_MANAGED_ACCESS} and all the GPUs
* have peer-to-peer support with each other, the physical storage for managed memory is created on the GPU which is active at the time
* {@link #cuMemAllocManaged MemAllocManaged} is called. All other GPUs will reference the data at reduced bandwidth via peer mappings over the PCIe bus. The Unified Memory
* driver does not migrate memory among such GPUs.
*
* In a multi-GPU system where not all GPUs have peer-to-peer support with each other and where the value of the device attribute
* {@link #CU_DEVICE_ATTRIBUTE_CONCURRENT_MANAGED_ACCESS DEVICE_ATTRIBUTE_CONCURRENT_MANAGED_ACCESS} is zero for at least one of those GPUs, the location chosen for physical storage of managed memory is
* system-dependent.
*
* - On Linux, the location chosen will be device memory as long as the current set of active contexts are on devices that either have peer-to-peer
* support with each other or have a non-zero value for the device attribute {@link #CU_DEVICE_ATTRIBUTE_CONCURRENT_MANAGED_ACCESS DEVICE_ATTRIBUTE_CONCURRENT_MANAGED_ACCESS}. If there is an active
* context on a GPU that does not have a non-zero value for that device attribute and it does not have peer-to-peer support with the other devices
* that have active contexts on them, then the location for physical storage will be 'zero-copy' or host memory. Note that this means that managed
* memory that is located in device memory is migrated to host memory if a new context is created on a GPU that doesn't have a non-zero value for the
* device attribute and does not support peer-to-peer with at least one of the other devices that has an active context. This in turn implies that
* context creation may fail if there is insufficient host memory to migrate all managed allocations.
* - On Windows, the physical storage is always created in 'zero-copy' or host memory. All GPUs will reference the data at reduced bandwidth over the
* PCIe bus. In these circumstances, use of the environment variable {@code CUDA_VISIBLE_DEVICES} is recommended to restrict CUDA to only use those
* GPUs that have peer-to-peer support. Alternatively, users can also set {@code CUDA_MANAGED_FORCE_DEVICE_ALLOC} to a non-zero value to force the
* driver to always use device memory for physical storage. When this environment variable is set to a non-zero value, all contexts created in that
* process on devices that support managed memory have to be peer-to-peer compatible with each other. Context creation will fail if a context is
* created on a device that supports managed memory and is not peer-to-peer compatible with any of the other managed memory supporting devices on
* which contexts were previously created, even if those contexts have been destroyed. These environment variables are described in the CUDA
* programming guide under the "CUDA environment variables" section.
* - On ARM, managed memory is not available on discrete GPUs with Drive PX-2.
*
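* For illustration, a minimal sketch (the 1 MiB size is arbitrary; error handling elided):
*
* try (MemoryStack stack = stackPush()) {
*     PointerBuffer pDptr = stack.mallocPointer(1);
*     cuMemAllocManaged(pDptr, 1 << 20, CU_MEM_ATTACH_GLOBAL);
*     long managed = pDptr.get(0); // valid on the CPU and on all managed-memory capable GPUs
*     cuMemFree(managed);
* }
*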
* @param dptr returned device pointer
* @param bytesize requested allocation size in bytes
* @param flags must be one of {@link #CU_MEM_ATTACH_GLOBAL MEM_ATTACH_GLOBAL} or {@link #CU_MEM_ATTACH_HOST MEM_ATTACH_HOST}
*/
@NativeType("CUresult")
public static int cuMemAllocManaged(@NativeType("CUdeviceptr *") PointerBuffer dptr, @NativeType("size_t") long bytesize, @NativeType("unsigned int") int flags) {
if (CHECKS) {
check(dptr, 1);
}
return ncuMemAllocManaged(memAddress(dptr), bytesize, flags);
}
// --- [ cuDeviceGetByPCIBusId ] ---
/** Unsafe version of: {@link #cuDeviceGetByPCIBusId DeviceGetByPCIBusId} */
public static int ncuDeviceGetByPCIBusId(long dev, long pciBusId) {
long __functionAddress = Functions.DeviceGetByPCIBusId;
if (CHECKS) {
check(__functionAddress);
}
return callPPI(dev, pciBusId, __functionAddress);
}
/**
* Returns a handle to a compute device.
*
* Returns in {@code *device} a device handle given a PCI bus ID string.
*
* @param dev returned device handle
* @param pciBusId string in one of the following forms: {@code [domain]:[bus]:[device].[function]}, {@code [domain]:[bus]:[device]} or
* {@code [bus]:[device].[function]}, where {@code domain}, {@code bus}, {@code device}, and {@code function} are all hexadecimal values
*/
@NativeType("CUresult")
public static int cuDeviceGetByPCIBusId(@NativeType("CUdevice *") IntBuffer dev, @NativeType("char const *") ByteBuffer pciBusId) {
if (CHECKS) {
check(dev, 1);
checkNT1(pciBusId);
}
return ncuDeviceGetByPCIBusId(memAddress(dev), memAddress(pciBusId));
}
/**
* Returns a handle to a compute device.
*
* Returns in {@code *device} a device handle given a PCI bus ID string.
*
* @param dev returned device handle
* @param pciBusId string in one of the following forms: {@code [domain]:[bus]:[device].[function]}, {@code [domain]:[bus]:[device]} or
* {@code [bus]:[device].[function]}, where {@code domain}, {@code bus}, {@code device}, and {@code function} are all hexadecimal values
*/
@NativeType("CUresult")
public static int cuDeviceGetByPCIBusId(@NativeType("CUdevice *") IntBuffer dev, @NativeType("char const *") CharSequence pciBusId) {
if (CHECKS) {
check(dev, 1);
}
MemoryStack stack = stackGet(); int stackPointer = stack.getPointer();
try {
stack.nASCII(pciBusId, true);
long pciBusIdEncoded = stack.getPointerAddress();
return ncuDeviceGetByPCIBusId(memAddress(dev), pciBusIdEncoded);
} finally {
stack.setPointer(stackPointer);
}
}
// --- [ cuDeviceGetPCIBusId ] ---
/**
* Unsafe version of: {@link #cuDeviceGetPCIBusId DeviceGetPCIBusId}
*
* @param len maximum length of string to store in {@code name}
*/
public static int ncuDeviceGetPCIBusId(long pciBusId, int len, int dev) {
long __functionAddress = Functions.DeviceGetPCIBusId;
if (CHECKS) {
check(__functionAddress);
}
return callPI(pciBusId, len, dev, __functionAddress);
}
/**
* Returns a PCI Bus Id string for the device.
*
* Returns an ASCII string identifying the device {@code dev} in the NULL-terminated string pointed to by {@code pciBusId}. {@code len} specifies the
* maximum length of the string that may be returned.
*
* @param pciBusId returned identifier string for the device in the following format {@code [domain]:[bus]:[device].[function]} where {@code domain}, {@code bus},
* {@code device}, and {@code function} are all hexadecimal values. {@code pciBusId} should be large enough to store 13 characters including the
* NULL-terminator.
* @param dev device to get identifier string for
*/
@NativeType("CUresult")
public static int cuDeviceGetPCIBusId(@NativeType("char *") ByteBuffer pciBusId, @NativeType("CUdevice") int dev) {
return ncuDeviceGetPCIBusId(memAddress(pciBusId), pciBusId.remaining(), dev);
}
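// Editorial usage sketch, not part of the generated bindings: queries the PCI bus ID
// string of device 0 and resolves it back to a device handle, round-tripping through
// the two functions above. 13 bytes is enough for the NULL-terminated
// "[domain]:[bus]:[device].[function]" form; the helper name is hypothetical.
private static void examplePCIBusIdRoundTrip() {
    try (MemoryStack stack = stackPush()) {
        ByteBuffer pciBusId = stack.malloc(13);
        cuDeviceGetPCIBusId(pciBusId, 0);
        String id = memASCII(memAddress(pciBusId)); // reads up to the NULL terminator
        IntBuffer dev = stack.mallocInt(1);
        cuDeviceGetByPCIBusId(dev, id); // CharSequence overload re-encodes to ASCII
        // dev.get(0) now holds the handle of the same device
    }
}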
// --- [ cuIpcGetEventHandle ] ---
/** Unsafe version of: {@link #cuIpcGetEventHandle IpcGetEventHandle} */
public static int ncuIpcGetEventHandle(long pHandle, long event) {
long __functionAddress = Functions.IpcGetEventHandle;
if (CHECKS) {
check(__functionAddress);
check(event);
}
return callPPI(pHandle, event, __functionAddress);
}
/**
* Gets an interprocess handle for a previously allocated event.
*
* Takes as input a previously allocated event. This event must have been created with the {@link #CU_EVENT_INTERPROCESS EVENT_INTERPROCESS} and {@link #CU_EVENT_DISABLE_TIMING EVENT_DISABLE_TIMING} flags set. This
* opaque handle may be copied into other processes and opened with {@link #cuIpcOpenEventHandle IpcOpenEventHandle} to allow efficient hardware
* synchronization between GPU work in different processes.
*
* After the event has been opened in the importing process, {@link #cuEventRecord EventRecord}, {@link #cuEventSynchronize EventSynchronize}, {@link #cuStreamWaitEvent StreamWaitEvent} and {@link #cuEventQuery EventQuery} may be used in
* either process. Performing operations on the imported event after the exported event has been freed with {@link #cuEventDestroy EventDestroy} will result in undefined
* behavior.
*
* IPC functionality is restricted to devices with support for unified addressing on Linux and Windows operating systems. IPC functionality on Windows is
* restricted to GPUs in TCC mode.
*
* @param pHandle pointer to a user allocated {@code CUipcEventHandle} in which to return the opaque event handle
* @param event event allocated with {@link #CU_EVENT_INTERPROCESS EVENT_INTERPROCESS} and {@link #CU_EVENT_DISABLE_TIMING EVENT_DISABLE_TIMING} flags
*/
@NativeType("CUresult")
public static int cuIpcGetEventHandle(@NativeType("CUipcEventHandle *") CUIPCEventHandle pHandle, @NativeType("CUevent") long event) {
return ncuIpcGetEventHandle(pHandle.address(), event);
}
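// Editorial usage sketch, not part of the generated bindings: creates an
// interprocess-capable event and exports its opaque handle; the handle bytes could
// then be sent to another process and opened with cuIpcOpenEventHandle. Assumes
// CUIPCEventHandle.malloc(MemoryStack) matches the LWJGL struct API; the helper
// name is hypothetical.
private static void exampleExportEventHandle() {
    try (MemoryStack stack = stackPush()) {
        PointerBuffer pEvent = stack.mallocPointer(1);
        cuEventCreate(pEvent, CU_EVENT_INTERPROCESS | CU_EVENT_DISABLE_TIMING);
        CUIPCEventHandle handle = CUIPCEventHandle.malloc(stack);
        cuIpcGetEventHandle(handle, pEvent.get(0));
        // ship the handle's raw bytes across the process boundary
    }
}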
// --- [ cuIpcOpenEventHandle$Address ] ---
@NativeType("CUresult")
private static int cuIpcOpenEventHandle$Address() {
long __functionAddress = Functions.IpcOpenEventHandle$Address;
if (CHECKS) {
check(__functionAddress);
}
return callI(__functionAddress);
}
// --- [ cuIpcGetMemHandle ] ---
/** Unsafe version of: {@link #cuIpcGetMemHandle IpcGetMemHandle} */
public static int ncuIpcGetMemHandle(long pHandle, long dptr) {
long __functionAddress = Functions.IpcGetMemHandle;
if (CHECKS) {
check(__functionAddress);
check(dptr);
}
return callPPI(pHandle, dptr, __functionAddress);
}
/**
* Gets an interprocess memory handle for an existing device memory allocation.
*
* Takes a pointer to the base of an existing device memory allocation created with {@link #cuMemAlloc MemAlloc} and exports it for use in another process. This is a
* lightweight operation and may be called multiple times on an allocation without adverse effects.
*
* If a region of memory is freed with {@link #cuMemFree MemFree} and a subsequent call to {@link #cuMemAlloc MemAlloc} returns memory with the same device address, {@link #cuIpcGetMemHandle IpcGetMemHandle}
* will return a unique handle for the new memory.
*
* IPC functionality is restricted to devices with support for unified addressing on Linux and Windows operating systems. IPC functionality on Windows is
* restricted to GPUs in TCC mode.
*
* @param pHandle pointer to user allocated {@code CUipcMemHandle} to return the handle in
* @param dptr base pointer to previously allocated device memory
*/
@NativeType("CUresult")
public static int cuIpcGetMemHandle(@NativeType("CUipcMemHandle *") CUIPCMemHandle pHandle, @NativeType("CUdeviceptr") long dptr) {
return ncuIpcGetMemHandle(pHandle.address(), dptr);
}
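// Editorial usage sketch, not part of the generated bindings: exports an IPC handle
// for a device allocation made with cuMemAlloc. As the docs above note, this is a
// lightweight operation that may be repeated. Assumes CUIPCMemHandle.malloc(MemoryStack)
// matches the LWJGL struct API; the helper name is hypothetical.
private static void exampleExportMemHandle() {
    try (MemoryStack stack = stackPush()) {
        PointerBuffer pp = stack.mallocPointer(1);
        cuMemAlloc(pp, 4096L);
        CUIPCMemHandle handle = CUIPCMemHandle.malloc(stack);
        cuIpcGetMemHandle(handle, pp.get(0));
        // send the handle's bytes to the importing process, which calls cuIpcOpenMemHandle
    }
}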
// --- [ cuIpcOpenMemHandle$Address ] ---
@NativeType("CUresult")
private static int cuIpcOpenMemHandle$Address() {
long __functionAddress = Functions.IpcOpenMemHandle$Address;
if (CHECKS) {
check(__functionAddress);
}
return callI(__functionAddress);
}
// --- [ cuIpcCloseMemHandle ] ---
/**
* Attempts to close memory mapped with {@link #cuIpcOpenMemHandle IpcOpenMemHandle}.
*
* Decrements the reference count of the memory returned by {@code cuIpcOpenMemHandle()} by 1. When the reference count reaches 0, this API unmaps the
* memory. The original allocation in the exporting process as well as imported mappings in other processes will be unaffected.
*
* Any resources used to enable peer access will be freed if this is the last mapping using them.
*
* IPC functionality is restricted to devices with support for unified addressing on Linux and Windows operating systems. IPC functionality on Windows is
* restricted to GPUs in TCC mode.
*
* @param dptr device pointer returned by {@code cuIpcOpenMemHandle()}
*/
@NativeType("CUresult")
public static int cuIpcCloseMemHandle(@NativeType("CUdeviceptr") long dptr) {
long __functionAddress = Functions.IpcCloseMemHandle;
if (CHECKS) {
check(__functionAddress);
check(dptr);
}
return callPI(dptr, __functionAddress);
}
// --- [ cuMemHostRegister ] ---
/**
* Unsafe version of: {@link #cuMemHostRegister MemHostRegister}
*
* @param bytesize size in bytes of the address range to page-lock
*/
public static int ncuMemHostRegister(long p, long bytesize, int Flags) {
long __functionAddress = Functions.MemHostRegister;
if (CHECKS) {
check(__functionAddress);
}
return callPPI(p, bytesize, Flags, __functionAddress);
}
/**
* Registers an existing host memory range for use by CUDA.
*
* Page-locks the memory range specified by {@code p} and {@code bytesize} and maps it for the device(s) as specified by {@code Flags}. This memory range
* also is added to the same tracking mechanism as {@link #cuMemHostAlloc MemHostAlloc} to automatically accelerate calls to functions such as {@link #cuMemcpyHtoD MemcpyHtoD}. Since the memory
* can be accessed directly by the device, it can be read or written with much higher bandwidth than pageable memory that has not been registered.
* Page-locking excessive amounts of memory may degrade system performance, since it reduces the amount of memory available to the system for paging. As a
* result, this function is best used sparingly to register staging areas for data exchange between host and device.
*
* This function has limited support on Mac OS X: OS X 10.7 or higher is required.
*
* All flags are orthogonal to one another: a developer may page-lock memory that is portable or mapped with no restrictions.
*
* The {@link #CU_MEMHOSTREGISTER_DEVICEMAP MEMHOSTREGISTER_DEVICEMAP} flag may be specified on CUDA contexts for devices that do not support mapped pinned memory. The failure is deferred to
* {@link #cuMemHostGetDevicePointer MemHostGetDevicePointer} because the memory may be mapped into other CUDA contexts via the {@link #CU_MEMHOSTREGISTER_PORTABLE MEMHOSTREGISTER_PORTABLE} flag.
*
* For devices that have a non-zero value for the device attribute {@link #CU_DEVICE_ATTRIBUTE_CAN_USE_HOST_POINTER_FOR_REGISTERED_MEM DEVICE_ATTRIBUTE_CAN_USE_HOST_POINTER_FOR_REGISTERED_MEM}, the memory can also be
* accessed from the device using the host pointer {@code p}. The device pointer returned by {@code cuMemHostGetDevicePointer()} may or may not match the
* original host pointer {@code p} and depends on the devices visible to the application. If all devices visible to the application have a non-zero
* value for the device attribute, the device pointer returned by {@code cuMemHostGetDevicePointer()} will match the original pointer {@code p}. If any
* device visible to the application has a zero value for the device attribute, the device pointer returned by {@code cuMemHostGetDevicePointer()} will
* not match the original host pointer {@code p}, but it will be suitable for use on all devices provided Unified Virtual Addressing is enabled. In such
* systems, it is valid to access the memory using either pointer on devices that have a non-zero value for the device attribute. Note however that such
* devices should access the memory using only one of the two pointers and not both.
*
* The memory page-locked by this function must be unregistered with {@link #cuMemHostUnregister MemHostUnregister}.
*
* @param p host pointer to memory to page-lock
* @param Flags flags for allocation request. One or more of:
{@link #CU_MEMHOSTREGISTER_PORTABLE MEMHOSTREGISTER_PORTABLE} {@link #CU_MEMHOSTREGISTER_DEVICEMAP MEMHOSTREGISTER_DEVICEMAP} {@link #CU_MEMHOSTREGISTER_IOMEMORY MEMHOSTREGISTER_IOMEMORY} {@link #CU_MEMHOSTREGISTER_READ_ONLY MEMHOSTREGISTER_READ_ONLY}
*/
@NativeType("CUresult")
public static int cuMemHostRegister(@NativeType("void *") ByteBuffer p, @NativeType("unsigned int") int Flags) {
return ncuMemHostRegister(memAddress(p), p.remaining(), Flags);
}
// --- [ cuMemHostUnregister ] ---
/** Unsafe version of: {@link #cuMemHostUnregister MemHostUnregister} */
public static int ncuMemHostUnregister(long p) {
long __functionAddress = Functions.MemHostUnregister;
if (CHECKS) {
check(__functionAddress);
}
return callPI(p, __functionAddress);
}
/**
* Unregisters a memory range that was registered with {@link #cuMemHostRegister MemHostRegister}.
*
* Unmaps the memory range whose base address is specified by {@code p}, and makes it pageable again.
*
* The base address must be the same one specified to {@link #cuMemHostRegister MemHostRegister}.
*
* @param p host pointer to memory to unregister
*/
@NativeType("CUresult")
public static int cuMemHostUnregister(@NativeType("void *") ByteBuffer p) {
return ncuMemHostUnregister(memAddress(p));
}
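// Editorial usage sketch, not part of the generated bindings: page-locks an existing
// host buffer for faster transfers and unregisters it afterwards, as the docs above
// require. Uses a plain off-heap allocation; the names are hypothetical.
private static void exampleRegisterHostMemory() {
    ByteBuffer staging = memAlloc(1 << 20); // 1 MiB staging area
    try {
        cuMemHostRegister(staging, CU_MEMHOSTREGISTER_PORTABLE);
        // ... issue cuMemcpyHtoD/cuMemcpyDtoH transfers through `staging` ...
        cuMemHostUnregister(staging); // same base address that was registered
    } finally {
        memFree(staging);
    }
}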
// --- [ cuMemcpy ] ---
/**
* Copies memory.
*
* Copies data between two pointers. {@code dst} and {@code src} are base pointers of the destination and source, respectively. {@code ByteCount}
* specifies the number of bytes to copy. Note that this function infers the type of the transfer (host to host, host to device, device to device, or
* device to host) from the pointer values. This function is only allowed in contexts which support unified addressing.
*
* @param dst destination unified virtual address space pointer
* @param src source unified virtual address space pointer
* @param ByteCount size of memory copy in bytes
*/
@NativeType("CUresult")
public static int cuMemcpy(@NativeType("CUdeviceptr") long dst, @NativeType("CUdeviceptr") long src, @NativeType("size_t") long ByteCount) {
long __functionAddress = Functions.Memcpy;
if (CHECKS) {
check(__functionAddress);
check(dst);
check(src);
}
return callPPPI(dst, src, ByteCount, __functionAddress);
}
// --- [ cuMemcpyPeer ] ---
/**
* Copies device memory between two contexts.
*
* Copies from device memory in one context to device memory in another context. {@code dstDevice} is the base device pointer of the destination memory
* and {@code dstContext} is the destination context. {@code srcDevice} is the base device pointer of the source memory and {@code srcContext} is the
* source context. {@code ByteCount} specifies the number of bytes to copy.
*
* @param dstDevice destination device pointer
* @param dstContext destination context
* @param srcDevice source device pointer
* @param srcContext source context
* @param ByteCount size of memory copy in bytes
*/
@NativeType("CUresult")
public static int cuMemcpyPeer(@NativeType("CUdeviceptr") long dstDevice, @NativeType("CUcontext") long dstContext, @NativeType("CUdeviceptr") long srcDevice, @NativeType("CUcontext") long srcContext, @NativeType("size_t") long ByteCount) {
long __functionAddress = Functions.MemcpyPeer;
if (CHECKS) {
check(__functionAddress);
check(dstDevice);
check(dstContext);
check(srcDevice);
check(srcContext);
}
return callPPPPPI(dstDevice, dstContext, srcDevice, srcContext, ByteCount, __functionAddress);
}
// --- [ cuMemcpyHtoD ] ---
/**
* Unsafe version of: {@link #cuMemcpyHtoD MemcpyHtoD}
*
* @param ByteCount size of memory copy in bytes
*/
public static int ncuMemcpyHtoD(long dstDevice, long srcHost, long ByteCount) {
long __functionAddress = Functions.MemcpyHtoD;
if (CHECKS) {
check(dstDevice);
}
return callPPPI(dstDevice, srcHost, ByteCount, __functionAddress);
}
/**
* Copies memory from Host to Device.
*
* Copies from host memory to device memory. {@code dstDevice} and {@code srcHost} are the base addresses of the destination and source, respectively.
* {@code ByteCount} specifies the number of bytes to copy.
*
* @param dstDevice destination device pointer
* @param srcHost source host pointer
*/
@NativeType("CUresult")
public static int cuMemcpyHtoD(@NativeType("CUdeviceptr") long dstDevice, @NativeType("void const *") ByteBuffer srcHost) {
return ncuMemcpyHtoD(dstDevice, memAddress(srcHost), srcHost.remaining());
}
/**
* Copies memory from Host to Device.
*
* Copies from host memory to device memory. {@code dstDevice} and {@code srcHost} are the base addresses of the destination and source, respectively.
* {@code ByteCount} specifies the number of bytes to copy.
*
* @param dstDevice destination device pointer
* @param srcHost source host pointer
*/
@NativeType("CUresult")
public static int cuMemcpyHtoD(@NativeType("CUdeviceptr") long dstDevice, @NativeType("void const *") ShortBuffer srcHost) {
return ncuMemcpyHtoD(dstDevice, memAddress(srcHost), Integer.toUnsignedLong(srcHost.remaining()) << 1);
}
/**
* Copies memory from Host to Device.
*
* Copies from host memory to device memory. {@code dstDevice} and {@code srcHost} are the base addresses of the destination and source, respectively.
* {@code ByteCount} specifies the number of bytes to copy.
*
* @param dstDevice destination device pointer
* @param srcHost source host pointer
*/
@NativeType("CUresult")
public static int cuMemcpyHtoD(@NativeType("CUdeviceptr") long dstDevice, @NativeType("void const *") IntBuffer srcHost) {
return ncuMemcpyHtoD(dstDevice, memAddress(srcHost), Integer.toUnsignedLong(srcHost.remaining()) << 2);
}
/**
* Copies memory from Host to Device.
*
* Copies from host memory to device memory. {@code dstDevice} and {@code srcHost} are the base addresses of the destination and source, respectively.
* {@code ByteCount} specifies the number of bytes to copy.
*
* @param dstDevice destination device pointer
* @param srcHost source host pointer
*/
@NativeType("CUresult")
public static int cuMemcpyHtoD(@NativeType("CUdeviceptr") long dstDevice, @NativeType("void const *") LongBuffer srcHost) {
return ncuMemcpyHtoD(dstDevice, memAddress(srcHost), Integer.toUnsignedLong(srcHost.remaining()) << 3);
}
/**
* Copies memory from Host to Device.
*
* Copies from host memory to device memory. {@code dstDevice} and {@code srcHost} are the base addresses of the destination and source, respectively.
* {@code ByteCount} specifies the number of bytes to copy.
*
* @param dstDevice destination device pointer
* @param srcHost source host pointer
*/
@NativeType("CUresult")
public static int cuMemcpyHtoD(@NativeType("CUdeviceptr") long dstDevice, @NativeType("void const *") FloatBuffer srcHost) {
return ncuMemcpyHtoD(dstDevice, memAddress(srcHost), Integer.toUnsignedLong(srcHost.remaining()) << 2);
}
/**
* Copies memory from Host to Device.
*
* Copies from host memory to device memory. {@code dstDevice} and {@code srcHost} are the base addresses of the destination and source, respectively.
* {@code ByteCount} specifies the number of bytes to copy.
*
* @param dstDevice destination device pointer
* @param srcHost source host pointer
*/
@NativeType("CUresult")
public static int cuMemcpyHtoD(@NativeType("CUdeviceptr") long dstDevice, @NativeType("void const *") DoubleBuffer srcHost) {
return ncuMemcpyHtoD(dstDevice, memAddress(srcHost), Integer.toUnsignedLong(srcHost.remaining()) << 3);
}
/**
* Copies memory from Host to Device.
*
* Copies from host memory to device memory. {@code dstDevice} and {@code srcHost} are the base addresses of the destination and source, respectively.
* {@code ByteCount} specifies the number of bytes to copy.
*
* @param dstDevice destination device pointer
* @param srcHost source host pointer
*/
@NativeType("CUresult")
public static int cuMemcpyHtoD(@NativeType("CUdeviceptr") long dstDevice, @NativeType("void const *") PointerBuffer srcHost) {
return ncuMemcpyHtoD(dstDevice, memAddress(srcHost), Integer.toUnsignedLong(srcHost.remaining()) << POINTER_SHIFT);
}
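// Editorial usage sketch, not part of the generated bindings: uploads four floats to
// a fresh device allocation. The typed overloads above derive ByteCount from the
// buffer's remaining() elements, so no explicit byte size is passed; names are
// hypothetical.
private static long exampleUploadFloats() {
    try (MemoryStack stack = stackPush()) {
        FloatBuffer src = stack.floats(1.0f, 2.0f, 3.0f, 4.0f);
        PointerBuffer pp = stack.mallocPointer(1);
        cuMemAlloc(pp, Float.BYTES * src.remaining());
        long dptr = pp.get(0);
        cuMemcpyHtoD(dptr, src); // copies remaining() * 4 bytes
        return dptr;             // caller frees with cuMemFree
    }
}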
// --- [ cuMemcpyDtoH ] ---
/**
* Unsafe version of: {@link #cuMemcpyDtoH MemcpyDtoH}
*
* @param ByteCount size of memory copy in bytes
*/
public static int ncuMemcpyDtoH(long dstHost, long srcDevice, long ByteCount) {
long __functionAddress = Functions.MemcpyDtoH;
if (CHECKS) {
check(srcDevice);
}
return callPPPI(dstHost, srcDevice, ByteCount, __functionAddress);
}
/**
* Copies memory from Device to Host.
*
* Copies from device to host memory. {@code dstHost} and {@code srcDevice} specify the base pointers of the destination and source, respectively. {@code
* ByteCount} specifies the number of bytes to copy.
*
* @param dstHost destination host pointer
* @param srcDevice source device pointer
*/
@NativeType("CUresult")
public static int cuMemcpyDtoH(@NativeType("void *") ByteBuffer dstHost, @NativeType("CUdeviceptr") long srcDevice) {
return ncuMemcpyDtoH(memAddress(dstHost), srcDevice, dstHost.remaining());
}
/**
* Copies memory from Device to Host.
*
* Copies from device to host memory. {@code dstHost} and {@code srcDevice} specify the base pointers of the destination and source, respectively. {@code
* ByteCount} specifies the number of bytes to copy.
*
* @param dstHost destination host pointer
* @param srcDevice source device pointer
*/
@NativeType("CUresult")
public static int cuMemcpyDtoH(@NativeType("void *") ShortBuffer dstHost, @NativeType("CUdeviceptr") long srcDevice) {
return ncuMemcpyDtoH(memAddress(dstHost), srcDevice, Integer.toUnsignedLong(dstHost.remaining()) << 1);
}
/**
* Copies memory from Device to Host.
*
* Copies from device to host memory. {@code dstHost} and {@code srcDevice} specify the base pointers of the destination and source, respectively. {@code
* ByteCount} specifies the number of bytes to copy.
*
* @param dstHost destination host pointer
* @param srcDevice source device pointer
*/
@NativeType("CUresult")
public static int cuMemcpyDtoH(@NativeType("void *") IntBuffer dstHost, @NativeType("CUdeviceptr") long srcDevice) {
return ncuMemcpyDtoH(memAddress(dstHost), srcDevice, Integer.toUnsignedLong(dstHost.remaining()) << 2);
}
/**
* Copies memory from Device to Host.
*
* Copies from device to host memory. {@code dstHost} and {@code srcDevice} specify the base pointers of the destination and source, respectively. {@code
* ByteCount} specifies the number of bytes to copy.
*
* @param dstHost destination host pointer
* @param srcDevice source device pointer
*/
@NativeType("CUresult")
public static int cuMemcpyDtoH(@NativeType("void *") LongBuffer dstHost, @NativeType("CUdeviceptr") long srcDevice) {
return ncuMemcpyDtoH(memAddress(dstHost), srcDevice, Integer.toUnsignedLong(dstHost.remaining()) << 3);
}
/**
* Copies memory from Device to Host.
*
* Copies from device to host memory. {@code dstHost} and {@code srcDevice} specify the base pointers of the destination and source, respectively. {@code
* ByteCount} specifies the number of bytes to copy.
*
* @param dstHost destination host pointer
* @param srcDevice source device pointer
*/
@NativeType("CUresult")
public static int cuMemcpyDtoH(@NativeType("void *") FloatBuffer dstHost, @NativeType("CUdeviceptr") long srcDevice) {
return ncuMemcpyDtoH(memAddress(dstHost), srcDevice, Integer.toUnsignedLong(dstHost.remaining()) << 2);
}
/**
* Copies memory from Device to Host.
*
* Copies from device to host memory. {@code dstHost} and {@code srcDevice} specify the base pointers of the destination and source, respectively. {@code
* ByteCount} specifies the number of bytes to copy.
*
* @param dstHost destination host pointer
* @param srcDevice source device pointer
*/
@NativeType("CUresult")
public static int cuMemcpyDtoH(@NativeType("void *") DoubleBuffer dstHost, @NativeType("CUdeviceptr") long srcDevice) {
return ncuMemcpyDtoH(memAddress(dstHost), srcDevice, Integer.toUnsignedLong(dstHost.remaining()) << 3);
}
/**
* Copies memory from Device to Host.
*
* Copies from device to host memory. {@code dstHost} and {@code srcDevice} specify the base pointers of the destination and source, respectively. {@code
* ByteCount} specifies the number of bytes to copy.
*
* @param dstHost destination host pointer
* @param srcDevice source device pointer
*/
@NativeType("CUresult")
public static int cuMemcpyDtoH(@NativeType("void *") PointerBuffer dstHost, @NativeType("CUdeviceptr") long srcDevice) {
return ncuMemcpyDtoH(memAddress(dstHost), srcDevice, Integer.toUnsignedLong(dstHost.remaining()) << POINTER_SHIFT);
}
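// Editorial usage sketch, not part of the generated bindings: reads four floats back
// from device memory into a stack buffer. Again the copy size comes from the
// destination buffer's remaining() elements; names are hypothetical.
private static float[] exampleDownloadFloats(long dptr) {
    try (MemoryStack stack = stackPush()) {
        FloatBuffer dst = stack.mallocFloat(4);
        cuMemcpyDtoH(dst, dptr); // copies remaining() * 4 bytes from dptr
        float[] out = new float[dst.remaining()];
        dst.get(out);
        return out;
    }
}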
// --- [ cuMemcpyDtoD ] ---
/**
* Copies memory from Device to Device.
*
* Copies from device memory to device memory. {@code dstDevice} and {@code srcDevice} are the base pointers of the destination and source, respectively.
* {@code ByteCount} specifies the number of bytes to copy.
*
* @param dstDevice destination device pointer
* @param srcDevice source device pointer
* @param ByteCount size of memory copy in bytes
*/
@NativeType("CUresult")
public static int cuMemcpyDtoD(@NativeType("CUdeviceptr") long dstDevice, @NativeType("CUdeviceptr") long srcDevice, @NativeType("size_t") long ByteCount) {
long __functionAddress = Functions.MemcpyDtoD;
if (CHECKS) {
check(dstDevice);
check(srcDevice);
}
return callPPPI(dstDevice, srcDevice, ByteCount, __functionAddress);
}
// --- [ cuMemcpyDtoA ] ---
/**
* Copies memory from Device to Array.
*
* Copies from device memory to a 1D CUDA array. {@code dstArray} and {@code dstOffset} specify the CUDA array handle and starting index of the
* destination data. {@code srcDevice} specifies the base pointer of the source. {@code ByteCount} specifies the number of bytes to copy.
*
* @param dstArray destination array
* @param dstOffset offset in bytes of destination array
* @param srcDevice source device pointer
* @param ByteCount size of memory copy in bytes
*/
@NativeType("CUresult")
public static int cuMemcpyDtoA(@NativeType("CUarray") long dstArray, @NativeType("size_t") long dstOffset, @NativeType("CUdeviceptr") long srcDevice, @NativeType("size_t") long ByteCount) {
long __functionAddress = Functions.MemcpyDtoA;
if (CHECKS) {
check(dstArray);
check(srcDevice);
}
return callPPPPI(dstArray, dstOffset, srcDevice, ByteCount, __functionAddress);
}
// --- [ cuMemcpyAtoD ] ---
/**
* Copies memory from Array to Device.
*
* Copies from one 1D CUDA array to device memory. {@code dstDevice} specifies the base pointer of the destination and must be naturally aligned with the
* CUDA array elements. {@code srcArray} and {@code srcOffset} specify the CUDA array handle and the offset in bytes into the array where the copy is to
* begin. {@code ByteCount} specifies the number of bytes to copy and must be evenly divisible by the array element size.
*
* @param dstDevice destination device pointer
* @param srcArray source array
* @param srcOffset offset in bytes of source array
* @param ByteCount size of memory copy in bytes
*/
@NativeType("CUresult")
public static int cuMemcpyAtoD(@NativeType("CUdeviceptr") long dstDevice, @NativeType("CUarray") long srcArray, @NativeType("size_t") long srcOffset, @NativeType("size_t") long ByteCount) {
long __functionAddress = Functions.MemcpyAtoD;
if (CHECKS) {
check(dstDevice);
check(srcArray);
}
return callPPPPI(dstDevice, srcArray, srcOffset, ByteCount, __functionAddress);
}
// --- [ cuMemcpyHtoA ] ---
/**
* Unsafe version of: {@link #cuMemcpyHtoA MemcpyHtoA}
*
* @param ByteCount size of memory copy in bytes
*/
public static int ncuMemcpyHtoA(long dstArray, long dstOffset, long srcHost, long ByteCount) {
long __functionAddress = Functions.MemcpyHtoA;
if (CHECKS) {
check(dstArray);
}
return callPPPPI(dstArray, dstOffset, srcHost, ByteCount, __functionAddress);
}
/**
* Copies memory from Host to Array.
*
* Copies from host memory to a 1D CUDA array. {@code dstArray} and {@code dstOffset} specify the CUDA array handle and starting offset in bytes of the
* destination data. {@code srcHost} specifies the base address of the source. {@code ByteCount} specifies the number of bytes to copy.
*
* @param dstArray destination array
* @param dstOffset offset in bytes of destination array
* @param srcHost source host pointer
*/
@NativeType("CUresult")
public static int cuMemcpyHtoA(@NativeType("CUarray") long dstArray, @NativeType("size_t") long dstOffset, @NativeType("void const *") ByteBuffer srcHost) {
return ncuMemcpyHtoA(dstArray, dstOffset, memAddress(srcHost), srcHost.remaining());
}
/**
* Copies memory from Host to Array.
*
* Copies from host memory to a 1D CUDA array. {@code dstArray} and {@code dstOffset} specify the CUDA array handle and starting offset in bytes of the
* destination data. {@code srcHost} specifies the base address of the source. {@code ByteCount} specifies the number of bytes to copy.
*
* @param dstArray destination array
* @param dstOffset offset in bytes of destination array
* @param srcHost source host pointer
*/
@NativeType("CUresult")
public static int cuMemcpyHtoA(@NativeType("CUarray") long dstArray, @NativeType("size_t") long dstOffset, @NativeType("void const *") ShortBuffer srcHost) {
return ncuMemcpyHtoA(dstArray, dstOffset, memAddress(srcHost), Integer.toUnsignedLong(srcHost.remaining()) << 1);
}
/**
* Copies memory from Host to Array.
*
* Copies from host memory to a 1D CUDA array. {@code dstArray} and {@code dstOffset} specify the CUDA array handle and starting offset in bytes of the
* destination data. {@code srcHost} specifies the base address of the source. {@code ByteCount} specifies the number of bytes to copy.
*
* @param dstArray destination array
* @param dstOffset offset in bytes of destination array
* @param srcHost source host pointer
*/
@NativeType("CUresult")
public static int cuMemcpyHtoA(@NativeType("CUarray") long dstArray, @NativeType("size_t") long dstOffset, @NativeType("void const *") IntBuffer srcHost) {
return ncuMemcpyHtoA(dstArray, dstOffset, memAddress(srcHost), Integer.toUnsignedLong(srcHost.remaining()) << 2);
}
/**
* Copies memory from Host to Array.
*
* Copies from host memory to a 1D CUDA array. {@code dstArray} and {@code dstOffset} specify the CUDA array handle and starting offset in bytes of the
* destination data. {@code srcHost} specifies the base address of the source. {@code ByteCount} specifies the number of bytes to copy.
*
* @param dstArray destination array
* @param dstOffset offset in bytes of destination array
* @param srcHost source host pointer
*/
@NativeType("CUresult")
public static int cuMemcpyHtoA(@NativeType("CUarray") long dstArray, @NativeType("size_t") long dstOffset, @NativeType("void const *") LongBuffer srcHost) {
return ncuMemcpyHtoA(dstArray, dstOffset, memAddress(srcHost), Integer.toUnsignedLong(srcHost.remaining()) << 3);
}
/**
* Copies memory from Host to Array.
*
* Copies from host memory to a 1D CUDA array. {@code dstArray} and {@code dstOffset} specify the CUDA array handle and starting offset in bytes of the
* destination data. {@code srcHost} specifies the base address of the source. {@code ByteCount} specifies the number of bytes to copy.
*
* @param dstArray destination array
* @param dstOffset offset in bytes of destination array
* @param srcHost source host pointer
*/
@NativeType("CUresult")
public static int cuMemcpyHtoA(@NativeType("CUarray") long dstArray, @NativeType("size_t") long dstOffset, @NativeType("void const *") FloatBuffer srcHost) {
return ncuMemcpyHtoA(dstArray, dstOffset, memAddress(srcHost), Integer.toUnsignedLong(srcHost.remaining()) << 2);
}
/**
* Copies memory from Host to Array.
*
* Copies from host memory to a 1D CUDA array. {@code dstArray} and {@code dstOffset} specify the CUDA array handle and starting offset in bytes of the
* destination data. {@code srcHost} specifies the base address of the source. {@code ByteCount} specifies the number of bytes to copy.
*
* @param dstArray destination array
* @param dstOffset offset in bytes of destination array
* @param srcHost source host pointer
*/
@NativeType("CUresult")
public static int cuMemcpyHtoA(@NativeType("CUarray") long dstArray, @NativeType("size_t") long dstOffset, @NativeType("void const *") DoubleBuffer srcHost) {
return ncuMemcpyHtoA(dstArray, dstOffset, memAddress(srcHost), Integer.toUnsignedLong(srcHost.remaining()) << 3);
}
/**
* Copies memory from Host to Array.
*
* Copies from host memory to a 1D CUDA array. {@code dstArray} and {@code dstOffset} specify the CUDA array handle and starting offset in bytes of the
* destination data. {@code srcHost} specifies the base address of the source. {@code ByteCount} specifies the number of bytes to copy.
*
* @param dstArray destination array
* @param dstOffset offset in bytes of destination array
* @param srcHost source host pointer
*/
@NativeType("CUresult")
public static int cuMemcpyHtoA(@NativeType("CUarray") long dstArray, @NativeType("size_t") long dstOffset, @NativeType("void const *") PointerBuffer srcHost) {
return ncuMemcpyHtoA(dstArray, dstOffset, memAddress(srcHost), Integer.toUnsignedLong(srcHost.remaining()) << POINTER_SHIFT);
}
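// Editorial usage sketch, not part of the generated bindings: creates a 1D float
// CUDA array and fills it from a host buffer with cuMemcpyHtoA. Height = 0 selects a
// 1D array; the descriptor setters are assumed to follow the LWJGL
// CUDA_ARRAY_DESCRIPTOR struct API, and the helper name is hypothetical.
private static long exampleFillArray() {
    try (MemoryStack stack = stackPush()) {
        CUDA_ARRAY_DESCRIPTOR desc = CUDA_ARRAY_DESCRIPTOR.calloc(stack)
            .Width(4)
            .Height(0)
            .Format(CU_AD_FORMAT_FLOAT)
            .NumChannels(1);
        PointerBuffer pArray = stack.mallocPointer(1);
        cuArrayCreate(pArray, desc);
        long array = pArray.get(0);
        cuMemcpyHtoA(array, 0L, stack.floats(1.0f, 2.0f, 3.0f, 4.0f));
        return array; // caller releases with cuArrayDestroy
    }
}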
// --- [ cuMemcpyAtoH ] ---
/**
* Unsafe version of: {@link #cuMemcpyAtoH MemcpyAtoH}
*
* @param ByteCount size of memory copy in bytes
*/
public static int ncuMemcpyAtoH(long dstHost, long srcArray, long srcOffset, long ByteCount) {
long __functionAddress = Functions.MemcpyAtoH;
if (CHECKS) {
check(srcArray);
}
return callPPPPI(dstHost, srcArray, srcOffset, ByteCount, __functionAddress);
}
/**
* Copies memory from Array to Host.
*
* Copies from one 1D CUDA array to host memory. {@code dstHost} specifies the base pointer of the destination. {@code srcArray} and {@code srcOffset}
* specify the CUDA array handle and starting offset in bytes of the source data. {@code ByteCount} specifies the number of bytes to copy.
*
* @param dstHost destination host pointer
* @param srcArray source array
* @param srcOffset offset in bytes of source array
*/
@NativeType("CUresult")
public static int cuMemcpyAtoH(@NativeType("void *") ByteBuffer dstHost, @NativeType("CUarray") long srcArray, @NativeType("size_t") long srcOffset) {
return ncuMemcpyAtoH(memAddress(dstHost), srcArray, srcOffset, dstHost.remaining());
}
/**
* Copies memory from Array to Host.
*
* Copies from one 1D CUDA array to host memory. {@code dstHost} specifies the base pointer of the destination. {@code srcArray} and {@code srcOffset}
* specify the CUDA array handle and starting offset in bytes of the source data. {@code ByteCount} specifies the number of bytes to copy.
*
* @param dstHost destination host pointer
* @param srcArray source array
* @param srcOffset offset in bytes of source array
*/
@NativeType("CUresult")
public static int cuMemcpyAtoH(@NativeType("void *") ShortBuffer dstHost, @NativeType("CUarray") long srcArray, @NativeType("size_t") long srcOffset) {
return ncuMemcpyAtoH(memAddress(dstHost), srcArray, srcOffset, Integer.toUnsignedLong(dstHost.remaining()) << 1);
}
/**
* Copies memory from Array to Host.
*
* Copies from one 1D CUDA array to host memory. {@code dstHost} specifies the base pointer of the destination. {@code srcArray} and {@code srcOffset}
* specify the CUDA array handle and starting offset in bytes of the source data. {@code ByteCount} specifies the number of bytes to copy.
*
* @param dstHost destination host pointer
* @param srcArray source array
* @param srcOffset offset in bytes of source array
*/
@NativeType("CUresult")
public static int cuMemcpyAtoH(@NativeType("void *") IntBuffer dstHost, @NativeType("CUarray") long srcArray, @NativeType("size_t") long srcOffset) {
return ncuMemcpyAtoH(memAddress(dstHost), srcArray, srcOffset, Integer.toUnsignedLong(dstHost.remaining()) << 2);
}
/**
* Copies memory from Array to Host.
*
* Copies from one 1D CUDA array to host memory. {@code dstHost} specifies the base pointer of the destination. {@code srcArray} and {@code srcOffset}
* specify the CUDA array handle and starting offset in bytes of the source data. {@code ByteCount} specifies the number of bytes to copy.
*
* @param dstHost destination host pointer
* @param srcArray source array
* @param srcOffset offset in bytes of source array
*/
@NativeType("CUresult")
public static int cuMemcpyAtoH(@NativeType("void *") LongBuffer dstHost, @NativeType("CUarray") long srcArray, @NativeType("size_t") long srcOffset) {
return ncuMemcpyAtoH(memAddress(dstHost), srcArray, srcOffset, Integer.toUnsignedLong(dstHost.remaining()) << 3);
}
/**
* Copies memory from Array to Host.
*
* Copies from one 1D CUDA array to host memory. {@code dstHost} specifies the base pointer of the destination. {@code srcArray} and {@code srcOffset}
* specify the CUDA array handle and starting offset in bytes of the source data. {@code ByteCount} specifies the number of bytes to copy.
*
* @param dstHost destination host pointer
* @param srcArray source array
* @param srcOffset offset in bytes of source array
*/
@NativeType("CUresult")
public static int cuMemcpyAtoH(@NativeType("void *") FloatBuffer dstHost, @NativeType("CUarray") long srcArray, @NativeType("size_t") long srcOffset) {
return ncuMemcpyAtoH(memAddress(dstHost), srcArray, srcOffset, Integer.toUnsignedLong(dstHost.remaining()) << 2);
}
/**
* Copies memory from Array to Host.
*
* Copies from one 1D CUDA array to host memory. {@code dstHost} specifies the base pointer of the destination. {@code srcArray} and {@code srcOffset}
* specify the CUDA array handle and starting offset in bytes of the source data. {@code ByteCount} specifies the number of bytes to copy.
*
* @param dstHost destination host pointer
* @param srcArray source array
* @param srcOffset offset in bytes of source array
*/
@NativeType("CUresult")
public static int cuMemcpyAtoH(@NativeType("void *") DoubleBuffer dstHost, @NativeType("CUarray") long srcArray, @NativeType("size_t") long srcOffset) {
return ncuMemcpyAtoH(memAddress(dstHost), srcArray, srcOffset, Integer.toUnsignedLong(dstHost.remaining()) << 3);
}
/**
* Copies memory from Array to Host.
*
* Copies from one 1D CUDA array to host memory. {@code dstHost} specifies the base pointer of the destination. {@code srcArray} and {@code srcOffset}
* specify the CUDA array handle and starting offset in bytes of the source data. {@code ByteCount} specifies the number of bytes to copy.
*
* @param dstHost destination host pointer
* @param srcArray source array
* @param srcOffset offset in bytes of source array
*/
@NativeType("CUresult")
public static int cuMemcpyAtoH(@NativeType("void *") PointerBuffer dstHost, @NativeType("CUarray") long srcArray, @NativeType("size_t") long srcOffset) {
return ncuMemcpyAtoH(memAddress(dstHost), srcArray, srcOffset, Integer.toUnsignedLong(dstHost.remaining()) << POINTER_SHIFT);
}
// --- [ cuMemcpyAtoA ] ---
/**
* Copies memory from Array to Array.
*
* Copies from one 1D CUDA array to another. {@code dstArray} and {@code srcArray} specify the handles of the destination and source CUDA arrays for the
* copy, respectively. {@code dstOffset} and {@code srcOffset} specify the destination and source offsets in bytes into the CUDA arrays. {@code ByteCount}
* is the number of bytes to be copied. The elements of the CUDA arrays need not have the same format, but they must have the same size, and
* {@code ByteCount} must be evenly divisible by that size.
*
* @param dstArray destination array
* @param dstOffset offset in bytes of destination array
* @param srcArray source array
* @param srcOffset offset in bytes of source array
* @param ByteCount size of memory copy in bytes
*/
@NativeType("CUresult")
public static int cuMemcpyAtoA(@NativeType("CUarray") long dstArray, @NativeType("size_t") long dstOffset, @NativeType("CUarray") long srcArray, @NativeType("size_t") long srcOffset, @NativeType("size_t") long ByteCount) {
long __functionAddress = Functions.MemcpyAtoA;
if (CHECKS) {
check(dstArray);
check(srcArray);
}
return callPPPPPI(dstArray, dstOffset, srcArray, srcOffset, ByteCount, __functionAddress);
}
// --- [ cuMemcpy2D ] ---
/** Unsafe version of: {@link #cuMemcpy2D Memcpy2D} */
public static int ncuMemcpy2D(long pCopy) {
long __functionAddress = Functions.Memcpy2D;
return callPI(pCopy, __functionAddress);
}
/**
* Copies memory for 2D arrays.
*
* Performs a 2D memory copy according to the parameters specified in {@code pCopy}.
*
* If {@code srcMemoryType} is {@link #CU_MEMORYTYPE_UNIFIED MEMORYTYPE_UNIFIED}, {@code srcDevice} and {@code srcPitch} specify the (unified virtual address space) base address of the
* source data and the bytes per row to apply. {@code srcArray} is ignored. This value may be used only if unified addressing is supported in the calling
* context.
*
* If {@code srcMemoryType} is {@link #CU_MEMORYTYPE_HOST MEMORYTYPE_HOST}, {@code srcHost} and {@code srcPitch} specify the (host) base address of the source data and the bytes per
* row to apply. {@code srcArray} is ignored.
*
* If {@code srcMemoryType} is {@link #CU_MEMORYTYPE_DEVICE MEMORYTYPE_DEVICE}, {@code srcDevice} and {@code srcPitch} specify the (device) base address of the source data and the
* bytes per row to apply. {@code srcArray} is ignored.
*
* If {@code srcMemoryType} is {@link #CU_MEMORYTYPE_ARRAY MEMORYTYPE_ARRAY}, {@code srcArray} specifies the handle of the source data. {@code srcHost}, {@code srcDevice} and
* {@code srcPitch} are ignored.
*
* If {@code dstMemoryType} is {@link #CU_MEMORYTYPE_HOST MEMORYTYPE_HOST}, {@code dstHost} and {@code dstPitch} specify the (host) base address of the destination data and the
* bytes per row to apply. {@code dstArray} is ignored.
*
* If {@code dstMemoryType} is {@link #CU_MEMORYTYPE_UNIFIED MEMORYTYPE_UNIFIED}, {@code dstDevice} and {@code dstPitch} specify the (unified virtual address space) base address of the
* destination data and the bytes per row to apply. {@code dstArray} is ignored. This value may be used only if unified addressing is supported in the calling
* context.
*
* If {@code dstMemoryType} is {@link #CU_MEMORYTYPE_DEVICE MEMORYTYPE_DEVICE}, {@code dstDevice} and {@code dstPitch} specify the (device) base address of the destination data and
* the bytes per row to apply. {@code dstArray} is ignored.
*
* If {@code dstMemoryType} is {@link #CU_MEMORYTYPE_ARRAY MEMORYTYPE_ARRAY}, {@code dstArray} specifies the handle of the destination data. {@code dstHost}, {@code dstDevice} and
* {@code dstPitch} are ignored.
*
* {@code srcXInBytes} and {@code srcY} specify the base address of the source data for the copy.
*
* For host pointers, the starting address is
*
*
void* Start = (void*)((char*)srcHost+srcY*srcPitch + srcXInBytes);
*
* For device pointers, the starting address is
*
*
* CUdeviceptr Start = srcDevice+srcY*srcPitch+srcXInBytes;
*
* For CUDA arrays, {@code srcXInBytes} must be evenly divisible by the array element size.
*
* {@code dstXInBytes} and {@code dstY} specify the base address of the destination data for the copy.
*
* For host pointers, the base address is
*
*
void* dstStart = (void*)((char*)dstHost+dstY*dstPitch + dstXInBytes);
*
* For device pointers, the starting address is
*
*
* CUdeviceptr dstStart = dstDevice+dstY*dstPitch+dstXInBytes;
*
* For CUDA arrays, {@code dstXInBytes} must be evenly divisible by the array element size.
*
* {@code WidthInBytes} and {@code Height} specify the width (in bytes) and height of the 2D copy being performed.
*
* If specified, {@code srcPitch} must be greater than or equal to {@code WidthInBytes} + {@code srcXInBytes}, and {@code dstPitch} must be greater than
* or equal to {@code WidthInBytes} + {@code dstXInBytes}.
*
* {@code cuMemcpy2D()} returns an error if any pitch is greater than the maximum allowed ({@link #CU_DEVICE_ATTRIBUTE_MAX_PITCH DEVICE_ATTRIBUTE_MAX_PITCH}). {@code cuMemAllocPitch()} passes back
* pitches that always work with {@code cuMemcpy2D()}. On intra-device memory copies (device to device, CUDA array to device, CUDA array to CUDA array),
* {@code cuMemcpy2D()} may fail for pitches not computed by {@link #cuMemAllocPitch MemAllocPitch}. {@link #cuMemcpy2DUnaligned Memcpy2DUnaligned} does not have this restriction, but may run
* significantly slower in the cases where {@code cuMemcpy2D()} would have returned an error code.
*
* @param pCopy parameters for the memory copy
*/
@NativeType("CUresult")
public static int cuMemcpy2D(@NativeType("CUDA_MEMCPY2D const *") CUDA_MEMCPY2D pCopy) {
return ncuMemcpy2D(pCopy.address());
}
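// Editorial usage sketch, not part of the generated bindings: copies a 4x4-byte
// region from host memory into a pitched device allocation via cuMemcpy2D. Pitches
// returned by cuMemAllocPitch always work here, per the docs above. The struct
// setters are assumed to follow the LWJGL CUDA_MEMCPY2D API; names are hypothetical.
private static void example2DCopy() {
    try (MemoryStack stack = stackPush()) {
        PointerBuffer pp    = stack.mallocPointer(1);
        PointerBuffer pitch = stack.mallocPointer(1);
        cuMemAllocPitch(pp, pitch, 4L, 4L, 4); // 4 bytes wide, 4 rows, 4-byte elements
        ByteBuffer src = stack.malloc(4 * 4);  // tightly packed host rows
        CUDA_MEMCPY2D copy = CUDA_MEMCPY2D.calloc(stack)
            .srcMemoryType(CU_MEMORYTYPE_HOST)
            .srcHost(src)
            .srcPitch(4)
            .dstMemoryType(CU_MEMORYTYPE_DEVICE)
            .dstDevice(pp.get(0))
            .dstPitch(pitch.get(0)) // pitch reported by cuMemAllocPitch
            .WidthInBytes(4)
            .Height(4);
        cuMemcpy2D(copy);
        cuMemFree(pp.get(0));
    }
}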
// --- [ cuMemcpy2DUnaligned ] ---
/** Unsafe version of: {@link #cuMemcpy2DUnaligned Memcpy2DUnaligned} */
public static int ncuMemcpy2DUnaligned(long pCopy) {
long __functionAddress = Functions.Memcpy2DUnaligned;
return callPI(pCopy, __functionAddress);
}
/**
* Copies memory for 2D arrays.
*
* Performs a 2D memory copy according to the parameters specified in {@code pCopy}.
*
* If {@code srcMemoryType} is {@link #CU_MEMORYTYPE_UNIFIED MEMORYTYPE_UNIFIED}, {@code srcDevice} and {@code srcPitch} specify the (unified virtual address space) base address of the
* source data and the bytes per row to apply. {@code srcArray} is ignored. This value may be used only if unified addressing is supported in the calling
* context.
*
* If {@code srcMemoryType} is {@link #CU_MEMORYTYPE_HOST MEMORYTYPE_HOST}, {@code srcHost} and {@code srcPitch} specify the (host) base address of the source data and the bytes per
* row to apply. {@code srcArray} is ignored.
*
* If {@code srcMemoryType} is {@link #CU_MEMORYTYPE_DEVICE MEMORYTYPE_DEVICE}, {@code srcDevice} and {@code srcPitch} specify the (device) base address of the source data and the
* bytes per row to apply. {@code srcArray} is ignored.
*
* If {@code srcMemoryType} is {@link #CU_MEMORYTYPE_ARRAY MEMORYTYPE_ARRAY}, {@code srcArray} specifies the handle of the source data. {@code srcHost}, {@code srcDevice} and
* {@code srcPitch} are ignored.
*
* If {@code dstMemoryType} is {@link #CU_MEMORYTYPE_UNIFIED MEMORYTYPE_UNIFIED}, {@code dstDevice} and {@code dstPitch} specify the (unified virtual address space) base address of the
* destination data and the bytes per row to apply. {@code dstArray} is ignored. This value may be used only if unified addressing is supported in the calling
* context.
*
* If {@code dstMemoryType} is {@link #CU_MEMORYTYPE_HOST MEMORYTYPE_HOST}, {@code dstHost} and {@code dstPitch} specify the (host) base address of the destination data and the
* bytes per row to apply. {@code dstArray} is ignored.
*
* If {@code dstMemoryType} is {@link #CU_MEMORYTYPE_DEVICE MEMORYTYPE_DEVICE}, {@code dstDevice} and {@code dstPitch} specify the (device) base address of the destination data and
* the bytes per row to apply. {@code dstArray} is ignored.
*
* If {@code dstMemoryType} is {@link #CU_MEMORYTYPE_ARRAY MEMORYTYPE_ARRAY}, {@code dstArray} specifies the handle of the destination data. {@code dstHost}, {@code dstDevice} and
* {@code dstPitch} are ignored.
*
* {@code srcXInBytes} and {@code srcY} specify the base address of the source data for the copy.
*
* For host pointers, the starting address is
*
*
* void* Start = (void*)((char*)srcHost+srcY*srcPitch + srcXInBytes);
*
* For device pointers, the starting address is
*
*
* CUdeviceptr Start = srcDevice+srcY*srcPitch+srcXInBytes;
*
* For CUDA arrays, {@code srcXInBytes} must be evenly divisible by the array element size.
*
* {@code dstXInBytes} and {@code dstY} specify the base address of the destination data for the copy.
*
* For host pointers, the base address is
*
*
* void* dstStart = (void*)((char*)dstHost+dstY*dstPitch + dstXInBytes);
*
* For device pointers, the starting address is
*
*
* CUdeviceptr dstStart = dstDevice+dstY*dstPitch+dstXInBytes;
*
* For CUDA arrays, {@code dstXInBytes} must be evenly divisible by the array element size.
*
* {@code WidthInBytes} and {@code Height} specify the width (in bytes) and height of the 2D copy being performed.
*
* If specified, {@code srcPitch} must be greater than or equal to {@code WidthInBytes} + {@code srcXInBytes}, and {@code dstPitch} must be greater than
* or equal to {@code WidthInBytes} + {@code dstXInBytes}.
*
* {@link #cuMemcpy2D Memcpy2D} returns an error if any pitch is greater than the maximum allowed ({@link #CU_DEVICE_ATTRIBUTE_MAX_PITCH DEVICE_ATTRIBUTE_MAX_PITCH}). {@link #cuMemAllocPitch MemAllocPitch} passes back pitches that
* always work with {@code cuMemcpy2D()}. On intra-device memory copies (device to device, CUDA array to device, CUDA array to CUDA array),
* {@code cuMemcpy2D()} may fail for pitches not computed by {@code cuMemAllocPitch()}. {@code cuMemcpy2DUnaligned()} does not have this restriction, but
* may run significantly slower in the cases where {@code cuMemcpy2D()} would have returned an error code.
*
* @param pCopy parameters for the memory copy
*/
@NativeType("CUresult")
public static int cuMemcpy2DUnaligned(@NativeType("CUDA_MEMCPY2D const *") CUDA_MEMCPY2D pCopy) {
return ncuMemcpy2DUnaligned(pCopy.address());
}
// --- [ cuMemcpy3D ] ---
/** Unsafe version of: {@link #cuMemcpy3D Memcpy3D} */
public static int ncuMemcpy3D(long pCopy) {
long __functionAddress = Functions.Memcpy3D;
return callPI(pCopy, __functionAddress);
}
/**
* Copies memory for 3D arrays.
*
* Performs a 3D memory copy according to the parameters specified in {@code pCopy}.
*
* If {@code srcMemoryType} is {@link #CU_MEMORYTYPE_UNIFIED MEMORYTYPE_UNIFIED}, {@code srcDevice} and {@code srcPitch} specify the (unified virtual address space) base address of the
* source data and the bytes per row to apply. {@code srcArray} is ignored. This value may be used only if unified addressing is supported in the calling
* context.
*
* If {@code srcMemoryType} is {@link #CU_MEMORYTYPE_HOST MEMORYTYPE_HOST}, {@code srcHost}, {@code srcPitch} and {@code srcHeight} specify the (host) base address of the source
* data, the bytes per row, and the height of each 2D slice of the 3D array. {@code srcArray} is ignored.
*
* If {@code srcMemoryType} is {@link #CU_MEMORYTYPE_DEVICE MEMORYTYPE_DEVICE}, {@code srcDevice}, {@code srcPitch} and {@code srcHeight} specify the (device) base address of the
* source data, the bytes per row, and the height of each 2D slice of the 3D array. {@code srcArray} is ignored.
*
* If {@code srcMemoryType} is {@link #CU_MEMORYTYPE_ARRAY MEMORYTYPE_ARRAY}, {@code srcArray} specifies the handle of the source data. {@code srcHost}, {@code srcDevice},
* {@code srcPitch} and {@code srcHeight} are ignored.
*
* If {@code dstMemoryType} is {@link #CU_MEMORYTYPE_UNIFIED MEMORYTYPE_UNIFIED}, {@code dstDevice} and {@code dstPitch} specify the (unified virtual address space) base address of the
* destination data and the bytes per row to apply. {@code dstArray} is ignored. This value may be used only if unified addressing is supported in the calling
* context.
*
* If {@code dstMemoryType} is {@link #CU_MEMORYTYPE_HOST MEMORYTYPE_HOST}, {@code dstHost}, {@code dstPitch} and {@code dstHeight} specify the (host) base address of the destination
* data, the bytes per row, and the height of each 2D slice of the 3D array. {@code dstArray} is ignored.
*
* If {@code dstMemoryType} is {@link #CU_MEMORYTYPE_DEVICE MEMORYTYPE_DEVICE}, {@code dstDevice}, {@code dstPitch} and {@code dstHeight} specify the (device) base address of the
* destination data, the bytes per row, and the height of each 2D slice of the 3D array. {@code dstArray} is ignored.
*
* If {@code dstMemoryType} is {@link #CU_MEMORYTYPE_ARRAY MEMORYTYPE_ARRAY}, {@code dstArray} specifies the handle of the destination data. {@code dstHost}, {@code dstDevice},
* {@code dstPitch} and {@code dstHeight} are ignored.
*
* {@code srcXInBytes}, {@code srcY} and {@code srcZ} specify the base address of the source data for the copy.
*
* For host pointers, the starting address is
*
*
* void* Start = (void*)((char*)srcHost+(srcZ*srcHeight+srcY)*srcPitch + srcXInBytes);
*
* For device pointers, the starting address is
*
*
* CUdeviceptr Start = srcDevice+(srcZ*srcHeight+srcY)*srcPitch+srcXInBytes;
*
* For CUDA arrays, {@code srcXInBytes} must be evenly divisible by the array element size.
*
* {@code dstXInBytes}, {@code dstY} and {@code dstZ} specify the base address of the destination data for the copy.
*
* For host pointers, the base address is
*
*
* void* dstStart = (void*)((char*)dstHost+(dstZ*dstHeight+dstY)*dstPitch + dstXInBytes);
*
* For device pointers, the starting address is
*
*
* CUdeviceptr dstStart = dstDevice+(dstZ*dstHeight+dstY)*dstPitch+dstXInBytes;
*
* For CUDA arrays, {@code dstXInBytes} must be evenly divisible by the array element size.
*
* {@code WidthInBytes}, {@code Height} and {@code Depth} specify the width (in bytes), height and depth of the 3D copy being performed.
*
* If specified, {@code srcPitch} must be greater than or equal to {@code WidthInBytes} + {@code srcXInBytes}, and {@code dstPitch} must be greater than
* or equal to {@code WidthInBytes} + {@code dstXInBytes}.
*
* If specified, {@code srcHeight} must be greater than or equal to {@code Height} + {@code srcY}, and {@code dstHeight} must be greater than or equal to
* {@code Height} + {@code dstY}.
*
* {@link #cuMemcpy3D Memcpy3D} returns an error if any pitch is greater than the maximum allowed ({@link #CU_DEVICE_ATTRIBUTE_MAX_PITCH DEVICE_ATTRIBUTE_MAX_PITCH}).
*
* The {@code srcLOD} and {@code dstLOD} members of the {@code CUDA_MEMCPY3D} structure must be set to 0.
*
* Note: this function exhibits synchronous behavior for most use cases.
*
* @param pCopy parameters for the memory copy
*/
@NativeType("CUresult")
public static int cuMemcpy3D(@NativeType("CUDA_MEMCPY3D const *") CUDA_MEMCPY3D pCopy) {
return ncuMemcpy3D(pCopy.address());
}
// --- [ cuMemcpy3DPeer ] ---
/** Unsafe version of: {@link #cuMemcpy3DPeer Memcpy3DPeer} */
public static int ncuMemcpy3DPeer(long pCopy) {
long __functionAddress = Functions.Memcpy3DPeer;
if (CHECKS) {
check(__functionAddress);
}
return callPI(pCopy, __functionAddress);
}
/**
* Copies memory between contexts.
*
* Performs a 3D memory copy according to the parameters specified in {@code pCopy}.
*
* @param pCopy parameters for the memory copy
*/
@NativeType("CUresult")
public static int cuMemcpy3DPeer(@NativeType("CUDA_MEMCPY3D_PEER const *") CUDA_MEMCPY3D_PEER pCopy) {
return ncuMemcpy3DPeer(pCopy.address());
}
// --- [ cuMemcpyAsync ] ---
/**
* Copies memory asynchronously.
*
* Copies data between two pointers. {@code dst} and {@code src} are base pointers of the destination and source, respectively. {@code ByteCount}
* specifies the number of bytes to copy. Note that this function infers the type of the transfer (host to host, host to device, device to device, or
* device to host) from the pointer values. This function is only allowed in contexts which support unified addressing.
*
* @param dst destination unified virtual address space pointer
* @param src source unified virtual address space pointer
* @param ByteCount size of memory copy in bytes
* @param hStream stream identifier
*/
@NativeType("CUresult")
public static int cuMemcpyAsync(@NativeType("CUdeviceptr") long dst, @NativeType("CUdeviceptr") long src, @NativeType("size_t") long ByteCount, @NativeType("CUstream") long hStream) {
long __functionAddress = Functions.MemcpyAsync;
if (CHECKS) {
check(__functionAddress);
check(dst);
check(src);
}
return callPPPPI(dst, src, ByteCount, hStream, __functionAddress);
}
// --- [ cuMemcpyPeerAsync ] ---
/**
* Copies device memory between two contexts asynchronously.
*
* Copies from device memory in one context to device memory in another context. {@code dstDevice} is the base device pointer of the destination memory
* and {@code dstContext} is the destination context. {@code srcDevice} is the base device pointer of the source memory and {@code srcContext} is the
* source context. {@code ByteCount} specifies the number of bytes to copy.
*
* @param dstDevice destination device pointer
* @param dstContext destination context
* @param srcDevice source device pointer
* @param srcContext source context
* @param ByteCount size of memory copy in bytes
* @param hStream stream identifier
*/
@NativeType("CUresult")
public static int cuMemcpyPeerAsync(@NativeType("CUdeviceptr") long dstDevice, @NativeType("CUcontext") long dstContext, @NativeType("CUdeviceptr") long srcDevice, @NativeType("CUcontext") long srcContext, @NativeType("size_t") long ByteCount, @NativeType("CUstream") long hStream) {
long __functionAddress = Functions.MemcpyPeerAsync;
if (CHECKS) {
check(__functionAddress);
check(dstDevice);
check(dstContext);
check(srcDevice);
check(srcContext);
}
return callPPPPPPI(dstDevice, dstContext, srcDevice, srcContext, ByteCount, hStream, __functionAddress);
}
// --- [ cuMemcpyHtoDAsync ] ---
/**
* Unsafe version of: {@link #cuMemcpyHtoDAsync MemcpyHtoDAsync}
*
* @param ByteCount size of memory copy in bytes
*/
public static int ncuMemcpyHtoDAsync(long dstDevice, long srcHost, long ByteCount, long hStream) {
long __functionAddress = Functions.MemcpyHtoDAsync;
if (CHECKS) {
check(dstDevice);
}
return callPPPPI(dstDevice, srcHost, ByteCount, hStream, __functionAddress);
}
/**
* Copies memory from Host to Device.
*
* Copies from host memory to device memory. {@code dstDevice} and {@code srcHost} are the base addresses of the destination and source, respectively.
* {@code ByteCount} specifies the number of bytes to copy.
*
* @param dstDevice destination device pointer
* @param srcHost source host pointer
* @param hStream stream identifier
*/
@NativeType("CUresult")
public static int cuMemcpyHtoDAsync(@NativeType("CUdeviceptr") long dstDevice, @NativeType("void const *") ByteBuffer srcHost, @NativeType("CUstream") long hStream) {
return ncuMemcpyHtoDAsync(dstDevice, memAddress(srcHost), srcHost.remaining(), hStream);
}
/**
* Copies memory from Host to Device.
*
* Copies from host memory to device memory. {@code dstDevice} and {@code srcHost} are the base addresses of the destination and source, respectively.
* {@code ByteCount} specifies the number of bytes to copy.
*
* @param dstDevice destination device pointer
* @param srcHost source host pointer
* @param hStream stream identifier
*/
@NativeType("CUresult")
public static int cuMemcpyHtoDAsync(@NativeType("CUdeviceptr") long dstDevice, @NativeType("void const *") ShortBuffer srcHost, @NativeType("CUstream") long hStream) {
return ncuMemcpyHtoDAsync(dstDevice, memAddress(srcHost), Integer.toUnsignedLong(srcHost.remaining()) << 1, hStream);
}
/**
* Copies memory from Host to Device.
*
* Copies from host memory to device memory. {@code dstDevice} and {@code srcHost} are the base addresses of the destination and source, respectively.
* {@code ByteCount} specifies the number of bytes to copy.
*
* @param dstDevice destination device pointer
* @param srcHost source host pointer
* @param hStream stream identifier
*/
@NativeType("CUresult")
public static int cuMemcpyHtoDAsync(@NativeType("CUdeviceptr") long dstDevice, @NativeType("void const *") IntBuffer srcHost, @NativeType("CUstream") long hStream) {
return ncuMemcpyHtoDAsync(dstDevice, memAddress(srcHost), Integer.toUnsignedLong(srcHost.remaining()) << 2, hStream);
}
/**
* Copies memory from Host to Device.
*
* Copies from host memory to device memory. {@code dstDevice} and {@code srcHost} are the base addresses of the destination and source, respectively.
* {@code ByteCount} specifies the number of bytes to copy.
*
* @param dstDevice destination device pointer
* @param srcHost source host pointer
* @param hStream stream identifier
*/
@NativeType("CUresult")
public static int cuMemcpyHtoDAsync(@NativeType("CUdeviceptr") long dstDevice, @NativeType("void const *") LongBuffer srcHost, @NativeType("CUstream") long hStream) {
return ncuMemcpyHtoDAsync(dstDevice, memAddress(srcHost), Integer.toUnsignedLong(srcHost.remaining()) << 3, hStream);
}
/**
* Copies memory from Host to Device.
*
* Copies from host memory to device memory. {@code dstDevice} and {@code srcHost} are the base addresses of the destination and source, respectively.
* {@code ByteCount} specifies the number of bytes to copy.
*
* @param dstDevice destination device pointer
* @param srcHost source host pointer
* @param hStream stream identifier
*/
@NativeType("CUresult")
public static int cuMemcpyHtoDAsync(@NativeType("CUdeviceptr") long dstDevice, @NativeType("void const *") FloatBuffer srcHost, @NativeType("CUstream") long hStream) {
return ncuMemcpyHtoDAsync(dstDevice, memAddress(srcHost), Integer.toUnsignedLong(srcHost.remaining()) << 2, hStream);
}
/**
* Copies memory from Host to Device.
*
* Copies from host memory to device memory. {@code dstDevice} and {@code srcHost} are the base addresses of the destination and source, respectively.
* {@code ByteCount} specifies the number of bytes to copy.
*
* @param dstDevice destination device pointer
* @param srcHost source host pointer
* @param hStream stream identifier
*/
@NativeType("CUresult")
public static int cuMemcpyHtoDAsync(@NativeType("CUdeviceptr") long dstDevice, @NativeType("void const *") DoubleBuffer srcHost, @NativeType("CUstream") long hStream) {
return ncuMemcpyHtoDAsync(dstDevice, memAddress(srcHost), Integer.toUnsignedLong(srcHost.remaining()) << 3, hStream);
}
/**
* Copies memory from Host to Device.
*
* Copies from host memory to device memory. {@code dstDevice} and {@code srcHost} are the base addresses of the destination and source, respectively.
* {@code ByteCount} specifies the number of bytes to copy.
*
* @param dstDevice destination device pointer
* @param srcHost source host pointer
* @param hStream stream identifier
*/
@NativeType("CUresult")
public static int cuMemcpyHtoDAsync(@NativeType("CUdeviceptr") long dstDevice, @NativeType("void const *") PointerBuffer srcHost, @NativeType("CUstream") long hStream) {
return ncuMemcpyHtoDAsync(dstDevice, memAddress(srcHost), Integer.toUnsignedLong(srcHost.remaining()) << POINTER_SHIFT, hStream);
}
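// Usage sketch for the overloads above (assumptions: an initialized context is current, stream is a valid CUstream, and error
// handling is reduced to a single CUDA_SUCCESS comparison):
//
// try (MemoryStack stack = stackPush()) {
//     FloatBuffer host = stack.floats(0.0f, 1.0f, 2.0f, 3.0f);
//
//     PointerBuffer pp = stack.mallocPointer(1);
//     if (cuMemAlloc(pp, Float.BYTES * host.remaining()) != CUDA_SUCCESS) {
//         throw new IllegalStateException("cuMemAlloc failed");
//     }
//     long devPtr = pp.get(0);
//
//     // ByteCount is derived from host.remaining(), so exactly the buffer's remaining elements are copied. Note that the copy
//     // only overlaps other work when the host memory is page-locked (cuMemHostAlloc); with pageable memory such as the stack
//     // buffer here, the transfer is effectively synchronous.
//     cuMemcpyHtoDAsync(devPtr, host, stream);
// }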
// --- [ cuMemcpyDtoHAsync ] ---
/**
* Unsafe version of: {@link #cuMemcpyDtoHAsync MemcpyDtoHAsync}
*
* @param ByteCount size of memory copy in bytes
*/
public static int ncuMemcpyDtoHAsync(long dstHost, long srcDevice, long ByteCount, long hStream) {
long __functionAddress = Functions.MemcpyDtoHAsync;
if (CHECKS) {
check(srcDevice);
}
return callPPPPI(dstHost, srcDevice, ByteCount, hStream, __functionAddress);
}
/**
* Copies memory from Device to Host.
*
* Copies from device to host memory. {@code dstHost} and {@code srcDevice} specify the base pointers of the destination and source, respectively. {@code
* ByteCount} specifies the number of bytes to copy.
*
* @param dstHost destination host pointer
* @param srcDevice source device pointer
* @param hStream stream identifier
*/
@NativeType("CUresult")
public static int cuMemcpyDtoHAsync(@NativeType("void *") ByteBuffer dstHost, @NativeType("CUdeviceptr") long srcDevice, @NativeType("CUstream") long hStream) {
return ncuMemcpyDtoHAsync(memAddress(dstHost), srcDevice, dstHost.remaining(), hStream);
}
/**
* Copies memory from Device to Host.
*
* Copies from device to host memory. {@code dstHost} and {@code srcDevice} specify the base pointers of the destination and source, respectively. {@code
* ByteCount} specifies the number of bytes to copy.
*
* @param dstHost destination host pointer
* @param srcDevice source device pointer
* @param hStream stream identifier
*/
@NativeType("CUresult")
public static int cuMemcpyDtoHAsync(@NativeType("void *") ShortBuffer dstHost, @NativeType("CUdeviceptr") long srcDevice, @NativeType("CUstream") long hStream) {
return ncuMemcpyDtoHAsync(memAddress(dstHost), srcDevice, Integer.toUnsignedLong(dstHost.remaining()) << 1, hStream);
}
/**
* Copies memory from Device to Host.
*
* Copies from device to host memory. {@code dstHost} and {@code srcDevice} specify the base pointers of the destination and source, respectively. {@code
* ByteCount} specifies the number of bytes to copy.
*
* @param dstHost destination host pointer
* @param srcDevice source device pointer
* @param hStream stream identifier
*/
@NativeType("CUresult")
public static int cuMemcpyDtoHAsync(@NativeType("void *") IntBuffer dstHost, @NativeType("CUdeviceptr") long srcDevice, @NativeType("CUstream") long hStream) {
return ncuMemcpyDtoHAsync(memAddress(dstHost), srcDevice, Integer.toUnsignedLong(dstHost.remaining()) << 2, hStream);
}
/**
* Copies memory from Device to Host.
*
* Copies from device to host memory. {@code dstHost} and {@code srcDevice} specify the base pointers of the destination and source, respectively. {@code
* ByteCount} specifies the number of bytes to copy.
*
* @param dstHost destination host pointer
* @param srcDevice source device pointer
* @param hStream stream identifier
*/
@NativeType("CUresult")
public static int cuMemcpyDtoHAsync(@NativeType("void *") LongBuffer dstHost, @NativeType("CUdeviceptr") long srcDevice, @NativeType("CUstream") long hStream) {
return ncuMemcpyDtoHAsync(memAddress(dstHost), srcDevice, Integer.toUnsignedLong(dstHost.remaining()) << 3, hStream);
}
/**
* Copies memory from Device to Host.
*
* Copies from device to host memory. {@code dstHost} and {@code srcDevice} specify the base pointers of the destination and source, respectively. {@code
* ByteCount} specifies the number of bytes to copy.
*
* @param dstHost destination host pointer
* @param srcDevice source device pointer
* @param hStream stream identifier
*/
@NativeType("CUresult")
public static int cuMemcpyDtoHAsync(@NativeType("void *") FloatBuffer dstHost, @NativeType("CUdeviceptr") long srcDevice, @NativeType("CUstream") long hStream) {
return ncuMemcpyDtoHAsync(memAddress(dstHost), srcDevice, Integer.toUnsignedLong(dstHost.remaining()) << 2, hStream);
}
/**
* Copies memory from Device to Host.
*
* Copies from device to host memory. {@code dstHost} and {@code srcDevice} specify the base pointers of the destination and source, respectively. {@code
* ByteCount} specifies the number of bytes to copy.
*
* @param dstHost destination host pointer
* @param srcDevice source device pointer
* @param hStream stream identifier
*/
@NativeType("CUresult")
public static int cuMemcpyDtoHAsync(@NativeType("void *") DoubleBuffer dstHost, @NativeType("CUdeviceptr") long srcDevice, @NativeType("CUstream") long hStream) {
return ncuMemcpyDtoHAsync(memAddress(dstHost), srcDevice, Integer.toUnsignedLong(dstHost.remaining()) << 3, hStream);
}
/**
* Copies memory from Device to Host.
*
* Copies from device to host memory. {@code dstHost} and {@code srcDevice} specify the base pointers of the destination and source, respectively. {@code
* ByteCount} specifies the number of bytes to copy.
*
* @param dstHost destination host pointer
* @param srcDevice source device pointer
* @param hStream stream identifier
*/
@NativeType("CUresult")
public static int cuMemcpyDtoHAsync(@NativeType("void *") PointerBuffer dstHost, @NativeType("CUdeviceptr") long srcDevice, @NativeType("CUstream") long hStream) {
return ncuMemcpyDtoHAsync(memAddress(dstHost), srcDevice, Integer.toUnsignedLong(dstHost.remaining()) << POINTER_SHIFT, hStream);
}
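// A matching download sketch (assumptions: devPtr holds at least four floats, stream is valid). The destination buffer must not
// be read until the stream has been synchronized:
//
// try (MemoryStack stack = stackPush()) {
//     FloatBuffer host = stack.mallocFloat(4);
//     cuMemcpyDtoHAsync(host, devPtr, stream);
//     cuStreamSynchronize(stream); // host is only valid once this returns CUDA_SUCCESS
//     float first = host.get(0);
// }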
// --- [ cuMemcpyDtoDAsync ] ---
/**
* Copies memory from Device to Device.
*
* Copies from device memory to device memory. {@code dstDevice} and {@code srcDevice} are the base pointers of the destination and source, respectively.
* {@code ByteCount} specifies the number of bytes to copy.
*
* @param dstDevice destination device pointer
* @param srcDevice source device pointer
* @param ByteCount size of memory copy in bytes
* @param hStream stream identifier
*/
@NativeType("CUresult")
public static int cuMemcpyDtoDAsync(@NativeType("CUdeviceptr") long dstDevice, @NativeType("CUdeviceptr") long srcDevice, @NativeType("size_t") long ByteCount, @NativeType("CUstream") long hStream) {
long __functionAddress = Functions.MemcpyDtoDAsync;
if (CHECKS) {
check(dstDevice);
check(srcDevice);
}
return callPPPPI(dstDevice, srcDevice, ByteCount, hStream, __functionAddress);
}
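// Sketch: duplicating a device allocation on the same stream (assumption: dstPtr and srcPtr come from cuMemAlloc and are at
// least bytes long). Unlike the host-side overloads above, the byte count is passed explicitly because no Java buffer is involved:
//
// cuMemcpyDtoDAsync(dstPtr, srcPtr, bytes, stream);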
// --- [ cuMemcpyHtoAAsync ] ---
/**
* Unsafe version of: {@link #cuMemcpyHtoAAsync MemcpyHtoAAsync}
*
* @param ByteCount size of memory copy in bytes
*/
public static int ncuMemcpyHtoAAsync(long dstArray, long dstOffset, long srcHost, long ByteCount, long hStream) {
long __functionAddress = Functions.MemcpyHtoAAsync;
if (CHECKS) {
check(dstArray);
}
return callPPPPPI(dstArray, dstOffset, srcHost, ByteCount, hStream, __functionAddress);
}
/**
* Copies memory from Host to Array.
*
* Copies from host memory to a 1D CUDA array. {@code dstArray} and {@code dstOffset} specify the CUDA array handle and starting offset in bytes of the
* destination data. {@code srcHost} specifies the base address of the source. {@code ByteCount} specifies the number of bytes to copy.
*
* @param dstArray destination array
* @param dstOffset offset in bytes of destination array
* @param srcHost source host pointer
* @param hStream stream identifier
*/
@NativeType("CUresult")
public static int cuMemcpyHtoAAsync(@NativeType("CUarray") long dstArray, @NativeType("size_t") long dstOffset, @NativeType("void const *") ByteBuffer srcHost, @NativeType("CUstream") long hStream) {
return ncuMemcpyHtoAAsync(dstArray, dstOffset, memAddress(srcHost), srcHost.remaining(), hStream);
}
/**
* Copies memory from Host to Array.
*
* Copies from host memory to a 1D CUDA array. {@code dstArray} and {@code dstOffset} specify the CUDA array handle and starting offset in bytes of the
* destination data. {@code srcHost} specifies the base address of the source. {@code ByteCount} specifies the number of bytes to copy.
*
* @param dstArray destination array
* @param dstOffset offset in bytes of destination array
* @param srcHost source host pointer
* @param hStream stream identifier
*/
@NativeType("CUresult")
public static int cuMemcpyHtoAAsync(@NativeType("CUarray") long dstArray, @NativeType("size_t") long dstOffset, @NativeType("void const *") ShortBuffer srcHost, @NativeType("CUstream") long hStream) {
return ncuMemcpyHtoAAsync(dstArray, dstOffset, memAddress(srcHost), Integer.toUnsignedLong(srcHost.remaining()) << 1, hStream);
}
/**
* Copies memory from Host to Array.
*
* Copies from host memory to a 1D CUDA array. {@code dstArray} and {@code dstOffset} specify the CUDA array handle and starting offset in bytes of the
* destination data. {@code srcHost} specifies the base address of the source. {@code ByteCount} specifies the number of bytes to copy.
*
* @param dstArray destination array
* @param dstOffset offset in bytes of destination array
* @param srcHost source host pointer
* @param hStream stream identifier
*/
@NativeType("CUresult")
public static int cuMemcpyHtoAAsync(@NativeType("CUarray") long dstArray, @NativeType("size_t") long dstOffset, @NativeType("void const *") IntBuffer srcHost, @NativeType("CUstream") long hStream) {
return ncuMemcpyHtoAAsync(dstArray, dstOffset, memAddress(srcHost), Integer.toUnsignedLong(srcHost.remaining()) << 2, hStream);
}
/**
* Copies memory from Host to Array.
*
* Copies from host memory to a 1D CUDA array. {@code dstArray} and {@code dstOffset} specify the CUDA array handle and starting offset in bytes of the
* destination data. {@code srcHost} specifies the base address of the source. {@code ByteCount} specifies the number of bytes to copy.
*
* @param dstArray destination array
* @param dstOffset offset in bytes of destination array
* @param srcHost source host pointer
* @param hStream stream identifier
*/
@NativeType("CUresult")
public static int cuMemcpyHtoAAsync(@NativeType("CUarray") long dstArray, @NativeType("size_t") long dstOffset, @NativeType("void const *") LongBuffer srcHost, @NativeType("CUstream") long hStream) {
return ncuMemcpyHtoAAsync(dstArray, dstOffset, memAddress(srcHost), Integer.toUnsignedLong(srcHost.remaining()) << 3, hStream);
}
/**
* Copies memory from Host to Array.
*
* Copies from host memory to a 1D CUDA array. {@code dstArray} and {@code dstOffset} specify the CUDA array handle and starting offset in bytes of the
* destination data. {@code srcHost} specifies the base address of the source. {@code ByteCount} specifies the number of bytes to copy.
*
* @param dstArray destination array
* @param dstOffset offset in bytes of destination array
* @param srcHost source host pointer
* @param hStream stream identifier
*/
@NativeType("CUresult")
public static int cuMemcpyHtoAAsync(@NativeType("CUarray") long dstArray, @NativeType("size_t") long dstOffset, @NativeType("void const *") FloatBuffer srcHost, @NativeType("CUstream") long hStream) {
return ncuMemcpyHtoAAsync(dstArray, dstOffset, memAddress(srcHost), Integer.toUnsignedLong(srcHost.remaining()) << 2, hStream);
}
/**
* Copies memory from Host to Array.
*
* Copies from host memory to a 1D CUDA array. {@code dstArray} and {@code dstOffset} specify the CUDA array handle and starting offset in bytes of the
* destination data. {@code srcHost} specifies the base address of the source. {@code ByteCount} specifies the number of bytes to copy.
*
* @param dstArray destination array
* @param dstOffset offset in bytes of destination array
* @param srcHost source host pointer
* @param hStream stream identifier
*/
@NativeType("CUresult")
public static int cuMemcpyHtoAAsync(@NativeType("CUarray") long dstArray, @NativeType("size_t") long dstOffset, @NativeType("void const *") DoubleBuffer srcHost, @NativeType("CUstream") long hStream) {
return ncuMemcpyHtoAAsync(dstArray, dstOffset, memAddress(srcHost), Integer.toUnsignedLong(srcHost.remaining()) << 3, hStream);
}
/**
* Copies memory from Host to Array.
*
* Copies from host memory to a 1D CUDA array. {@code dstArray} and {@code dstOffset} specify the CUDA array handle and starting offset in bytes of the
* destination data. {@code srcHost} specifies the base address of the source. {@code ByteCount} specifies the number of bytes to copy.
*
* @param dstArray destination array
* @param dstOffset offset in bytes of destination array
* @param srcHost source host pointer
* @param hStream stream identifier
*/
@NativeType("CUresult")
public static int cuMemcpyHtoAAsync(@NativeType("CUarray") long dstArray, @NativeType("size_t") long dstOffset, @NativeType("void const *") PointerBuffer srcHost, @NativeType("CUstream") long hStream) {
return ncuMemcpyHtoAAsync(dstArray, dstOffset, memAddress(srcHost), Integer.toUnsignedLong(srcHost.remaining()) << POINTER_SHIFT, hStream);
}
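// Sketch: filling the start of a 1D CUDA array from host memory (assumption: array was created with cuArrayCreate, as in the
// example further below, and is at least 16 bytes wide):
//
// try (MemoryStack stack = stackPush()) {
//     ByteBuffer pixels = stack.malloc(16);          // ByteCount == pixels.remaining() == 16
//     cuMemcpyHtoAAsync(array, 0, pixels, stream);   // dstOffset 0: write from the start of the array
// }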
// --- [ cuMemcpyAtoHAsync ] ---
/**
* Unsafe version of: {@link #cuMemcpyAtoHAsync MemcpyAtoHAsync}
*
* @param ByteCount size of memory copy in bytes
*/
public static int ncuMemcpyAtoHAsync(long dstHost, long srcArray, long srcOffset, long ByteCount, long hStream) {
long __functionAddress = Functions.MemcpyAtoHAsync;
if (CHECKS) {
check(srcArray);
}
return callPPPPPI(dstHost, srcArray, srcOffset, ByteCount, hStream, __functionAddress);
}
/**
* Copies memory from Array to Host.
*
* Copies from one 1D CUDA array to host memory. {@code dstHost} specifies the base pointer of the destination. {@code srcArray} and {@code srcOffset}
* specify the CUDA array handle and starting offset in bytes of the source data. {@code ByteCount} specifies the number of bytes to copy.
*
* @param dstHost destination pointer
* @param srcArray source array
* @param srcOffset offset in bytes of source array
* @param hStream stream identifier
*/
@NativeType("CUresult")
public static int cuMemcpyAtoHAsync(@NativeType("void *") ByteBuffer dstHost, @NativeType("CUarray") long srcArray, @NativeType("size_t") long srcOffset, @NativeType("CUstream") long hStream) {
return ncuMemcpyAtoHAsync(memAddress(dstHost), srcArray, srcOffset, dstHost.remaining(), hStream);
}
/**
* Copies memory from Array to Host.
*
* Copies from one 1D CUDA array to host memory. {@code dstHost} specifies the base pointer of the destination. {@code srcArray} and {@code srcOffset}
* specify the CUDA array handle and starting offset in bytes of the source data. {@code ByteCount} specifies the number of bytes to copy.
*
* @param dstHost destination pointer
* @param srcArray source array
* @param srcOffset offset in bytes of source array
* @param hStream stream identifier
*/
@NativeType("CUresult")
public static int cuMemcpyAtoHAsync(@NativeType("void *") ShortBuffer dstHost, @NativeType("CUarray") long srcArray, @NativeType("size_t") long srcOffset, @NativeType("CUstream") long hStream) {
return ncuMemcpyAtoHAsync(memAddress(dstHost), srcArray, srcOffset, Integer.toUnsignedLong(dstHost.remaining()) << 1, hStream);
}
/**
* Copies memory from Array to Host.
*
* Copies from one 1D CUDA array to host memory. {@code dstHost} specifies the base pointer of the destination. {@code srcArray} and {@code srcOffset}
* specify the CUDA array handle and starting offset in bytes of the source data. {@code ByteCount} specifies the number of bytes to copy.
*
* @param dstHost destination pointer
* @param srcArray source array
* @param srcOffset offset in bytes of source array
* @param hStream stream identifier
*/
@NativeType("CUresult")
public static int cuMemcpyAtoHAsync(@NativeType("void *") IntBuffer dstHost, @NativeType("CUarray") long srcArray, @NativeType("size_t") long srcOffset, @NativeType("CUstream") long hStream) {
return ncuMemcpyAtoHAsync(memAddress(dstHost), srcArray, srcOffset, Integer.toUnsignedLong(dstHost.remaining()) << 2, hStream);
}
/**
* Copies memory from Array to Host.
*
* Copies from one 1D CUDA array to host memory. {@code dstHost} specifies the base pointer of the destination. {@code srcArray} and {@code srcOffset}
* specify the CUDA array handle and starting offset in bytes of the source data. {@code ByteCount} specifies the number of bytes to copy.
*
* @param dstHost destination pointer
* @param srcArray source array
* @param srcOffset offset in bytes of source array
* @param hStream stream identifier
*/
@NativeType("CUresult")
public static int cuMemcpyAtoHAsync(@NativeType("void *") LongBuffer dstHost, @NativeType("CUarray") long srcArray, @NativeType("size_t") long srcOffset, @NativeType("CUstream") long hStream) {
return ncuMemcpyAtoHAsync(memAddress(dstHost), srcArray, srcOffset, Integer.toUnsignedLong(dstHost.remaining()) << 3, hStream);
}
/**
* Copies memory from Array to Host.
*
* Copies from one 1D CUDA array to host memory. {@code dstHost} specifies the base pointer of the destination. {@code srcArray} and {@code srcOffset}
* specify the CUDA array handle and starting offset in bytes of the source data. {@code ByteCount} specifies the number of bytes to copy.
*
* @param dstHost destination pointer
* @param srcArray source array
* @param srcOffset offset in bytes of source array
* @param hStream stream identifier
*/
@NativeType("CUresult")
public static int cuMemcpyAtoHAsync(@NativeType("void *") FloatBuffer dstHost, @NativeType("CUarray") long srcArray, @NativeType("size_t") long srcOffset, @NativeType("CUstream") long hStream) {
return ncuMemcpyAtoHAsync(memAddress(dstHost), srcArray, srcOffset, Integer.toUnsignedLong(dstHost.remaining()) << 2, hStream);
}
/**
* Copies memory from Array to Host.
*
* Copies from one 1D CUDA array to host memory. {@code dstHost} specifies the base pointer of the destination. {@code srcArray} and {@code srcOffset}
* specify the CUDA array handle and starting offset in bytes of the source data. {@code ByteCount} specifies the number of bytes to copy.
*
* @param dstHost destination pointer
* @param srcArray source array
* @param srcOffset offset in bytes of source array
* @param hStream stream identifier
*/
@NativeType("CUresult")
public static int cuMemcpyAtoHAsync(@NativeType("void *") DoubleBuffer dstHost, @NativeType("CUarray") long srcArray, @NativeType("size_t") long srcOffset, @NativeType("CUstream") long hStream) {
return ncuMemcpyAtoHAsync(memAddress(dstHost), srcArray, srcOffset, Integer.toUnsignedLong(dstHost.remaining()) << 3, hStream);
}
/**
* Copies memory from Array to Host.
*
* Copies from one 1D CUDA array to host memory. {@code dstHost} specifies the base pointer of the destination. {@code srcArray} and {@code srcOffset}
* specify the CUDA array handle and starting offset in bytes of the source data. {@code ByteCount} specifies the number of bytes to copy.
*
* @param dstHost destination pointer
* @param srcArray source array
* @param srcOffset offset in bytes of source array
* @param hStream stream identifier
*/
@NativeType("CUresult")
public static int cuMemcpyAtoHAsync(@NativeType("void *") PointerBuffer dstHost, @NativeType("CUarray") long srcArray, @NativeType("size_t") long srcOffset, @NativeType("CUstream") long hStream) {
return ncuMemcpyAtoHAsync(memAddress(dstHost), srcArray, srcOffset, Integer.toUnsignedLong(dstHost.remaining()) << POINTER_SHIFT, hStream);
}
// --- [ cuMemcpy2DAsync ] ---
/** Unsafe version of: {@link #cuMemcpy2DAsync Memcpy2DAsync} */
public static int ncuMemcpy2DAsync(long pCopy, long hStream) {
long __functionAddress = Functions.Memcpy2DAsync;
return callPPI(pCopy, hStream, __functionAddress);
}
/**
* Copies memory for 2D arrays.
*
* Perform a 2D memory copy according to the parameters specified in {@code pCopy}.
*
* If {@code srcMemoryType} is {@link #CU_MEMORYTYPE_HOST MEMORYTYPE_HOST}, {@code srcHost} and {@code srcPitch} specify the (host) base address of the source data and the bytes per
* row to apply. {@code srcArray} is ignored.
*
* If {@code srcMemoryType} is {@link #CU_MEMORYTYPE_UNIFIED MEMORYTYPE_UNIFIED}, {@code srcDevice} and {@code srcPitch} specify the (unified virtual address space) base address of the
* source data and the bytes per row to apply. {@code srcArray} is ignored. This value may be used only if unified addressing is supported in the calling
* context.
*
* If {@code srcMemoryType} is {@link #CU_MEMORYTYPE_DEVICE MEMORYTYPE_DEVICE}, {@code srcDevice} and {@code srcPitch} specify the (device) base address of the source data and the
* bytes per row to apply. {@code srcArray} is ignored.
*
* If {@code srcMemoryType} is {@link #CU_MEMORYTYPE_ARRAY MEMORYTYPE_ARRAY}, {@code srcArray} specifies the handle of the source data. {@code srcHost}, {@code srcDevice} and
* {@code srcPitch} are ignored.
*
* If {@code dstMemoryType} is {@link #CU_MEMORYTYPE_UNIFIED MEMORYTYPE_UNIFIED}, {@code dstDevice} and {@code dstPitch} specify the (unified virtual address space) base address of the
* destination data and the bytes per row to apply. {@code dstArray} is ignored. This value may be used only if unified addressing is supported in the calling
* context.
*
* If {@code dstMemoryType} is {@link #CU_MEMORYTYPE_HOST MEMORYTYPE_HOST}, {@code dstHost} and {@code dstPitch} specify the (host) base address of the destination data and the
* bytes per row to apply. {@code dstArray} is ignored.
*
* If {@code dstMemoryType} is {@link #CU_MEMORYTYPE_DEVICE MEMORYTYPE_DEVICE}, {@code dstDevice} and {@code dstPitch} specify the (device) base address of the destination data and
* the bytes per row to apply. {@code dstArray} is ignored.
*
* If {@code dstMemoryType} is {@link #CU_MEMORYTYPE_ARRAY MEMORYTYPE_ARRAY}, {@code dstArray} specifies the handle of the destination data. {@code dstHost}, {@code dstDevice} and
* {@code dstPitch} are ignored.
*
* {@code srcXInBytes} and {@code srcY} specify the base address of the source data for the copy.
*
* For host pointers, the starting address is
*
* {@code void* Start = (void*)((char*)srcHost+srcY*srcPitch + srcXInBytes);}
*
* For device pointers, the starting address is
*
* {@code CUdeviceptr Start = srcDevice+srcY*srcPitch+srcXInBytes;}
*
* For CUDA arrays, {@code srcXInBytes} must be evenly divisible by the array element size.
*
* {@code dstXInBytes} and {@code dstY} specify the base address of the destination data for the copy.
*
* For host pointers, the base address is
*
* {@code void* dstStart = (void*)((char*)dstHost+dstY*dstPitch + dstXInBytes);}
*
* For device pointers, the starting address is
*
* {@code CUdeviceptr dstStart = dstDevice+dstY*dstPitch+dstXInBytes;}
*
* For CUDA arrays, {@code dstXInBytes} must be evenly divisible by the array element size.
*
* {@code WidthInBytes} and {@code Height} specify the width (in bytes) and height of the 2D copy being performed.
*
* If specified, {@code srcPitch} must be greater than or equal to {@code WidthInBytes} + {@code srcXInBytes}, and {@code dstPitch} must be greater than
* or equal to {@code WidthInBytes} + {@code dstXInBytes}.
*
* If specified, {@code srcHeight} must be greater than or equal to {@code Height} + {@code srcY}, and {@code dstHeight} must be greater than or equal to
* {@code Height} + {@code dstY}.
*
* {@code cuMemcpy2DAsync()} returns an error if any pitch is greater than the maximum allowed ({@link #CU_DEVICE_ATTRIBUTE_MAX_PITCH DEVICE_ATTRIBUTE_MAX_PITCH}). {@link #cuMemAllocPitch MemAllocPitch} passes back
* pitches that always work with {@link #cuMemcpy2D Memcpy2D}. On intra-device memory copies (device to device, CUDA array to device, CUDA array to CUDA array),
* {@code cuMemcpy2DAsync()} may fail for pitches not computed by {@code cuMemAllocPitch()}.
*
* @param pCopy parameters for the memory copy
* @param hStream stream identifier
*/
@NativeType("CUresult")
public static int cuMemcpy2DAsync(@NativeType("CUDA_MEMCPY2D const *") CUDA_MEMCPY2D pCopy, @NativeType("CUstream") long hStream) {
return ncuMemcpy2DAsync(pCopy.address(), hStream);
}
// --- [ cuMemcpy3DAsync ] ---
/** Unsafe version of: {@link #cuMemcpy3DAsync Memcpy3DAsync} */
public static int ncuMemcpy3DAsync(long pCopy, long hStream) {
long __functionAddress = Functions.Memcpy3DAsync;
return callPPI(pCopy, hStream, __functionAddress);
}
/**
* Copies memory for 3D arrays.
*
* Perform a 3D memory copy according to the parameters specified in {@code pCopy}.
*
* If {@code srcMemoryType} is {@link #CU_MEMORYTYPE_UNIFIED MEMORYTYPE_UNIFIED}, {@code srcDevice} and {@code srcPitch} specify the (unified virtual address space) base address of the
* source data and the bytes per row to apply. {@code srcArray} is ignored. This value may be used only if unified addressing is supported in the calling
* context.
*
* If {@code srcMemoryType} is {@link #CU_MEMORYTYPE_HOST MEMORYTYPE_HOST}, {@code srcHost}, {@code srcPitch} and {@code srcHeight} specify the (host) base address of the source
* data, the bytes per row, and the height of each 2D slice of the 3D array. {@code srcArray} is ignored.
*
* If {@code srcMemoryType} is {@link #CU_MEMORYTYPE_DEVICE MEMORYTYPE_DEVICE}, {@code srcDevice}, {@code srcPitch} and {@code srcHeight} specify the (device) base address of the
* source data, the bytes per row, and the height of each 2D slice of the 3D array. {@code srcArray} is ignored.
*
* If {@code srcMemoryType} is {@link #CU_MEMORYTYPE_ARRAY MEMORYTYPE_ARRAY}, {@code srcArray} specifies the handle of the source data. {@code srcHost}, {@code srcDevice},
* {@code srcPitch} and {@code srcHeight} are ignored.
*
* If {@code dstMemoryType} is {@link #CU_MEMORYTYPE_UNIFIED MEMORYTYPE_UNIFIED}, {@code dstDevice} and {@code dstPitch} specify the (unified virtual address space) base address of the
* destination data and the bytes per row to apply. {@code dstArray} is ignored. This value may be used only if unified addressing is supported in the calling
* context.
*
* If {@code dstMemoryType} is {@link #CU_MEMORYTYPE_HOST MEMORYTYPE_HOST}, {@code dstHost}, {@code dstPitch} and {@code dstHeight} specify the (host) base address of the destination data,
* the bytes per row, and the height of each 2D slice of the 3D array. {@code dstArray} is ignored.
*
* If {@code dstMemoryType} is {@link #CU_MEMORYTYPE_DEVICE MEMORYTYPE_DEVICE}, {@code dstDevice}, {@code dstPitch} and {@code dstHeight} specify the (device) base address of the destination
* data, the bytes per row, and the height of each 2D slice of the 3D array. {@code dstArray} is ignored.
*
* If {@code dstMemoryType} is {@link #CU_MEMORYTYPE_ARRAY MEMORYTYPE_ARRAY}, {@code dstArray} specifies the handle of the destination data. {@code dstHost}, {@code dstDevice},
* {@code dstPitch} and {@code dstHeight} are ignored.
*
* {@code srcXInBytes}, {@code srcY} and {@code srcZ} specify the base address of the source data for the copy.
*
* For host pointers, the starting address is
*
* {@code void* Start = (void*)((char*)srcHost+(srcZ*srcHeight+srcY)*srcPitch + srcXInBytes);}
*
* For device pointers, the starting address is
*
* {@code CUdeviceptr Start = srcDevice+(srcZ*srcHeight+srcY)*srcPitch+srcXInBytes;}
*
* For CUDA arrays, {@code srcXInBytes} must be evenly divisible by the array element size.
*
* {@code dstXInBytes}, {@code dstY} and {@code dstZ} specify the base address of the destination data for the copy.
*
* For host pointers, the base address is
*
* {@code void* dstStart = (void*)((char*)dstHost+(dstZ*dstHeight+dstY)*dstPitch + dstXInBytes);}
*
* For device pointers, the starting address is
*
* {@code CUdeviceptr dstStart = dstDevice+(dstZ*dstHeight+dstY)*dstPitch+dstXInBytes;}
*
* For CUDA arrays, {@code dstXInBytes} must be evenly divisible by the array element size.
*
* {@code WidthInBytes}, {@code Height} and {@code Depth} specify the width (in bytes), height and depth of the 3D copy being performed.
*
* If specified, {@code srcPitch} must be greater than or equal to {@code WidthInBytes} + {@code srcXInBytes}, and {@code dstPitch} must be greater than
* or equal to {@code WidthInBytes} + {@code dstXInBytes}.
*
* If specified, {@code srcHeight} must be greater than or equal to {@code Height} + {@code srcY}, and {@code dstHeight} must be greater than or equal to
* {@code Height} + {@code dstY}.
*
* {@link #cuMemcpy3DAsync Memcpy3DAsync} returns an error if any pitch is greater than the maximum allowed ({@link #CU_DEVICE_ATTRIBUTE_MAX_PITCH DEVICE_ATTRIBUTE_MAX_PITCH}).
*
* The {@code srcLOD} and {@code dstLOD} members of the {@code CUDA_MEMCPY3D} structure must be set to 0.
*
* @param pCopy parameters for the memory copy
* @param hStream stream identifier
*/
@NativeType("CUresult")
public static int cuMemcpy3DAsync(@NativeType("CUDA_MEMCPY3D const *") CUDA_MEMCPY3D pCopy, @NativeType("CUstream") long hStream) {
return ncuMemcpy3DAsync(pCopy.address(), hStream);
}
// --- [ cuMemcpy3DPeerAsync ] ---
/** Unsafe version of: {@link #cuMemcpy3DPeerAsync Memcpy3DPeerAsync} */
public static int ncuMemcpy3DPeerAsync(long pCopy, long hStream) {
long __functionAddress = Functions.Memcpy3DPeerAsync;
if (CHECKS) {
check(__functionAddress);
}
return callPPI(pCopy, hStream, __functionAddress);
}
/**
* Copies memory between contexts asynchronously.
*
* Perform a 3D memory copy according to the parameters specified in {@code pCopy}.
*
* @param pCopy parameters for the memory copy
* @param hStream stream identifier
*/
@NativeType("CUresult")
public static int cuMemcpy3DPeerAsync(@NativeType("CUDA_MEMCPY3D_PEER const *") CUDA_MEMCPY3D_PEER pCopy, @NativeType("CUstream") long hStream) {
return ncuMemcpy3DPeerAsync(pCopy.address(), hStream);
}
// --- [ cuMemsetD8 ] ---
/**
* Initializes device memory.
*
* Sets the memory range of {@code N} 8-bit values to the specified value {@code uc}.
*
* @param dstDevice destination device pointer
* @param uc value to set
* @param N number of elements
*/
@NativeType("CUresult")
public static int cuMemsetD8(@NativeType("CUdeviceptr") long dstDevice, @NativeType("unsigned char") byte uc, @NativeType("size_t") long N) {
long __functionAddress = Functions.MemsetD8;
if (CHECKS) {
check(dstDevice);
}
return callPPI(dstDevice, uc, N, __functionAddress);
}
// --- [ cuMemsetD16 ] ---
/**
* Initializes device memory.
*
* Sets the memory range of {@code N} 16-bit values to the specified value {@code us}. The {@code dstDevice} pointer must be two byte aligned.
*
* @param dstDevice destination device pointer
* @param us value to set
* @param N number of elements
*/
@NativeType("CUresult")
public static int cuMemsetD16(@NativeType("CUdeviceptr") long dstDevice, @NativeType("unsigned short") short us, @NativeType("size_t") long N) {
long __functionAddress = Functions.MemsetD16;
if (CHECKS) {
check(dstDevice);
}
return callPPI(dstDevice, us, N, __functionAddress);
}
// --- [ cuMemsetD32 ] ---
/**
* Initializes device memory.
*
* Sets the memory range of {@code N} 32-bit values to the specified value {@code ui}. The {@code dstDevice} pointer must be four byte aligned.
*
* @param dstDevice destination device pointer
* @param ui value to set
* @param N number of elements
*/
@NativeType("CUresult")
public static int cuMemsetD32(@NativeType("CUdeviceptr") long dstDevice, @NativeType("unsigned int") int ui, @NativeType("size_t") long N) {
long __functionAddress = Functions.MemsetD32;
if (CHECKS) {
check(dstDevice);
}
return callPPI(dstDevice, ui, N, __functionAddress);
}
// --- [ cuMemsetD2D8 ] ---
/**
* Initializes device memory.
*
* Sets the 2D memory range of {@code Width} 8-bit values to the specified value {@code uc}. {@code Height} specifies the number of rows to set, and
* {@code dstPitch} specifies the number of bytes between each row. This function performs fastest when the pitch is one that has been passed back by
* {@link #cuMemAllocPitch MemAllocPitch}.
*
* @param dstDevice destination device pointer
* @param dstPitch pitch of destination device pointer (unused if {@code Height} is 1)
* @param uc value to set
* @param Width width of row
* @param Height number of rows
*/
@NativeType("CUresult")
public static int cuMemsetD2D8(@NativeType("CUdeviceptr") long dstDevice, @NativeType("size_t") long dstPitch, @NativeType("unsigned char") byte uc, @NativeType("size_t") long Width, @NativeType("size_t") long Height) {
long __functionAddress = Functions.MemsetD2D8;
if (CHECKS) {
check(dstDevice);
}
return callPPPPI(dstDevice, dstPitch, uc, Width, Height, __functionAddress);
}
// --- [ cuMemsetD2D16 ] ---
/**
* Initializes device memory.
*
* Sets the 2D memory range of {@code Width} 16-bit values to the specified value {@code us}. {@code Height} specifies the number of rows to set, and
* {@code dstPitch} specifies the number of bytes between each row. The {@code dstDevice} pointer and {@code dstPitch} offset must be two byte aligned.
* This function performs fastest when the pitch is one that has been passed back by {@link #cuMemAllocPitch MemAllocPitch}.
*
* @param dstDevice destination device pointer
* @param dstPitch pitch of destination device pointer (unused if {@code Height} is 1)
* @param us value to set
* @param Width width of row
* @param Height number of rows
*/
@NativeType("CUresult")
public static int cuMemsetD2D16(@NativeType("CUdeviceptr") long dstDevice, @NativeType("size_t") long dstPitch, @NativeType("unsigned short") short us, @NativeType("size_t") long Width, @NativeType("size_t") long Height) {
long __functionAddress = Functions.MemsetD2D16;
if (CHECKS) {
check(dstDevice);
}
return callPPPPI(dstDevice, dstPitch, us, Width, Height, __functionAddress);
}
// --- [ cuMemsetD2D32 ] ---
/**
* Initializes device memory.
*
* Sets the 2D memory range of {@code Width} 32-bit values to the specified value {@code ui}. {@code Height} specifies the number of rows to set, and
* {@code dstPitch} specifies the number of bytes between each row. The {@code dstDevice} pointer and {@code dstPitch} offset must be four byte aligned.
* This function performs fastest when the pitch is one that has been passed back by {@link #cuMemAllocPitch MemAllocPitch}.
*
* @param dstDevice destination device pointer
* @param dstPitch pitch of destination device pointer (unused if {@code Height} is 1)
* @param ui value to set
* @param Width width of row
* @param Height number of rows
*/
@NativeType("CUresult")
public static int cuMemsetD2D32(@NativeType("CUdeviceptr") long dstDevice, @NativeType("size_t") long dstPitch, @NativeType("unsigned int") int ui, @NativeType("size_t") long Width, @NativeType("size_t") long Height) {
long __functionAddress = Functions.MemsetD2D32;
if (CHECKS) {
check(dstDevice);
}
return callPPPPI(dstDevice, dstPitch, ui, Width, Height, __functionAddress);
}
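// Sketch: allocating a pitched 2D int buffer and zero-filling it; cuMemAllocPitch returns a pitch that satisfies the alignment
// rules above and performs best with the 2D memsets (Width is in elements, dstPitch in bytes):
//
// try (MemoryStack stack = stackPush()) {
//     PointerBuffer pp    = stack.mallocPointer(1);
//     PointerBuffer pitch = stack.mallocPointer(1);
//     cuMemAllocPitch(pp, pitch, width * Integer.BYTES, height, Integer.BYTES);
//     cuMemsetD2D32(pp.get(0), pitch.get(0), 0, width, height);
// }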
// --- [ cuMemsetD8Async ] ---
/**
* Sets device memory.
*
* Sets the memory range of {@code N} 8-bit values to the specified value {@code uc}.
*
* @param dstDevice destination device pointer
* @param uc value to set
* @param N number of elements
* @param hStream stream identifier
*/
@NativeType("CUresult")
public static int cuMemsetD8Async(@NativeType("CUdeviceptr") long dstDevice, @NativeType("unsigned char") byte uc, @NativeType("size_t") long N, @NativeType("CUstream") long hStream) {
long __functionAddress = Functions.MemsetD8Async;
if (CHECKS) {
check(dstDevice);
}
return callPPPI(dstDevice, uc, N, hStream, __functionAddress);
}
// --- [ cuMemsetD16Async ] ---
/**
* Sets device memory.
*
* Sets the memory range of {@code N} 16-bit values to the specified value {@code us}. The {@code dstDevice} pointer must be two byte aligned.
*
* @param dstDevice destination device pointer
* @param us value to set
* @param N number of elements
* @param hStream stream identifier
*/
@NativeType("CUresult")
public static int cuMemsetD16Async(@NativeType("CUdeviceptr") long dstDevice, @NativeType("unsigned short") short us, @NativeType("size_t") long N, @NativeType("CUstream") long hStream) {
long __functionAddress = Functions.MemsetD16Async;
if (CHECKS) {
check(dstDevice);
}
return callPPPI(dstDevice, us, N, hStream, __functionAddress);
}
// --- [ cuMemsetD32Async ] ---
/**
* Sets device memory.
*
* Sets the memory range of {@code N} 32-bit values to the specified value {@code ui}. The {@code dstDevice} pointer must be four byte aligned.
*
* @param dstDevice destination device pointer
* @param ui value to set
* @param N number of elements
* @param hStream stream identifier
*/
@NativeType("CUresult")
public static int cuMemsetD32Async(@NativeType("CUdeviceptr") long dstDevice, @NativeType("unsigned int") int ui, @NativeType("size_t") long N, @NativeType("CUstream") long hStream) {
long __functionAddress = Functions.MemsetD32Async;
if (CHECKS) {
check(dstDevice);
}
return callPPPI(dstDevice, ui, N, hStream, __functionAddress);
}
// --- [ cuMemsetD2D8Async ] ---
/**
* Sets device memory.
*
* Sets the 2D memory range of {@code Width} 8-bit values to the specified value {@code uc}. {@code Height} specifies the number of rows to set, and
* {@code dstPitch} specifies the number of bytes between each row. This function performs fastest when the pitch is one that has been passed back by
* {@link #cuMemAllocPitch MemAllocPitch}.
*
* @param dstDevice destination device pointer
* @param dstPitch pitch of destination device pointer (unused if {@code Height} is 1)
* @param uc value to set
* @param Width width of row
* @param Height number of rows
* @param hStream stream identifier
*/
@NativeType("CUresult")
public static int cuMemsetD2D8Async(@NativeType("CUdeviceptr") long dstDevice, @NativeType("size_t") long dstPitch, @NativeType("unsigned char") byte uc, @NativeType("size_t") long Width, @NativeType("size_t") long Height, @NativeType("CUstream") long hStream) {
long __functionAddress = Functions.MemsetD2D8Async;
if (CHECKS) {
check(dstDevice);
}
return callPPPPPI(dstDevice, dstPitch, uc, Width, Height, hStream, __functionAddress);
}
// --- [ cuMemsetD2D16Async ] ---
/**
* Sets device memory.
*
* Sets the 2D memory range of {@code Width} 16-bit values to the specified value {@code us}. {@code Height} specifies the number of rows to set, and
* {@code dstPitch} specifies the number of bytes between each row. The {@code dstDevice} pointer and {@code dstPitch} offset must be two byte aligned.
* This function performs fastest when the pitch is one that has been passed back by {@link #cuMemAllocPitch MemAllocPitch}.
*
* @param dstDevice destination device pointer
* @param dstPitch pitch of destination device pointer (unused if {@code Height} is 1)
* @param us value to set
* @param Width width of row
* @param Height number of rows
* @param hStream stream identifier
*/
@NativeType("CUresult")
public static int cuMemsetD2D16Async(@NativeType("CUdeviceptr") long dstDevice, @NativeType("size_t") long dstPitch, @NativeType("unsigned short") short us, @NativeType("size_t") long Width, @NativeType("size_t") long Height, @NativeType("CUstream") long hStream) {
long __functionAddress = Functions.MemsetD2D16Async;
if (CHECKS) {
check(dstDevice);
}
return callPPPPPI(dstDevice, dstPitch, us, Width, Height, hStream, __functionAddress);
}
// --- [ cuMemsetD2D32Async ] ---
/**
* Sets device memory.
*
* Sets the 2D memory range of {@code Width} 32-bit values to the specified value {@code ui}. {@code Height} specifies the number of rows to set, and
* {@code dstPitch} specifies the number of bytes between each row. The {@code dstDevice} pointer and {@code dstPitch} offset must be four byte aligned.
* This function performs fastest when the pitch is one that has been passed back by {@link #cuMemAllocPitch MemAllocPitch}.
*
* @param dstDevice destination device pointer
* @param dstPitch pitch of destination device pointer (unused if {@code Height} is 1)
* @param ui value to set
* @param Width width of row
* @param Height number of rows
* @param hStream stream identifier
*/
@NativeType("CUresult")
public static int cuMemsetD2D32Async(@NativeType("CUdeviceptr") long dstDevice, @NativeType("size_t") long dstPitch, @NativeType("unsigned int") int ui, @NativeType("size_t") long Width, @NativeType("size_t") long Height, @NativeType("CUstream") long hStream) {
long __functionAddress = Functions.MemsetD2D32Async;
if (CHECKS) {
check(dstDevice);
}
return callPPPPPI(dstDevice, dstPitch, ui, Width, Height, hStream, __functionAddress);
}
// --- [ cuArrayCreate ] ---
/** Unsafe version of: {@link #cuArrayCreate ArrayCreate} */
public static int ncuArrayCreate(long pHandle, long pAllocateArray) {
long __functionAddress = Functions.ArrayCreate;
return callPPI(pHandle, pAllocateArray, __functionAddress);
}
/**
* Creates a 1D or 2D CUDA array.
*
* Creates a CUDA array according to the {@code CUDA_ARRAY_DESCRIPTOR} structure {@code pAllocateArray} and returns a handle to the new CUDA array in
* {@code *pHandle}.
*
* @param pHandle returned array
* @param pAllocateArray array descriptor
*/
@NativeType("CUresult")
public static int cuArrayCreate(@NativeType("CUarray *") PointerBuffer pHandle, @NativeType("CUDA_ARRAY_DESCRIPTOR const *") CUDA_ARRAY_DESCRIPTOR pAllocateArray) {
if (CHECKS) {
check(pHandle, 1);
}
return ncuArrayCreate(memAddress(pHandle), pAllocateArray.address());
}
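// Sketch: creating a 512x512 single-channel float 2D array with the descriptor struct:
//
// try (MemoryStack stack = stackPush()) {
//     CUDA_ARRAY_DESCRIPTOR desc = CUDA_ARRAY_DESCRIPTOR.calloc(stack)
//         .Width(512)
//         .Height(512)
//         .Format(CU_AD_FORMAT_FLOAT)
//         .NumChannels(1);
//
//     PointerBuffer pArray = stack.mallocPointer(1);
//     if (cuArrayCreate(pArray, desc) == CUDA_SUCCESS) {
//         long array = pArray.get(0); // use with cuMemcpyHtoAAsync etc., release with cuArrayDestroy
//     }
// }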
// --- [ cuArrayGetDescriptor ] ---
/** Unsafe version of: {@link #cuArrayGetDescriptor ArrayGetDescriptor} */
public static int ncuArrayGetDescriptor(long pArrayDescriptor, long hArray) {
long __functionAddress = Functions.ArrayGetDescriptor;
if (CHECKS) {
check(hArray);
}
return callPPI(pArrayDescriptor, hArray, __functionAddress);
}
/**
* Get a 1D or 2D CUDA array descriptor.
*
* Returns in {@code *pArrayDescriptor} a descriptor containing information on the format and dimensions of the CUDA array {@code hArray}. It is useful
* for subroutines that have been passed a CUDA array, but need to know the CUDA array parameters for validation or other purposes.
*
* @param pArrayDescriptor returned array descriptor
* @param hArray array to get descriptor of
*/
@NativeType("CUresult")
public static int cuArrayGetDescriptor(@NativeType("CUDA_ARRAY_DESCRIPTOR *") CUDA_ARRAY_DESCRIPTOR pArrayDescriptor, @NativeType("CUarray") long hArray) {
return ncuArrayGetDescriptor(pArrayDescriptor.address(), hArray);
}
// --- [ cuArrayGetSparseProperties ] ---
/** Unsafe version of: {@link #cuArrayGetSparseProperties ArrayGetSparseProperties} */
public static int ncuArrayGetSparseProperties(long sparseProperties, long array) {
long __functionAddress = Functions.ArrayGetSparseProperties;
if (CHECKS) {
check(__functionAddress);
check(array);
}
return callPPI(sparseProperties, array, __functionAddress);
}
/**
* Returns the layout properties of a sparse CUDA array.
*
* Returns the layout properties of a sparse CUDA array in {@code sparseProperties}. If the CUDA array is not allocated with flag {@link #CUDA_ARRAY3D_SPARSE},
* {@link #CUDA_ERROR_INVALID_VALUE} will be returned.
*
* If the returned value in {@link CUDA_ARRAY_SPARSE_PROPERTIES}{@code ::flags} contains {@link #CU_ARRAY_SPARSE_PROPERTIES_SINGLE_MIPTAIL ARRAY_SPARSE_PROPERTIES_SINGLE_MIPTAIL}, then
* {@code CUDA_ARRAY_SPARSE_PROPERTIES::miptailSize} represents the total size of the array. Otherwise, it will be zero. Also, the returned value in
* {@code CUDA_ARRAY_SPARSE_PROPERTIES::miptailFirstLevel} is always zero. Note that the {@code array} must have been allocated using {@link #cuArrayCreate ArrayCreate} or
* {@link #cuArray3DCreate Array3DCreate}. For CUDA arrays obtained using {@link #cuMipmappedArrayGetLevel MipmappedArrayGetLevel}, {@link #CUDA_ERROR_INVALID_VALUE} will be returned. Instead,
* {@link #cuMipmappedArrayGetSparseProperties MipmappedArrayGetSparseProperties} must be used to obtain the sparse properties of the entire CUDA mipmapped array to which {@code array} belongs.
*
* @param sparseProperties pointer to {@code CUDA_ARRAY_SPARSE_PROPERTIES}
* @param array CUDA array to get the sparse properties of
*/
@NativeType("CUresult")
public static int cuArrayGetSparseProperties(@NativeType("CUDA_ARRAY_SPARSE_PROPERTIES *") CUDA_ARRAY_SPARSE_PROPERTIES sparseProperties, @NativeType("CUarray") long array) {
return ncuArrayGetSparseProperties(sparseProperties.address(), array);
}
// --- [ cuMipmappedArrayGetSparseProperties ] ---
/** Unsafe version of: {@link #cuMipmappedArrayGetSparseProperties MipmappedArrayGetSparseProperties} */
public static int ncuMipmappedArrayGetSparseProperties(long sparseProperties, long mipmap) {
long __functionAddress = Functions.MipmappedArrayGetSparseProperties;
if (CHECKS) {
check(__functionAddress);
check(mipmap);
}
return callPPI(sparseProperties, mipmap, __functionAddress);
}
/**
* Returns the layout properties of a sparse CUDA mipmapped array.
*
* Returns the sparse array layout properties in {@code sparseProperties}. If the CUDA mipmapped array is not allocated with flag {@link #CUDA_ARRAY3D_SPARSE},
* {@link #CUDA_ERROR_INVALID_VALUE} will be returned.
*
* For non-layered CUDA mipmapped arrays, {@link CUDA_ARRAY_SPARSE_PROPERTIES}{@code ::miptailSize} returns the size of the mip tail region. The mip tail region
* includes all mip levels whose width, height or depth is less than that of the tile. For layered CUDA mipmapped arrays, if
* {@code CUDA_ARRAY_SPARSE_PROPERTIES::flags} contains {@link #CU_ARRAY_SPARSE_PROPERTIES_SINGLE_MIPTAIL ARRAY_SPARSE_PROPERTIES_SINGLE_MIPTAIL}, then {@code CUDA_ARRAY_SPARSE_PROPERTIES::miptailSize}
* specifies the size of the mip tail of all layers combined. Otherwise, {@code CUDA_ARRAY_SPARSE_PROPERTIES::miptailSize} specifies mip tail size per
* layer. The returned value of {@code CUDA_ARRAY_SPARSE_PROPERTIES::miptailFirstLevel} is valid only if {@code CUDA_ARRAY_SPARSE_PROPERTIES::miptailSize}
* is non-zero.
*
* @param sparseProperties pointer to {@code CUDA_ARRAY_SPARSE_PROPERTIES}
* @param mipmap CUDA mipmapped array to get the sparse properties of
*/
@NativeType("CUresult")
public static int cuMipmappedArrayGetSparseProperties(@NativeType("CUDA_ARRAY_SPARSE_PROPERTIES *") CUDA_ARRAY_SPARSE_PROPERTIES sparseProperties, @NativeType("CUmipmappedArray") long mipmap) {
return ncuMipmappedArrayGetSparseProperties(sparseProperties.address(), mipmap);
}
// --- [ cuArrayGetPlane ] ---
/** Unsafe version of: {@link #cuArrayGetPlane ArrayGetPlane} */
public static int ncuArrayGetPlane(long pPlaneArray, long hArray, int planeIdx) {
long __functionAddress = Functions.ArrayGetPlane;
if (CHECKS) {
check(__functionAddress);
check(hArray);
}
return callPPI(pPlaneArray, hArray, planeIdx, __functionAddress);
}
/**
* Gets a CUDA array plane from a CUDA array.
*
* Returns in {@code pPlaneArray} a CUDA array that represents a single format plane of the CUDA array {@code hArray}.
*
* If {@code planeIdx} is greater than the maximum number of planes in this array or if the array does not have a multi-planar format (e.g.
* {@link #CU_AD_FORMAT_NV12 AD_FORMAT_NV12}), then {@link #CUDA_ERROR_INVALID_VALUE} is returned.
*
* Note that if the {@code hArray} has format {@link #CU_AD_FORMAT_NV12 AD_FORMAT_NV12}, then passing in 0 for {@code planeIdx} returns a CUDA array of the same size as {@code
* hArray} but with one channel and {@link #CU_AD_FORMAT_UNSIGNED_INT8 AD_FORMAT_UNSIGNED_INT8} as its format. If 1 is passed for {@code planeIdx}, then the returned CUDA array has half
* the height and width of {@code hArray} with two channels and {@link #CU_AD_FORMAT_UNSIGNED_INT8 AD_FORMAT_UNSIGNED_INT8} as its format.
*
* @param pPlaneArray returned CUDA array referenced by the {@code planeIdx}
* @param hArray multiplanar CUDA array
* @param planeIdx plane index
*/
@NativeType("CUresult")
public static int cuArrayGetPlane(@NativeType("CUarray *") PointerBuffer pPlaneArray, @NativeType("CUarray") long hArray, @NativeType("unsigned int") int planeIdx) {
if (CHECKS) {
check(pPlaneArray, 1);
}
return ncuArrayGetPlane(memAddress(pPlaneArray), hArray, planeIdx);
}
// --- [ cuArrayDestroy ] ---
/**
* Destroys a CUDA array.
*
* Destroys the CUDA array {@code hArray}.
*
* @param hArray array to destroy
*/
@NativeType("CUresult")
public static int cuArrayDestroy(@NativeType("CUarray") long hArray) {
long __functionAddress = Functions.ArrayDestroy;
if (CHECKS) {
check(hArray);
}
return callPI(hArray, __functionAddress);
}
// --- [ cuArray3DCreate ] ---
/** Unsafe version of: {@link #cuArray3DCreate Array3DCreate} */
public static int ncuArray3DCreate(long pHandle, long pAllocateArray) {
long __functionAddress = Functions.Array3DCreate;
return callPPI(pHandle, pAllocateArray, __functionAddress);
}
/**
* Creates a 3D CUDA array.
*
* Creates a CUDA array according to the {@link CUDA_ARRAY3D_DESCRIPTOR} structure {@code pAllocateArray} and returns a handle to the new CUDA array in
* {@code *pHandle}.
*
*
* - {@code Width}, {@code Height}, and {@code Depth} are the width, height, and depth of the CUDA array (in elements); the following types of CUDA
* arrays can be allocated:
*
*
* - A 1D array is allocated if {@code Height} and {@code Depth} extents are both zero.
* - A 2D array is allocated if only {@code Depth} extent is zero.
* - A 3D array is allocated if all three extents are non-zero.
* - A 1D layered CUDA array is allocated if only {@code Height} is zero and the {@link #CUDA_ARRAY3D_LAYERED} flag is set. Each layer is a 1D array. The
* number of layers is determined by the depth extent.
* - A 2D layered CUDA array is allocated if all three extents are non-zero and the {@link #CUDA_ARRAY3D_LAYERED} flag is set. Each layer is a 2D array. The
* number of layers is determined by the depth extent.
* - A cubemap CUDA array is allocated if all three extents are non-zero and the {@link #CUDA_ARRAY3D_CUBEMAP} flag is set. {@code Width} must be equal to
* {@code Height}, and {@code Depth} must be six. A cubemap is a special type of 2D layered CUDA array, where the six layers represent the six
* faces of a cube. The order of the six layers in memory is the same as that listed in {@code CUarray_cubemap_face}.
* - A cubemap layered CUDA array is allocated if all three extents are non-zero, and both, {@link #CUDA_ARRAY3D_CUBEMAP} and {@link #CUDA_ARRAY3D_LAYERED} flags
* are set. {@code Width} must be equal to {@code Height}, and {@code Depth} must be a multiple of six. A cubemap layered CUDA array is a special
* type of 2D layered CUDA array that consists of a collection of cubemaps. The first six layers represent the first cubemap, the next six layers
* form the second cubemap, and so on.
*
* - {@code Format} specifies the format of the elements.
* - {@code NumChannels} specifies the number of packed components per CUDA array element; it may be 1, 2, or 4;
* - {@code Flags} may be set to
*
*
* - {@link #CUDA_ARRAY3D_LAYERED} to enable creation of layered CUDA arrays. If this flag is set, {@code Depth} specifies the number of layers, not the
* depth of a 3D array.
* - {@link #CUDA_ARRAY3D_SURFACE_LDST} to enable surface references to be bound to the CUDA array. If this flag is not set, {@link #cuSurfRefSetArray SurfRefSetArray} will fail
* when attempting to bind the CUDA array to a surface reference.
* - {@link #CUDA_ARRAY3D_CUBEMAP} to enable creation of cubemaps. If this flag is set, {@code Width} must be equal to {@code Height}, and {@code Depth}
* must be six. If the {@link #CUDA_ARRAY3D_LAYERED} flag is also set, then {@code Depth} must be a multiple of six.
* - {@link #CUDA_ARRAY3D_TEXTURE_GATHER} to indicate that the CUDA array will be used for texture gather. Texture gather can only be performed on 2D CUDA
* arrays.
*
*
*
* {@code Width}, {@code Height} and {@code Depth} must meet certain size requirements as listed in the following table. All values are specified in
* elements. Note that for brevity's sake, the full name of the device attribute is not specified. For example, {@code TEXTURE1D_WIDTH} refers to the device attribute
* {@link #CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE1D_WIDTH DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE1D_WIDTH}.
*
* Note that 2D CUDA arrays have different size requirements if the {@link #CUDA_ARRAY3D_TEXTURE_GATHER} flag is set. {@code Width} and {@code Height} must not
* be greater than {@link #CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE2D_GATHER_WIDTH DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE2D_GATHER_WIDTH} and {@link #CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE2D_GATHER_HEIGHT DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE2D_GATHER_HEIGHT} respectively, in that
* case.
*
*
* <table>
* <tr><th>CUDA array type</th><th>Valid extents that must always be met {(width range in elements), (height range), (depth range)}</th><th>Valid extents with CUDA_ARRAY3D_SURFACE_LDST set {(width range in elements), (height range), (depth range)}</th></tr>
* <tr><td>1D</td><td>{ (1,TEXTURE1D_WIDTH), 0, 0 }</td><td>{ (1,SURFACE1D_WIDTH), 0, 0 }</td></tr>
* <tr><td>2D</td><td>{ (1,TEXTURE2D_WIDTH), (1,TEXTURE2D_HEIGHT), 0 }</td><td>{ (1,SURFACE2D_WIDTH), (1,SURFACE2D_HEIGHT), 0 }</td></tr>
* <tr><td>3D</td><td>{ (1,TEXTURE3D_WIDTH), (1,TEXTURE3D_HEIGHT), (1,TEXTURE3D_DEPTH) } OR { (1,TEXTURE3D_WIDTH_ALTERNATE), (1,TEXTURE3D_HEIGHT_ALTERNATE), (1,TEXTURE3D_DEPTH_ALTERNATE) }</td><td>{ (1,SURFACE3D_WIDTH), (1,SURFACE3D_HEIGHT), (1,SURFACE3D_DEPTH) }</td></tr>
* <tr><td>1D Layered</td><td>{ (1,TEXTURE1D_LAYERED_WIDTH), 0, (1,TEXTURE1D_LAYERED_LAYERS) }</td><td>{ (1,SURFACE1D_LAYERED_WIDTH), 0, (1,SURFACE1D_LAYERED_LAYERS) }</td></tr>
* <tr><td>2D Layered</td><td>{ (1,TEXTURE2D_LAYERED_WIDTH), (1,TEXTURE2D_LAYERED_HEIGHT), (1,TEXTURE2D_LAYERED_LAYERS) }</td><td>{ (1,SURFACE2D_LAYERED_WIDTH), (1,SURFACE2D_LAYERED_HEIGHT), (1,SURFACE2D_LAYERED_LAYERS) }</td></tr>
* <tr><td>Cubemap</td><td>{ (1,TEXTURECUBEMAP_WIDTH), (1,TEXTURECUBEMAP_WIDTH), 6 }</td><td>{ (1,SURFACECUBEMAP_WIDTH), (1,SURFACECUBEMAP_WIDTH), 6 }</td></tr>
* <tr><td>Cubemap Layered</td><td>{ (1,TEXTURECUBEMAP_LAYERED_WIDTH), (1,TEXTURECUBEMAP_LAYERED_WIDTH), (1,TEXTURECUBEMAP_LAYERED_LAYERS) }</td><td>{ (1,SURFACECUBEMAP_LAYERED_WIDTH), (1,SURFACECUBEMAP_LAYERED_WIDTH), (1,SURFACECUBEMAP_LAYERED_LAYERS) }</td></tr>
* </table>
*
* @param pHandle returned array
* @param pAllocateArray 3D array descriptor
*/
@NativeType("CUresult")
public static int cuArray3DCreate(@NativeType("CUarray *") PointerBuffer pHandle, @NativeType("CUDA_ARRAY3D_DESCRIPTOR const *") CUDA_ARRAY3D_DESCRIPTOR pAllocateArray) {
if (CHECKS) {
check(pHandle, 1);
}
return ncuArray3DCreate(memAddress(pHandle), pAllocateArray.address());
}
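// Sketch: a 6-layer 1D layered array, following the rules above (Height == 0 selects the 1D layered case; Depth carries the
// layer count when CUDA_ARRAY3D_LAYERED is set):
//
// try (MemoryStack stack = stackPush()) {
//     CUDA_ARRAY3D_DESCRIPTOR desc = CUDA_ARRAY3D_DESCRIPTOR.calloc(stack)
//         .Width(1024)
//         .Height(0)
//         .Depth(6) // number of layers, not 3D depth
//         .Format(CU_AD_FORMAT_FLOAT)
//         .NumChannels(1)
//         .Flags(CUDA_ARRAY3D_LAYERED);
//
//     PointerBuffer pArray = stack.mallocPointer(1);
//     cuArray3DCreate(pArray, desc);
// }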
// --- [ cuArray3DGetDescriptor ] ---
/** Unsafe version of: {@link #cuArray3DGetDescriptor Array3DGetDescriptor} */
public static int ncuArray3DGetDescriptor(long pArrayDescriptor, long hArray) {
long __functionAddress = Functions.Array3DGetDescriptor;
if (CHECKS) {
check(hArray);
}
return callPPI(pArrayDescriptor, hArray, __functionAddress);
}
/**
* Get a 3D CUDA array descriptor.
*
* Returns in {@code *pArrayDescriptor} a descriptor containing information on the format and dimensions of the CUDA array {@code hArray}. It is useful
* for subroutines that have been passed a CUDA array, but need to know the CUDA array parameters for validation or other purposes.
*
* This function may be called on 1D and 2D arrays, in which case the {@code Height} and/or {@code Depth} members of the descriptor struct will be set to
* 0.
*
* @param pArrayDescriptor returned 3D array descriptor
* @param hArray 3D array to get descriptor of
*/
@NativeType("CUresult")
public static int cuArray3DGetDescriptor(@NativeType("CUDA_ARRAY3D_DESCRIPTOR *") CUDA_ARRAY3D_DESCRIPTOR pArrayDescriptor, @NativeType("CUarray") long hArray) {
return ncuArray3DGetDescriptor(pArrayDescriptor.address(), hArray);
}
// --- [ cuMipmappedArrayCreate ] ---
/** Unsafe version of: {@link #cuMipmappedArrayCreate MipmappedArrayCreate} */
public static int ncuMipmappedArrayCreate(long pHandle, long pMipmappedArrayDesc, int numMipmapLevels) {
long __functionAddress = Functions.MipmappedArrayCreate;
if (CHECKS) {
check(__functionAddress);
}
return callPPI(pHandle, pMipmappedArrayDesc, numMipmapLevels, __functionAddress);
}
/**
* Creates a CUDA mipmapped array.
*
* Creates a CUDA mipmapped array according to the {@link CUDA_ARRAY3D_DESCRIPTOR} structure {@code pMipmappedArrayDesc} and returns a handle to the new CUDA
* mipmapped array in {@code *pHandle}. {@code numMipmapLevels} specifies the number of mipmap levels to be allocated. This value is clamped to the range
* {@code [1, 1 + floor(log2(max(width, height, depth)))]}.
 *
* - {@code Width}, {@code Height}, and {@code Depth} are the width, height, and depth of the CUDA array (in elements); the following types of CUDA
* arrays can be allocated:
 *
* - A 1D mipmapped array is allocated if {@code Height} and {@code Depth} extents are both zero.
* - A 2D mipmapped array is allocated if only {@code Depth} extent is zero.
* - A 3D mipmapped array is allocated if all three extents are non-zero.
* - A 1D layered CUDA mipmapped array is allocated if only {@code Height} is zero and the {@link #CUDA_ARRAY3D_LAYERED} flag is set. Each layer is a 1D
* array. The number of layers is determined by the depth extent.
* - A 2D layered CUDA mipmapped array is allocated if all three extents are non-zero and the {@link #CUDA_ARRAY3D_LAYERED} flag is set. Each layer is a 2D
* array. The number of layers is determined by the depth extent.
* - A cubemap CUDA mipmapped array is allocated if all three extents are non-zero and the {@link #CUDA_ARRAY3D_CUBEMAP} flag is set. {@code Width} must be
* equal to {@code Height}, and {@code Depth} must be six. A cubemap is a special type of 2D layered CUDA array, where the six layers represent
* the six faces of a cube. The order of the six layers in memory is the same as that listed in {@code CUarray_cubemap_face}.
* - A cubemap layered CUDA mipmapped array is allocated if all three extents are non-zero, and both, {@link #CUDA_ARRAY3D_CUBEMAP} and
* {@link #CUDA_ARRAY3D_LAYERED} flags are set. {@code Width} must be equal to {@code Height}, and {@code Depth} must be a multiple of six. A cubemap
* layered CUDA array is a special type of 2D layered CUDA array that consists of a collection of cubemaps. The first six layers represent the
* first cubemap, the next six layers form the second cubemap, and so on.
*
* - {@code Format} specifies the format of the elements.
* - {@code NumChannels} specifies the number of packed components per CUDA array element; it may be 1, 2, or 4;
* - Flags may be set to:
 *
* - {@link #CUDA_ARRAY3D_LAYERED} to enable creation of layered CUDA mipmapped arrays. If this flag is set, {@code Depth} specifies the number of layers,
* not the depth of a 3D array.
* - {@link #CUDA_ARRAY3D_SURFACE_LDST} to enable surface references to be bound to individual mipmap levels of the CUDA mipmapped array. If this flag is
* not set, {@link #cuSurfRefSetArray SurfRefSetArray} will fail when attempting to bind a mipmap level of the CUDA mipmapped array to a surface reference.
* - {@link #CUDA_ARRAY3D_CUBEMAP} to enable creation of mipmapped cubemaps. If this flag is set, {@code Width} must be equal to {@code Height}, and
* {@code Depth} must be six. If the {@link #CUDA_ARRAY3D_LAYERED} flag is also set, then {@code Depth} must be a multiple of six.
* - {@link #CUDA_ARRAY3D_TEXTURE_GATHER} to indicate that the CUDA mipmapped array will be used for texture gather. Texture gather can only be performed on
* 2D CUDA mipmapped arrays.
 *
* {@code Width}, {@code Height} and {@code Depth} must meet certain size requirements as listed in the following table. All values are specified in
 * elements. Note that, for brevity, the full name of the device attribute is not spelled out. For example, {@code TEXTURE1D_MIPMAPPED_WIDTH} refers to
* the device attribute {@link #CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE1D_MIPMAPPED_WIDTH DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE1D_MIPMAPPED_WIDTH}.
*
*
 * Valid extents per CUDA array type: first the extents that must always be met, then the extents with {@link #CUDA_ARRAY3D_SURFACE_LDST} set.
 * Each entry is {(width range in elements), (height range), (depth range)}.
 *
 * 1D:
 *   always:       { (1,TEXTURE1D_MIPMAPPED_WIDTH), 0, 0 }
 *   SURFACE_LDST: { (1,SURFACE1D_WIDTH), 0, 0 }
 * 2D:
 *   always:       { (1,TEXTURE2D_MIPMAPPED_WIDTH), (1,TEXTURE2D_MIPMAPPED_HEIGHT), 0 }
 *   SURFACE_LDST: { (1,SURFACE2D_WIDTH), (1,SURFACE2D_HEIGHT), 0 }
 * 3D:
 *   always:       { (1,TEXTURE3D_WIDTH), (1,TEXTURE3D_HEIGHT), (1,TEXTURE3D_DEPTH) } OR
 *                 { (1,TEXTURE3D_WIDTH_ALTERNATE), (1,TEXTURE3D_HEIGHT_ALTERNATE), (1,TEXTURE3D_DEPTH_ALTERNATE) }
 *   SURFACE_LDST: { (1,SURFACE3D_WIDTH), (1,SURFACE3D_HEIGHT), (1,SURFACE3D_DEPTH) }
 * 1D Layered:
 *   always:       { (1,TEXTURE1D_LAYERED_WIDTH), 0, (1,TEXTURE1D_LAYERED_LAYERS) }
 *   SURFACE_LDST: { (1,SURFACE1D_LAYERED_WIDTH), 0, (1,SURFACE1D_LAYERED_LAYERS) }
 * 2D Layered:
 *   always:       { (1,TEXTURE2D_LAYERED_WIDTH), (1,TEXTURE2D_LAYERED_HEIGHT), (1,TEXTURE2D_LAYERED_LAYERS) }
 *   SURFACE_LDST: { (1,SURFACE2D_LAYERED_WIDTH), (1,SURFACE2D_LAYERED_HEIGHT), (1,SURFACE2D_LAYERED_LAYERS) }
 * Cubemap:
 *   always:       { (1,TEXTURECUBEMAP_WIDTH), (1,TEXTURECUBEMAP_WIDTH), 6 }
 *   SURFACE_LDST: { (1,SURFACECUBEMAP_WIDTH), (1,SURFACECUBEMAP_WIDTH), 6 }
 * Cubemap Layered:
 *   always:       { (1,TEXTURECUBEMAP_LAYERED_WIDTH), (1,TEXTURECUBEMAP_LAYERED_WIDTH), (1,TEXTURECUBEMAP_LAYERED_LAYERS) }
 *   SURFACE_LDST: { (1,SURFACECUBEMAP_LAYERED_WIDTH), (1,SURFACECUBEMAP_LAYERED_WIDTH), (1,SURFACECUBEMAP_LAYERED_LAYERS) }
*
* @param pHandle returned mipmapped array
* @param pMipmappedArrayDesc mipmapped array descriptor
* @param numMipmapLevels number of mipmap levels
*/
@NativeType("CUresult")
public static int cuMipmappedArrayCreate(@NativeType("CUmipmappedArray *") PointerBuffer pHandle, @NativeType("CUDA_ARRAY3D_DESCRIPTOR const *") CUDA_ARRAY3D_DESCRIPTOR pMipmappedArrayDesc, @NativeType("unsigned int") int numMipmapLevels) {
if (CHECKS) {
check(pHandle, 1);
}
return ncuMipmappedArrayCreate(memAddress(pHandle), pMipmappedArrayDesc.address(), numMipmapLevels);
}
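/*
 * Example: creating a 256x256 2D mipmapped array with a full mip chain. Illustrative sketch; CU_AD_FORMAT_UNSIGNED_INT8 is the CUarray_format
 * constant assumed for 8-bit channels, and error checking is omitted for brevity.
 *
 *     try (MemoryStack stack = stackPush()) {
 *         CUDA_ARRAY3D_DESCRIPTOR desc = CUDA_ARRAY3D_DESCRIPTOR.calloc(stack)
 *             .Width(256)
 *             .Height(256)
 *             .Depth(0) // 2D: the Depth extent is zero
 *             .Format(CU_AD_FORMAT_UNSIGNED_INT8)
 *             .NumChannels(4);
 *
 *         // numMipmapLevels is clamped to [1, 1 + floor(log2(max(w, h, d)))]; for 256x256 a full chain is 9 levels.
 *         int levels = 1 + (31 - Integer.numberOfLeadingZeros(256));
 *         PointerBuffer pMipmapped = stack.mallocPointer(1);
 *         cuMipmappedArrayCreate(pMipmapped, desc, levels);
 *         long hMipmapped = pMipmapped.get(0);
 *     }
 */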
// --- [ cuMipmappedArrayGetLevel ] ---
/** Unsafe version of: {@link #cuMipmappedArrayGetLevel MipmappedArrayGetLevel} */
public static int ncuMipmappedArrayGetLevel(long pLevelArray, long hMipmappedArray, int level) {
long __functionAddress = Functions.MipmappedArrayGetLevel;
if (CHECKS) {
check(__functionAddress);
check(hMipmappedArray);
}
return callPPI(pLevelArray, hMipmappedArray, level, __functionAddress);
}
/**
* Gets a mipmap level of a CUDA mipmapped array.
*
* Returns in {@code *pLevelArray} a CUDA array that represents a single mipmap level of the CUDA mipmapped array {@code hMipmappedArray}.
*
* If {@code level} is greater than the maximum number of levels in this mipmapped array, {@link #CUDA_ERROR_INVALID_VALUE} is returned.
*
* @param pLevelArray returned mipmap level CUDA array
* @param hMipmappedArray CUDA mipmapped array
* @param level mipmap level
*/
@NativeType("CUresult")
public static int cuMipmappedArrayGetLevel(@NativeType("CUarray *") PointerBuffer pLevelArray, @NativeType("CUmipmappedArray") long hMipmappedArray, @NativeType("unsigned int") int level) {
if (CHECKS) {
check(pLevelArray, 1);
}
return ncuMipmappedArrayGetLevel(memAddress(pLevelArray), hMipmappedArray, level);
}
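/*
 * Example: retrieving the base level of the mipmapped array created above, then destroying the whole chain. Illustrative sketch; level arrays are
 * owned by the mipmapped array and are not destroyed individually.
 *
 *     try (MemoryStack stack = stackPush()) {
 *         PointerBuffer pLevel = stack.mallocPointer(1);
 *         if (cuMipmappedArrayGetLevel(pLevel, hMipmapped, 0) == CUDA_SUCCESS) {
 *             long hLevel0 = pLevel.get(0); // CUarray view of mip level 0
 *             // ... copy into or bind hLevel0 ...
 *         }
 *         cuMipmappedArrayDestroy(hMipmapped); // releases all levels
 *     }
 */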
// --- [ cuMipmappedArrayDestroy ] ---
/**
* Destroys a CUDA mipmapped array.
*
* Destroys the CUDA mipmapped array {@code hMipmappedArray}.
*
* @param hMipmappedArray mipmapped array to destroy
*/
@NativeType("CUresult")
public static int cuMipmappedArrayDestroy(@NativeType("CUmipmappedArray") long hMipmappedArray) {
long __functionAddress = Functions.MipmappedArrayDestroy;
if (CHECKS) {
check(__functionAddress);
check(hMipmappedArray);
}
return callPI(hMipmappedArray, __functionAddress);
}
// --- [ cuMemAddressReserve ] ---
/** Unsafe version of: {@link #cuMemAddressReserve MemAddressReserve} */
public static int ncuMemAddressReserve(long ptr, long size, long alignment, long addr, long flags) {
long __functionAddress = Functions.MemAddressReserve;
if (CHECKS) {
check(__functionAddress);
check(addr);
}
return callPPPPJI(ptr, size, alignment, addr, flags, __functionAddress);
}
/**
* Allocate an address range reservation.
*
* Reserves a virtual address range based on the given parameters, giving the starting address of the range in {@code ptr}. This API requires a system
* that supports UVA. The size and address parameters must be a multiple of the host page size and the alignment must be a power of two or zero for
* default alignment.
*
* @param ptr resulting pointer to start of virtual address range allocated
* @param size size of the reserved virtual address range requested
* @param alignment alignment of the reserved virtual address range requested
* @param addr fixed starting address range requested
* @param flags currently unused, must be zero
*/
@NativeType("CUresult")
public static int cuMemAddressReserve(@NativeType("CUdeviceptr *") PointerBuffer ptr, @NativeType("size_t") long size, @NativeType("size_t") long alignment, @NativeType("CUdeviceptr") long addr, @NativeType("unsigned long long") long flags) {
if (CHECKS) {
check(ptr, 1);
}
return ncuMemAddressReserve(memAddress(ptr), size, alignment, addr, flags);
}
// --- [ cuMemAddressFree ] ---
/**
* Free an address range reservation.
*
 * Frees a virtual address range reserved by {@link #cuMemAddressReserve MemAddressReserve}. The {@code size} must match what was given to {@code cuMemAddressReserve} and the
 * {@code ptr} given must match what was returned from {@code cuMemAddressReserve}.
*
* @param ptr starting address of the virtual address range to free
* @param size size of the virtual address region to free
*/
@NativeType("CUresult")
public static int cuMemAddressFree(@NativeType("CUdeviceptr") long ptr, @NativeType("size_t") long size) {
long __functionAddress = Functions.MemAddressFree;
if (CHECKS) {
check(__functionAddress);
check(ptr);
}
return callPPI(ptr, size, __functionAddress);
}
// --- [ cuMemCreate ] ---
/** Unsafe version of: {@link #cuMemCreate MemCreate} */
public static int ncuMemCreate(long handle, long size, long prop, long flags) {
long __functionAddress = Functions.MemCreate;
if (CHECKS) {
check(__functionAddress);
}
return callPPPJI(handle, size, prop, flags, __functionAddress);
}
/**
* Create a CUDA memory handle representing a memory allocation of a given size described by the given properties.
*
 * This creates a memory allocation on the target device specified through the {@code prop} structure. The created allocation will not have any device or
 * host mappings. The generic memory {@code handle} for the allocation can be mapped to the address space of the calling process via {@link #cuMemMap MemMap}. This handle
 * cannot be transmitted directly to other processes (see {@link #cuMemExportToShareableHandle MemExportToShareableHandle}). On Windows, the caller must also pass an
 * {@code LPSECURITYATTRIBUTE} in {@code prop} to be associated with this handle, which limits or allows access to this handle for a recipient process (see
 * {@link CUmemAllocationProp}{@code ::win32HandleMetaData} for more). The {@code size} of this allocation must be a multiple of the value given via
 * {@link #cuMemGetAllocationGranularity MemGetAllocationGranularity} with the {@link #CU_MEM_ALLOC_GRANULARITY_MINIMUM MEM_ALLOC_GRANULARITY_MINIMUM} flag. If {@link CUmemAllocationProp}{@code ::allocFlags::usage} contains the
 * {@link #CU_MEM_CREATE_USAGE_TILE_POOL MEM_CREATE_USAGE_TILE_POOL} flag, then the memory allocation is intended only to be used as a backing tile pool for sparse CUDA arrays and sparse CUDA
 * mipmapped arrays (see {@link #cuMemMapArrayAsync MemMapArrayAsync}).
*
* @param handle value of handle returned. All operations on this allocation are to be performed using this handle.
* @param size size of the allocation requested
* @param prop properties of the allocation to create
* @param flags flags for future use, must be zero now
*/
@NativeType("CUresult")
public static int cuMemCreate(@NativeType("CUmemGenericAllocationHandle *") LongBuffer handle, @NativeType("size_t") long size, @NativeType("CUmemAllocationProp const *") CUmemAllocationProp prop, @NativeType("unsigned long long") long flags) {
if (CHECKS) {
check(handle, 1);
}
return ncuMemCreate(memAddress(handle), size, prop.address(), flags);
}
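/*
 * Example: creating a physical allocation of one minimum granule on device 0. Illustrative sketch; the CUmemAllocationProp field and constant names
 * mirror the CUDA driver API and are assumed present in these bindings, and error checking is omitted for brevity.
 *
 *     try (MemoryStack stack = stackPush()) {
 *         CUmemAllocationProp prop = CUmemAllocationProp.calloc(stack)
 *             .type(CU_MEM_ALLOCATION_TYPE_PINNED);
 *         prop.location()
 *             .type(CU_MEM_LOCATION_TYPE_DEVICE)
 *             .id(0); // device ordinal 0
 *
 *         PointerBuffer pGranularity = stack.mallocPointer(1);
 *         cuMemGetAllocationGranularity(pGranularity, prop, CU_MEM_ALLOC_GRANULARITY_MINIMUM);
 *         long size = pGranularity.get(0); // sizes passed to cuMemCreate must be a multiple of this
 *
 *         LongBuffer handle = stack.mallocLong(1);
 *         cuMemCreate(handle, size, prop, 0);
 *     }
 */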
// --- [ cuMemRelease ] ---
/**
* Release a memory handle representing a memory allocation which was previously allocated through {@link #cuMemCreate MemCreate}.
*
* Frees the memory that was allocated on a device through {@code cuMemCreate}.
*
 * The memory allocation will be freed when all outstanding mappings to the memory are unmapped and when all outstanding references to the handle
 * (including its shareable counterparts) are also released. The generic memory handle can be freed when there are still outstanding mappings made with
 * this handle. Each time a recipient process imports a shareable handle, it needs to pair it with {@link #cuMemRelease MemRelease} for the handle to be freed. If
 * {@code handle} is not a valid handle the behavior is undefined.
*
* @param handle value of handle which was returned previously by {@code cuMemCreate}
*/
@NativeType("CUresult")
public static int cuMemRelease(@NativeType("CUmemGenericAllocationHandle") long handle) {
long __functionAddress = Functions.MemRelease;
if (CHECKS) {
check(__functionAddress);
}
return callJI(handle, __functionAddress);
}
// --- [ cuMemMap ] ---
/**
* Maps an allocation handle to a reserved virtual address range.
*
 * Maps bytes of memory represented by {@code handle} starting from byte {@code offset} to {@code size} to the address range [{@code addr}, {@code addr} +
 * {@code size}]. This range must be an address reservation previously reserved with {@link #cuMemAddressReserve MemAddressReserve}, and {@code offset} + {@code size} must be less
 * than the size of the memory allocation. {@code ptr}, {@code size}, and {@code offset} must each be a multiple of the value given via
 * {@link #cuMemGetAllocationGranularity MemGetAllocationGranularity} with the {@link #CU_MEM_ALLOC_GRANULARITY_MINIMUM MEM_ALLOC_GRANULARITY_MINIMUM} flag.
 *
 * Note that calling {@link #cuMemMap MemMap} does not make the address accessible; the caller needs to update accessibility of a contiguous mapped VA range by
 * calling {@link #cuMemSetAccess MemSetAccess}.
*
* Once a recipient process obtains a shareable memory handle from {@link #cuMemImportFromShareableHandle MemImportFromShareableHandle}, the process must use {@link #cuMemMap MemMap} to map the memory
* into its address ranges before setting accessibility with {@link #cuMemSetAccess MemSetAccess}.
*
* {@link #cuMemMap MemMap} can only create mappings on VA range reservations that are not currently mapped.
*
* @param ptr address where memory will be mapped
* @param size size of the memory mapping
 * @param offset offset into the memory represented by {@code handle} from which to start mapping. Note: currently this must be zero
* @param handle handle to a shareable memory
* @param flags flags for future use, must be zero now
*/
@NativeType("CUresult")
public static int cuMemMap(@NativeType("CUdeviceptr") long ptr, @NativeType("size_t") long size, @NativeType("size_t") long offset, @NativeType("CUmemGenericAllocationHandle") long handle, @NativeType("unsigned long long") long flags) {
long __functionAddress = Functions.MemMap;
if (CHECKS) {
check(__functionAddress);
check(ptr);
}
return callPPPJJI(ptr, size, offset, handle, flags, __functionAddress);
}
// --- [ cuMemMapArrayAsync ] ---
/**
* Unsafe version of: {@link #cuMemMapArrayAsync MemMapArrayAsync}
*
* @param count count of {@code CUarrayMapInfo} in {@code mapInfoList}
*/
public static int ncuMemMapArrayAsync(long mapInfoList, int count, long hStream) {
long __functionAddress = Functions.MemMapArrayAsync;
if (CHECKS) {
check(__functionAddress);
}
return callPPI(mapInfoList, count, hStream, __functionAddress);
}
/**
* Maps or unmaps subregions of sparse CUDA arrays and sparse CUDA mipmapped arrays.
*
* Performs map or unmap operations on subregions of sparse CUDA arrays and sparse CUDA mipmapped arrays. Each operation is specified by a
* {@link CUarrayMapInfo} entry in the {@code mapInfoList} array of size {@code count}.
*
 * {@code CUarrayMapInfo::resourceType} specifies the type of resource to be operated on. If {@code CUarrayMapInfo::resourceType} is set to
* {@link #CU_RESOURCE_TYPE_ARRAY RESOURCE_TYPE_ARRAY} then {@code CUarrayMapInfo::resource::array} must be set to a valid sparse CUDA array handle. The CUDA array must be
* either a 2D, 2D layered or 3D CUDA array and must have been allocated using {@link #cuArrayCreate ArrayCreate} or {@link #cuArray3DCreate Array3DCreate} with the flag {@link #CUDA_ARRAY3D_SPARSE}.
* For CUDA arrays obtained using {@link #cuMipmappedArrayGetLevel MipmappedArrayGetLevel}, {@link #CUDA_ERROR_INVALID_VALUE} will be returned. If {@code CUarrayMapInfo::resourceType} is set to
* {@link #CU_RESOURCE_TYPE_MIPMAPPED_ARRAY RESOURCE_TYPE_MIPMAPPED_ARRAY} then {@code CUarrayMapInfo::resource::mipmap} must be set to a valid sparse CUDA mipmapped array handle.
* The CUDA mipmapped array must be either a 2D, 2D layered or 3D CUDA mipmapped array and must have been allocated using {@link #cuMipmappedArrayCreate MipmappedArrayCreate} with
* the flag {@link #CUDA_ARRAY3D_SPARSE}.
*
* {@code CUarrayMapInfo::subresourceType} specifies the type of subresource within the resource.
*
 * {@link #CU_ARRAY_SPARSE_SUBRESOURCE_TYPE_SPARSE_LEVEL ARRAY_SPARSE_SUBRESOURCE_TYPE_SPARSE_LEVEL} indicates a sparse miplevel which spans at least one tile in every dimension. The remaining miplevels
* which are too small to span at least one tile in any dimension constitute the mip tail region as indicated by {@link #CU_ARRAY_SPARSE_SUBRESOURCE_TYPE_MIPTAIL ARRAY_SPARSE_SUBRESOURCE_TYPE_MIPTAIL}
* subresource type.
*
* If {@code CUarrayMapInfo::subresourceType} is set to {@link #CU_ARRAY_SPARSE_SUBRESOURCE_TYPE_SPARSE_LEVEL ARRAY_SPARSE_SUBRESOURCE_TYPE_SPARSE_LEVEL} then
* {@code CUarrayMapInfo::subresource::sparseLevel} struct must contain valid array subregion offsets and extents. The
* {@code CUarrayMapInfo::subresource::sparseLevel::offsetX}, {@code CUarrayMapInfo::subresource::sparseLevel::offsetY} and
* {@code CUarrayMapInfo::subresource::sparseLevel::offsetZ} must specify valid X, Y and Z offsets respectively. The
* {@code CUarrayMapInfo::subresource::sparseLevel::extentWidth}, {@code CUarrayMapInfo::subresource::sparseLevel::extentHeight} and
* {@code CUarrayMapInfo::subresource::sparseLevel::extentDepth} must specify valid width, height and depth extents respectively. These offsets and
 * extents must be aligned to the corresponding tile dimension. For CUDA mipmapped arrays, {@code CUarrayMapInfo::subresource::sparseLevel::level} must
 * specify a valid mip level index; otherwise, it must be zero. For layered CUDA arrays and layered CUDA mipmapped arrays,
 * {@code CUarrayMapInfo::subresource::sparseLevel::layer} must specify a valid layer index; otherwise, it must be zero.
 * {@code CUarrayMapInfo::subresource::sparseLevel::offsetZ} must be zero and {@code CUarrayMapInfo::subresource::sparseLevel::extentDepth} must be set to
 * 1 for 2D and 2D layered CUDA arrays and CUDA mipmapped arrays. Tile extents can be obtained by calling {@link #cuArrayGetSparseProperties ArrayGetSparseProperties} and
 * {@link #cuMipmappedArrayGetSparseProperties MipmappedArrayGetSparseProperties}.
*
* If {@code CUarrayMapInfo::subresourceType} is set to {@link #CU_ARRAY_SPARSE_SUBRESOURCE_TYPE_MIPTAIL ARRAY_SPARSE_SUBRESOURCE_TYPE_MIPTAIL} then {@code CUarrayMapInfo::subresource::miptail} struct
* must contain valid mip tail offset in {@code CUarrayMapInfo::subresource::miptail::offset} and size in
 * {@code CUarrayMapInfo::subresource::miptail::size}. Both the mip tail offset and the mip tail size must be aligned to the tile size. For layered CUDA
 * mipmapped arrays which don't have the flag {@link #CU_ARRAY_SPARSE_PROPERTIES_SINGLE_MIPTAIL ARRAY_SPARSE_PROPERTIES_SINGLE_MIPTAIL} set in {@link CUDA_ARRAY_SPARSE_PROPERTIES}{@code ::flags} as returned by
 * {@link #cuMipmappedArrayGetSparseProperties MipmappedArrayGetSparseProperties}, {@code CUarrayMapInfo::subresource::miptail::layer} must specify a valid layer index; otherwise, it must be zero.
*
* {@code CUarrayMapInfo::memOperationType} specifies the type of operation.
*
* If {@code CUarrayMapInfo::memOperationType} is set to {@link #CU_MEM_OPERATION_TYPE_MAP MEM_OPERATION_TYPE_MAP} then the subresource will be mapped onto the tile pool memory specified
* by {@code CUarrayMapInfo::memHandle} at offset {@code CUarrayMapInfo::offset}. The tile pool allocation has to be created by specifying the
* {@link #CU_MEM_CREATE_USAGE_TILE_POOL MEM_CREATE_USAGE_TILE_POOL} flag when calling {@link #cuMemCreate MemCreate}. Also, {@code CUarrayMapInfo::memHandleType} must be set to {@link #CU_MEM_HANDLE_TYPE_GENERIC MEM_HANDLE_TYPE_GENERIC}.
*
* If {@code CUarrayMapInfo::memOperationType} is set to {@link #CU_MEM_OPERATION_TYPE_UNMAP MEM_OPERATION_TYPE_UNMAP} then an unmapping operation is performed.
* {@code CUarrayMapInfo::memHandle} must be NULL.
*
* {@code CUarrayMapInfo::deviceBitMask} specifies the list of devices that must map or unmap physical memory. Currently, this mask must have exactly one
* bit set, and the corresponding device must match the device associated with the stream. If {@code CUarrayMapInfo::memOperationType} is set to
* {@link #CU_MEM_OPERATION_TYPE_MAP MEM_OPERATION_TYPE_MAP}, the device must also match the device associated with the tile pool memory allocation as specified by
* {@code CUarrayMapInfo::memHandle}.
*
* {@code CUarrayMapInfo::flags} and {@code CUarrayMapInfo::reserved[]} are unused and must be set to zero.
*
* @param mapInfoList list of {@code CUarrayMapInfo}
* @param hStream stream identifier for the stream to use for map or unmap operations
*/
@NativeType("CUresult")
public static int cuMemMapArrayAsync(@NativeType("CUarrayMapInfo *") CUarrayMapInfo.Buffer mapInfoList, @NativeType("CUstream") long hStream) {
return ncuMemMapArrayAsync(mapInfoList.address(), mapInfoList.remaining(), hStream);
}
// --- [ cuMemUnmap ] ---
/**
* Unmap the backing memory of a given address range.
*
* The range must be the entire contiguous address range that was mapped to. In other words, {@link #cuMemUnmap MemUnmap} cannot unmap a sub-range of an address range
* mapped by {@link #cuMemCreate MemCreate} / {@link #cuMemMap MemMap}. Any backing memory allocations will be freed if there are no existing mappings and there are no unreleased memory
* handles.
*
 * When {@link #cuMemUnmap MemUnmap} returns successfully the address range is converted to an address reservation and can be used for future calls to {@link #cuMemMap MemMap}. Any
* new mapping to this virtual address will need to have access granted through {@link #cuMemSetAccess MemSetAccess}, as all mappings start with no accessibility setup.
*
* @param ptr starting address for the virtual address range to unmap
* @param size size of the virtual address range to unmap
*/
@NativeType("CUresult")
public static int cuMemUnmap(@NativeType("CUdeviceptr") long ptr, @NativeType("size_t") long size) {
long __functionAddress = Functions.MemUnmap;
if (CHECKS) {
check(__functionAddress);
check(ptr);
}
return callPPI(ptr, size, __functionAddress);
}
// --- [ cuMemSetAccess ] ---
/**
* Unsafe version of: {@link #cuMemSetAccess MemSetAccess}
*
* @param count number of {@code CUmemAccessDesc} in {@code desc}
*/
public static int ncuMemSetAccess(long ptr, long size, long desc, long count) {
long __functionAddress = Functions.MemSetAccess;
if (CHECKS) {
check(__functionAddress);
check(ptr);
}
return callPPPPI(ptr, size, desc, count, __functionAddress);
}
/**
* Set the access flags for each location specified in {@code desc} for the given virtual address range.
*
* Given the virtual address range via {@code ptr} and {@code size}, and the locations in the array given by {@code desc} and {@code count}, set the
* access flags for the target locations. The range must be a fully mapped address range containing all allocations created by {@link #cuMemMap MemMap} / {@link #cuMemCreate MemCreate}.
*
* @param ptr starting address for the virtual address range
* @param size length of the virtual address range
 * @param desc array of {@code CUmemAccessDesc} that describe how to change the mapping for each location specified
*/
@NativeType("CUresult")
public static int cuMemSetAccess(@NativeType("CUdeviceptr") long ptr, @NativeType("size_t") long size, @NativeType("CUmemAccessDesc const *") CUmemAccessDesc.Buffer desc) {
return ncuMemSetAccess(ptr, size, desc.address(), desc.remaining());
}
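/*
 * Example: backing a fresh VA reservation with the allocation from the cuMemCreate sketch above and granting device 0 read/write access.
 * Illustrative sketch; constant names mirror the CUDA driver API and error checking is omitted for brevity.
 *
 *     try (MemoryStack stack = stackPush()) {
 *         PointerBuffer pAddr = stack.mallocPointer(1);
 *         cuMemAddressReserve(pAddr, size, 0, 0, 0); // default alignment, no fixed address
 *         long va = pAddr.get(0);
 *
 *         cuMemMap(va, size, 0, handle.get(0), 0); // map the whole allocation at offset 0
 *
 *         CUmemAccessDesc.Buffer access = CUmemAccessDesc.calloc(1, stack)
 *             .flags(CU_MEM_ACCESS_FLAGS_PROT_READWRITE);
 *         access.location().type(CU_MEM_LOCATION_TYPE_DEVICE).id(0);
 *         cuMemSetAccess(va, size, access); // va is now usable as a CUdeviceptr
 *
 *         // Teardown happens in reverse order:
 *         // cuMemUnmap(va, size); cuMemRelease(handle.get(0)); cuMemAddressFree(va, size);
 *     }
 */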
// --- [ cuMemGetAccess ] ---
/** Unsafe version of: {@link #cuMemGetAccess MemGetAccess} */
public static int ncuMemGetAccess(long flags, long location, long ptr) {
long __functionAddress = Functions.MemGetAccess;
if (CHECKS) {
check(__functionAddress);
check(ptr);
}
return callPPPI(flags, location, ptr, __functionAddress);
}
/**
* Get the access {@code flags} set for the given {@code location} and {@code ptr}.
*
* @param flags flags set for this location
 * @param location location for which to check the flags
 * @param ptr address for which to check the access flags
*/
@NativeType("CUresult")
public static int cuMemGetAccess(@NativeType("unsigned long long *") LongBuffer flags, @NativeType("CUmemLocation const *") CUmemLocation location, @NativeType("CUdeviceptr") long ptr) {
if (CHECKS) {
check(flags, 1);
}
return ncuMemGetAccess(memAddress(flags), location.address(), ptr);
}
// --- [ cuMemExportToShareableHandle ] ---
/** Unsafe version of: {@link #cuMemExportToShareableHandle MemExportToShareableHandle} */
public static int ncuMemExportToShareableHandle(long shareableHandle, long handle, int handleType, long flags) {
long __functionAddress = Functions.MemExportToShareableHandle;
if (CHECKS) {
check(__functionAddress);
}
return callPJJI(shareableHandle, handle, handleType, flags, __functionAddress);
}
/**
* Exports an allocation to a requested shareable handle type.
*
* Given a CUDA memory handle, create a shareable memory allocation handle that can be used to share the memory with other processes. The recipient
* process can convert the shareable handle back into a CUDA memory handle using {@link #cuMemImportFromShareableHandle MemImportFromShareableHandle} and map it with {@link #cuMemMap MemMap}. The
* implementation of what this handle is and how it can be transferred is defined by the requested handle type in {@code handleType}.
*
* Once all shareable handles are closed and the allocation is released, the allocated memory referenced will be released back to the OS and uses of the
* CUDA handle afterward will lead to undefined behavior.
*
 * This API can also be used in conjunction with other APIs (e.g. Vulkan, OpenGL) that support importing memory from the shareable type.
*
* @param shareableHandle pointer to the location in which to store the requested handle type
* @param handle CUDA handle for the memory allocation
* @param handleType type of shareable handle requested (defines type and size of the {@code shareableHandle} output parameter)
* @param flags reserved, must be zero
*/
@NativeType("CUresult")
public static int cuMemExportToShareableHandle(@NativeType("void *") ByteBuffer shareableHandle, @NativeType("CUmemGenericAllocationHandle") long handle, @NativeType("CUmemAllocationHandleType") int handleType, @NativeType("unsigned long long") long flags) {
return ncuMemExportToShareableHandle(memAddress(shareableHandle), handle, handleType, flags);
}
/**
* Exports an allocation to a requested shareable handle type.
*
* Given a CUDA memory handle, create a shareable memory allocation handle that can be used to share the memory with other processes. The recipient
* process can convert the shareable handle back into a CUDA memory handle using {@link #cuMemImportFromShareableHandle MemImportFromShareableHandle} and map it with {@link #cuMemMap MemMap}. The
* implementation of what this handle is and how it can be transferred is defined by the requested handle type in {@code handleType}.
*
* Once all shareable handles are closed and the allocation is released, the allocated memory referenced will be released back to the OS and uses of the
* CUDA handle afterward will lead to undefined behavior.
*
 * This API can also be used in conjunction with other APIs (e.g. Vulkan, OpenGL) that support importing memory from the shareable type.
*
* @param shareableHandle pointer to the location in which to store the requested handle type
* @param handle CUDA handle for the memory allocation
* @param handleType type of shareable handle requested (defines type and size of the {@code shareableHandle} output parameter)
* @param flags reserved, must be zero
*/
@NativeType("CUresult")
public static int cuMemExportToShareableHandle(@NativeType("void *") PointerBuffer shareableHandle, @NativeType("CUmemGenericAllocationHandle") long handle, @NativeType("CUmemAllocationHandleType") int handleType, @NativeType("unsigned long long") long flags) {
return ncuMemExportToShareableHandle(memAddress(shareableHandle), handle, handleType, flags);
}
// --- [ cuMemImportFromShareableHandle ] ---
/** Unsafe version of: {@link #cuMemImportFromShareableHandle MemImportFromShareableHandle} */
public static int ncuMemImportFromShareableHandle(long handle, long osHandle, int shHandleType) {
long __functionAddress = Functions.MemImportFromShareableHandle;
if (CHECKS) {
check(__functionAddress);
check(osHandle);
}
return callPPI(handle, osHandle, shHandleType, __functionAddress);
}
/**
* Imports an allocation from a requested shareable handle type.
*
 * If the current process cannot support the memory described by this shareable handle, this API will fail with {@link #CUDA_ERROR_NOT_SUPPORTED}.
*
* Note
*
 * Importing shareable handles exported from some graphics APIs (Vulkan, OpenGL, etc.) created on devices under an SLI group may not be supported, and thus
* this API will return {@link #CUDA_ERROR_NOT_SUPPORTED}. There is no guarantee that the contents of {@code handle} will be the same CUDA memory handle for the
* same given OS shareable handle, or the same underlying allocation.
*
*
* @param handle CUDA Memory handle for the memory allocation
* @param osHandle shareable Handle representing the memory allocation that is to be imported
* @param shHandleType handle type of the exported handle {@code CUmemAllocationHandleType}
*/
@NativeType("CUresult")
public static int cuMemImportFromShareableHandle(@NativeType("CUmemGenericAllocationHandle *") LongBuffer handle, @NativeType("void *") long osHandle, @NativeType("CUmemAllocationHandleType") int shHandleType) {
if (CHECKS) {
check(handle, 1);
}
return ncuMemImportFromShareableHandle(memAddress(handle), osHandle, shHandleType);
}
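/*
 * Example: sharing a generic allocation across processes via a POSIX file descriptor on Linux. Illustrative sketch; it assumes the allocation was
 * created with {@code CUmemAllocationProp::requestedHandleTypes} set to CU_MEM_HANDLE_TYPE_POSIX_FILE_DESCRIPTOR, and the transport of the
 * descriptor (e.g. over a Unix domain socket) is outside these bindings.
 *
 *     // Exporting process:
 *     try (MemoryStack stack = stackPush()) {
 *         ByteBuffer fd = stack.malloc(4); // the shareable handle is an int file descriptor
 *         cuMemExportToShareableHandle(fd, handle.get(0), CU_MEM_HANDLE_TYPE_POSIX_FILE_DESCRIPTOR, 0);
 *         // ... send fd.getInt(0) to the importing process ...
 *     }
 *
 *     // Importing process (osFd received over IPC, passed by value as the void* osHandle):
 *     try (MemoryStack stack = stackPush()) {
 *         LongBuffer imported = stack.mallocLong(1);
 *         cuMemImportFromShareableHandle(imported, osFd, CU_MEM_HANDLE_TYPE_POSIX_FILE_DESCRIPTOR);
 *         // map with cuMemMap / cuMemSetAccess, release with cuMemRelease when done
 *     }
 */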
// --- [ cuMemGetAllocationGranularity ] ---
/** Unsafe version of: {@link #cuMemGetAllocationGranularity MemGetAllocationGranularity} */
public static int ncuMemGetAllocationGranularity(long granularity, long prop, int option) {
long __functionAddress = Functions.MemGetAllocationGranularity;
if (CHECKS) {
check(__functionAddress);
}
return callPPI(granularity, prop, option, __functionAddress);
}
/**
* Calculates either the minimal or recommended granularity.
*
* Calculates either the minimal or recommended granularity for a given allocation specification and returns it in granularity. This granularity can be
* used as a multiple for alignment, size, or address mapping.
*
* @param granularity returned granularity
 * @param prop property for which to determine the granularity
* @param option determines which granularity to return
*/
@NativeType("CUresult")
public static int cuMemGetAllocationGranularity(@NativeType("size_t *") PointerBuffer granularity, @NativeType("CUmemAllocationProp const *") CUmemAllocationProp prop, @NativeType("CUmemAllocationGranularity_flags") int option) {
if (CHECKS) {
check(granularity, 1);
}
return ncuMemGetAllocationGranularity(memAddress(granularity), prop.address(), option);
}
// --- [ cuMemGetAllocationPropertiesFromHandle ] ---
/** Unsafe version of: {@link #cuMemGetAllocationPropertiesFromHandle MemGetAllocationPropertiesFromHandle} */
public static int ncuMemGetAllocationPropertiesFromHandle(long prop, long handle) {
long __functionAddress = Functions.MemGetAllocationPropertiesFromHandle;
if (CHECKS) {
check(__functionAddress);
}
return callPJI(prop, handle, __functionAddress);
}
/**
* Retrieve the contents of the property structure defining properties for this handle.
*
* @param prop pointer to a properties structure which will hold the information about this handle
* @param handle handle which to perform the query on
*/
@NativeType("CUresult")
public static int cuMemGetAllocationPropertiesFromHandle(@NativeType("CUmemAllocationProp *") CUmemAllocationProp prop, @NativeType("CUmemGenericAllocationHandle") long handle) {
return ncuMemGetAllocationPropertiesFromHandle(prop.address(), handle);
}
// --- [ cuMemRetainAllocationHandle ] ---
/** Unsafe version of: {@link #cuMemRetainAllocationHandle MemRetainAllocationHandle} */
public static int ncuMemRetainAllocationHandle(long handle, long addr) {
long __functionAddress = Functions.MemRetainAllocationHandle;
if (CHECKS) {
check(__functionAddress);
}
return callPPI(handle, addr, __functionAddress);
}
/**
* Given an address {@code addr}, returns the allocation handle of the backing memory allocation.
*
* The handle is guaranteed to be the same handle value used to map the memory. If the address requested is not mapped, the function will fail. The
 * returned handle must be released with a corresponding number of calls to {@link #cuMemRelease MemRelease}.
*
* Note
*
 * The address {@code addr} can be any address in a range previously mapped by {@link #cuMemMap MemMap}, and not necessarily the start address.
*
* @param handle CUDA Memory handle for the backing memory allocation
* @param addr memory address to query, that has been mapped previously
*/
@NativeType("CUresult")
public static int cuMemRetainAllocationHandle(@NativeType("CUmemGenericAllocationHandle *") LongBuffer handle, @NativeType("void *") ByteBuffer addr) {
if (CHECKS) {
check(handle, 1);
}
return ncuMemRetainAllocationHandle(memAddress(handle), memAddress(addr));
}
// --- [ cuMemFreeAsync ] ---
/**
* Frees memory with stream ordered semantics.
*
* Inserts a free operation into {@code hStream}. The allocation must not be accessed after stream execution reaches the free. After this API returns,
* accessing the memory from any subsequent work launched on the GPU or querying its pointer attributes results in undefined behavior.
*
* Note
*
* During stream capture, this function results in the creation of a free node and must therefore be passed the address of a graph
* allocation.
*
* @param dptr memory to free
* @param hStream the stream establishing the stream ordering contract
*/
@NativeType("CUresult")
public static int cuMemFreeAsync(@NativeType("CUdeviceptr") long dptr, @NativeType("CUstream") long hStream) {
long __functionAddress = Functions.MemFreeAsync;
if (CHECKS) {
check(__functionAddress);
check(dptr);
}
return callPPI(dptr, hStream, __functionAddress);
}
// --- [ cuMemAllocAsync ] ---
/** Unsafe version of: {@link #cuMemAllocAsync MemAllocAsync} */
public static int ncuMemAllocAsync(long dptr, long bytesize, long hStream) {
long __functionAddress = Functions.MemAllocAsync;
if (CHECKS) {
check(__functionAddress);
}
return callPPPI(dptr, bytesize, hStream, __functionAddress);
}
/**
* Allocates memory with stream ordered semantics
*
* Inserts an allocation operation into {@code hStream}. A pointer to the allocated memory is returned immediately in {@code *dptr}. The allocation must
 * not be accessed until the allocation operation completes. The allocation comes from the memory pool current to the stream's device.
*
* Note
*
* The default memory pool of a device contains device memory from that device.
*
* Note
*
* Basic stream ordering allows future work submitted into the same stream to use the allocation. Stream query, stream synchronize, and CUDA
* events can be used to guarantee that the allocation operation completes before work submitted in a separate stream runs.
*
* Note
*
* During stream capture, this function results in the creation of an allocation node. In this case, the allocation is owned by the graph
* instead of the memory pool. The memory pool's properties are used to set the node's creation parameters.
*
* @param dptr returned device pointer
* @param bytesize number of bytes to allocate
* @param hStream the stream establishing the stream ordering contract and the memory pool to allocate from
*/
@NativeType("CUresult")
public static int cuMemAllocAsync(@NativeType("CUdeviceptr *") PointerBuffer dptr, @NativeType("size_t") long bytesize, @NativeType("CUstream") long hStream) {
if (CHECKS) {
check(dptr, 1);
}
return ncuMemAllocAsync(memAddress(dptr), bytesize, hStream);
}
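/*
 * Example: stream-ordered allocate/use/free. Illustrative sketch; hStream is assumed to be a valid CUstream created elsewhere and error checking is
 * omitted for brevity.
 *
 *     try (MemoryStack stack = stackPush()) {
 *         PointerBuffer dptr = stack.mallocPointer(1);
 *         cuMemAllocAsync(dptr, 1 << 20, hStream); // 1 MiB from the stream device's current pool
 *         long d = dptr.get(0);
 *         // ... enqueue kernels/copies on hStream that use d ...
 *         cuMemFreeAsync(d, hStream); // ordered after the work above in hStream
 *     }
 */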
// --- [ cuMemPoolTrimTo ] ---
/**
* Tries to release memory back to the OS.
*
* Releases memory back to the OS until the pool contains fewer than {@code minBytesToKeep} reserved bytes, or there is no more memory that the allocator
* can safely release. The allocator cannot release OS allocations that back outstanding asynchronous allocations. The OS allocations may happen at
* different granularity from the user allocations.
*
* Note
*
* Allocations that have not been freed count as outstanding.
*
* Note
*
 * Allocations that have been asynchronously freed but whose completion has not been observed on the host (e.g. by a synchronize) can count as
* outstanding.
*
* @param pool the memory pool to trim
 * @param minBytesToKeep if the pool has less than {@code minBytesToKeep} bytes reserved, the {@code TrimTo} operation is a no-op. Otherwise, the pool will be guaranteed
 * to have at least {@code minBytesToKeep} bytes reserved after the operation.
*/
@NativeType("CUresult")
public static int cuMemPoolTrimTo(@NativeType("CUmemoryPool") long pool, @NativeType("size_t") long minBytesToKeep) {
long __functionAddress = Functions.MemPoolTrimTo;
if (CHECKS) {
check(__functionAddress);
check(pool);
}
return callPPI(pool, minBytesToKeep, __functionAddress);
}
// --- [ cuMemPoolSetAttribute ] ---
/** Unsafe version of: {@link #cuMemPoolSetAttribute MemPoolSetAttribute} */
public static int ncuMemPoolSetAttribute(long pool, int attr, long value) {
long __functionAddress = Functions.MemPoolSetAttribute;
if (CHECKS) {
check(__functionAddress);
check(pool);
}
return callPPI(pool, attr, value, __functionAddress);
}
/**
* Sets attributes of a memory pool.
*
* @param pool the memory pool to modify
* @param attr the attribute to modify. One of:
 *             {@link #CU_MEMPOOL_ATTR_REUSE_FOLLOW_EVENT_DEPENDENCIES MEMPOOL_ATTR_REUSE_FOLLOW_EVENT_DEPENDENCIES}, {@link #CU_MEMPOOL_ATTR_REUSE_ALLOW_OPPORTUNISTIC MEMPOOL_ATTR_REUSE_ALLOW_OPPORTUNISTIC}, {@link #CU_MEMPOOL_ATTR_REUSE_ALLOW_INTERNAL_DEPENDENCIES MEMPOOL_ATTR_REUSE_ALLOW_INTERNAL_DEPENDENCIES}, {@link #CU_MEMPOOL_ATTR_RELEASE_THRESHOLD MEMPOOL_ATTR_RELEASE_THRESHOLD}, {@link #CU_MEMPOOL_ATTR_RESERVED_MEM_CURRENT MEMPOOL_ATTR_RESERVED_MEM_CURRENT}, {@link #CU_MEMPOOL_ATTR_RESERVED_MEM_HIGH MEMPOOL_ATTR_RESERVED_MEM_HIGH}, {@link #CU_MEMPOOL_ATTR_USED_MEM_CURRENT MEMPOOL_ATTR_USED_MEM_CURRENT}, {@link #CU_MEMPOOL_ATTR_USED_MEM_HIGH MEMPOOL_ATTR_USED_MEM_HIGH}
* @param value pointer to the value to assign
*/
@NativeType("CUresult")
public static int cuMemPoolSetAttribute(@NativeType("CUmemoryPool") long pool, @NativeType("CUmemPool_attribute") int attr, @NativeType("void *") ByteBuffer value) {
return ncuMemPoolSetAttribute(pool, attr, memAddress(value));
}
/**
* Sets attributes of a memory pool.
*
* @param pool the memory pool to modify
* @param attr the attribute to modify. One of:
 *             {@link #CU_MEMPOOL_ATTR_REUSE_FOLLOW_EVENT_DEPENDENCIES MEMPOOL_ATTR_REUSE_FOLLOW_EVENT_DEPENDENCIES}, {@link #CU_MEMPOOL_ATTR_REUSE_ALLOW_OPPORTUNISTIC MEMPOOL_ATTR_REUSE_ALLOW_OPPORTUNISTIC}, {@link #CU_MEMPOOL_ATTR_REUSE_ALLOW_INTERNAL_DEPENDENCIES MEMPOOL_ATTR_REUSE_ALLOW_INTERNAL_DEPENDENCIES}, {@link #CU_MEMPOOL_ATTR_RELEASE_THRESHOLD MEMPOOL_ATTR_RELEASE_THRESHOLD}, {@link #CU_MEMPOOL_ATTR_RESERVED_MEM_CURRENT MEMPOOL_ATTR_RESERVED_MEM_CURRENT}, {@link #CU_MEMPOOL_ATTR_RESERVED_MEM_HIGH MEMPOOL_ATTR_RESERVED_MEM_HIGH}, {@link #CU_MEMPOOL_ATTR_USED_MEM_CURRENT MEMPOOL_ATTR_USED_MEM_CURRENT}, {@link #CU_MEMPOOL_ATTR_USED_MEM_HIGH MEMPOOL_ATTR_USED_MEM_HIGH}
* @param value pointer to the value to assign
*/
@NativeType("CUresult")
public static int cuMemPoolSetAttribute(@NativeType("CUmemoryPool") long pool, @NativeType("CUmemPool_attribute") int attr, @NativeType("void *") IntBuffer value) {
return ncuMemPoolSetAttribute(pool, attr, memAddress(value));
}
/**
* Sets attributes of a memory pool.
*
* @param pool the memory pool to modify
* @param attr the attribute to modify. One of:
 *             {@link #CU_MEMPOOL_ATTR_REUSE_FOLLOW_EVENT_DEPENDENCIES MEMPOOL_ATTR_REUSE_FOLLOW_EVENT_DEPENDENCIES}, {@link #CU_MEMPOOL_ATTR_REUSE_ALLOW_OPPORTUNISTIC MEMPOOL_ATTR_REUSE_ALLOW_OPPORTUNISTIC}, {@link #CU_MEMPOOL_ATTR_REUSE_ALLOW_INTERNAL_DEPENDENCIES MEMPOOL_ATTR_REUSE_ALLOW_INTERNAL_DEPENDENCIES}, {@link #CU_MEMPOOL_ATTR_RELEASE_THRESHOLD MEMPOOL_ATTR_RELEASE_THRESHOLD}, {@link #CU_MEMPOOL_ATTR_RESERVED_MEM_CURRENT MEMPOOL_ATTR_RESERVED_MEM_CURRENT}, {@link #CU_MEMPOOL_ATTR_RESERVED_MEM_HIGH MEMPOOL_ATTR_RESERVED_MEM_HIGH}, {@link #CU_MEMPOOL_ATTR_USED_MEM_CURRENT MEMPOOL_ATTR_USED_MEM_CURRENT}, {@link #CU_MEMPOOL_ATTR_USED_MEM_HIGH MEMPOOL_ATTR_USED_MEM_HIGH}
* @param value pointer to the value to assign
*/
@NativeType("CUresult")
public static int cuMemPoolSetAttribute(@NativeType("CUmemoryPool") long pool, @NativeType("CUmemPool_attribute") int attr, @NativeType("void *") LongBuffer value) {
return ncuMemPoolSetAttribute(pool, attr, memAddress(value));
}
// --- [ cuMemPoolGetAttribute ] ---
/** Unsafe version of: {@link #cuMemPoolGetAttribute MemPoolGetAttribute} */
public static int ncuMemPoolGetAttribute(long pool, int attr, long value) {
long __functionAddress = Functions.MemPoolGetAttribute;
if (CHECKS) {
check(__functionAddress);
check(pool);
}
return callPPI(pool, attr, value, __functionAddress);
}
/**
* Gets attributes of a memory pool.
*
* @param pool the memory pool to get attributes of
* @param attr the attribute to get. One of:
 *             {@link #CU_MEMPOOL_ATTR_REUSE_FOLLOW_EVENT_DEPENDENCIES MEMPOOL_ATTR_REUSE_FOLLOW_EVENT_DEPENDENCIES}, {@link #CU_MEMPOOL_ATTR_REUSE_ALLOW_OPPORTUNISTIC MEMPOOL_ATTR_REUSE_ALLOW_OPPORTUNISTIC}, {@link #CU_MEMPOOL_ATTR_REUSE_ALLOW_INTERNAL_DEPENDENCIES MEMPOOL_ATTR_REUSE_ALLOW_INTERNAL_DEPENDENCIES}, {@link #CU_MEMPOOL_ATTR_RELEASE_THRESHOLD MEMPOOL_ATTR_RELEASE_THRESHOLD}, {@link #CU_MEMPOOL_ATTR_RESERVED_MEM_CURRENT MEMPOOL_ATTR_RESERVED_MEM_CURRENT}, {@link #CU_MEMPOOL_ATTR_RESERVED_MEM_HIGH MEMPOOL_ATTR_RESERVED_MEM_HIGH}, {@link #CU_MEMPOOL_ATTR_USED_MEM_CURRENT MEMPOOL_ATTR_USED_MEM_CURRENT}, {@link #CU_MEMPOOL_ATTR_USED_MEM_HIGH MEMPOOL_ATTR_USED_MEM_HIGH}
* @param value retrieved value
*/
@NativeType("CUresult")
public static int cuMemPoolGetAttribute(@NativeType("CUmemoryPool") long pool, @NativeType("CUmemPool_attribute") int attr, @NativeType("void *") ByteBuffer value) {
return ncuMemPoolGetAttribute(pool, attr, memAddress(value));
}
/**
* Gets attributes of a memory pool.
*
* @param pool the memory pool to get attributes of
* @param attr the attribute to get. One of:
 *             {@link #CU_MEMPOOL_ATTR_REUSE_FOLLOW_EVENT_DEPENDENCIES MEMPOOL_ATTR_REUSE_FOLLOW_EVENT_DEPENDENCIES}, {@link #CU_MEMPOOL_ATTR_REUSE_ALLOW_OPPORTUNISTIC MEMPOOL_ATTR_REUSE_ALLOW_OPPORTUNISTIC}, {@link #CU_MEMPOOL_ATTR_REUSE_ALLOW_INTERNAL_DEPENDENCIES MEMPOOL_ATTR_REUSE_ALLOW_INTERNAL_DEPENDENCIES}, {@link #CU_MEMPOOL_ATTR_RELEASE_THRESHOLD MEMPOOL_ATTR_RELEASE_THRESHOLD}, {@link #CU_MEMPOOL_ATTR_RESERVED_MEM_CURRENT MEMPOOL_ATTR_RESERVED_MEM_CURRENT}, {@link #CU_MEMPOOL_ATTR_RESERVED_MEM_HIGH MEMPOOL_ATTR_RESERVED_MEM_HIGH}, {@link #CU_MEMPOOL_ATTR_USED_MEM_CURRENT MEMPOOL_ATTR_USED_MEM_CURRENT}, {@link #CU_MEMPOOL_ATTR_USED_MEM_HIGH MEMPOOL_ATTR_USED_MEM_HIGH}
* @param value retrieved value
*/
@NativeType("CUresult")
public static int cuMemPoolGetAttribute(@NativeType("CUmemoryPool") long pool, @NativeType("CUmemPool_attribute") int attr, @NativeType("void *") IntBuffer value) {
return ncuMemPoolGetAttribute(pool, attr, memAddress(value));
}
/**
* Gets attributes of a memory pool.
*
* @param pool the memory pool to get attributes of
* @param attr the attribute to get. One of:
 *             {@link #CU_MEMPOOL_ATTR_REUSE_FOLLOW_EVENT_DEPENDENCIES MEMPOOL_ATTR_REUSE_FOLLOW_EVENT_DEPENDENCIES}, {@link #CU_MEMPOOL_ATTR_REUSE_ALLOW_OPPORTUNISTIC MEMPOOL_ATTR_REUSE_ALLOW_OPPORTUNISTIC}, {@link #CU_MEMPOOL_ATTR_REUSE_ALLOW_INTERNAL_DEPENDENCIES MEMPOOL_ATTR_REUSE_ALLOW_INTERNAL_DEPENDENCIES}, {@link #CU_MEMPOOL_ATTR_RELEASE_THRESHOLD MEMPOOL_ATTR_RELEASE_THRESHOLD}, {@link #CU_MEMPOOL_ATTR_RESERVED_MEM_CURRENT MEMPOOL_ATTR_RESERVED_MEM_CURRENT}, {@link #CU_MEMPOOL_ATTR_RESERVED_MEM_HIGH MEMPOOL_ATTR_RESERVED_MEM_HIGH}, {@link #CU_MEMPOOL_ATTR_USED_MEM_CURRENT MEMPOOL_ATTR_USED_MEM_CURRENT}, {@link #CU_MEMPOOL_ATTR_USED_MEM_HIGH MEMPOOL_ATTR_USED_MEM_HIGH}
* @param value retrieved value
*/
@NativeType("CUresult")
public static int cuMemPoolGetAttribute(@NativeType("CUmemoryPool") long pool, @NativeType("CUmemPool_attribute") int attr, @NativeType("void *") LongBuffer value) {
return ncuMemPoolGetAttribute(pool, attr, memAddress(value));
}
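/*
 * Example: raising the release threshold of a device's default pool so memory is retained across synchronizations instead of being returned to the
 * OS. Illustrative sketch; the cuDeviceGetDefaultMemPool signature is assumed from its driver API counterpart and error checking is omitted for
 * brevity.
 *
 *     try (MemoryStack stack = stackPush()) {
 *         PointerBuffer pPool = stack.mallocPointer(1);
 *         cuDeviceGetDefaultMemPool(pPool, 0); // default pool of device 0
 *
 *         LongBuffer threshold = stack.longs(0xFFFFFFFFFFFFFFFFL); // UINT64_MAX: effectively never release
 *         cuMemPoolSetAttribute(pPool.get(0), CU_MEMPOOL_ATTR_RELEASE_THRESHOLD, threshold);
 *     }
 */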
// --- [ cuMemPoolSetAccess ] ---
/**
* Unsafe version of: {@link #cuMemPoolSetAccess MemPoolSetAccess}
*
* @param count number of descriptors in the map array
*/
public static int ncuMemPoolSetAccess(long pool, long map, long count) {
long __functionAddress = Functions.MemPoolSetAccess;
if (CHECKS) {
check(__functionAddress);
check(pool);
}
return callPPPI(pool, map, count, __functionAddress);
}
/**
* Controls visibility of pools between devices.
*
* @param pool the pool being modified
 * @param map array of access descriptors. Each descriptor specifies the access to enable for a single GPU.
*/
@NativeType("CUresult")
public static int cuMemPoolSetAccess(@NativeType("CUmemoryPool") long pool, @NativeType("CUmemAccessDesc const *") CUmemAccessDesc.Buffer map) {
return ncuMemPoolSetAccess(pool, map.address(), map.remaining());
}
// --- [ cuMemPoolGetAccess ] ---
/** Unsafe version of: {@link #cuMemPoolGetAccess MemPoolGetAccess} */
public static int ncuMemPoolGetAccess(long flags, long memPool, long location) {
long __functionAddress = Functions.MemPoolGetAccess;
if (CHECKS) {
check(__functionAddress);
check(memPool);
}
return callPPPI(flags, memPool, location, __functionAddress);
}
/**
* Returns the accessibility of a pool from a device.
*
* Returns the accessibility of the pool's memory from the specified location.
*
* @param flags the accessibility of the pool from the specified location
* @param memPool the pool being queried
* @param location the location accessing the pool
*/
@NativeType("CUresult")
public static int cuMemPoolGetAccess(@NativeType("CUmemAccess_flags *") IntBuffer flags, @NativeType("CUmemoryPool") long memPool, @NativeType("CUmemLocation *") CUmemLocation location) {
if (CHECKS) {
check(flags, 1);
}
return ncuMemPoolGetAccess(memAddress(flags), memPool, location.address());
}
// --- [ cuMemPoolCreate ] ---
/** Unsafe version of: {@link #cuMemPoolCreate MemPoolCreate} */
public static int ncuMemPoolCreate(long pool, long poolProps) {
long __functionAddress = Functions.MemPoolCreate;
if (CHECKS) {
check(__functionAddress);
}
return callPPI(pool, poolProps, __functionAddress);
}
/**
* Creates a memory pool.
*
* Creates a CUDA memory pool and returns the handle in {@code pool}. The {@code poolProps} determines the properties of the pool such as the backing
* device and IPC capabilities.
*
* By default, the pool's memory will be accessible from the device it is allocated on.
*
* Note
*
* Specifying {@link #CU_MEM_HANDLE_TYPE_NONE MEM_HANDLE_TYPE_NONE} creates a memory pool that will not support IPC.
*/
@NativeType("CUresult")
public static int cuMemPoolCreate(@NativeType("CUmemoryPool *") PointerBuffer pool, @NativeType("CUmemPoolProps const *") CUmemPoolProps poolProps) {
if (CHECKS) {
check(pool, 1);
}
return ncuMemPoolCreate(memAddress(pool), poolProps.address());
}
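/*
 * Example: creating an explicit, non-IPC pool on device 0 and allocating from it. Illustrative sketch; the CUmemPoolProps field names mirror the
 * CUDA driver API, hStream is a valid CUstream assumed created elsewhere, and error checking is omitted for brevity.
 *
 *     try (MemoryStack stack = stackPush()) {
 *         CUmemPoolProps props = CUmemPoolProps.calloc(stack)
 *             .allocType(CU_MEM_ALLOCATION_TYPE_PINNED)
 *             .handleTypes(CU_MEM_HANDLE_TYPE_NONE); // no IPC support
 *         props.location().type(CU_MEM_LOCATION_TYPE_DEVICE).id(0);
 *
 *         PointerBuffer pPool = stack.mallocPointer(1);
 *         cuMemPoolCreate(pPool, props);
 *
 *         PointerBuffer dptr = stack.mallocPointer(1);
 *         cuMemAllocFromPoolAsync(dptr, 1 << 20, pPool.get(0), hStream); // see cuMemAllocFromPoolAsync below
 *     }
 */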
// --- [ cuMemPoolDestroy ] ---
/**
* Destroys the specified memory pool.
*
* If any pointers obtained from this pool haven't been freed or the pool has free operations that haven't completed when {@link #cuMemPoolDestroy MemPoolDestroy} is invoked,
* the function will return immediately and the resources associated with the pool will be released automatically once there are no more outstanding
* allocations.
*
 * Destroying the current mempool of a device sets that device's default mempool as its current mempool.
*
* Note
*
* A device's default memory pool cannot be destroyed.
*/
@NativeType("CUresult")
public static int cuMemPoolDestroy(@NativeType("CUmemoryPool") long pool) {
long __functionAddress = Functions.MemPoolDestroy;
if (CHECKS) {
check(__functionAddress);
check(pool);
}
return callPI(pool, __functionAddress);
}
// --- [ cuMemAllocFromPoolAsync ] ---
/** Unsafe version of: {@link #cuMemAllocFromPoolAsync MemAllocFromPoolAsync} */
public static int ncuMemAllocFromPoolAsync(long dptr, long bytesize, long pool, long hStream) {
long __functionAddress = Functions.MemAllocFromPoolAsync;
if (CHECKS) {
check(__functionAddress);
check(pool);
}
return callPPPPI(dptr, bytesize, pool, hStream, __functionAddress);
}
/**
* Allocates memory from a specified pool with stream ordered semantics.
*
* Inserts an allocation operation into {@code hStream}. A pointer to the allocated memory is returned immediately in {@code *dptr}. The allocation must
 * not be accessed until the allocation operation completes. The allocation comes from the specified memory pool.
*
* Note
*
* The specified memory pool may be from a device different than that of the specified {@code hStream}.
 *
 * Note
 *
 * Basic stream ordering allows future work submitted into the same stream to use the allocation. Stream query, stream synchronize, and CUDA events
 * can be used to guarantee that the allocation operation completes before work submitted in a separate stream runs.
 *
* Note
*
* During stream capture, this function results in the creation of an allocation node. In this case, the allocation is owned by the graph
* instead of the memory pool. The memory pool's properties are used to set the node's creation parameters.
*
* @param dptr returned device pointer
* @param bytesize number of bytes to allocate
* @param pool the pool to allocate from
* @param hStream the stream establishing the stream ordering semantic
*/
@NativeType("CUresult")
public static int cuMemAllocFromPoolAsync(@NativeType("CUdeviceptr *") PointerBuffer dptr, @NativeType("size_t") long bytesize, @NativeType("CUmemoryPool") long pool, @NativeType("CUstream") long hStream) {
if (CHECKS) {
check(dptr, 1);
}
return ncuMemAllocFromPoolAsync(memAddress(dptr), bytesize, pool, hStream);
}
// --- [ cuMemPoolExportToShareableHandle ] ---
/** Unsafe version of: {@link #cuMemPoolExportToShareableHandle MemPoolExportToShareableHandle} */
public static int ncuMemPoolExportToShareableHandle(long handle_out, long pool, int handleType, long flags) {
long __functionAddress = Functions.MemPoolExportToShareableHandle;
if (CHECKS) {
check(__functionAddress);
check(pool);
}
return callPPJI(handle_out, pool, handleType, flags, __functionAddress);
}
/**
* Exports a memory pool to the requested handle type.
*
* Given an IPC capable mempool, create an OS handle to share the pool with another process. A recipient process can convert the shareable handle into a
* mempool with {@link #cuMemPoolImportFromShareableHandle MemPoolImportFromShareableHandle}. Individual pointers can then be shared with the {@link #cuMemPoolExportPointer MemPoolExportPointer} and
* {@link #cuMemPoolImportPointer MemPoolImportPointer} APIs. The implementation of what the shareable handle is and how it can be transferred is defined by the requested handle
* type.
*
* Note
*
* To create an IPC capable mempool, create a mempool with a {@code CUmemAllocationHandleType} other than {@link #CU_MEM_HANDLE_TYPE_NONE MEM_HANDLE_TYPE_NONE}.
*
* @param handle_out returned OS handle
* @param pool pool to export
* @param handleType the type of handle to create
* @param flags must be 0
*/
@NativeType("CUresult")
public static int cuMemPoolExportToShareableHandle(@NativeType("void *") ByteBuffer handle_out, @NativeType("CUmemoryPool") long pool, @NativeType("CUmemAllocationHandleType") int handleType, @NativeType("unsigned long long") long flags) {
return ncuMemPoolExportToShareableHandle(memAddress(handle_out), pool, handleType, flags);
}
/**
* Exports a memory pool to the requested handle type.
*
* Given an IPC capable mempool, create an OS handle to share the pool with another process. A recipient process can convert the shareable handle into a
* mempool with {@link #cuMemPoolImportFromShareableHandle MemPoolImportFromShareableHandle}. Individual pointers can then be shared with the {@link #cuMemPoolExportPointer MemPoolExportPointer} and
* {@link #cuMemPoolImportPointer MemPoolImportPointer} APIs. The implementation of what the shareable handle is and how it can be transferred is defined by the requested handle
* type.
*
* Note
*
* To create an IPC capable mempool, create a mempool with a {@code CUmemAllocationHandleType} other than {@link #CU_MEM_HANDLE_TYPE_NONE MEM_HANDLE_TYPE_NONE}.
*
* @param handle_out returned OS handle
* @param pool pool to export
* @param handleType the type of handle to create
* @param flags must be 0
*/
@NativeType("CUresult")
public static int cuMemPoolExportToShareableHandle(@NativeType("void *") PointerBuffer handle_out, @NativeType("CUmemoryPool") long pool, @NativeType("CUmemAllocationHandleType") int handleType, @NativeType("unsigned long long") long flags) {
return ncuMemPoolExportToShareableHandle(memAddress(handle_out), pool, handleType, flags);
}
// --- [ cuMemPoolImportFromShareableHandle ] ---
/** Unsafe version of: {@link #cuMemPoolImportFromShareableHandle MemPoolImportFromShareableHandle} */
public static int ncuMemPoolImportFromShareableHandle(long pool_out, long handle, int handleType, long flags) {
long __functionAddress = Functions.MemPoolImportFromShareableHandle;
if (CHECKS) {
check(__functionAddress);
}
return callPPJI(pool_out, handle, handleType, flags, __functionAddress);
}
/**
* Imports a memory pool from a shared handle.
*
* Specific allocations can be imported from the imported pool with {@link #cuMemPoolImportPointer MemPoolImportPointer}.
*
* Note
*
 * Imported memory pools do not support creating new allocations. As such, imported memory pools may not be used in {@link #cuDeviceSetMemPool DeviceSetMemPool} or
* {@link #cuMemAllocFromPoolAsync MemAllocFromPoolAsync} calls.
*
* @param pool_out returned memory pool
* @param handle OS handle of the pool to open
* @param handleType the type of handle being imported
* @param flags must be 0
*/
@NativeType("CUresult")
public static int cuMemPoolImportFromShareableHandle(@NativeType("CUmemoryPool *") PointerBuffer pool_out, @NativeType("void *") ByteBuffer handle, @NativeType("CUmemAllocationHandleType") int handleType, @NativeType("unsigned long long") long flags) {
if (CHECKS) {
check(pool_out, 1);
}
return ncuMemPoolImportFromShareableHandle(memAddress(pool_out), memAddress(handle), handleType, flags);
}
/**
* Imports a memory pool from a shared handle.
*
* Specific allocations can be imported from the imported pool with {@link #cuMemPoolImportPointer MemPoolImportPointer}.
*
* Note
*
 * Imported memory pools do not support creating new allocations. As such, imported memory pools may not be used in {@link #cuDeviceSetMemPool DeviceSetMemPool} or
* {@link #cuMemAllocFromPoolAsync MemAllocFromPoolAsync} calls.
*
* @param pool_out returned memory pool
* @param handle OS handle of the pool to open
* @param handleType the type of handle being imported
* @param flags must be 0
*/
@NativeType("CUresult")
public static int cuMemPoolImportFromShareableHandle(@NativeType("CUmemoryPool *") PointerBuffer pool_out, @NativeType("void *") PointerBuffer handle, @NativeType("CUmemAllocationHandleType") int handleType, @NativeType("unsigned long long") long flags) {
if (CHECKS) {
check(pool_out, 1);
}
return ncuMemPoolImportFromShareableHandle(memAddress(pool_out), memAddress(handle), handleType, flags);
}
// --- [ cuMemPoolExportPointer ] ---
/** Unsafe version of: {@link #cuMemPoolExportPointer MemPoolExportPointer} */
public static int ncuMemPoolExportPointer(long shareData_out, long ptr) {
long __functionAddress = Functions.MemPoolExportPointer;
if (CHECKS) {
check(__functionAddress);
check(ptr);
}
return callPPI(shareData_out, ptr, __functionAddress);
}
/**
* Export data to share a memory pool allocation between processes.
*
* Constructs {@code shareData_out} for sharing a specific allocation from an already shared memory pool. The recipient process can import the allocation
 * with the {@link #cuMemPoolImportPointer MemPoolImportPointer} API. The data is not a handle and may be shared through any IPC mechanism.
*
* @param shareData_out returned export data
* @param ptr pointer to memory being exported
*/
@NativeType("CUresult")
public static int cuMemPoolExportPointer(@NativeType("CUmemPoolPtrExportData *") CUmemPoolPtrExportData shareData_out, @NativeType("CUdeviceptr") long ptr) {
return ncuMemPoolExportPointer(shareData_out.address(), ptr);
}
// --- [ cuMemPoolImportPointer ] ---
/** Unsafe version of: {@link #cuMemPoolImportPointer MemPoolImportPointer} */
public static int ncuMemPoolImportPointer(long ptr_out, long pool, long shareData) {
long __functionAddress = Functions.MemPoolImportPointer;
if (CHECKS) {
check(__functionAddress);
check(pool);
}
return callPPPI(ptr_out, pool, shareData, __functionAddress);
}
/**
* Import a memory pool allocation from another process.
*
* Returns in {@code ptr_out} a pointer to the imported memory. The imported memory must not be accessed before the allocation operation completes in the
* exporting process. The imported memory must be freed from all importing processes before being freed in the exporting process. The pointer may be freed
* with {@link #cuMemFree MemFree} or {@link #cuMemFreeAsync MemFreeAsync}. If {@code cuMemFreeAsync} is used, the free must be completed on the importing process before the free operation on
* the exporting process.
*
* Note: The {@code cuMemFreeAsync} API may be used in the exporting process before the {@code cuMemFreeAsync} operation completes in its stream, as long
* as the {@code cuMemFreeAsync} in the exporting process specifies a stream with a stream dependency on the importing process's {@code cuMemFreeAsync}.
*
* @param ptr_out pointer to imported memory
* @param pool pool from which to import
* @param shareData data specifying the memory to import
*/
@NativeType("CUresult")
public static int cuMemPoolImportPointer(@NativeType("CUdeviceptr *") PointerBuffer ptr_out, @NativeType("CUmemoryPool") long pool, @NativeType("CUmemPoolPtrExportData *") CUmemPoolPtrExportData shareData) {
if (CHECKS) {
check(ptr_out, 1);
}
return ncuMemPoolImportPointer(memAddress(ptr_out), pool, shareData.address());
}
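// Usage sketch (editor's addition): sharing one allocation from an already-shared pool. `ptr` in the
// exporting process and `importedPool` in the importing process are hypothetical; the export data is an
// opaque blob that may travel over any IPC mechanism.
/*
// Exporting process:
try (MemoryStack stack = stackPush()) {
    CUmemPoolPtrExportData shareData = CUmemPoolPtrExportData.calloc(stack);
    if (cuMemPoolExportPointer(shareData, ptr) != CUDA_SUCCESS) {
        throw new IllegalStateException("pointer export failed");
    }
    // copy the struct's bytes to the importing process
}

// Importing process (shareData reconstructed from the received bytes):
try (MemoryStack stack = stackPush()) {
    PointerBuffer ptrOut = stack.mallocPointer(1);
    if (cuMemPoolImportPointer(ptrOut, importedPool, shareData) != CUDA_SUCCESS) {
        throw new IllegalStateException("pointer import failed");
    }
    long devPtr = ptrOut.get(0); // free here (cuMemFree/cuMemFreeAsync) before the exporter frees it
}
*/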
// --- [ cuPointerGetAttribute ] ---
/** Unsafe version of: {@link #cuPointerGetAttribute PointerGetAttribute} */
public static int ncuPointerGetAttribute(long data, int attribute, long ptr) {
long __functionAddress = Functions.PointerGetAttribute;
if (CHECKS) {
check(__functionAddress);
check(ptr);
}
return callPPI(data, attribute, ptr, __functionAddress);
}
/**
* Returns information about a pointer.
*
* The supported attributes are:
*
*
* - {@link #CU_POINTER_ATTRIBUTE_CONTEXT POINTER_ATTRIBUTE_CONTEXT}: Returns in {@code *data} the {@code CUcontext} in which {@code ptr} was allocated or registered. The type of
* {@code data} must be {@code CUcontext *}.
*
* If {@code ptr} was not allocated by, mapped by, or registered with a {@code CUcontext} which uses unified virtual addressing then
* {@link #CUDA_ERROR_INVALID_VALUE} is returned.
* - {@link #CU_POINTER_ATTRIBUTE_MEMORY_TYPE POINTER_ATTRIBUTE_MEMORY_TYPE}:
*
* Returns in {@code *data} the physical memory type of the memory that {@code ptr} addresses as a {@code CUmemorytype} enumerated value. The type of
* {@code data} must be unsigned int.
*
* If {@code ptr} addresses device memory then {@code *data} is set to {@link #CU_MEMORYTYPE_DEVICE MEMORYTYPE_DEVICE}. The particular {@code CUdevice} on which the memory resides
* is the {@code CUdevice} of the {@code CUcontext} returned by the {@link #CU_POINTER_ATTRIBUTE_CONTEXT POINTER_ATTRIBUTE_CONTEXT} attribute of {@code ptr}.
*
* If {@code ptr} addresses host memory then {@code *data} is set to {@link #CU_MEMORYTYPE_HOST MEMORYTYPE_HOST}.
*
* If {@code ptr} was not allocated by, mapped by, or registered with a {@code CUcontext} which uses unified virtual addressing then
* {@link #CUDA_ERROR_INVALID_VALUE} is returned.
*
* If the current {@code CUcontext} does not support unified virtual addressing then {@link #CUDA_ERROR_INVALID_CONTEXT} is returned.
* - {@link #CU_POINTER_ATTRIBUTE_DEVICE_POINTER POINTER_ATTRIBUTE_DEVICE_POINTER}: Returns in {@code *data} the device pointer value through which {@code ptr} may be accessed by kernels running
* in the current {@code CUcontext}. The type of {@code data} must be {@code CUdeviceptr *}.
*
* If there exists no device pointer value through which kernels running in the current {@code CUcontext} may access {@code ptr} then
* {@link #CUDA_ERROR_INVALID_VALUE} is returned.
*
* If there is no current {@code CUcontext} then {@link #CUDA_ERROR_INVALID_CONTEXT} is returned.
*
* Except in the exceptional disjoint addressing cases discussed below, the value returned in {@code *data} will equal the input value {@code ptr}.
* - {@link #CU_POINTER_ATTRIBUTE_HOST_POINTER POINTER_ATTRIBUTE_HOST_POINTER}: Returns in {@code *data} the host pointer value through which {@code ptr} may be accessed by the host program.
* The type of {@code data} must be {@code void **}. If there exists no host pointer value through which the host program may directly access
* {@code ptr} then {@link #CUDA_ERROR_INVALID_VALUE} is returned.
*
* Except in the exceptional disjoint addressing cases discussed below, the value returned in {@code *data} will equal the input value {@code ptr}.
* - {@link #CU_POINTER_ATTRIBUTE_P2P_TOKENS POINTER_ATTRIBUTE_P2P_TOKENS}: Returns in {@code *data} two tokens for use with the nv-p2p.h Linux kernel interface. {@code data} must be a struct
* of type {@link CUDA_POINTER_ATTRIBUTE_P2P_TOKENS}.
*
* {@code ptr} must be a pointer to memory obtained from {@link #cuMemAlloc MemAlloc}. Note that {@code p2pToken} and {@code vaSpaceToken} are only valid for the
* lifetime of the source allocation. A subsequent allocation at the same address may return completely different tokens. Querying this attribute has
* a side effect of setting the attribute {@link #CU_POINTER_ATTRIBUTE_SYNC_MEMOPS POINTER_ATTRIBUTE_SYNC_MEMOPS} for the region of memory that {@code ptr} points to.
* - {@link #CU_POINTER_ATTRIBUTE_SYNC_MEMOPS POINTER_ATTRIBUTE_SYNC_MEMOPS}:
*
* A boolean attribute which, when set, ensures that synchronous memory operations initiated on the region of memory that {@code ptr} points to will
* always synchronize. See further documentation in the section titled "API synchronization behavior" to learn more about cases when synchronous
* memory operations can exhibit asynchronous behavior.
* - {@link #CU_POINTER_ATTRIBUTE_BUFFER_ID POINTER_ATTRIBUTE_BUFFER_ID}: Returns in {@code *data} a buffer ID which is guaranteed to be unique within the process. {@code data} must point to
* an unsigned long long.
*
* {@code ptr} must be a pointer to memory obtained from a CUDA memory allocation API. Every memory allocation from any of the CUDA memory allocation
* APIs will have a unique ID over a process lifetime. Subsequent allocations do not reuse IDs from previous freed allocations. IDs are only unique
* within a single process.
* - {@link #CU_POINTER_ATTRIBUTE_IS_MANAGED POINTER_ATTRIBUTE_IS_MANAGED}: Returns in {@code *data} a boolean that indicates whether the pointer points to managed memory or not.
*
* If {@code ptr} is not a valid CUDA pointer then {@link #CUDA_ERROR_INVALID_VALUE} is returned.
* - {@link #CU_POINTER_ATTRIBUTE_DEVICE_ORDINAL POINTER_ATTRIBUTE_DEVICE_ORDINAL}: Returns in {@code *data} an integer representing a device ordinal of a device against which the memory was
* allocated or registered.
* - {@link #CU_POINTER_ATTRIBUTE_IS_LEGACY_CUDA_IPC_CAPABLE POINTER_ATTRIBUTE_IS_LEGACY_CUDA_IPC_CAPABLE}: Returns in {@code *data} a boolean that indicates if this pointer maps to an allocation that is
* suitable for {@code cudaIpcGetMemHandle()}.
* - {@link #CU_POINTER_ATTRIBUTE_RANGE_START_ADDR POINTER_ATTRIBUTE_RANGE_START_ADDR}: Returns in {@code *data} the starting address for the allocation referenced by the device pointer {@code ptr}.
* Note that this is not necessarily the address of the mapped region, but the address of the mappable address range {@code ptr} references (e.g. from
* {@link #cuMemAddressReserve MemAddressReserve}).
* - {@link #CU_POINTER_ATTRIBUTE_RANGE_SIZE POINTER_ATTRIBUTE_RANGE_SIZE}: Returns in {@code *data} the size for the allocation referenced by the device pointer {@code ptr}. Note that this is
* not necessarily the size of the mapped region, but the size of the mappable address range {@code ptr} references (e.g. from {@link #cuMemAddressReserve MemAddressReserve}).
* To retrieve the size of the mapped region, see {@link #cuMemGetAddressRange MemGetAddressRange}.
* - {@link #CU_POINTER_ATTRIBUTE_MAPPED POINTER_ATTRIBUTE_MAPPED}: Returns in {@code *data} a boolean that indicates if this pointer is in a valid address range that is mapped to a
* backing allocation.
* - {@link #CU_POINTER_ATTRIBUTE_ALLOWED_HANDLE_TYPES POINTER_ATTRIBUTE_ALLOWED_HANDLE_TYPES}: Returns a bitmask of the allowed handle types for an allocation that may be passed to
* {@link #cuMemExportToShareableHandle MemExportToShareableHandle}.
* - {@link #CU_POINTER_ATTRIBUTE_MEMPOOL_HANDLE POINTER_ATTRIBUTE_MEMPOOL_HANDLE}: Returns in {@code *data} the handle to the mempool that the allocation was obtained from.
*
*
* Note that for most allocations in the unified virtual address space the host and device pointer for accessing the allocation will be the same. The
* exceptions to this are:
*
* - user memory registered using {@link #cuMemHostRegister MemHostRegister}
* - host memory allocated using {@link #cuMemHostAlloc MemHostAlloc} with the {@link #CU_MEMHOSTALLOC_WRITECOMBINED MEMHOSTALLOC_WRITECOMBINED} flag
*
* For these types of allocation there will exist separate, disjoint host and device addresses for accessing the allocation. In particular:
*
*
* - The host address will correspond to an invalid unmapped device address (which will result in an exception if accessed from the device).
* - The device address will correspond to an invalid unmapped host address (which will result in an exception if accessed from the host).
*
*
* For these types of allocations, querying {@link #CU_POINTER_ATTRIBUTE_HOST_POINTER POINTER_ATTRIBUTE_HOST_POINTER} and {@link #CU_POINTER_ATTRIBUTE_DEVICE_POINTER POINTER_ATTRIBUTE_DEVICE_POINTER} may be used to retrieve the host and
* device addresses from either address.
*
* @param data returned pointer attribute value
* @param attribute pointer attribute to query
* @param ptr pointer
*/
@NativeType("CUresult")
public static int cuPointerGetAttribute(@NativeType("void *") ByteBuffer data, @NativeType("CUpointer_attribute") int attribute, @NativeType("CUdeviceptr") long ptr) {
return ncuPointerGetAttribute(memAddress(data), attribute, ptr);
}
/**
 * Returns information about a pointer. See {@link #cuPointerGetAttribute(ByteBuffer, int, long) cuPointerGetAttribute} for the supported attributes and
 * their semantics.
 *
 * @param data returned pointer attribute value
 * @param attribute pointer attribute to query
 * @param ptr pointer
 */
@NativeType("CUresult")
public static int cuPointerGetAttribute(@NativeType("void *") PointerBuffer data, @NativeType("CUpointer_attribute") int attribute, @NativeType("CUdeviceptr") long ptr) {
return ncuPointerGetAttribute(memAddress(data), attribute, ptr);
}
/**
 * Returns information about a pointer. See {@link #cuPointerGetAttribute(ByteBuffer, int, long) cuPointerGetAttribute} for the supported attributes and
 * their semantics.
 *
 * @param data returned pointer attribute value
 * @param attribute pointer attribute to query
 * @param ptr pointer
 */
@NativeType("CUresult")
public static int cuPointerGetAttribute(@NativeType("void *") IntBuffer data, @NativeType("CUpointer_attribute") int attribute, @NativeType("CUdeviceptr") long ptr) {
return ncuPointerGetAttribute(memAddress(data), attribute, ptr);
}
/**
 * Returns information about a pointer. See {@link #cuPointerGetAttribute(ByteBuffer, int, long) cuPointerGetAttribute} for the supported attributes and
 * their semantics.
 *
 * @param data returned pointer attribute value
 * @param attribute pointer attribute to query
 * @param ptr pointer
 */
@NativeType("CUresult")
public static int cuPointerGetAttribute(@NativeType("void *") LongBuffer data, @NativeType("CUpointer_attribute") int attribute, @NativeType("CUdeviceptr") long ptr) {
return ncuPointerGetAttribute(memAddress(data), attribute, ptr);
}
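// Usage sketch (editor's addition): a minimal query of CU_POINTER_ATTRIBUTE_MEMORY_TYPE, which writes an
// unsigned int. `ptr` is a hypothetical CUdeviceptr.
/*
try (MemoryStack stack = stackPush()) {
    IntBuffer type = stack.mallocInt(1);
    if (cuPointerGetAttribute(type, CU_POINTER_ATTRIBUTE_MEMORY_TYPE, ptr) == CUDA_SUCCESS) {
        boolean onDevice = type.get(0) == CU_MEMORYTYPE_DEVICE; // otherwise CU_MEMORYTYPE_HOST, etc.
    }
}
*/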
// --- [ cuMemPrefetchAsync ] ---
/**
* Prefetches memory to the specified destination device.
*
* Prefetches memory to the specified destination device. {@code devPtr} is the base device pointer of the memory to be prefetched and {@code dstDevice}
* is the destination device. {@code count} specifies the number of bytes to copy. {@code hStream} is the stream in which the operation is enqueued. The
* memory range must refer to managed memory allocated via {@link #cuMemAllocManaged MemAllocManaged} or declared via __managed__ variables.
*
* Passing in {@link #CU_DEVICE_CPU DEVICE_CPU} for {@code dstDevice} will prefetch the data to host memory. If {@code dstDevice} is a GPU, then the device attribute
* {@link #CU_DEVICE_ATTRIBUTE_CONCURRENT_MANAGED_ACCESS DEVICE_ATTRIBUTE_CONCURRENT_MANAGED_ACCESS} must be non-zero. Additionally, {@code hStream} must be associated with a device that has a non-zero
* value for the device attribute {@link #CU_DEVICE_ATTRIBUTE_CONCURRENT_MANAGED_ACCESS DEVICE_ATTRIBUTE_CONCURRENT_MANAGED_ACCESS}.
*
* The start address and end address of the memory range will be rounded down and rounded up respectively to be aligned to CPU page size before the
* prefetch operation is enqueued in the stream.
*
* If no physical memory has been allocated for this region, then this memory region will be populated and mapped on the destination device. If there's
* insufficient memory to prefetch the desired region, the Unified Memory driver may evict pages from other {@link #cuMemAllocManaged MemAllocManaged} allocations to host memory
* in order to make room. Device memory allocated using {@link #cuMemAlloc MemAlloc} or {@link #cuArrayCreate ArrayCreate} will not be evicted.
*
* By default, any mappings to the previous location of the migrated pages are removed and mappings for the new location are only setup on {@code
* dstDevice}. The exact behavior however also depends on the settings applied to this memory range via {@link #cuMemAdvise MemAdvise} as described below:
*
* If {@link #CU_MEM_ADVISE_SET_READ_MOSTLY MEM_ADVISE_SET_READ_MOSTLY} was set on any subset of this memory range, then that subset will create a read-only copy of the pages on {@code
* dstDevice}.
*
* If {@link #CU_MEM_ADVISE_SET_PREFERRED_LOCATION MEM_ADVISE_SET_PREFERRED_LOCATION} was called on any subset of this memory range, then the pages will be migrated to {@code dstDevice} even if
* {@code dstDevice} is not the preferred location of any pages in the memory range.
*
* If {@link #CU_MEM_ADVISE_SET_ACCESSED_BY MEM_ADVISE_SET_ACCESSED_BY} was called on any subset of this memory range, then mappings to those pages from all the appropriate processors are
* updated to refer to the new location if establishing such a mapping is possible. Otherwise, those mappings are cleared.
*
* Note that this API is not required for functionality and only serves to improve performance by allowing the application to migrate data to a suitable
* location before it is accessed. Memory accesses to this range are always coherent and are allowed even when the data is actively being migrated.
*
* Note that this function is asynchronous with respect to the host and all work on other devices.
*
* @param devPtr pointer to be prefetched
* @param count size in bytes
* @param dstDevice destination device to prefetch to
* @param hStream stream to enqueue prefetch operation
*/
@NativeType("CUresult")
public static int cuMemPrefetchAsync(@NativeType("CUdeviceptr") long devPtr, @NativeType("size_t") long count, @NativeType("CUdevice") int dstDevice, @NativeType("CUstream") long hStream) {
long __functionAddress = Functions.MemPrefetchAsync;
if (CHECKS) {
check(__functionAddress);
check(devPtr);
}
return callPPPI(devPtr, count, dstDevice, hStream, __functionAddress);
}
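// Usage sketch (editor's addition): warming a managed allocation on a GPU before use and returning it to
// the CPU afterwards. `managedPtr`, `bytes`, `device` and `stream` are hypothetical; the device must
// report a non-zero DEVICE_ATTRIBUTE_CONCURRENT_MANAGED_ACCESS.
/*
cuMemPrefetchAsync(managedPtr, bytes, device, stream);        // migrate pages to the GPU
// ... enqueue kernels that read managedPtr on stream ...
cuMemPrefetchAsync(managedPtr, bytes, CU_DEVICE_CPU, stream); // migrate pages back to host memory
cuStreamSynchronize(stream);                                  // prefetches are asynchronous w.r.t. the host
*/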
// --- [ cuMemAdvise ] ---
/**
* Advise about the usage of a given memory range.
*
* Advise the Unified Memory subsystem about the usage pattern for the memory range starting at {@code devPtr} with a size of {@code count} bytes. The
* start address and end address of the memory range will be rounded down and rounded up respectively to be aligned to CPU page size before the advice is
* applied. The memory range must refer to managed memory allocated via {@link #cuMemAllocManaged MemAllocManaged} or declared via __managed__ variables. The memory range
* could also refer to system-allocated pageable memory provided it represents a valid, host-accessible region of memory and all additional constraints
* imposed by {@code advice} as outlined below are also satisfied. Specifying an invalid system-allocated pageable memory range results in an error being
* returned.
*
* The {@code advice} parameter can take the following values:
*
*
* - {@link #CU_MEM_ADVISE_SET_READ_MOSTLY MEM_ADVISE_SET_READ_MOSTLY}: This implies that the data is mostly going to be read from and only occasionally written to. Any read accesses
* from any processor to this region will create a read-only copy of at least the accessed pages in that processor's memory. Additionally, if
* {@link #cuMemPrefetchAsync MemPrefetchAsync} is called on this region, it will create a read-only copy of the data on the destination processor. If any processor writes to
* this region, all copies of the corresponding page will be invalidated except for the one where the write occurred. The {@code device} argument is
* ignored for this advice. Note that for a page to be read-duplicated, the accessing processor must either be the CPU or a GPU that has a non-zero
* value for the device attribute {@link #CU_DEVICE_ATTRIBUTE_CONCURRENT_MANAGED_ACCESS DEVICE_ATTRIBUTE_CONCURRENT_MANAGED_ACCESS}. Also, if a context is created on a device that does not have the
* device attribute {@link #CU_DEVICE_ATTRIBUTE_CONCURRENT_MANAGED_ACCESS DEVICE_ATTRIBUTE_CONCURRENT_MANAGED_ACCESS} set, then read-duplication will not occur until all such contexts are destroyed.
* If the memory region refers to valid system-allocated pageable memory, then the accessing device must have a non-zero value for the device
* attribute {@link #CU_DEVICE_ATTRIBUTE_PAGEABLE_MEMORY_ACCESS DEVICE_ATTRIBUTE_PAGEABLE_MEMORY_ACCESS} for a read-only copy to be created on that device. Note however that if the accessing device
* also has a non-zero value for the device attribute {@link #CU_DEVICE_ATTRIBUTE_PAGEABLE_MEMORY_ACCESS_USES_HOST_PAGE_TABLES DEVICE_ATTRIBUTE_PAGEABLE_MEMORY_ACCESS_USES_HOST_PAGE_TABLES}, then setting this advice
* will not create a read-only copy when that device accesses this memory region.
* - {@link #CU_MEM_ADVISE_UNSET_READ_MOSTLY MEM_ADVISE_UNSET_READ_MOSTLY}: Undoes the effect of {@link #CU_MEM_ADVISE_SET_READ_MOSTLY MEM_ADVISE_SET_READ_MOSTLY} and also prevents the Unified Memory driver from attempting
* heuristic read-duplication on the memory range. Any read-duplicated copies of the data will be collapsed into a single copy. The location for the
* collapsed copy will be the preferred location if the page has a preferred location and one of the read-duplicated copies was resident at that
* location. Otherwise, the location chosen is arbitrary.
* - {@link #CU_MEM_ADVISE_SET_PREFERRED_LOCATION MEM_ADVISE_SET_PREFERRED_LOCATION}: This advice sets the preferred location for the data to be the memory belonging to {@code device}. Passing
* in {@link #CU_DEVICE_CPU DEVICE_CPU} for {@code device} sets the preferred location as host memory. If {@code device} is a GPU, then it must have a non-zero value for
* the device attribute {@link #CU_DEVICE_ATTRIBUTE_CONCURRENT_MANAGED_ACCESS DEVICE_ATTRIBUTE_CONCURRENT_MANAGED_ACCESS}. Setting the preferred location does not cause data to migrate to that
* location immediately. Instead, it guides the migration policy when a fault occurs on that memory region. If the data is already in its preferred
* location and the faulting processor can establish a mapping without requiring the data to be migrated, then data migration will be avoided. On the
* other hand, if the data is not in its preferred location or if a direct mapping cannot be established, then it will be migrated to the processor
* accessing it. It is important to note that setting the preferred location does not prevent data prefetching done using {@link #cuMemPrefetchAsync MemPrefetchAsync}. Having
* a preferred location can override the page thrash detection and resolution logic in the Unified Memory driver. Normally, if a page is detected to
* be constantly thrashing between, for example, host and device memory, the page may eventually be pinned to host memory by the Unified Memory driver.
* But if the preferred location is set as device memory, then the page will continue to thrash indefinitely. If {@link #CU_MEM_ADVISE_SET_READ_MOSTLY MEM_ADVISE_SET_READ_MOSTLY} is
* also set on this memory region or any subset of it, then the policies associated with that advice will override the policies of this advice, unless
* read accesses from {@code device} will not result in a read-only copy being created on that device as outlined in description for the advice
* {@link #CU_MEM_ADVISE_SET_READ_MOSTLY MEM_ADVISE_SET_READ_MOSTLY}. If the memory region refers to valid system-allocated pageable memory, then {@code device} must have a non-zero
* value for the device attribute {@link #CU_DEVICE_ATTRIBUTE_PAGEABLE_MEMORY_ACCESS DEVICE_ATTRIBUTE_PAGEABLE_MEMORY_ACCESS}. Additionally, if {@code device} has a non-zero value for the device
* attribute {@link #CU_DEVICE_ATTRIBUTE_PAGEABLE_MEMORY_ACCESS_USES_HOST_PAGE_TABLES DEVICE_ATTRIBUTE_PAGEABLE_MEMORY_ACCESS_USES_HOST_PAGE_TABLES}, then this call has no effect. Note however that this behavior may
* change in the future.
* - {@link #CU_MEM_ADVISE_UNSET_PREFERRED_LOCATION MEM_ADVISE_UNSET_PREFERRED_LOCATION}: Undoes the effect of {@link #CU_MEM_ADVISE_SET_PREFERRED_LOCATION MEM_ADVISE_SET_PREFERRED_LOCATION} and changes the preferred location to none.
* - {@link #CU_MEM_ADVISE_SET_ACCESSED_BY MEM_ADVISE_SET_ACCESSED_BY}: This advice implies that the data will be accessed by {@code device}. Passing in {@link #CU_DEVICE_CPU DEVICE_CPU} for {@code
* device} will set the advice for the CPU. If {@code device} is a GPU, then the device attribute {@link #CU_DEVICE_ATTRIBUTE_CONCURRENT_MANAGED_ACCESS DEVICE_ATTRIBUTE_CONCURRENT_MANAGED_ACCESS} must
* be non-zero. This advice does not cause data migration and has no impact on the location of the data per se. Instead, it causes the data to always
* be mapped in the specified processor's page tables, as long as the location of the data permits a mapping to be established. If the data gets
* migrated for any reason, the mappings are updated accordingly. This advice is recommended in scenarios where data locality is not important, but
* avoiding faults is. Consider for example a system containing multiple GPUs with peer-to-peer access enabled, where the data located on one GPU is
* occasionally accessed by peer GPUs. In such scenarios, migrating data over to the other GPUs is not as important because the accesses are
* infrequent and the overhead of migration may be too high. But preventing faults can still help improve performance, and so having a mapping set up
* in advance is useful. Note that on CPU access of this data, the data may be migrated to host memory because the CPU typically cannot access device
* memory directly. Any GPU that had the {@link #CU_MEM_ADVISE_SET_ACCESSED_BY MEM_ADVISE_SET_ACCESSED_BY} flag set for this data will now have its mapping updated to point to the page
* in host memory. If {@link #CU_MEM_ADVISE_SET_READ_MOSTLY MEM_ADVISE_SET_READ_MOSTLY} is also set on this memory region or any subset of it, then the policies associated with that
* advice will override the policies of this advice. Additionally, if the preferred location of this memory region or any subset of it is also {@code
* device}, then the policies associated with {@link #CU_MEM_ADVISE_SET_PREFERRED_LOCATION MEM_ADVISE_SET_PREFERRED_LOCATION} will override the policies of this advice. If the memory region
* refers to valid system-allocated pageable memory, then {@code device} must have a non-zero value for the device attribute
* {@link #CU_DEVICE_ATTRIBUTE_PAGEABLE_MEMORY_ACCESS DEVICE_ATTRIBUTE_PAGEABLE_MEMORY_ACCESS}. Additionally, if {@code device} has a non-zero value for the device attribute
* {@link #CU_DEVICE_ATTRIBUTE_PAGEABLE_MEMORY_ACCESS_USES_HOST_PAGE_TABLES DEVICE_ATTRIBUTE_PAGEABLE_MEMORY_ACCESS_USES_HOST_PAGE_TABLES}, then this call has no effect.
* - {@link #CU_MEM_ADVISE_UNSET_ACCESSED_BY MEM_ADVISE_UNSET_ACCESSED_BY}: Undoes the effect of {@link #CU_MEM_ADVISE_SET_ACCESSED_BY MEM_ADVISE_SET_ACCESSED_BY}. Any mappings to the data from {@code device} may be
* removed at any time causing accesses to result in non-fatal page faults. If the memory region refers to valid system-allocated pageable memory,
* then {@code device} must have a non-zero value for the device attribute {@link #CU_DEVICE_ATTRIBUTE_PAGEABLE_MEMORY_ACCESS DEVICE_ATTRIBUTE_PAGEABLE_MEMORY_ACCESS}. Additionally, if {@code
* device} has a non-zero value for the device attribute {@link #CU_DEVICE_ATTRIBUTE_PAGEABLE_MEMORY_ACCESS_USES_HOST_PAGE_TABLES DEVICE_ATTRIBUTE_PAGEABLE_MEMORY_ACCESS_USES_HOST_PAGE_TABLES}, then this call has no
* effect.
*
*
* @param devPtr pointer to memory to set the advice for
* @param count size in bytes of the memory range
* @param advice advice to be applied for the specified memory range
* @param device device to apply the advice for
*/
@NativeType("CUresult")
public static int cuMemAdvise(@NativeType("CUdeviceptr") long devPtr, @NativeType("size_t") long count, @NativeType("CUmem_advise") int advice, @NativeType("CUdevice") int device) {
long __functionAddress = Functions.MemAdvise;
if (CHECKS) {
check(__functionAddress);
check(devPtr);
}
return callPPI(devPtr, count, advice, device, __functionAddress);
}
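// Usage sketch (editor's addition): advising the Unified Memory driver about a mostly-read lookup table.
// `managedPtr` and `bytes` are hypothetical and must describe managed (or qualifying pageable) memory.
/*
cuMemAdvise(managedPtr, bytes, CU_MEM_ADVISE_SET_READ_MOSTLY, 0);                     // device ignored for this advice
cuMemAdvise(managedPtr, bytes, CU_MEM_ADVISE_SET_PREFERRED_LOCATION, CU_DEVICE_CPU);  // home the pages on the host
*/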
// --- [ cuMemRangeGetAttribute ] ---
/**
* Unsafe version of: {@link #cuMemRangeGetAttribute MemRangeGetAttribute}
*
* @param dataSize the size of {@code data}
*/
public static int ncuMemRangeGetAttribute(long data, long dataSize, int attribute, long devPtr, long count) {
long __functionAddress = Functions.MemRangeGetAttribute;
if (CHECKS) {
check(__functionAddress);
check(devPtr);
}
return callPPPPI(data, dataSize, attribute, devPtr, count, __functionAddress);
}
/**
* Query an attribute of a given memory range.
*
* Query an attribute about the memory range starting at {@code devPtr} with a size of {@code count} bytes. The memory range must refer to managed memory
* allocated via {@link #cuMemAllocManaged MemAllocManaged} or declared via __managed__ variables.
*
* The {@code attribute} parameter can take the following values:
*
*
* - {@link #CU_MEM_RANGE_ATTRIBUTE_READ_MOSTLY MEM_RANGE_ATTRIBUTE_READ_MOSTLY}: If this attribute is specified, {@code data} will be interpreted as a 32-bit integer, and {@code dataSize}
* must be 4. The result returned will be 1 if all pages in the given memory range have read-duplication enabled, or 0 otherwise.
* - {@link #CU_MEM_RANGE_ATTRIBUTE_PREFERRED_LOCATION MEM_RANGE_ATTRIBUTE_PREFERRED_LOCATION}: If this attribute is specified, {@code data} will be interpreted as a 32-bit integer, and {@code
* dataSize} must be 4. The result returned will be a GPU device id if all pages in the memory range have that GPU as their preferred location, or it
* will be CU_DEVICE_CPU if all pages in the memory range have the CPU as their preferred location, or it will be CU_DEVICE_INVALID if either all the
* pages don't have the same preferred location or some of the pages don't have a preferred location at all. Note that the actual location of the
* pages in the memory range at the time of the query may be different from the preferred location.
* - {@link #CU_MEM_RANGE_ATTRIBUTE_ACCESSED_BY MEM_RANGE_ATTRIBUTE_ACCESSED_BY}: If this attribute is specified, {@code data} will be interpreted as an array of 32-bit integers, and {@code
* dataSize} must be a non-zero multiple of 4. The result returned will be a list of device ids that had {@link #CU_MEM_ADVISE_SET_ACCESSED_BY MEM_ADVISE_SET_ACCESSED_BY} set for that
* entire memory range. If any device does not have that advice set for the entire memory range, that device will not be included. If {@code data} is
* larger than the number of devices that have that advice set for that memory range, CU_DEVICE_INVALID will be returned in all the extra space
* provided. For example, if {@code dataSize} is 12 (i.e. {@code data} has 3 elements) and only device 0 has the advice set, then the result returned will
* be { 0, CU_DEVICE_INVALID, CU_DEVICE_INVALID }. If {@code data} is smaller than the number of devices that have that advice set, then only as many
* devices will be returned as can fit in the array. There is no guarantee on which specific devices will be returned, however.
* - {@link #CU_MEM_RANGE_ATTRIBUTE_LAST_PREFETCH_LOCATION MEM_RANGE_ATTRIBUTE_LAST_PREFETCH_LOCATION}: If this attribute is specified, {@code data} will be interpreted as a 32-bit integer, and {@code
* dataSize} must be 4. The result returned will be the last location to which all pages in the memory range were prefetched explicitly via
* {@link #cuMemPrefetchAsync MemPrefetchAsync}. This will either be a GPU id or CU_DEVICE_CPU depending on whether the last location for prefetch was a GPU or the CPU
* respectively. If any page in the memory range was never explicitly prefetched or if all pages were not prefetched to the same location,
* CU_DEVICE_INVALID will be returned. Note that this simply returns the last location that the application requested to prefetch the memory range to.
* It gives no indication as to whether the prefetch operation to that location has completed or even begun.
*
*
* @param data a pointer to a memory location where the result of the attribute query will be written
* @param attribute the attribute to query
* @param devPtr start of the range to query
* @param count size of the range to query
*/
@NativeType("CUresult")
public static int cuMemRangeGetAttribute(@NativeType("void *") ByteBuffer data, @NativeType("CUmem_range_attribute") int attribute, @NativeType("CUdeviceptr") long devPtr, @NativeType("size_t") long count) {
return ncuMemRangeGetAttribute(memAddress(data), data.remaining(), attribute, devPtr, count);
}
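// Usage sketch (editor's addition): testing whether read-duplication is enabled on an entire range. This
// attribute yields a 32-bit integer, so the result buffer must be exactly 4 bytes. `managedPtr` and
// `bytes` are hypothetical.
/*
try (MemoryStack stack = stackPush()) {
    ByteBuffer data = stack.malloc(4);
    if (cuMemRangeGetAttribute(data, CU_MEM_RANGE_ATTRIBUTE_READ_MOSTLY, managedPtr, bytes) == CUDA_SUCCESS) {
        boolean readMostly = data.getInt(0) == 1;
    }
}
*/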
// --- [ cuMemRangeGetAttributes ] ---
/**
* Unsafe version of: {@link #cuMemRangeGetAttributes MemRangeGetAttributes}
*
* @param numAttributes number of attributes to query
*/
public static int ncuMemRangeGetAttributes(long data, long dataSizes, long attributes, long numAttributes, long devPtr, long count) {
long __functionAddress = Functions.MemRangeGetAttributes;
if (CHECKS) {
check(__functionAddress);
check(devPtr);
}
return callPPPPPPI(data, dataSizes, attributes, numAttributes, devPtr, count, __functionAddress);
}
/**
* Query attributes of a given memory range.
*
* Query attributes of the memory range starting at {@code devPtr} with a size of {@code count} bytes. The memory range must refer to managed memory
* allocated via {@link #cuMemAllocManaged MemAllocManaged} or declared via __managed__ variables. The {@code attributes} array will be interpreted to have {@code numAttributes}
* entries. The {@code dataSizes} array will also be interpreted to have {@code numAttributes} entries. The results of the query will be stored in {@code data}.
*
* The list of supported attributes is given below. Please refer to {@link #cuMemRangeGetAttribute MemRangeGetAttribute} for attribute descriptions and restrictions.
*
*
* - {@link #CU_MEM_RANGE_ATTRIBUTE_READ_MOSTLY MEM_RANGE_ATTRIBUTE_READ_MOSTLY}
* - {@link #CU_MEM_RANGE_ATTRIBUTE_PREFERRED_LOCATION MEM_RANGE_ATTRIBUTE_PREFERRED_LOCATION}
* - {@link #CU_MEM_RANGE_ATTRIBUTE_ACCESSED_BY MEM_RANGE_ATTRIBUTE_ACCESSED_BY}
* - {@link #CU_MEM_RANGE_ATTRIBUTE_LAST_PREFETCH_LOCATION MEM_RANGE_ATTRIBUTE_LAST_PREFETCH_LOCATION}
*
*
* @param data a two-dimensional array containing pointers to memory locations where the result of each attribute query will be written to
* @param dataSizes array containing the sizes of each result
* @param attributes an array of attributes to query (numAttributes and the number of attributes in this array should match). One of:
{@link #CU_MEM_RANGE_ATTRIBUTE_READ_MOSTLY MEM_RANGE_ATTRIBUTE_READ_MOSTLY} {@link #CU_MEM_RANGE_ATTRIBUTE_PREFERRED_LOCATION MEM_RANGE_ATTRIBUTE_PREFERRED_LOCATION} {@link #CU_MEM_RANGE_ATTRIBUTE_ACCESSED_BY MEM_RANGE_ATTRIBUTE_ACCESSED_BY} {@link #CU_MEM_RANGE_ATTRIBUTE_LAST_PREFETCH_LOCATION MEM_RANGE_ATTRIBUTE_LAST_PREFETCH_LOCATION}
* @param devPtr start of the range to query
* @param count size of the range to query
*/
@NativeType("CUresult")
public static int cuMemRangeGetAttributes(@NativeType("void **") PointerBuffer data, @NativeType("size_t *") PointerBuffer dataSizes, @NativeType("CUmem_range_attribute *") IntBuffer attributes, @NativeType("CUdeviceptr") long devPtr, @NativeType("size_t") long count) {
if (CHECKS) {
check(data, attributes.remaining());
check(dataSizes, attributes.remaining());
}
return ncuMemRangeGetAttributes(memAddress(data), memAddress(dataSizes), memAddress(attributes), attributes.remaining(), devPtr, count);
}
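// Usage sketch (editor's addition): batching two range queries. Each entry of `data` points at its own
// result slot and `dataSizes` carries the matching sizes. `managedPtr` and `bytes` are hypothetical.
/*
try (MemoryStack stack = stackPush()) {
    IntBuffer readMostly = stack.mallocInt(1);
    IntBuffer preferred  = stack.mallocInt(1);

    PointerBuffer data = stack.mallocPointer(2);
    data.put(0, memAddress(readMostly)).put(1, memAddress(preferred));

    PointerBuffer dataSizes = stack.mallocPointer(2);
    dataSizes.put(0, 4).put(1, 4); // both attributes write 32-bit integers

    IntBuffer attributes = stack.ints(CU_MEM_RANGE_ATTRIBUTE_READ_MOSTLY, CU_MEM_RANGE_ATTRIBUTE_PREFERRED_LOCATION);

    cuMemRangeGetAttributes(data, dataSizes, attributes, managedPtr, bytes);
}
*/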
// --- [ cuPointerSetAttribute ] ---
/** Unsafe version of: {@link #cuPointerSetAttribute PointerSetAttribute} */
public static int ncuPointerSetAttribute(long value, int attribute, long ptr) {
long __functionAddress = Functions.PointerSetAttribute;
if (CHECKS) {
check(__functionAddress);
check(ptr);
}
return callPPI(value, attribute, ptr, __functionAddress);
}
/**
* Set attributes on a previously allocated memory region.
*
* The supported attributes are:
*
*
* - {@link #CU_POINTER_ATTRIBUTE_SYNC_MEMOPS POINTER_ATTRIBUTE_SYNC_MEMOPS}: A boolean attribute that can either be set (1) or unset (0).
*
*
* When set, the region of memory that {@code ptr} points to is guaranteed to always synchronize memory operations that are synchronous. If there are
* some previously initiated synchronous memory operations that are pending when this attribute is set, the function does not return until those
* memory operations are complete. See further documentation in the section titled "API synchronization behavior" to learn more about cases when
* synchronous memory operations can exhibit asynchronous behavior. {@code value} will be considered as a pointer to an unsigned integer to which this
* attribute is to be set.
*
*
* @param value pointer to memory containing the value to be set
* @param attribute pointer attribute to set
* @param ptr pointer to a memory region allocated using CUDA memory allocation APIs
*/
@NativeType("CUresult")
public static int cuPointerSetAttribute(@NativeType("void const *") ByteBuffer value, @NativeType("CUpointer_attribute") int attribute, @NativeType("CUdeviceptr") long ptr) {
return ncuPointerSetAttribute(memAddress(value), attribute, ptr);
}
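/**
 * Illustrative usage sketch, not part of the generated bindings: enables synchronous memory operations on an allocation. Assumes {@code ptr} was
 * returned by a CUDA memory allocation API; error handling is elided.
 */
private static void pointerSetAttributeExample(long ptr) {
    try (MemoryStack stack = stackPush()) {
        // the value is read as an unsigned integer: 1 = set, 0 = unset
        ByteBuffer value = stack.malloc(4);
        value.putInt(0, 1);
        cuPointerSetAttribute(value, CU_POINTER_ATTRIBUTE_SYNC_MEMOPS, ptr);
    }
}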
// --- [ cuPointerGetAttributes ] ---
/**
* Unsafe version of: {@link #cuPointerGetAttributes PointerGetAttributes}
*
* @param numAttributes number of attributes to query
*/
public static int ncuPointerGetAttributes(int numAttributes, long attributes, long data, long ptr) {
long __functionAddress = Functions.PointerGetAttributes;
if (CHECKS) {
check(__functionAddress);
check(ptr);
}
return callPPPI(numAttributes, attributes, data, ptr, __functionAddress);
}
/**
* Returns information about a pointer.
*
* Unlike {@link #cuPointerGetAttribute PointerGetAttribute}, this function will not return an error when the {@code ptr} encountered is not a valid CUDA pointer. Instead, the
* attributes are assigned default {@code NULL} values and {@link #CUDA_SUCCESS} is returned.
*
* If {@code ptr} was not allocated by, mapped by, or registered with a {@code CUcontext} which uses UVA (Unified Virtual Addressing),
* {@link #CUDA_ERROR_INVALID_CONTEXT} is returned.
*
* @param attributes an array of attributes to query (numAttributes and the number of attributes in this array should match). One of:
{@link #CU_POINTER_ATTRIBUTE_CONTEXT POINTER_ATTRIBUTE_CONTEXT} {@link #CU_POINTER_ATTRIBUTE_MEMORY_TYPE POINTER_ATTRIBUTE_MEMORY_TYPE} {@link #CU_POINTER_ATTRIBUTE_DEVICE_POINTER POINTER_ATTRIBUTE_DEVICE_POINTER} {@link #CU_POINTER_ATTRIBUTE_HOST_POINTER POINTER_ATTRIBUTE_HOST_POINTER} {@link #CU_POINTER_ATTRIBUTE_P2P_TOKENS POINTER_ATTRIBUTE_P2P_TOKENS} {@link #CU_POINTER_ATTRIBUTE_SYNC_MEMOPS POINTER_ATTRIBUTE_SYNC_MEMOPS} {@link #CU_POINTER_ATTRIBUTE_BUFFER_ID POINTER_ATTRIBUTE_BUFFER_ID} {@link #CU_POINTER_ATTRIBUTE_IS_MANAGED POINTER_ATTRIBUTE_IS_MANAGED} {@link #CU_POINTER_ATTRIBUTE_DEVICE_ORDINAL POINTER_ATTRIBUTE_DEVICE_ORDINAL} {@link #CU_POINTER_ATTRIBUTE_IS_LEGACY_CUDA_IPC_CAPABLE POINTER_ATTRIBUTE_IS_LEGACY_CUDA_IPC_CAPABLE} {@link #CU_POINTER_ATTRIBUTE_RANGE_START_ADDR POINTER_ATTRIBUTE_RANGE_START_ADDR} {@link #CU_POINTER_ATTRIBUTE_RANGE_SIZE POINTER_ATTRIBUTE_RANGE_SIZE} {@link #CU_POINTER_ATTRIBUTE_MAPPED POINTER_ATTRIBUTE_MAPPED} {@link #CU_POINTER_ATTRIBUTE_ALLOWED_HANDLE_TYPES POINTER_ATTRIBUTE_ALLOWED_HANDLE_TYPES} {@link #CU_POINTER_ATTRIBUTE_IS_GPU_DIRECT_RDMA_CAPABLE POINTER_ATTRIBUTE_IS_GPU_DIRECT_RDMA_CAPABLE} {@link #CU_POINTER_ATTRIBUTE_ACCESS_FLAGS POINTER_ATTRIBUTE_ACCESS_FLAGS} {@link #CU_POINTER_ATTRIBUTE_MEMPOOL_HANDLE POINTER_ATTRIBUTE_MEMPOOL_HANDLE} {@link #CU_POINTER_ATTRIBUTE_ACCESS_FLAG_NONE POINTER_ATTRIBUTE_ACCESS_FLAG_NONE} {@link #CU_POINTER_ATTRIBUTE_ACCESS_FLAG_READ POINTER_ATTRIBUTE_ACCESS_FLAG_READ} {@link #CU_POINTER_ATTRIBUTE_ACCESS_FLAG_READWRITE POINTER_ATTRIBUTE_ACCESS_FLAG_READWRITE}
* @param data a two-dimensional array containing pointers to memory locations where the result of each attribute query will be written to
* @param ptr pointer to query
*/
@NativeType("CUresult")
public static int cuPointerGetAttributes(@NativeType("CUpointer_attribute *") IntBuffer attributes, @NativeType("void **") PointerBuffer data, @NativeType("CUdeviceptr") long ptr) {
if (CHECKS) {
check(data, attributes.remaining());
}
return ncuPointerGetAttributes(attributes.remaining(), memAddress(attributes), memAddress(data), ptr);
}
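/**
 * Illustrative usage sketch, not part of the generated bindings: queries the memory type and managed status of a pointer in one call. Unlike
 * {@link #cuPointerGetAttribute PointerGetAttribute}, an invalid pointer yields default values rather than an error. Error handling is elided.
 */
private static void pointerGetAttributesExample(long ptr) {
    try (MemoryStack stack = stackPush()) {
        IntBuffer memoryType = stack.mallocInt(1);
        IntBuffer isManaged  = stack.mallocInt(1);
        IntBuffer attributes = stack.ints(CU_POINTER_ATTRIBUTE_MEMORY_TYPE, CU_POINTER_ATTRIBUTE_IS_MANAGED);
        PointerBuffer data = stack.mallocPointer(2);
        data.put(0, memAddress(memoryType)).put(1, memAddress(isManaged));
        if (cuPointerGetAttributes(attributes, data, ptr) == CUDA_SUCCESS) {
            System.out.println("memory type: " + memoryType.get(0) + ", managed: " + isManaged.get(0));
        }
    }
}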
// --- [ cuStreamCreate ] ---
/** Unsafe version of: {@link #cuStreamCreate StreamCreate} */
public static int ncuStreamCreate(long phStream, int Flags) {
long __functionAddress = Functions.StreamCreate;
return callPI(phStream, Flags, __functionAddress);
}
/**
* Create a stream.
*
* Creates a stream and returns a handle in {@code phStream}. The {@code Flags} argument determines behaviors of the stream.
*
* Valid values for {@code Flags} are:
*
*
* - {@link #CU_STREAM_DEFAULT STREAM_DEFAULT}: Default stream creation flag.
* - {@link #CU_STREAM_NON_BLOCKING STREAM_NON_BLOCKING}: Specifies that work running in the created stream may run concurrently with work in stream 0 (the NULL stream), and that
* the created stream should perform no implicit synchronization with stream 0.
*
*
* @param phStream returned newly created stream
* @param Flags parameters for stream creation
*/
@NativeType("CUresult")
public static int cuStreamCreate(@NativeType("CUstream *") PointerBuffer phStream, @NativeType("unsigned int") int Flags) {
if (CHECKS) {
check(phStream, 1);
}
return ncuStreamCreate(memAddress(phStream), Flags);
}
// --- [ cuStreamCreateWithPriority ] ---
/** Unsafe version of: {@link #cuStreamCreateWithPriority StreamCreateWithPriority} */
public static int ncuStreamCreateWithPriority(long phStream, int flags, int priority) {
long __functionAddress = Functions.StreamCreateWithPriority;
return callPI(phStream, flags, priority, __functionAddress);
}
/**
* Create a stream with the given priority.
*
* Creates a stream with the specified priority and returns a handle in {@code phStream}. This API alters the scheduler priority of work in the stream.
* Work in a higher priority stream may preempt work already executing in a low priority stream.
*
* {@code priority} follows a convention where lower numbers represent higher priorities. {@code 0} represents default priority. The range of meaningful
* numerical priorities can be queried using {@link #cuCtxGetStreamPriorityRange CtxGetStreamPriorityRange}. If the specified priority is outside the numerical range returned by
* {@link #cuCtxGetStreamPriorityRange CtxGetStreamPriorityRange}, it will automatically be clamped to the lowest or the highest number in the range.
*
* Note
*
* Stream priorities are supported only on GPUs with compute capability 3.5 or higher.
*
* Note
*
* In the current implementation, only compute kernels launched in priority streams are affected by the stream's priority. Stream priorities
* have no effect on host-to-device and device-to-host memory operations.
*
* @param phStream returned newly created stream
* @param flags flags for stream creation. See {@link #cuStreamCreate StreamCreate} for a list of valid flags
* @param priority stream priority. Lower numbers represent higher priorities. See {@link #cuCtxGetStreamPriorityRange CtxGetStreamPriorityRange} for more information about meaningful stream
* priorities that can be passed.
*/
@NativeType("CUresult")
public static int cuStreamCreateWithPriority(@NativeType("CUstream *") PointerBuffer phStream, @NativeType("unsigned int") int flags, int priority) {
if (CHECKS) {
check(phStream, 1);
}
return ncuStreamCreateWithPriority(memAddress(phStream), flags, priority);
}
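/**
 * Illustrative usage sketch, not part of the generated bindings: queries the meaningful priority range of the current context and creates a
 * non-blocking stream at the highest priority. Assumes a context is current on the calling thread; error handling is elided.
 */
private static long streamCreateWithPriorityExample() {
    try (MemoryStack stack = stackPush()) {
        IntBuffer least    = stack.mallocInt(1); // numerically greatest value (lowest priority)
        IntBuffer greatest = stack.mallocInt(1); // numerically smallest value (highest priority)
        cuCtxGetStreamPriorityRange(least, greatest);
        PointerBuffer phStream = stack.mallocPointer(1);
        cuStreamCreateWithPriority(phStream, CU_STREAM_NON_BLOCKING, greatest.get(0));
        return phStream.get(0);
    }
}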
// --- [ cuStreamGetPriority ] ---
/** Unsafe version of: {@link #cuStreamGetPriority StreamGetPriority} */
public static int ncuStreamGetPriority(long hStream, long priority) {
long __functionAddress = Functions.StreamGetPriority;
return callPPI(hStream, priority, __functionAddress);
}
/**
* Query the priority of a given stream.
*
* Query the priority of a stream created using {@link #cuStreamCreate StreamCreate} or {@link #cuStreamCreateWithPriority StreamCreateWithPriority} and return the priority in {@code priority}. Note that if
* the stream was created with a priority outside the numerical range returned by {@link #cuCtxGetStreamPriorityRange CtxGetStreamPriorityRange}, this function returns the clamped
* priority. See {@link #cuStreamCreateWithPriority StreamCreateWithPriority} for details about priority clamping.
*
* @param hStream handle to the stream to be queried
* @param priority pointer to a signed integer in which the stream's priority is returned
*/
@NativeType("CUresult")
public static int cuStreamGetPriority(@NativeType("CUstream") long hStream, @NativeType("int *") IntBuffer priority) {
if (CHECKS) {
check(priority, 1);
}
return ncuStreamGetPriority(hStream, memAddress(priority));
}
// --- [ cuStreamGetFlags ] ---
/** Unsafe version of: {@link #cuStreamGetFlags StreamGetFlags} */
public static int ncuStreamGetFlags(long hStream, long flags) {
long __functionAddress = Functions.StreamGetFlags;
return callPPI(hStream, flags, __functionAddress);
}
/**
* Query the flags of a given stream.
*
* Query the flags of a stream created using {@link #cuStreamCreate StreamCreate} or {@link #cuStreamCreateWithPriority StreamCreateWithPriority} and return the flags in {@code flags}.
*
* @param hStream handle to the stream to be queried
* @param flags pointer to an unsigned integer in which the stream's flags are returned. The value returned in {@code flags} is a logical 'OR' of all flags that
* were used while creating this stream. See {@link #cuStreamCreate StreamCreate} for the list of valid flags.
*/
@NativeType("CUresult")
public static int cuStreamGetFlags(@NativeType("CUstream") long hStream, @NativeType("unsigned int *") IntBuffer flags) {
if (CHECKS) {
check(flags, 1);
}
return ncuStreamGetFlags(hStream, memAddress(flags));
}
// --- [ cuStreamGetCtx ] ---
/** Unsafe version of: {@link #cuStreamGetCtx StreamGetCtx} */
public static int ncuStreamGetCtx(long hStream, long pctx) {
long __functionAddress = Functions.StreamGetCtx;
if (CHECKS) {
check(__functionAddress);
}
return callPPI(hStream, pctx, __functionAddress);
}
/**
* Query the context associated with a stream.
*
* Returns the CUDA context that the stream is associated with.
*
* The stream handle {@code hStream} can refer to any of the following:
*
*
* - a stream created via any of the CUDA driver APIs such as {@link #cuStreamCreate StreamCreate} and {@link #cuStreamCreateWithPriority StreamCreateWithPriority}, or their runtime API equivalents such as
* {@code cudaStreamCreate()}, {@code cudaStreamCreateWithFlags()} and {@code cudaStreamCreateWithPriority()}. The returned context is the context
* that was active in the calling thread when the stream was created. Passing an invalid handle will result in undefined behavior.
* - any of the special streams such as the {@code NULL} stream, {@link #CU_STREAM_LEGACY STREAM_LEGACY} and {@link #CU_STREAM_PER_THREAD STREAM_PER_THREAD}. The runtime API equivalents of these are also accepted,
* which are {@code NULL}, {@code cudaStreamLegacy()} and {@code cudaStreamPerThread()} respectively. Specifying any of the special handles will return the
* context current to the calling thread. If no context is current to the calling thread, {@link #CUDA_ERROR_INVALID_CONTEXT} is returned.
*
*
* @param hStream handle to the stream to be queried
* @param pctx returned context associated with the stream
*/
@NativeType("CUresult")
public static int cuStreamGetCtx(@NativeType("CUstream") long hStream, @NativeType("CUcontext *") PointerBuffer pctx) {
if (CHECKS) {
check(pctx, 1);
}
return ncuStreamGetCtx(hStream, memAddress(pctx));
}
// --- [ cuStreamWaitEvent ] ---
/**
* Make a compute stream wait on an event.
*
* Makes all future work submitted to {@code hStream} wait for all work captured in {@code hEvent}. See {@link #cuEventRecord EventRecord} for details on what is captured
* by an event. The synchronization will be performed efficiently on the device when applicable. {@code hEvent} may be from a different context or device
* than {@code hStream}.
*
* @param hStream stream that will wait on the event
* @param hEvent event to wait on (may not be {@code NULL}). One of:
{@link #CU_EVENT_WAIT_DEFAULT EVENT_WAIT_DEFAULT} {@link #CU_EVENT_WAIT_EXTERNAL EVENT_WAIT_EXTERNAL}
* @param Flags see {@code CUevent_capture_flags}
*/
@NativeType("CUresult")
public static int cuStreamWaitEvent(@NativeType("CUstream") long hStream, @NativeType("CUevent") long hEvent, @NativeType("unsigned int") int Flags) {
long __functionAddress = Functions.StreamWaitEvent;
if (CHECKS) {
check(hEvent);
}
return callPPI(hStream, hEvent, Flags, __functionAddress);
}
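/**
 * Illustrative usage sketch, not part of the generated bindings: orders {@code consumer} behind all work submitted to {@code producer} so far,
 * without blocking the host. Error handling is elided.
 */
private static void streamWaitEventExample(long producer, long consumer) {
    try (MemoryStack stack = stackPush()) {
        PointerBuffer phEvent = stack.mallocPointer(1);
        cuEventCreate(phEvent, CU_EVENT_DISABLE_TIMING); // timing is not needed for ordering
        long hEvent = phEvent.get(0);
        cuEventRecord(hEvent, producer);        // capture the producer's work
        cuStreamWaitEvent(consumer, hEvent, 0); // the wait happens on the device
        cuEventDestroy(hEvent);                 // safe: resources are released once the event completes
    }
}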
// --- [ cuStreamAddCallback ] ---
/** Unsafe version of: {@link #cuStreamAddCallback StreamAddCallback} */
public static int ncuStreamAddCallback(long hStream, long callback, long userData, int flags) {
long __functionAddress = Functions.StreamAddCallback;
if (CHECKS) {
check(__functionAddress);
check(userData);
}
return callPPPI(hStream, callback, userData, flags, __functionAddress);
}
/**
* Add a callback to a compute stream.
*
* Note
*
* This function is slated for eventual deprecation and removal. If you do not require the callback to execute in case of a device error,
* consider using {@link #cuLaunchHostFunc LaunchHostFunc}. Additionally, this function is not supported with {@link #cuStreamBeginCapture StreamBeginCapture} and {@link #cuStreamEndCapture StreamEndCapture}, unlike
* {@link #cuLaunchHostFunc LaunchHostFunc}.
*
* Adds a callback to be called on the host after all currently enqueued items in the stream have completed. For each {@code cuStreamAddCallback} call,
* the callback will be executed exactly once. The callback will block later work in the stream until it is finished.
*
* The callback may be passed {@link #CUDA_SUCCESS} or an error code. In the event of a device error, all subsequently executed callbacks will receive an
* appropriate {@code CUresult}.
*
* Callbacks must not make any CUDA API calls. Attempting to use a CUDA API will result in {@link #CUDA_ERROR_NOT_PERMITTED}. Callbacks must not perform any
* synchronization that may depend on outstanding device work or other callbacks that are not mandated to run earlier. Callbacks without a mandated order
* (in independent streams) execute in undefined order and may be serialized.
*
* For the purposes of Unified Memory, callback execution makes a number of guarantees:
*
*
* - The callback stream is considered idle for the duration of the callback. Thus, for example, a callback may always use memory attached to the
* callback stream.
* - The start of execution of a callback has the same effect as synchronizing an event recorded in the same stream immediately prior to the callback.
* It thus synchronizes streams which have been "joined" prior to the callback.
* - Adding device work to any stream does not have the effect of making the stream active until all preceding host functions and stream callbacks have
* executed. Thus, for example, a callback might use global attached memory even if work has been added to another stream, if the work has been
* ordered behind the callback with an event.
* - Completion of a callback does not cause a stream to become active except as described above. The callback stream will remain idle if no device work
* follows the callback, and will remain idle across consecutive callbacks without device work in between. Thus, for example, stream synchronization
* can be done by signaling from a callback at the end of the stream.
*
*
* @param hStream stream to add callback to
* @param callback the function to call once preceding stream operations are complete
* @param userData user specified data to be passed to the callback function
* @param flags reserved for future use, must be 0
*/
@NativeType("CUresult")
public static int cuStreamAddCallback(@NativeType("CUstream") long hStream, @NativeType("void (*) (CUstream, CUresult, void *)") CUstreamCallbackI callback, @NativeType("void *") long userData, @NativeType("unsigned int") int flags) {
return ncuStreamAddCallback(hStream, callback.address(), userData, flags);
}
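/**
 * Illustrative usage sketch, not part of the generated bindings: schedules a host-side callback after all work currently enqueued in
 * {@code hStream}, assuming LWJGL's standard {@code CUstreamCallback.create} factory. The callback must not call into CUDA, and the callback
 * object must be freed once it is guaranteed to have run (e.g. after {@link #cuStreamSynchronize StreamSynchronize}).
 */
private static void streamAddCallbackExample(long hStream) {
    CUstreamCallback callback = CUstreamCallback.create(
        (stream, status, userData) -> System.out.println("stream work done, status = " + status)
    );
    // userData is unused here; a non-NULL token is passed to satisfy LWJGL's runtime checks
    cuStreamAddCallback(hStream, callback, callback.address(), 0); // flags must be 0
}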
// --- [ cuStreamBeginCapture ] ---
/**
* Begins graph capture on a stream.
*
* Begin graph capture on {@code hStream}. When a stream is in capture mode, all operations pushed into the stream will not be executed, but will instead
* be captured into a graph, which will be returned via {@link #cuStreamEndCapture StreamEndCapture}. Capture may not be initiated if {@code stream} is {@link #CU_STREAM_LEGACY STREAM_LEGACY}. Capture
* must be ended on the same stream in which it was initiated, and it may only be initiated if the stream is not already in capture mode. The capture mode
* may be queried via {@link #cuStreamIsCapturing StreamIsCapturing}. A unique id representing the capture sequence may be queried via {@link #cuStreamGetCaptureInfo StreamGetCaptureInfo}.
*
* Note
*
* Kernels captured using this API must not use texture and surface references. Reading or writing through any texture or surface reference is
* undefined behavior. This restriction does not apply to texture and surface objects.
*
* @param hStream stream in which to initiate capture
*/
@NativeType("CUresult")
public static int cuStreamBeginCapture(@NativeType("CUstream") long hStream) {
long __functionAddress = Functions.StreamBeginCapture;
if (CHECKS) {
check(__functionAddress);
}
return callPI(hStream, __functionAddress);
}
// --- [ cuStreamBeginCapture_v2 ] ---
/**
* Begins graph capture on a stream.
*
* Begin graph capture on {@code hStream}. When a stream is in capture mode, all operations pushed into the stream will not be executed, but will instead
* be captured into a graph, which will be returned via {@link #cuStreamEndCapture StreamEndCapture}. Capture may not be initiated if {@code stream} is {@link #CU_STREAM_LEGACY STREAM_LEGACY}. Capture
* must be ended on the same stream in which it was initiated, and it may only be initiated if the stream is not already in capture mode. The capture mode
* may be queried via {@link #cuStreamIsCapturing StreamIsCapturing}. A unique id representing the capture sequence may be queried via {@link #cuStreamGetCaptureInfo StreamGetCaptureInfo}.
*
* If {@code mode} is not {@link #CU_STREAM_CAPTURE_MODE_RELAXED STREAM_CAPTURE_MODE_RELAXED}, {@link #cuStreamEndCapture StreamEndCapture} must be called on this stream from the same thread.
*
* Note
*
* Kernels captured using this API must not use texture and surface references. Reading or writing through any texture or surface reference is
* undefined behavior. This restriction does not apply to texture and surface objects.
*
* @param hStream stream in which to initiate capture
* @param mode controls the interaction of this capture sequence with other API calls that are potentially unsafe. For more details see
* {@link #cuThreadExchangeStreamCaptureMode ThreadExchangeStreamCaptureMode}.
*/
@NativeType("CUresult")
public static int cuStreamBeginCapture_v2(@NativeType("CUstream") long hStream, @NativeType("CUstreamCaptureMode") int mode) {
long __functionAddress = Functions.StreamBeginCapture_v2;
if (CHECKS) {
check(__functionAddress);
}
return callPI(hStream, mode, __functionAddress);
}
// --- [ cuThreadExchangeStreamCaptureMode ] ---
/** Unsafe version of: {@link #cuThreadExchangeStreamCaptureMode ThreadExchangeStreamCaptureMode} */
public static int ncuThreadExchangeStreamCaptureMode(long mode) {
long __functionAddress = Functions.ThreadExchangeStreamCaptureMode;
if (CHECKS) {
check(__functionAddress);
}
return callPI(mode, __functionAddress);
}
/**
* Swaps the stream capture interaction mode for a thread.
*
* Sets the calling thread's stream capture interaction mode to the value contained in {@code *mode}, and overwrites {@code *mode} with the previous mode
* for the thread. To facilitate deterministic behavior across function or module boundaries, callers are encouraged to use this API in a push-pop
* fashion:
*
*
* CUstreamCaptureMode mode = desiredMode;
* cuThreadExchangeStreamCaptureMode(&mode);
* ...
* cuThreadExchangeStreamCaptureMode(&mode); // restore previous mode
*
* During stream capture (see {@link #cuStreamBeginCapture StreamBeginCapture}), some actions, such as a call to {@code cudaMalloc}, may be unsafe. In the case of {@code cudaMalloc},
* the operation is not enqueued asynchronously to a stream, and is not observed by stream capture. Therefore, if the sequence of operations captured via
* {@link #cuStreamBeginCapture StreamBeginCapture} depended on the allocation being replayed whenever the graph is launched, the captured graph would be invalid.
*
* Therefore, stream capture places restrictions on API calls that can be made within or concurrently to a {@link #cuStreamBeginCapture StreamBeginCapture}-{@link #cuStreamEndCapture StreamEndCapture}
* sequence. This behavior can be controlled via this API and flags to {@code cuStreamBeginCapture}.
*
* A thread's mode is one of the following:
*
*
* - {@link #CU_STREAM_CAPTURE_MODE_GLOBAL STREAM_CAPTURE_MODE_GLOBAL}: This is the default mode.
*
*
* If the local thread has an ongoing capture sequence that was not initiated with {@link #CU_STREAM_CAPTURE_MODE_RELAXED STREAM_CAPTURE_MODE_RELAXED} at {@link #cuStreamBeginCapture StreamBeginCapture}, or if any
* other thread has a concurrent capture sequence initiated with {@link #CU_STREAM_CAPTURE_MODE_GLOBAL STREAM_CAPTURE_MODE_GLOBAL}, this thread is prohibited from potentially unsafe API
* calls.
* - {@link #CU_STREAM_CAPTURE_MODE_THREAD_LOCAL STREAM_CAPTURE_MODE_THREAD_LOCAL}: If the local thread has an ongoing capture sequence not initiated with {@code CU_STREAM_CAPTURE_MODE_RELAXED},
* it is prohibited from potentially unsafe API calls. Concurrent capture sequences in other threads are ignored.
* - {@link #CU_STREAM_CAPTURE_MODE_RELAXED STREAM_CAPTURE_MODE_RELAXED}: The local thread is not prohibited from potentially unsafe API calls. Note that the thread is still prohibited from
* API calls which necessarily conflict with stream capture, for example, attempting {@link #cuEventQuery EventQuery} on an event that was last recorded inside a capture
* sequence.
*
*
* @param mode pointer to mode value to swap with the current mode
*/
@NativeType("CUresult")
public static int cuThreadExchangeStreamCaptureMode(@NativeType("CUstreamCaptureMode *") IntBuffer mode) {
if (CHECKS) {
check(mode, 1);
}
return ncuThreadExchangeStreamCaptureMode(memAddress(mode));
}
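/**
 * Illustrative Java equivalent, not part of the generated bindings, of the push-pop pattern shown above, using a stack-allocated mode value.
 * Error handling is elided.
 */
private static void exchangeCaptureModeExample() {
    try (MemoryStack stack = stackPush()) {
        IntBuffer mode = stack.ints(CU_STREAM_CAPTURE_MODE_RELAXED); // the desired mode
        cuThreadExchangeStreamCaptureMode(mode); // mode now holds the previous mode
        // ... potentially unsafe API calls ...
        cuThreadExchangeStreamCaptureMode(mode); // restore the previous mode
    }
}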
// --- [ cuStreamEndCapture ] ---
/** Unsafe version of: {@link #cuStreamEndCapture StreamEndCapture} */
public static int ncuStreamEndCapture(long hStream, long phGraph) {
long __functionAddress = Functions.StreamEndCapture;
if (CHECKS) {
check(__functionAddress);
}
return callPPI(hStream, phGraph, __functionAddress);
}
/**
* Ends capture on a stream, returning the captured graph.
*
* End capture on {@code hStream}, returning the captured graph via {@code phGraph}. Capture must have been initiated on {@code hStream} via a call to
* {@link #cuStreamBeginCapture StreamBeginCapture}. If capture was invalidated due to a violation of the rules of stream capture, then a {@code NULL} graph will be returned.
*
* If the {@code mode} argument to {@link #cuStreamBeginCapture StreamBeginCapture} was not {@link #CU_STREAM_CAPTURE_MODE_RELAXED STREAM_CAPTURE_MODE_RELAXED}, this call must be from the same thread as
* {@link #cuStreamBeginCapture StreamBeginCapture}.
*
* @param hStream stream to query
* @param phGraph the captured graph
*/
@NativeType("CUresult")
public static int cuStreamEndCapture(@NativeType("CUstream") long hStream, @NativeType("CUgraph *") PointerBuffer phGraph) {
if (CHECKS) {
check(phGraph, 1);
}
return ncuStreamEndCapture(hStream, memAddress(phGraph));
}
// --- [ cuStreamIsCapturing ] ---
/** Unsafe version of: {@link #cuStreamIsCapturing StreamIsCapturing} */
public static int ncuStreamIsCapturing(long hStream, long captureStatus) {
long __functionAddress = Functions.StreamIsCapturing;
if (CHECKS) {
check(__functionAddress);
}
return callPPI(hStream, captureStatus, __functionAddress);
}
/**
* Returns a stream's capture status.
*
* Return the capture status of {@code hStream} via {@code captureStatus}. After a successful call, {@code *captureStatus} will contain one of the
* following:
*
*
* - {@link #CU_STREAM_CAPTURE_STATUS_NONE STREAM_CAPTURE_STATUS_NONE}: The stream is not capturing.
* - {@link #CU_STREAM_CAPTURE_STATUS_ACTIVE STREAM_CAPTURE_STATUS_ACTIVE}: The stream is capturing.
* - {@link #CU_STREAM_CAPTURE_STATUS_INVALIDATED STREAM_CAPTURE_STATUS_INVALIDATED}: The stream was capturing but an error has invalidated the capture sequence. The capture sequence must be
* terminated with {@link #cuStreamEndCapture StreamEndCapture} on the stream where it was initiated in order to continue using {@code hStream}.
*
*
* Note that, if this is called on {@link #CU_STREAM_LEGACY STREAM_LEGACY} (the "null stream") while a blocking stream in the same context is capturing, it will return
* {@link #CUDA_ERROR_STREAM_CAPTURE_IMPLICIT} and {@code *captureStatus} is unspecified after the call. The blocking stream capture is not invalidated.
*
* When a blocking stream is capturing, the legacy stream is in an unusable state until the blocking stream capture is terminated. The legacy stream is
* not supported for stream capture, but attempted use would have an implicit dependency on the capturing stream(s).
*
* @param hStream stream to query
* @param captureStatus returns the stream's capture status
*/
@NativeType("CUresult")
public static int cuStreamIsCapturing(@NativeType("CUstream") long hStream, @NativeType("CUstreamCaptureStatus *") IntBuffer captureStatus) {
if (CHECKS) {
check(captureStatus, 1);
}
return ncuStreamIsCapturing(hStream, memAddress(captureStatus));
}
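/**
 * Illustrative usage sketch, not part of the generated bindings: captures work submitted to a stream into a graph. The work enqueued between
 * begin and end is application-specific; instantiation, launch and destruction of the returned graph are elided, as is error handling.
 */
private static long streamCaptureExample(long hStream) {
    try (MemoryStack stack = stackPush()) {
        cuStreamBeginCapture_v2(hStream, CU_STREAM_CAPTURE_MODE_GLOBAL);
        // ... enqueue kernels/copies into hStream; nothing executes while capturing ...
        IntBuffer status = stack.mallocInt(1);
        cuStreamIsCapturing(hStream, status); // expect CU_STREAM_CAPTURE_STATUS_ACTIVE
        PointerBuffer phGraph = stack.mallocPointer(1);
        cuStreamEndCapture(hStream, phGraph);
        return phGraph.get(0); // NULL if the capture sequence was invalidated
    }
}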
// --- [ cuStreamGetCaptureInfo ] ---
/** Unsafe version of: {@link #cuStreamGetCaptureInfo StreamGetCaptureInfo} */
public static int ncuStreamGetCaptureInfo(long hStream, long captureStatus, long id) {
long __functionAddress = Functions.StreamGetCaptureInfo;
if (CHECKS) {
check(__functionAddress);
}
return callPPPI(hStream, captureStatus, id, __functionAddress);
}
/**
* Query capture status of a stream.
*
* Query the capture status of a stream and get an id for the capture sequence, which is unique over the lifetime of the process.
*
* If called on {@link #CU_STREAM_LEGACY STREAM_LEGACY} (the "null stream") while a stream not created with {@link #CU_STREAM_NON_BLOCKING STREAM_NON_BLOCKING} is capturing, returns
* {@link #CUDA_ERROR_STREAM_CAPTURE_IMPLICIT}.
*
* A valid id is returned only if both of the following are true:
*
*
* - the call returns {@link #CUDA_SUCCESS}
* - {@code captureStatus} is set to {@link #CU_STREAM_CAPTURE_STATUS_ACTIVE STREAM_CAPTURE_STATUS_ACTIVE}
*
*/
@NativeType("CUresult")
public static int cuStreamGetCaptureInfo(@NativeType("CUstream") long hStream, @NativeType("CUstreamCaptureStatus *") IntBuffer captureStatus, @NativeType("cuuint64_t *") LongBuffer id) {
if (CHECKS) {
check(captureStatus, 1);
check(id, 1);
}
return ncuStreamGetCaptureInfo(hStream, memAddress(captureStatus), memAddress(id));
}
// --- [ cuStreamGetCaptureInfo_v2 ] ---
/** Unsafe version of: {@link #cuStreamGetCaptureInfo_v2 StreamGetCaptureInfo_v2} */
public static int ncuStreamGetCaptureInfo_v2(long hStream, long captureStatus_out, long id_out, long graph_out, long dependencies_out, long numDependencies_out) {
long __functionAddress = Functions.StreamGetCaptureInfo_v2;
if (CHECKS) {
check(__functionAddress);
}
return callPPPPPPI(hStream, captureStatus_out, id_out, graph_out, dependencies_out, numDependencies_out, __functionAddress);
}
/**
* Query a stream's capture state (11.3+).
*
* Query stream state related to stream capture.
*
* If called on {@link #CU_STREAM_LEGACY STREAM_LEGACY} (the "null stream") while a stream not created with {@link #CU_STREAM_NON_BLOCKING STREAM_NON_BLOCKING} is capturing, returns
* {@link #CUDA_ERROR_STREAM_CAPTURE_IMPLICIT}.
*
* Valid data (other than capture status) is returned only if both of the following are true:
*
*
* - the call returns {@link #CUDA_SUCCESS}
* - the returned capture status is {@link #CU_STREAM_CAPTURE_STATUS_ACTIVE STREAM_CAPTURE_STATUS_ACTIVE}
*
*
* This version of {@code cuStreamGetCaptureInfo} is introduced in CUDA 11.3 and will supplant the previous version in 12.0. Developers requiring
* compatibility across minor versions to CUDA 11.0 (driver version 445) should use {@link #cuStreamGetCaptureInfo StreamGetCaptureInfo} or include a fallback path.
*
* @param hStream the stream to query
* @param captureStatus_out location to return the capture status of the stream; required
* @param id_out optional location to return an id for the capture sequence, which is unique over the lifetime of the process
* @param graph_out optional location to return the graph being captured into.
*
* All operations other than destroy and node removal are permitted on the graph while the capture sequence is in progress. This API does not transfer
* ownership of the graph, which is transferred or destroyed at {@link #cuStreamEndCapture StreamEndCapture}. Note that the graph handle may be invalidated before end of
* capture for certain errors. Nodes that are or become unreachable from the original stream at {@link #cuStreamEndCapture StreamEndCapture} due to direct actions on the graph
* do not trigger {@link #CUDA_ERROR_STREAM_CAPTURE_UNJOINED}.
* @param dependencies_out optional location to store a pointer to an array of nodes.
*
* The next node to be captured in the stream will depend on this set of nodes, absent operations such as event wait which modify this set. The array
* pointer is valid until the next API call which operates on the stream or until end of capture. The node handles may be copied out and are valid
* until they or the graph is destroyed. The driver-owned array may also be passed directly to APIs that operate on the graph (not the stream) without
* copying.
* @param numDependencies_out optional location to store the size of the array returned in {@code dependencies_out}
*/
@NativeType("CUresult")
public static int cuStreamGetCaptureInfo_v2(@NativeType("CUstream") long hStream, @NativeType("CUstreamCaptureStatus *") IntBuffer captureStatus_out, @Nullable @NativeType("cuuint64_t *") LongBuffer id_out, @Nullable @NativeType("CUgraph *") PointerBuffer graph_out, @Nullable @NativeType("CUgraphNode const **") PointerBuffer dependencies_out, @Nullable @NativeType("size_t *") PointerBuffer numDependencies_out) {
if (CHECKS) {
check(captureStatus_out, 1);
checkSafe(id_out, 1);
checkSafe(graph_out, 1);
checkSafe(dependencies_out, 1);
checkSafe(numDependencies_out, 1);
}
return ncuStreamGetCaptureInfo_v2(hStream, memAddress(captureStatus_out), memAddressSafe(id_out), memAddressSafe(graph_out), memAddressSafe(dependencies_out), memAddressSafe(numDependencies_out));
}
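/**
 * Illustrative usage sketch, not part of the generated bindings: queries the capture status and sequence id, passing {@code null} for the
 * outputs that are not needed. Requires a driver exposing the CUDA 11.3+ entry point.
 */
private static void streamGetCaptureInfoExample(long hStream) {
    try (MemoryStack stack = stackPush()) {
        IntBuffer  status = stack.mallocInt(1);
        LongBuffer id     = stack.mallocLong(1);
        int err = cuStreamGetCaptureInfo_v2(hStream, status, id, null, null, null);
        if (err == CUDA_SUCCESS && status.get(0) == CU_STREAM_CAPTURE_STATUS_ACTIVE) {
            System.out.println("capturing, sequence id = " + id.get(0));
        }
    }
}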
// --- [ cuStreamUpdateCaptureDependencies ] ---
/** Unsafe version of: {@link #cuStreamUpdateCaptureDependencies StreamUpdateCaptureDependencies} */
public static int ncuStreamUpdateCaptureDependencies(long hStream, long dependencies, long numDependencies, int flags) {
long __functionAddress = Functions.StreamUpdateCaptureDependencies;
if (CHECKS) {
check(__functionAddress);
}
return callPPPI(hStream, dependencies, numDependencies, flags, __functionAddress);
}
/**
* Update the set of dependencies in a capturing stream (11.3+).
*
* Modifies the dependency set of a capturing stream. The dependency set is the set of nodes that the next captured node in the stream will depend on.
*
* Valid flags are {@link #CU_STREAM_ADD_CAPTURE_DEPENDENCIES STREAM_ADD_CAPTURE_DEPENDENCIES} and {@link #CU_STREAM_SET_CAPTURE_DEPENDENCIES STREAM_SET_CAPTURE_DEPENDENCIES}. These control whether the set passed to the API is added
* to the existing set or replaces it. A flags value of 0 defaults to {@link #CU_STREAM_ADD_CAPTURE_DEPENDENCIES STREAM_ADD_CAPTURE_DEPENDENCIES}.
*
* Nodes that are removed from the dependency set via this API do not result in {@link #CUDA_ERROR_STREAM_CAPTURE_UNJOINED} if they are unreachable from the
* stream at {@link #cuStreamEndCapture StreamEndCapture}.
*
* Returns {@link #CUDA_ERROR_ILLEGAL_STATE} if the stream is not capturing.
*
* This API is new in CUDA 11.3. Developers requiring compatibility across minor versions to CUDA 11.0 should either not use this API or provide a fallback path.
*/
@NativeType("CUresult")
public static int cuStreamUpdateCaptureDependencies(@NativeType("CUstream") long hStream, @NativeType("CUgraphNode *") PointerBuffer dependencies, @NativeType("unsigned int") int flags) {
return ncuStreamUpdateCaptureDependencies(hStream, memAddress(dependencies), dependencies.remaining(), flags);
}
// --- [ cuStreamAttachMemAsync ] ---
/**
* Attach memory to a stream asynchronously.
*
* Enqueues an operation in {@code hStream} to specify stream association of {@code length} bytes of memory starting from {@code dptr}. This function is a
* stream-ordered operation, meaning that it is dependent on, and will only take effect when, previous work in the stream has completed. Any previous
* association is automatically replaced.
*
* {@code dptr} must point to one of the following types of memories:
*
*
* - managed memory declared using the __managed__ keyword or allocated with {@link #cuMemAllocManaged MemAllocManaged}.
* - a valid host-accessible region of system-allocated pageable memory. This type of memory may only be specified if the device associated with the
* stream reports a non-zero value for the device attribute {@link #CU_DEVICE_ATTRIBUTE_PAGEABLE_MEMORY_ACCESS DEVICE_ATTRIBUTE_PAGEABLE_MEMORY_ACCESS}.
*
*
* For managed allocations, {@code length} must be either zero or the entire allocation's size. Both indicate that the entire allocation's stream
* association is being changed. Currently, it is not possible to change stream association for a portion of a managed allocation.
*
* For pageable host allocations, {@code length} must be non-zero.
*
* The stream association is specified using {@code flags} which must be one of {@code CUmemAttach_flags}. If the {@link #CU_MEM_ATTACH_GLOBAL MEM_ATTACH_GLOBAL} flag is specified,
* the memory can be accessed by any stream on any device. If the {@link #CU_MEM_ATTACH_HOST MEM_ATTACH_HOST} flag is specified, the program makes a guarantee that it won't access
* the memory on the device from any stream on a device that has a zero value for the device attribute {@link #CU_DEVICE_ATTRIBUTE_CONCURRENT_MANAGED_ACCESS DEVICE_ATTRIBUTE_CONCURRENT_MANAGED_ACCESS}. If
* the {@link #CU_MEM_ATTACH_SINGLE MEM_ATTACH_SINGLE} flag is specified and {@code hStream} is associated with a device that has a zero value for the device attribute
* {@link #CU_DEVICE_ATTRIBUTE_CONCURRENT_MANAGED_ACCESS DEVICE_ATTRIBUTE_CONCURRENT_MANAGED_ACCESS}, the program makes a guarantee that it will only access the memory on the device from {@code hStream}.
* It is illegal to attach singly to the {@code NULL} stream, because the {@code NULL} stream is a virtual global stream and not a specific stream. An error will be
* returned in this case.
*
* When memory is associated with a single stream, the Unified Memory system will allow CPU access to this memory region so long as all operations in
* {@code hStream} have completed, regardless of whether other streams are active. In effect, this constrains exclusive ownership of the managed memory
* region by an active GPU to per-stream activity instead of whole-GPU activity.
*
* Accessing memory on the device from streams that are not associated with it will produce undefined results. No error checking is performed by the
* Unified Memory system to ensure that kernels launched into other streams do not access this region.
*
* It is a program's responsibility to order calls to {@link #cuStreamAttachMemAsync StreamAttachMemAsync} via events, synchronization or other means to ensure legal access to memory
* at all times. Data visibility and coherency will be changed appropriately for all kernels which follow a stream-association change.
*
* If {@code hStream} is destroyed while data is associated with it, the association is removed and the association reverts to the default visibility of
* the allocation as specified at {@link #cuMemAllocManaged MemAllocManaged}. For __managed__ variables, the default association is always {@link #CU_MEM_ATTACH_GLOBAL MEM_ATTACH_GLOBAL}. Note that
* destroying a stream is an asynchronous operation, and as a result, the change to default association won't happen until all work in the stream has
* completed.
*
* @param hStream stream in which to enqueue the attach operation
* @param dptr pointer to memory (must be a pointer to managed memory or to a valid host-accessible region of system-allocated pageable memory)
* @param length length of memory
* @param flags must be one of {@code CUmemAttach_flags}
*/
@NativeType("CUresult")
public static int cuStreamAttachMemAsync(@NativeType("CUstream") long hStream, @NativeType("CUdeviceptr") long dptr, @NativeType("size_t") long length, @NativeType("unsigned int") int flags) {
long __functionAddress = Functions.StreamAttachMemAsync;
if (CHECKS) {
check(__functionAddress);
check(dptr);
}
return callPPPI(hStream, dptr, length, flags, __functionAddress);
}
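/**
 * Illustrative usage sketch, not part of the generated bindings: allocates managed memory, temporarily restricts its association to a single
 * stream, and then restores global visibility. Assumes a context is current; the kernel work and error handling are elided.
 */
private static void streamAttachMemAsyncExample(long hStream) {
    try (MemoryStack stack = stackPush()) {
        PointerBuffer dptr = stack.mallocPointer(1);
        cuMemAllocManaged(dptr, 1 << 20, CU_MEM_ATTACH_GLOBAL);
        long managed = dptr.get(0);
        // for managed allocations, length must be 0 or the entire allocation size
        cuStreamAttachMemAsync(hStream, managed, 0, CU_MEM_ATTACH_SINGLE);
        // ... kernels in hStream may access the memory; CPU access is legal once hStream is idle ...
        cuStreamAttachMemAsync(hStream, managed, 0, CU_MEM_ATTACH_GLOBAL);
        cuStreamSynchronize(hStream);
        cuMemFree(managed);
    }
}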
// --- [ cuStreamQuery ] ---
/**
* Determine status of a compute stream.
*
* Returns {@link #CUDA_SUCCESS} if all operations in the stream specified by {@code hStream} have completed, or {@link #CUDA_ERROR_NOT_READY} if not.
*
* For the purposes of Unified Memory, a return value of {@link #CUDA_SUCCESS} is equivalent to having called {@link #cuStreamSynchronize StreamSynchronize}.
*
* @param hStream stream to query status of
*/
@NativeType("CUresult")
public static int cuStreamQuery(@NativeType("CUstream") long hStream) {
long __functionAddress = Functions.StreamQuery;
return callPI(hStream, __functionAddress);
}
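/**
 * Illustrative usage sketch, not part of the generated bindings: polls a stream so the host can interleave useful work instead of blocking in
 * {@link #cuStreamSynchronize StreamSynchronize}.
 */
private static void streamQueryExample(long hStream) {
    int err;
    while ((err = cuStreamQuery(hStream)) == CUDA_ERROR_NOT_READY) {
        // ... do useful host-side work between polls ...
        Thread.yield();
    }
    // err is now CUDA_SUCCESS, or an error code describing a failure
}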
// --- [ cuStreamSynchronize ] ---
/**
* Wait until a stream's tasks are completed.
*
* Waits until the device has completed all operations in the stream specified by {@code hStream}. If the context was created with the
* {@link #CU_CTX_SCHED_BLOCKING_SYNC CTX_SCHED_BLOCKING_SYNC} flag, the CPU thread will block until the stream is finished with all of its tasks.
*
* @param hStream stream to wait for
*/
@NativeType("CUresult")
public static int cuStreamSynchronize(@NativeType("CUstream") long hStream) {
long __functionAddress = Functions.StreamSynchronize;
return callPI(hStream, __functionAddress);
}
// --- [ cuStreamDestroy ] ---
/**
* Destroys a stream.
*
* Destroys the stream specified by {@code hStream}.
*
* In case the device is still doing work in the stream {@code hStream} when {@link #cuStreamDestroy StreamDestroy} is called, the function will return immediately and the
* resources associated with {@code hStream} will be released automatically once the device has completed all work in {@code hStream}.
*
* @param hStream stream to destroy
*/
@NativeType("CUresult")
public static int cuStreamDestroy(@NativeType("CUstream") long hStream) {
long __functionAddress = Functions.StreamDestroy;
if (CHECKS) {
check(__functionAddress);
}
return callPI(hStream, __functionAddress);
}
// --- [ cuStreamCopyAttributes ] ---
/**
* Copies attributes from source stream to destination stream.
*
* Copies attributes from source stream {@code src} to destination stream {@code dst}. Both streams must have the same context.
*
* @param dst destination stream
* @param src source stream. For the list of attributes, see {@code CUstreamAttrID}.
*/
@NativeType("CUresult")
public static int cuStreamCopyAttributes(@NativeType("CUstream") long dst, @NativeType("CUstream") long src) {
long __functionAddress = Functions.StreamCopyAttributes;
if (CHECKS) {
check(__functionAddress);
}
return callPPI(dst, src, __functionAddress);
}
// --- [ cuStreamGetAttribute ] ---
/** Unsafe version of: {@link #cuStreamGetAttribute StreamGetAttribute} */
public static int ncuStreamGetAttribute(long hStream, int attr, long value_out) {
long __functionAddress = Functions.StreamGetAttribute;
if (CHECKS) {
check(__functionAddress);
}
return callPPI(hStream, attr, value_out, __functionAddress);
}
/**
* Queries stream attribute.
*
* Queries attribute {@code attr} from {@code hStream} and stores it in the corresponding member of {@code value_out}.
*/
@NativeType("CUresult")
public static int cuStreamGetAttribute(@NativeType("CUstream") long hStream, @NativeType("CUstreamAttrID") int attr, @NativeType("CUstreamAttrValue *") CUstreamAttrValue value_out) {
return ncuStreamGetAttribute(hStream, attr, value_out.address());
}
// --- [ cuStreamSetAttribute ] ---
/** Unsafe version of: {@link #cuStreamSetAttribute StreamSetAttribute} */
public static int ncuStreamSetAttribute(long hStream, int attr, long value) {
long __functionAddress = Functions.StreamSetAttribute;
if (CHECKS) {
check(__functionAddress);
}
return callPPI(hStream, attr, value, __functionAddress);
}
/**
* Sets stream attribute.
*
* Sets attribute {@code attr} on {@code hStream} from the corresponding attribute of {@code value}. The updated attribute will be applied to subsequent work
* submitted to the stream. It will not affect previously submitted work.
*/
@NativeType("CUresult")
public static int cuStreamSetAttribute(@NativeType("CUstream") long hStream, @NativeType("CUstreamAttrID") int attr, @NativeType("CUstreamAttrValue const *") CUstreamAttrValue value) {
return ncuStreamSetAttribute(hStream, attr, value.address());
}
// --- [ cuEventCreate ] ---
/** Unsafe version of: {@link #cuEventCreate EventCreate} */
public static int ncuEventCreate(long phEvent, int Flags) {
long __functionAddress = Functions.EventCreate;
return callPI(phEvent, Flags, __functionAddress);
}
/**
* Creates an event.
*
* Creates an event {@code *phEvent} for the current context with the flags specified via {@code Flags}. Valid flags include:
*
*
* - {@link #CU_EVENT_DEFAULT EVENT_DEFAULT}: Default event creation flag.
* - {@link #CU_EVENT_BLOCKING_SYNC EVENT_BLOCKING_SYNC}: Specifies that the created event should use blocking synchronization. A CPU thread that uses {@link #cuEventSynchronize EventSynchronize} to
* wait on an event created with this flag will block until the event has actually been recorded.
* - {@link #CU_EVENT_DISABLE_TIMING EVENT_DISABLE_TIMING}: Specifies that the created event does not need to record timing data. Events created with this flag specified and the
* {@link #CU_EVENT_BLOCKING_SYNC EVENT_BLOCKING_SYNC} flag not specified will provide the best performance when used with {@link #cuStreamWaitEvent StreamWaitEvent} and {@link #cuEventQuery EventQuery}.
* - {@link #CU_EVENT_INTERPROCESS EVENT_INTERPROCESS}: Specifies that the created event may be used as an interprocess event by {@link #cuIpcGetEventHandle IpcGetEventHandle}. {@link #CU_EVENT_INTERPROCESS EVENT_INTERPROCESS}
* must be specified along with {@link #CU_EVENT_DISABLE_TIMING EVENT_DISABLE_TIMING}.
*
*
* @param phEvent returns newly created event
* @param Flags event creation flags
*/
@NativeType("CUresult")
public static int cuEventCreate(@NativeType("CUevent *") PointerBuffer phEvent, @NativeType("unsigned int") int Flags) {
if (CHECKS) {
check(phEvent, 1);
}
return ncuEventCreate(memAddress(phEvent), Flags);
}
// --- [ cuEventRecord ] ---
/**
* Records an event.
*
* Captures in {@code hEvent} the contents of {@code hStream} at the time of this call. {@code hEvent} and {@code hStream} must be from the same context.
* Calls such as {@link #cuEventQuery EventQuery} or {@link #cuStreamWaitEvent StreamWaitEvent} will then examine or wait for completion of the work that was captured. Uses of {@code hStream}
* after this call do not modify {@code hEvent}. See note on default stream behavior for what is captured in the default case.
*
* {@link #cuEventRecord EventRecord} can be called multiple times on the same event and will overwrite the previously captured state. Other APIs such as
* {@link #cuStreamWaitEvent StreamWaitEvent} use the most recently captured state at the time of the API call, and are not affected by later calls to {@link #cuEventRecord EventRecord}.
* Before the first call to {@link #cuEventRecord EventRecord}, an event represents an empty set of work, so for example {@link #cuEventQuery EventQuery} would return {@link #CUDA_SUCCESS}.
*
* @param hEvent event to record
* @param hStream stream to record event for
*/
@NativeType("CUresult")
public static int cuEventRecord(@NativeType("CUevent") long hEvent, @NativeType("CUstream") long hStream) {
long __functionAddress = Functions.EventRecord;
if (CHECKS) {
check(hEvent);
}
return callPPI(hEvent, hStream, __functionAddress);
}
// --- [ cuEventRecordWithFlags ] ---
/**
* Records an event.
*
* Captures in {@code hEvent} the contents of {@code hStream} at the time of this call. {@code hEvent} and {@code hStream} must be from the same context.
* Calls such as {@link #cuEventQuery EventQuery} or {@link #cuStreamWaitEvent StreamWaitEvent} will then examine or wait for completion of the work that was captured. Uses of {@code hStream}
* after this call do not modify {@code hEvent}. See note on default stream behavior for what is captured in the default case.
*
* {@link #cuEventRecordWithFlags EventRecordWithFlags} can be called multiple times on the same event and will overwrite the previously captured state. Other APIs such as
* {@link #cuStreamWaitEvent StreamWaitEvent} use the most recently captured state at the time of the API call, and are not affected by later calls to
* {@link #cuEventRecordWithFlags EventRecordWithFlags}. Before the first call to {@link #cuEventRecordWithFlags EventRecordWithFlags}, an event represents an empty set of work, so for example
* {@link #cuEventQuery EventQuery} would return {@link #CUDA_SUCCESS}.
*
* flags include:
*
*
* - {@link #CU_EVENT_RECORD_DEFAULT EVENT_RECORD_DEFAULT}: Default event record flag.
* - {@link #CU_EVENT_RECORD_EXTERNAL EVENT_RECORD_EXTERNAL}: Event is captured in the graph as an external event node when performing stream capture. This flag is invalid outside
* of stream capture.
*
*
* @param hEvent event to record
* @param hStream stream to record event for
* @param flags see {@code CUevent_capture_flags}
*/
@NativeType("CUresult")
public static int cuEventRecordWithFlags(@NativeType("CUevent") long hEvent, @NativeType("CUstream") long hStream, @NativeType("unsigned int") int flags) {
long __functionAddress = Functions.EventRecordWithFlags;
if (CHECKS) {
check(__functionAddress);
check(hEvent);
}
return callPPI(hEvent, hStream, flags, __functionAddress);
}
// --- [ cuEventQuery ] ---
/**
* Queries an event's status.
*
* Queries the status of all work currently captured by {@code hEvent}. See {@link #cuEventRecord EventRecord} for details on what is captured by an event.
*
* Returns {@link #CUDA_SUCCESS} if all captured work has been completed, or {@link #CUDA_ERROR_NOT_READY} if any captured work is incomplete.
*
* For the purposes of Unified Memory, a return value of {@link #CUDA_SUCCESS} is equivalent to having called {@link #cuEventSynchronize EventSynchronize}.
*
* @param hEvent event to query
*/
@NativeType("CUresult")
public static int cuEventQuery(@NativeType("CUevent") long hEvent) {
long __functionAddress = Functions.EventQuery;
if (CHECKS) {
check(hEvent);
}
return callPI(hEvent, __functionAddress);
}
// --- [ cuEventSynchronize ] ---
/**
* Waits for an event to complete.
*
* Waits until the completion of all work currently captured in {@code hEvent}. See {@link #cuEventRecord EventRecord} for details on what is captured by an event.
*
* Waiting for an event that was created with the {@link #CU_EVENT_BLOCKING_SYNC EVENT_BLOCKING_SYNC} flag will cause the calling CPU thread to block until the event has been
* completed by the device. If the {@link #CU_EVENT_BLOCKING_SYNC EVENT_BLOCKING_SYNC} flag has not been set, then the CPU thread will busy-wait until the event has been completed
* by the device.
*
* @param hEvent event to wait for
*/
@NativeType("CUresult")
public static int cuEventSynchronize(@NativeType("CUevent") long hEvent) {
long __functionAddress = Functions.EventSynchronize;
if (CHECKS) {
check(hEvent);
}
return callPI(hEvent, __functionAddress);
}
// --- [ cuEventDestroy ] ---
/**
* Destroys an event.
*
* Destroys the event specified by {@code hEvent}.
*
* An event may be destroyed before it is complete (i.e., while {@link #cuEventQuery EventQuery} would return {@link #CUDA_ERROR_NOT_READY}). In this case, the call does not
* block on completion of the event, and any associated resources will automatically be released asynchronously at completion.
*
* @param hEvent event to destroy
*/
@NativeType("CUresult")
public static int cuEventDestroy(@NativeType("CUevent") long hEvent) {
long __functionAddress = Functions.EventDestroy;
if (CHECKS) {
check(__functionAddress);
check(hEvent);
}
return callPI(hEvent, __functionAddress);
}
// --- [ cuEventElapsedTime ] ---
/** Unsafe version of: {@link #cuEventElapsedTime EventElapsedTime} */
public static int ncuEventElapsedTime(long pMilliseconds, long hStart, long hEnd) {
long __functionAddress = Functions.EventElapsedTime;
if (CHECKS) {
check(hStart);
check(hEnd);
}
return callPPPI(pMilliseconds, hStart, hEnd, __functionAddress);
}
/**
* Computes the elapsed time between two events.
*
* Computes the elapsed time between two events (in milliseconds with a resolution of around 0.5 microseconds).
*
* If either event was last recorded in a non-{@code NULL} stream, the resulting time may be greater than expected (even if both used the same stream handle).
* This happens because the {@link #cuEventRecord EventRecord} operation takes place asynchronously and there is no guarantee that the measured latency is actually just
* between the two events. Any number of other different stream operations could execute in between the two measured events, thus altering the timing in a
* significant way.
*
* If {@link #cuEventRecord EventRecord} has not been called on either event then {@link #CUDA_ERROR_INVALID_HANDLE} is returned. If {@link #cuEventRecord EventRecord} has been called on both
* events but one or both of them has not yet been completed (that is, {@link #cuEventQuery EventQuery} would return {@link #CUDA_ERROR_NOT_READY} on at least one of the
* events), {@link #CUDA_ERROR_NOT_READY} is returned. If either event was created with the {@link #CU_EVENT_DISABLE_TIMING EVENT_DISABLE_TIMING} flag, then this function will return
* {@link #CUDA_ERROR_INVALID_HANDLE}.
*
* @param pMilliseconds time between {@code hStart} and {@code hEnd} in ms
* @param hStart starting event
* @param hEnd ending event
*/
@NativeType("CUresult")
public static int cuEventElapsedTime(@NativeType("float *") FloatBuffer pMilliseconds, @NativeType("CUevent") long hStart, @NativeType("CUevent") long hEnd) {
if (CHECKS) {
check(pMilliseconds, 1);
}
return ncuEventElapsedTime(memAddress(pMilliseconds), hStart, hEnd);
}
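/**
 * Illustrative usage sketch, not part of the generated bindings: measures the GPU time spent on work enqueued between two events. Assumes a
 * context is current; the timed work itself and error handling are elided.
 */
private static float eventElapsedTimeExample(long hStream) {
    try (MemoryStack stack = stackPush()) {
        PointerBuffer pe = stack.mallocPointer(1);
        cuEventCreate(pe, CU_EVENT_DEFAULT);
        long start = pe.get(0);
        cuEventCreate(pe, CU_EVENT_DEFAULT);
        long stop = pe.get(0);
        cuEventRecord(start, hStream);
        // ... enqueue the work to be timed into hStream ...
        cuEventRecord(stop, hStream);
        cuEventSynchronize(stop); // wait until the work captured by stop has completed
        FloatBuffer ms = stack.mallocFloat(1);
        cuEventElapsedTime(ms, start, stop);
        cuEventDestroy(start);
        cuEventDestroy(stop);
        return ms.get(0);
    }
}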
// --- [ cuImportExternalMemory ] ---
/** Unsafe version of: {@link #cuImportExternalMemory ImportExternalMemory} */
public static int ncuImportExternalMemory(long extMem_out, long memHandleDesc) {
long __functionAddress = Functions.ImportExternalMemory;
if (CHECKS) {
check(__functionAddress);
}
return callPPI(extMem_out, memHandleDesc, __functionAddress);
}
/**
* Imports an external memory object.
*
* Imports an externally allocated memory object and returns a handle to that in {@code extMem_out}.
*
* The properties of the handle being imported must be described in {@code memHandleDesc}.
*
* If {@code ::type} is {@link #CU_EXTERNAL_MEMORY_HANDLE_TYPE_OPAQUE_FD EXTERNAL_MEMORY_HANDLE_TYPE_OPAQUE_FD}, then {@code ::handle::fd} must be a valid file descriptor referencing a memory object.
* Ownership of the file descriptor is transferred to the CUDA driver when the handle is imported successfully. Performing any operations on the file
* descriptor after it is imported results in undefined behavior.
*
* If {@code ::type} is {@link #CU_EXTERNAL_MEMORY_HANDLE_TYPE_OPAQUE_WIN32 EXTERNAL_MEMORY_HANDLE_TYPE_OPAQUE_WIN32}, then exactly one of {@code ::handle::win32::handle} and {@code ::handle::win32::name}
* must not be {@code NULL}. If {@code ::handle::win32::handle} is not {@code NULL}, then it must represent a valid shared NT handle that references a memory object.
* Ownership of this handle is not transferred to CUDA after the import operation, so the application must release the handle using the appropriate system
* call. If {@code ::handle::win32::name} is not {@code NULL}, then it must point to a {@code NULL}-terminated array of UTF-16 characters that
* refers to a memory object.
*
* If {@code ::type} is {@link #CU_EXTERNAL_MEMORY_HANDLE_TYPE_OPAQUE_WIN32_KMT EXTERNAL_MEMORY_HANDLE_TYPE_OPAQUE_WIN32_KMT}, then {@code ::handle::win32::handle} must be non-{@code NULL} and
* {@code ::handle::win32::name} must be {@code NULL}. The handle specified must be a globally shared KMT handle. This handle does not hold a reference to the
* underlying object, and thus will be invalid when all references to the memory object are destroyed.
*
* If {@code ::type} is {@link #CU_EXTERNAL_MEMORY_HANDLE_TYPE_D3D12_HEAP EXTERNAL_MEMORY_HANDLE_TYPE_D3D12_HEAP}, then exactly one of {@code ::handle::win32::handle} and {@code ::handle::win32::name}
* must not be {@code NULL}. If {@code ::handle::win32::handle} is not {@code NULL}, then it must represent a valid shared NT handle that is returned by
* {@code ID3D12Device::CreateSharedHandle} when referring to a {@code ID3D12Heap} object. This handle holds a reference to the underlying object. If
* {@code ::handle::win32::name} is not {@code NULL}, then it must point to a {@code NULL}-terminated array of UTF-16 characters that refers to a {@code ID3D12Heap}
* object.
*
* If {@code ::type} is {@link #CU_EXTERNAL_MEMORY_HANDLE_TYPE_D3D12_RESOURCE EXTERNAL_MEMORY_HANDLE_TYPE_D3D12_RESOURCE}, then exactly one of {@code ::handle::win32::handle} and {@code ::handle::win32::name}
* must not be {@code NULL}. If {@code ::handle::win32::handle} is not {@code NULL}, then it must represent a valid shared NT handle that is returned by
* {@code ID3D12Device::CreateSharedHandle} when referring to a {@code ID3D12Resource} object. This handle holds a reference to the underlying object. If
* {@code ::handle::win32::name} is not {@code NULL}, then it must point to a {@code NULL}-terminated array of UTF-16 characters that refers to a {@code ID3D12Resource}
* object.
*
* If {@code ::type} is {@link #CU_EXTERNAL_MEMORY_HANDLE_TYPE_D3D11_RESOURCE EXTERNAL_MEMORY_HANDLE_TYPE_D3D11_RESOURCE}, then {@code ::handle::win32::handle} must represent a valid shared NT handle that is
* returned by {@code IDXGIResource1::CreateSharedHandle} when referring to a {@code ID3D11Resource} object. If {@code ::handle::win32::name} is not
* {@code NULL}, then it must point to a {@code NULL}-terminated array of UTF-16 characters that refers to a {@code ID3D11Resource} object.
*
* If {@code ::type} is {@link #CU_EXTERNAL_MEMORY_HANDLE_TYPE_D3D11_RESOURCE_KMT EXTERNAL_MEMORY_HANDLE_TYPE_D3D11_RESOURCE_KMT}, then {@code ::handle::win32::handle} must represent a valid shared KMT handle
* that is returned by {@code IDXGIResource::GetSharedHandle} when referring to a {@code ID3D11Resource} object and {@code ::handle::win32::name} must be
* {@code NULL}.
*
* If {@code ::type} is {@link #CU_EXTERNAL_MEMORY_HANDLE_TYPE_NVSCIBUF EXTERNAL_MEMORY_HANDLE_TYPE_NVSCIBUF}, then {@code ::handle::nvSciBufObject} must be non-{@code NULL} and reference a valid
* {@code NvSciBuf} object. If the {@code NvSciBuf} object imported into CUDA is also mapped by other drivers, then the application must use
* {@link #cuWaitExternalSemaphoresAsync WaitExternalSemaphoresAsync} or {@link #cuSignalExternalSemaphoresAsync SignalExternalSemaphoresAsync} as appropriate barriers to maintain coherence between CUDA and the other drivers.
* See {@link #CUDA_EXTERNAL_SEMAPHORE_SIGNAL_SKIP_NVSCIBUF_MEMSYNC} and {@link #CUDA_EXTERNAL_SEMAPHORE_WAIT_SKIP_NVSCIBUF_MEMSYNC} for memory synchronization.
*
* The size of the memory object must be specified in {@code ::size}.
*
* Specifying the flag {@link #CUDA_EXTERNAL_MEMORY_DEDICATED} in {@code ::flags} indicates that the resource is a dedicated resource. The definition of a
* dedicated resource is outside the scope of this extension. This flag must be set if {@code ::type} is one of the following:
*
*
* - {@link #CU_EXTERNAL_MEMORY_HANDLE_TYPE_D3D12_RESOURCE EXTERNAL_MEMORY_HANDLE_TYPE_D3D12_RESOURCE}
* - {@link #CU_EXTERNAL_MEMORY_HANDLE_TYPE_D3D11_RESOURCE EXTERNAL_MEMORY_HANDLE_TYPE_D3D11_RESOURCE}
* - {@link #CU_EXTERNAL_MEMORY_HANDLE_TYPE_D3D11_RESOURCE_KMT EXTERNAL_MEMORY_HANDLE_TYPE_D3D11_RESOURCE_KMT}
*
*
* Note
*
* If the Vulkan memory imported into CUDA is mapped on the CPU then the application must use
* {@code vkInvalidateMappedMemoryRanges}/{@code vkFlushMappedMemoryRanges} as well as appropriate Vulkan pipeline barriers to maintain coherence between
* CPU and GPU. For more information on these APIs, please refer to the "Synchronization and Cache Control" chapter of the Vulkan specification.
*
* @param extMem_out returned handle to an external memory object
* @param memHandleDesc memory import handle descriptor
*/
@NativeType("CUresult")
public static int cuImportExternalMemory(@NativeType("CUexternalMemory *") PointerBuffer extMem_out, @NativeType("CUDA_EXTERNAL_MEMORY_HANDLE_DESC const *") CUDA_EXTERNAL_MEMORY_HANDLE_DESC memHandleDesc) {
if (CHECKS) {
check(extMem_out, 1);
}
return ncuImportExternalMemory(memAddress(extMem_out), memHandleDesc.address());
}
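// Illustrative sketch, not part of the generated bindings: importing an externally allocated memory object from an
// opaque POSIX file descriptor (e.g. one exported by Vulkan through VK_KHR_external_memory_fd). Assumes a CUDA context
// is current on this thread; fd and size are hypothetical values produced by the exporting API, and the nested-struct
// accessor path handle().fd() follows LWJGL's usual member-name mapping for the native union and should be verified
// against the CUDA_EXTERNAL_MEMORY_HANDLE_DESC class.
private static long exampleImportOpaqueFd(int fd, long size) {
try (MemoryStack stack = stackPush()) {
CUDA_EXTERNAL_MEMORY_HANDLE_DESC desc = CUDA_EXTERNAL_MEMORY_HANDLE_DESC.calloc(stack)
.type(CU_EXTERNAL_MEMORY_HANDLE_TYPE_OPAQUE_FD)
.size(size);
desc.handle().fd(fd); // on success, ownership of fd transfers to the CUDA driver
PointerBuffer pExtMem = stack.mallocPointer(1);
int err = cuImportExternalMemory(pExtMem, desc);
if (err != CUDA_SUCCESS) {
throw new IllegalStateException("cuImportExternalMemory failed: " + err);
}
return pExtMem.get(0); // CUexternalMemory handle
}
}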
// --- [ cuExternalMemoryGetMappedBuffer ] ---
/** Unsafe version of: {@link #cuExternalMemoryGetMappedBuffer ExternalMemoryGetMappedBuffer} */
public static int ncuExternalMemoryGetMappedBuffer(long devPtr, long extMem, long bufferDesc) {
long __functionAddress = Functions.ExternalMemoryGetMappedBuffer;
if (CHECKS) {
check(__functionAddress);
check(extMem);
}
return callPPPI(devPtr, extMem, bufferDesc, __functionAddress);
}
/**
* Maps a buffer onto an imported memory object.
*
* Maps a buffer onto an imported memory object and returns a device pointer in {@code devPtr}.
*
* The properties of the buffer being mapped must be described in {@code bufferDesc}.
*
* The offset and size have to be suitably aligned to match the requirements of the external API. Mapping two buffers whose ranges overlap may or may not
* result in the same virtual address being returned for the overlapped portion. In such cases, the application must ensure that all accesses to that
* region from the GPU are volatile. Otherwise writes made via one address are not guaranteed to be visible via the other address, even if they're issued
* by the same thread. It is recommended that applications map the combined range instead of mapping separate buffers and then apply the appropriate
* offsets to the returned pointer to derive the individual buffers.
*
* The returned pointer {@code devPtr} must be freed using {@link #cuMemFree MemFree}.
*
* @param devPtr returned device pointer to buffer
* @param extMem handle to external memory object
* @param bufferDesc buffer descriptor
*/
@NativeType("CUresult")
public static int cuExternalMemoryGetMappedBuffer(@NativeType("CUdeviceptr *") PointerBuffer devPtr, @NativeType("CUexternalMemory") long extMem, @NativeType("CUDA_EXTERNAL_MEMORY_BUFFER_DESC const *") CUDA_EXTERNAL_MEMORY_BUFFER_DESC bufferDesc) {
if (CHECKS) {
check(devPtr, 1);
}
return ncuExternalMemoryGetMappedBuffer(memAddress(devPtr), extMem, bufferDesc.address());
}
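// Illustrative sketch, not part of the generated bindings: mapping the full range of an imported memory object as a
// device buffer, then releasing the mapping and the memory object. extMem and size are assumed to come from a
// successful cuImportExternalMemory call such as the sketch above.
private static void exampleMapUseAndRelease(long extMem, long size) {
try (MemoryStack stack = stackPush()) {
CUDA_EXTERNAL_MEMORY_BUFFER_DESC bufferDesc = CUDA_EXTERNAL_MEMORY_BUFFER_DESC.calloc(stack)
.offset(0)
.size(size);
PointerBuffer pDevPtr = stack.mallocPointer(1);
if (cuExternalMemoryGetMappedBuffer(pDevPtr, extMem, bufferDesc) == CUDA_SUCCESS) {
long devPtr = pDevPtr.get(0);
// ... use devPtr in kernels or memory copies ...
cuMemFree(devPtr); // mapped buffers must be freed explicitly
}
cuDestroyExternalMemory(extMem); // then release the memory object itself
}
}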
// --- [ cuExternalMemoryGetMappedMipmappedArray ] ---
/** Unsafe version of: {@link #cuExternalMemoryGetMappedMipmappedArray ExternalMemoryGetMappedMipmappedArray} */
public static int ncuExternalMemoryGetMappedMipmappedArray(long mipmap, long extMem, long mipmapDesc) {
long __functionAddress = Functions.ExternalMemoryGetMappedMipmappedArray;
if (CHECKS) {
check(__functionAddress);
check(extMem);
}
return callPPPI(mipmap, extMem, mipmapDesc, __functionAddress);
}
/**
* Maps a CUDA mipmapped array onto an external memory object.
*
* Maps a CUDA mipmapped array onto an external object and returns a handle to it in {@code mipmap}.
*
* The properties of the CUDA mipmapped array being mapped must be described in {@code mipmapDesc}.
*
* {@code ::offset} is the offset in the memory object where the base level of the mipmap chain is. {@code ::arrayDesc} describes the format, dimensions
* and type of the base level of the mipmap chain. For further details on these parameters, please refer to the documentation for {@link #cuMipmappedArrayCreate MipmappedArrayCreate}.
* Note that if the mipmapped array is bound as a color target in the graphics API, then the flag {@link #CUDA_ARRAY3D_COLOR_ATTACHMENT} must be specified in
* {@code ::arrayDesc::Flags}. {@code ::numLevels} specifies the total number of levels in the mipmap chain.
*
* If {@code extMem} was imported from a handle of type {@link #CU_EXTERNAL_MEMORY_HANDLE_TYPE_NVSCIBUF EXTERNAL_MEMORY_HANDLE_TYPE_NVSCIBUF}, then {@code ::numLevels} must be equal to 1.
*
* The returned CUDA mipmapped array must be freed using {@link #cuMipmappedArrayDestroy MipmappedArrayDestroy}.
*
* @param mipmap returned CUDA mipmapped array
* @param extMem handle to external memory object
* @param mipmapDesc CUDA array descriptor
*/
@NativeType("CUresult")
public static int cuExternalMemoryGetMappedMipmappedArray(@NativeType("CUmipmappedArray *") PointerBuffer mipmap, @NativeType("CUexternalMemory") long extMem, @NativeType("CUDA_EXTERNAL_MEMORY_MIPMAPPED_ARRAY_DESC const *") CUDA_EXTERNAL_MEMORY_MIPMAPPED_ARRAY_DESC mipmapDesc) {
if (CHECKS) {
check(mipmap, 1);
}
return ncuExternalMemoryGetMappedMipmappedArray(memAddress(mipmap), extMem, mipmapDesc.address());
}
// --- [ cuDestroyExternalMemory ] ---
/**
* Destroys an external memory object.
*
* Destroys the specified external memory object. Any existing buffers and CUDA mipmapped arrays mapped onto this object must no longer be used and must
* be explicitly freed using {@link #cuMemFree MemFree} and {@link #cuMipmappedArrayDestroy MipmappedArrayDestroy} respectively.
*
* @param extMem external memory object to be destroyed
*/
@NativeType("CUresult")
public static int cuDestroyExternalMemory(@NativeType("CUexternalMemory") long extMem) {
long __functionAddress = Functions.DestroyExternalMemory;
if (CHECKS) {
check(__functionAddress);
check(extMem);
}
return callPI(extMem, __functionAddress);
}
// --- [ cuImportExternalSemaphore ] ---
/** Unsafe version of: {@link #cuImportExternalSemaphore ImportExternalSemaphore} */
public static int ncuImportExternalSemaphore(long extSem_out, long semHandleDesc) {
long __functionAddress = Functions.ImportExternalSemaphore;
if (CHECKS) {
check(__functionAddress);
}
return callPPI(extSem_out, semHandleDesc, __functionAddress);
}
/**
* Imports an external semaphore.
*
* Imports an externally allocated synchronization object and returns a handle to it in {@code extSem_out}.
*
* The properties of the handle being imported must be described in {@code semHandleDesc}.
*
* If {@code ::type} is {@link #CU_EXTERNAL_SEMAPHORE_HANDLE_TYPE_OPAQUE_FD EXTERNAL_SEMAPHORE_HANDLE_TYPE_OPAQUE_FD}, then {@code ::handle::fd} must be a valid file descriptor referencing a synchronization
* object. Ownership of the file descriptor is transferred to the CUDA driver when the handle is imported successfully. Performing any operations on the
* file descriptor after it is imported results in undefined behavior.
*
* If {@code ::type} is {@link #CU_EXTERNAL_SEMAPHORE_HANDLE_TYPE_OPAQUE_WIN32 EXTERNAL_SEMAPHORE_HANDLE_TYPE_OPAQUE_WIN32}, then exactly one of {@code ::handle::win32::handle} and
* {@code ::handle::win32::name} must not be {@code NULL}. If {@code ::handle::win32::handle} is not {@code NULL}, then it must represent a valid shared NT handle that
* references a synchronization object. Ownership of this handle is not transferred to CUDA after the import operation, so the application must release
* the handle using the appropriate system call. If {@code ::handle::win32::name} is not {@code NULL}, then it must name a valid synchronization object.
*
* If {@code ::type} is {@link #CU_EXTERNAL_SEMAPHORE_HANDLE_TYPE_OPAQUE_WIN32_KMT EXTERNAL_SEMAPHORE_HANDLE_TYPE_OPAQUE_WIN32_KMT}, then {@code ::handle::win32::handle} must be non-{@code NULL} and
* {@code ::handle::win32::name} must be {@code NULL}. The handle specified must be a globally shared KMT handle. This handle does not hold a reference to the
* underlying object, and thus will be invalid when all references to the synchronization object are destroyed.
*
* If {@code ::type} is {@link #CU_EXTERNAL_SEMAPHORE_HANDLE_TYPE_D3D12_FENCE EXTERNAL_SEMAPHORE_HANDLE_TYPE_D3D12_FENCE}, then exactly one of {@code ::handle::win32::handle} and {@code ::handle::win32::name}
* must not be {@code NULL}. If {@code ::handle::win32::handle} is not {@code NULL}, then it must represent a valid shared NT handle that is returned by
* {@code ID3D12Device::CreateSharedHandle} when referring to a {@code ID3D12Fence} object. This handle holds a reference to the underlying object. If
* {@code ::handle::win32::name} is not {@code NULL}, then it must name a valid synchronization object that refers to a valid {@code ID3D12Fence} object.
*
* If {@code ::type} is {@link #CU_EXTERNAL_SEMAPHORE_HANDLE_TYPE_D3D11_FENCE EXTERNAL_SEMAPHORE_HANDLE_TYPE_D3D11_FENCE}, then {@code ::handle::win32::handle} represents a valid shared NT handle that is
* returned by {@code ID3D11Fence::CreateSharedHandle}. If {@code ::handle::win32::name} is not {@code NULL}, then it must name a valid synchronization object
* that refers to a valid {@code ID3D11Fence} object.
*
* If {@code ::type} is {@link #CU_EXTERNAL_SEMAPHORE_HANDLE_TYPE_NVSCISYNC EXTERNAL_SEMAPHORE_HANDLE_TYPE_NVSCISYNC}, then {@code ::handle::nvSciSyncObj} represents a valid {@code NvSciSyncObj}.
*
* If {@code ::type} is {@link #CU_EXTERNAL_SEMAPHORE_HANDLE_TYPE_D3D11_KEYED_MUTEX EXTERNAL_SEMAPHORE_HANDLE_TYPE_D3D11_KEYED_MUTEX}, then {@code ::handle::win32::handle} represents a valid shared NT handle that is returned by
* {@code IDXGIResource1::CreateSharedHandle} when referring to a {@code IDXGIKeyedMutex} object. If {@code ::handle::win32::name} is not {@code NULL}, then it
* must name a valid synchronization object that refers to a valid {@code IDXGIKeyedMutex} object.
*
* If {@code ::type} is {@link #CU_EXTERNAL_SEMAPHORE_HANDLE_TYPE_D3D11_KEYED_MUTEX_KMT EXTERNAL_SEMAPHORE_HANDLE_TYPE_D3D11_KEYED_MUTEX_KMT}, then {@code ::handle::win32::handle} represents a valid shared KMT handle
* that is returned by {@code IDXGIResource::GetSharedHandle} when referring to a {@code IDXGIKeyedMutex} object and {@code ::handle::win32::name} must be
* {@code NULL}.
*
* If {@code ::type} is {@link #CU_EXTERNAL_SEMAPHORE_HANDLE_TYPE_TIMELINE_SEMAPHORE_FD EXTERNAL_SEMAPHORE_HANDLE_TYPE_TIMELINE_SEMAPHORE_FD}, then {@code ::handle::fd} must be a valid file descriptor referencing a
* synchronization object. Ownership of the file descriptor is transferred to the CUDA driver when the handle is imported successfully. Performing any
* operations on the file descriptor after it is imported results in undefined behavior.
*
* If {@code ::type} is {@link #CU_EXTERNAL_SEMAPHORE_HANDLE_TYPE_TIMELINE_SEMAPHORE_WIN32 EXTERNAL_SEMAPHORE_HANDLE_TYPE_TIMELINE_SEMAPHORE_WIN32}, then exactly one of {@code ::handle::win32::handle} and
* {@code ::handle::win32::name} must not be {@code NULL}. If {@code ::handle::win32::handle} is not {@code NULL}, then it must represent a valid shared NT handle that
* references a synchronization object. Ownership of this handle is not transferred to CUDA after the import operation, so the application must release
* the handle using the appropriate system call. If {@code ::handle::win32::name} is not {@code NULL}, then it must name a valid synchronization object.
*
* @param extSem_out returned handle to an external semaphore
* @param semHandleDesc semaphore import handle descriptor
*/
@NativeType("CUresult")
public static int cuImportExternalSemaphore(@NativeType("CUexternalSemaphore *") PointerBuffer extSem_out, @NativeType("CUDA_EXTERNAL_SEMAPHORE_HANDLE_DESC const *") CUDA_EXTERNAL_SEMAPHORE_HANDLE_DESC semHandleDesc) {
if (CHECKS) {
check(extSem_out, 1);
}
return ncuImportExternalSemaphore(memAddress(extSem_out), semHandleDesc.address());
}
// --- [ cuSignalExternalSemaphoresAsync ] ---
/**
* Unsafe version of: {@link #cuSignalExternalSemaphoresAsync SignalExternalSemaphoresAsync}
*
* @param numExtSems number of semaphores to signal
*/
public static int ncuSignalExternalSemaphoresAsync(long extSemArray, long paramsArray, int numExtSems, long stream) {
long __functionAddress = Functions.SignalExternalSemaphoresAsync;
if (CHECKS) {
check(__functionAddress);
}
return callPPPI(extSemArray, paramsArray, numExtSems, stream, __functionAddress);
}
/**
* Signals a set of external semaphore objects.
*
* Enqueues a signal operation on a set of externally allocated semaphore objects in the specified stream. The operations will be executed when all prior
* operations in the stream complete.
*
* The exact semantics of signaling a semaphore depends on the type of the object.
*
* If the semaphore object is any one of the following types: {@link #CU_EXTERNAL_SEMAPHORE_HANDLE_TYPE_OPAQUE_FD EXTERNAL_SEMAPHORE_HANDLE_TYPE_OPAQUE_FD},
* {@link #CU_EXTERNAL_SEMAPHORE_HANDLE_TYPE_OPAQUE_WIN32 EXTERNAL_SEMAPHORE_HANDLE_TYPE_OPAQUE_WIN32}, {@link #CU_EXTERNAL_SEMAPHORE_HANDLE_TYPE_OPAQUE_WIN32_KMT EXTERNAL_SEMAPHORE_HANDLE_TYPE_OPAQUE_WIN32_KMT}, then signaling the semaphore will set it to the
* signaled state.
*
* If the semaphore object is any one of the following types: {@link #CU_EXTERNAL_SEMAPHORE_HANDLE_TYPE_D3D12_FENCE EXTERNAL_SEMAPHORE_HANDLE_TYPE_D3D12_FENCE},
* {@link #CU_EXTERNAL_SEMAPHORE_HANDLE_TYPE_D3D11_FENCE EXTERNAL_SEMAPHORE_HANDLE_TYPE_D3D11_FENCE}, {@link #CU_EXTERNAL_SEMAPHORE_HANDLE_TYPE_TIMELINE_SEMAPHORE_FD EXTERNAL_SEMAPHORE_HANDLE_TYPE_TIMELINE_SEMAPHORE_FD},
* {@link #CU_EXTERNAL_SEMAPHORE_HANDLE_TYPE_TIMELINE_SEMAPHORE_WIN32 EXTERNAL_SEMAPHORE_HANDLE_TYPE_TIMELINE_SEMAPHORE_WIN32}, then the semaphore will be set to the value specified in {@code ::params::fence::value}.
*
* If the semaphore object is of the type {@link #CU_EXTERNAL_SEMAPHORE_HANDLE_TYPE_NVSCISYNC EXTERNAL_SEMAPHORE_HANDLE_TYPE_NVSCISYNC} this API sets
* {@code ::params::nvSciSync::fence} to a value that can be used by subsequent waiters of the same {@code NvSciSync} object to order operations with
* those currently submitted in {@code stream}. Such an update will overwrite previous contents of {@code ::params::nvSciSync::fence}. By default,
* signaling such an external semaphore object causes appropriate memory synchronization operations to be performed over all external memory objects that
* are imported as {@link #CU_EXTERNAL_MEMORY_HANDLE_TYPE_NVSCIBUF EXTERNAL_MEMORY_HANDLE_TYPE_NVSCIBUF}. This ensures that any subsequent accesses made by other importers of the same set of NvSciBuf
* memory object(s) are coherent. These operations can be skipped by specifying the flag {@link #CUDA_EXTERNAL_SEMAPHORE_SIGNAL_SKIP_NVSCIBUF_MEMSYNC}, which can
* be used as a performance optimization when data coherency is not required. But specifying this flag in scenarios where data coherency is required
* results in undefined behavior. Also, for a semaphore object of type {@link #CU_EXTERNAL_SEMAPHORE_HANDLE_TYPE_NVSCISYNC EXTERNAL_SEMAPHORE_HANDLE_TYPE_NVSCISYNC}, if the {@code NvSciSyncAttrList} used
* to create the {@code NvSciSyncObj} had not set the flags in {@link #cuDeviceGetNvSciSyncAttributes DeviceGetNvSciSyncAttributes} to {@link #CUDA_NVSCISYNC_ATTR_SIGNAL}, this API will return
* {@link #CUDA_ERROR_NOT_SUPPORTED}.
*
* If the semaphore object is any one of the following types: {@link #CU_EXTERNAL_SEMAPHORE_HANDLE_TYPE_D3D11_KEYED_MUTEX EXTERNAL_SEMAPHORE_HANDLE_TYPE_D3D11_KEYED_MUTEX},
* {@link #CU_EXTERNAL_SEMAPHORE_HANDLE_TYPE_D3D11_KEYED_MUTEX_KMT EXTERNAL_SEMAPHORE_HANDLE_TYPE_D3D11_KEYED_MUTEX_KMT}, then the keyed mutex will be released with the key specified in {@code ::params::keyedmutex::key}.
*
* @param extSemArray set of external semaphores to be signaled
* @param paramsArray array of semaphore parameters
* @param stream stream to enqueue the signal operations in
*/
@NativeType("CUresult")
public static int cuSignalExternalSemaphoresAsync(@NativeType("CUexternalSemaphore const *") PointerBuffer extSemArray, @NativeType("CUDA_EXTERNAL_SEMAPHORE_SIGNAL_PARAMS const *") CUDA_EXTERNAL_SEMAPHORE_SIGNAL_PARAMS.Buffer paramsArray, @NativeType("CUstream") long stream) {
if (CHECKS) {
check(paramsArray, extSemArray.remaining());
}
return ncuSignalExternalSemaphoresAsync(memAddress(extSemArray), paramsArray.address(), extSemArray.remaining(), stream);
}
// --- [ cuWaitExternalSemaphoresAsync ] ---
/**
* Unsafe version of: {@link #cuWaitExternalSemaphoresAsync WaitExternalSemaphoresAsync}
*
* @param numExtSems number of semaphores to wait on
*/
public static int ncuWaitExternalSemaphoresAsync(long extSemArray, long paramsArray, int numExtSems, long stream) {
long __functionAddress = Functions.WaitExternalSemaphoresAsync;
if (CHECKS) {
check(__functionAddress);
}
return callPPPI(extSemArray, paramsArray, numExtSems, stream, __functionAddress);
}
/**
* Waits on a set of external semaphore objects.
*
* Enqueues a wait operation on a set of externally allocated semaphore objects in the specified stream. The operations will be executed when all prior
* operations in the stream complete.
*
* The exact semantics of waiting on a semaphore depends on the type of the object.
*
* If the semaphore object is any one of the following types: {@link #CU_EXTERNAL_SEMAPHORE_HANDLE_TYPE_OPAQUE_FD EXTERNAL_SEMAPHORE_HANDLE_TYPE_OPAQUE_FD},
* {@link #CU_EXTERNAL_SEMAPHORE_HANDLE_TYPE_OPAQUE_WIN32 EXTERNAL_SEMAPHORE_HANDLE_TYPE_OPAQUE_WIN32}, {@link #CU_EXTERNAL_SEMAPHORE_HANDLE_TYPE_OPAQUE_WIN32_KMT EXTERNAL_SEMAPHORE_HANDLE_TYPE_OPAQUE_WIN32_KMT}, then waiting on the semaphore will wait until
* the semaphore reaches the signaled state. The semaphore will then be reset to the unsignaled state. Therefore, for every signal operation, there can
* be only one wait operation.
*
* If the semaphore object is any one of the following types: {@link #CU_EXTERNAL_SEMAPHORE_HANDLE_TYPE_D3D12_FENCE EXTERNAL_SEMAPHORE_HANDLE_TYPE_D3D12_FENCE},
* {@link #CU_EXTERNAL_SEMAPHORE_HANDLE_TYPE_D3D11_FENCE EXTERNAL_SEMAPHORE_HANDLE_TYPE_D3D11_FENCE}, {@link #CU_EXTERNAL_SEMAPHORE_HANDLE_TYPE_TIMELINE_SEMAPHORE_FD EXTERNAL_SEMAPHORE_HANDLE_TYPE_TIMELINE_SEMAPHORE_FD},
* {@link #CU_EXTERNAL_SEMAPHORE_HANDLE_TYPE_TIMELINE_SEMAPHORE_WIN32 EXTERNAL_SEMAPHORE_HANDLE_TYPE_TIMELINE_SEMAPHORE_WIN32}, then waiting on the semaphore will wait until the value of the semaphore is greater than
* or equal to {@code ::params::fence::value}.
*
* If the semaphore object is of the type {@link #CU_EXTERNAL_SEMAPHORE_HANDLE_TYPE_NVSCISYNC EXTERNAL_SEMAPHORE_HANDLE_TYPE_NVSCISYNC}, then waiting on the semaphore will wait until the
* {@code ::params::nvSciSync::fence} is signaled by the signaler of the NvSciSyncObj that was associated with this semaphore object. By default, waiting
* on such an external semaphore object causes appropriate memory synchronization operations to be performed over all external memory objects that are
* imported as {@link #CU_EXTERNAL_MEMORY_HANDLE_TYPE_NVSCIBUF EXTERNAL_MEMORY_HANDLE_TYPE_NVSCIBUF}. This ensures that any subsequent accesses made by other importers of the same set of
* {@code NvSciBuf} memory object(s) are coherent. These operations can be skipped by specifying the flag
* {@link #CUDA_EXTERNAL_SEMAPHORE_WAIT_SKIP_NVSCIBUF_MEMSYNC}, which can be used as a performance optimization when data coherency is not required. But
* specifying this flag in scenarios where data coherency is required results in undefined behavior. Also, for a semaphore object of type
* {@link #CU_EXTERNAL_SEMAPHORE_HANDLE_TYPE_NVSCISYNC EXTERNAL_SEMAPHORE_HANDLE_TYPE_NVSCISYNC}, if the {@code NvSciSyncAttrList} used to create the {@code NvSciSyncObj} had not set the flags in
* {@link #cuDeviceGetNvSciSyncAttributes DeviceGetNvSciSyncAttributes} to {@link #CUDA_NVSCISYNC_ATTR_WAIT}, this API will return {@link #CUDA_ERROR_NOT_SUPPORTED}.
*
* If the semaphore object is any one of the following types: {@link #CU_EXTERNAL_SEMAPHORE_HANDLE_TYPE_D3D11_KEYED_MUTEX EXTERNAL_SEMAPHORE_HANDLE_TYPE_D3D11_KEYED_MUTEX},
* {@link #CU_EXTERNAL_SEMAPHORE_HANDLE_TYPE_D3D11_KEYED_MUTEX_KMT EXTERNAL_SEMAPHORE_HANDLE_TYPE_D3D11_KEYED_MUTEX_KMT}, then the keyed mutex will be acquired when it is released with the key specified in
* {@code ::params::keyedmutex::key} or until the timeout specified by {@code ::params::keyedmutex::timeoutMs} has elapsed. The timeout interval can either
* be a finite value specified in milliseconds or an infinite value, in which case the timeout never elapses. The Windows {@code INFINITE} macro must be
* used to specify an infinite timeout.
*
* @param extSemArray external semaphores to be waited on
* @param paramsArray array of semaphore parameters
* @param stream stream to enqueue the wait operations in
*/
@NativeType("CUresult")
public static int cuWaitExternalSemaphoresAsync(@NativeType("CUexternalSemaphore const *") PointerBuffer extSemArray, @NativeType("CUDA_EXTERNAL_SEMAPHORE_WAIT_PARAMS const *") CUDA_EXTERNAL_SEMAPHORE_WAIT_PARAMS.Buffer paramsArray, @NativeType("CUstream") long stream) {
if (CHECKS) {
check(paramsArray, extSemArray.remaining());
}
return ncuWaitExternalSemaphoresAsync(memAddress(extSemArray), paramsArray.address(), extSemArray.remaining(), stream);
}
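// Illustrative sketch, not part of the generated bindings: signaling an imported binary semaphore (e.g. the OPAQUE_FD
// type) on one stream and waiting on it from another. For the binary semaphore types no parameters are read beyond the
// zero-initialized defaults, so the parameter structs are simply calloc'ed here. extSem, signalStream and waitStream
// are assumed to come from earlier cuImportExternalSemaphore/cuStreamCreate calls.
private static void exampleSignalThenWait(long extSem, long signalStream, long waitStream) {
try (MemoryStack stack = stackPush()) {
PointerBuffer pExtSem = stack.pointers(extSem);
CUDA_EXTERNAL_SEMAPHORE_SIGNAL_PARAMS.Buffer signalParams = CUDA_EXTERNAL_SEMAPHORE_SIGNAL_PARAMS.calloc(1, stack);
cuSignalExternalSemaphoresAsync(pExtSem, signalParams, signalStream);
CUDA_EXTERNAL_SEMAPHORE_WAIT_PARAMS.Buffer waitParams = CUDA_EXTERNAL_SEMAPHORE_WAIT_PARAMS.calloc(1, stack);
cuWaitExternalSemaphoresAsync(pExtSem, waitParams, waitStream);
}
}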
// --- [ cuDestroyExternalSemaphore ] ---
/**
* Destroys an external semaphore.
*
* Destroys an external semaphore object and releases any references to the underlying resource. Any outstanding signals or waits must have completed
* before the semaphore is destroyed.
*
* @param extSem external semaphore to be destroyed
*/
@NativeType("CUresult")
public static int cuDestroyExternalSemaphore(@NativeType("CUexternalSemaphore") long extSem) {
long __functionAddress = Functions.DestroyExternalSemaphore;
if (CHECKS) {
check(__functionAddress);
check(extSem);
}
return callPI(extSem, __functionAddress);
}
// --- [ cuStreamWaitValue32 ] ---
/**
* Wait on a memory location.
*
* Enqueues a synchronization of the stream on the given memory location. Work ordered after the operation will block until the given condition on the
* memory is satisfied. By default, the condition is to wait for {@code (int32_t)(*addr - value) >= 0}, a cyclic greater-or-equal. Other condition types
* can be specified via {@code flags}.
*
* If the memory was registered via {@link #cuMemHostRegister MemHostRegister}, the device pointer should be obtained with {@link #cuMemHostGetDevicePointer MemHostGetDevicePointer}. This function cannot be
* used with managed memory ({@link #cuMemAllocManaged MemAllocManaged}).
*
* Support for this can be queried with {@link #cuDeviceGetAttribute DeviceGetAttribute} and {@link #CU_DEVICE_ATTRIBUTE_CAN_USE_STREAM_MEM_OPS DEVICE_ATTRIBUTE_CAN_USE_STREAM_MEM_OPS}.
*
* Support for {@link #CU_STREAM_WAIT_VALUE_NOR STREAM_WAIT_VALUE_NOR} can be queried with {@link #cuDeviceGetAttribute DeviceGetAttribute} and {@link #CU_DEVICE_ATTRIBUTE_CAN_USE_STREAM_WAIT_VALUE_NOR DEVICE_ATTRIBUTE_CAN_USE_STREAM_WAIT_VALUE_NOR}.
*
* @param stream the stream to synchronize on the memory location
* @param addr the memory location to wait on
* @param value the value to compare with the memory location
* @param flags see {@code CUstreamWaitValue_flags}
*/
@NativeType("CUresult")
public static int cuStreamWaitValue32(@NativeType("CUstream") long stream, @NativeType("CUdeviceptr") long addr, @NativeType("cuuint32_t") int value, @NativeType("unsigned int") int flags) {
long __functionAddress = Functions.StreamWaitValue32;
if (CHECKS) {
check(__functionAddress);
check(addr);
}
return callPPI(stream, addr, value, flags, __functionAddress);
}
// --- [ cuStreamWaitValue64 ] ---
/**
* Wait on a memory location.
*
* Enqueues a synchronization of the stream on the given memory location. Work ordered after the operation will block until the given condition on the
* memory is satisfied. By default, the condition is to wait for {@code (int64_t)(*addr - value) >= 0}, a cyclic greater-or-equal. Other condition types
* can be specified via {@code flags}.
*
* If the memory was registered via {@link #cuMemHostRegister MemHostRegister}, the device pointer should be obtained with {@link #cuMemHostGetDevicePointer MemHostGetDevicePointer}.
*
* Support for this can be queried with {@link #cuDeviceGetAttribute DeviceGetAttribute} and {@link #CU_DEVICE_ATTRIBUTE_CAN_USE_64_BIT_STREAM_MEM_OPS DEVICE_ATTRIBUTE_CAN_USE_64_BIT_STREAM_MEM_OPS}.
*
* @param stream the stream to synchronize on the memory location
* @param addr the memory location to wait on
* @param value the value to compare with the memory location
* @param flags see {@code CUstreamWaitValue_flags}
*/
@NativeType("CUresult")
public static int cuStreamWaitValue64(@NativeType("CUstream") long stream, @NativeType("CUdeviceptr") long addr, @NativeType("cuuint64_t") long value, @NativeType("unsigned int") int flags) {
long __functionAddress = Functions.StreamWaitValue64;
if (CHECKS) {
check(__functionAddress);
check(addr);
}
return callPPJI(stream, addr, value, flags, __functionAddress);
}
// --- [ cuStreamWriteValue32 ] ---
/**
* Write a value to memory.
*
* Write a value to memory. Unless the {@link #CU_STREAM_WRITE_VALUE_NO_MEMORY_BARRIER STREAM_WRITE_VALUE_NO_MEMORY_BARRIER} flag is passed, the write is preceded by a system-wide memory fence,
* equivalent to a {@code __threadfence_system()} but scoped to the stream rather than a CUDA thread.
*
* If the memory was registered via {@link #cuMemHostRegister MemHostRegister}, the device pointer should be obtained with {@link #cuMemHostGetDevicePointer MemHostGetDevicePointer}. This function cannot
* be used with managed memory ({@link #cuMemAllocManaged MemAllocManaged}).
*
* Support for this can be queried with {@link #cuDeviceGetAttribute DeviceGetAttribute} and {@link #CU_DEVICE_ATTRIBUTE_CAN_USE_STREAM_MEM_OPS DEVICE_ATTRIBUTE_CAN_USE_STREAM_MEM_OPS}.
*
* @param stream the stream to do the write in
* @param addr the device address to write to
* @param value the value to write
* @param flags see {@code CUstreamWriteValue_flags}
*/
@NativeType("CUresult")
public static int cuStreamWriteValue32(@NativeType("CUstream") long stream, @NativeType("CUdeviceptr") long addr, @NativeType("cuuint32_t") int value, @NativeType("unsigned int") int flags) {
long __functionAddress = Functions.StreamWriteValue32;
if (CHECKS) {
check(__functionAddress);
check(addr);
}
return callPPI(stream, addr, value, flags, __functionAddress);
}
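// Illustrative sketch, not part of the generated bindings: a flag-based handoff between two streams built from the two
// operations above. The consumer stream blocks at the wait until the producer stream writes a value satisfying the
// condition. addr is assumed to be a device pointer obtained via cuMemHostGetDevicePointer for memory registered with
// cuMemHostRegister, as the documentation above requires.
private static void exampleStreamHandoff(long producerStream, long consumerStream, long addr) {
// consumer: work enqueued after this point will not run until *addr >= 1
cuStreamWaitValue32(consumerStream, addr, 1, CU_STREAM_WAIT_VALUE_GEQ);
// ... enqueue consumer-side kernels here ...
// producer: releases the consumer once the producer's preceding work completes
cuStreamWriteValue32(producerStream, addr, 1, CU_STREAM_WRITE_VALUE_DEFAULT);
}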
// --- [ cuStreamWriteValue64 ] ---
/**
* Write a value to memory.
*
* Write a value to memory. Unless the {@link #CU_STREAM_WRITE_VALUE_NO_MEMORY_BARRIER STREAM_WRITE_VALUE_NO_MEMORY_BARRIER} flag is passed, the write is preceded by a system-wide memory fence,
* equivalent to a {@code __threadfence_system()} but scoped to the stream rather than a CUDA thread.
*
* If the memory was registered via {@link #cuMemHostRegister MemHostRegister}, the device pointer should be obtained with {@link #cuMemHostGetDevicePointer MemHostGetDevicePointer}.
*
* Support for this can be queried with {@link #cuDeviceGetAttribute DeviceGetAttribute} and {@link #CU_DEVICE_ATTRIBUTE_CAN_USE_64_BIT_STREAM_MEM_OPS DEVICE_ATTRIBUTE_CAN_USE_64_BIT_STREAM_MEM_OPS}.
*
* @param stream the stream to do the write in
* @param addr the device address to write to
* @param value the value to write
* @param flags see {@code CUstreamWriteValue_flags}
*/
@NativeType("CUresult")
public static int cuStreamWriteValue64(@NativeType("CUstream") long stream, @NativeType("CUdeviceptr") long addr, @NativeType("cuuint64_t") long value, @NativeType("unsigned int") int flags) {
long __functionAddress = Functions.StreamWriteValue64;
if (CHECKS) {
check(__functionAddress);
check(addr);
}
return callPPJI(stream, addr, value, flags, __functionAddress);
}
// --- [ cuStreamBatchMemOp ] ---
/**
* Unsafe version of: {@link #cuStreamBatchMemOp StreamBatchMemOp}
*
* @param count the number of operations in the array. Must be less than 256.
*/
public static int ncuStreamBatchMemOp(long stream, int count, long paramArray, int flags) {
long __functionAddress = Functions.StreamBatchMemOp;
if (CHECKS) {
check(__functionAddress);
}
return callPPI(stream, count, paramArray, flags, __functionAddress);
}
/**
* Batch operations to synchronize the stream via memory operations.
*
* This is a batch version of {@link #cuStreamWaitValue32 StreamWaitValue32} and {@link #cuStreamWriteValue32 StreamWriteValue32}. Batching operations may avoid some performance overhead in both the
* API call and the device execution versus adding them to the stream in separate API calls. The operations are enqueued in the order they appear in the
* array.
*
* See {@code CUstreamBatchMemOpType} for the full set of supported operations, and {@link #cuStreamWaitValue32 StreamWaitValue32}, {@link #cuStreamWaitValue64 StreamWaitValue64}, {@link #cuStreamWriteValue32 StreamWriteValue32},
* and {@link #cuStreamWriteValue64 StreamWriteValue64} for details of specific operations.
*
* Basic support for this can be queried with {@link #cuDeviceGetAttribute DeviceGetAttribute} and {@link #CU_DEVICE_ATTRIBUTE_CAN_USE_STREAM_MEM_OPS DEVICE_ATTRIBUTE_CAN_USE_STREAM_MEM_OPS}. See related APIs for details on
* querying support for specific operations.
*
* @param stream the stream to enqueue the operations in
* @param paramArray the types and parameters of the individual operations
* @param flags reserved for future expansion; must be 0
*/
@NativeType("CUresult")
public static int cuStreamBatchMemOp(@NativeType("CUstream") long stream, @NativeType("CUstreamBatchMemOpParams *") CUstreamBatchMemOpParams.Buffer paramArray, @NativeType("unsigned int") int flags) {
return ncuStreamBatchMemOp(stream, paramArray.remaining(), paramArray.address(), flags);
}
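// Illustrative sketch, not part of the generated bindings: batching a wait and a write into a single cuStreamBatchMemOp
// call. The accessor names on CUstreamBatchMemOpParams mirror LWJGL's member-name mapping of the native union
// (operation/waitValue/writeValue) and should be verified against the generated struct class.
private static void exampleBatchedOps(long stream, long waitAddr, long writeAddr) {
try (MemoryStack stack = stackPush()) {
CUstreamBatchMemOpParams.Buffer ops = CUstreamBatchMemOpParams.calloc(2, stack);
ops.get(0).waitValue()
.operation(CU_STREAM_MEM_OP_WAIT_VALUE_32)
.address(waitAddr)
.value(1)
.flags(CU_STREAM_WAIT_VALUE_GEQ);
ops.get(1).writeValue()
.operation(CU_STREAM_MEM_OP_WRITE_VALUE_32)
.address(writeAddr)
.value(1)
.flags(CU_STREAM_WRITE_VALUE_DEFAULT);
cuStreamBatchMemOp(stream, ops, 0); // flags is reserved and must be 0
}
}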
// --- [ cuFuncGetAttribute ] ---
/** Unsafe version of: {@link #cuFuncGetAttribute FuncGetAttribute} */
public static int ncuFuncGetAttribute(long pi, int attrib, long hfunc) {
long __functionAddress = Functions.FuncGetAttribute;
if (CHECKS) {
check(hfunc);
}
return callPPI(pi, attrib, hfunc, __functionAddress);
}
/**
* Returns information about a function.
*
* Returns in {@code *pi} the integer value of the attribute {@code attrib} on the kernel given by {@code hfunc}. The supported attributes are:
*
*
* - {@link #CU_FUNC_ATTRIBUTE_MAX_THREADS_PER_BLOCK FUNC_ATTRIBUTE_MAX_THREADS_PER_BLOCK}: The maximum number of threads per block, beyond which a launch of the function would fail. This number
* depends on both the function and the device on which the function is currently loaded.
* - {@link #CU_FUNC_ATTRIBUTE_SHARED_SIZE_BYTES FUNC_ATTRIBUTE_SHARED_SIZE_BYTES}: The size in bytes of statically-allocated shared memory per block required by this function. This does not
* include dynamically-allocated shared memory requested by the user at runtime.
* - {@link #CU_FUNC_ATTRIBUTE_CONST_SIZE_BYTES FUNC_ATTRIBUTE_CONST_SIZE_BYTES}: The size in bytes of user-allocated constant memory required by this function.
* - {@link #CU_FUNC_ATTRIBUTE_LOCAL_SIZE_BYTES FUNC_ATTRIBUTE_LOCAL_SIZE_BYTES}: The size in bytes of local memory used by each thread of this function.
* - {@link #CU_FUNC_ATTRIBUTE_NUM_REGS FUNC_ATTRIBUTE_NUM_REGS}: The number of registers used by each thread of this function.
* - {@link #CU_FUNC_ATTRIBUTE_PTX_VERSION FUNC_ATTRIBUTE_PTX_VERSION}: The PTX virtual architecture version for which the function was compiled. This value is the major PTX version * 10
* + the minor PTX version, so a PTX version 1.3 function would return the value 13. Note that this may return the undefined value of 0 for cubins
* compiled prior to CUDA 3.0.
* - {@link #CU_FUNC_ATTRIBUTE_BINARY_VERSION FUNC_ATTRIBUTE_BINARY_VERSION}: The binary architecture version for which the function was compiled. This value is the major binary version *
* 10 + the minor binary version, so a binary version 1.3 function would return the value 13. Note that this will return a value of 10 for legacy
* cubins that do not have a properly-encoded binary architecture version.
* - {@link #CU_FUNC_ATTRIBUTE_CACHE_MODE_CA FUNC_ATTRIBUTE_CACHE_MODE_CA}: The attribute to indicate whether the function has been compiled with user specified option "-Xptxas --dlcm=ca"
* set.
* - {@link #CU_FUNC_ATTRIBUTE_MAX_DYNAMIC_SHARED_SIZE_BYTES FUNC_ATTRIBUTE_MAX_DYNAMIC_SHARED_SIZE_BYTES}: The maximum size in bytes of dynamically-allocated shared memory.
* - {@link #CU_FUNC_ATTRIBUTE_PREFERRED_SHARED_MEMORY_CARVEOUT FUNC_ATTRIBUTE_PREFERRED_SHARED_MEMORY_CARVEOUT}: Preferred shared memory-L1 cache split ratio in percent of total shared memory.
*
*
* @param pi returned attribute value
* @param attrib attribute requested
* @param hfunc function to query attribute of
*/
@NativeType("CUresult")
public static int cuFuncGetAttribute(@NativeType("int *") IntBuffer pi, @NativeType("CUfunction_attribute") int attrib, @NativeType("CUfunction") long hfunc) {
if (CHECKS) {
check(pi, 1);
}
return ncuFuncGetAttribute(memAddress(pi), attrib, hfunc);
}
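// Illustrative sketch, not part of the generated bindings: querying a couple of launch-relevant attributes of a kernel
// handle (hfunc is assumed to come from an earlier cuModuleGetFunction call).
private static void examplePrintKernelLimits(long hfunc) {
try (MemoryStack stack = stackPush()) {
IntBuffer pi = stack.mallocInt(1);
cuFuncGetAttribute(pi, CU_FUNC_ATTRIBUTE_MAX_THREADS_PER_BLOCK, hfunc);
int maxThreadsPerBlock = pi.get(0);
cuFuncGetAttribute(pi, CU_FUNC_ATTRIBUTE_NUM_REGS, hfunc);
int regsPerThread = pi.get(0);
System.out.println("maxThreadsPerBlock=" + maxThreadsPerBlock + ", regsPerThread=" + regsPerThread);
}
}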
// --- [ cuFuncSetAttribute ] ---
/**
* Sets information about a function.
*
* This call sets the value of a specified attribute {@code attrib} on the kernel given by {@code hfunc} to the integer value specified by {@code value}. This
* function returns {@link #CUDA_SUCCESS} if the new value of the attribute could be successfully set. If the set fails, this call will return an error. Not all
* attributes can have values set. Attempting to set a value on a read-only attribute will result in an error ({@link #CUDA_ERROR_INVALID_VALUE}).
*
* Supported attributes for the cuFuncSetAttribute call are:
*
*
* - {@link #CU_FUNC_ATTRIBUTE_MAX_DYNAMIC_SHARED_SIZE_BYTES FUNC_ATTRIBUTE_MAX_DYNAMIC_SHARED_SIZE_BYTES}: The maximum size in bytes of dynamically-allocated shared memory. The value should contain the
* requested maximum size of dynamically-allocated shared memory. The sum of this value and the function attribute
* {@link #CU_FUNC_ATTRIBUTE_SHARED_SIZE_BYTES FUNC_ATTRIBUTE_SHARED_SIZE_BYTES} cannot exceed the device attribute {@link #CU_DEVICE_ATTRIBUTE_MAX_SHARED_MEMORY_PER_BLOCK_OPTIN DEVICE_ATTRIBUTE_MAX_SHARED_MEMORY_PER_BLOCK_OPTIN}. The maximal size
* of requestable dynamic shared memory may differ by GPU architecture.
* - {@link #CU_FUNC_ATTRIBUTE_PREFERRED_SHARED_MEMORY_CARVEOUT FUNC_ATTRIBUTE_PREFERRED_SHARED_MEMORY_CARVEOUT}: On devices where the L1 cache and shared memory use the same hardware resources, this sets
* the shared memory carveout preference, in percent of the total shared memory. See {@link #CU_DEVICE_ATTRIBUTE_MAX_SHARED_MEMORY_PER_MULTIPROCESSOR DEVICE_ATTRIBUTE_MAX_SHARED_MEMORY_PER_MULTIPROCESSOR}. This
* is only a hint, and the driver can choose a different ratio if required to execute the function.
*
*
* @param hfunc function to set the attribute on
* @param attrib attribute requested
* @param value the value to set
*/
@NativeType("CUresult")
public static int cuFuncSetAttribute(@NativeType("CUfunction") long hfunc, @NativeType("CUfunction_attribute") int attrib, int value) {
long __functionAddress = Functions.FuncSetAttribute;
if (CHECKS) {
check(__functionAddress);
check(hfunc);
}
return callPI(hfunc, attrib, value, __functionAddress);
}
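// Illustrative sketch, not part of the generated bindings: opting a kernel into a larger dynamic shared memory limit
// before launching it with a large sharedMemBytes. The 96 KiB figure is an arbitrary example value; the actual ceiling
// is bounded by DEVICE_ATTRIBUTE_MAX_SHARED_MEMORY_PER_BLOCK_OPTIN minus the kernel's static shared memory.
private static void exampleOptInToSharedMemory(long hfunc) {
int requestedBytes = 96 * 1024;
int err = cuFuncSetAttribute(hfunc, CU_FUNC_ATTRIBUTE_MAX_DYNAMIC_SHARED_SIZE_BYTES, requestedBytes);
if (err != CUDA_SUCCESS) {
throw new IllegalStateException("cuFuncSetAttribute failed: " + err);
}
}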
// --- [ cuFuncSetCacheConfig ] ---
/**
* Sets the preferred cache configuration for a device function.
*
* On devices where the L1 cache and shared memory use the same hardware resources, this sets through {@code config} the preferred cache configuration for
* the device function {@code hfunc}. This is only a preference. The driver will use the requested configuration if possible, but it is free to choose a
* different configuration if required to execute {@code hfunc}. Any context-wide preference set via {@link #cuCtxSetCacheConfig CtxSetCacheConfig} will be overridden by this
* per-function setting unless the per-function setting is {@link #CU_FUNC_CACHE_PREFER_NONE FUNC_CACHE_PREFER_NONE}. In that case, the current context-wide setting will be used.
*
* This setting does nothing on devices where the size of the L1 cache and shared memory are fixed.
*
* Launching a kernel with a different preference than the most recent preference setting may insert a device-side synchronization point.
*
* The supported cache configurations are:
*
*
* - {@link #CU_FUNC_CACHE_PREFER_NONE FUNC_CACHE_PREFER_NONE}: no preference for shared memory or L1 (default)
* - {@link #CU_FUNC_CACHE_PREFER_SHARED FUNC_CACHE_PREFER_SHARED}: prefer larger shared memory and smaller L1 cache
* - {@link #CU_FUNC_CACHE_PREFER_L1 FUNC_CACHE_PREFER_L1}: prefer larger L1 cache and smaller shared memory
* - {@link #CU_FUNC_CACHE_PREFER_EQUAL FUNC_CACHE_PREFER_EQUAL}: prefer equal sized L1 cache and shared memory
*
*
* @param hfunc kernel to configure cache for
* @param config requested cache configuration
*/
@NativeType("CUresult")
public static int cuFuncSetCacheConfig(@NativeType("CUfunction") long hfunc, @NativeType("CUfunc_cache") int config) {
long __functionAddress = Functions.FuncSetCacheConfig;
if (CHECKS) {
check(hfunc);
}
return callPI(hfunc, config, __functionAddress);
}
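// Illustrative sketch, not part of the generated bindings: asking the driver to favor shared memory over L1 for a
// shared-memory-heavy kernel. As described above, this is only a preference.
private static void examplePreferSharedMemory(long hfunc) {
int err = cuFuncSetCacheConfig(hfunc, CU_FUNC_CACHE_PREFER_SHARED);
if (err != CUDA_SUCCESS) {
throw new IllegalStateException("cuFuncSetCacheConfig failed: " + err);
}
}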
// --- [ cuFuncSetSharedMemConfig ] ---
/**
* Sets the shared memory configuration for a device function.
*
* On devices with configurable shared memory banks, this function will force all subsequent launches of the specified device function to have the given
* shared memory bank size configuration. On any given launch of the function, the shared memory configuration of the device will be temporarily changed
* if needed to suit the function's preferred configuration. Changes in shared memory configuration between subsequent launches of functions may
* introduce a device-side synchronization point.
*
* Any per-function setting of shared memory bank size set via {@link #cuFuncSetSharedMemConfig FuncSetSharedMemConfig} will override the context-wide setting set with
* {@link #cuCtxSetSharedMemConfig CtxSetSharedMemConfig}.
*
* Changing the shared memory bank size will not increase shared memory usage or affect occupancy of kernels, but may have major effects on performance.
* Larger bank sizes will allow for greater potential bandwidth to shared memory, but will change what kinds of accesses to shared memory will result in
* bank conflicts.
*
* This function will do nothing on devices with fixed shared memory bank size.
*
* The supported bank configurations are:
*
*
* - {@link #CU_SHARED_MEM_CONFIG_DEFAULT_BANK_SIZE SHARED_MEM_CONFIG_DEFAULT_BANK_SIZE}: use the context's shared memory configuration when launching this function.
* - {@link #CU_SHARED_MEM_CONFIG_FOUR_BYTE_BANK_SIZE SHARED_MEM_CONFIG_FOUR_BYTE_BANK_SIZE}: set shared memory bank width to be natively four bytes when launching this function.
* - {@link #CU_SHARED_MEM_CONFIG_EIGHT_BYTE_BANK_SIZE SHARED_MEM_CONFIG_EIGHT_BYTE_BANK_SIZE}: set shared memory bank width to be natively eight bytes when launching this function.
*
*
* @param hfunc kernel to be given a shared memory config
* @param config requested shared memory configuration
*/
@NativeType("CUresult")
public static int cuFuncSetSharedMemConfig(@NativeType("CUfunction") long hfunc, @NativeType("CUsharedconfig") int config) {
long __functionAddress = Functions.FuncSetSharedMemConfig;
if (CHECKS) {
check(__functionAddress);
check(hfunc);
}
return callPI(hfunc, config, __functionAddress);
}
// --- [ cuFuncGetModule ] ---
/** Unsafe version of: {@link #cuFuncGetModule FuncGetModule} */
public static int ncuFuncGetModule(long hmod, long hfunc) {
long __functionAddress = Functions.FuncGetModule;
if (CHECKS) {
check(__functionAddress);
check(hfunc);
}
return callPPI(hmod, hfunc, __functionAddress);
}
/**
* Returns a module handle.
*
* Returns in {@code *hmod} the handle of the module that function {@code hfunc} is located in. The lifetime of the module corresponds to the lifetime of
* the context it was loaded in or until the module is explicitly unloaded.
*
* The CUDA runtime manages its own modules loaded into the primary context. If the handle returned by this API refers to a module loaded by the CUDA
* runtime, calling {@link #cuModuleUnload ModuleUnload} on that module will result in undefined behavior.
*
* @param hmod returned module handle
* @param hfunc function to retrieve module for
*/
@NativeType("CUresult")
public static int cuFuncGetModule(@NativeType("CUmodule *") PointerBuffer hmod, @NativeType("CUfunction") long hfunc) {
if (CHECKS) {
check(hmod, 1);
}
return ncuFuncGetModule(memAddress(hmod), hfunc);
}
// --- [ cuLaunchKernel ] ---
/** Unsafe version of: {@link #cuLaunchKernel LaunchKernel} */
public static int ncuLaunchKernel(long f, int gridDimX, int gridDimY, int gridDimZ, int blockDimX, int blockDimY, int blockDimZ, int sharedMemBytes, long hStream, long kernelParams, long extra) {
long __functionAddress = Functions.LaunchKernel;
if (CHECKS) {
check(__functionAddress);
check(f);
}
return callPPPPI(f, gridDimX, gridDimY, gridDimZ, blockDimX, blockDimY, blockDimZ, sharedMemBytes, hStream, kernelParams, extra, __functionAddress);
}
/**
* Launches a CUDA function.
*
* Invokes the kernel {@code f} on a {@code gridDimX} x {@code gridDimY} x {@code gridDimZ} grid of blocks. Each block contains {@code blockDimX} x {@code
* blockDimY} x {@code blockDimZ} threads.
*
* {@code sharedMemBytes} sets the amount of dynamic shared memory that will be available to each thread block.
*
* Kernel parameters to {@code f} can be specified in one of two ways:
*
*
* - Kernel parameters can be specified via {@code kernelParams}.
*
*
* If {@code f} has N parameters, then {@code kernelParams} needs to be an array of N pointers. Each of {@code kernelParams[0]} through
* {@code kernelParams[N-1]} must point to a region of memory from which the actual kernel parameter will be copied. The number of kernel parameters
* and their offsets and sizes do not need to be specified as that information is retrieved directly from the kernel's image.
* - Kernel parameters can also be packaged by the application into a single buffer that is passed in via the {@code extra} parameter.
*
*
* This places the burden on the application of knowing each kernel parameter's size and alignment/padding within the buffer. Here is an example of
* using the {@code extra} parameter in this manner:
*
*
* size_t argBufferSize;
* char argBuffer[256];
*
* // populate argBuffer and argBufferSize
*
* void *config[] = {
* CU_LAUNCH_PARAM_BUFFER_POINTER, argBuffer,
* CU_LAUNCH_PARAM_BUFFER_SIZE, &argBufferSize,
* CU_LAUNCH_PARAM_END
* };
* status = cuLaunchKernel(f, gx, gy, gz, bx, by, bz, sh, s, NULL, config);
*
*
* The {@code extra} parameter exists to allow {@code cuLaunchKernel()} to take additional, less commonly used arguments. {@code extra} specifies a list of
* names of extra settings and their corresponding values. Each extra setting name is immediately followed by the corresponding value. The list must be
* terminated with either {@code NULL} or {@link #CU_LAUNCH_PARAM_END LAUNCH_PARAM_END}. The valid extra setting names are:
*
*
* - {@link #CU_LAUNCH_PARAM_END LAUNCH_PARAM_END}, which indicates the end of the {@code extra} array
* - {@link #CU_LAUNCH_PARAM_BUFFER_POINTER LAUNCH_PARAM_BUFFER_POINTER}, which specifies that the next value in {@code extra} will be a pointer to a buffer containing all the kernel
* parameters for launching kernel {@code f}
* - {@link #CU_LAUNCH_PARAM_BUFFER_SIZE LAUNCH_PARAM_BUFFER_SIZE}, which specifies that the next value in {@code extra} will be a pointer to a size_t containing the size of the buffer
* specified with {@link #CU_LAUNCH_PARAM_BUFFER_POINTER LAUNCH_PARAM_BUFFER_POINTER}
*
*
* The error {@link #CUDA_ERROR_INVALID_VALUE} will be returned if kernel parameters are specified with both {@code kernelParams} and {@code extra} (i.e. both
* {@code kernelParams} and {@code extra} are non-{@code NULL}).
*
* Calling {@code cuLaunchKernel()} invalidates the persistent function state set through the following deprecated APIs: {@link #cuFuncSetBlockShape FuncSetBlockShape},
* {@link #cuFuncSetSharedSize FuncSetSharedSize}, {@link #cuParamSetSize ParamSetSize}, {@link #cuParamSeti ParamSeti}, {@link #cuParamSetf ParamSetf}, {@link #cuParamSetv ParamSetv}.
*
* Note that to use {@link #cuLaunchKernel LaunchKernel}, the kernel {@code f} must either have been compiled with toolchain version 3.2 or later so that it will contain
* kernel parameter information, or have no kernel parameters. If either of these conditions is not met, then {@link #cuLaunchKernel LaunchKernel} will return
* {@link #CUDA_ERROR_INVALID_IMAGE}.
*
* @param f kernel to launch
* @param gridDimX width of grid in blocks
* @param gridDimY height of grid in blocks
* @param gridDimZ depth of grid in blocks
* @param blockDimX x dimension of each thread block
* @param blockDimY y dimension of each thread block
* @param blockDimZ z dimension of each thread block
* @param sharedMemBytes dynamic shared-memory size per thread block in bytes
* @param hStream stream identifier
* @param kernelParams array of pointers to kernel parameters
* @param extra extra options
*/
@NativeType("CUresult")
public static int cuLaunchKernel(@NativeType("CUfunction") long f, @NativeType("unsigned int") int gridDimX, @NativeType("unsigned int") int gridDimY, @NativeType("unsigned int") int gridDimZ, @NativeType("unsigned int") int blockDimX, @NativeType("unsigned int") int blockDimY, @NativeType("unsigned int") int blockDimZ, @NativeType("unsigned int") int sharedMemBytes, @NativeType("CUstream") long hStream, @Nullable @NativeType("void **") PointerBuffer kernelParams, @Nullable @NativeType("void **") PointerBuffer extra) {
return ncuLaunchKernel(f, gridDimX, gridDimY, gridDimZ, blockDimX, blockDimY, blockDimZ, sharedMemBytes, hStream, memAddressSafe(kernelParams), memAddressSafe(extra));
}
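// Illustrative sketch, not part of the generated bindings: launching a hypothetical kernel
// __global__ void scale(float *data, int n, float factor) via kernelParams. Each array entry points at host storage
// holding one argument value; parameter sizes and offsets are taken from the kernel image, as described above.
private static void exampleLaunchScale(long f, long hStream, long dData, int n, float factor) {
try (MemoryStack stack = stackPush()) {
PointerBuffer kernelParams = stack.pointers(
memAddress(stack.longs(dData)), // float* argument (CUdeviceptr)
memAddress(stack.ints(n)), // int argument
memAddress(stack.floats(factor)) // float argument
);
int blockDim = 256;
int gridDim = (n + blockDim - 1) / blockDim;
int err = cuLaunchKernel(f, gridDim, 1, 1, blockDim, 1, 1, 0, hStream, kernelParams, null);
if (err != CUDA_SUCCESS) {
throw new IllegalStateException("cuLaunchKernel failed: " + err);
}
}
}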
// --- [ cuLaunchCooperativeKernel ] ---
/** Unsafe version of: {@link #cuLaunchCooperativeKernel LaunchCooperativeKernel} */
public static int ncuLaunchCooperativeKernel(long f, int gridDimX, int gridDimY, int gridDimZ, int blockDimX, int blockDimY, int blockDimZ, int sharedMemBytes, long hStream, long kernelParams) {
long __functionAddress = Functions.LaunchCooperativeKernel;
if (CHECKS) {
check(__functionAddress);
check(f);
}
return callPPPI(f, gridDimX, gridDimY, gridDimZ, blockDimX, blockDimY, blockDimZ, sharedMemBytes, hStream, kernelParams, __functionAddress);
}
/**
* Launches a CUDA function where thread blocks can cooperate and synchronize as they execute.
*
* Invokes the kernel {@code f} on a {@code gridDimX} x {@code gridDimY} x {@code gridDimZ} grid of blocks. Each block contains {@code blockDimX} x {@code
* blockDimY} x {@code blockDimZ} threads.
*
* {@code sharedMemBytes} sets the amount of dynamic shared memory that will be available to each thread block.
*
* The device on which this kernel is invoked must have a non-zero value for the device attribute {@link #CU_DEVICE_ATTRIBUTE_COOPERATIVE_LAUNCH DEVICE_ATTRIBUTE_COOPERATIVE_LAUNCH}.
*
* The total number of blocks launched cannot exceed the maximum number of blocks per multiprocessor as returned by
* {@link #cuOccupancyMaxActiveBlocksPerMultiprocessor OccupancyMaxActiveBlocksPerMultiprocessor} (or {@link #cuOccupancyMaxActiveBlocksPerMultiprocessorWithFlags OccupancyMaxActiveBlocksPerMultiprocessorWithFlags}) times the number of multiprocessors as
* specified by the device attribute {@link #CU_DEVICE_ATTRIBUTE_MULTIPROCESSOR_COUNT DEVICE_ATTRIBUTE_MULTIPROCESSOR_COUNT}.
*
* The kernel cannot make use of CUDA dynamic parallelism.
*
* Kernel parameters must be specified via {@code kernelParams}. If {@code f} has N parameters, then {@code kernelParams} needs to be an array of N
* pointers. Each of {@code kernelParams[0]} through {@code kernelParams[N-1]} must point to a region of memory from which the actual kernel parameter
* will be copied. The number of kernel parameters and their offsets and sizes do not need to be specified as that information is retrieved directly from
* the kernel's image.
*
* Calling {@link #cuLaunchCooperativeKernel LaunchCooperativeKernel} sets persistent function state that is the same as function state set through the {@link #cuLaunchKernel LaunchKernel} API.
*
* When the kernel {@code f} is launched via {@link #cuLaunchCooperativeKernel LaunchCooperativeKernel}, the previous block shape, shared size and parameter info associated with
* {@code f} is overwritten.
*
* Note that to use {@link #cuLaunchCooperativeKernel LaunchCooperativeKernel}, the kernel {@code f} must either have been compiled with toolchain version 3.2 or later so that it will
* contain kernel parameter information, or have no kernel parameters. If either of these conditions is not met, then {@link #cuLaunchCooperativeKernel LaunchCooperativeKernel} will
* return {@link #CUDA_ERROR_INVALID_IMAGE}.
*
* @param f kernel to launch
* @param gridDimX width of grid in blocks
* @param gridDimY height of grid in blocks
* @param gridDimZ depth of grid in blocks
* @param blockDimX x dimension of each thread block
* @param blockDimY y dimension of each thread block
* @param blockDimZ z dimension of each thread block
* @param sharedMemBytes dynamic shared-memory size per thread block in bytes
* @param hStream stream identifier
* @param kernelParams array of pointers to kernel parameters
*/
@NativeType("CUresult")
public static int cuLaunchCooperativeKernel(@NativeType("CUfunction") long f, @NativeType("unsigned int") int gridDimX, @NativeType("unsigned int") int gridDimY, @NativeType("unsigned int") int gridDimZ, @NativeType("unsigned int") int blockDimX, @NativeType("unsigned int") int blockDimY, @NativeType("unsigned int") int blockDimZ, @NativeType("unsigned int") int sharedMemBytes, @NativeType("CUstream") long hStream, @Nullable @NativeType("void **") PointerBuffer kernelParams) {
return ncuLaunchCooperativeKernel(f, gridDimX, gridDimY, gridDimZ, blockDimX, blockDimY, blockDimZ, sharedMemBytes, hStream, memAddressSafe(kernelParams));
}
// --- [ cuLaunchCooperativeKernelMultiDevice ] ---
/**
* Unsafe version of: {@link #cuLaunchCooperativeKernelMultiDevice LaunchCooperativeKernelMultiDevice}
*
* @param numDevices size of the {@code launchParamsList} array
*/
public static int ncuLaunchCooperativeKernelMultiDevice(long launchParamsList, int numDevices, int flags) {
long __functionAddress = Functions.LaunchCooperativeKernelMultiDevice;
if (CHECKS) {
check(__functionAddress);
}
return callPI(launchParamsList, numDevices, flags, __functionAddress);
}
/**
* Launches CUDA functions on multiple devices where thread blocks can cooperate and synchronize as they execute. (Deprecated as of CUDA 11.3)
*
* Invokes kernels as specified in the {@code launchParamsList} array where each element of the array specifies all the parameters required to perform a
* single kernel launch. These kernels can cooperate and synchronize as they execute. The size of the array is specified by {@code numDevices}.
*
* No two kernels can be launched on the same device. All the devices targeted by this multi-device launch must be identical. All devices must have a
* non-zero value for the device attribute {@link #CU_DEVICE_ATTRIBUTE_COOPERATIVE_MULTI_DEVICE_LAUNCH DEVICE_ATTRIBUTE_COOPERATIVE_MULTI_DEVICE_LAUNCH}.
*
* All kernels launched must be identical with respect to the compiled code. Note that any {@code __device__}, {@code __constant__} or {@code __managed__}
* variables present in the module that owns the kernel launched on each device are independently instantiated on every device. It is the application's
* responsibility to ensure these variables are initialized and used appropriately.
*
* The size of the grids as specified in blocks, the size of the blocks themselves and the amount of shared memory used by each thread block must also
* match across all launched kernels.
*
* The streams used to launch these kernels must have been created via either {@link #cuStreamCreate StreamCreate} or {@link #cuStreamCreateWithPriority StreamCreateWithPriority}. The {@code NULL} stream or
* {@link #CU_STREAM_LEGACY STREAM_LEGACY} or {@link #CU_STREAM_PER_THREAD STREAM_PER_THREAD} cannot be used.
*
* The total number of blocks launched per kernel cannot exceed the maximum number of blocks per multiprocessor as returned by
* {@link #cuOccupancyMaxActiveBlocksPerMultiprocessor OccupancyMaxActiveBlocksPerMultiprocessor} (or {@link #cuOccupancyMaxActiveBlocksPerMultiprocessorWithFlags OccupancyMaxActiveBlocksPerMultiprocessorWithFlags}) times the number of multiprocessors as
* specified by the device attribute {@link #CU_DEVICE_ATTRIBUTE_MULTIPROCESSOR_COUNT DEVICE_ATTRIBUTE_MULTIPROCESSOR_COUNT}. Since the total number of blocks launched per device has to match across
* all devices, the maximum number of blocks that can be launched per device will be limited by the device with the least number of multiprocessors.
*
* The kernels cannot make use of CUDA dynamic parallelism.
*
*
* - {@code CUDA_LAUNCH_PARAMS::function} specifies the kernel to be launched. All functions must be identical with respect to the compiled code.
* - {@code CUDA_LAUNCH_PARAMS::gridDimX} is the width of the grid in blocks. This must match across all kernels launched.
* - {@code CUDA_LAUNCH_PARAMS::gridDimY} is the height of the grid in blocks. This must match across all kernels launched.
* - {@code CUDA_LAUNCH_PARAMS::gridDimZ} is the depth of the grid in blocks. This must match across all kernels launched.
* - {@code CUDA_LAUNCH_PARAMS::blockDimX} is the X dimension of each thread block. This must match across all kernels launched.
* - {@code CUDA_LAUNCH_PARAMS::blockDimY} is the Y dimension of each thread block. This must match across all kernels launched.
* - {@code CUDA_LAUNCH_PARAMS::blockDimZ} is the Z dimension of each thread block. This must match across all kernels launched.
* - {@code CUDA_LAUNCH_PARAMS::sharedMemBytes} is the dynamic shared-memory size per thread block in bytes. This must match across all kernels
* launched.
* - {@code CUDA_LAUNCH_PARAMS::hStream} is the handle to the stream to perform the launch in. This cannot be the {@code NULL} stream or {@link #CU_STREAM_LEGACY STREAM_LEGACY} or
* {@link #CU_STREAM_PER_THREAD STREAM_PER_THREAD}. The CUDA context associated with this stream must match that associated with {@code CUDA_LAUNCH_PARAMS::function}.
* - {@code CUDA_LAUNCH_PARAMS::kernelParams} is an array of pointers to kernel parameters. If {@code ::function} has N parameters, then
* {@code ::kernelParams} needs to be an array of N pointers. Each of {@code ::kernelParams[0]} through {@code ::kernelParams[N-1]} must point to a
* region of memory from which the actual kernel parameter will be copied. The number of kernel parameters and their offsets and sizes do not need to
* be specified as that information is retrieved directly from the kernel's image.
*
*
* By default, the kernel won't begin execution on any GPU until all prior work in all the specified streams has completed. This behavior can be
* overridden by specifying the flag {@link #CU_CUDA_COOPERATIVE_LAUNCH_MULTI_DEVICE_NO_PRE_LAUNCH_SYNC CUDA_COOPERATIVE_LAUNCH_MULTI_DEVICE_NO_PRE_LAUNCH_SYNC}. When this flag is specified, each kernel will only wait
* for prior work in the stream corresponding to that GPU to complete before it begins execution.
*
* Similarly, by default, any subsequent work pushed in any of the specified streams will not begin execution until the kernels on all GPUs have
* completed. This behavior can be overridden by specifying the flag {@link #CU_CUDA_COOPERATIVE_LAUNCH_MULTI_DEVICE_NO_POST_LAUNCH_SYNC CUDA_COOPERATIVE_LAUNCH_MULTI_DEVICE_NO_POST_LAUNCH_SYNC}. When this flag is
* specified, any subsequent work pushed in any of the specified streams will only wait for the kernel launched on the GPU corresponding to that stream to
* complete before it begins execution.
*
* Calling {@link #cuLaunchCooperativeKernelMultiDevice LaunchCooperativeKernelMultiDevice} sets persistent function state that is the same as function state set through the {@link #cuLaunchKernel LaunchKernel} API
* when called individually for each element in {@code launchParamsList}.
*
* When kernels are launched via {@link #cuLaunchCooperativeKernelMultiDevice LaunchCooperativeKernelMultiDevice}, the previous block shape, shared size and parameter info associated with each
* {@code CUDA_LAUNCH_PARAMS::function} in {@code launchParamsList} is overwritten.
*
* Note that to use {@link #cuLaunchCooperativeKernelMultiDevice LaunchCooperativeKernelMultiDevice}, the kernels must either have been compiled with toolchain version 3.2 or later so that they
* will contain kernel parameter information, or have no kernel parameters. If either of these conditions is not met, then
* {@link #cuLaunchCooperativeKernelMultiDevice LaunchCooperativeKernelMultiDevice} will return {@link #CUDA_ERROR_INVALID_IMAGE}.
*
* @param launchParamsList list of launch parameters, one per device
* @param flags flags to control launch behavior
*/
@NativeType("CUresult")
public static int cuLaunchCooperativeKernelMultiDevice(@NativeType("CUDA_LAUNCH_PARAMS *") CUDA_LAUNCH_PARAMS.Buffer launchParamsList, @NativeType("unsigned int") int flags) {
return ncuLaunchCooperativeKernelMultiDevice(launchParamsList.address(), launchParamsList.remaining(), flags);
}
// --- [ cuLaunchHostFunc ] ---
/** Unsafe version of: {@link #cuLaunchHostFunc LaunchHostFunc} */
public static int ncuLaunchHostFunc(long hStream, long fn, long userData) {
long __functionAddress = Functions.LaunchHostFunc;
if (CHECKS) {
check(__functionAddress);
check(userData);
}
return callPPPI(hStream, fn, userData, __functionAddress);
}
/**
* Enqueues a host function call in a stream.
*
* Enqueues a host function to run in a stream. The function will be called after currently enqueued work and will block work added after it.
*
* The host function must not make any CUDA API calls. Attempting to use a CUDA API may result in {@link #CUDA_ERROR_NOT_PERMITTED}, but this is not required.
* The host function must not perform any synchronization that may depend on outstanding CUDA work not mandated to run earlier. Host functions without a
* mandated order (such as in independent streams) execute in undefined order and may be serialized.
*
* For the purposes of Unified Memory, execution makes a number of guarantees:
*
* - The stream is considered idle for the duration of the function's execution. Thus, for example, the function may always use memory attached to the
* stream it was enqueued in.
* - The start of execution of the function has the same effect as synchronizing an event recorded in the same stream immediately prior to the function.
* It thus synchronizes streams which have been "joined" prior to the function.
* - Adding device work to any stream does not have the effect of making the stream active until all preceding host functions and stream callbacks have
* executed. Thus, for example, a function might use global attached memory even if work has been added to another stream, if the work has been
* ordered behind the function call with an event.
* - Completion of the function does not cause a stream to become active except as described above. The stream will remain idle if no device work
* follows the function, and will remain idle across consecutive host functions or stream callbacks without device work in between. Thus, for example,
* stream synchronization can be done by signaling from a host function at the end of the stream.
*
* Note that, in contrast to {@link #cuStreamAddCallback StreamAddCallback}, the function will not be called in the event of an error in the CUDA context.
*
* @param hStream stream to enqueue function call in
* @param fn the function to call once preceding stream operations are complete
* @param userData user-specified data to be passed to the function
*/
@NativeType("CUresult")
public static int cuLaunchHostFunc(@NativeType("CUstream") long hStream, @NativeType("void (*) (void *)") CUhostFnI fn, @NativeType("void *") long userData) {
return ncuLaunchHostFunc(hStream, fn.address(), userData);
}
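// Hedged usage sketch, not part of the generated bindings: enqueueing a host callback behind the
// work currently in `stream`. The CHECKS branch above rejects a NULL userData, so a dummy
// off-heap byte is allocated and released by the callback itself.
//
// long userData = nmemAlloc(1);
// CUhostFn fn = CUhostFn.create(ud -> {
//     System.out.println("all prior work in the stream has completed");
//     nmemFree(ud); // the host function must not call back into CUDA
// });
// int err = cuLaunchHostFunc(stream, fn, userData);
// // fn.free() once the callback is guaranteed to have run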
// --- [ cuFuncSetBlockShape ] ---
/**
* Sets the block-dimensions for the function. (Deprecated)
*
* Specifies the {@code x}, {@code y}, and {@code z} dimensions of the thread blocks that are created when the kernel given by {@code hfunc} is launched.
*
* @param hfunc kernel to specify dimensions of
* @param x x dimension
* @param y y dimension
* @param z z dimension
*/
@NativeType("CUresult")
public static int cuFuncSetBlockShape(@NativeType("CUfunction") long hfunc, int x, int y, int z) {
long __functionAddress = Functions.FuncSetBlockShape;
if (CHECKS) {
check(hfunc);
}
return callPI(hfunc, x, y, z, __functionAddress);
}
// --- [ cuFuncSetSharedSize ] ---
/**
* Sets the dynamic shared-memory size for the function. (Deprecated)
*
* Sets through {@code bytes} the amount of dynamic shared memory that will be available to each thread block when the kernel given by {@code hfunc} is
* launched.
*
* @param hfunc kernel to specify dynamic shared-memory size for
* @param bytes dynamic shared-memory size per thread block in bytes
*/
@NativeType("CUresult")
public static int cuFuncSetSharedSize(@NativeType("CUfunction") long hfunc, @NativeType("unsigned int") int bytes) {
long __functionAddress = Functions.FuncSetSharedSize;
if (CHECKS) {
check(hfunc);
}
return callPI(hfunc, bytes, __functionAddress);
}
// --- [ cuParamSetSize ] ---
/**
* Sets the parameter size for the function. (Deprecated)
*
* Sets through {@code numbytes} the total size in bytes needed by the function parameters of the kernel corresponding to {@code hfunc}.
*
* @param hfunc kernel to set parameter size for
* @param numbytes size of parameter list in bytes
*/
@NativeType("CUresult")
public static int cuParamSetSize(@NativeType("CUfunction") long hfunc, @NativeType("unsigned int") int numbytes) {
long __functionAddress = Functions.ParamSetSize;
if (CHECKS) {
check(hfunc);
}
return callPI(hfunc, numbytes, __functionAddress);
}
// --- [ cuParamSeti ] ---
/**
* Adds an integer parameter to the function's argument list. (Deprecated)
*
* Sets an integer parameter that will be specified the next time the kernel corresponding to {@code hfunc} will be invoked. {@code offset} is a byte
* offset.
*
* @param hfunc kernel to add parameter to
* @param offset offset to add parameter to argument list
* @param value value of parameter
*/
@NativeType("CUresult")
public static int cuParamSeti(@NativeType("CUfunction") long hfunc, int offset, @NativeType("unsigned int") int value) {
long __functionAddress = Functions.ParamSeti;
if (CHECKS) {
check(hfunc);
}
return callPI(hfunc, offset, value, __functionAddress);
}
// --- [ cuParamSetf ] ---
/**
* Adds a floating-point parameter to the function's argument list. (Deprecated)
*
* Sets a floating-point parameter that will be specified the next time the kernel corresponding to {@code hfunc} will be invoked. {@code offset} is a
* byte offset.
*
* @param hfunc kernel to add parameter to
* @param offset offset to add parameter to argument list
* @param value value of parameter
*/
@NativeType("CUresult")
public static int cuParamSetf(@NativeType("CUfunction") long hfunc, int offset, float value) {
long __functionAddress = Functions.ParamSetf;
if (CHECKS) {
check(hfunc);
}
return callPI(hfunc, offset, value, __functionAddress);
}
// --- [ cuParamSetv ] ---
/**
* Unsafe version of: {@link #cuParamSetv ParamSetv}
*
* @param numbytes size of data to copy in bytes
*/
public static int ncuParamSetv(long hfunc, int offset, long ptr, int numbytes) {
long __functionAddress = Functions.ParamSetv;
if (CHECKS) {
check(hfunc);
}
return callPPI(hfunc, offset, ptr, numbytes, __functionAddress);
}
/**
* Adds arbitrary data to the function's argument list. (Deprecated)
*
* Copies an arbitrary amount of data (specified in {@code numbytes}) from {@code ptr} into the parameter space of the kernel corresponding to
* {@code hfunc}. {@code offset} is a byte offset.
*
* @param hfunc kernel to add data to
* @param offset offset to add data to argument list
* @param ptr pointer to arbitrary data
*/
@NativeType("CUresult")
public static int cuParamSetv(@NativeType("CUfunction") long hfunc, int offset, @NativeType("void *") ByteBuffer ptr) {
return ncuParamSetv(hfunc, offset, memAddress(ptr), ptr.remaining());
}
// --- [ cuLaunch ] ---
/**
* Launches a CUDA function. (Deprecated)
*
* Invokes the kernel {@code f} on a 1 x 1 x 1 grid of blocks. The block contains the number of threads specified by a previous call to
* {@link #cuFuncSetBlockShape FuncSetBlockShape}.
*
* The block shape, dynamic shared memory size, and parameter information must be set using {@link #cuFuncSetBlockShape FuncSetBlockShape}, {@link #cuFuncSetSharedSize FuncSetSharedSize},
* {@link #cuParamSetSize ParamSetSize}, {@link #cuParamSeti ParamSeti}, {@link #cuParamSetf ParamSetf}, and {@link #cuParamSetv ParamSetv} prior to calling this function.
*
* Launching a function via {@link #cuLaunchKernel LaunchKernel} invalidates the function's block shape, dynamic shared memory size, and parameter information. After
* launching via {@link #cuLaunchKernel LaunchKernel}, this state must be re-initialized prior to calling this function. Failure to do so results in undefined behavior.
*
* @param f kernel to launch
*/
@NativeType("CUresult")
public static int cuLaunch(@NativeType("CUfunction") long f) {
long __functionAddress = Functions.Launch;
if (CHECKS) {
check(f);
}
return callPI(f, __functionAddress);
}
// --- [ cuLaunchGrid ] ---
/**
* Launches a CUDA function. (Deprecated)
*
* Invokes the kernel {@code f} on a {@code grid_width} x {@code grid_height} grid of blocks. Each block contains the number of threads specified by a
* previous call to {@link #cuFuncSetBlockShape FuncSetBlockShape}.
*
* The block shape, dynamic shared memory size, and parameter information must be set using {@link #cuFuncSetBlockShape FuncSetBlockShape}, {@link #cuFuncSetSharedSize FuncSetSharedSize},
* {@link #cuParamSetSize ParamSetSize}, {@link #cuParamSeti ParamSeti}, {@link #cuParamSetf ParamSetf}, and {@link #cuParamSetv ParamSetv} prior to calling this function.
*
* Launching a function via {@link #cuLaunchKernel LaunchKernel} invalidates the function's block shape, dynamic shared memory size, and parameter information. After
* launching via {@link #cuLaunchKernel LaunchKernel}, this state must be re-initialized prior to calling this function. Failure to do so results in undefined behavior.
*
* @param f kernel to launch
* @param grid_width width of grid in blocks
* @param grid_height height of grid in blocks
*/
@NativeType("CUresult")
public static int cuLaunchGrid(@NativeType("CUfunction") long f, int grid_width, int grid_height) {
long __functionAddress = Functions.LaunchGrid;
if (CHECKS) {
check(f);
}
return callPI(f, grid_width, grid_height, __functionAddress);
}
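// Hedged usage sketch, not part of the generated bindings: the legacy launch sequence that
// cuLaunchKernel replaced. The kernel is assumed to take a device pointer followed by a float;
// with this deprecated API the caller is responsible for parameter offsets, sizes and alignment.
// `checkErr` is a hypothetical CUresult-checking helper.
//
// checkErr(cuFuncSetBlockShape(hfunc, 256, 1, 1));
// checkErr(cuFuncSetSharedSize(hfunc, 0));
// try (MemoryStack stack = stackPush()) {
//     ByteBuffer ptrArg = stack.malloc(8).putLong(0, dptr); // 8-byte device pointer at offset 0
//     checkErr(cuParamSetv(hfunc, 0, ptrArg));
// }
// checkErr(cuParamSetf(hfunc, 8, 1.5f)); // float at offset 8
// checkErr(cuParamSetSize(hfunc, 12));   // total parameter-space size in bytes
// checkErr(cuLaunchGrid(hfunc, 64, 1));  // 64 x 1 grid of 256-thread blocks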
// --- [ cuLaunchGridAsync ] ---
/**
* Launches a CUDA function. (Deprecated)
*
* Invokes the kernel {@code f} on a {@code grid_width} x {@code grid_height} grid of blocks. Each block contains the number of threads specified by a
* previous call to {@link #cuFuncSetBlockShape FuncSetBlockShape}.
*
* The block shape, dynamic shared memory size, and parameter information must be set using {@link #cuFuncSetBlockShape FuncSetBlockShape}, {@link #cuFuncSetSharedSize FuncSetSharedSize},
* {@link #cuParamSetSize ParamSetSize}, {@link #cuParamSeti ParamSeti}, {@link #cuParamSetf ParamSetf}, and {@link #cuParamSetv ParamSetv} prior to calling this function.
*
* Launching a function via {@link #cuLaunchKernel LaunchKernel} invalidates the function's block shape, dynamic shared memory size, and parameter information. After
* launching via {@link #cuLaunchKernel LaunchKernel}, this state must be re-initialized prior to calling this function. Failure to do so results in undefined behavior.
*
* Note
*
* In certain cases where cubins are created with no ABI (i.e., using {@code ptxas --abi-compile no}), this function may
* serialize kernel launches. The CUDA driver retains asynchronous behavior by growing the per-thread stack as needed per launch and not shrinking it
* afterwards.
*
* @param f kernel to launch
* @param grid_width width of grid in blocks
* @param grid_height height of grid in blocks
* @param hStream stream identifier
*/
@NativeType("CUresult")
public static int cuLaunchGridAsync(@NativeType("CUfunction") long f, int grid_width, int grid_height, @NativeType("CUstream") long hStream) {
long __functionAddress = Functions.LaunchGridAsync;
if (CHECKS) {
check(f);
}
return callPPI(f, grid_width, grid_height, hStream, __functionAddress);
}
// --- [ cuParamSetTexRef ] ---
/**
* Adds a texture-reference to the function's argument list. (Deprecated)
*
* Makes the CUDA array or linear memory bound to the texture reference {@code hTexRef} available to a device program as a texture. In this version of
* CUDA, the texture-reference must be obtained via {@link #cuModuleGetTexRef ModuleGetTexRef} and the {@code texunit} parameter must be set to {@link #CU_PARAM_TR_DEFAULT PARAM_TR_DEFAULT}.
*
* @param hfunc kernel to add texture-reference to
* @param texunit texture unit (must be {@link #CU_PARAM_TR_DEFAULT PARAM_TR_DEFAULT})
* @param hTexRef texture-reference to add to argument list
*/
@NativeType("CUresult")
public static int cuParamSetTexRef(@NativeType("CUfunction") long hfunc, int texunit, @NativeType("CUtexref") long hTexRef) {
long __functionAddress = Functions.ParamSetTexRef;
if (CHECKS) {
check(hfunc);
check(hTexRef);
}
return callPPI(hfunc, texunit, hTexRef, __functionAddress);
}
// --- [ cuGraphCreate ] ---
/** Unsafe version of: {@link #cuGraphCreate GraphCreate} */
public static int ncuGraphCreate(long phGraph, int flags) {
long __functionAddress = Functions.GraphCreate;
if (CHECKS) {
check(__functionAddress);
}
return callPI(phGraph, flags, __functionAddress);
}
/**
* Creates a graph.
*
* Creates an empty graph, which is returned via {@code phGraph}.
*
* @param phGraph returns newly created graph
* @param flags graph creation flags, must be 0
*/
@NativeType("CUresult")
public static int cuGraphCreate(@NativeType("CUgraph *") PointerBuffer phGraph, @NativeType("unsigned int") int flags) {
if (CHECKS) {
check(phGraph, 1);
}
return ncuGraphCreate(memAddress(phGraph), flags);
}
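// Hedged usage sketch, not part of the generated bindings: creating an empty graph and reading
// the handle back from the output pointer.
//
// long hGraph;
// try (MemoryStack stack = stackPush()) {
//     PointerBuffer pGraph = stack.mallocPointer(1);
//     int err = cuGraphCreate(pGraph, 0); // flags must be 0
//     if (err != CUDA_SUCCESS) {
//         throw new IllegalStateException("cuGraphCreate failed: " + err);
//     }
//     hGraph = pGraph.get(0);
// }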
// --- [ cuGraphAddKernelNode ] ---
/**
* Unsafe version of: {@link #cuGraphAddKernelNode GraphAddKernelNode}
*
* @param numDependencies number of dependencies
*/
public static int ncuGraphAddKernelNode(long phGraphNode, long hGraph, long dependencies, long numDependencies, long nodeParams) {
long __functionAddress = Functions.GraphAddKernelNode;
if (CHECKS) {
check(__functionAddress);
check(hGraph);
CUDA_KERNEL_NODE_PARAMS.validate(nodeParams);
}
return callPPPPPI(phGraphNode, hGraph, dependencies, numDependencies, nodeParams, __functionAddress);
}
/**
* Creates a kernel execution node and adds it to a graph.
*
* Creates a new kernel execution node and adds it to {@code hGraph} with {@code numDependencies} dependencies specified via {@code dependencies} and
* arguments specified in {@code nodeParams}. It is possible for {@code numDependencies} to be 0, in which case the node will be placed at the root of the
* graph. {@code dependencies} may not have any duplicate entries. A handle to the new node will be returned in {@code phGraphNode}.
*
* When the graph is launched, the node will invoke kernel {@code func} on a ({@code gridDimX} x {@code gridDimY} x {@code gridDimZ}) grid of blocks. Each
* block contains ({@code blockDimX} x {@code blockDimY} x {@code blockDimZ}) threads.
*
* {@code sharedMemBytes} sets the amount of dynamic shared memory that will be available to each thread block.
*
* Kernel parameters to {@code func} can be specified in one of two ways:
*
* - Kernel parameters can be specified via {@code kernelParams}. If the kernel has N parameters, then {@code kernelParams} needs to be an array of N
* pointers. Each pointer, from {@code kernelParams[0]} to {@code kernelParams[N-1]}, points to the region of memory from which the actual parameter
* will be copied. The number of kernel parameters and their offsets and sizes do not need to be specified as that information is retrieved directly
* from the kernel's image.
* - Kernel parameters for non-cooperative kernels can also be packaged by the application into a single buffer that is passed in via {@code extra}.
* This places the burden on the application of knowing each kernel parameter's size and alignment/padding within the buffer. The {@code extra}
* parameter exists to allow this function to take additional less commonly used arguments. {@code extra} specifies a list of names of extra settings
* and their corresponding values. Each extra setting name is immediately followed by the corresponding value. The list must be terminated with either
* {@code NULL} or {@link #CU_LAUNCH_PARAM_END LAUNCH_PARAM_END}.
*
* - {@link #CU_LAUNCH_PARAM_END LAUNCH_PARAM_END}, which indicates the end of the {@code extra} array;
* - {@link #CU_LAUNCH_PARAM_BUFFER_POINTER LAUNCH_PARAM_BUFFER_POINTER}, which specifies that the next value in {@code extra} will be a pointer to a buffer containing all the kernel
* parameters for launching kernel {@code func};
* - {@link #CU_LAUNCH_PARAM_BUFFER_SIZE LAUNCH_PARAM_BUFFER_SIZE}, which specifies that the next value in {@code extra} will be a pointer to a {@code size_t} containing the size of the buffer
* specified with {@link #CU_LAUNCH_PARAM_BUFFER_POINTER LAUNCH_PARAM_BUFFER_POINTER};
*
* The error {@link #CUDA_ERROR_INVALID_VALUE} will be returned if kernel parameters are specified with both {@code kernelParams} and {@code extra} (i.e. both
* {@code kernelParams} and {@code extra} are non-NULL). {@link #CUDA_ERROR_INVALID_VALUE} will be returned if {@code extra} is used for a cooperative kernel.
*
* The {@code kernelParams} or {@code extra} array, as well as the argument values it points to, are copied during this call.
*
* Note
*
* Kernels launched using graphs must not use texture and surface references. Reading or writing through any texture or surface reference is
* undefined behavior. This restriction does not apply to texture and surface objects.
*
* @param phGraphNode returns newly created node
* @param hGraph graph to which to add the node
* @param dependencies dependencies of the node
* @param nodeParams parameters for the GPU execution node
*/
@NativeType("CUresult")
public static int cuGraphAddKernelNode(@NativeType("CUgraphNode *") PointerBuffer phGraphNode, @NativeType("CUgraph") long hGraph, @Nullable @NativeType("CUgraphNode const *") PointerBuffer dependencies, @NativeType("CUDA_KERNEL_NODE_PARAMS const *") CUDA_KERNEL_NODE_PARAMS nodeParams) {
if (CHECKS) {
check(phGraphNode, 1);
}
return ncuGraphAddKernelNode(memAddress(phGraphNode), hGraph, memAddressSafe(dependencies), remainingSafe(dependencies), nodeParams.address());
}
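// Hedged usage sketch, not part of the generated bindings: adding a root kernel node that
// launches `func` with a single device-pointer argument via kernelParams (kernelParams[0]
// points at the argument value). The CUDA_KERNEL_NODE_PARAMS accessor names are assumed to
// mirror the C struct fields.
//
// try (MemoryStack stack = stackPush()) {
//     PointerBuffer kernelParams = stack.pointers(memAddress(stack.longs(dptr)));
//     CUDA_KERNEL_NODE_PARAMS nodeParams = CUDA_KERNEL_NODE_PARAMS.calloc(stack)
//         .func(func)
//         .gridDimX(64).gridDimY(1).gridDimZ(1)
//         .blockDimX(256).blockDimY(1).blockDimZ(1)
//         .sharedMemBytes(0)
//         .kernelParams(kernelParams); // `extra` stays NULL: the two are mutually exclusive
//     PointerBuffer pNode = stack.mallocPointer(1);
//     int err = cuGraphAddKernelNode(pNode, hGraph, null, nodeParams);
//     long hKernelNode = pNode.get(0);
// }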
// --- [ cuGraphKernelNodeGetParams ] ---
/** Unsafe version of: {@link #cuGraphKernelNodeGetParams GraphKernelNodeGetParams} */
public static int ncuGraphKernelNodeGetParams(long hNode, long nodeParams) {
long __functionAddress = Functions.GraphKernelNodeGetParams;
if (CHECKS) {
check(__functionAddress);
check(hNode);
}
return callPPI(hNode, nodeParams, __functionAddress);
}
/**
* Returns a kernel node's parameters.
*
* Returns the parameters of kernel node {@code hNode} in {@code nodeParams}. The {@code kernelParams} or {@code extra} array returned in
* {@code nodeParams}, as well as the argument values it points to, are owned by the node. This memory remains valid until the node is destroyed or its
* parameters are modified, and should not be modified directly. Use {@link #cuGraphKernelNodeSetParams GraphKernelNodeSetParams} to update the parameters of this node.
*
* The params will contain either {@code kernelParams} or {@code extra}, according to which of these was most recently set on the node.
*
* @param hNode node to get the parameters for
* @param nodeParams pointer to return the parameters
*/
@NativeType("CUresult")
public static int cuGraphKernelNodeGetParams(@NativeType("CUgraphNode") long hNode, @NativeType("CUDA_KERNEL_NODE_PARAMS *") CUDA_KERNEL_NODE_PARAMS nodeParams) {
return ncuGraphKernelNodeGetParams(hNode, nodeParams.address());
}
// --- [ cuGraphKernelNodeSetParams ] ---
/** Unsafe version of: {@link #cuGraphKernelNodeSetParams GraphKernelNodeSetParams} */
public static int ncuGraphKernelNodeSetParams(long hNode, long nodeParams) {
long __functionAddress = Functions.GraphKernelNodeSetParams;
if (CHECKS) {
check(__functionAddress);
check(hNode);
CUDA_KERNEL_NODE_PARAMS.validate(nodeParams);
}
return callPPI(hNode, nodeParams, __functionAddress);
}
/**
* Sets a kernel node's parameters.
*
* Sets the parameters of kernel node {@code hNode} to {@code nodeParams}.
*
* @param hNode node to set the parameters for
* @param nodeParams parameters to copy
*/
@NativeType("CUresult")
public static int cuGraphKernelNodeSetParams(@NativeType("CUgraphNode") long hNode, @NativeType("CUDA_KERNEL_NODE_PARAMS const *") CUDA_KERNEL_NODE_PARAMS nodeParams) {
return ncuGraphKernelNodeSetParams(hNode, nodeParams.address());
}
// --- [ cuGraphAddMemcpyNode ] ---
/**
* Unsafe version of: {@link #cuGraphAddMemcpyNode GraphAddMemcpyNode}
*
* @param numDependencies number of dependencies
*/
public static int ncuGraphAddMemcpyNode(long phGraphNode, long hGraph, long dependencies, long numDependencies, long copyParams, long ctx) {
long __functionAddress = Functions.GraphAddMemcpyNode;
if (CHECKS) {
check(__functionAddress);
check(hGraph);
check(ctx);
}
return callPPPPPPI(phGraphNode, hGraph, dependencies, numDependencies, copyParams, ctx, __functionAddress);
}
/**
* Creates a memcpy node and adds it to a graph.
*
* Creates a new memcpy node and adds it to {@code hGraph} with {@code numDependencies} dependencies specified via {@code dependencies}. It is possible
* for {@code numDependencies} to be 0, in which case the node will be placed at the root of the graph. {@code dependencies} may not have any duplicate
* entries. A handle to the new node will be returned in {@code phGraphNode}.
*
* When the graph is launched, the node will perform the memcpy described by {@code copyParams}. See {@link #cuMemcpy3D Memcpy3D} for a description of the structure and
* its restrictions.
*
* Memcpy nodes have some additional restrictions with regard to managed memory if the system contains at least one device which has a zero value for
* the device attribute {@link #CU_DEVICE_ATTRIBUTE_CONCURRENT_MANAGED_ACCESS DEVICE_ATTRIBUTE_CONCURRENT_MANAGED_ACCESS}. If one or more of the operands refer to managed memory, then using the memory
* type {@link #CU_MEMORYTYPE_UNIFIED MEMORYTYPE_UNIFIED} is disallowed for those operand(s). The managed memory will be treated as residing on either the host or the device,
* depending on which memory type is specified.
*
* @param phGraphNode returns newly created node
* @param hGraph graph to which to add the node
* @param dependencies dependencies of the node
* @param copyParams parameters for the memory copy
* @param ctx context on which to run the node
*/
@NativeType("CUresult")
public static int cuGraphAddMemcpyNode(@NativeType("CUgraphNode *") PointerBuffer phGraphNode, @NativeType("CUgraph") long hGraph, @Nullable @NativeType("CUgraphNode const *") PointerBuffer dependencies, @NativeType("CUDA_MEMCPY3D const *") CUDA_MEMCPY3D copyParams, @NativeType("CUcontext") long ctx) {
if (CHECKS) {
check(phGraphNode, 1);
}
return ncuGraphAddMemcpyNode(memAddress(phGraphNode), hGraph, memAddressSafe(dependencies), remainingSafe(dependencies), copyParams.address(), ctx);
}
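// Hedged usage sketch, not part of the generated bindings: a host-to-device copy node for
// `bytes` contiguous bytes (hostBuffer must hold at least that much), ordered after the kernel
// node created above. The CUDA_MEMCPY3D accessor names are assumed to mirror the C fields.
//
// try (MemoryStack stack = stackPush()) {
//     CUDA_MEMCPY3D copy = CUDA_MEMCPY3D.calloc(stack)
//         .srcMemoryType(CU_MEMORYTYPE_HOST)
//         .srcHost(hostBuffer).srcPitch(bytes).srcHeight(1)
//         .dstMemoryType(CU_MEMORYTYPE_DEVICE)
//         .dstDevice(dptr).dstPitch(bytes).dstHeight(1)
//         .WidthInBytes(bytes).Height(1).Depth(1);
//     PointerBuffer pNode = stack.mallocPointer(1);
//     int err = cuGraphAddMemcpyNode(pNode, hGraph, stack.pointers(hKernelNode), copy, ctx);
// }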
// --- [ cuGraphMemcpyNodeGetParams ] ---
/** Unsafe version of: {@link #cuGraphMemcpyNodeGetParams GraphMemcpyNodeGetParams} */
public static int ncuGraphMemcpyNodeGetParams(long hNode, long nodeParams) {
long __functionAddress = Functions.GraphMemcpyNodeGetParams;
if (CHECKS) {
check(__functionAddress);
check(hNode);
}
return callPPI(hNode, nodeParams, __functionAddress);
}
/**
* Returns a memcpy node's parameters.
*
* Returns the parameters of memcpy node {@code hNode} in {@code nodeParams}.
*
* @param hNode node to get the parameters for
* @param nodeParams pointer to return the parameters
*/
@NativeType("CUresult")
public static int cuGraphMemcpyNodeGetParams(@NativeType("CUgraphNode") long hNode, @NativeType("CUDA_MEMCPY3D *") CUDA_MEMCPY3D nodeParams) {
return ncuGraphMemcpyNodeGetParams(hNode, nodeParams.address());
}
// --- [ cuGraphMemcpyNodeSetParams ] ---
/** Unsafe version of: {@link #cuGraphMemcpyNodeSetParams GraphMemcpyNodeSetParams} */
public static int ncuGraphMemcpyNodeSetParams(long hNode, long nodeParams) {
long __functionAddress = Functions.GraphMemcpyNodeSetParams;
if (CHECKS) {
check(__functionAddress);
check(hNode);
}
return callPPI(hNode, nodeParams, __functionAddress);
}
/**
* Sets a memcpy node's parameters.
*
* Sets the parameters of memcpy node {@code hNode} to {@code nodeParams}.
*
* @param hNode node to set the parameters for
* @param nodeParams parameters to copy
*/
@NativeType("CUresult")
public static int cuGraphMemcpyNodeSetParams(@NativeType("CUgraphNode") long hNode, @NativeType("CUDA_MEMCPY3D const *") CUDA_MEMCPY3D nodeParams) {
return ncuGraphMemcpyNodeSetParams(hNode, nodeParams.address());
}
// --- [ cuGraphAddMemsetNode ] ---
/**
* Unsafe version of: {@link #cuGraphAddMemsetNode GraphAddMemsetNode}
*
* @param numDependencies number of dependencies
*/
public static int ncuGraphAddMemsetNode(long phGraphNode, long hGraph, long dependencies, long numDependencies, long memsetParams, long ctx) {
long __functionAddress = Functions.GraphAddMemsetNode;
if (CHECKS) {
check(__functionAddress);
check(hGraph);
CUDA_MEMSET_NODE_PARAMS.validate(memsetParams);
check(ctx);
}
return callPPPPPPI(phGraphNode, hGraph, dependencies, numDependencies, memsetParams, ctx, __functionAddress);
}
/**
* Creates a memset node and adds it to a graph.
*
* Creates a new memset node and adds it to {@code hGraph} with {@code numDependencies} dependencies specified via {@code dependencies}. It is possible
* for {@code numDependencies} to be 0, in which case the node will be placed at the root of the graph. {@code dependencies} may not have any duplicate
* entries. A handle to the new node will be returned in {@code phGraphNode}.
*
* The element size must be 1, 2, or 4 bytes. When the graph is launched, the node will perform the memset described by {@code memsetParams}.
*
* @param phGraphNode returns newly created node
* @param hGraph graph to which to add the node
* @param dependencies dependencies of the node
* @param memsetParams parameters for the memory set
* @param ctx context on which to run the node
*/
@NativeType("CUresult")
public static int cuGraphAddMemsetNode(@NativeType("CUgraphNode *") PointerBuffer phGraphNode, @NativeType("CUgraph") long hGraph, @Nullable @NativeType("CUgraphNode const *") PointerBuffer dependencies, @NativeType("CUDA_MEMSET_NODE_PARAMS const *") CUDA_MEMSET_NODE_PARAMS memsetParams, @NativeType("CUcontext") long ctx) {
if (CHECKS) {
check(phGraphNode, 1);
}
return ncuGraphAddMemsetNode(memAddress(phGraphNode), hGraph, memAddressSafe(dependencies), remainingSafe(dependencies), memsetParams.address(), ctx);
}
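// Hedged usage sketch, not part of the generated bindings: a root memset node clearing `n`
// 32-bit words at `dptr`. The CUDA_MEMSET_NODE_PARAMS accessor names are assumed to mirror the
// C fields.
//
// try (MemoryStack stack = stackPush()) {
//     CUDA_MEMSET_NODE_PARAMS memset = CUDA_MEMSET_NODE_PARAMS.calloc(stack)
//         .dst(dptr)
//         .value(0)
//         .elementSize(4)      // must be 1, 2 or 4 bytes
//         .width(n).height(1); // pitch is unused when height == 1
//     PointerBuffer pNode = stack.mallocPointer(1);
//     int err = cuGraphAddMemsetNode(pNode, hGraph, null, memset, ctx);
// }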
// --- [ cuGraphMemsetNodeGetParams ] ---
/** Unsafe version of: {@link #cuGraphMemsetNodeGetParams GraphMemsetNodeGetParams} */
public static int ncuGraphMemsetNodeGetParams(long hNode, long nodeParams) {
long __functionAddress = Functions.GraphMemsetNodeGetParams;
if (CHECKS) {
check(__functionAddress);
check(hNode);
}
return callPPI(hNode, nodeParams, __functionAddress);
}
/**
* Returns a memset node's parameters.
*
* Returns the parameters of memset node {@code hNode} in {@code nodeParams}.
*
* @param hNode node to get the parameters for
* @param nodeParams pointer to return the parameters
*/
@NativeType("CUresult")
public static int cuGraphMemsetNodeGetParams(@NativeType("CUgraphNode") long hNode, @NativeType("CUDA_MEMSET_NODE_PARAMS *") CUDA_MEMSET_NODE_PARAMS nodeParams) {
return ncuGraphMemsetNodeGetParams(hNode, nodeParams.address());
}
// --- [ cuGraphMemsetNodeSetParams ] ---
/** Unsafe version of: {@link #cuGraphMemsetNodeSetParams GraphMemsetNodeSetParams} */
public static int ncuGraphMemsetNodeSetParams(long hNode, long nodeParams) {
long __functionAddress = Functions.GraphMemsetNodeSetParams;
if (CHECKS) {
check(__functionAddress);
check(hNode);
CUDA_MEMSET_NODE_PARAMS.validate(nodeParams);
}
return callPPI(hNode, nodeParams, __functionAddress);
}
/**
* Sets a memset node's parameters.
*
* Sets the parameters of memset node {@code hNode} to {@code nodeParams}.
*
* @param hNode node to set the parameters for
* @param nodeParams parameters to copy
*/
@NativeType("CUresult")
public static int cuGraphMemsetNodeSetParams(@NativeType("CUgraphNode") long hNode, @NativeType("CUDA_MEMSET_NODE_PARAMS const *") CUDA_MEMSET_NODE_PARAMS nodeParams) {
return ncuGraphMemsetNodeSetParams(hNode, nodeParams.address());
}
// --- [ cuGraphAddHostNode ] ---
/**
* Unsafe version of: {@link #cuGraphAddHostNode GraphAddHostNode}
*
* @param numDependencies number of dependencies
*/
public static int ncuGraphAddHostNode(long phGraphNode, long hGraph, long dependencies, long numDependencies, long nodeParams) {
long __functionAddress = Functions.GraphAddHostNode;
if (CHECKS) {
check(__functionAddress);
check(hGraph);
CUDA_HOST_NODE_PARAMS.validate(nodeParams);
}
return callPPPPPI(phGraphNode, hGraph, dependencies, numDependencies, nodeParams, __functionAddress);
}
/**
* Creates a host execution node and adds it to a graph.
*
* Creates a new CPU execution node and adds it to {@code hGraph} with {@code numDependencies} dependencies specified via {@code dependencies} and
* arguments specified in {@code nodeParams}. It is possible for {@code numDependencies} to be 0, in which case the node will be placed at the root of the
* graph. {@code dependencies} may not have any duplicate entries. A handle to the new node will be returned in {@code phGraphNode}.
*
* When the graph is launched, the node will invoke the specified CPU function. Host nodes are not supported under MPS with pre-Volta GPUs.
*
* @param phGraphNode returns newly created node
* @param hGraph graph to which to add the node
* @param dependencies dependencies of the node
* @param nodeParams parameters for the host node
*/
@NativeType("CUresult")
public static int cuGraphAddHostNode(@NativeType("CUgraphNode *") PointerBuffer phGraphNode, @NativeType("CUgraph") long hGraph, @Nullable @NativeType("CUgraphNode const *") PointerBuffer dependencies, @NativeType("CUDA_HOST_NODE_PARAMS const *") CUDA_HOST_NODE_PARAMS nodeParams) {
if (CHECKS) {
check(phGraphNode, 1);
}
return ncuGraphAddHostNode(memAddress(phGraphNode), hGraph, memAddressSafe(dependencies), remainingSafe(dependencies), nodeParams.address());
}
// --- [ cuGraphHostNodeGetParams ] ---
/** Unsafe version of: {@link #cuGraphHostNodeGetParams GraphHostNodeGetParams} */
public static int ncuGraphHostNodeGetParams(long hNode, long nodeParams) {
long __functionAddress = Functions.GraphHostNodeGetParams;
if (CHECKS) {
check(__functionAddress);
check(hNode);
}
return callPPI(hNode, nodeParams, __functionAddress);
}
/**
* Returns a host node's parameters.
*
* Returns the parameters of host node {@code hNode} in {@code nodeParams}.
*
* @param hNode node to get the parameters for
* @param nodeParams pointer to return the parameters
*/
@NativeType("CUresult")
public static int cuGraphHostNodeGetParams(@NativeType("CUgraphNode") long hNode, @NativeType("CUDA_HOST_NODE_PARAMS *") CUDA_HOST_NODE_PARAMS nodeParams) {
return ncuGraphHostNodeGetParams(hNode, nodeParams.address());
}
// --- [ cuGraphHostNodeSetParams ] ---
/** Unsafe version of: {@link #cuGraphHostNodeSetParams GraphHostNodeSetParams} */
public static int ncuGraphHostNodeSetParams(long hNode, long nodeParams) {
long __functionAddress = Functions.GraphHostNodeSetParams;
if (CHECKS) {
check(__functionAddress);
check(hNode);
CUDA_HOST_NODE_PARAMS.validate(nodeParams);
}
return callPPI(hNode, nodeParams, __functionAddress);
}
/**
* Sets a host node's parameters.
*
* Sets the parameters of host node {@code hNode} to {@code nodeParams}.
*
* @param hNode node to set the parameters for
* @param nodeParams parameters to copy
*/
@NativeType("CUresult")
public static int cuGraphHostNodeSetParams(@NativeType("CUgraphNode") long hNode, @NativeType("CUDA_HOST_NODE_PARAMS const *") CUDA_HOST_NODE_PARAMS nodeParams) {
return ncuGraphHostNodeSetParams(hNode, nodeParams.address());
}
// --- [ cuGraphAddChildGraphNode ] ---
/**
* Unsafe version of: {@link #cuGraphAddChildGraphNode GraphAddChildGraphNode}
*
* @param numDependencies number of dependencies
*/
public static int ncuGraphAddChildGraphNode(long phGraphNode, long hGraph, long dependencies, long numDependencies, long childGraph) {
long __functionAddress = Functions.GraphAddChildGraphNode;
if (CHECKS) {
check(__functionAddress);
check(hGraph);
check(childGraph);
}
return callPPPPPI(phGraphNode, hGraph, dependencies, numDependencies, childGraph, __functionAddress);
}
/**
* Creates a child graph node and adds it to a graph.
*
* Creates a new node which executes an embedded graph, and adds it to {@code hGraph} with {@code numDependencies} dependencies specified via {@code
* dependencies}. It is possible for {@code numDependencies} to be 0, in which case the node will be placed at the root of the graph. {@code dependencies}
* may not have any duplicate entries. A handle to the new node will be returned in {@code phGraphNode}.
*
* If {@code hGraph} contains allocation or free nodes, this call will return an error.
*
* The node executes an embedded child graph. The child graph is cloned in this call.
*
* @param phGraphNode returns newly created node
* @param hGraph graph to which to add the node
* @param dependencies dependencies of the node
* @param childGraph the graph to clone into this node
*/
@NativeType("CUresult")
public static int cuGraphAddChildGraphNode(@NativeType("CUgraphNode *") PointerBuffer phGraphNode, @NativeType("CUgraph") long hGraph, @Nullable @NativeType("CUgraphNode const *") PointerBuffer dependencies, @NativeType("CUgraph") long childGraph) {
if (CHECKS) {
check(phGraphNode, 1);
}
return ncuGraphAddChildGraphNode(memAddress(phGraphNode), hGraph, memAddressSafe(dependencies), remainingSafe(dependencies), childGraph);
}
// --- [ cuGraphChildGraphNodeGetGraph ] ---
/** Unsafe version of: {@link #cuGraphChildGraphNodeGetGraph GraphChildGraphNodeGetGraph} */
public static int ncuGraphChildGraphNodeGetGraph(long hNode, long phGraph) {
long __functionAddress = Functions.GraphChildGraphNodeGetGraph;
if (CHECKS) {
check(__functionAddress);
check(hNode);
}
return callPPI(hNode, phGraph, __functionAddress);
}
/**
* Gets a handle to the embedded graph of a child graph node.
*
* Gets a handle to the embedded graph in a child graph node. This call does not clone the graph. Changes to the graph will be reflected in the node, and
* the node retains ownership of the graph.
*
* Allocation and free nodes cannot be added to the returned graph. Attempting to do so will return an error.
*
* @param hNode node to get the embedded graph for
* @param phGraph location to store a handle to the graph
*/
@NativeType("CUresult")
public static int cuGraphChildGraphNodeGetGraph(@NativeType("CUgraphNode") long hNode, @NativeType("CUgraph *") PointerBuffer phGraph) {
if (CHECKS) {
check(phGraph, 1);
}
return ncuGraphChildGraphNodeGetGraph(hNode, memAddress(phGraph));
}
// --- [ cuGraphAddEmptyNode ] ---
/**
* Unsafe version of: {@link #cuGraphAddEmptyNode GraphAddEmptyNode}
*
* @param numDependencies number of dependencies
*/
public static int ncuGraphAddEmptyNode(long phGraphNode, long hGraph, long dependencies, long numDependencies) {
long __functionAddress = Functions.GraphAddEmptyNode;
if (CHECKS) {
check(__functionAddress);
check(hGraph);
}
return callPPPPI(phGraphNode, hGraph, dependencies, numDependencies, __functionAddress);
}
/**
* Creates an empty node and adds it to a graph.
*
* Creates a new node which performs no operation, and adds it to {@code hGraph} with {@code numDependencies} dependencies specified via {@code
* dependencies}. It is possible for {@code numDependencies} to be 0, in which case the node will be placed at the root of the graph. {@code dependencies}
* may not have any duplicate entries. A handle to the new node will be returned in {@code phGraphNode}.
*
* An empty node performs no operation during execution, but can be used for transitive ordering. For example, a phased execution graph with 2 groups of n
* nodes with a barrier between them can be represented using an empty node and 2*n dependency edges, rather than no empty node and n^2 dependency edges.
*
* @param phGraphNode returns newly created node
* @param hGraph graph to which to add the node
* @param dependencies dependencies of the node
*/
@NativeType("CUresult")
public static int cuGraphAddEmptyNode(@NativeType("CUgraphNode *") PointerBuffer phGraphNode, @NativeType("CUgraph") long hGraph, @Nullable @NativeType("CUgraphNode const *") PointerBuffer dependencies) {
if (CHECKS) {
check(phGraphNode, 1);
}
return ncuGraphAddEmptyNode(memAddress(phGraphNode), hGraph, memAddressSafe(dependencies), remainingSafe(dependencies));
}
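// Hedged usage sketch, not part of the generated bindings: an empty node as a barrier between
// two groups of nodes, giving the 2*n dependency edges described above. `a1`/`a2` are nodes of
// the first group, created elsewhere.
//
// try (MemoryStack stack = stackPush()) {
//     PointerBuffer pBarrier = stack.mallocPointer(1);
//     int err = cuGraphAddEmptyNode(pBarrier, hGraph, stack.pointers(a1, a2));
//     long barrier = pBarrier.get(0);
//     // each node of the second group is then added with stack.pointers(barrier) as its dependencies
// }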
// --- [ cuGraphAddEventRecordNode ] ---
/**
* Unsafe version of: {@link #cuGraphAddEventRecordNode GraphAddEventRecordNode}
*
* @param numDependencies number of dependencies
*/
public static int ncuGraphAddEventRecordNode(long phGraphNode, long hGraph, long dependencies, long numDependencies, long event) {
long __functionAddress = Functions.GraphAddEventRecordNode;
if (CHECKS) {
check(__functionAddress);
check(hGraph);
check(event);
}
return callPPPPPI(phGraphNode, hGraph, dependencies, numDependencies, event, __functionAddress);
}
/**
* Creates an event record node and adds it to a graph.
*
* Creates a new event record node and adds it to {@code hGraph} with {@code numDependencies} dependencies specified via {@code dependencies} and event
* specified in {@code event}. It is possible for {@code numDependencies} to be 0, in which case the node will be placed at the root of the graph. {@code
* dependencies} may not have any duplicate entries. A handle to the new node will be returned in {@code phGraphNode}.
*
* Each launch of the graph will record {@code event} to capture execution of the node's dependencies.
*
* @param phGraphNode returns newly created node
* @param hGraph graph to which to add the node
* @param dependencies dependencies of the node
* @param event event for the node
*/
@NativeType("CUresult")
public static int cuGraphAddEventRecordNode(@NativeType("CUgraphNode *") PointerBuffer phGraphNode, @NativeType("CUgraph") long hGraph, @Nullable @NativeType("CUgraphNode const *") PointerBuffer dependencies, @NativeType("CUevent") long event) {
if (CHECKS) {
check(phGraphNode, 1);
}
return ncuGraphAddEventRecordNode(memAddress(phGraphNode), hGraph, memAddressSafe(dependencies), remainingSafe(dependencies), event);
}
// --- [ cuGraphEventRecordNodeGetEvent ] ---
/** Unsafe version of: {@link #cuGraphEventRecordNodeGetEvent GraphEventRecordNodeGetEvent} */
public static int ncuGraphEventRecordNodeGetEvent(long hNode, long event_out) {
long __functionAddress = Functions.GraphEventRecordNodeGetEvent;
if (CHECKS) {
check(__functionAddress);
check(hNode);
}
return callPPI(hNode, event_out, __functionAddress);
}
/**
* Returns the event associated with an event record node.
*
* Returns the event of event record node {@code hNode} in {@code event_out}.
*
* @param hNode node to get the event for
* @param event_out pointer to return the event
*/
@NativeType("CUresult")
public static int cuGraphEventRecordNodeGetEvent(@NativeType("CUgraphNode") long hNode, @NativeType("CUevent *") PointerBuffer event_out) {
if (CHECKS) {
check(event_out, 1);
}
return ncuGraphEventRecordNodeGetEvent(hNode, memAddress(event_out));
}
// --- [ cuGraphEventRecordNodeSetEvent ] ---
/**
* Sets an event record node's event.
*
* Sets the event of event record node {@code hNode} to {@code event}.
*
* @param hNode node to set the event for
* @param event event to use
*/
@NativeType("CUresult")
public static int cuGraphEventRecordNodeSetEvent(@NativeType("CUgraphNode") long hNode, @NativeType("CUevent") long event) {
long __functionAddress = Functions.GraphEventRecordNodeSetEvent;
if (CHECKS) {
check(__functionAddress);
check(hNode);
check(event);
}
return callPPI(hNode, event, __functionAddress);
}
// --- [ cuGraphAddEventWaitNode ] ---
/**
* Unsafe version of: {@link #cuGraphAddEventWaitNode GraphAddEventWaitNode}
*
* @param numDependencies number of dependencies
*/
public static int ncuGraphAddEventWaitNode(long phGraphNode, long hGraph, long dependencies, long numDependencies, long event) {
long __functionAddress = Functions.GraphAddEventWaitNode;
if (CHECKS) {
check(__functionAddress);
check(hGraph);
check(event);
}
return callPPPPPI(phGraphNode, hGraph, dependencies, numDependencies, event, __functionAddress);
}
/**
* Creates an event wait node and adds it to a graph.
*
* Creates a new event wait node and adds it to {@code hGraph} with {@code numDependencies} dependencies specified via {@code dependencies} and event
* specified in {@code event}. It is possible for {@code numDependencies} to be 0, in which case the node will be placed at the root of the graph. {@code
* dependencies} may not have any duplicate entries. A handle to the new node will be returned in {@code phGraphNode}.
*
* The graph node will wait for all work captured in {@code event}. See {@link #cuEventRecord EventRecord} for details on what is captured by an event. {@code event} may
* be from a different context or device than the launch stream.
*
* @param phGraphNode returns newly created node
* @param hGraph graph to which to add the node
* @param dependencies dependencies of the node
* @param event event for the node
*/
@NativeType("CUresult")
public static int cuGraphAddEventWaitNode(@NativeType("CUgraphNode *") PointerBuffer phGraphNode, @NativeType("CUgraph") long hGraph, @Nullable @NativeType("CUgraphNode const *") PointerBuffer dependencies, @NativeType("CUevent") long event) {
if (CHECKS) {
check(phGraphNode, 1);
}
return ncuGraphAddEventWaitNode(memAddress(phGraphNode), hGraph, memAddressSafe(dependencies), remainingSafe(dependencies), event);
}
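// Hedged usage sketch, not part of the generated bindings: pairing a record node in one graph
// with a wait node in another, so the waiting graph orders behind everything captured by
// `event` (created elsewhere with cuEventCreate). `checkErr` is a hypothetical
// CUresult-checking helper.
//
// try (MemoryStack stack = stackPush()) {
//     PointerBuffer pNode = stack.mallocPointer(1);
//     checkErr(cuGraphAddEventRecordNode(pNode, hGraph, stack.pointers(producerNode), event));
//     checkErr(cuGraphAddEventWaitNode(pNode, otherGraph, null, event));
// }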
// --- [ cuGraphEventWaitNodeGetEvent ] ---
/** Unsafe version of: {@link #cuGraphEventWaitNodeGetEvent GraphEventWaitNodeGetEvent} */
public static int ncuGraphEventWaitNodeGetEvent(long hNode, long event_out) {
long __functionAddress = Functions.GraphEventWaitNodeGetEvent;
if (CHECKS) {
check(__functionAddress);
check(hNode);
}
return callPPI(hNode, event_out, __functionAddress);
}
/**
* Returns the event associated with an event wait node.
*
* Returns the event of event wait node {@code hNode} in {@code event_out}.
*
* @param hNode node to get the event for
* @param event_out pointer to return the event
*/
@NativeType("CUresult")
public static int cuGraphEventWaitNodeGetEvent(@NativeType("CUgraphNode") long hNode, @NativeType("CUevent *") PointerBuffer event_out) {
if (CHECKS) {
check(event_out, 1);
}
return ncuGraphEventWaitNodeGetEvent(hNode, memAddress(event_out));
}
// --- [ cuGraphEventWaitNodeSetEvent ] ---
/**
* Sets an event wait node's event.
*
* Sets the event of event wait node {@code hNode} to {@code event}.
*
* @param hNode node to set the event for
* @param event event to use
*/
@NativeType("CUresult")
public static int cuGraphEventWaitNodeSetEvent(@NativeType("CUgraphNode") long hNode, @NativeType("CUevent") long event) {
long __functionAddress = Functions.GraphEventWaitNodeSetEvent;
if (CHECKS) {
check(__functionAddress);
check(hNode);
check(event);
}
return callPPI(hNode, event, __functionAddress);
}
// --- [ cuGraphAddExternalSemaphoresSignalNode ] ---
/**
* Unsafe version of: {@link #cuGraphAddExternalSemaphoresSignalNode GraphAddExternalSemaphoresSignalNode}
*
* @param numDependencies number of dependencies
*/
public static int ncuGraphAddExternalSemaphoresSignalNode(long phGraphNode, long hGraph, long dependencies, long numDependencies, long nodeParams) {
long __functionAddress = Functions.GraphAddExternalSemaphoresSignalNode;
if (CHECKS) {
check(__functionAddress);
check(hGraph);
CUDA_EXT_SEM_SIGNAL_NODE_PARAMS.validate(nodeParams);
}
return callPPPPPI(phGraphNode, hGraph, dependencies, numDependencies, nodeParams, __functionAddress);
}
/**
* Creates an external semaphore signal node and adds it to a graph.
*
* Creates a new external semaphore signal node and adds it to {@code hGraph} with {@code numDependencies} dependencies specified via {@code dependencies}
* and arguments specified in {@code nodeParams}. It is possible for {@code numDependencies} to be 0, in which case the node will be placed at the root of
* the graph. {@code dependencies} may not have any duplicate entries. A handle to the new node will be returned in {@code phGraphNode}.
*
* Performs a signal operation on a set of externally allocated semaphore objects when the node is launched. The operation(s) will occur after all of the
* node's dependencies have completed.
*
* @param phGraphNode returns newly created node
* @param hGraph graph to which to add the node
* @param dependencies dependencies of the node
* @param nodeParams parameters for the node
*/
@NativeType("CUresult")
public static int cuGraphAddExternalSemaphoresSignalNode(@NativeType("CUgraphNode *") PointerBuffer phGraphNode, @NativeType("CUgraph") long hGraph, @Nullable @NativeType("CUgraphNode const *") PointerBuffer dependencies, @NativeType("CUDA_EXT_SEM_SIGNAL_NODE_PARAMS const *") CUDA_EXT_SEM_SIGNAL_NODE_PARAMS nodeParams) {
if (CHECKS) {
check(phGraphNode, 1);
}
return ncuGraphAddExternalSemaphoresSignalNode(memAddress(phGraphNode), hGraph, memAddressSafe(dependencies), remainingSafe(dependencies), nodeParams.address());
}
// --- [ cuGraphExternalSemaphoresSignalNodeGetParams ] ---
/** Unsafe version of: {@link #cuGraphExternalSemaphoresSignalNodeGetParams GraphExternalSemaphoresSignalNodeGetParams} */
public static int ncuGraphExternalSemaphoresSignalNodeGetParams(long hNode, long params_out) {
long __functionAddress = Functions.GraphExternalSemaphoresSignalNodeGetParams;
if (CHECKS) {
check(__functionAddress);
check(hNode);
}
return callPPI(hNode, params_out, __functionAddress);
}
/**
* Returns an external semaphore signal node's parameters.
*
* Returns the parameters of an external semaphore signal node {@code hNode} in {@code params_out}. The {@code extSemArray} and {@code paramsArray}
* returned in {@code params_out} are owned by the node. This memory remains valid until the node is destroyed or its parameters are modified, and should
* not be modified directly. Use {@link #cuGraphExternalSemaphoresSignalNodeSetParams GraphExternalSemaphoresSignalNodeSetParams} to update the parameters of this node.
*
* @param hNode node to get the parameters for
* @param params_out pointer to return the parameters
*/
@NativeType("CUresult")
public static int cuGraphExternalSemaphoresSignalNodeGetParams(@NativeType("CUgraphNode") long hNode, @NativeType("CUDA_EXT_SEM_SIGNAL_NODE_PARAMS *") CUDA_EXT_SEM_SIGNAL_NODE_PARAMS params_out) {
return ncuGraphExternalSemaphoresSignalNodeGetParams(hNode, params_out.address());
}
// --- [ cuGraphExternalSemaphoresSignalNodeSetParams ] ---
/** Unsafe version of: {@link #cuGraphExternalSemaphoresSignalNodeSetParams GraphExternalSemaphoresSignalNodeSetParams} */
public static int ncuGraphExternalSemaphoresSignalNodeSetParams(long hNode, long nodeParams) {
long __functionAddress = Functions.GraphExternalSemaphoresSignalNodeSetParams;
if (CHECKS) {
check(__functionAddress);
check(hNode);
CUDA_EXT_SEM_SIGNAL_NODE_PARAMS.validate(nodeParams);
}
return callPPI(hNode, nodeParams, __functionAddress);
}
/**
* Sets an external semaphore signal node's parameters.
*
* Sets the parameters of an external semaphore signal node {@code hNode} to {@code nodeParams}.
*
* @param hNode node to set the parameters for
* @param nodeParams parameters to copy
*/
@NativeType("CUresult")
public static int cuGraphExternalSemaphoresSignalNodeSetParams(@NativeType("CUgraphNode") long hNode, @NativeType("CUDA_EXT_SEM_SIGNAL_NODE_PARAMS const *") CUDA_EXT_SEM_SIGNAL_NODE_PARAMS nodeParams) {
return ncuGraphExternalSemaphoresSignalNodeSetParams(hNode, nodeParams.address());
}
// --- [ cuGraphAddExternalSemaphoresWaitNode ] ---
/**
* Unsafe version of: {@link #cuGraphAddExternalSemaphoresWaitNode GraphAddExternalSemaphoresWaitNode}
*
* @param numDependencies number of dependencies
*/
public static int ncuGraphAddExternalSemaphoresWaitNode(long phGraphNode, long hGraph, long dependencies, long numDependencies, long nodeParams) {
long __functionAddress = Functions.GraphAddExternalSemaphoresWaitNode;
if (CHECKS) {
check(__functionAddress);
check(hGraph);
CUDA_EXT_SEM_WAIT_NODE_PARAMS.validate(nodeParams);
}
return callPPPPPI(phGraphNode, hGraph, dependencies, numDependencies, nodeParams, __functionAddress);
}
/**
* Creates an external semaphore wait node and adds it to a graph.
*
* Creates a new external semaphore wait node and adds it to {@code hGraph} with {@code numDependencies} dependencies specified via {@code dependencies}
* and arguments specified in {@code nodeParams}. It is possible for {@code numDependencies} to be 0, in which case the node will be placed at the root of
* the graph. {@code dependencies} may not have any duplicate entries. A handle to the new node will be returned in {@code phGraphNode}.
*
* Performs a wait operation on a set of externally allocated semaphore objects when the node is launched. Nodes that depend on this node will not be
* launched until the wait operation has completed.
*
* @param phGraphNode returns newly created node
* @param hGraph graph to which to add the node
* @param dependencies dependencies of the node
* @param nodeParams parameters for the node
*/
@NativeType("CUresult")
public static int cuGraphAddExternalSemaphoresWaitNode(@NativeType("CUgraphNode *") PointerBuffer phGraphNode, @NativeType("CUgraph") long hGraph, @Nullable @NativeType("CUgraphNode const *") PointerBuffer dependencies, @NativeType("CUDA_EXT_SEM_WAIT_NODE_PARAMS const *") CUDA_EXT_SEM_WAIT_NODE_PARAMS nodeParams) {
if (CHECKS) {
check(phGraphNode, 1);
}
return ncuGraphAddExternalSemaphoresWaitNode(memAddress(phGraphNode), hGraph, memAddressSafe(dependencies), remainingSafe(dependencies), nodeParams.address());
}
// --- [ cuGraphExternalSemaphoresWaitNodeGetParams ] ---
/** Unsafe version of: {@link #cuGraphExternalSemaphoresWaitNodeGetParams GraphExternalSemaphoresWaitNodeGetParams} */
public static int ncuGraphExternalSemaphoresWaitNodeGetParams(long hNode, long params_out) {
long __functionAddress = Functions.GraphExternalSemaphoresWaitNodeGetParams;
if (CHECKS) {
check(__functionAddress);
check(hNode);
}
return callPPI(hNode, params_out, __functionAddress);
}
/**
* Returns an external semaphore wait node's parameters.
*
* Returns the parameters of an external semaphore wait node {@code hNode} in {@code params_out}. The {@code extSemArray} and {@code paramsArray} returned
* in {@code params_out} are owned by the node. This memory remains valid until the node is destroyed or its parameters are modified, and should not be
* modified directly. Use {@link #cuGraphExternalSemaphoresWaitNodeSetParams GraphExternalSemaphoresWaitNodeSetParams} to update the parameters of this node.
*
* @param hNode node to get the parameters for
* @param params_out pointer to return the parameters
*/
@NativeType("CUresult")
public static int cuGraphExternalSemaphoresWaitNodeGetParams(@NativeType("CUgraphNode") long hNode, @NativeType("CUDA_EXT_SEM_WAIT_NODE_PARAMS *") CUDA_EXT_SEM_WAIT_NODE_PARAMS params_out) {
return ncuGraphExternalSemaphoresWaitNodeGetParams(hNode, params_out.address());
}
// --- [ cuGraphExternalSemaphoresWaitNodeSetParams ] ---
/** Unsafe version of: {@link #cuGraphExternalSemaphoresWaitNodeSetParams GraphExternalSemaphoresWaitNodeSetParams} */
public static int ncuGraphExternalSemaphoresWaitNodeSetParams(long hNode, long nodeParams) {
long __functionAddress = Functions.GraphExternalSemaphoresWaitNodeSetParams;
if (CHECKS) {
check(__functionAddress);
check(hNode);
CUDA_EXT_SEM_WAIT_NODE_PARAMS.validate(nodeParams);
}
return callPPI(hNode, nodeParams, __functionAddress);
}
/**
* Sets an external semaphore wait node's parameters.
*
* Sets the parameters of an external semaphore wait node {@code hNode} to {@code nodeParams}.
*
* @param hNode node to set the parameters for
* @param nodeParams parameters to copy
*/
@NativeType("CUresult")
public static int cuGraphExternalSemaphoresWaitNodeSetParams(@NativeType("CUgraphNode") long hNode, @NativeType("CUDA_EXT_SEM_WAIT_NODE_PARAMS const *") CUDA_EXT_SEM_WAIT_NODE_PARAMS nodeParams) {
return ncuGraphExternalSemaphoresWaitNodeSetParams(hNode, nodeParams.address());
}
// --- [ cuGraphAddMemAllocNode ] ---
/**
* Unsafe version of: {@link #cuGraphAddMemAllocNode GraphAddMemAllocNode}
*
* @param numDependencies number of dependencies
*/
public static int ncuGraphAddMemAllocNode(long phGraphNode, long hGraph, long dependencies, long numDependencies, long nodeParams) {
long __functionAddress = Functions.GraphAddMemAllocNode;
if (CHECKS) {
check(__functionAddress);
check(hGraph);
}
return callPPPPPI(phGraphNode, hGraph, dependencies, numDependencies, nodeParams, __functionAddress);
}
/**
* Creates an allocation node and adds it to a graph.
*
* Creates a new allocation node and adds it to {@code hGraph} with {@code numDependencies} dependencies specified via {@code dependencies} and arguments
* specified in {@code nodeParams}. It is possible for {@code numDependencies} to be 0, in which case the node will be placed at the root of the graph.
* {@code dependencies} may not have any duplicate entries. A handle to the new node will be returned in {@code phGraphNode}.
*
* When {@link #cuGraphAddMemAllocNode GraphAddMemAllocNode} creates an allocation node, it returns the address of the allocation in {@code nodeParams.dptr}. The allocation's address
* remains fixed across instantiations and launches.
*
* If the allocation is freed in the same graph, by creating a free node using {@link #cuGraphAddMemFreeNode GraphAddMemFreeNode}, the allocation can be accessed by nodes ordered
* after the allocation node but before the free node. These allocations cannot be freed outside the owning graph, and they can only be freed once in the
* owning graph.
*
* If the allocation is not freed in the same graph, then it can be accessed not only by nodes in the graph which are ordered after the allocation node,
* but also by stream operations ordered after the graph's execution but before the allocation is freed.
*
* Allocations which are not freed in the same graph can be freed by:
*
* - passing the allocation to {@link #cuMemFreeAsync MemFreeAsync} or {@link #cuMemFree MemFree};
* - launching a graph with a free node for that allocation; or
* - specifying {@link #CUDA_GRAPH_INSTANTIATE_FLAG_AUTO_FREE_ON_LAUNCH} during instantiation, which makes each launch behave as though it called
* {@link #cuMemFreeAsync MemFreeAsync} for every unfreed allocation.
*
* It is not possible to free an allocation in both the owning graph and another graph. If the allocation is freed in the same graph, a free node cannot
* be added to another graph. If the allocation is freed in another graph, a free node can no longer be added to the owning graph.
*
* The following restrictions apply to graphs which contain allocation and/or memory free nodes:
*
* - Nodes and edges of the graph cannot be deleted.
* - The graph cannot be used in a child node.
* - Only one instantiation of the graph may exist at any point in time.
* - The graph cannot be cloned.
*
* @param phGraphNode returns newly created node
* @param hGraph graph to which to add the node
* @param dependencies dependencies of the node
* @param nodeParams parameters for the node
*/
@NativeType("CUresult")
public static int cuGraphAddMemAllocNode(@NativeType("CUgraphNode *") PointerBuffer phGraphNode, @NativeType("CUgraph") long hGraph, @Nullable @NativeType("CUgraphNode const *") PointerBuffer dependencies, @NativeType("CUDA_MEM_ALLOC_NODE_PARAMS *") CUDA_MEM_ALLOC_NODE_PARAMS nodeParams) {
if (CHECKS) {
check(phGraphNode, 1);
}
return ncuGraphAddMemAllocNode(memAddress(phGraphNode), hGraph, memAddressSafe(dependencies), remainingSafe(dependencies), nodeParams.address());
}
// --- [ cuGraphMemAllocNodeGetParams ] ---
/** Unsafe version of: {@link #cuGraphMemAllocNodeGetParams GraphMemAllocNodeGetParams} */
public static int ncuGraphMemAllocNodeGetParams(long hNode, long params_out) {
long __functionAddress = Functions.GraphMemAllocNodeGetParams;
if (CHECKS) {
check(__functionAddress);
check(hNode);
}
return callPPI(hNode, params_out, __functionAddress);
}
/**
* Returns a memory alloc node's parameters.
*
* Returns the parameters of a memory alloc node {@code hNode} in {@code params_out}. The {@code poolProps} and {@code accessDescs} returned in {@code
* params_out} are owned by the node. This memory remains valid until the node is destroyed. The returned parameters must not be modified.
*
* @param hNode node to get the parameters for
* @param params_out pointer to return the parameters
*/
@NativeType("CUresult")
public static int cuGraphMemAllocNodeGetParams(@NativeType("CUgraphNode") long hNode, @NativeType("CUDA_MEM_ALLOC_NODE_PARAMS *") CUDA_MEM_ALLOC_NODE_PARAMS params_out) {
return ncuGraphMemAllocNodeGetParams(hNode, params_out.address());
}
// --- [ cuGraphAddMemFreeNode ] ---
/**
* Unsafe version of: {@link #cuGraphAddMemFreeNode GraphAddMemFreeNode}
*
* @param numDependencies number of dependencies
*/
public static int ncuGraphAddMemFreeNode(long phGraphNode, long hGraph, long dependencies, long numDependencies, long dptr) {
long __functionAddress = Functions.GraphAddMemFreeNode;
if (CHECKS) {
check(__functionAddress);
check(hGraph);
check(dptr);
}
return callPPPPPI(phGraphNode, hGraph, dependencies, numDependencies, dptr, __functionAddress);
}
/**
* Creates a memory free node and adds it to a graph.
*
* Creates a new memory free node and adds it to {@code hGraph} with {@code numDependencies} dependencies specified via {@code dependencies} and the address
* specified in {@code dptr}. It is possible for {@code numDependencies} to be 0, in which case the node will be placed at the root of the graph.
* {@code dependencies} may not have any duplicate entries. A handle to the new node will be returned in {@code phGraphNode}.
*
* {@link #cuGraphAddMemFreeNode GraphAddMemFreeNode} will return {@link #CUDA_ERROR_INVALID_VALUE} if the user attempts to free:
*
*
* - an allocation twice in the same graph.
* - an address that was not returned by an allocation node.
* - an invalid address.
*
*
* The following restrictions apply to graphs which contain allocation and/or memory free nodes:
*
*
* - Nodes and edges of the graph cannot be deleted.
* - The graph cannot be used in a child node.
* - Only one instantiation of the graph may exist at any point in time.
* - The graph cannot be cloned.
*
*
* @param phGraphNode returns newly created node
* @param hGraph graph to which to add the node
* @param dependencies dependencies of the node
* @param dptr address of memory to free
*/
@NativeType("CUresult")
public static int cuGraphAddMemFreeNode(@NativeType("CUgraphNode *") PointerBuffer phGraphNode, @NativeType("CUgraph") long hGraph, @Nullable @NativeType("CUgraphNode const *") PointerBuffer dependencies, @NativeType("CUdeviceptr") long dptr) {
if (CHECKS) {
check(phGraphNode, 1);
}
return ncuGraphAddMemFreeNode(memAddress(phGraphNode), hGraph, memAddressSafe(dependencies), remainingSafe(dependencies), dptr);
}
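// Illustrative sketch (not part of the generated bindings; the helper name is hypothetical):
// adds a free node for an allocation made by an existing alloc node. hGraph, allocNode and
// dptr are assumed to be valid handles/addresses obtained from a prior cuGraphAddMemAllocNode call.
private static long addMemFreeNodeExample(long hGraph, long allocNode, long dptr) {
    try (MemoryStack stack = stackPush()) {
        PointerBuffer phFreeNode   = stack.mallocPointer(1);
        PointerBuffer dependencies = stack.pointers(allocNode); // free strictly after the allocation
        int err = cuGraphAddMemFreeNode(phFreeNode, hGraph, dependencies, dptr);
        return err == CUDA_SUCCESS ? phFreeNode.get(0) : NULL;
    }
}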
// --- [ cuGraphMemFreeNodeGetParams ] ---
/** Unsafe version of: {@link #cuGraphMemFreeNodeGetParams GraphMemFreeNodeGetParams} */
public static int ncuGraphMemFreeNodeGetParams(long hNode, long dptr_out) {
long __functionAddress = Functions.GraphMemFreeNodeGetParams;
if (CHECKS) {
check(__functionAddress);
check(hNode);
}
return callPPI(hNode, dptr_out, __functionAddress);
}
/**
* Returns a memory free node's parameters.
*
* Returns the address of a memory free node {@code hNode} in {@code dptr_out}.
*
* @param hNode node to get the parameters for
* @param dptr_out pointer to return the device address
*/
@NativeType("CUresult")
public static int cuGraphMemFreeNodeGetParams(@NativeType("CUgraphNode") long hNode, @NativeType("CUdeviceptr *") PointerBuffer dptr_out) {
if (CHECKS) {
check(dptr_out, 1);
}
return ncuGraphMemFreeNodeGetParams(hNode, memAddress(dptr_out));
}
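// Illustrative sketch (hypothetical helper): recovers the device address released by a
// free node. Note that a CUdeviceptr out-parameter maps to a PointerBuffer here.
private static long memFreeNodeAddressExample(long hFreeNode) {
    try (MemoryStack stack = stackPush()) {
        PointerBuffer dptr = stack.mallocPointer(1);
        cuGraphMemFreeNodeGetParams(hFreeNode, dptr);
        return dptr.get(0);
    }
}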
// --- [ cuDeviceGraphMemTrim ] ---
/**
* Frees unused graph memory that was cached on the specified device back to the OS.
*
* Blocks which are not in use by a graph that is either currently executing or scheduled to execute are freed back to the operating system.
*
* @param device the device for which cached memory should be freed
*/
@NativeType("CUresult")
public static int cuDeviceGraphMemTrim(@NativeType("CUdevice") int device) {
long __functionAddress = Functions.DeviceGraphMemTrim;
if (CHECKS) {
check(__functionAddress);
}
return callI(device, __functionAddress);
}
// --- [ cuDeviceGetGraphMemAttribute ] ---
/** Unsafe version of: {@link #cuDeviceGetGraphMemAttribute DeviceGetGraphMemAttribute} */
public static int ncuDeviceGetGraphMemAttribute(int device, int attr, long value) {
long __functionAddress = Functions.DeviceGetGraphMemAttribute;
if (CHECKS) {
check(__functionAddress);
}
return callPI(device, attr, value, __functionAddress);
}
/**
* Query asynchronous allocation attributes related to graphs.
*
* Valid attributes are:
*
*
* - {@link #CU_GRAPH_MEM_ATTR_USED_MEM_CURRENT GRAPH_MEM_ATTR_USED_MEM_CURRENT}: Amount of memory, in bytes, currently associated with graphs
* - {@link #CU_GRAPH_MEM_ATTR_USED_MEM_HIGH GRAPH_MEM_ATTR_USED_MEM_HIGH}: High watermark of memory, in bytes, associated with graphs since the last time it was reset. High watermark can
* only be reset to zero.
* - {@link #CU_GRAPH_MEM_ATTR_RESERVED_MEM_CURRENT GRAPH_MEM_ATTR_RESERVED_MEM_CURRENT}: Amount of memory, in bytes, currently allocated for use by the CUDA graphs asynchronous allocator.
* - {@link #CU_GRAPH_MEM_ATTR_RESERVED_MEM_HIGH GRAPH_MEM_ATTR_RESERVED_MEM_HIGH}: High watermark of memory, in bytes, allocated for use by the CUDA graphs asynchronous allocator.
*
*
* @param device specifies the scope of the query
* @param attr attribute to get
* @param value retrieved value
*/
@NativeType("CUresult")
public static int cuDeviceGetGraphMemAttribute(@NativeType("CUdevice") int device, @NativeType("CUgraphMem_attribute") int attr, @NativeType("void *") ByteBuffer value) {
return ncuDeviceGetGraphMemAttribute(device, attr, memAddress(value));
}
/**
* Query asynchronous allocation attributes related to graphs.
*
* Valid attributes are:
*
*
* - {@link #CU_GRAPH_MEM_ATTR_USED_MEM_CURRENT GRAPH_MEM_ATTR_USED_MEM_CURRENT}: Amount of memory, in bytes, currently associated with graphs
* - {@link #CU_GRAPH_MEM_ATTR_USED_MEM_HIGH GRAPH_MEM_ATTR_USED_MEM_HIGH}: High watermark of memory, in bytes, associated with graphs since the last time it was reset. High watermark can
* only be reset to zero.
* - {@link #CU_GRAPH_MEM_ATTR_RESERVED_MEM_CURRENT GRAPH_MEM_ATTR_RESERVED_MEM_CURRENT}: Amount of memory, in bytes, currently allocated for use by the CUDA graphs asynchronous allocator.
* - {@link #CU_GRAPH_MEM_ATTR_RESERVED_MEM_HIGH GRAPH_MEM_ATTR_RESERVED_MEM_HIGH}: High watermark of memory, in bytes, allocated for use by the CUDA graphs asynchronous allocator.
*
*
* @param device specifies the scope of the query
* @param attr attribute to get
* @param value retrieved value
*/
@NativeType("CUresult")
public static int cuDeviceGetGraphMemAttribute(@NativeType("CUdevice") int device, @NativeType("CUgraphMem_attribute") int attr, @NativeType("void *") LongBuffer value) {
return ncuDeviceGetGraphMemAttribute(device, attr, memAddress(value));
}
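// Illustrative sketch (hypothetical helper): reads the current graph memory footprint of a
// device. All graph memory attributes are 64-bit values, so a 1-element LongBuffer is
// sufficient; `device` is assumed to be a valid CUdevice ordinal.
private static long graphMemUsedExample(int device) {
    try (MemoryStack stack = stackPush()) {
        LongBuffer value = stack.mallocLong(1);
        cuDeviceGetGraphMemAttribute(device, CU_GRAPH_MEM_ATTR_USED_MEM_CURRENT, value);
        return value.get(0);
    }
}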
// --- [ cuDeviceSetGraphMemAttribute ] ---
/** Unsafe version of: {@link #cuDeviceSetGraphMemAttribute DeviceSetGraphMemAttribute} */
public static int ncuDeviceSetGraphMemAttribute(int device, int attr, long value) {
long __functionAddress = Functions.DeviceSetGraphMemAttribute;
if (CHECKS) {
check(__functionAddress);
}
return callPI(device, attr, value, __functionAddress);
}
/**
* Set asynchronous allocation attributes related to graphs.
*
* Valid attributes are:
*
*
* - {@link #CU_GRAPH_MEM_ATTR_USED_MEM_HIGH GRAPH_MEM_ATTR_USED_MEM_HIGH}: High watermark of memory, in bytes, associated with graphs since the last time it was reset. High watermark can
* only be reset to zero.
* - {@link #CU_GRAPH_MEM_ATTR_RESERVED_MEM_HIGH GRAPH_MEM_ATTR_RESERVED_MEM_HIGH}: High watermark of memory, in bytes, allocated for use by the CUDA graphs asynchronous allocator.
*
*
* @param device specifies the scope of the query
* @param attr attribute to set
* @param value pointer to value to set
*/
@NativeType("CUresult")
public static int cuDeviceSetGraphMemAttribute(@NativeType("CUdevice") int device, @NativeType("CUgraphMem_attribute") int attr, @NativeType("void *") ByteBuffer value) {
return ncuDeviceSetGraphMemAttribute(device, attr, memAddress(value));
}
/**
* Set asynchronous allocation attributes related to graphs.
*
* Valid attributes are:
*
*
* - {@link #CU_GRAPH_MEM_ATTR_USED_MEM_HIGH GRAPH_MEM_ATTR_USED_MEM_HIGH}: High watermark of memory, in bytes, associated with graphs since the last time it was reset. High watermark can
* only be reset to zero.
* - {@link #CU_GRAPH_MEM_ATTR_RESERVED_MEM_HIGH GRAPH_MEM_ATTR_RESERVED_MEM_HIGH}: High watermark of memory, in bytes, allocated for use by the CUDA graphs asynchronous allocator.
*
*
* @param device specifies the scope of the query
* @param attr attribute to set
* @param value pointer to value to set
*/
@NativeType("CUresult")
public static int cuDeviceSetGraphMemAttribute(@NativeType("CUdevice") int device, @NativeType("CUgraphMem_attribute") int attr, @NativeType("void *") LongBuffer value) {
return ncuDeviceSetGraphMemAttribute(device, attr, memAddress(value));
}
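// Illustrative sketch (hypothetical helper): resets a high watermark. As noted above,
// high watermarks can only be reset to zero, so 0 is the only meaningful value to write.
private static void resetGraphMemHighWatermarkExample(int device) {
    try (MemoryStack stack = stackPush()) {
        cuDeviceSetGraphMemAttribute(device, CU_GRAPH_MEM_ATTR_USED_MEM_HIGH, stack.longs(0L));
    }
}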
// --- [ cuGraphClone ] ---
/** Unsafe version of: {@link #cuGraphClone GraphClone} */
public static int ncuGraphClone(long phGraphClone, long originalGraph) {
long __functionAddress = Functions.GraphClone;
if (CHECKS) {
check(__functionAddress);
check(originalGraph);
}
return callPPI(phGraphClone, originalGraph, __functionAddress);
}
/**
* Clones a graph.
*
* This function creates a copy of {@code originalGraph} and returns it in {@code phGraphClone}. All parameters are copied into the cloned graph. The
* original graph may be modified after this call without affecting the clone.
*
* Child graph nodes in the original graph are recursively copied into the clone.
*
* @param phGraphClone returns newly created cloned graph
* @param originalGraph graph to clone
*/
@NativeType("CUresult")
public static int cuGraphClone(@NativeType("CUgraph *") PointerBuffer phGraphClone, @NativeType("CUgraph") long originalGraph) {
if (CHECKS) {
check(phGraphClone, 1);
}
return ncuGraphClone(memAddress(phGraphClone), originalGraph);
}
// --- [ cuGraphNodeFindInClone ] ---
/** Unsafe version of: {@link #cuGraphNodeFindInClone GraphNodeFindInClone} */
public static int ncuGraphNodeFindInClone(long phNode, long hOriginalNode, long hClonedGraph) {
long __functionAddress = Functions.GraphNodeFindInClone;
if (CHECKS) {
check(__functionAddress);
check(hOriginalNode);
check(hClonedGraph);
}
return callPPPI(phNode, hOriginalNode, hClonedGraph, __functionAddress);
}
/**
* Finds a cloned version of a node.
*
* This function returns the node in {@code hClonedGraph} corresponding to {@code hOriginalNode} in the original graph.
*
* {@code hClonedGraph} must have been cloned from {@code hOriginalGraph} via {@link #cuGraphClone GraphClone}. {@code hOriginalNode} must have been in {@code
* hOriginalGraph} at the time of the call to {@link #cuGraphClone GraphClone}, and the corresponding cloned node in {@code hClonedGraph} must not have been removed. The
* cloned node is then returned via {@code phNode}.
*
* @param phNode returns handle to the cloned node
* @param hOriginalNode handle to the original node
* @param hClonedGraph cloned graph to query
*/
@NativeType("CUresult")
public static int cuGraphNodeFindInClone(@NativeType("CUgraphNode *") PointerBuffer phNode, @NativeType("CUgraphNode") long hOriginalNode, @NativeType("CUgraph") long hClonedGraph) {
if (CHECKS) {
check(phNode, 1);
}
return ncuGraphNodeFindInClone(memAddress(phNode), hOriginalNode, hClonedGraph);
}
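// Illustrative sketch (hypothetical helper): clones a graph and locates the clone's
// counterpart of a known node, e.g. to keep editing the copy while the original
// remains untouched. Returns NULL if cloning fails.
private static long cloneAndFindExample(long hGraph, long hOriginalNode) {
    try (MemoryStack stack = stackPush()) {
        PointerBuffer phClone = stack.mallocPointer(1);
        if (cuGraphClone(phClone, hGraph) != CUDA_SUCCESS) {
            return NULL;
        }
        PointerBuffer phNode = stack.mallocPointer(1);
        cuGraphNodeFindInClone(phNode, hOriginalNode, phClone.get(0));
        return phNode.get(0); // counterpart of hOriginalNode in the cloned graph
    }
}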
// --- [ cuGraphNodeGetType ] ---
/** Unsafe version of: {@link #cuGraphNodeGetType GraphNodeGetType} */
public static int ncuGraphNodeGetType(long hNode, long type) {
long __functionAddress = Functions.GraphNodeGetType;
if (CHECKS) {
check(__functionAddress);
check(hNode);
}
return callPPI(hNode, type, __functionAddress);
}
/**
* Returns a node's type.
*
* Returns the node type of {@code hNode} in {@code type}.
*
* @param hNode node to query
* @param type pointer to return the node type
*/
@NativeType("CUresult")
public static int cuGraphNodeGetType(@NativeType("CUgraphNode") long hNode, @NativeType("CUgraphNodeType *") IntBuffer type) {
if (CHECKS) {
check(type, 1);
}
return ncuGraphNodeGetType(hNode, memAddress(type));
}
// --- [ cuGraphGetNodes ] ---
/**
* Unsafe version of: {@link #cuGraphGetNodes GraphGetNodes}
*
* @param numNodes see description
*/
public static int ncuGraphGetNodes(long hGraph, long nodes, long numNodes) {
long __functionAddress = Functions.GraphGetNodes;
if (CHECKS) {
check(__functionAddress);
check(hGraph);
}
return callPPPI(hGraph, nodes, numNodes, __functionAddress);
}
/**
* Returns a graph's nodes.
*
* Returns a list of {@code hGraph's} nodes. {@code nodes} may be {@code NULL}, in which case this function will return the number of nodes in {@code numNodes}.
* Otherwise, {@code numNodes} entries will be filled in. If {@code numNodes} is higher than the actual number of nodes, the remaining entries in {@code
* nodes} will be set to {@code NULL}, and the number of nodes actually obtained will be returned in {@code numNodes}.
*
* @param hGraph graph to query
* @param nodes pointer to return the nodes
* @param numNodes see description
*/
@NativeType("CUresult")
public static int cuGraphGetNodes(@NativeType("CUgraph") long hGraph, @Nullable @NativeType("CUgraphNode *") PointerBuffer nodes, @NativeType("size_t *") PointerBuffer numNodes) {
if (CHECKS) {
check(numNodes, 1);
checkSafe(nodes, numNodes.get(numNodes.position()));
}
return ncuGraphGetNodes(hGraph, memAddressSafe(nodes), memAddress(numNodes));
}
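// Illustrative sketch of the usual two-call pattern (hypothetical helper): query the
// node count with a NULL buffer first, then allocate and fetch the handles. The same
// pattern applies to GraphGetRootNodes, GraphGetEdges, GraphNodeGetDependencies and
// GraphNodeGetDependentNodes. The caller's stack frame owns the returned buffer.
private static PointerBuffer graphNodesExample(long hGraph, MemoryStack stack) {
    PointerBuffer numNodes = stack.mallocPointer(1);
    cuGraphGetNodes(hGraph, null, numNodes);              // first call: count only
    PointerBuffer nodes = stack.mallocPointer((int)numNodes.get(0));
    cuGraphGetNodes(hGraph, nodes, numNodes);             // second call: fill the handles
    return nodes;
}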
// --- [ cuGraphGetRootNodes ] ---
/**
* Unsafe version of: {@link #cuGraphGetRootNodes GraphGetRootNodes}
*
* @param numRootNodes see description
*/
public static int ncuGraphGetRootNodes(long hGraph, long rootNodes, long numRootNodes) {
long __functionAddress = Functions.GraphGetRootNodes;
if (CHECKS) {
check(__functionAddress);
check(hGraph);
}
return callPPPI(hGraph, rootNodes, numRootNodes, __functionAddress);
}
/**
* Returns a graph's root nodes.
*
* Returns a list of {@code hGraph's} root nodes. {@code rootNodes} may be {@code NULL}, in which case this function will return the number of root nodes in
* {@code numRootNodes}. Otherwise, {@code numRootNodes} entries will be filled in. If {@code numRootNodes} is higher than the actual number of root
* nodes, the remaining entries in {@code rootNodes} will be set to {@code NULL}, and the number of nodes actually obtained will be returned in {@code
* numRootNodes}.
*
* @param hGraph graph to query
* @param rootNodes pointer to return the root nodes
* @param numRootNodes see description
*/
@NativeType("CUresult")
public static int cuGraphGetRootNodes(@NativeType("CUgraph") long hGraph, @Nullable @NativeType("CUgraphNode *") PointerBuffer rootNodes, @NativeType("size_t *") PointerBuffer numRootNodes) {
if (CHECKS) {
check(numRootNodes, 1);
checkSafe(rootNodes, numRootNodes.get(numRootNodes.position()));
}
return ncuGraphGetRootNodes(hGraph, memAddressSafe(rootNodes), memAddress(numRootNodes));
}
// --- [ cuGraphGetEdges ] ---
/**
* Unsafe version of: {@link #cuGraphGetEdges GraphGetEdges}
*
* @param numEdges see description
*/
public static int ncuGraphGetEdges(long hGraph, long from, long to, long numEdges) {
long __functionAddress = Functions.GraphGetEdges;
if (CHECKS) {
check(__functionAddress);
check(hGraph);
}
return callPPPPI(hGraph, from, to, numEdges, __functionAddress);
}
/**
* Returns a graph's dependency edges.
*
* Returns a list of {@code hGraph's} dependency edges. Edges are returned via corresponding indices in {@code from} and {@code to}; that is, the node in
* {@code to[i]} has a dependency on the node in {@code from[i]}. {@code from} and {@code to} may both be {@code NULL}, in which case this function only returns
* the number of edges in {@code numEdges}. Otherwise, {@code numEdges} entries will be filled in. If {@code numEdges} is higher than the actual number of
* edges, the remaining entries in {@code from} and {@code to} will be set to {@code NULL}, and the number of edges actually returned will be written to
* {@code numEdges}.
*
* @param hGraph graph to get the edges from
* @param from location to return edge endpoints
* @param to location to return edge endpoints
* @param numEdges see description
*/
@NativeType("CUresult")
public static int cuGraphGetEdges(@NativeType("CUgraph") long hGraph, @Nullable @NativeType("CUgraphNode *") PointerBuffer from, @Nullable @NativeType("CUgraphNode *") PointerBuffer to, @NativeType("size_t *") PointerBuffer numEdges) {
if (CHECKS) {
check(numEdges, 1);
checkSafe(from, numEdges.get(numEdges.position()));
checkSafe(to, numEdges.get(numEdges.position()));
}
return ncuGraphGetEdges(hGraph, memAddressSafe(from), memAddressSafe(to), memAddress(numEdges));
}
// --- [ cuGraphNodeGetDependencies ] ---
/**
* Unsafe version of: {@link #cuGraphNodeGetDependencies GraphNodeGetDependencies}
*
* @param numDependencies see description
*/
public static int ncuGraphNodeGetDependencies(long hNode, long dependencies, long numDependencies) {
long __functionAddress = Functions.GraphNodeGetDependencies;
if (CHECKS) {
check(__functionAddress);
check(hNode);
}
return callPPPI(hNode, dependencies, numDependencies, __functionAddress);
}
/**
* Returns a node's dependencies.
*
* Returns a list of {@code node's} dependencies. {@code dependencies} may be {@code NULL}, in which case this function will return the number of dependencies in
* {@code numDependencies}. Otherwise, {@code numDependencies} entries will be filled in. If {@code numDependencies} is higher than the actual number of
* dependencies, the remaining entries in {@code dependencies} will be set to {@code NULL}, and the number of nodes actually obtained will be returned in
* {@code numDependencies}.
*
* @param hNode node to query
* @param dependencies pointer to return the dependencies
* @param numDependencies see description
*/
@NativeType("CUresult")
public static int cuGraphNodeGetDependencies(@NativeType("CUgraphNode") long hNode, @Nullable @NativeType("CUgraphNode *") PointerBuffer dependencies, @NativeType("size_t *") PointerBuffer numDependencies) {
if (CHECKS) {
check(numDependencies, 1);
checkSafe(dependencies, numDependencies.get(numDependencies.position()));
}
return ncuGraphNodeGetDependencies(hNode, memAddressSafe(dependencies), memAddress(numDependencies));
}
// --- [ cuGraphNodeGetDependentNodes ] ---
/**
* Unsafe version of: {@link #cuGraphNodeGetDependentNodes GraphNodeGetDependentNodes}
*
* @param numDependentNodes see description
*/
public static int ncuGraphNodeGetDependentNodes(long hNode, long dependentNodes, long numDependentNodes) {
long __functionAddress = Functions.GraphNodeGetDependentNodes;
if (CHECKS) {
check(__functionAddress);
check(hNode);
}
return callPPPI(hNode, dependentNodes, numDependentNodes, __functionAddress);
}
/**
* Returns a node's dependent nodes.
*
* Returns a list of {@code node's} dependent nodes. {@code dependentNodes} may be {@code NULL}, in which case this function will return the number of dependent
* nodes in {@code numDependentNodes}. Otherwise, {@code numDependentNodes} entries will be filled in. If {@code numDependentNodes} is higher than the
* actual number of dependent nodes, the remaining entries in {@code dependentNodes} will be set to {@code NULL}, and the number of nodes actually obtained will
* be returned in {@code numDependentNodes}.
*
* @param hNode node to query
* @param dependentNodes pointer to return the dependent nodes
* @param numDependentNodes see description
*/
@NativeType("CUresult")
public static int cuGraphNodeGetDependentNodes(@NativeType("CUgraphNode") long hNode, @Nullable @NativeType("CUgraphNode *") PointerBuffer dependentNodes, @NativeType("size_t *") PointerBuffer numDependentNodes) {
if (CHECKS) {
check(numDependentNodes, 1);
checkSafe(dependentNodes, numDependentNodes.get(numDependentNodes.position()));
}
return ncuGraphNodeGetDependentNodes(hNode, memAddressSafe(dependentNodes), memAddress(numDependentNodes));
}
// --- [ cuGraphAddDependencies ] ---
/**
* Unsafe version of: {@link #cuGraphAddDependencies GraphAddDependencies}
*
* @param numDependencies number of dependencies to be added
*/
public static int ncuGraphAddDependencies(long hGraph, long from, long to, long numDependencies) {
long __functionAddress = Functions.GraphAddDependencies;
if (CHECKS) {
check(__functionAddress);
check(hGraph);
}
return callPPPPI(hGraph, from, to, numDependencies, __functionAddress);
}
/**
* Adds dependency edges to a graph.
*
* The number of dependencies to be added is defined by {@code numDependencies}. Elements in {@code from} and {@code to} at corresponding indices define a
* dependency. Each node in {@code from} and {@code to} must belong to {@code hGraph}.
*
* If {@code numDependencies} is 0, elements in {@code from} and {@code to} will be ignored. Specifying an existing dependency will return an error.
*
* @param hGraph graph to which dependencies are added
* @param from array of nodes that provide the dependencies
* @param to array of dependent nodes
*/
@NativeType("CUresult")
public static int cuGraphAddDependencies(@NativeType("CUgraph") long hGraph, @Nullable @NativeType("CUgraphNode const *") PointerBuffer from, @Nullable @NativeType("CUgraphNode const *") PointerBuffer to) {
if (CHECKS) {
checkSafe(to, remainingSafe(from));
}
return ncuGraphAddDependencies(hGraph, memAddressSafe(from), memAddressSafe(to), remainingSafe(from));
}
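// Illustrative sketch (hypothetical helper): makes `consumer` depend on `producer`.
// Edges are expressed as parallel from/to arrays; a single pair is added here and
// numDependencies is derived from from.remaining() by the wrapper above.
private static void addEdgeExample(long hGraph, long producer, long consumer) {
    try (MemoryStack stack = stackPush()) {
        PointerBuffer from = stack.pointers(producer);
        PointerBuffer to   = stack.pointers(consumer);
        cuGraphAddDependencies(hGraph, from, to);
    }
}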
// --- [ cuGraphRemoveDependencies ] ---
/**
* Unsafe version of: {@link #cuGraphRemoveDependencies GraphRemoveDependencies}
*
* @param numDependencies number of dependencies to be removed
*/
public static int ncuGraphRemoveDependencies(long hGraph, long from, long to, long numDependencies) {
long __functionAddress = Functions.GraphRemoveDependencies;
if (CHECKS) {
check(__functionAddress);
check(hGraph);
}
return callPPPPI(hGraph, from, to, numDependencies, __functionAddress);
}
/**
* Removes dependency edges from a graph.
*
* The number of dependencies to be removed is defined by {@code numDependencies}. Elements in {@code from} and {@code to} at corresponding
* indices define a dependency. Each node in {@code from} and {@code to} must belong to {@code hGraph}.
*
* If {@code numDependencies} is 0, elements in {@code from} and {@code to} will be ignored. Specifying a non-existing dependency will return an error.
*
* Dependencies cannot be removed from graphs which contain allocation or free nodes. Any attempt to do so will return an error.
*
* @param hGraph graph from which to remove dependencies
* @param from array of nodes that provide the dependencies
* @param to array of dependent nodes
*/
@NativeType("CUresult")
public static int cuGraphRemoveDependencies(@NativeType("CUgraph") long hGraph, @Nullable @NativeType("CUgraphNode const *") PointerBuffer from, @Nullable @NativeType("CUgraphNode const *") PointerBuffer to) {
if (CHECKS) {
checkSafe(to, remainingSafe(from));
}
return ncuGraphRemoveDependencies(hGraph, memAddressSafe(from), memAddressSafe(to), remainingSafe(from));
}
// --- [ cuGraphDestroyNode ] ---
/**
* Remove a node from the graph.
*
* Removes {@code hNode} from its graph. This operation also severs any dependencies of other nodes on {@code hNode} and vice versa.
*
* Nodes which belong to a graph which contains allocation or free nodes cannot be destroyed. Any attempt to do so will return an error.
*
* @param hNode node to remove
*/
@NativeType("CUresult")
public static int cuGraphDestroyNode(@NativeType("CUgraphNode") long hNode) {
long __functionAddress = Functions.GraphDestroyNode;
if (CHECKS) {
check(__functionAddress);
check(hNode);
}
return callPI(hNode, __functionAddress);
}
// --- [ cuGraphInstantiate ] ---
/**
* Unsafe version of: {@link #cuGraphInstantiate GraphInstantiate}
*
* @param bufferSize size of the log buffer in bytes
*/
public static int ncuGraphInstantiate(long phGraphExec, long hGraph, long phErrorNode, long logBuffer, long bufferSize) {
long __functionAddress = Functions.GraphInstantiate;
if (CHECKS) {
check(__functionAddress);
check(hGraph);
}
return callPPPPPI(phGraphExec, hGraph, phErrorNode, logBuffer, bufferSize, __functionAddress);
}
/**
* Creates an executable graph from a graph.
*
* Instantiates {@code hGraph} as an executable graph. The graph is validated for any structural constraints or intra-node constraints which were not
* previously validated. If instantiation is successful, a handle to the instantiated graph is returned in {@code phGraphExec}.
*
* If there are any errors, diagnostic information may be returned in {@code phErrorNode} and {@code logBuffer}. This is the primary way to inspect
* instantiation errors. The output will be null-terminated unless the diagnostics overflow the buffer. In this case, they will be truncated, and the last
* byte can be inspected to determine if truncation occurred.
*
* @param phGraphExec returns instantiated graph
* @param hGraph graph to instantiate
* @param phErrorNode in case of an instantiation error, this may be modified to indicate a node contributing to the error
* @param logBuffer a character buffer to store diagnostic messages
*/
@NativeType("CUresult")
public static int cuGraphInstantiate(@NativeType("CUgraphExec *") PointerBuffer phGraphExec, @NativeType("CUgraph") long hGraph, @NativeType("CUgraphNode *") PointerBuffer phErrorNode, @NativeType("char *") ByteBuffer logBuffer) {
if (CHECKS) {
check(phGraphExec, 1);
check(phErrorNode, 1);
}
return ncuGraphInstantiate(memAddress(phGraphExec), hGraph, memAddress(phErrorNode), memAddress(logBuffer), logBuffer.remaining());
}
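// Illustrative sketch (hypothetical helper): instantiates a graph with a diagnostic
// buffer and prints the log on failure. The 1 KiB size is an arbitrary choice, and
// decoding assumes the log was not truncated (i.e. it is null-terminated).
private static long instantiateExample(long hGraph) {
    try (MemoryStack stack = stackPush()) {
        PointerBuffer phGraphExec = stack.mallocPointer(1);
        PointerBuffer phErrorNode = stack.mallocPointer(1);
        ByteBuffer    logBuffer   = stack.malloc(1024);
        if (cuGraphInstantiate(phGraphExec, hGraph, phErrorNode, logBuffer) != CUDA_SUCCESS) {
            System.err.println("cuGraphInstantiate failed: " + memUTF8(memAddress(logBuffer)));
            return NULL;
        }
        return phGraphExec.get(0);
    }
}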
// --- [ cuGraphInstantiateWithFlags ] ---
/** Unsafe version of: {@link #cuGraphInstantiateWithFlags GraphInstantiateWithFlags} */
public static int ncuGraphInstantiateWithFlags(long phGraphExec, long hGraph, long flags) {
long __functionAddress = Functions.GraphInstantiateWithFlags;
if (CHECKS) {
check(__functionAddress);
check(hGraph);
}
return callPPJI(phGraphExec, hGraph, flags, __functionAddress);
}
/**
* Creates an executable graph from a graph.
*
* Instantiates {@code hGraph} as an executable graph. The graph is validated for any structural constraints or intra-node constraints which were not
* previously validated. If instantiation is successful, a handle to the instantiated graph is returned in {@code phGraphExec}.
*
* The {@code flags} parameter controls the behavior of instantiation and subsequent graph launches. Valid flags are:
*
*
* - {@link #CUDA_GRAPH_INSTANTIATE_FLAG_AUTO_FREE_ON_LAUNCH}, which configures a graph containing memory allocation nodes to automatically free any unfreed
* memory allocations before the graph is relaunched.
*
*
* If {@code hGraph} contains any allocation or free nodes, there can be at most one executable graph in existence for that graph at a time.
*
* An attempt to instantiate a second executable graph before destroying the first with {@link #cuGraphExecDestroy GraphExecDestroy} will result in an error.
*
* @param phGraphExec returns instantiated graph
* @param hGraph graph to instantiate
* @param flags flags to control instantiation. See {@code CUgraphInstantiate_flags}.
*/
@NativeType("CUresult")
public static int cuGraphInstantiateWithFlags(@NativeType("CUgraphExec *") PointerBuffer phGraphExec, @NativeType("CUgraph") long hGraph, @NativeType("unsigned long long") long flags) {
if (CHECKS) {
check(phGraphExec, 1);
}
return ncuGraphInstantiateWithFlags(memAddress(phGraphExec), hGraph, flags);
}
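// Illustrative sketch (hypothetical helper): instantiates a graph that owns allocation
// nodes so that each launch implicitly frees any allocations left unfreed by the
// previous launch, as described above.
private static long instantiateAutoFreeExample(long hGraph) {
    try (MemoryStack stack = stackPush()) {
        PointerBuffer phGraphExec = stack.mallocPointer(1);
        cuGraphInstantiateWithFlags(phGraphExec, hGraph, CUDA_GRAPH_INSTANTIATE_FLAG_AUTO_FREE_ON_LAUNCH);
        return phGraphExec.get(0);
    }
}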
// --- [ cuGraphExecKernelNodeSetParams ] ---
/** Unsafe version of: {@link #cuGraphExecKernelNodeSetParams GraphExecKernelNodeSetParams} */
public static int ncuGraphExecKernelNodeSetParams(long hGraphExec, long hNode, long nodeParams) {
long __functionAddress = Functions.GraphExecKernelNodeSetParams;
if (CHECKS) {
check(__functionAddress);
check(hGraphExec);
check(hNode);
CUDA_KERNEL_NODE_PARAMS.validate(nodeParams);
}
return callPPPI(hGraphExec, hNode, nodeParams, __functionAddress);
}
/**
* Sets the parameters for a kernel node in the given {@code graphExec}.
*
* Sets the parameters of a kernel node in an executable graph {@code hGraphExec}. The node is identified by the corresponding node {@code hNode} in the
* non-executable graph, from which the executable graph was instantiated.
*
* {@code hNode} must not have been removed from the original graph. The {@code func} field of {@code nodeParams} cannot be modified and must match the
* original value. All other values can be modified.
*
* The modifications take effect at the next launch of {@code hGraphExec}. Already enqueued or running launches of {@code hGraphExec} are not affected by
* this call. {@code hNode} is also not modified by this call.
*
* @param hGraphExec the executable graph in which to set the specified node
* @param hNode kernel node from the graph from which graphExec was instantiated
* @param nodeParams updated parameters to set
*/
@NativeType("CUresult")
public static int cuGraphExecKernelNodeSetParams(@NativeType("CUgraphExec") long hGraphExec, @NativeType("CUgraphNode") long hNode, @NativeType("CUDA_KERNEL_NODE_PARAMS const *") CUDA_KERNEL_NODE_PARAMS nodeParams) {
return ncuGraphExecKernelNodeSetParams(hGraphExec, hNode, nodeParams.address());
}
// --- [ cuGraphExecMemcpyNodeSetParams ] ---
/** Unsafe version of: {@link #cuGraphExecMemcpyNodeSetParams GraphExecMemcpyNodeSetParams} */
public static int ncuGraphExecMemcpyNodeSetParams(long hGraphExec, long hNode, long copyParams, long ctx) {
long __functionAddress = Functions.GraphExecMemcpyNodeSetParams;
if (CHECKS) {
check(__functionAddress);
check(hGraphExec);
check(hNode);
check(ctx);
}
return callPPPPI(hGraphExec, hNode, copyParams, ctx, __functionAddress);
}
/**
* Sets the parameters for a memcpy node in the given {@code graphExec}.
*
* Updates the work represented by {@code hNode} in {@code hGraphExec} as though {@code hNode} had contained {@code copyParams} at instantiation.
* {@code hNode} must remain in the graph which was used to instantiate {@code hGraphExec}. Changed edges to and from {@code hNode} are ignored.
*
* The source and destination memory in {@code copyParams} must be allocated from the same contexts as the original source and destination memory. Both
* the instantiation-time memory operands and the memory operands in {@code copyParams} must be 1-dimensional. Zero-length operations are not supported.
*
* The modifications only affect future launches of {@code hGraphExec}. Already enqueued or running launches of {@code hGraphExec} are not affected by
* this call. {@code hNode} is also not modified by this call.
*
* Returns {@link #CUDA_ERROR_INVALID_VALUE} if the memory operands' mappings changed or either the original or new memory operands are multidimensional.
*
* @param hGraphExec the executable graph in which to set the specified node
* @param hNode memcpy node from the graph which was used to instantiate graphExec
* @param copyParams the updated parameters to set
* @param ctx context on which to run the node
*/
@NativeType("CUresult")
public static int cuGraphExecMemcpyNodeSetParams(@NativeType("CUgraphExec") long hGraphExec, @NativeType("CUgraphNode") long hNode, @NativeType("CUDA_MEMCPY3D const *") CUDA_MEMCPY3D copyParams, @NativeType("CUcontext") long ctx) {
return ncuGraphExecMemcpyNodeSetParams(hGraphExec, hNode, copyParams.address(), ctx);
}
// --- [ cuGraphExecMemsetNodeSetParams ] ---
/** Unsafe version of: {@link #cuGraphExecMemsetNodeSetParams GraphExecMemsetNodeSetParams} */
public static int ncuGraphExecMemsetNodeSetParams(long hGraphExec, long hNode, long memsetParams, long ctx) {
long __functionAddress = Functions.GraphExecMemsetNodeSetParams;
if (CHECKS) {
check(__functionAddress);
check(hGraphExec);
check(hNode);
CUDA_MEMSET_NODE_PARAMS.validate(memsetParams);
check(ctx);
}
return callPPPPI(hGraphExec, hNode, memsetParams, ctx, __functionAddress);
}
/**
* Sets the parameters for a {@code memset} node in the given {@code graphExec}.
*
* Updates the work represented by {@code hNode} in {@code hGraphExec} as though {@code hNode} had contained {@code memsetParams} at instantiation.
* {@code hNode} must remain in the graph which was used to instantiate {@code hGraphExec}. Changed edges to and from {@code hNode} are ignored.
*
* The destination memory in {@code memsetParams} must be allocated from the same contexts as the original destination memory. Both the instantiation-time
* memory operand and the memory operand in {@code memsetParams} must be 1-dimensional. Zero-length operations are not supported.
*
* The modifications only affect future launches of {@code hGraphExec}. Already enqueued or running launches of {@code hGraphExec} are not affected by
* this call. {@code hNode} is also not modified by this call.
*
* Returns {@link #CUDA_ERROR_INVALID_VALUE} if the memory operand's mappings changed or either the original or new memory operand is multidimensional.
*
* @param hGraphExec the executable graph in which to set the specified node
* @param hNode memset node from the graph which was used to instantiate graphExec
* @param memsetParams the updated parameters to set
* @param ctx context on which to run the node
*/
@NativeType("CUresult")
public static int cuGraphExecMemsetNodeSetParams(@NativeType("CUgraphExec") long hGraphExec, @NativeType("CUgraphNode") long hNode, @NativeType("CUDA_MEMSET_NODE_PARAMS const *") CUDA_MEMSET_NODE_PARAMS memsetParams, @NativeType("CUcontext") long ctx) {
return ncuGraphExecMemsetNodeSetParams(hGraphExec, hNode, memsetParams.address(), ctx);
}
// --- [ cuGraphExecHostNodeSetParams ] ---
/** Unsafe version of: {@link #cuGraphExecHostNodeSetParams GraphExecHostNodeSetParams} */
public static int ncuGraphExecHostNodeSetParams(long hGraphExec, long hNode, long nodeParams) {
long __functionAddress = Functions.GraphExecHostNodeSetParams;
if (CHECKS) {
check(__functionAddress);
check(hGraphExec);
check(hNode);
CUDA_HOST_NODE_PARAMS.validate(nodeParams);
}
return callPPPI(hGraphExec, hNode, nodeParams, __functionAddress);
}
/**
* Sets the parameters for a host node in the given {@code graphExec}.
*
* Updates the work represented by {@code hNode} in {@code hGraphExec} as though {@code hNode} had contained {@code nodeParams} at instantiation.
* {@code hNode} must remain in the graph which was used to instantiate {@code hGraphExec}. Changed edges to and from {@code hNode} are ignored.
*
* The modifications only affect future launches of {@code hGraphExec}. Already enqueued or running launches of {@code hGraphExec} are not affected by
* this call. {@code hNode} is also not modified by this call.
*
* @param hGraphExec the executable graph in which to set the specified node
* @param hNode host node from the graph which was used to instantiate graphExec
* @param nodeParams the updated parameters to set
*/
@NativeType("CUresult")
public static int cuGraphExecHostNodeSetParams(@NativeType("CUgraphExec") long hGraphExec, @NativeType("CUgraphNode") long hNode, @NativeType("CUDA_HOST_NODE_PARAMS const *") CUDA_HOST_NODE_PARAMS nodeParams) {
return ncuGraphExecHostNodeSetParams(hGraphExec, hNode, nodeParams.address());
}
// --- [ cuGraphExecChildGraphNodeSetParams ] ---
/**
* Updates node parameters in the child graph node in the given {@code graphExec}.
*
* Updates the work represented by {@code hNode} in {@code hGraphExec} as though the nodes contained in {@code hNode's} graph had the parameters contained
* in {@code childGraph's} nodes at instantiation. {@code hNode} must remain in the graph which was used to instantiate {@code hGraphExec}. Changed edges
* to and from {@code hNode} are ignored.
*
* The modifications only affect future launches of {@code hGraphExec}. Already enqueued or running launches of {@code hGraphExec} are not affected by
* this call. {@code hNode} is also not modified by this call.
*
* The topology of {@code childGraph}, as well as the node insertion order, must match that of the graph contained in {@code hNode}. See
* {@link #cuGraphExecUpdate GraphExecUpdate} for a list of restrictions on what can be updated in an instantiated graph. The update is recursive, so child graph nodes
* contained within the top level child graph will also be updated.
*
* @param hGraphExec the executable graph in which to set the specified node
* @param hNode child graph node from the graph which was used to instantiate {@code graphExec}
* @param childGraph the graph supplying the updated parameters
*/
@NativeType("CUresult")
public static int cuGraphExecChildGraphNodeSetParams(@NativeType("CUgraphExec") long hGraphExec, @NativeType("CUgraphNode") long hNode, @NativeType("CUgraph") long childGraph) {
long __functionAddress = Functions.GraphExecChildGraphNodeSetParams;
if (CHECKS) {
check(__functionAddress);
check(hGraphExec);
check(hNode);
check(childGraph);
}
return callPPPI(hGraphExec, hNode, childGraph, __functionAddress);
}
// --- [ cuGraphExecEventRecordNodeSetEvent ] ---
/**
* Sets the event for an event record node in the given {@code graphExec}.
*
* Sets the event of an event record node in an executable graph {@code hGraphExec}. The node is identified by the corresponding node {@code hNode} in the
* non-executable graph, from which the executable graph was instantiated.
*
* The modifications only affect future launches of {@code hGraphExec}. Already enqueued or running launches of {@code hGraphExec} are not affected by
* this call. {@code hNode} is also not modified by this call.
*
* @param hGraphExec the executable graph in which to set the specified node
* @param hNode event record node from the graph from which graphExec was instantiated
* @param event updated event to use
*/
@NativeType("CUresult")
public static int cuGraphExecEventRecordNodeSetEvent(@NativeType("CUgraphExec") long hGraphExec, @NativeType("CUgraphNode") long hNode, @NativeType("CUevent") long event) {
long __functionAddress = Functions.GraphExecEventRecordNodeSetEvent;
if (CHECKS) {
check(__functionAddress);
check(hGraphExec);
check(hNode);
check(event);
}
return callPPPI(hGraphExec, hNode, event, __functionAddress);
}
// --- [ cuGraphExecEventWaitNodeSetEvent ] ---
/**
* Sets the event for an event wait node in the given {@code graphExec}.
*
* Sets the event of an event wait node in an executable graph {@code hGraphExec}. The node is identified by the corresponding node {@code hNode} in the
* non-executable graph, from which the executable graph was instantiated.
*
* The modifications only affect future launches of {@code hGraphExec}. Already enqueued or running launches of {@code hGraphExec} are not affected by
* this call. {@code hNode} is also not modified by this call.
*
* @param hGraphExec the executable graph in which to set the specified node
* @param hNode event wait node from the graph from which graphExec was instantiated
* @param event updated event to use
*/
@NativeType("CUresult")
public static int cuGraphExecEventWaitNodeSetEvent(@NativeType("CUgraphExec") long hGraphExec, @NativeType("CUgraphNode") long hNode, @NativeType("CUevent") long event) {
long __functionAddress = Functions.GraphExecEventWaitNodeSetEvent;
if (CHECKS) {
check(__functionAddress);
check(hGraphExec);
check(hNode);
check(event);
}
return callPPPI(hGraphExec, hNode, event, __functionAddress);
}
// --- [ cuGraphExecExternalSemaphoresSignalNodeSetParams ] ---
/** Unsafe version of: {@link #cuGraphExecExternalSemaphoresSignalNodeSetParams GraphExecExternalSemaphoresSignalNodeSetParams} */
public static int ncuGraphExecExternalSemaphoresSignalNodeSetParams(long hGraphExec, long hNode, long nodeParams) {
long __functionAddress = Functions.GraphExecExternalSemaphoresSignalNodeSetParams;
if (CHECKS) {
check(__functionAddress);
check(hGraphExec);
check(hNode);
CUDA_EXT_SEM_SIGNAL_NODE_PARAMS.validate(nodeParams);
}
return callPPPI(hGraphExec, hNode, nodeParams, __functionAddress);
}
/**
* Sets the parameters for an external semaphore signal node in the given {@code graphExec}.
*
* Sets the parameters of an external semaphore signal node in an executable graph {@code hGraphExec}. The node is identified by the corresponding node
* {@code hNode} in the non-executable graph, from which the executable graph was instantiated.
*
* {@code hNode} must not have been removed from the original graph.
*
* The modifications only affect future launches of {@code hGraphExec}. Already enqueued or running launches of {@code hGraphExec} are not affected by
* this call. {@code hNode} is also not modified by this call.
*
* Changing {@code nodeParams->numExtSems} is not supported.
*
* @param hGraphExec the executable graph in which to set the specified node
* @param hNode semaphore signal node from the graph from which graphExec was instantiated
* @param nodeParams updated parameters to set
*/
@NativeType("CUresult")
public static int cuGraphExecExternalSemaphoresSignalNodeSetParams(@NativeType("CUgraphExec") long hGraphExec, @NativeType("CUgraphNode") long hNode, @NativeType("CUDA_EXT_SEM_SIGNAL_NODE_PARAMS const *") CUDA_EXT_SEM_SIGNAL_NODE_PARAMS nodeParams) {
return ncuGraphExecExternalSemaphoresSignalNodeSetParams(hGraphExec, hNode, nodeParams.address());
}
// --- [ cuGraphExecExternalSemaphoresWaitNodeSetParams ] ---
/** Unsafe version of: {@link #cuGraphExecExternalSemaphoresWaitNodeSetParams GraphExecExternalSemaphoresWaitNodeSetParams} */
public static int ncuGraphExecExternalSemaphoresWaitNodeSetParams(long hGraphExec, long hNode, long nodeParams) {
long __functionAddress = Functions.GraphExecExternalSemaphoresWaitNodeSetParams;
if (CHECKS) {
check(__functionAddress);
check(hGraphExec);
check(hNode);
CUDA_EXT_SEM_WAIT_NODE_PARAMS.validate(nodeParams);
}
return callPPPI(hGraphExec, hNode, nodeParams, __functionAddress);
}
/**
* Sets the parameters for an external semaphore wait node in the given {@code graphExec}.
*
* Sets the parameters of an external semaphore wait node in an executable graph {@code hGraphExec}. The node is identified by the corresponding node
* {@code hNode} in the non-executable graph, from which the executable graph was instantiated.
*
* {@code hNode} must not have been removed from the original graph.
*
* The modifications only affect future launches of {@code hGraphExec}. Already enqueued or running launches of {@code hGraphExec} are not affected by
* this call. {@code hNode} is also not modified by this call.
*
* Changing {@code nodeParams->numExtSems} is not supported.
*
* @param hGraphExec the executable graph in which to set the specified node
* @param hNode semaphore wait node from the graph from which graphExec was instantiated
* @param nodeParams updated parameters to set
*/
@NativeType("CUresult")
public static int cuGraphExecExternalSemaphoresWaitNodeSetParams(@NativeType("CUgraphExec") long hGraphExec, @NativeType("CUgraphNode") long hNode, @NativeType("CUDA_EXT_SEM_WAIT_NODE_PARAMS const *") CUDA_EXT_SEM_WAIT_NODE_PARAMS nodeParams) {
return ncuGraphExecExternalSemaphoresWaitNodeSetParams(hGraphExec, hNode, nodeParams.address());
}
// --- [ cuGraphUpload ] ---
/**
* Uploads an executable graph in a stream.
*
* Uploads {@code hGraphExec} to the device in {@code hStream} without executing it. Uploads of the same {@code hGraphExec} will be serialized. Each
* upload is ordered behind both any previous work in {@code hStream} and any previous launches of {@code hGraphExec}. Uses memory cached by
* {@code hStream} to back the allocations owned by {@code hGraphExec}.
*
* @param hGraphExec executable graph to upload
* @param hStream stream in which to upload the graph
*/
@NativeType("CUresult")
public static int cuGraphUpload(@NativeType("CUgraphExec") long hGraphExec, @NativeType("CUstream") long hStream) {
long __functionAddress = Functions.GraphUpload;
if (CHECKS) {
check(__functionAddress);
check(hGraphExec);
}
return callPPI(hGraphExec, hStream, __functionAddress);
}
// --- [ cuGraphLaunch ] ---
/**
* Launches an executable graph in a stream.
*
* Executes {@code hGraphExec} in {@code hStream}. Only one instance of {@code hGraphExec} may be executing at a time. Each launch is ordered behind both
* any previous work in {@code hStream} and any previous launches of {@code hGraphExec}. To execute a graph concurrently, it must be instantiated multiple
* times into multiple executable graphs.
*
* If any allocations created by {@code hGraphExec} remain unfreed (from a previous launch) and {@code hGraphExec} was not instantiated with
* {@link #CUDA_GRAPH_INSTANTIATE_FLAG_AUTO_FREE_ON_LAUNCH}, the launch will fail with {@link #CUDA_ERROR_INVALID_VALUE}.
*
* @param hGraphExec executable graph to launch
* @param hStream stream in which to launch the graph
*/
@NativeType("CUresult")
public static int cuGraphLaunch(@NativeType("CUgraphExec") long hGraphExec, @NativeType("CUstream") long hStream) {
long __functionAddress = Functions.GraphLaunch;
if (CHECKS) {
check(__functionAddress);
check(hGraphExec);
}
return callPPI(hGraphExec, hStream, __functionAddress);
}
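// Illustrative sketch (hypothetical helper): optionally pre-uploads the executable
// graph so the first launch does not pay the upload cost, then launches it in the
// same stream. Both handles are assumed to be valid.
private static void uploadAndLaunchExample(long hGraphExec, long hStream) {
    cuGraphUpload(hGraphExec, hStream); // optional warm-up, serialized with the launch below
    cuGraphLaunch(hGraphExec, hStream); // ordered behind the upload in hStream
}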
// --- [ cuGraphExecDestroy ] ---
/**
* Destroys an executable graph.
*
* Destroys the executable graph specified by {@code hGraphExec}, as well as all of its executable nodes. If the executable graph is in-flight, it will
* not be terminated, but rather freed asynchronously on completion.
*
* @param hGraphExec executable graph to destroy
*/
@NativeType("CUresult")
public static int cuGraphExecDestroy(@NativeType("CUgraphExec") long hGraphExec) {
long __functionAddress = Functions.GraphExecDestroy;
if (CHECKS) {
check(__functionAddress);
check(hGraphExec);
}
return callPI(hGraphExec, __functionAddress);
}
// --- [ cuGraphDestroy ] ---
/**
* Destroys a graph.
*
* Destroys the graph specified by {@code hGraph}, as well as all of its nodes.
*
* @param hGraph graph to destroy
*/
@NativeType("CUresult")
public static int cuGraphDestroy(@NativeType("CUgraph") long hGraph) {
long __functionAddress = Functions.GraphDestroy;
if (CHECKS) {
check(__functionAddress);
check(hGraph);
}
return callPI(hGraph, __functionAddress);
}
// --- [ cuGraphExecUpdate ] ---
/** Unsafe version of: {@link #cuGraphExecUpdate GraphExecUpdate} */
public static int ncuGraphExecUpdate(long hGraphExec, long hGraph, long hErrorNode_out, long updateResult_out) {
long __functionAddress = Functions.GraphExecUpdate;
if (CHECKS) {
check(__functionAddress);
check(hGraphExec);
check(hGraph);
}
return callPPPPI(hGraphExec, hGraph, hErrorNode_out, updateResult_out, __functionAddress);
}
/**
* Check whether an executable graph can be updated with a graph and perform the update if possible.
*
* Updates the node parameters in the instantiated graph specified by {@code hGraphExec} with the node parameters in a topologically identical graph
* specified by {@code hGraph}.
*
* Limitations:
*
*
* - Kernel nodes:
*
*
* - The owning context of the function cannot change.
* - A node whose function originally did not use CUDA dynamic parallelism cannot be updated to a function which uses CDP.
*
* - Memset and memcpy nodes:
*
*
* - The CUDA device(s) to which the operand(s) was allocated/mapped cannot change.
* - The source/destination memory must be allocated from the same contexts as the original source/destination memory.
* - Only 1D memsets can be changed.
*
* - Additional memcpy node restrictions:
*
*
* - Changing either the source or destination memory type (i.e. {@code CU_MEMORYTYPE_DEVICE}, {@code CU_MEMORYTYPE_ARRAY}, etc.) is not supported.
*
* - External semaphore wait nodes and record nodes:
*
*
* - Changing the number of semaphores is not supported.
*
*
* Note: The API may add further restrictions in future releases. The return code should always be checked.
*
* {@code cuGraphExecUpdate} sets {@code updateResult_out} to {@link #CU_GRAPH_EXEC_UPDATE_ERROR_TOPOLOGY_CHANGED GRAPH_EXEC_UPDATE_ERROR_TOPOLOGY_CHANGED} under the following conditions:
*
*
* - The counts of nodes directly in {@code hGraphExec} and {@code hGraph} differ, in which case {@code hErrorNode_out} is {@code NULL}.
* - A node is deleted in {@code hGraph} but not its pair from {@code hGraphExec}, in which case {@code hErrorNode_out} is {@code NULL}.
* - A node is deleted in {@code hGraphExec} but not its pair from {@code hGraph}, in which case {@code hErrorNode_out} is the pairless node from {@code
* hGraph}.
* - The dependent nodes of a pair differ, in which case {@code hErrorNode_out} is the node from {@code hGraph}.
*
*
* {@code cuGraphExecUpdate} sets {@code updateResult_out} to:
*
*
* - {@link #CU_GRAPH_EXEC_UPDATE_ERROR GRAPH_EXEC_UPDATE_ERROR} if passed an invalid value.
* - {@link #CU_GRAPH_EXEC_UPDATE_ERROR_TOPOLOGY_CHANGED GRAPH_EXEC_UPDATE_ERROR_TOPOLOGY_CHANGED} if the graph topology changed.
* - {@link #CU_GRAPH_EXEC_UPDATE_ERROR_NODE_TYPE_CHANGED GRAPH_EXEC_UPDATE_ERROR_NODE_TYPE_CHANGED} if the type of a node changed, in which case {@code hErrorNode_out} is set to the node from
* {@code hGraph}.
* - {@link #CU_GRAPH_EXEC_UPDATE_ERROR_UNSUPPORTED_FUNCTION_CHANGE GRAPH_EXEC_UPDATE_ERROR_UNSUPPORTED_FUNCTION_CHANGE} if the function changed in an unsupported way (see note above), in which case
* {@code hErrorNode_out} is set to the node from {@code hGraph}.
* - {@link #CU_GRAPH_EXEC_UPDATE_ERROR_PARAMETERS_CHANGED GRAPH_EXEC_UPDATE_ERROR_PARAMETERS_CHANGED} if any parameters to a node changed in a way that is not supported, in which case
* {@code hErrorNode_out} is set to the node from {@code hGraph}.
* - {@link #CU_GRAPH_EXEC_UPDATE_ERROR_NOT_SUPPORTED GRAPH_EXEC_UPDATE_ERROR_NOT_SUPPORTED} if something about a node is unsupported, like the node's type or configuration, in which case
* {@code hErrorNode_out} is set to the node from {@code hGraph}.
*
*
* If {@code updateResult_out} isn't set in one of the situations described above, the update check passes and {@code cuGraphExecUpdate} updates
* {@code hGraphExec} to match the contents of {@code hGraph}. If an error happens during the update, {@code updateResult_out} will be set to
* {@link #CU_GRAPH_EXEC_UPDATE_ERROR GRAPH_EXEC_UPDATE_ERROR}; otherwise, {@code updateResult_out} is set to {@link #CU_GRAPH_EXEC_UPDATE_SUCCESS GRAPH_EXEC_UPDATE_SUCCESS}.
*
* {@code cuGraphExecUpdate} returns {@link #CUDA_SUCCESS} when the update was performed successfully. It returns {@link #CUDA_ERROR_GRAPH_EXEC_UPDATE_FAILURE} if the
* graph update was not performed because it included changes which violated constraints specific to instantiated graph update.
*
* @param hGraphExec the instantiated graph to be updated
* @param hGraph the graph containing the updated parameters
* @param hErrorNode_out the node which caused the permissibility check to forbid the update, if any
* @param updateResult_out whether the graph update was permitted. If it was forbidden, the reason why.
*/
@NativeType("CUresult")
public static int cuGraphExecUpdate(@NativeType("CUgraphExec") long hGraphExec, @NativeType("CUgraph") long hGraph, @NativeType("CUgraphNode *") PointerBuffer hErrorNode_out, @NativeType("CUgraphExecUpdateResult *") IntBuffer updateResult_out) {
if (CHECKS) {
check(hErrorNode_out, 1);
check(updateResult_out, 1);
}
return ncuGraphExecUpdate(hGraphExec, hGraph, memAddress(hErrorNode_out), memAddress(updateResult_out));
}
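// Illustrative sketch (hypothetical helper) of the common update-or-reinstantiate
// pattern: try to update the executable graph in place and fall back to a fresh
// instantiation when the update is not permitted.
private static long updateOrReinstantiateExample(long hGraphExec, long hGraph) {
    try (MemoryStack stack = stackPush()) {
        PointerBuffer hErrorNode   = stack.mallocPointer(1);
        IntBuffer     updateResult = stack.mallocInt(1);
        cuGraphExecUpdate(hGraphExec, hGraph, hErrorNode, updateResult);
        if (updateResult.get(0) == CU_GRAPH_EXEC_UPDATE_SUCCESS) {
            return hGraphExec;
        }
        cuGraphExecDestroy(hGraphExec);
        PointerBuffer phNew = stack.mallocPointer(1);
        cuGraphInstantiateWithFlags(phNew, hGraph, 0L);
        return phNew.get(0);
    }
}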
// --- [ cuGraphKernelNodeCopyAttributes ] ---
/**
* Copies attributes from source node to destination node.
*
* Copies attributes from source node {@code src} to destination node {@code dst}. Both nodes must belong to the same context.
*
* @param dst destination node
* @param src source node. For a list of attributes see {@code CUkernelNodeAttrID}.
*/
@NativeType("CUresult")
public static int cuGraphKernelNodeCopyAttributes(@NativeType("CUgraphNode") long dst, @NativeType("CUgraphNode") long src) {
long __functionAddress = Functions.GraphKernelNodeCopyAttributes;
if (CHECKS) {
check(__functionAddress);
check(dst);
check(src);
}
return callPPI(dst, src, __functionAddress);
}
// --- [ cuGraphKernelNodeGetAttribute ] ---
/** Unsafe version of: {@link #cuGraphKernelNodeGetAttribute GraphKernelNodeGetAttribute} */
public static int ncuGraphKernelNodeGetAttribute(long hNode, int attr, long value_out) {
long __functionAddress = Functions.GraphKernelNodeGetAttribute;
if (CHECKS) {
check(__functionAddress);
check(hNode);
}
return callPPI(hNode, attr, value_out, __functionAddress);
}
/**
* Queries node attribute.
*
* Queries attribute {@code attr} from node {@code hNode} and stores it in the corresponding member of {@code value_out}.
*/
@NativeType("CUresult")
public static int cuGraphKernelNodeGetAttribute(@NativeType("CUgraphNode") long hNode, @NativeType("CUkernelNodeAttrID") int attr, @NativeType("CUkernelNodeAttrValue *") CUkernelNodeAttrValue value_out) {
return ncuGraphKernelNodeGetAttribute(hNode, attr, value_out.address());
}
// --- [ cuGraphKernelNodeSetAttribute ] ---
/** Unsafe version of: {@link #cuGraphKernelNodeSetAttribute GraphKernelNodeSetAttribute} */
public static int ncuGraphKernelNodeSetAttribute(long hNode, int attr, long value) {
long __functionAddress = Functions.GraphKernelNodeSetAttribute;
if (CHECKS) {
check(__functionAddress);
check(hNode);
}
return callPPI(hNode, attr, value, __functionAddress);
}
/**
* Sets node attribute.
*
* Sets attribute {@code attr} on node {@code hNode} from the corresponding attribute of {@code value}.
*/
@NativeType("CUresult")
public static int cuGraphKernelNodeSetAttribute(@NativeType("CUgraphNode") long hNode, @NativeType("CUkernelNodeAttrID") int attr, @NativeType("CUkernelNodeAttrValue const *") CUkernelNodeAttrValue value) {
return ncuGraphKernelNodeSetAttribute(hNode, attr, value.address());
}
// --- [ cuGraphDebugDotPrint ] ---
/** Unsafe version of: {@link #cuGraphDebugDotPrint GraphDebugDotPrint} */
public static int ncuGraphDebugDotPrint(long hGraph, long path, int flags) {
long __functionAddress = Functions.GraphDebugDotPrint;
if (CHECKS) {
check(__functionAddress);
check(hGraph);
}
return callPPI(hGraph, path, flags, __functionAddress);
}
/**
* Write a DOT file describing graph structure.
*
* Using the provided {@code hGraph}, write to {@code path} a DOT formatted description of the graph. By default this includes the graph topology, node
* types, node ids, kernel names and memcpy direction. {@code flags} can be specified to write more detailed information about each node type such as
* parameter values, kernel attributes, node and function handles.
*
* @param hGraph the graph to create a DOT file from
* @param path the path to write the DOT file to
* @param flags flags from {@code CUgraphDebugDot_flags} for specifying which additional node information to write
*/
@NativeType("CUresult")
public static int cuGraphDebugDotPrint(@NativeType("CUgraph") long hGraph, @NativeType("char const *") ByteBuffer path, @NativeType("unsigned int") int flags) {
if (CHECKS) {
checkNT1(path);
}
return ncuGraphDebugDotPrint(hGraph, memAddress(path), flags);
}
/**
* Write a DOT file describing graph structure.
*
* Using the provided {@code hGraph}, write to {@code path} a DOT formatted description of the graph. By default this includes the graph topology, node
* types, node ids, kernel names and memcpy direction. {@code flags} can be specified to write more detailed information about each node type such as
* parameter values, kernel attributes, node and function handles.
*
* @param hGraph the graph to create a DOT file from
* @param path the path to write the DOT file to
* @param flags flags from {@code CUgraphDebugDot_flags} for specifying which additional node information to write
*/
@NativeType("CUresult")
public static int cuGraphDebugDotPrint(@NativeType("CUgraph") long hGraph, @NativeType("char const *") CharSequence path, @NativeType("unsigned int") int flags) {
MemoryStack stack = stackGet(); int stackPointer = stack.getPointer();
try {
stack.nUTF8(path, true);
long pathEncoded = stack.getPointerAddress();
return ncuGraphDebugDotPrint(hGraph, pathEncoded, flags);
} finally {
stack.setPointer(stackPointer);
}
}
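// Illustrative sketch (hypothetical helper): dumps a graph for inspection with
// Graphviz. Passing 0 for flags writes the default level of detail; the output
// file name is an arbitrary choice.
private static void dumpGraphExample(long hGraph) {
    cuGraphDebugDotPrint(hGraph, "graph.dot", 0);
}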
// --- [ cuUserObjectCreate ] ---
/** Unsafe version of: {@link #cuUserObjectCreate UserObjectCreate} */
public static int ncuUserObjectCreate(long object_out, long ptr, long destroy, int initialRefcount, int flags) {
long __functionAddress = Functions.UserObjectCreate;
if (CHECKS) {
check(__functionAddress);
check(ptr);
}
return callPPPI(object_out, ptr, destroy, initialRefcount, flags, __functionAddress);
}
/**
* Create a user object.
*
* Create a user object with the specified destructor callback and initial reference count. The initial references are owned by the caller.
*
* Destructor callbacks cannot make CUDA API calls and should avoid blocking behavior, as they are executed by a shared internal thread. Another thread
* may be signaled to perform such actions, if it does not block forward progress of tasks scheduled through CUDA.
*
* See CUDA User Objects in the CUDA C++ Programming Guide for more information on user objects.
*
* @param object_out location to return the user object handle
* @param ptr the pointer to pass to the destroy function
* @param destroy callback to free the user object when it is no longer in use
* @param initialRefcount the initial refcount to create the object with, typically 1. The initial references are owned by the calling thread.
* @param flags currently it is required to pass {@link #CU_USER_OBJECT_NO_DESTRUCTOR_SYNC USER_OBJECT_NO_DESTRUCTOR_SYNC}, which is the only defined flag. This indicates that the destroy callback
* cannot be waited on by any CUDA API. Users requiring synchronization of the callback should signal its completion manually.
*/
@NativeType("CUresult")
public static int cuUserObjectCreate(@NativeType("CUuserObject *") PointerBuffer object_out, @NativeType("void *") long ptr, @NativeType("void (*) (void *)") CUhostFnI destroy, @NativeType("unsigned int") int initialRefcount, @NativeType("unsigned int") int flags) {
if (CHECKS) {
check(object_out, 1);
}
return ncuUserObjectCreate(memAddress(object_out), ptr, destroy.address(), initialRefcount, flags);
}
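// Illustrative sketch (hypothetical helper): wraps a manually allocated buffer in a
// user object and moves the caller's reference to a graph, tying the buffer's
// lifetime to the graph. Note the CUhostFn closure is itself a native resource and
// would also need to be freed once it can no longer be invoked.
private static long userObjectExample(long graph) {
    long resource = nmemAlloc(256); // resource to be owned by the user object
    CUhostFn destroy = CUhostFn.create(ptr -> nmemFree(ptr));
    try (MemoryStack stack = stackPush()) {
        PointerBuffer pObject = stack.mallocPointer(1);
        cuUserObjectCreate(pObject, resource, destroy, 1, CU_USER_OBJECT_NO_DESTRUCTOR_SYNC);
        long object = pObject.get(0);
        cuGraphRetainUserObject(graph, object, 1, CU_GRAPH_USER_OBJECT_MOVE); // move, don't add
        return object;
    }
}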
// --- [ cuUserObjectRetain ] ---
/**
* Retain a reference to a user object.
*
* Retains new references to a user object. The new references are owned by the caller.
*
* See CUDA User Objects in the CUDA C++ Programming Guide for more information on user objects.
*
* @param object the object to retain
* @param count the number of references to retain, typically 1. Must be nonzero and not larger than INT_MAX.
*/
@NativeType("CUresult")
public static int cuUserObjectRetain(@NativeType("CUuserObject") long object, @NativeType("unsigned int") int count) {
long __functionAddress = Functions.UserObjectRetain;
if (CHECKS) {
check(__functionAddress);
check(object);
}
return callPI(object, count, __functionAddress);
}
// --- [ cuUserObjectRelease ] ---
/**
* Release a reference to a user object.
*
* Releases user object references owned by the caller. The object's destructor is invoked if the reference count reaches zero.
*
* It is undefined behavior to release references not owned by the caller, or to use a user object handle after all references are released.
*
* See CUDA User Objects in the CUDA C++ Programming Guide for more information on user objects.
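 *
 * A sketch pairing this call with {@link #cuUserObjectRetain UserObjectRetain} ({@code userObject} is an illustrative handle):
 *
 * cuUserObjectRetain(userObject, 1);  // take an extra reference
 * // ... hand the reference to another owner ...
 * cuUserObjectRelease(userObject, 1); // the destructor runs once the count reaches zero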
*
* @param object the object to release
* @param count the number of references to release, typically 1. Must be nonzero and not larger than INT_MAX.
*/
@NativeType("CUresult")
public static int cuUserObjectRelease(@NativeType("CUuserObject") long object, @NativeType("unsigned int") int count) {
long __functionAddress = Functions.UserObjectRelease;
if (CHECKS) {
check(__functionAddress);
check(object);
}
return callPI(object, count, __functionAddress);
}
// --- [ cuGraphRetainUserObject ] ---
/**
* Retain a reference to a user object from a graph.
*
* Creates or moves user object references that will be owned by a CUDA graph.
*
* See CUDA User Objects in the CUDA C++ Programming Guide for more information on user objects.
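 *
 * A sketch that moves the caller's reference into a graph ({@code hGraph} and {@code userObject} are illustrative handles):
 *
 * cuGraphRetainUserObject(hGraph, userObject, 1, CU_GRAPH_USER_OBJECT_MOVE);
 * // the graph now owns the reference; the calling thread must not release it again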
*
* @param graph the graph to associate the reference with
* @param object the user object to retain a reference for
* @param count the number of references to add to the graph, typically 1. Must be nonzero and not larger than INT_MAX.
* @param flags the optional flag {@link #CU_GRAPH_USER_OBJECT_MOVE GRAPH_USER_OBJECT_MOVE} transfers references from the calling thread, rather than create new references. Pass 0 to create new
* references.
*/
@NativeType("CUresult")
public static int cuGraphRetainUserObject(@NativeType("CUgraph") long graph, @NativeType("CUuserObject") long object, @NativeType("unsigned int") int count, @NativeType("unsigned int") int flags) {
long __functionAddress = Functions.GraphRetainUserObject;
if (CHECKS) {
check(__functionAddress);
check(graph);
check(object);
}
return callPPI(graph, object, count, flags, __functionAddress);
}
// --- [ cuGraphReleaseUserObject ] ---
/**
* Release a user object reference from a graph.
*
* Releases user object references owned by a graph.
*
* See CUDA User Objects in the CUDA C++ Programming Guide for more information on user objects.
*
* @param graph the graph that will release the reference
* @param object the user object to release a reference for
* @param count the number of references to release, typically 1. Must be nonzero and not larger than INT_MAX.
*/
@NativeType("CUresult")
public static int cuGraphReleaseUserObject(@NativeType("CUgraph") long graph, @NativeType("CUuserObject") long object, @NativeType("unsigned int") int count) {
long __functionAddress = Functions.GraphReleaseUserObject;
if (CHECKS) {
check(__functionAddress);
check(graph);
check(object);
}
return callPPI(graph, object, count, __functionAddress);
}
// --- [ cuOccupancyMaxActiveBlocksPerMultiprocessor ] ---
/** Unsafe version of: {@link #cuOccupancyMaxActiveBlocksPerMultiprocessor OccupancyMaxActiveBlocksPerMultiprocessor} */
public static int ncuOccupancyMaxActiveBlocksPerMultiprocessor(long numBlocks, long func, int blockSize, long dynamicSMemSize) {
long __functionAddress = Functions.OccupancyMaxActiveBlocksPerMultiprocessor;
if (CHECKS) {
check(__functionAddress);
check(func);
}
return callPPPI(numBlocks, func, blockSize, dynamicSMemSize, __functionAddress);
}
/**
* Returns occupancy of a function.
*
 * Returns in {@code *numBlocks} the maximum number of active blocks per streaming multiprocessor.
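 *
 * A minimal query sketch ({@code function} is an illustrative {@code CUfunction} handle):
 *
 * try (MemoryStack stack = stackPush()) {
 *     IntBuffer pNumBlocks = stack.mallocInt(1);
 *     if (cuOccupancyMaxActiveBlocksPerMultiprocessor(pNumBlocks, function, 256, 0) == CUDA_SUCCESS) {
 *         System.out.println("Max active blocks per SM at blockSize=256: " + pNumBlocks.get(0));
 *     }
 * }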
*
* @param numBlocks returned occupancy
* @param func kernel for which occupancy is calculated
* @param blockSize block size the kernel is intended to be launched with
* @param dynamicSMemSize per-block dynamic shared memory usage intended, in bytes
*/
@NativeType("CUresult")
public static int cuOccupancyMaxActiveBlocksPerMultiprocessor(@NativeType("int *") IntBuffer numBlocks, @NativeType("CUfunction") long func, int blockSize, @NativeType("size_t") long dynamicSMemSize) {
if (CHECKS) {
check(numBlocks, 1);
}
return ncuOccupancyMaxActiveBlocksPerMultiprocessor(memAddress(numBlocks), func, blockSize, dynamicSMemSize);
}
// --- [ cuOccupancyMaxActiveBlocksPerMultiprocessorWithFlags ] ---
/** Unsafe version of: {@link #cuOccupancyMaxActiveBlocksPerMultiprocessorWithFlags OccupancyMaxActiveBlocksPerMultiprocessorWithFlags} */
public static int ncuOccupancyMaxActiveBlocksPerMultiprocessorWithFlags(long numBlocks, long func, int blockSize, long dynamicSMemSize, int flags) {
long __functionAddress = Functions.OccupancyMaxActiveBlocksPerMultiprocessorWithFlags;
if (CHECKS) {
check(__functionAddress);
check(func);
}
return callPPPI(numBlocks, func, blockSize, dynamicSMemSize, flags, __functionAddress);
}
/**
* Returns occupancy of a function.
*
 * Returns in {@code *numBlocks} the maximum number of active blocks per streaming multiprocessor.
*
* The {@code Flags} parameter controls how special cases are handled. The valid flags are:
*
*
* - {@link #CU_OCCUPANCY_DEFAULT OCCUPANCY_DEFAULT}, which maintains the default behavior as {@link #cuOccupancyMaxActiveBlocksPerMultiprocessor OccupancyMaxActiveBlocksPerMultiprocessor};
 * - {@link #CU_OCCUPANCY_DISABLE_CACHING_OVERRIDE OCCUPANCY_DISABLE_CACHING_OVERRIDE}, which suppresses the default behavior on platforms where global caching affects occupancy. On such
 * platforms, if caching is enabled, but per-block SM resource usage would result in zero occupancy, the occupancy calculator calculates the
 * occupancy as if caching is disabled. Setting {@link #CU_OCCUPANCY_DISABLE_CACHING_OVERRIDE OCCUPANCY_DISABLE_CACHING_OVERRIDE} makes the occupancy calculator return 0 in such cases. More
* information can be found about this feature in the "Unified L1/Texture Cache" section of the Maxwell tuning guide.
*
*
* @param numBlocks returned occupancy
* @param func kernel for which occupancy is calculated
* @param blockSize block size the kernel is intended to be launched with
* @param dynamicSMemSize per-block dynamic shared memory usage intended, in bytes
* @param flags requested behavior for the occupancy calculator
*/
@NativeType("CUresult")
public static int cuOccupancyMaxActiveBlocksPerMultiprocessorWithFlags(@NativeType("int *") IntBuffer numBlocks, @NativeType("CUfunction") long func, int blockSize, @NativeType("size_t") long dynamicSMemSize, @NativeType("unsigned int") int flags) {
if (CHECKS) {
check(numBlocks, 1);
}
return ncuOccupancyMaxActiveBlocksPerMultiprocessorWithFlags(memAddress(numBlocks), func, blockSize, dynamicSMemSize, flags);
}
// --- [ cuOccupancyMaxPotentialBlockSize ] ---
/** Unsafe version of: {@link #cuOccupancyMaxPotentialBlockSize OccupancyMaxPotentialBlockSize} */
public static int ncuOccupancyMaxPotentialBlockSize(long minGridSize, long blockSize, long func, long blockSizeToDynamicSMemSize, long dynamicSMemSize, int blockSizeLimit) {
long __functionAddress = Functions.OccupancyMaxPotentialBlockSize;
if (CHECKS) {
check(__functionAddress);
check(func);
}
return callPPPPPI(minGridSize, blockSize, func, blockSizeToDynamicSMemSize, dynamicSMemSize, blockSizeLimit, __functionAddress);
}
/**
* Suggest a launch configuration with reasonable occupancy.
*
* Returns in {@code *blockSize} a reasonable block size that can achieve the maximum occupancy (or, the maximum number of active warps with the fewest
* blocks per multiprocessor), and in {@code *minGridSize} the minimum grid size to achieve the maximum occupancy.
*
* If {@code blockSizeLimit} is 0, the configurator will use the maximum block size permitted by the device / function instead.
*
* If per-block dynamic shared memory allocation is not needed, the user should leave both {@code blockSizeToDynamicSMemSize} and {@code dynamicSMemSize}
* as 0.
*
* If per-block dynamic shared memory allocation is needed, then if the dynamic shared memory size is constant regardless of block size, the size should
* be passed through {@code dynamicSMemSize}, and {@code blockSizeToDynamicSMemSize} should be {@code NULL}.
*
* Otherwise, if the per-block dynamic shared memory size varies with different block sizes, the user needs to provide a unary function through {@code
* blockSizeToDynamicSMemSize} that computes the dynamic shared memory needed by {@code func} for any given block size. {@code dynamicSMemSize} is
* ignored. An example signature is:
*
*
* // Take block size, returns dynamic shared memory needed
* size_t blockToSmem(int blockSize);
*
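 * In these bindings the unary function is supplied as a {@link CUoccupancyB2DSize} callback. A minimal sketch ({@code function} is an illustrative {@code CUfunction} handle and the 4-bytes-per-thread figure is made up):
 *
 * try (MemoryStack stack = stackPush();
 *      CUoccupancyB2DSize b2dSize = CUoccupancyB2DSize.create(blockSize -> blockSize * Integer.BYTES)) {
 *     IntBuffer pMinGridSize = stack.mallocInt(1);
 *     IntBuffer pBlockSize   = stack.mallocInt(1);
 *     cuOccupancyMaxPotentialBlockSize(pMinGridSize, pBlockSize, function, b2dSize, 0, 0);
 * }
 *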
* @param minGridSize returned minimum grid size needed to achieve the maximum occupancy
* @param blockSize returned maximum block size that can achieve the maximum occupancy
* @param func kernel for which launch configuration is calculated
* @param blockSizeToDynamicSMemSize a function that calculates how much per-block dynamic shared memory {@code func} uses based on the block size
* @param dynamicSMemSize dynamic shared memory usage intended, in bytes
* @param blockSizeLimit the maximum block size {@code func} is designed to handle
*/
@NativeType("CUresult")
public static int cuOccupancyMaxPotentialBlockSize(@NativeType("int *") IntBuffer minGridSize, @NativeType("int *") IntBuffer blockSize, @NativeType("CUfunction") long func, @Nullable @NativeType("size_t (*) (int)") CUoccupancyB2DSizeI blockSizeToDynamicSMemSize, @NativeType("size_t") long dynamicSMemSize, int blockSizeLimit) {
if (CHECKS) {
check(minGridSize, 1);
check(blockSize, 1);
}
return ncuOccupancyMaxPotentialBlockSize(memAddress(minGridSize), memAddress(blockSize), func, memAddressSafe(blockSizeToDynamicSMemSize), dynamicSMemSize, blockSizeLimit);
}
// --- [ cuOccupancyMaxPotentialBlockSizeWithFlags ] ---
/** Unsafe version of: {@link #cuOccupancyMaxPotentialBlockSizeWithFlags OccupancyMaxPotentialBlockSizeWithFlags} */
public static int ncuOccupancyMaxPotentialBlockSizeWithFlags(long minGridSize, long blockSize, long func, long blockSizeToDynamicSMemSize, long dynamicSMemSize, int blockSizeLimit, int flags) {
long __functionAddress = Functions.OccupancyMaxPotentialBlockSizeWithFlags;
if (CHECKS) {
check(__functionAddress);
check(func);
}
return callPPPPPI(minGridSize, blockSize, func, blockSizeToDynamicSMemSize, dynamicSMemSize, blockSizeLimit, flags, __functionAddress);
}
/**
* Suggest a launch configuration with reasonable occupancy.
*
* An extended version of {@link #cuOccupancyMaxPotentialBlockSize OccupancyMaxPotentialBlockSize}. In addition to arguments passed to {@link #cuOccupancyMaxPotentialBlockSize OccupancyMaxPotentialBlockSize},
* {@link #cuOccupancyMaxPotentialBlockSizeWithFlags OccupancyMaxPotentialBlockSizeWithFlags} also takes a {@code Flags} parameter.
*
* The {@code Flags} parameter controls how special cases are handled. The valid flags are:
*
*
* - {@link #CU_OCCUPANCY_DEFAULT OCCUPANCY_DEFAULT}, which maintains the default behavior as {@link #cuOccupancyMaxPotentialBlockSize OccupancyMaxPotentialBlockSize};
 * - {@link #CU_OCCUPANCY_DISABLE_CACHING_OVERRIDE OCCUPANCY_DISABLE_CACHING_OVERRIDE}, which suppresses the default behavior on platforms where global caching affects occupancy. On such
 * platforms, the launch configuration that produces maximal occupancy might not support global caching. Setting
 * {@link #CU_OCCUPANCY_DISABLE_CACHING_OVERRIDE OCCUPANCY_DISABLE_CACHING_OVERRIDE} guarantees that the produced launch configuration is global caching compatible at a potential cost of
 * occupancy. More information can be found about this feature in the "Unified L1/Texture Cache" section of the Maxwell tuning guide.
*
*
* @param minGridSize returned minimum grid size needed to achieve the maximum occupancy
* @param blockSize returned maximum block size that can achieve the maximum occupancy
* @param func kernel for which launch configuration is calculated
* @param blockSizeToDynamicSMemSize a function that calculates how much per-block dynamic shared memory {@code func} uses based on the block size
* @param dynamicSMemSize dynamic shared memory usage intended, in bytes
* @param blockSizeLimit the maximum block size {@code func} is designed to handle
* @param flags options
*/
@NativeType("CUresult")
public static int cuOccupancyMaxPotentialBlockSizeWithFlags(@NativeType("int *") IntBuffer minGridSize, @NativeType("int *") IntBuffer blockSize, @NativeType("CUfunction") long func, @Nullable @NativeType("size_t (*) (int)") CUoccupancyB2DSizeI blockSizeToDynamicSMemSize, @NativeType("size_t") long dynamicSMemSize, int blockSizeLimit, @NativeType("unsigned int") int flags) {
if (CHECKS) {
check(minGridSize, 1);
check(blockSize, 1);
}
return ncuOccupancyMaxPotentialBlockSizeWithFlags(memAddress(minGridSize), memAddress(blockSize), func, memAddressSafe(blockSizeToDynamicSMemSize), dynamicSMemSize, blockSizeLimit, flags);
}
// --- [ cuOccupancyAvailableDynamicSMemPerBlock ] ---
/** Unsafe version of: {@link #cuOccupancyAvailableDynamicSMemPerBlock OccupancyAvailableDynamicSMemPerBlock} */
public static int ncuOccupancyAvailableDynamicSMemPerBlock(long dynamicSmemSize, long func, int numBlocks, int blockSize) {
long __functionAddress = Functions.OccupancyAvailableDynamicSMemPerBlock;
if (CHECKS) {
check(__functionAddress);
check(func);
}
return callPPI(dynamicSmemSize, func, numBlocks, blockSize, __functionAddress);
}
/**
* Returns dynamic shared memory available per block when launching {@code numBlocks} blocks on SM.
*
* Returns in {@code *dynamicSmemSize} the maximum size of dynamic shared memory to allow {@code numBlocks} blocks per SM.
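 *
 * A minimal query sketch ({@code function} is an illustrative {@code CUfunction} handle):
 *
 * try (MemoryStack stack = stackPush()) {
 *     PointerBuffer pSmemSize = stack.mallocPointer(1);
 *     if (cuOccupancyAvailableDynamicSMemPerBlock(pSmemSize, function, 4, 256) == CUDA_SUCCESS) {
 *         System.out.println("Dynamic shared memory per block: " + pSmemSize.get(0) + " bytes");
 *     }
 * }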
*
* @param dynamicSmemSize returned maximum dynamic shared memory
* @param func kernel function for which occupancy is calculated
* @param numBlocks number of blocks to fit on SM
* @param blockSize size of the blocks
*/
@NativeType("CUresult")
public static int cuOccupancyAvailableDynamicSMemPerBlock(@NativeType("size_t *") PointerBuffer dynamicSmemSize, @NativeType("CUfunction") long func, int numBlocks, int blockSize) {
if (CHECKS) {
check(dynamicSmemSize, 1);
}
return ncuOccupancyAvailableDynamicSMemPerBlock(memAddress(dynamicSmemSize), func, numBlocks, blockSize);
}
// --- [ cuTexRefSetArray ] ---
/**
* Binds an array as a texture reference. (Deprecated)
*
* Binds the CUDA array {@code hArray} to the texture reference {@code hTexRef}. Any previous address or CUDA array state associated with the texture
* reference is superseded by this function. {@code Flags} must be set to {@link #CU_TRSA_OVERRIDE_FORMAT TRSA_OVERRIDE_FORMAT}. Any CUDA array previously bound to {@code hTexRef} is
* unbound.
*
* @param hTexRef texture reference to bind
* @param hArray array to bind
* @param Flags options (must be {@link #CU_TRSA_OVERRIDE_FORMAT TRSA_OVERRIDE_FORMAT})
*/
@NativeType("CUresult")
public static int cuTexRefSetArray(@NativeType("CUtexref") long hTexRef, @NativeType("CUarray") long hArray, @NativeType("unsigned int") int Flags) {
long __functionAddress = Functions.TexRefSetArray;
if (CHECKS) {
check(hTexRef);
check(hArray);
}
return callPPI(hTexRef, hArray, Flags, __functionAddress);
}
// --- [ cuTexRefSetMipmappedArray ] ---
/**
* Binds a mipmapped array to a texture reference. (Deprecated)
*
* Binds the CUDA mipmapped array {@code hMipmappedArray} to the texture reference {@code hTexRef}. Any previous address or CUDA array state associated
* with the texture reference is superseded by this function. {@code Flags} must be set to {@link #CU_TRSA_OVERRIDE_FORMAT TRSA_OVERRIDE_FORMAT}. Any CUDA array previously bound to
* {@code hTexRef} is unbound.
*
* @param hTexRef texture reference to bind
* @param hMipmappedArray mipmapped array to bind
* @param Flags options (must be {@link #CU_TRSA_OVERRIDE_FORMAT TRSA_OVERRIDE_FORMAT})
*/
@NativeType("CUresult")
public static int cuTexRefSetMipmappedArray(@NativeType("CUtexref") long hTexRef, @NativeType("CUmipmappedArray") long hMipmappedArray, @NativeType("unsigned int") int Flags) {
long __functionAddress = Functions.TexRefSetMipmappedArray;
if (CHECKS) {
check(hTexRef);
check(hMipmappedArray);
}
return callPPI(hTexRef, hMipmappedArray, Flags, __functionAddress);
}
// --- [ cuTexRefSetAddress ] ---
/** Unsafe version of: {@link #cuTexRefSetAddress TexRefSetAddress} */
public static int ncuTexRefSetAddress(long ByteOffset, long hTexRef, long dptr, long bytes) {
long __functionAddress = Functions.TexRefSetAddress;
if (CHECKS) {
check(hTexRef);
check(dptr);
}
return callPPPPI(ByteOffset, hTexRef, dptr, bytes, __functionAddress);
}
/**
* Binds an address as a texture reference. (Deprecated)
*
* Binds a linear address range to the texture reference {@code hTexRef}. Any previous address or CUDA array state associated with the texture reference
* is superseded by this function. Any memory previously bound to {@code hTexRef} is unbound.
*
* Since the hardware enforces an alignment requirement on texture base addresses, {@link #cuTexRefSetAddress TexRefSetAddress} passes back a byte offset in {@code *ByteOffset}
 * that must be applied to texture fetches in order to read from the desired memory. This offset must be divided by the texel size and passed to kernels
 * that read from the texture so it can be applied to the {@code tex1Dfetch()} function.
*
* If the device memory pointer was returned from {@link #cuMemAlloc MemAlloc}, the offset is guaranteed to be 0 and {@code NULL} may be passed as the {@code ByteOffset}
* parameter.
*
* The total number of elements (or texels) in the linear address range cannot exceed {@link #CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE1D_LINEAR_WIDTH DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE1D_LINEAR_WIDTH}. The number of
* elements is computed as ({@code bytes} / {@code bytesPerElement}), where {@code bytesPerElement} is determined from the data format and number of
* components set using {@link #cuTexRefSetFormat TexRefSetFormat}.
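 *
 * A sketch of the offset handling (deprecated API; {@code texRef}, {@code devPtr} and {@code sizeInBytes} are illustrative, with {@code devPtr} coming from {@link #cuMemAlloc MemAlloc}):
 *
 * try (MemoryStack stack = stackPush()) {
 *     PointerBuffer pByteOffset = stack.mallocPointer(1);
 *     if (cuTexRefSetAddress(pByteOffset, texRef, devPtr, sizeInBytes) == CUDA_SUCCESS) {
 *         long byteOffset = pByteOffset.get(0); // guaranteed 0 for cuMemAlloc'd pointers
 *     }
 * }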
*
* @param ByteOffset returned byte offset
* @param hTexRef texture reference to bind
* @param dptr device pointer to bind
* @param bytes size of memory to bind in bytes
*/
@NativeType("CUresult")
public static int cuTexRefSetAddress(@NativeType("size_t *") PointerBuffer ByteOffset, @NativeType("CUtexref") long hTexRef, @NativeType("CUdeviceptr") long dptr, @NativeType("size_t") long bytes) {
if (CHECKS) {
check(ByteOffset, 1);
}
return ncuTexRefSetAddress(memAddress(ByteOffset), hTexRef, dptr, bytes);
}
// --- [ cuTexRefSetAddress2D ] ---
/** Unsafe version of: {@link #cuTexRefSetAddress2D TexRefSetAddress2D} */
public static int ncuTexRefSetAddress2D(long hTexRef, long desc, long dptr, long Pitch) {
long __functionAddress = Functions.TexRefSetAddress2D;
if (CHECKS) {
check(hTexRef);
check(dptr);
}
return callPPPPI(hTexRef, desc, dptr, Pitch, __functionAddress);
}
/**
* Binds an address as a 2D texture reference. (Deprecated)
*
* Binds a linear address range to the texture reference {@code hTexRef}. Any previous address or CUDA array state associated with the texture reference
* is superseded by this function. Any memory previously bound to {@code hTexRef} is unbound.
*
* Using a {@code tex2D()} function inside a kernel requires a call to either {@link #cuTexRefSetArray TexRefSetArray} to bind the corresponding texture reference to an array,
* or {@link #cuTexRefSetAddress2D TexRefSetAddress2D} to bind the texture reference to linear memory.
*
* Function calls to {@link #cuTexRefSetFormat TexRefSetFormat} cannot follow calls to {@link #cuTexRefSetAddress2D TexRefSetAddress2D} for the same texture reference.
*
* It is required that {@code dptr} be aligned to the appropriate hardware-specific texture alignment. You can query this value using the device attribute
* {@link #CU_DEVICE_ATTRIBUTE_TEXTURE_ALIGNMENT DEVICE_ATTRIBUTE_TEXTURE_ALIGNMENT}. If an unaligned {@code dptr} is supplied, {@link #CUDA_ERROR_INVALID_VALUE} is returned.
*
* {@code Pitch} has to be aligned to the hardware-specific texture pitch alignment. This value can be queried using the device attribute
* {@link #CU_DEVICE_ATTRIBUTE_TEXTURE_PITCH_ALIGNMENT DEVICE_ATTRIBUTE_TEXTURE_PITCH_ALIGNMENT}. If an unaligned {@code Pitch} is supplied, {@link #CUDA_ERROR_INVALID_VALUE} is returned.
*
* {@code Width} and {@code Height}, which are specified in elements (or texels), cannot exceed {@link #CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE2D_LINEAR_WIDTH DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE2D_LINEAR_WIDTH} and
* {@link #CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE2D_LINEAR_HEIGHT DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE2D_LINEAR_HEIGHT} respectively. {@code Pitch}, which is specified in bytes, cannot exceed
* {@link #CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE2D_LINEAR_PITCH DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE2D_LINEAR_PITCH}.
*
* @param hTexRef texture reference to bind
* @param desc descriptor of CUDA array
* @param dptr device pointer to bind
* @param Pitch line pitch in bytes
*/
@NativeType("CUresult")
public static int cuTexRefSetAddress2D(@NativeType("CUtexref") long hTexRef, @NativeType("CUDA_ARRAY_DESCRIPTOR const *") CUDA_ARRAY_DESCRIPTOR desc, @NativeType("CUdeviceptr") long dptr, @NativeType("size_t") long Pitch) {
return ncuTexRefSetAddress2D(hTexRef, desc.address(), dptr, Pitch);
}
// --- [ cuTexRefSetFormat ] ---
/**
* Sets the format for a texture reference. (Deprecated)
*
* Specifies the format of the data to be read by the texture reference {@code hTexRef}. {@code fmt} and {@code NumPackedComponents} are exactly analogous
* to the {@code Format} and {@code NumChannels} members of the {@link CUDA_ARRAY_DESCRIPTOR} structure: They specify the format of each component and the
* number of components per array element.
*
* @param hTexRef texture reference
* @param fmt format to set
* @param NumPackedComponents number of components per array element
*/
@NativeType("CUresult")
public static int cuTexRefSetFormat(@NativeType("CUtexref") long hTexRef, @NativeType("CUarray_format") int fmt, int NumPackedComponents) {
long __functionAddress = Functions.TexRefSetFormat;
if (CHECKS) {
check(hTexRef);
}
return callPI(hTexRef, fmt, NumPackedComponents, __functionAddress);
}
// --- [ cuTexRefSetAddressMode ] ---
/**
* Sets the addressing mode for a texture reference. (Deprecated)
*
* Specifies the addressing mode {@code am} for the given dimension {@code dim} of the texture reference {@code hTexRef}. If {@code dim} is zero, the
* addressing mode is applied to the first parameter of the functions used to fetch from the texture; if {@code dim} is 1, the second, and so on.
*
* Note that this call has no effect if {@code hTexRef} is bound to linear memory. Also, if the flag, {@link #CU_TRSF_NORMALIZED_COORDINATES TRSF_NORMALIZED_COORDINATES}, is not set, the
* only supported address mode is {@link #CU_TR_ADDRESS_MODE_CLAMP TR_ADDRESS_MODE_CLAMP}.
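 *
 * For example (deprecated API; {@code texRef} is an illustrative handle), wrapping along the first dimension and clamping along the second:
 *
 * cuTexRefSetAddressMode(texRef, 0, CU_TR_ADDRESS_MODE_WRAP);  // dim 0: first texture coordinate
 * cuTexRefSetAddressMode(texRef, 1, CU_TR_ADDRESS_MODE_CLAMP); // dim 1: second texture coordinate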
*
* @param hTexRef texture reference
* @param dim dimension
* @param am addressing mode to set
*/
@NativeType("CUresult")
public static int cuTexRefSetAddressMode(@NativeType("CUtexref") long hTexRef, int dim, @NativeType("CUaddress_mode") int am) {
long __functionAddress = Functions.TexRefSetAddressMode;
if (CHECKS) {
check(hTexRef);
}
return callPI(hTexRef, dim, am, __functionAddress);
}
// --- [ cuTexRefSetFilterMode ] ---
/**
* Sets the filtering mode for a texture reference. (Deprecated)
*
* Specifies the filtering mode {@code fm} to be used when reading memory through the texture reference {@code hTexRef}.
*
* Note that this call has no effect if {@code hTexRef} is bound to linear memory.
*
* @param hTexRef texture reference
* @param fm filtering mode to set
*/
@NativeType("CUresult")
public static int cuTexRefSetFilterMode(@NativeType("CUtexref") long hTexRef, @NativeType("CUfilter_mode") int fm) {
long __functionAddress = Functions.TexRefSetFilterMode;
if (CHECKS) {
check(hTexRef);
}
return callPI(hTexRef, fm, __functionAddress);
}
// --- [ cuTexRefSetMipmapFilterMode ] ---
/**
 * Sets the mipmap filtering mode for a texture reference. (Deprecated)
*
* Specifies the mipmap filtering mode {@code fm} to be used when reading memory through the texture reference {@code hTexRef}.
*
* Note that this call has no effect if {@code hTexRef} is not bound to a mipmapped array.
*
* @param hTexRef texture reference
* @param fm filtering mode to set
*/
@NativeType("CUresult")
public static int cuTexRefSetMipmapFilterMode(@NativeType("CUtexref") long hTexRef, @NativeType("CUfilter_mode") int fm) {
long __functionAddress = Functions.TexRefSetMipmapFilterMode;
if (CHECKS) {
check(hTexRef);
}
return callPI(hTexRef, fm, __functionAddress);
}
// --- [ cuTexRefSetMipmapLevelBias ] ---
/**
* Sets the mipmap level bias for a texture reference. (Deprecated)
*
* Specifies the mipmap level bias {@code bias} to be added to the specified mipmap level when reading memory through the texture reference
* {@code hTexRef}.
*
* Note that this call has no effect if {@code hTexRef} is not bound to a mipmapped array.
*
* @param hTexRef texture reference
* @param bias mipmap level bias
*/
@NativeType("CUresult")
public static int cuTexRefSetMipmapLevelBias(@NativeType("CUtexref") long hTexRef, float bias) {
long __functionAddress = Functions.TexRefSetMipmapLevelBias;
if (CHECKS) {
check(hTexRef);
}
return callPI(hTexRef, bias, __functionAddress);
}
// --- [ cuTexRefSetMipmapLevelClamp ] ---
/**
 * Sets the min/max mipmap level clamps for a texture reference. (Deprecated)
*
* Specifies the min/max mipmap level clamps, {@code minMipmapLevelClamp} and {@code maxMipmapLevelClamp} respectively, to be used when reading memory
* through the texture reference {@code hTexRef}.
*
* Note that this call has no effect if {@code hTexRef} is not bound to a mipmapped array.
*
* @param hTexRef texture reference
* @param minMipmapLevelClamp mipmap min level clamp
* @param maxMipmapLevelClamp mipmap max level clamp
*/
@NativeType("CUresult")
public static int cuTexRefSetMipmapLevelClamp(@NativeType("CUtexref") long hTexRef, float minMipmapLevelClamp, float maxMipmapLevelClamp) {
long __functionAddress = Functions.TexRefSetMipmapLevelClamp;
if (CHECKS) {
check(hTexRef);
}
return callPI(hTexRef, minMipmapLevelClamp, maxMipmapLevelClamp, __functionAddress);
}
// --- [ cuTexRefSetMaxAnisotropy ] ---
/**
* Sets the maximum anisotropy for a texture reference. (Deprecated)
*
* Specifies the maximum anisotropy {@code maxAniso} to be used when reading memory through the texture reference {@code hTexRef}.
*
* Note that this call has no effect if {@code hTexRef} is bound to linear memory.
*
* @param hTexRef texture reference
* @param maxAniso maximum anisotropy
*/
@NativeType("CUresult")
public static int cuTexRefSetMaxAnisotropy(@NativeType("CUtexref") long hTexRef, @NativeType("unsigned int") int maxAniso) {
long __functionAddress = Functions.TexRefSetMaxAnisotropy;
if (CHECKS) {
check(hTexRef);
}
return callPI(hTexRef, maxAniso, __functionAddress);
}
// --- [ cuTexRefSetBorderColor ] ---
/** Unsafe version of: {@link #cuTexRefSetBorderColor TexRefSetBorderColor} */
public static int ncuTexRefSetBorderColor(long hTexRef, long pBorderColor) {
long __functionAddress = Functions.TexRefSetBorderColor;
if (CHECKS) {
check(hTexRef);
}
return callPPI(hTexRef, pBorderColor, __functionAddress);
}
/**
* Sets the border color for a texture reference. (Deprecated)
*
 * Specifies the value of the RGBA color via {@code pBorderColor} for the texture reference {@code hTexRef}. The color value supports only float type and
 * holds color components in the following sequence: {@code pBorderColor[0]} holds the 'R' component, {@code pBorderColor[1]} holds the 'G' component,
 * {@code pBorderColor[2]} holds the 'B' component and {@code pBorderColor[3]} holds the 'A' component.
*
* Note that the color values can be set only when the Address mode is set to {@link #CU_TR_ADDRESS_MODE_BORDER TR_ADDRESS_MODE_BORDER} using {@link #cuTexRefSetAddressMode TexRefSetAddressMode}. Applications using
* integer border color values have to "reinterpret_cast" their values to float.
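 *
 * A sketch setting an opaque red border (deprecated API; {@code texRef} is an illustrative handle whose address mode has been set to {@link #CU_TR_ADDRESS_MODE_BORDER TR_ADDRESS_MODE_BORDER}):
 *
 * try (MemoryStack stack = stackPush()) {
 *     FloatBuffer borderColor = stack.floats(1.0f, 0.0f, 0.0f, 1.0f); // R, G, B, A
 *     cuTexRefSetBorderColor(texRef, borderColor);
 * }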
*
* @param hTexRef texture reference
* @param pBorderColor RGBA color
*/
@NativeType("CUresult")
public static int cuTexRefSetBorderColor(@NativeType("CUtexref") long hTexRef, @NativeType("float *") FloatBuffer pBorderColor) {
if (CHECKS) {
check(pBorderColor, 4);
}
return ncuTexRefSetBorderColor(hTexRef, memAddress(pBorderColor));
}
// --- [ cuTexRefSetFlags ] ---
/**
* Sets the flags for a texture reference. (Deprecated)
*
* Specifies optional flags via {@code Flags} to specify the behavior of data returned through the texture reference {@code hTexRef}. The valid flags are:
*
*
 * - {@link #CU_TRSF_READ_AS_INTEGER TRSF_READ_AS_INTEGER}, which suppresses the default behavior of having the texture promote integer data to floating point data in the range [0,
 * 1]. Note that textures with 32-bit integer format are not promoted, regardless of whether or not this flag is specified;
* - {@link #CU_TRSF_NORMALIZED_COORDINATES TRSF_NORMALIZED_COORDINATES}, which suppresses the default behavior of having the texture coordinates range from [0, Dim) where Dim is the
* width or height of the CUDA array. Instead, the texture coordinates [0, 1.0) reference the entire breadth of the array dimension;
* - {@link #CU_TRSF_DISABLE_TRILINEAR_OPTIMIZATION TRSF_DISABLE_TRILINEAR_OPTIMIZATION}, which disables any trilinear filtering optimizations. Trilinear optimizations improve texture filtering
* performance by allowing bilinear filtering on textures in scenarios where it can closely approximate the expected results.
*
*
* @param hTexRef texture reference
* @param Flags optional flags to set
*/
@NativeType("CUresult")
public static int cuTexRefSetFlags(@NativeType("CUtexref") long hTexRef, @NativeType("unsigned int") int Flags) {
long __functionAddress = Functions.TexRefSetFlags;
if (CHECKS) {
check(hTexRef);
}
return callPI(hTexRef, Flags, __functionAddress);
}
// --- [ cuTexRefGetAddress ] ---
/** Unsafe version of: {@link #cuTexRefGetAddress TexRefGetAddress} */
public static int ncuTexRefGetAddress(long pdptr, long hTexRef) {
long __functionAddress = Functions.TexRefGetAddress;
if (CHECKS) {
check(hTexRef);
}
return callPPI(pdptr, hTexRef, __functionAddress);
}
/**
* Gets the address associated with a texture reference. (Deprecated)
*
* Returns in {@code *pdptr} the base address bound to the texture reference {@code hTexRef}, or returns {@link #CUDA_ERROR_INVALID_VALUE} if the texture
* reference is not bound to any device memory range.
*
* @param pdptr returned device address
* @param hTexRef texture reference
*/
@NativeType("CUresult")
public static int cuTexRefGetAddress(@NativeType("CUdeviceptr *") PointerBuffer pdptr, @NativeType("CUtexref") long hTexRef) {
if (CHECKS) {
check(pdptr, 1);
}
return ncuTexRefGetAddress(memAddress(pdptr), hTexRef);
}
// --- [ cuTexRefGetArray ] ---
/** Unsafe version of: {@link #cuTexRefGetArray TexRefGetArray} */
public static int ncuTexRefGetArray(long phArray, long hTexRef) {
long __functionAddress = Functions.TexRefGetArray;
if (CHECKS) {
check(hTexRef);
}
return callPPI(phArray, hTexRef, __functionAddress);
}
/**
* Gets the array bound to a texture reference. (Deprecated)
*
* Returns in {@code *phArray} the CUDA array bound to the texture reference {@code hTexRef}, or returns {@link #CUDA_ERROR_INVALID_VALUE} if the texture
* reference is not bound to any CUDA array.
*
* @param phArray returned array
* @param hTexRef texture reference
*/
@NativeType("CUresult")
public static int cuTexRefGetArray(@NativeType("CUarray *") PointerBuffer phArray, @NativeType("CUtexref") long hTexRef) {
if (CHECKS) {
check(phArray, 1);
}
return ncuTexRefGetArray(memAddress(phArray), hTexRef);
}
// --- [ cuTexRefGetMipmappedArray ] ---
/** Unsafe version of: {@link #cuTexRefGetMipmappedArray TexRefGetMipmappedArray} */
public static int ncuTexRefGetMipmappedArray(long phMipmappedArray, long hTexRef) {
long __functionAddress = Functions.TexRefGetMipmappedArray;
if (CHECKS) {
check(hTexRef);
}
return callPPI(phMipmappedArray, hTexRef, __functionAddress);
}
/**
* Gets the mipmapped array bound to a texture reference. (Deprecated)
*
* Returns in {@code *phMipmappedArray} the CUDA mipmapped array bound to the texture reference {@code hTexRef}, or returns {@link #CUDA_ERROR_INVALID_VALUE} if
* the texture reference is not bound to any CUDA mipmapped array.
*
* @param phMipmappedArray returned mipmapped array
* @param hTexRef texture reference
*/
@NativeType("CUresult")
public static int cuTexRefGetMipmappedArray(@NativeType("CUmipmappedArray *") PointerBuffer phMipmappedArray, @NativeType("CUtexref") long hTexRef) {
if (CHECKS) {
check(phMipmappedArray, 1);
}
return ncuTexRefGetMipmappedArray(memAddress(phMipmappedArray), hTexRef);
}
// --- [ cuTexRefGetAddressMode ] ---
/** Unsafe version of: {@link #cuTexRefGetAddressMode TexRefGetAddressMode} */
public static int ncuTexRefGetAddressMode(long pam, long hTexRef, int dim) {
long __functionAddress = Functions.TexRefGetAddressMode;
if (CHECKS) {
check(hTexRef);
}
return callPPI(pam, hTexRef, dim, __functionAddress);
}
/**
* Gets the addressing mode used by a texture reference. (Deprecated)
*
 * Returns in {@code *pam} the addressing mode corresponding to the dimension {@code dim} of the texture reference {@code hTexRef}. Currently, the only
 * valid values for {@code dim} are 0 and 1.
*
* @param pam returned addressing mode
* @param hTexRef texture reference
* @param dim dimension
*/
@NativeType("CUresult")
public static int cuTexRefGetAddressMode(@NativeType("CUaddress_mode *") IntBuffer pam, @NativeType("CUtexref") long hTexRef, int dim) {
if (CHECKS) {
check(pam, 1);
}
return ncuTexRefGetAddressMode(memAddress(pam), hTexRef, dim);
}
// --- [ cuTexRefGetFilterMode ] ---
/** Unsafe version of: {@link #cuTexRefGetFilterMode TexRefGetFilterMode} */
public static int ncuTexRefGetFilterMode(long pfm, long hTexRef) {
long __functionAddress = Functions.TexRefGetFilterMode;
if (CHECKS) {
check(hTexRef);
}
return callPPI(pfm, hTexRef, __functionAddress);
}
/**
* Gets the filter-mode used by a texture reference. (Deprecated)
*
* Returns in {@code *pfm} the filtering mode of the texture reference {@code hTexRef}.
*
* @param pfm returned filtering mode
* @param hTexRef texture reference
*/
@NativeType("CUresult")
public static int cuTexRefGetFilterMode(@NativeType("CUfilter_mode *") IntBuffer pfm, @NativeType("CUtexref") long hTexRef) {
if (CHECKS) {
check(pfm, 1);
}
return ncuTexRefGetFilterMode(memAddress(pfm), hTexRef);
}
// --- [ cuTexRefGetFormat ] ---
/** Unsafe version of: {@link #cuTexRefGetFormat TexRefGetFormat} */
public static int ncuTexRefGetFormat(long pFormat, long pNumChannels, long hTexRef) {
long __functionAddress = Functions.TexRefGetFormat;
if (CHECKS) {
check(hTexRef);
}
return callPPPI(pFormat, pNumChannels, hTexRef, __functionAddress);
}
/**
* Gets the format used by a texture reference. (Deprecated)
*
* Returns in {@code *pFormat} and {@code *pNumChannels} the format and number of components of the CUDA array bound to the texture reference
* {@code hTexRef}. If {@code pFormat} or {@code pNumChannels} is {@code NULL}, it will be ignored.
*
* @param pFormat returned format
* @param pNumChannels returned number of components
* @param hTexRef texture reference
*/
@NativeType("CUresult")
public static int cuTexRefGetFormat(@NativeType("CUarray_format *") IntBuffer pFormat, @Nullable @NativeType("int *") IntBuffer pNumChannels, @NativeType("CUtexref") long hTexRef) {
if (CHECKS) {
check(pFormat, 1);
checkSafe(pNumChannels, 1);
}
return ncuTexRefGetFormat(memAddress(pFormat), memAddressSafe(pNumChannels), hTexRef);
}
// --- [ cuTexRefGetMipmapFilterMode ] ---
/** Unsafe version of: {@link #cuTexRefGetMipmapFilterMode TexRefGetMipmapFilterMode} */
public static int ncuTexRefGetMipmapFilterMode(long pfm, long hTexRef) {
long __functionAddress = Functions.TexRefGetMipmapFilterMode;
if (CHECKS) {
check(hTexRef);
}
return callPPI(pfm, hTexRef, __functionAddress);
}
/**
* Gets the mipmap filtering mode for a texture reference. (Deprecated)
*
* Returns the mipmap filtering mode in {@code pfm} that's used when reading memory through the texture reference {@code hTexRef}.
*
* @param pfm returned mipmap filtering mode
* @param hTexRef texture reference
*/
@NativeType("CUresult")
public static int cuTexRefGetMipmapFilterMode(@NativeType("CUfilter_mode *") IntBuffer pfm, @NativeType("CUtexref") long hTexRef) {
if (CHECKS) {
check(pfm, 1);
}
return ncuTexRefGetMipmapFilterMode(memAddress(pfm), hTexRef);
}
// --- [ cuTexRefGetMipmapLevelBias ] ---
/** Unsafe version of: {@link #cuTexRefGetMipmapLevelBias TexRefGetMipmapLevelBias} */
public static int ncuTexRefGetMipmapLevelBias(long pbias, long hTexRef) {
long __functionAddress = Functions.TexRefGetMipmapLevelBias;
if (CHECKS) {
check(hTexRef);
}
return callPPI(pbias, hTexRef, __functionAddress);
}
/**
* Gets the mipmap level bias for a texture reference. (Deprecated)
*
 * Returns the mipmap level bias in {@code pbias} that's added to the specified mipmap level when reading memory through the texture reference {@code
* hTexRef}.
*
* @param pbias returned mipmap level bias
* @param hTexRef texture reference
*/
@NativeType("CUresult")
public static int cuTexRefGetMipmapLevelBias(@NativeType("float *") FloatBuffer pbias, @NativeType("CUtexref") long hTexRef) {
if (CHECKS) {
check(pbias, 1);
}
return ncuTexRefGetMipmapLevelBias(memAddress(pbias), hTexRef);
}
// --- [ cuTexRefGetMipmapLevelClamp ] ---
/** Unsafe version of: {@link #cuTexRefGetMipmapLevelClamp TexRefGetMipmapLevelClamp} */
public static int ncuTexRefGetMipmapLevelClamp(long pminMipmapLevelClamp, long pmaxMipmapLevelClamp, long hTexRef) {
long __functionAddress = Functions.TexRefGetMipmapLevelClamp;
if (CHECKS) {
check(hTexRef);
}
return callPPPI(pminMipmapLevelClamp, pmaxMipmapLevelClamp, hTexRef, __functionAddress);
}
/**
* Gets the min/max mipmap level clamps for a texture reference. (Deprecated)
*
 * Returns the min/max mipmap level clamps in {@code pminMipmapLevelClamp} and {@code pmaxMipmapLevelClamp} that are used when reading memory through the
 * texture reference {@code hTexRef}.
*
* @param pminMipmapLevelClamp returned mipmap min level clamp
* @param pmaxMipmapLevelClamp returned mipmap max level clamp
* @param hTexRef texture reference
*/
@NativeType("CUresult")
public static int cuTexRefGetMipmapLevelClamp(@NativeType("float *") FloatBuffer pminMipmapLevelClamp, @NativeType("float *") FloatBuffer pmaxMipmapLevelClamp, @NativeType("CUtexref") long hTexRef) {
if (CHECKS) {
check(pminMipmapLevelClamp, 1);
check(pmaxMipmapLevelClamp, 1);
}
return ncuTexRefGetMipmapLevelClamp(memAddress(pminMipmapLevelClamp), memAddress(pmaxMipmapLevelClamp), hTexRef);
}
// --- [ cuTexRefGetMaxAnisotropy ] ---
/** Unsafe version of: {@link #cuTexRefGetMaxAnisotropy TexRefGetMaxAnisotropy} */
public static int ncuTexRefGetMaxAnisotropy(long pmaxAniso, long hTexRef) {
long __functionAddress = Functions.TexRefGetMaxAnisotropy;
if (CHECKS) {
check(hTexRef);
}
return callPPI(pmaxAniso, hTexRef, __functionAddress);
}
/**
* Gets the maximum anisotropy for a texture reference. (Deprecated)
*
* Returns the maximum anisotropy in {@code pmaxAniso} that's used when reading memory through the texture reference {@code hTexRef}.
*
* @param pmaxAniso returned maximum anisotropy
* @param hTexRef texture reference
*/
@NativeType("CUresult")
public static int cuTexRefGetMaxAnisotropy(@NativeType("int *") IntBuffer pmaxAniso, @NativeType("CUtexref") long hTexRef) {
if (CHECKS) {
check(pmaxAniso, 1);
}
return ncuTexRefGetMaxAnisotropy(memAddress(pmaxAniso), hTexRef);
}
// --- [ cuTexRefGetBorderColor ] ---
/** Unsafe version of: {@link #cuTexRefGetBorderColor TexRefGetBorderColor} */
public static int ncuTexRefGetBorderColor(long pBorderColor, long hTexRef) {
long __functionAddress = Functions.TexRefGetBorderColor;
if (CHECKS) {
check(hTexRef);
}
return callPPI(pBorderColor, hTexRef, __functionAddress);
}
/**
* Gets the border color used by a texture reference. (Deprecated)
*
 * Returns in {@code pBorderColor} the values of the RGBA color used by the texture reference {@code hTexRef}. The color value is of type float and holds
 * color components in the following sequence: {@code pBorderColor[0]} holds the 'R' component, {@code pBorderColor[1]} holds the 'G' component,
 * {@code pBorderColor[2]} holds the 'B' component and {@code pBorderColor[3]} holds the 'A' component.
*
 * @param pBorderColor returned RGBA color
* @param hTexRef texture reference
*/
@NativeType("CUresult")
public static int cuTexRefGetBorderColor(@NativeType("float *") FloatBuffer pBorderColor, @NativeType("CUtexref") long hTexRef) {
if (CHECKS) {
check(pBorderColor, 4);
}
return ncuTexRefGetBorderColor(memAddress(pBorderColor), hTexRef);
}
// --- [ cuTexRefGetFlags ] ---
/** Unsafe version of: {@link #cuTexRefGetFlags TexRefGetFlags} */
public static int ncuTexRefGetFlags(long pFlags, long hTexRef) {
long __functionAddress = Functions.TexRefGetFlags;
if (CHECKS) {
check(hTexRef);
}
return callPPI(pFlags, hTexRef, __functionAddress);
}
/**
* Gets the flags used by a texture reference. (Deprecated)
*
* Returns in {@code *pFlags} the flags of the texture reference {@code hTexRef}.
*
* @param pFlags returned flags
* @param hTexRef texture reference
*/
@NativeType("CUresult")
public static int cuTexRefGetFlags(@NativeType("unsigned int *") IntBuffer pFlags, @NativeType("CUtexref") long hTexRef) {
if (CHECKS) {
check(pFlags, 1);
}
return ncuTexRefGetFlags(memAddress(pFlags), hTexRef);
}
// --- [ cuTexRefCreate ] ---
/** Unsafe version of: {@link #cuTexRefCreate TexRefCreate} */
public static int ncuTexRefCreate(long pTexRef) {
long __functionAddress = Functions.TexRefCreate;
return callPI(pTexRef, __functionAddress);
}
/**
* Creates a texture reference. (Deprecated)
*
* Creates a texture reference and returns its handle in {@code *pTexRef}. Once created, the application must call {@link #cuTexRefSetArray TexRefSetArray} or
* {@link #cuTexRefSetAddress TexRefSetAddress} to associate the reference with allocated memory. Other texture reference functions are used to specify the format and
* interpretation (addressing, filtering, etc.) to be used when the memory is read through this texture reference.
*
* @param pTexRef returned texture reference
*/
@NativeType("CUresult")
public static int cuTexRefCreate(@NativeType("CUtexref *") PointerBuffer pTexRef) {
if (CHECKS) {
check(pTexRef, 1);
}
return ncuTexRefCreate(memAddress(pTexRef));
}
// --- [ cuTexRefDestroy ] ---
/**
* Destroys a texture reference. (Deprecated)
*
* Destroys the texture reference specified by {@code hTexRef}.
*
* @param hTexRef texture reference to destroy
*/
@NativeType("CUresult")
public static int cuTexRefDestroy(@NativeType("CUtexref") long hTexRef) {
long __functionAddress = Functions.TexRefDestroy;
if (CHECKS) {
check(hTexRef);
}
return callPI(hTexRef, __functionAddress);
}
// --- [ cuSurfRefSetArray ] ---
/**
 * Sets the CUDA array for a surface reference. (Deprecated)
*
* Sets the CUDA array {@code hArray} to be read and written by the surface reference {@code hSurfRef}. Any previous CUDA array state associated with the
* surface reference is superseded by this function. {@code Flags} must be set to 0. The {@link #CUDA_ARRAY3D_SURFACE_LDST} flag must have been set for the CUDA
* array. Any CUDA array previously bound to {@code hSurfRef} is unbound.
*
* @param hSurfRef surface reference handle
* @param hArray CUDA array handle
* @param Flags set to 0
*/
@NativeType("CUresult")
public static int cuSurfRefSetArray(@NativeType("CUsurfref") long hSurfRef, @NativeType("CUarray") long hArray, @NativeType("unsigned int") int Flags) {
long __functionAddress = Functions.SurfRefSetArray;
if (CHECKS) {
check(hSurfRef);
check(hArray);
}
return callPPI(hSurfRef, hArray, Flags, __functionAddress);
}
// --- [ cuSurfRefGetArray ] ---
/** Unsafe version of: {@link #cuSurfRefGetArray SurfRefGetArray} */
public static int ncuSurfRefGetArray(long phArray, long hSurfRef) {
long __functionAddress = Functions.SurfRefGetArray;
if (CHECKS) {
check(hSurfRef);
}
return callPPI(phArray, hSurfRef, __functionAddress);
}
/**
* Passes back the CUDA array bound to a surface reference. (Deprecated)
*
* Returns in {@code *phArray} the CUDA array bound to the surface reference {@code hSurfRef}, or returns {@link #CUDA_ERROR_INVALID_VALUE} if the surface
* reference is not bound to any CUDA array.
*
 * @param phArray returned CUDA array handle
* @param hSurfRef surface reference handle
*/
@NativeType("CUresult")
public static int cuSurfRefGetArray(@NativeType("CUarray *") PointerBuffer phArray, @NativeType("CUsurfref") long hSurfRef) {
if (CHECKS) {
check(phArray, 1);
}
return ncuSurfRefGetArray(memAddress(phArray), hSurfRef);
}
// --- [ cuTexObjectCreate ] ---
/** Unsafe version of: {@link #cuTexObjectCreate TexObjectCreate} */
public static int ncuTexObjectCreate(long pTexObject, long pResDesc, long pTexDesc, long pResViewDesc) {
long __functionAddress = Functions.TexObjectCreate;
if (CHECKS) {
check(__functionAddress);
}
return callPPPPI(pTexObject, pResDesc, pTexDesc, pResViewDesc, __functionAddress);
}
/**
* Creates a texture object.
*
* Creates a texture object and returns it in {@code pTexObject}. {@code pResDesc} describes the data to texture from. {@code pTexDesc} describes how the
* data should be sampled. {@code pResViewDesc} is an optional argument that specifies an alternate format for the data described by {@code pResDesc}, and
* also describes the subresource region to restrict access to when texturing. {@code pResViewDesc} can only be specified if the type of resource is a
* CUDA array or a CUDA mipmapped array.
*
* Texture objects are only supported on devices of compute capability 3.0 or higher. Additionally, a texture object is an opaque value, and, as such,
* should only be accessed through CUDA API calls.
*
*
* - If {@code CUDA_RESOURCE_DESC::resType} is set to {@link #CU_RESOURCE_TYPE_ARRAY RESOURCE_TYPE_ARRAY}, {@code CUDA_RESOURCE_DESC::res::array::hArray} must be set to a valid CUDA
* array handle.
* - If {@code CUDA_RESOURCE_DESC::resType} is set to {@link #CU_RESOURCE_TYPE_MIPMAPPED_ARRAY RESOURCE_TYPE_MIPMAPPED_ARRAY}, {@code CUDA_RESOURCE_DESC::res::mipmap::hMipmappedArray} must be
* set to a valid CUDA mipmapped array handle.
* - If {@code CUDA_RESOURCE_DESC::resType} is set to {@link #CU_RESOURCE_TYPE_LINEAR RESOURCE_TYPE_LINEAR}, {@code CUDA_RESOURCE_DESC::res::linear::devPtr} must be set to a valid
* device pointer, that is aligned to {@link #CU_DEVICE_ATTRIBUTE_TEXTURE_ALIGNMENT DEVICE_ATTRIBUTE_TEXTURE_ALIGNMENT}. {@code CUDA_RESOURCE_DESC::res::linear::format} and
* {@code CUDA_RESOURCE_DESC::res::linear::numChannels} describe the format of each component and the number of components per array element.
* {@code CUDA_RESOURCE_DESC::res::linear::sizeInBytes} specifies the size of the array in bytes. The total number of elements in the linear address
* range cannot exceed {@link #CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE1D_LINEAR_WIDTH DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE1D_LINEAR_WIDTH}. The number of elements is computed as
 * {@code (sizeInBytes / (sizeof(format) * numChannels))}.
* - If {@code CUDA_RESOURCE_DESC::resType} is set to {@link #CU_RESOURCE_TYPE_PITCH2D RESOURCE_TYPE_PITCH2D}, {@code CUDA_RESOURCE_DESC::res::pitch2D::devPtr} must be set to a valid
* device pointer, that is aligned to {@link #CU_DEVICE_ATTRIBUTE_TEXTURE_ALIGNMENT DEVICE_ATTRIBUTE_TEXTURE_ALIGNMENT}. {@code CUDA_RESOURCE_DESC::res::pitch2D::format} and
* {@code CUDA_RESOURCE_DESC::res::pitch2D::numChannels} describe the format of each component and the number of components per array element.
* {@code CUDA_RESOURCE_DESC::res::pitch2D::width} and {@code CUDA_RESOURCE_DESC::res::pitch2D::height} specify the width and height of the array in
* elements, and cannot exceed {@link #CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE2D_LINEAR_WIDTH DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE2D_LINEAR_WIDTH} and {@link #CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE2D_LINEAR_HEIGHT DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE2D_LINEAR_HEIGHT} respectively.
* {@code CUDA_RESOURCE_DESC::res::pitch2D::pitchInBytes} specifies the pitch between two rows in bytes and has to be aligned to
* {@link #CU_DEVICE_ATTRIBUTE_TEXTURE_PITCH_ALIGNMENT DEVICE_ATTRIBUTE_TEXTURE_PITCH_ALIGNMENT}. Pitch cannot exceed {@link #CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE2D_LINEAR_PITCH DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE2D_LINEAR_PITCH}.
* - {@code flags} must be set to zero.
*
*
*
* - {@code CUDA_TEXTURE_DESC::addressMode} specifies the addressing mode for each dimension of the texture data. This is ignored if
* {@code CUDA_RESOURCE_DESC::resType} is {@link #CU_RESOURCE_TYPE_LINEAR RESOURCE_TYPE_LINEAR}. Also, if the flag, {@link #CU_TRSF_NORMALIZED_COORDINATES TRSF_NORMALIZED_COORDINATES} is not set, the only supported
* address mode is {@link #CU_TR_ADDRESS_MODE_CLAMP TR_ADDRESS_MODE_CLAMP}.
* - {@code CUDA_TEXTURE_DESC::filterMode} specifies the filtering mode to be used when fetching from the texture. This is ignored if
* {@code CUDA_RESOURCE_DESC::resType} is {@link #CU_RESOURCE_TYPE_LINEAR RESOURCE_TYPE_LINEAR}.
* - {@code CUDA_TEXTURE_DESC::flags} can be any combination of the following:
*
*
 * - {@link #CU_TRSF_READ_AS_INTEGER TRSF_READ_AS_INTEGER}, which suppresses the default behavior of having the texture promote integer data to floating point data in the range [0,
 * 1]. Note that textures with 32-bit integer format are not promoted, regardless of whether or not this flag is specified.
 * - {@link #CU_TRSF_NORMALIZED_COORDINATES TRSF_NORMALIZED_COORDINATES}, which suppresses the default behavior of having the texture coordinates range from [0, Dim) where Dim is the
 * width or height of the CUDA array. Instead, the texture coordinates [0, 1.0) reference the entire breadth of the array dimension. Note that for
 * CUDA mipmapped arrays, this flag has to be set.
* - {@link #CU_TRSF_DISABLE_TRILINEAR_OPTIMIZATION TRSF_DISABLE_TRILINEAR_OPTIMIZATION}, which disables any trilinear filtering optimizations. Trilinear optimizations improve texture filtering
* performance by allowing bilinear filtering on textures in scenarios where it can closely approximate the expected results.
*
* - {@code CUDA_TEXTURE_DESC::maxAnisotropy} specifies the maximum anisotropy ratio to be used when doing anisotropic filtering. This value will be
* clamped to the range [1,16].
* - {@code CUDA_TEXTURE_DESC::mipmapFilterMode} specifies the filter mode when the calculated mipmap level lies between two defined mipmap levels.
* - {@code CUDA_TEXTURE_DESC::mipmapLevelBias} specifies the offset to be applied to the calculated mipmap level.
* - {@code CUDA_TEXTURE_DESC::minMipmapLevelClamp} specifies the lower end of the mipmap level range to clamp access to.
* - {@code CUDA_TEXTURE_DESC::maxMipmapLevelClamp} specifies the upper end of the mipmap level range to clamp access to.
*
*
*
* - {@code CUDA_RESOURCE_VIEW_DESC::format} specifies how the data contained in the CUDA array or CUDA mipmapped array should be interpreted. Note that
* this can incur a change in size of the texture data. If the resource view format is a block compressed format, then the underlying CUDA array or
 * CUDA mipmapped array has to have a base format of {@link #CU_AD_FORMAT_UNSIGNED_INT32 AD_FORMAT_UNSIGNED_INT32} with 2 or 4 channels, depending on the block compressed format. For
 * example, BC1 and BC4 require the underlying CUDA array to have a format of {@link #CU_AD_FORMAT_UNSIGNED_INT32 AD_FORMAT_UNSIGNED_INT32} with 2 channels. The other BC formats require
* the underlying resource to have the same base format but with 4 channels.
* - {@code CUDA_RESOURCE_VIEW_DESC::width} specifies the new width of the texture data. If the resource view format is a block compressed format, this
* value has to be 4 times the original width of the resource. For non block compressed formats, this value has to be equal to that of the original
* resource.
* - {@code CUDA_RESOURCE_VIEW_DESC::height} specifies the new height of the texture data. If the resource view format is a block compressed format,
* this value has to be 4 times the original height of the resource. For non block compressed formats, this value has to be equal to that of the
* original resource.
* - {@code CUDA_RESOURCE_VIEW_DESC::depth} specifies the new depth of the texture data. This value has to be equal to that of the original resource.
* - {@code CUDA_RESOURCE_VIEW_DESC::firstMipmapLevel} specifies the most detailed mipmap level. This will be the new mipmap level zero. For
* non-mipmapped resources, this value has to be zero. {@code CUDA_TEXTURE_DESC::minMipmapLevelClamp} and
 * {@code CUDA_TEXTURE_DESC::maxMipmapLevelClamp} will be relative to this value. For example, if the {@code firstMipmapLevel} is set to 2, and a
* {@code minMipmapLevelClamp} of 1.2 is specified, then the actual minimum mipmap level clamp will be 3.2.
* - {@code CUDA_RESOURCE_VIEW_DESC::lastMipmapLevel} specifies the least detailed mipmap level. For non-mipmapped resources, this value has to be zero.
* - {@code CUDA_RESOURCE_VIEW_DESC::firstLayer} specifies the first layer index for layered textures. This will be the new layer zero. For non-layered
* resources, this value has to be zero.
* - {@code CUDA_RESOURCE_VIEW_DESC::lastLayer} specifies the last layer index for layered textures. For non-layered resources, this value has to be
* zero.
*
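 * An abbreviated sketch (assumes a valid CUDA array handle {@code hArray}; populating the nested {@code res.array.hArray} member goes through the generated {@link CUDA_RESOURCE_DESC} accessors and is elided here):
 *
 * try (MemoryStack stack = stackPush()) {
 *     CUDA_RESOURCE_DESC resDesc = CUDA_RESOURCE_DESC.calloc(stack).resType(CU_RESOURCE_TYPE_ARRAY);
 *     // ... point resDesc's res.array.hArray at hArray ...
 *     CUDA_TEXTURE_DESC texDesc = CUDA_TEXTURE_DESC.calloc(stack)
 *         .filterMode(CU_TR_FILTER_MODE_LINEAR)
 *         .flags(CU_TRSF_NORMALIZED_COORDINATES);
 *     LongBuffer pTexObject = stack.mallocLong(1);
 *     cuTexObjectCreate(pTexObject, resDesc, texDesc, CUDA_RESOURCE_VIEW_DESC.calloc(stack));
 * }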
*
* @param pTexObject texture object to create
* @param pResDesc resource descriptor
* @param pTexDesc texture descriptor
* @param pResViewDesc resource view descriptor
*/
@NativeType("CUresult")
public static int cuTexObjectCreate(@NativeType("CUtexObject *") LongBuffer pTexObject, @NativeType("CUDA_RESOURCE_DESC const *") CUDA_RESOURCE_DESC pResDesc, @NativeType("CUDA_TEXTURE_DESC const *") CUDA_TEXTURE_DESC pTexDesc, @NativeType("CUDA_RESOURCE_VIEW_DESC const *") CUDA_RESOURCE_VIEW_DESC pResViewDesc) {
if (CHECKS) {
check(pTexObject, 1);
}
return ncuTexObjectCreate(memAddress(pTexObject), pResDesc.address(), pTexDesc.address(), pResViewDesc.address());
}
// --- [ cuTexObjectDestroy ] ---
/**
* Destroys a texture object.
*
* Destroys the texture object specified by {@code texObject}.
*
* @param texObject texture object to destroy
*/
@NativeType("CUresult")
public static int cuTexObjectDestroy(@NativeType("CUtexObject") long texObject) {
long __functionAddress = Functions.TexObjectDestroy;
if (CHECKS) {
check(__functionAddress);
}
return callJI(texObject, __functionAddress);
}
// --- [ cuTexObjectGetResourceDesc ] ---
/** Unsafe version of: {@link #cuTexObjectGetResourceDesc TexObjectGetResourceDesc} */
public static int ncuTexObjectGetResourceDesc(long pResDesc, long texObject) {
long __functionAddress = Functions.TexObjectGetResourceDesc;
if (CHECKS) {
check(__functionAddress);
}
return callPJI(pResDesc, texObject, __functionAddress);
}
/**
* Returns a texture object's resource descriptor.
*
* Returns the resource descriptor for the texture object specified by {@code texObject}.
*
* @param pResDesc resource descriptor
* @param texObject texture object
*/
@NativeType("CUresult")
public static int cuTexObjectGetResourceDesc(@NativeType("CUDA_RESOURCE_DESC *") CUDA_RESOURCE_DESC pResDesc, @NativeType("CUtexObject") long texObject) {
return ncuTexObjectGetResourceDesc(pResDesc.address(), texObject);
}
// --- [ cuTexObjectGetTextureDesc ] ---
/** Unsafe version of: {@link #cuTexObjectGetTextureDesc TexObjectGetTextureDesc} */
public static int ncuTexObjectGetTextureDesc(long pTexDesc, long texObject) {
long __functionAddress = Functions.TexObjectGetTextureDesc;
if (CHECKS) {
check(__functionAddress);
}
return callPJI(pTexDesc, texObject, __functionAddress);
}
/**
* Returns a texture object's texture descriptor.
*
* Returns the texture descriptor for the texture object specified by {@code texObject}.
*
* @param pTexDesc texture descriptor
* @param texObject texture object
*/
@NativeType("CUresult")
public static int cuTexObjectGetTextureDesc(@NativeType("CUDA_TEXTURE_DESC *") CUDA_TEXTURE_DESC pTexDesc, @NativeType("CUtexObject") long texObject) {
return ncuTexObjectGetTextureDesc(pTexDesc.address(), texObject);
}
// --- [ cuTexObjectGetResourceViewDesc ] ---
/** Unsafe version of: {@link #cuTexObjectGetResourceViewDesc TexObjectGetResourceViewDesc} */
public static int ncuTexObjectGetResourceViewDesc(long pResViewDesc, long texObject) {
long __functionAddress = Functions.TexObjectGetResourceViewDesc;
if (CHECKS) {
check(__functionAddress);
}
return callPJI(pResViewDesc, texObject, __functionAddress);
}
/**
* Returns a texture object's resource view descriptor.
*
 * Returns the resource view descriptor for the texture object specified by {@code texObject}. If no resource view was set for {@code texObject},
 * {@link #CUDA_ERROR_INVALID_VALUE} is returned.
*
* @param pResViewDesc resource view descriptor
* @param texObject texture object
*/
@NativeType("CUresult")
public static int cuTexObjectGetResourceViewDesc(@NativeType("CUDA_RESOURCE_VIEW_DESC *") CUDA_RESOURCE_VIEW_DESC pResViewDesc, @NativeType("CUtexObject") long texObject) {
return ncuTexObjectGetResourceViewDesc(pResViewDesc.address(), texObject);
}
// --- [ cuSurfObjectCreate ] ---
/** Unsafe version of: {@link #cuSurfObjectCreate SurfObjectCreate} */
public static int ncuSurfObjectCreate(long pSurfObject, long pResDesc) {
long __functionAddress = Functions.SurfObjectCreate;
if (CHECKS) {
check(__functionAddress);
}
return callPPI(pSurfObject, pResDesc, __functionAddress);
}
/**
* Creates a surface object.
*
* Creates a surface object and returns it in {@code pSurfObject}. {@code pResDesc} describes the data to perform surface load/stores on.
* {@code CUDA_RESOURCE_DESC::resType} must be {@link #CU_RESOURCE_TYPE_ARRAY RESOURCE_TYPE_ARRAY} and {@code CUDA_RESOURCE_DESC::res::array::hArray} must be set to a valid CUDA array
* handle. {@code CUDA_RESOURCE_DESC::flags} must be set to zero.
*
* Surface objects are only supported on devices of compute capability 3.0 or higher. Additionally, a surface object is an opaque value, and, as such,
* should only be accessed through CUDA API calls.
*
* @param pSurfObject surface object to create
* @param pResDesc resource descriptor
*/
@NativeType("CUresult")
public static int cuSurfObjectCreate(@NativeType("CUsurfObject *") LongBuffer pSurfObject, @NativeType("CUDA_RESOURCE_DESC const *") CUDA_RESOURCE_DESC pResDesc) {
if (CHECKS) {
check(pSurfObject, 1);
}
return ncuSurfObjectCreate(memAddress(pSurfObject), pResDesc.address());
}
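// Illustrative sketch, not generated code: creating a surface object. As documented above, the
// resource type must be RESOURCE_TYPE_ARRAY and flags must stay zero; hArray is an assumed,
// pre-created CUDA array handle and the accessor chain follows LWJGL's generated struct API.
private static long exampleSurfObjectCreate(long hArray) {
    try (MemoryStack stack = stackPush()) {
        CUDA_RESOURCE_DESC resDesc = CUDA_RESOURCE_DESC.calloc(stack); // calloc leaves flags at 0
        resDesc.resType(CU_RESOURCE_TYPE_ARRAY);
        resDesc.res().array().hArray(hArray);

        LongBuffer pSurfObject = stack.mallocLong(1);
        cuSurfObjectCreate(pSurfObject, resDesc);
        return pSurfObject.get(0); // release later with cuSurfObjectDestroy
    }
}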
// --- [ cuSurfObjectDestroy ] ---
/**
* Destroys a surface object.
*
* Destroys the surface object specified by {@code surfObject}.
*
* @param surfObject surface object to destroy
*/
@NativeType("CUresult")
public static int cuSurfObjectDestroy(@NativeType("CUsurfObject") long surfObject) {
long __functionAddress = Functions.SurfObjectDestroy;
if (CHECKS) {
check(__functionAddress);
}
return callJI(surfObject, __functionAddress);
}
// --- [ cuSurfObjectGetResourceDesc ] ---
/** Unsafe version of: {@link #cuSurfObjectGetResourceDesc SurfObjectGetResourceDesc} */
public static int ncuSurfObjectGetResourceDesc(long pResDesc, long surfObject) {
long __functionAddress = Functions.SurfObjectGetResourceDesc;
if (CHECKS) {
check(__functionAddress);
}
return callPJI(pResDesc, surfObject, __functionAddress);
}
/**
* Returns a surface object's resource descriptor.
*
* Returns the resource descriptor for the surface object specified by {@code surfObject}.
*
* @param pResDesc resource descriptor
* @param surfObject surface object
*/
@NativeType("CUresult")
public static int cuSurfObjectGetResourceDesc(@NativeType("CUDA_RESOURCE_DESC *") CUDA_RESOURCE_DESC pResDesc, @NativeType("CUsurfObject") long surfObject) {
return ncuSurfObjectGetResourceDesc(pResDesc.address(), surfObject);
}
// --- [ cuDeviceCanAccessPeer ] ---
/** Unsafe version of: {@link #cuDeviceCanAccessPeer DeviceCanAccessPeer} */
public static int ncuDeviceCanAccessPeer(long canAccessPeer, int dev, int peerDev) {
long __functionAddress = Functions.DeviceCanAccessPeer;
if (CHECKS) {
check(__functionAddress);
}
return callPI(canAccessPeer, dev, peerDev, __functionAddress);
}
/**
* Queries if a device may directly access a peer device's memory.
*
* Returns in {@code *canAccessPeer} a value of 1 if contexts on {@code dev} are capable of directly accessing memory from contexts on {@code peerDev} and
* 0 otherwise. If direct access of {@code peerDev} from {@code dev} is possible, then access may be enabled on two specific contexts by calling
* {@link #cuCtxEnablePeerAccess CtxEnablePeerAccess}.
*
* @param canAccessPeer returned access capability
* @param dev device from which allocations on {@code peerDev} are to be directly accessed
* @param peerDev device on which the allocations to be directly accessed by {@code dev} reside
*/
@NativeType("CUresult")
public static int cuDeviceCanAccessPeer(@NativeType("int *") IntBuffer canAccessPeer, @NativeType("CUdevice") int dev, @NativeType("CUdevice") int peerDev) {
if (CHECKS) {
check(canAccessPeer, 1);
}
return ncuDeviceCanAccessPeer(memAddress(canAccessPeer), dev, peerDev);
}
// --- [ cuCtxEnablePeerAccess ] ---
/**
* Enables direct access to memory allocations in a peer context.
*
* If both the current context and {@code peerContext} are on devices which support unified addressing (as may be queried using
* {@link #CU_DEVICE_ATTRIBUTE_UNIFIED_ADDRESSING DEVICE_ATTRIBUTE_UNIFIED_ADDRESSING}) and the same major compute capability, then on success all allocations from {@code peerContext} will
* immediately be accessible by the current context. See the Unified Addressing section of the CUDA documentation for additional details.
*
* Note that access granted by this call is unidirectional and that in order to access memory from the current context in {@code peerContext}, a separate
* symmetric call to {@link #cuCtxEnablePeerAccess CtxEnablePeerAccess} is required.
*
* Note that there are both device-wide and system-wide limitations per system configuration, as noted in the CUDA Programming Guide under the section
* "Peer-to-Peer Memory Access".
*
* Returns {@link #CUDA_ERROR_PEER_ACCESS_UNSUPPORTED} if {@link #cuDeviceCanAccessPeer DeviceCanAccessPeer} indicates that the {@code CUdevice} of the current context cannot directly access
* memory from the {@code CUdevice} of {@code peerContext}.
*
* Returns {@link #CUDA_ERROR_PEER_ACCESS_ALREADY_ENABLED} if direct access of {@code peerContext} from the current context has already been enabled.
*
* Returns {@link #CUDA_ERROR_TOO_MANY_PEERS} if direct peer access is not possible because hardware resources required for peer access have been exhausted.
*
* Returns {@link #CUDA_ERROR_INVALID_CONTEXT} if there is no current context, {@code peerContext} is not a valid context, or if the current context is {@code
* peerContext}.
*
* Returns {@link #CUDA_ERROR_INVALID_VALUE} if {@code Flags} is not 0.
*
* @param peerContext peer context to enable direct access to from the current context
* @param Flags reserved for future use and must be set to 0
*/
@NativeType("CUresult")
public static int cuCtxEnablePeerAccess(@NativeType("CUcontext") long peerContext, @NativeType("unsigned int") int Flags) {
long __functionAddress = Functions.CtxEnablePeerAccess;
if (CHECKS) {
check(__functionAddress);
check(peerContext);
}
return callPI(peerContext, Flags, __functionAddress);
}
// --- [ cuCtxDisablePeerAccess ] ---
/**
* Disables direct access to memory allocations in a peer context and unregisters any registered allocations.
*
* Returns {@link #CUDA_ERROR_PEER_ACCESS_NOT_ENABLED} if direct peer access has not yet been enabled from {@code peerContext} to the current context.
*
* Returns {@link #CUDA_ERROR_INVALID_CONTEXT} if there is no current context, or if {@code peerContext} is not a valid context.
*
* @param peerContext peer context to disable direct access to
*/
@NativeType("CUresult")
public static int cuCtxDisablePeerAccess(@NativeType("CUcontext") long peerContext) {
long __functionAddress = Functions.CtxDisablePeerAccess;
if (CHECKS) {
check(__functionAddress);
check(peerContext);
}
return callPI(peerContext, __functionAddress);
}
// --- [ cuDeviceGetP2PAttribute ] ---
/** Unsafe version of: {@link #cuDeviceGetP2PAttribute DeviceGetP2PAttribute} */
public static int ncuDeviceGetP2PAttribute(long value, int attrib, int srcDevice, int dstDevice) {
long __functionAddress = Functions.DeviceGetP2PAttribute;
if (CHECKS) {
check(__functionAddress);
}
return callPI(value, attrib, srcDevice, dstDevice, __functionAddress);
}
/**
* Queries attributes of the link between two devices.
*
* Returns in {@code *value} the value of the requested attribute {@code attrib} of the link between {@code srcDevice} and {@code dstDevice}. The
* supported attributes are:
 *
* - {@link #CU_DEVICE_P2P_ATTRIBUTE_PERFORMANCE_RANK DEVICE_P2P_ATTRIBUTE_PERFORMANCE_RANK}: A relative value indicating the performance of the link between two devices.
* - {@link #CU_DEVICE_P2P_ATTRIBUTE_ACCESS_SUPPORTED DEVICE_P2P_ATTRIBUTE_ACCESS_SUPPORTED}: 1 if P2P access is enabled.
* - {@link #CU_DEVICE_P2P_ATTRIBUTE_NATIVE_ATOMIC_SUPPORTED DEVICE_P2P_ATTRIBUTE_NATIVE_ATOMIC_SUPPORTED}: 1 if atomic operations over the link are supported.
* - {@link #CU_DEVICE_P2P_ATTRIBUTE_CUDA_ARRAY_ACCESS_SUPPORTED DEVICE_P2P_ATTRIBUTE_CUDA_ARRAY_ACCESS_SUPPORTED}: 1 if a {@code cudaArray} can be accessed over the link.
 *
* Returns {@link #CUDA_ERROR_INVALID_DEVICE} if {@code srcDevice} or {@code dstDevice} are not valid or if they represent the same device.
*
* Returns {@link #CUDA_ERROR_INVALID_VALUE} if {@code attrib} is not valid or if {@code value} is a null pointer.
*
* @param value returned value of the requested attribute
* @param attrib the requested attribute of the link between {@code srcDevice} and {@code dstDevice}
* @param srcDevice the source device of the target link
* @param dstDevice the destination device of the target link
*/
@NativeType("CUresult")
public static int cuDeviceGetP2PAttribute(@NativeType("int *") IntBuffer value, @NativeType("CUdevice_P2PAttribute") int attrib, @NativeType("CUdevice") int srcDevice, @NativeType("CUdevice") int dstDevice) {
if (CHECKS) {
check(value, 1);
}
return ncuDeviceGetP2PAttribute(memAddress(value), attrib, srcDevice, dstDevice);
}
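// Illustrative sketch, not generated code: checking peer capability, enabling access and querying
// a link attribute. devA/devB and peerContextOnDevB are assumptions of the example (the peer
// context would come from e.g. cuCtxCreate on devB), the current context is assumed to be on
// devA, and return codes should be checked in real code.
private static void examplePeerAccess(int devA, int devB, long peerContextOnDevB) {
    try (MemoryStack stack = stackPush()) {
        IntBuffer canAccess = stack.mallocInt(1);
        cuDeviceCanAccessPeer(canAccess, devA, devB);
        if (canAccess.get(0) == 1) {
            cuCtxEnablePeerAccess(peerContextOnDevB, 0); // unidirectional; Flags must be 0

            IntBuffer rank = stack.mallocInt(1);
            cuDeviceGetP2PAttribute(rank, CU_DEVICE_P2P_ATTRIBUTE_PERFORMANCE_RANK, devA, devB);
        }
    }
}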
// --- [ cuGraphicsUnregisterResource ] ---
/**
* Unregisters a graphics resource for access by CUDA.
*
* Unregisters the graphics resource {@code resource} so it is not accessible by CUDA unless registered again.
*
* If {@code resource} is invalid then {@link #CUDA_ERROR_INVALID_HANDLE} is returned.
*
* @param resource resource to unregister
*/
@NativeType("CUresult")
public static int cuGraphicsUnregisterResource(@NativeType("CUgraphicsResource") long resource) {
long __functionAddress = Functions.GraphicsUnregisterResource;
if (CHECKS) {
check(resource);
}
return callPI(resource, __functionAddress);
}
// --- [ cuGraphicsSubResourceGetMappedArray ] ---
/** Unsafe version of: {@link #cuGraphicsSubResourceGetMappedArray GraphicsSubResourceGetMappedArray} */
public static int ncuGraphicsSubResourceGetMappedArray(long pArray, long resource, int arrayIndex, int mipLevel) {
long __functionAddress = Functions.GraphicsSubResourceGetMappedArray;
if (CHECKS) {
check(resource);
}
return callPPI(pArray, resource, arrayIndex, mipLevel, __functionAddress);
}
/**
* Get an array through which to access a subresource of a mapped graphics resource.
*
* Returns in {@code *pArray} an array through which the subresource of the mapped graphics resource {@code resource} which corresponds to array index
* {@code arrayIndex} and mipmap level {@code mipLevel} may be accessed. The value set in {@code *pArray} may change every time that {@code resource} is
* mapped.
*
* If {@code resource} is not a texture then it cannot be accessed via an array and {@link #CUDA_ERROR_NOT_MAPPED_AS_ARRAY} is returned. If {@code arrayIndex} is
* not a valid array index for {@code resource} then {@link #CUDA_ERROR_INVALID_VALUE} is returned. If {@code mipLevel} is not a valid mipmap level for {@code
* resource} then {@link #CUDA_ERROR_INVALID_VALUE} is returned. If {@code resource} is not mapped then {@link #CUDA_ERROR_NOT_MAPPED} is returned.
*
* @param pArray returned array through which a subresource of {@code resource} may be accessed
* @param resource mapped resource to access
* @param arrayIndex array index for array textures or cubemap face index as defined by {@code CUarray_cubemap_face} for cubemap textures for the subresource to access
* @param mipLevel mipmap level for the subresource to access
*/
@NativeType("CUresult")
public static int cuGraphicsSubResourceGetMappedArray(@NativeType("CUarray *") PointerBuffer pArray, @NativeType("CUgraphicsResource") long resource, @NativeType("unsigned int") int arrayIndex, @NativeType("unsigned int") int mipLevel) {
if (CHECKS) {
check(pArray, 1);
}
return ncuGraphicsSubResourceGetMappedArray(memAddress(pArray), resource, arrayIndex, mipLevel);
}
// --- [ cuGraphicsResourceGetMappedMipmappedArray ] ---
/** Unsafe version of: {@link #cuGraphicsResourceGetMappedMipmappedArray GraphicsResourceGetMappedMipmappedArray} */
public static int ncuGraphicsResourceGetMappedMipmappedArray(long pMipmappedArray, long resource) {
long __functionAddress = Functions.GraphicsResourceGetMappedMipmappedArray;
if (CHECKS) {
check(__functionAddress);
check(resource);
}
return callPPI(pMipmappedArray, resource, __functionAddress);
}
/**
* Get a mipmapped array through which to access a mapped graphics resource.
*
* Returns in {@code *pMipmappedArray} a mipmapped array through which the mapped graphics resource {@code resource} may be accessed. The value set in
* {@code *pMipmappedArray} may change every time that {@code resource} is mapped.
*
* If {@code resource} is not a texture then it cannot be accessed via a mipmapped array and {@link #CUDA_ERROR_NOT_MAPPED_AS_ARRAY} is returned. If {@code
* resource} is not mapped then {@link #CUDA_ERROR_NOT_MAPPED} is returned.
*
* @param pMipmappedArray returned mipmapped array through which {@code resource} may be accessed
* @param resource mapped resource to access
*/
@NativeType("CUresult")
public static int cuGraphicsResourceGetMappedMipmappedArray(@NativeType("CUmipmappedArray *") PointerBuffer pMipmappedArray, @NativeType("CUgraphicsResource") long resource) {
if (CHECKS) {
check(pMipmappedArray, 1);
}
return ncuGraphicsResourceGetMappedMipmappedArray(memAddress(pMipmappedArray), resource);
}
// --- [ cuGraphicsResourceGetMappedPointer ] ---
/** Unsafe version of: {@link #cuGraphicsResourceGetMappedPointer GraphicsResourceGetMappedPointer} */
public static int ncuGraphicsResourceGetMappedPointer(long pDevPtr, long pSize, long resource) {
long __functionAddress = Functions.GraphicsResourceGetMappedPointer;
if (CHECKS) {
check(resource);
}
return callPPPI(pDevPtr, pSize, resource, __functionAddress);
}
/**
* Get a device pointer through which to access a mapped graphics resource.
*
* Returns in {@code *pDevPtr} a pointer through which the mapped graphics resource {@code resource} may be accessed. Returns in {@code *pSize} the size of
* the memory in bytes which may be accessed from that pointer. The value set in {@code *pDevPtr} may change every time that {@code resource} is mapped.
*
* If {@code resource} is not a buffer then it cannot be accessed via a pointer and {@link #CUDA_ERROR_NOT_MAPPED_AS_POINTER} is returned. If {@code resource} is
* not mapped then {@link #CUDA_ERROR_NOT_MAPPED} is returned.
*
* @param pDevPtr returned pointer through which {@code resource} may be accessed
* @param pSize returned size of the buffer accessible starting at {@code *pDevPtr}
* @param resource mapped resource to access
*/
@NativeType("CUresult")
public static int cuGraphicsResourceGetMappedPointer(@NativeType("CUdeviceptr *") PointerBuffer pDevPtr, @NativeType("size_t *") PointerBuffer pSize, @NativeType("CUgraphicsResource") long resource) {
if (CHECKS) {
check(pDevPtr, 1);
check(pSize, 1);
}
return ncuGraphicsResourceGetMappedPointer(memAddress(pDevPtr), memAddress(pSize), resource);
}
// --- [ cuGraphicsResourceSetMapFlags ] ---
/**
* Set usage flags for mapping a graphics resource.
*
* Set {@code flags} for mapping the graphics resource {@code resource}.
*
* Changes to {@code flags} will take effect the next time {@code resource} is mapped. The {@code flags} argument may be any of the following:
 *
* - {@link #CU_GRAPHICS_MAP_RESOURCE_FLAGS_NONE GRAPHICS_MAP_RESOURCE_FLAGS_NONE}: Specifies no hints about how this resource will be used. It is therefore assumed that this resource will be
* read from and written to by CUDA kernels. This is the default value.
* - {@link #CU_GRAPHICS_MAP_RESOURCE_FLAGS_READ_ONLY GRAPHICS_MAP_RESOURCE_FLAGS_READ_ONLY}: Specifies that CUDA kernels which access this resource will not write to this resource.
* - {@link #CU_GRAPHICS_MAP_RESOURCE_FLAGS_WRITE_DISCARD GRAPHICS_MAP_RESOURCE_FLAGS_WRITE_DISCARD}: Specifies that CUDA kernels which access this resource will not read from this resource and will
* write over the entire contents of the resource, so none of the data previously stored in the resource will be preserved.
 *
* If {@code resource} is presently mapped for access by CUDA then {@link #CUDA_ERROR_ALREADY_MAPPED} is returned. If {@code flags} is not one of the above
* values then {@link #CUDA_ERROR_INVALID_VALUE} is returned.
*
* @param resource registered resource to set flags for
* @param flags parameters for resource mapping
*/
@NativeType("CUresult")
public static int cuGraphicsResourceSetMapFlags(@NativeType("CUgraphicsResource") long resource, @NativeType("unsigned int") int flags) {
long __functionAddress = Functions.GraphicsResourceSetMapFlags;
if (CHECKS) {
check(resource);
}
return callPI(resource, flags, __functionAddress);
}
// --- [ cuGraphicsMapResources ] ---
/**
* Unsafe version of: {@link #cuGraphicsMapResources GraphicsMapResources}
*
* @param count number of resources to map
*/
public static int ncuGraphicsMapResources(int count, long resources, long hStream) {
long __functionAddress = Functions.GraphicsMapResources;
return callPPI(count, resources, hStream, __functionAddress);
}
/**
* Map graphics resources for access by CUDA.
*
* Maps the {@code count} graphics resources in {@code resources} for access by CUDA.
*
* The resources in {@code resources} may be accessed by CUDA until they are unmapped. The graphics API from which {@code resources} were registered
* should not access any resources while they are mapped by CUDA. If an application does so, the results are undefined.
*
* This function provides the synchronization guarantee that any graphics calls issued before {@link #cuGraphicsMapResources GraphicsMapResources} will complete before any
* subsequent CUDA work issued in {@code hStream} begins.
*
* If {@code resources} includes any duplicate entries then {@link #CUDA_ERROR_INVALID_HANDLE} is returned. If any of {@code resources} are presently mapped for
* access by CUDA then {@link #CUDA_ERROR_ALREADY_MAPPED} is returned.
*
* @param resources resources to map for CUDA usage
* @param hStream stream with which to synchronize
*/
@NativeType("CUresult")
public static int cuGraphicsMapResources(@NativeType("CUgraphicsResource *") PointerBuffer resources, @NativeType("CUstream") long hStream) {
return ncuGraphicsMapResources(resources.remaining(), memAddress(resources), hStream);
}
// --- [ cuGraphicsUnmapResources ] ---
/**
* Unsafe version of: {@link #cuGraphicsUnmapResources GraphicsUnmapResources}
*
* @param count number of resources to unmap
*/
public static int ncuGraphicsUnmapResources(int count, long resources, long hStream) {
long __functionAddress = Functions.GraphicsUnmapResources;
return callPPI(count, resources, hStream, __functionAddress);
}
/**
* Unmap graphics resources.
*
* Unmaps the {@code count} graphics resources in {@code resources}.
*
* Once unmapped, the resources in {@code resources} may not be accessed by CUDA until they are mapped again.
*
* This function provides the synchronization guarantee that any CUDA work issued in {@code hStream} before {@link #cuGraphicsUnmapResources GraphicsUnmapResources} will complete
* before any subsequently issued graphics work begins.
*
* If {@code resources} includes any duplicate entries then {@link #CUDA_ERROR_INVALID_HANDLE} is returned. If any of {@code resources} are not presently mapped
* for access by CUDA then {@link #CUDA_ERROR_NOT_MAPPED} is returned.
*
* @param resources resources to unmap
* @param hStream stream with which to synchronize
*/
@NativeType("CUresult")
public static int cuGraphicsUnmapResources(@NativeType("CUgraphicsResource *") PointerBuffer resources, @NativeType("CUstream") long hStream) {
return ncuGraphicsUnmapResources(resources.remaining(), memAddress(resources), hStream);
}
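// Illustrative sketch, not generated code: the typical per-frame interop sequence for a
// buffer-type resource. The resource handle is an assumption of the example (it would come from a
// graphics-API registration call); texture-type resources would use
// GraphicsSubResourceGetMappedArray instead of the mapped-pointer query.
private static void exampleMapUseUnmap(long resource, long hStream) {
    try (MemoryStack stack = stackPush()) {
        // Hint that kernels will only read; takes effect at the next map.
        cuGraphicsResourceSetMapFlags(resource, CU_GRAPHICS_MAP_RESOURCE_FLAGS_READ_ONLY);

        PointerBuffer resources = stack.mallocPointer(1);
        resources.put(0, resource);

        cuGraphicsMapResources(resources, hStream);

        PointerBuffer pDevPtr = stack.mallocPointer(1);
        PointerBuffer pSize = stack.mallocPointer(1);
        cuGraphicsResourceGetMappedPointer(pDevPtr, pSize, resource);
        // ... enqueue CUDA work on hStream that reads pDevPtr.get(0) ...

        cuGraphicsUnmapResources(resources, hStream);
    }
}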
// --- [ cuGetProcAddress ] ---
/** Unsafe version of: {@link #cuGetProcAddress GetProcAddress} */
public static int ncuGetProcAddress(long symbol, long pfn, int cudaVersion, long flags) {
long __functionAddress = Functions.GetProcAddress;
if (CHECKS) {
check(__functionAddress);
}
return callPPJI(symbol, pfn, cudaVersion, flags, __functionAddress);
}
/**
* Returns the requested driver API function pointer.
*
* Returns in {@code **pfn} the address of the CUDA driver function for the requested CUDA version and flags.
*
* The CUDA version is specified as (1000 * major + 10 * minor), so CUDA 11.2 should be specified as 11020. For a requested driver symbol, if the
* specified CUDA version is greater than or equal to the CUDA version in which the driver symbol was introduced, this API will return the function
* pointer to the corresponding versioned function.
*
* The pointer returned by the API should be cast to a function pointer matching the requested driver function's definition in the API header file. The
* function pointer typedef can be picked up from the corresponding typedefs header file. For example, cudaTypedefs.h consists of function pointer
* typedefs for driver APIs defined in cuda.h.
*
* The API will return {@link #CUDA_ERROR_NOT_FOUND} if the requested driver function is not supported on the platform, no ABI compatible driver function exists
* for the specified {@code cudaVersion} or if the driver symbol is invalid.
*
* The requested flags can be:
 *
* - {@link #CU_GET_PROC_ADDRESS_DEFAULT GET_PROC_ADDRESS_DEFAULT}: This is the default mode. This is equivalent to {@link #CU_GET_PROC_ADDRESS_PER_THREAD_DEFAULT_STREAM GET_PROC_ADDRESS_PER_THREAD_DEFAULT_STREAM} if the code is
* compiled with --default-stream per-thread compilation flag or the macro {@code CUDA_API_PER_THREAD_DEFAULT_STREAM} is defined;
* {@link #CU_GET_PROC_ADDRESS_LEGACY_STREAM GET_PROC_ADDRESS_LEGACY_STREAM} otherwise.
* - {@link #CU_GET_PROC_ADDRESS_LEGACY_STREAM GET_PROC_ADDRESS_LEGACY_STREAM}: This will enable the search for all driver symbols that match the requested driver symbol name except the
* corresponding per-thread versions.
* - {@link #CU_GET_PROC_ADDRESS_PER_THREAD_DEFAULT_STREAM GET_PROC_ADDRESS_PER_THREAD_DEFAULT_STREAM}: This will enable the search for all driver symbols that match the requested driver symbol name
* including the per-thread versions. If a per-thread version is not found, the API will return the legacy version of the driver function.
 *
* @param symbol the base name of the driver API function to look for. As an example, for the driver API {@code cuMemAlloc_v2()}, {@code symbol} would be
* {@code cuMemAlloc} and {@code cudaVersion} would be the ABI compatible CUDA version for the {@code _v2} variant.
* @param pfn location to return the function pointer to the requested driver function
* @param cudaVersion the CUDA version to look for the requested driver symbol
* @param flags flags to specify search options
*/
@NativeType("CUresult")
public static int cuGetProcAddress(@NativeType("char const *") ByteBuffer symbol, @NativeType("void **") PointerBuffer pfn, int cudaVersion, @NativeType("cuuint64_t") long flags) {
if (CHECKS) {
checkNT1(symbol);
check(pfn, 1);
}
return ncuGetProcAddress(memAddress(symbol), memAddress(pfn), cudaVersion, flags);
}
/**
* Returns the requested driver API function pointer.
*
* Returns in {@code **pfn} the address of the CUDA driver function for the requested CUDA version and flags.
*
* The CUDA version is specified as (1000 * major + 10 * minor), so CUDA 11.2 should be specified as 11020. For a requested driver symbol, if the
* specified CUDA version is greater than or equal to the CUDA version in which the driver symbol was introduced, this API will return the function
* pointer to the corresponding versioned function.
*
* The pointer returned by the API should be cast to a function pointer matching the requested driver function's definition in the API header file. The
* function pointer typedef can be picked up from the corresponding typedefs header file. For example, cudaTypedefs.h consists of function pointer
* typedefs for driver APIs defined in cuda.h.
*
* The API will return {@link #CUDA_ERROR_NOT_FOUND} if the requested driver function is not supported on the platform, no ABI compatible driver function exists
* for the specified {@code cudaVersion} or if the driver symbol is invalid.
*
* The requested flags can be:
 *
* - {@link #CU_GET_PROC_ADDRESS_DEFAULT GET_PROC_ADDRESS_DEFAULT}: This is the default mode. This is equivalent to {@link #CU_GET_PROC_ADDRESS_PER_THREAD_DEFAULT_STREAM GET_PROC_ADDRESS_PER_THREAD_DEFAULT_STREAM} if the code is
* compiled with --default-stream per-thread compilation flag or the macro {@code CUDA_API_PER_THREAD_DEFAULT_STREAM} is defined;
* {@link #CU_GET_PROC_ADDRESS_LEGACY_STREAM GET_PROC_ADDRESS_LEGACY_STREAM} otherwise.
* - {@link #CU_GET_PROC_ADDRESS_LEGACY_STREAM GET_PROC_ADDRESS_LEGACY_STREAM}: This will enable the search for all driver symbols that match the requested driver symbol name except the
* corresponding per-thread versions.
* - {@link #CU_GET_PROC_ADDRESS_PER_THREAD_DEFAULT_STREAM GET_PROC_ADDRESS_PER_THREAD_DEFAULT_STREAM}: This will enable the search for all driver symbols that match the requested driver symbol name
* including the per-thread versions. If a per-thread version is not found, the API will return the legacy version of the driver function.
 *
* @param symbol the base name of the driver API function to look for. As an example, for the driver API {@code cuMemAlloc_v2()}, {@code symbol} would be
* {@code cuMemAlloc} and {@code cudaVersion} would be the ABI compatible CUDA version for the {@code _v2} variant.
* @param pfn location to return the function pointer to the requested driver function
* @param cudaVersion the CUDA version to look for the requested driver symbol
* @param flags flags to specify search options
*/
@NativeType("CUresult")
public static int cuGetProcAddress(@NativeType("char const *") CharSequence symbol, @NativeType("void **") PointerBuffer pfn, int cudaVersion, @NativeType("cuuint64_t") long flags) {
if (CHECKS) {
check(pfn, 1);
}
MemoryStack stack = stackGet(); int stackPointer = stack.getPointer();
try {
stack.nASCII(symbol, true);
long symbolEncoded = stack.getPointerAddress();
return ncuGetProcAddress(symbolEncoded, memAddress(pfn), cudaVersion, flags);
} finally {
stack.setPointer(stackPointer);
}
}
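// Illustrative sketch, not generated code: resolving a versioned driver entry point at runtime.
// The symbol name and version encoding follow the documentation above; the returned address could
// then be invoked via the org.lwjgl.system.JNI call* helpers.
private static long exampleGetProcAddress() {
    try (MemoryStack stack = stackPush()) {
        PointerBuffer pfn = stack.mallocPointer(1);
        // CUDA 11.2 is encoded as 1000 * 11 + 10 * 2 == 11020.
        int err = cuGetProcAddress("cuMemAlloc", pfn, 11020, CU_GET_PROC_ADDRESS_DEFAULT);
        return err == CUDA_SUCCESS ? pfn.get(0) : NULL;
    }
}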
// --- [ cuGetExportTable ] ---
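/** Unsafe version of: {@link #cuGetExportTable GetExportTable} */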
public static int ncuGetExportTable(long ppExportTable, long pExportTableId) {
long __functionAddress = Functions.GetExportTable;
return callPPI(ppExportTable, pExportTableId, __functionAddress);
}
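/**
 * Provides access to an internal driver export table. This entry point is not documented in the public CUDA API; the layout of the returned table is
 * known only to NVIDIA software.
 *
 * @param ppExportTable returned pointer to the requested export table
 * @param pExportTableId UUID identifying the export table to retrieve
 */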
@NativeType("CUresult")
public static int cuGetExportTable(@NativeType("void const **") PointerBuffer ppExportTable, @NativeType("CUuuid const *") CUuuid pExportTableId) {
return ncuGetExportTable(memAddress(ppExportTable), pExportTableId.address());
}
// --- [ cuIpcOpenEventHandle ] ---
private static final FFICIF IpcOpenEventHandleCIF = apiCreateCIF(
apiStdcall(), ffi_type_uint32,
ffi_type_pointer, apiCreateStruct(apiCreateArray(ffi_type_schar, 64))
);
/** Unsafe version of: {@link #cuIpcOpenEventHandle IpcOpenEventHandle} */
public static int ncuIpcOpenEventHandle(long phEvent, long handle) {
long __functionAddress = Functions.IpcOpenEventHandle$Address;
if (CHECKS) {
check(__functionAddress);
}
MemoryStack stack = stackGet(); int stackPointer = stack.getPointer();
try {
long __result = stack.nmalloc(4);
long values = stack.nmalloc(8, POINTER_SIZE + 8);
memPutAddress(values, phEvent);
memPutLong(values + POINTER_SIZE, handle);
long arguments = stack.nmalloc(POINTER_SIZE, POINTER_SIZE * 2);
memPutAddress(arguments, values);
memPutAddress(arguments + POINTER_SIZE, values + POINTER_SIZE);
nffi_call(IpcOpenEventHandleCIF.address(), __functionAddress, __result, arguments);
return memGetInt(__result);
} finally {
stack.setPointer(stackPointer);
}
}
/**
* Opens an interprocess event handle for use in the current process.
*
* Opens an interprocess event handle exported from another process with {@link #cuIpcGetEventHandle IpcGetEventHandle}. This function returns a {@code CUevent} that behaves like a
* locally created event with the {@link #CU_EVENT_DISABLE_TIMING EVENT_DISABLE_TIMING} flag specified. This event must be freed with {@link #cuEventDestroy EventDestroy}.
*
* Performing operations on the imported event after the exported event has been freed with {@link #cuEventDestroy EventDestroy} will result in undefined behavior.
*
* IPC functionality is restricted to devices with support for unified addressing on Linux and Windows operating systems. IPC functionality on Windows is
* restricted to GPUs in TCC mode.
*
* @param phEvent returns the imported event
* @param handle interprocess handle to open
*/
@NativeType("CUresult")
public static int cuIpcOpenEventHandle(@NativeType("CUevent *") PointerBuffer phEvent, @NativeType("CUipcEventHandle") CUIPCEventHandle handle) {
if (CHECKS) {
check(phEvent, 1);
}
return ncuIpcOpenEventHandle(memAddress(phEvent), handle.address());
}
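// Illustrative sketch, not generated code: importing an event exported by another process. The
// handle is assumed to have been reconstructed from bytes sent by the exporting process, which
// obtained them via IpcGetEventHandle.
private static long exampleOpenIpcEvent(CUIPCEventHandle handle) {
    try (MemoryStack stack = stackPush()) {
        PointerBuffer phEvent = stack.mallocPointer(1);
        cuIpcOpenEventHandle(phEvent, handle);
        return phEvent.get(0); // free with cuEventDestroy when done
    }
}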
// --- [ cuIpcOpenMemHandle ] ---
private static final FFICIF IpcOpenMemHandleCIF = apiCreateCIF(
apiStdcall(), ffi_type_uint32,
ffi_type_pointer, apiCreateStruct(apiCreateArray(ffi_type_schar, 64)), ffi_type_uint32
);
/** Unsafe version of: {@link #cuIpcOpenMemHandle IpcOpenMemHandle} */
public static int ncuIpcOpenMemHandle(long pdptr, long handle, int Flags) {
long __functionAddress = Functions.IpcOpenMemHandle$Address;
if (CHECKS) {
check(__functionAddress);
}
MemoryStack stack = stackGet(); int stackPointer = stack.getPointer();
try {
long __result = stack.nmalloc(4);
long values = stack.nmalloc(8, POINTER_SIZE + 8 + 4);
memPutAddress(values, pdptr);
memPutLong(values + POINTER_SIZE, handle);
memPutInt(values + POINTER_SIZE + 8, Flags);
long arguments = stack.nmalloc(POINTER_SIZE, POINTER_SIZE * 3);
memPutAddress(arguments, values);
memPutAddress(arguments + POINTER_SIZE, values + POINTER_SIZE);
memPutAddress(arguments + POINTER_SIZE * 2, values + POINTER_SIZE + 8);
nffi_call(IpcOpenMemHandleCIF.address(), __functionAddress, __result, arguments);
return memGetInt(__result);
} finally {
stack.setPointer(stackPointer);
}
}
/**
* Opens an interprocess memory handle exported from another process and returns a device pointer usable in the local process.
*
* Maps memory exported from another process with {@link #cuIpcGetMemHandle IpcGetMemHandle} into the current device address space. For contexts on different devices
* {@code cuIpcOpenMemHandle} can attempt to enable peer access between the devices as if the user called {@link #cuCtxEnablePeerAccess CtxEnablePeerAccess}. This behavior is controlled
* by the {@link #CU_IPC_MEM_LAZY_ENABLE_PEER_ACCESS IPC_MEM_LAZY_ENABLE_PEER_ACCESS} flag. {@link #cuDeviceCanAccessPeer DeviceCanAccessPeer} can determine if a mapping is possible.
*
* Contexts that may open {@link CUIPCMemHandle}s are restricted in the following way: {@code CUipcMemHandle}s from each {@code CUdevice} in a given process may
* only be opened by one {@code CUcontext} per {@code CUdevice} per other process.
*
* If the memory handle has already been opened by the current context, the reference count on the handle is incremented by 1 and the existing device
* pointer is returned.
*
* Memory returned from {@code cuIpcOpenMemHandle} must be freed with {@link #cuIpcCloseMemHandle IpcCloseMemHandle}.
*
* Calling {@link #cuMemFree MemFree} on an exported memory region before calling {@link #cuIpcCloseMemHandle IpcCloseMemHandle} in the importing context will result in undefined behavior.
*
* IPC functionality is restricted to devices with support for unified addressing on Linux and Windows operating systems. IPC functionality on Windows is
* restricted to GPUs in TCC mode.
*
* Note: no guarantees are made about the address returned in {@code *pdptr}. In particular, multiple processes may not receive the same address for
* the same {@code handle}.
*
* @param pdptr returned device pointer
* @param handle {@code CUipcMemHandle} to open
* @param Flags flags for this operation. Must be specified as {@link #CU_IPC_MEM_LAZY_ENABLE_PEER_ACCESS IPC_MEM_LAZY_ENABLE_PEER_ACCESS}
*/
@NativeType("CUresult")
public static int cuIpcOpenMemHandle(@NativeType("CUdeviceptr *") PointerBuffer pdptr, @NativeType("CUipcMemHandle") CUIPCMemHandle handle, @NativeType("unsigned int") int Flags) {
if (CHECKS) {
check(pdptr, 1);
}
return ncuIpcOpenMemHandle(memAddress(pdptr), handle.address(), Flags);
}
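// Illustrative sketch, not generated code: importing memory exported by another process. The
// handle contents are assumed to have been transferred from the exporter (which used
// IpcGetMemHandle); devices must support unified addressing, and on Windows the GPU must be in
// TCC mode, as noted above.
private static long exampleOpenIpcMemory(CUIPCMemHandle handle) {
    try (MemoryStack stack = stackPush()) {
        PointerBuffer pdptr = stack.mallocPointer(1);
        cuIpcOpenMemHandle(pdptr, handle, CU_IPC_MEM_LAZY_ENABLE_PEER_ACCESS);
        return pdptr.get(0); // release later with cuIpcCloseMemHandle
    }
}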
}