
/*
 * Copyright LWJGL. All rights reserved.
 * License terms: https://www.lwjgl.org/license
 * MACHINE GENERATED FILE, DO NOT EDIT
 */
package org.lwjgl.cuda;

import javax.annotation.*;

import java.nio.*;

import org.lwjgl.*;

import org.lwjgl.system.*;

import static org.lwjgl.system.APIUtil.*;
import static org.lwjgl.system.Checks.*;
import static org.lwjgl.system.JNI.*;
import static org.lwjgl.system.MemoryStack.*;
import static org.lwjgl.system.MemoryUtil.*;
import static org.lwjgl.system.Pointer.*;

import org.lwjgl.system.libffi.*;

import static org.lwjgl.cuda.CUDA.*;
import static org.lwjgl.system.libffi.LibFFI.*;

/**
 * Contains bindings to the CUDA Driver API.
 * 
 * <p>Functionality up to CUDA version 3.2, which is the minimum version compatible with the LWJGL bindings, is guaranteed to be available. Functions
 * introduced after CUDA 3.2 may be missing, depending on the CUDA version available at runtime.</p>
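 * 
 * <p>A minimal usage sketch (not part of the original documentation): it initializes the driver, then queries the first device's name and compute
 * capability. It assumes the usual LWJGL static imports ({@code org.lwjgl.cuda.CU.*}, {@code org.lwjgl.system.MemoryStack.*},
 * {@code org.lwjgl.system.MemoryUtil.*}).</p>
 * 
 * <pre>{@code
 * try (MemoryStack stack = stackPush()) {
 *     IntBuffer pi = stack.mallocInt(1);
 * 
 *     if (cuInit(0) != CUDA_SUCCESS) {
 *         throw new IllegalStateException("cuInit failed");
 *     }
 * 
 *     cuDeviceGetCount(pi);
 *     System.out.println("CUDA devices: " + pi.get(0));
 * 
 *     cuDeviceGet(pi, 0); // ordinal 0 = first device
 *     int device = pi.get(0);
 * 
 *     ByteBuffer name = stack.malloc(256);
 *     cuDeviceGetName(name, device);
 *     System.out.println("Device 0: " + memASCII(memAddress(name)));
 * 
 *     cuDeviceGetAttribute(pi, CU_DEVICE_ATTRIBUTE_COMPUTE_CAPABILITY_MAJOR, device);
 *     System.out.println("Compute capability (major): " + pi.get(0));
 * }
 * }</pre>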

*/ public class CU { private static final SharedLibrary NVCUDA = Library.loadNative(CU.class, "org.lwjgl.cuda", Configuration.CUDA_LIBRARY_NAME, "nvcuda"); /** Contains the function pointers loaded from the NVCUDA {@link SharedLibrary}. */ public static final class Functions { private Functions() {} /** Function address. */ public static final long GetErrorString = apiGetFunctionAddress(NVCUDA, "cuGetErrorString"), GetErrorName = apiGetFunctionAddress(NVCUDA, "cuGetErrorName"), Init = apiGetFunctionAddress(NVCUDA, "cuInit"), DriverGetVersion = apiGetFunctionAddress(NVCUDA, "cuDriverGetVersion"), DeviceGet = apiGetFunctionAddress(NVCUDA, "cuDeviceGet"), DeviceGetCount = apiGetFunctionAddress(NVCUDA, "cuDeviceGetCount"), DeviceGetName = apiGetFunctionAddress(NVCUDA, "cuDeviceGetName"), DeviceGetUuid = NVCUDA.getFunctionAddress("cuDeviceGetUuid"), DeviceGetUuid_v2 = NVCUDA.getFunctionAddress("cuDeviceGetUuid_v2"), DeviceGetLuid = NVCUDA.getFunctionAddress("cuDeviceGetLuid"), DeviceTotalMem = apiGetFunctionAddress(NVCUDA, __CUDA_API_VERSION("cuDeviceTotalMem", 2)), DeviceGetTexture1DLinearMaxWidth = NVCUDA.getFunctionAddress("cuDeviceGetTexture1DLinearMaxWidth"), DeviceGetAttribute = apiGetFunctionAddress(NVCUDA, "cuDeviceGetAttribute"), DeviceGetNvSciSyncAttributes = NVCUDA.getFunctionAddress("cuDeviceGetNvSciSyncAttributes"), DeviceSetMemPool = NVCUDA.getFunctionAddress("cuDeviceSetMemPool"), DeviceGetMemPool = NVCUDA.getFunctionAddress("cuDeviceGetMemPool"), DeviceGetDefaultMemPool = NVCUDA.getFunctionAddress("cuDeviceGetDefaultMemPool"), FlushGPUDirectRDMAWrites = NVCUDA.getFunctionAddress("cuFlushGPUDirectRDMAWrites"), DeviceGetProperties = apiGetFunctionAddress(NVCUDA, "cuDeviceGetProperties"), DeviceComputeCapability = apiGetFunctionAddress(NVCUDA, "cuDeviceComputeCapability"), DevicePrimaryCtxRetain = NVCUDA.getFunctionAddress("cuDevicePrimaryCtxRetain"), DevicePrimaryCtxRelease = NVCUDA.getFunctionAddress(__CUDA_API_VERSION("cuDevicePrimaryCtxRelease", 2)), DevicePrimaryCtxSetFlags = NVCUDA.getFunctionAddress(__CUDA_API_VERSION("cuDevicePrimaryCtxSetFlags", 2)), DevicePrimaryCtxGetState = NVCUDA.getFunctionAddress("cuDevicePrimaryCtxGetState"), DevicePrimaryCtxReset = NVCUDA.getFunctionAddress(__CUDA_API_VERSION("cuDevicePrimaryCtxReset", 2)), DeviceGetExecAffinitySupport = NVCUDA.getFunctionAddress("cuDeviceGetExecAffinitySupport"), CtxCreate = apiGetFunctionAddress(NVCUDA, __CUDA_API_VERSION("cuCtxCreate", 2)), CtxCreate_v3 = NVCUDA.getFunctionAddress("cuCtxCreate_v3"), CtxDestroy = NVCUDA.getFunctionAddress(__CUDA_API_VERSION("cuCtxDestroy", 2)), CtxPushCurrent = NVCUDA.getFunctionAddress(__CUDA_API_VERSION("cuCtxPushCurrent", 2)), CtxPopCurrent = NVCUDA.getFunctionAddress(__CUDA_API_VERSION("cuCtxPopCurrent", 2)), CtxSetCurrent = NVCUDA.getFunctionAddress("cuCtxSetCurrent"), CtxGetCurrent = NVCUDA.getFunctionAddress("cuCtxGetCurrent"), CtxGetDevice = apiGetFunctionAddress(NVCUDA, "cuCtxGetDevice"), CtxGetFlags = NVCUDA.getFunctionAddress("cuCtxGetFlags"), CtxSynchronize = apiGetFunctionAddress(NVCUDA, "cuCtxSynchronize"), CtxSetLimit = apiGetFunctionAddress(NVCUDA, "cuCtxSetLimit"), CtxGetLimit = apiGetFunctionAddress(NVCUDA, "cuCtxGetLimit"), CtxGetCacheConfig = apiGetFunctionAddress(NVCUDA, "cuCtxGetCacheConfig"), CtxSetCacheConfig = apiGetFunctionAddress(NVCUDA, "cuCtxSetCacheConfig"), CtxGetSharedMemConfig = NVCUDA.getFunctionAddress("cuCtxGetSharedMemConfig"), CtxSetSharedMemConfig = NVCUDA.getFunctionAddress("cuCtxSetSharedMemConfig"), CtxGetApiVersion = 
apiGetFunctionAddress(NVCUDA, "cuCtxGetApiVersion"), CtxGetStreamPriorityRange = apiGetFunctionAddress(NVCUDA, "cuCtxGetStreamPriorityRange"), CtxResetPersistingL2Cache = NVCUDA.getFunctionAddress("cuCtxResetPersistingL2Cache"), CtxGetExecAffinity = NVCUDA.getFunctionAddress("cuCtxGetExecAffinity"), CtxAttach = apiGetFunctionAddress(NVCUDA, "cuCtxAttach"), CtxDetach = apiGetFunctionAddress(NVCUDA, "cuCtxDetach"), ModuleLoad = apiGetFunctionAddress(NVCUDA, "cuModuleLoad"), ModuleLoadData = apiGetFunctionAddress(NVCUDA, "cuModuleLoadData"), ModuleLoadDataEx = apiGetFunctionAddress(NVCUDA, "cuModuleLoadDataEx"), ModuleLoadFatBinary = apiGetFunctionAddress(NVCUDA, "cuModuleLoadFatBinary"), ModuleUnload = apiGetFunctionAddress(NVCUDA, "cuModuleUnload"), ModuleGetFunction = apiGetFunctionAddress(NVCUDA, "cuModuleGetFunction"), ModuleGetGlobal = apiGetFunctionAddress(NVCUDA, __CUDA_API_VERSION("cuModuleGetGlobal", 2)), ModuleGetTexRef = apiGetFunctionAddress(NVCUDA, "cuModuleGetTexRef"), ModuleGetSurfRef = apiGetFunctionAddress(NVCUDA, "cuModuleGetSurfRef"), LinkCreate = NVCUDA.getFunctionAddress(__CUDA_API_VERSION("cuLinkCreate", 2)), LinkAddData = NVCUDA.getFunctionAddress(__CUDA_API_VERSION("cuLinkAddData", 2)), LinkAddFile = NVCUDA.getFunctionAddress(__CUDA_API_VERSION("cuLinkAddFile", 2)), LinkComplete = NVCUDA.getFunctionAddress("cuLinkComplete"), LinkDestroy = NVCUDA.getFunctionAddress("cuLinkDestroy"), MemGetInfo = apiGetFunctionAddress(NVCUDA, __CUDA_API_VERSION("cuMemGetInfo", 2)), MemAlloc = apiGetFunctionAddress(NVCUDA, __CUDA_API_VERSION("cuMemAlloc", 2)), MemAllocPitch = apiGetFunctionAddress(NVCUDA, __CUDA_API_VERSION("cuMemAllocPitch", 2)), MemFree = apiGetFunctionAddress(NVCUDA, __CUDA_API_VERSION("cuMemFree", 2)), MemGetAddressRange = apiGetFunctionAddress(NVCUDA, __CUDA_API_VERSION("cuMemGetAddressRange", 2)), MemAllocHost = apiGetFunctionAddress(NVCUDA, __CUDA_API_VERSION("cuMemAllocHost", 2)), MemFreeHost = apiGetFunctionAddress(NVCUDA, "cuMemFreeHost"), MemHostAlloc = apiGetFunctionAddress(NVCUDA, "cuMemHostAlloc"), MemHostGetDevicePointer = apiGetFunctionAddress(NVCUDA, __CUDA_API_VERSION("cuMemHostGetDevicePointer", 2)), MemHostGetFlags = apiGetFunctionAddress(NVCUDA, "cuMemHostGetFlags"), MemAllocManaged = NVCUDA.getFunctionAddress("cuMemAllocManaged"), DeviceGetByPCIBusId = NVCUDA.getFunctionAddress("cuDeviceGetByPCIBusId"), DeviceGetPCIBusId = NVCUDA.getFunctionAddress("cuDeviceGetPCIBusId"), IpcGetEventHandle = NVCUDA.getFunctionAddress("cuIpcGetEventHandle"), IpcOpenEventHandle$Address = NVCUDA.getFunctionAddress("cuIpcOpenEventHandle"), IpcGetMemHandle = NVCUDA.getFunctionAddress("cuIpcGetMemHandle"), IpcOpenMemHandle$Address = NVCUDA.getFunctionAddress(__CUDA_API_VERSION("cuIpcOpenMemHandle", 2)), IpcCloseMemHandle = NVCUDA.getFunctionAddress("cuIpcCloseMemHandle"), MemHostRegister = NVCUDA.getFunctionAddress(__CUDA_API_VERSION("cuMemHostRegister", 2)), MemHostUnregister = NVCUDA.getFunctionAddress("cuMemHostUnregister"), Memcpy = NVCUDA.getFunctionAddress(__CUDA_API_PTDS("cuMemcpy")), MemcpyPeer = NVCUDA.getFunctionAddress(__CUDA_API_PTDS("cuMemcpyPeer")), MemcpyHtoD = apiGetFunctionAddress(NVCUDA, __CUDA_API_PTDS(__CUDA_API_VERSION("cuMemcpyHtoD", 2))), MemcpyDtoH = apiGetFunctionAddress(NVCUDA, __CUDA_API_PTDS(__CUDA_API_VERSION("cuMemcpyDtoH", 2))), MemcpyDtoD = apiGetFunctionAddress(NVCUDA, __CUDA_API_PTDS(__CUDA_API_VERSION("cuMemcpyDtoD", 2))), MemcpyDtoA = apiGetFunctionAddress(NVCUDA, __CUDA_API_PTDS(__CUDA_API_VERSION("cuMemcpyDtoA", 2))), MemcpyAtoD = 
apiGetFunctionAddress(NVCUDA, __CUDA_API_PTDS(__CUDA_API_VERSION("cuMemcpyAtoD", 2))), MemcpyHtoA = apiGetFunctionAddress(NVCUDA, __CUDA_API_PTDS(__CUDA_API_VERSION("cuMemcpyHtoA", 2))), MemcpyAtoH = apiGetFunctionAddress(NVCUDA, __CUDA_API_PTDS(__CUDA_API_VERSION("cuMemcpyAtoH", 2))), MemcpyAtoA = apiGetFunctionAddress(NVCUDA, __CUDA_API_PTDS(__CUDA_API_VERSION("cuMemcpyAtoA", 2))), Memcpy2D = apiGetFunctionAddress(NVCUDA, __CUDA_API_PTDS(__CUDA_API_VERSION("cuMemcpy2D", 2))), Memcpy2DUnaligned = apiGetFunctionAddress(NVCUDA, __CUDA_API_PTDS(__CUDA_API_VERSION("cuMemcpy2DUnaligned", 2))), Memcpy3D = apiGetFunctionAddress(NVCUDA, __CUDA_API_PTDS(__CUDA_API_VERSION("cuMemcpy3D", 2))), Memcpy3DPeer = NVCUDA.getFunctionAddress(__CUDA_API_PTDS("cuMemcpy3DPeer")), MemcpyAsync = NVCUDA.getFunctionAddress(__CUDA_API_PTSZ("cuMemcpyAsync")), MemcpyPeerAsync = NVCUDA.getFunctionAddress(__CUDA_API_PTSZ("cuMemcpyPeerAsync")), MemcpyHtoDAsync = apiGetFunctionAddress(NVCUDA, __CUDA_API_PTSZ(__CUDA_API_VERSION("cuMemcpyHtoDAsync", 2))), MemcpyDtoHAsync = apiGetFunctionAddress(NVCUDA, __CUDA_API_PTSZ(__CUDA_API_VERSION("cuMemcpyDtoHAsync", 2))), MemcpyDtoDAsync = apiGetFunctionAddress(NVCUDA, __CUDA_API_PTSZ(__CUDA_API_VERSION("cuMemcpyDtoDAsync", 2))), MemcpyHtoAAsync = apiGetFunctionAddress(NVCUDA, __CUDA_API_PTSZ(__CUDA_API_VERSION("cuMemcpyHtoAAsync", 2))), MemcpyAtoHAsync = apiGetFunctionAddress(NVCUDA, __CUDA_API_PTSZ(__CUDA_API_VERSION("cuMemcpyAtoHAsync", 2))), Memcpy2DAsync = apiGetFunctionAddress(NVCUDA, __CUDA_API_PTSZ(__CUDA_API_VERSION("cuMemcpy2DAsync", 2))), Memcpy3DAsync = apiGetFunctionAddress(NVCUDA, __CUDA_API_PTSZ(__CUDA_API_VERSION("cuMemcpy3DAsync", 2))), Memcpy3DPeerAsync = NVCUDA.getFunctionAddress(__CUDA_API_PTSZ("cuMemcpy3DPeerAsync")), MemsetD8 = apiGetFunctionAddress(NVCUDA, __CUDA_API_PTDS(__CUDA_API_VERSION("cuMemsetD8", 2))), MemsetD16 = apiGetFunctionAddress(NVCUDA, __CUDA_API_PTDS(__CUDA_API_VERSION("cuMemsetD16", 2))), MemsetD32 = apiGetFunctionAddress(NVCUDA, __CUDA_API_PTDS(__CUDA_API_VERSION("cuMemsetD32", 2))), MemsetD2D8 = apiGetFunctionAddress(NVCUDA, __CUDA_API_PTDS(__CUDA_API_VERSION("cuMemsetD2D8", 2))), MemsetD2D16 = apiGetFunctionAddress(NVCUDA, __CUDA_API_PTDS(__CUDA_API_VERSION("cuMemsetD2D16", 2))), MemsetD2D32 = apiGetFunctionAddress(NVCUDA, __CUDA_API_PTDS(__CUDA_API_VERSION("cuMemsetD2D32", 2))), MemsetD8Async = apiGetFunctionAddress(NVCUDA, __CUDA_API_PTSZ("cuMemsetD8Async")), MemsetD16Async = apiGetFunctionAddress(NVCUDA, __CUDA_API_PTSZ("cuMemsetD16Async")), MemsetD32Async = apiGetFunctionAddress(NVCUDA, __CUDA_API_PTSZ("cuMemsetD32Async")), MemsetD2D8Async = apiGetFunctionAddress(NVCUDA, __CUDA_API_PTSZ("cuMemsetD2D8Async")), MemsetD2D16Async = apiGetFunctionAddress(NVCUDA, __CUDA_API_PTSZ("cuMemsetD2D16Async")), MemsetD2D32Async = apiGetFunctionAddress(NVCUDA, __CUDA_API_PTSZ("cuMemsetD2D32Async")), ArrayCreate = apiGetFunctionAddress(NVCUDA, __CUDA_API_VERSION("cuArrayCreate", 2)), ArrayGetDescriptor = apiGetFunctionAddress(NVCUDA, __CUDA_API_VERSION("cuArrayGetDescriptor", 2)), ArrayGetSparseProperties = NVCUDA.getFunctionAddress("cuArrayGetSparseProperties"), MipmappedArrayGetSparseProperties = NVCUDA.getFunctionAddress("cuMipmappedArrayGetSparseProperties"), ArrayGetPlane = NVCUDA.getFunctionAddress("cuArrayGetPlane"), ArrayDestroy = apiGetFunctionAddress(NVCUDA, "cuArrayDestroy"), Array3DCreate = apiGetFunctionAddress(NVCUDA, __CUDA_API_VERSION("cuArray3DCreate", 2)), Array3DGetDescriptor = apiGetFunctionAddress(NVCUDA, 
__CUDA_API_VERSION("cuArray3DGetDescriptor", 2)), MipmappedArrayCreate = NVCUDA.getFunctionAddress("cuMipmappedArrayCreate"), MipmappedArrayGetLevel = NVCUDA.getFunctionAddress("cuMipmappedArrayGetLevel"), MipmappedArrayDestroy = NVCUDA.getFunctionAddress("cuMipmappedArrayDestroy"), MemAddressReserve = NVCUDA.getFunctionAddress("cuMemAddressReserve"), MemAddressFree = NVCUDA.getFunctionAddress("cuMemAddressFree"), MemCreate = NVCUDA.getFunctionAddress("cuMemCreate"), MemRelease = NVCUDA.getFunctionAddress("cuMemRelease"), MemMap = NVCUDA.getFunctionAddress("cuMemMap"), MemMapArrayAsync = NVCUDA.getFunctionAddress(__CUDA_API_PTSZ("cuMemMapArrayAsync")), MemUnmap = NVCUDA.getFunctionAddress("cuMemUnmap"), MemSetAccess = NVCUDA.getFunctionAddress("cuMemSetAccess"), MemGetAccess = NVCUDA.getFunctionAddress("cuMemGetAccess"), MemExportToShareableHandle = NVCUDA.getFunctionAddress("cuMemExportToShareableHandle"), MemImportFromShareableHandle = NVCUDA.getFunctionAddress("cuMemImportFromShareableHandle"), MemGetAllocationGranularity = NVCUDA.getFunctionAddress("cuMemGetAllocationGranularity"), MemGetAllocationPropertiesFromHandle = NVCUDA.getFunctionAddress("cuMemGetAllocationPropertiesFromHandle"), MemRetainAllocationHandle = NVCUDA.getFunctionAddress("cuMemRetainAllocationHandle"), MemFreeAsync = NVCUDA.getFunctionAddress(__CUDA_API_PTSZ("cuMemFreeAsync")), MemAllocAsync = NVCUDA.getFunctionAddress(__CUDA_API_PTSZ("cuMemAllocAsync")), MemPoolTrimTo = NVCUDA.getFunctionAddress("cuMemPoolTrimTo"), MemPoolSetAttribute = NVCUDA.getFunctionAddress("cuMemPoolSetAttribute"), MemPoolGetAttribute = NVCUDA.getFunctionAddress("cuMemPoolGetAttribute"), MemPoolSetAccess = NVCUDA.getFunctionAddress("cuMemPoolSetAccess"), MemPoolGetAccess = NVCUDA.getFunctionAddress("cuMemPoolGetAccess"), MemPoolCreate = NVCUDA.getFunctionAddress("cuMemPoolCreate"), MemPoolDestroy = NVCUDA.getFunctionAddress("cuMemPoolDestroy"), MemAllocFromPoolAsync = NVCUDA.getFunctionAddress(__CUDA_API_PTSZ("cuMemAllocFromPoolAsync")), MemPoolExportToShareableHandle = NVCUDA.getFunctionAddress("cuMemPoolExportToShareableHandle"), MemPoolImportFromShareableHandle = NVCUDA.getFunctionAddress("cuMemPoolImportFromShareableHandle"), MemPoolExportPointer = NVCUDA.getFunctionAddress("cuMemPoolExportPointer"), MemPoolImportPointer = NVCUDA.getFunctionAddress("cuMemPoolImportPointer"), PointerGetAttribute = NVCUDA.getFunctionAddress("cuPointerGetAttribute"), MemPrefetchAsync = NVCUDA.getFunctionAddress(__CUDA_API_PTSZ("cuMemPrefetchAsync")), MemAdvise = NVCUDA.getFunctionAddress("cuMemAdvise"), MemRangeGetAttribute = NVCUDA.getFunctionAddress("cuMemRangeGetAttribute"), MemRangeGetAttributes = NVCUDA.getFunctionAddress("cuMemRangeGetAttributes"), PointerSetAttribute = NVCUDA.getFunctionAddress("cuPointerSetAttribute"), PointerGetAttributes = NVCUDA.getFunctionAddress("cuPointerGetAttributes"), StreamCreate = apiGetFunctionAddress(NVCUDA, "cuStreamCreate"), StreamCreateWithPriority = apiGetFunctionAddress(NVCUDA, "cuStreamCreateWithPriority"), StreamGetPriority = apiGetFunctionAddress(NVCUDA, __CUDA_API_PTSZ("cuStreamGetPriority")), StreamGetFlags = apiGetFunctionAddress(NVCUDA, __CUDA_API_PTSZ("cuStreamGetFlags")), StreamGetCtx = NVCUDA.getFunctionAddress(__CUDA_API_PTSZ("cuStreamGetCtx")), StreamWaitEvent = apiGetFunctionAddress(NVCUDA, __CUDA_API_PTSZ("cuStreamWaitEvent")), StreamAddCallback = NVCUDA.getFunctionAddress(__CUDA_API_PTSZ("cuStreamAddCallback")), StreamBeginCapture = NVCUDA.getFunctionAddress(__CUDA_API_PTSZ("cuStreamBeginCapture")), 
StreamBeginCapture_v2 = NVCUDA.getFunctionAddress(__CUDA_API_PTSZ("cuStreamBeginCapture_v2")), ThreadExchangeStreamCaptureMode = NVCUDA.getFunctionAddress("cuThreadExchangeStreamCaptureMode"), StreamEndCapture = NVCUDA.getFunctionAddress(__CUDA_API_PTSZ("cuStreamEndCapture")), StreamIsCapturing = NVCUDA.getFunctionAddress(__CUDA_API_PTSZ("cuStreamIsCapturing")), StreamGetCaptureInfo = NVCUDA.getFunctionAddress(__CUDA_API_PTSZ("cuStreamGetCaptureInfo")), StreamGetCaptureInfo_v2 = NVCUDA.getFunctionAddress(__CUDA_API_PTSZ("cuStreamGetCaptureInfo_v2")), StreamUpdateCaptureDependencies = NVCUDA.getFunctionAddress(__CUDA_API_PTSZ("cuStreamUpdateCaptureDependencies")), StreamAttachMemAsync = NVCUDA.getFunctionAddress(__CUDA_API_PTSZ("cuStreamAttachMemAsync")), StreamQuery = apiGetFunctionAddress(NVCUDA, __CUDA_API_PTSZ("cuStreamQuery")), StreamSynchronize = apiGetFunctionAddress(NVCUDA, __CUDA_API_PTSZ("cuStreamSynchronize")), StreamDestroy = NVCUDA.getFunctionAddress(__CUDA_API_VERSION("cuStreamDestroy", 2)), StreamCopyAttributes = NVCUDA.getFunctionAddress(__CUDA_API_PTSZ("cuStreamCopyAttributes")), StreamGetAttribute = NVCUDA.getFunctionAddress(__CUDA_API_PTSZ("cuStreamGetAttribute")), StreamSetAttribute = NVCUDA.getFunctionAddress(__CUDA_API_PTSZ("cuStreamSetAttribute")), EventCreate = apiGetFunctionAddress(NVCUDA, "cuEventCreate"), EventRecord = apiGetFunctionAddress(NVCUDA, __CUDA_API_PTSZ("cuEventRecord")), EventRecordWithFlags = NVCUDA.getFunctionAddress(__CUDA_API_PTSZ("cuEventRecordWithFlags")), EventQuery = apiGetFunctionAddress(NVCUDA, "cuEventQuery"), EventSynchronize = apiGetFunctionAddress(NVCUDA, "cuEventSynchronize"), EventDestroy = NVCUDA.getFunctionAddress(__CUDA_API_VERSION("cuEventDestroy", 2)), EventElapsedTime = apiGetFunctionAddress(NVCUDA, "cuEventElapsedTime"), ImportExternalMemory = NVCUDA.getFunctionAddress("cuImportExternalMemory"), ExternalMemoryGetMappedBuffer = NVCUDA.getFunctionAddress("cuExternalMemoryGetMappedBuffer"), ExternalMemoryGetMappedMipmappedArray = NVCUDA.getFunctionAddress("cuExternalMemoryGetMappedMipmappedArray"), DestroyExternalMemory = NVCUDA.getFunctionAddress("cuDestroyExternalMemory"), ImportExternalSemaphore = NVCUDA.getFunctionAddress("cuImportExternalSemaphore"), SignalExternalSemaphoresAsync = NVCUDA.getFunctionAddress(__CUDA_API_PTSZ("cuSignalExternalSemaphoresAsync")), WaitExternalSemaphoresAsync = NVCUDA.getFunctionAddress(__CUDA_API_PTSZ("cuWaitExternalSemaphoresAsync")), DestroyExternalSemaphore = NVCUDA.getFunctionAddress("cuDestroyExternalSemaphore"), StreamWaitValue32 = NVCUDA.getFunctionAddress(__CUDA_API_PTSZ("cuStreamWaitValue32")), StreamWaitValue64 = NVCUDA.getFunctionAddress(__CUDA_API_PTSZ("cuStreamWaitValue64")), StreamWriteValue32 = NVCUDA.getFunctionAddress(__CUDA_API_PTSZ("cuStreamWriteValue32")), StreamWriteValue64 = NVCUDA.getFunctionAddress("cuStreamWriteValue64"), StreamBatchMemOp = NVCUDA.getFunctionAddress(__CUDA_API_PTSZ("cuStreamBatchMemOp")), FuncGetAttribute = apiGetFunctionAddress(NVCUDA, "cuFuncGetAttribute"), FuncSetAttribute = NVCUDA.getFunctionAddress("cuFuncSetAttribute"), FuncSetCacheConfig = apiGetFunctionAddress(NVCUDA, "cuFuncSetCacheConfig"), FuncSetSharedMemConfig = NVCUDA.getFunctionAddress("cuFuncSetSharedMemConfig"), FuncGetModule = NVCUDA.getFunctionAddress("cuFuncGetModule"), LaunchKernel = NVCUDA.getFunctionAddress(__CUDA_API_PTSZ("cuLaunchKernel")), LaunchCooperativeKernel = NVCUDA.getFunctionAddress(__CUDA_API_PTSZ("cuLaunchCooperativeKernel")), LaunchCooperativeKernelMultiDevice = 
NVCUDA.getFunctionAddress("cuLaunchCooperativeKernelMultiDevice"), LaunchHostFunc = NVCUDA.getFunctionAddress(__CUDA_API_PTSZ("cuLaunchHostFunc")), FuncSetBlockShape = apiGetFunctionAddress(NVCUDA, "cuFuncSetBlockShape"), FuncSetSharedSize = apiGetFunctionAddress(NVCUDA, "cuFuncSetSharedSize"), ParamSetSize = apiGetFunctionAddress(NVCUDA, "cuParamSetSize"), ParamSeti = apiGetFunctionAddress(NVCUDA, "cuParamSeti"), ParamSetf = apiGetFunctionAddress(NVCUDA, "cuParamSetf"), ParamSetv = apiGetFunctionAddress(NVCUDA, "cuParamSetv"), Launch = apiGetFunctionAddress(NVCUDA, "cuLaunch"), LaunchGrid = apiGetFunctionAddress(NVCUDA, "cuLaunchGrid"), LaunchGridAsync = apiGetFunctionAddress(NVCUDA, "cuLaunchGridAsync"), ParamSetTexRef = apiGetFunctionAddress(NVCUDA, "cuParamSetTexRef"), GraphCreate = NVCUDA.getFunctionAddress("cuGraphCreate"), GraphAddKernelNode = NVCUDA.getFunctionAddress("cuGraphAddKernelNode"), GraphKernelNodeGetParams = NVCUDA.getFunctionAddress("cuGraphKernelNodeGetParams"), GraphKernelNodeSetParams = NVCUDA.getFunctionAddress("cuGraphKernelNodeSetParams"), GraphAddMemcpyNode = NVCUDA.getFunctionAddress("cuGraphAddMemcpyNode"), GraphMemcpyNodeGetParams = NVCUDA.getFunctionAddress("cuGraphMemcpyNodeGetParams"), GraphMemcpyNodeSetParams = NVCUDA.getFunctionAddress("cuGraphMemcpyNodeSetParams"), GraphAddMemsetNode = NVCUDA.getFunctionAddress("cuGraphAddMemsetNode"), GraphMemsetNodeGetParams = NVCUDA.getFunctionAddress("cuGraphMemsetNodeGetParams"), GraphMemsetNodeSetParams = NVCUDA.getFunctionAddress("cuGraphMemsetNodeSetParams"), GraphAddHostNode = NVCUDA.getFunctionAddress("cuGraphAddHostNode"), GraphHostNodeGetParams = NVCUDA.getFunctionAddress("cuGraphHostNodeGetParams"), GraphHostNodeSetParams = NVCUDA.getFunctionAddress("cuGraphHostNodeSetParams"), GraphAddChildGraphNode = NVCUDA.getFunctionAddress("cuGraphAddChildGraphNode"), GraphChildGraphNodeGetGraph = NVCUDA.getFunctionAddress("cuGraphChildGraphNodeGetGraph"), GraphAddEmptyNode = NVCUDA.getFunctionAddress("cuGraphAddEmptyNode"), GraphAddEventRecordNode = NVCUDA.getFunctionAddress("cuGraphAddEventRecordNode"), GraphEventRecordNodeGetEvent = NVCUDA.getFunctionAddress("cuGraphEventRecordNodeGetEvent"), GraphEventRecordNodeSetEvent = NVCUDA.getFunctionAddress("cuGraphEventRecordNodeSetEvent"), GraphAddEventWaitNode = NVCUDA.getFunctionAddress("cuGraphAddEventWaitNode"), GraphEventWaitNodeGetEvent = NVCUDA.getFunctionAddress("cuGraphEventWaitNodeGetEvent"), GraphEventWaitNodeSetEvent = NVCUDA.getFunctionAddress("cuGraphEventWaitNodeSetEvent"), GraphAddExternalSemaphoresSignalNode = NVCUDA.getFunctionAddress("cuGraphAddExternalSemaphoresSignalNode"), GraphExternalSemaphoresSignalNodeGetParams = NVCUDA.getFunctionAddress("cuGraphExternalSemaphoresSignalNodeGetParams"), GraphExternalSemaphoresSignalNodeSetParams = NVCUDA.getFunctionAddress("cuGraphExternalSemaphoresSignalNodeSetParams"), GraphAddExternalSemaphoresWaitNode = NVCUDA.getFunctionAddress("cuGraphAddExternalSemaphoresWaitNode"), GraphExternalSemaphoresWaitNodeGetParams = NVCUDA.getFunctionAddress("cuGraphExternalSemaphoresWaitNodeGetParams"), GraphExternalSemaphoresWaitNodeSetParams = NVCUDA.getFunctionAddress("cuGraphExternalSemaphoresWaitNodeSetParams"), GraphAddMemAllocNode = NVCUDA.getFunctionAddress("cuGraphAddMemAllocNode"), GraphMemAllocNodeGetParams = NVCUDA.getFunctionAddress("cuGraphMemAllocNodeGetParams"), GraphAddMemFreeNode = NVCUDA.getFunctionAddress("cuGraphAddMemFreeNode"), GraphMemFreeNodeGetParams = 
NVCUDA.getFunctionAddress("cuGraphMemFreeNodeGetParams"), DeviceGraphMemTrim = NVCUDA.getFunctionAddress("cuDeviceGraphMemTrim"), DeviceGetGraphMemAttribute = NVCUDA.getFunctionAddress("cuDeviceGetGraphMemAttribute"), DeviceSetGraphMemAttribute = NVCUDA.getFunctionAddress("cuDeviceSetGraphMemAttribute"), GraphClone = NVCUDA.getFunctionAddress("cuGraphClone"), GraphNodeFindInClone = NVCUDA.getFunctionAddress("cuGraphNodeFindInClone"), GraphNodeGetType = NVCUDA.getFunctionAddress("cuGraphNodeGetType"), GraphGetNodes = NVCUDA.getFunctionAddress("cuGraphGetNodes"), GraphGetRootNodes = NVCUDA.getFunctionAddress("cuGraphGetRootNodes"), GraphGetEdges = NVCUDA.getFunctionAddress("cuGraphGetEdges"), GraphNodeGetDependencies = NVCUDA.getFunctionAddress("cuGraphNodeGetDependencies"), GraphNodeGetDependentNodes = NVCUDA.getFunctionAddress("cuGraphNodeGetDependentNodes"), GraphAddDependencies = NVCUDA.getFunctionAddress("cuGraphAddDependencies"), GraphRemoveDependencies = NVCUDA.getFunctionAddress("cuGraphRemoveDependencies"), GraphDestroyNode = NVCUDA.getFunctionAddress("cuGraphDestroyNode"), GraphInstantiate = NVCUDA.getFunctionAddress(__CUDA_API_VERSION("cuGraphInstantiate", 2)), GraphInstantiateWithFlags = NVCUDA.getFunctionAddress("cuGraphInstantiateWithFlags"), GraphExecKernelNodeSetParams = NVCUDA.getFunctionAddress("cuGraphExecKernelNodeSetParams"), GraphExecMemcpyNodeSetParams = NVCUDA.getFunctionAddress("cuGraphExecMemcpyNodeSetParams"), GraphExecMemsetNodeSetParams = NVCUDA.getFunctionAddress("cuGraphExecMemsetNodeSetParams"), GraphExecHostNodeSetParams = NVCUDA.getFunctionAddress("cuGraphExecHostNodeSetParams"), GraphExecChildGraphNodeSetParams = NVCUDA.getFunctionAddress("cuGraphExecChildGraphNodeSetParams"), GraphExecEventRecordNodeSetEvent = NVCUDA.getFunctionAddress("cuGraphExecEventRecordNodeSetEvent"), GraphExecEventWaitNodeSetEvent = NVCUDA.getFunctionAddress("cuGraphExecEventWaitNodeSetEvent"), GraphExecExternalSemaphoresSignalNodeSetParams = NVCUDA.getFunctionAddress("cuGraphExecExternalSemaphoresSignalNodeSetParams"), GraphExecExternalSemaphoresWaitNodeSetParams = NVCUDA.getFunctionAddress("cuGraphExecExternalSemaphoresWaitNodeSetParams"), GraphUpload = NVCUDA.getFunctionAddress(__CUDA_API_PTSZ("cuGraphUpload")), GraphLaunch = NVCUDA.getFunctionAddress(__CUDA_API_PTSZ("cuGraphLaunch")), GraphExecDestroy = NVCUDA.getFunctionAddress("cuGraphExecDestroy"), GraphDestroy = NVCUDA.getFunctionAddress("cuGraphDestroy"), GraphExecUpdate = NVCUDA.getFunctionAddress("cuGraphExecUpdate"), GraphKernelNodeCopyAttributes = NVCUDA.getFunctionAddress("cuGraphKernelNodeCopyAttributes"), GraphKernelNodeGetAttribute = NVCUDA.getFunctionAddress("cuGraphKernelNodeGetAttribute"), GraphKernelNodeSetAttribute = NVCUDA.getFunctionAddress("cuGraphKernelNodeSetAttribute"), GraphDebugDotPrint = NVCUDA.getFunctionAddress("cuGraphDebugDotPrint"), UserObjectCreate = NVCUDA.getFunctionAddress("cuUserObjectCreate"), UserObjectRetain = NVCUDA.getFunctionAddress("cuUserObjectRetain"), UserObjectRelease = NVCUDA.getFunctionAddress("cuUserObjectRelease"), GraphRetainUserObject = NVCUDA.getFunctionAddress("cuGraphRetainUserObject"), GraphReleaseUserObject = NVCUDA.getFunctionAddress("cuGraphReleaseUserObject"), OccupancyMaxActiveBlocksPerMultiprocessor = NVCUDA.getFunctionAddress("cuOccupancyMaxActiveBlocksPerMultiprocessor"), OccupancyMaxActiveBlocksPerMultiprocessorWithFlags = NVCUDA.getFunctionAddress("cuOccupancyMaxActiveBlocksPerMultiprocessorWithFlags"), OccupancyMaxPotentialBlockSize = 
NVCUDA.getFunctionAddress("cuOccupancyMaxPotentialBlockSize"), OccupancyMaxPotentialBlockSizeWithFlags = NVCUDA.getFunctionAddress("cuOccupancyMaxPotentialBlockSizeWithFlags"), OccupancyAvailableDynamicSMemPerBlock = NVCUDA.getFunctionAddress("cuOccupancyAvailableDynamicSMemPerBlock"), TexRefSetArray = apiGetFunctionAddress(NVCUDA, "cuTexRefSetArray"), TexRefSetMipmappedArray = apiGetFunctionAddress(NVCUDA, "cuTexRefSetMipmappedArray"), TexRefSetAddress = apiGetFunctionAddress(NVCUDA, __CUDA_API_VERSION("cuTexRefSetAddress", 2)), TexRefSetAddress2D = apiGetFunctionAddress(NVCUDA, __CUDA_API_VERSION("cuTexRefSetAddress2D", 3)), TexRefSetFormat = apiGetFunctionAddress(NVCUDA, "cuTexRefSetFormat"), TexRefSetAddressMode = apiGetFunctionAddress(NVCUDA, "cuTexRefSetAddressMode"), TexRefSetFilterMode = apiGetFunctionAddress(NVCUDA, "cuTexRefSetFilterMode"), TexRefSetMipmapFilterMode = apiGetFunctionAddress(NVCUDA, "cuTexRefSetMipmapFilterMode"), TexRefSetMipmapLevelBias = apiGetFunctionAddress(NVCUDA, "cuTexRefSetMipmapLevelBias"), TexRefSetMipmapLevelClamp = apiGetFunctionAddress(NVCUDA, "cuTexRefSetMipmapLevelClamp"), TexRefSetMaxAnisotropy = apiGetFunctionAddress(NVCUDA, "cuTexRefSetMaxAnisotropy"), TexRefSetBorderColor = apiGetFunctionAddress(NVCUDA, "cuTexRefSetBorderColor"), TexRefSetFlags = apiGetFunctionAddress(NVCUDA, "cuTexRefSetFlags"), TexRefGetAddress = apiGetFunctionAddress(NVCUDA, __CUDA_API_VERSION("cuTexRefGetAddress", 2)), TexRefGetArray = apiGetFunctionAddress(NVCUDA, "cuTexRefGetArray"), TexRefGetMipmappedArray = apiGetFunctionAddress(NVCUDA, "cuTexRefGetMipmappedArray"), TexRefGetAddressMode = apiGetFunctionAddress(NVCUDA, "cuTexRefGetAddressMode"), TexRefGetFilterMode = apiGetFunctionAddress(NVCUDA, "cuTexRefGetFilterMode"), TexRefGetFormat = apiGetFunctionAddress(NVCUDA, "cuTexRefGetFormat"), TexRefGetMipmapFilterMode = apiGetFunctionAddress(NVCUDA, "cuTexRefGetMipmapFilterMode"), TexRefGetMipmapLevelBias = apiGetFunctionAddress(NVCUDA, "cuTexRefGetMipmapLevelBias"), TexRefGetMipmapLevelClamp = apiGetFunctionAddress(NVCUDA, "cuTexRefGetMipmapLevelClamp"), TexRefGetMaxAnisotropy = apiGetFunctionAddress(NVCUDA, "cuTexRefGetMaxAnisotropy"), TexRefGetBorderColor = apiGetFunctionAddress(NVCUDA, "cuTexRefGetBorderColor"), TexRefGetFlags = apiGetFunctionAddress(NVCUDA, "cuTexRefGetFlags"), TexRefCreate = apiGetFunctionAddress(NVCUDA, "cuTexRefCreate"), TexRefDestroy = apiGetFunctionAddress(NVCUDA, "cuTexRefDestroy"), SurfRefSetArray = apiGetFunctionAddress(NVCUDA, "cuSurfRefSetArray"), SurfRefGetArray = apiGetFunctionAddress(NVCUDA, "cuSurfRefGetArray"), TexObjectCreate = NVCUDA.getFunctionAddress("cuTexObjectCreate"), TexObjectDestroy = NVCUDA.getFunctionAddress("cuTexObjectDestroy"), TexObjectGetResourceDesc = NVCUDA.getFunctionAddress("cuTexObjectGetResourceDesc"), TexObjectGetTextureDesc = NVCUDA.getFunctionAddress("cuTexObjectGetTextureDesc"), TexObjectGetResourceViewDesc = NVCUDA.getFunctionAddress("cuTexObjectGetResourceViewDesc"), SurfObjectCreate = NVCUDA.getFunctionAddress("cuSurfObjectCreate"), SurfObjectDestroy = NVCUDA.getFunctionAddress("cuSurfObjectDestroy"), SurfObjectGetResourceDesc = NVCUDA.getFunctionAddress("cuSurfObjectGetResourceDesc"), DeviceCanAccessPeer = NVCUDA.getFunctionAddress("cuDeviceCanAccessPeer"), CtxEnablePeerAccess = NVCUDA.getFunctionAddress("cuCtxEnablePeerAccess"), CtxDisablePeerAccess = NVCUDA.getFunctionAddress("cuCtxDisablePeerAccess"), DeviceGetP2PAttribute = NVCUDA.getFunctionAddress("cuDeviceGetP2PAttribute"), GraphicsUnregisterResource 
= apiGetFunctionAddress(NVCUDA, "cuGraphicsUnregisterResource"), GraphicsSubResourceGetMappedArray = apiGetFunctionAddress(NVCUDA, "cuGraphicsSubResourceGetMappedArray"), GraphicsResourceGetMappedMipmappedArray = NVCUDA.getFunctionAddress("cuGraphicsResourceGetMappedMipmappedArray"), GraphicsResourceGetMappedPointer = apiGetFunctionAddress(NVCUDA, __CUDA_API_VERSION("cuGraphicsResourceGetMappedPointer", 2)), GraphicsResourceSetMapFlags = apiGetFunctionAddress(NVCUDA, __CUDA_API_VERSION("cuGraphicsResourceSetMapFlags", 2)), GraphicsMapResources = apiGetFunctionAddress(NVCUDA, __CUDA_API_PTSZ("cuGraphicsMapResources")), GraphicsUnmapResources = apiGetFunctionAddress(NVCUDA, __CUDA_API_PTSZ("cuGraphicsUnmapResources")), GetProcAddress = NVCUDA.getFunctionAddress("cuGetProcAddress"), GetExportTable = apiGetFunctionAddress(NVCUDA, "cuGetExportTable"); } /** Returns the NVCUDA {@link SharedLibrary}. */ public static SharedLibrary getLibrary() { return NVCUDA; } public static final int CU_IPC_HANDLE_SIZE = 64; /** * CUDA Ipc Mem Flags. ({@code CUipcMem_flags}) * *
     * <h5>Enum values:</h5>
     * 
     * <ul>
     * <li>{@link #CU_IPC_MEM_LAZY_ENABLE_PEER_ACCESS IPC_MEM_LAZY_ENABLE_PEER_ACCESS} - Automatically enable peer access between remote devices as needed</li>
     * </ul>
     */
    public static final int CU_IPC_MEM_LAZY_ENABLE_PEER_ACCESS = 0x1;
    /**
     * CUDA Mem Attach Flags. ({@code CUmemAttach_flags})
     * 
     * <h5>Enum values:</h5>
     * 
     * <ul>
     * <li>{@link #CU_MEM_ATTACH_GLOBAL MEM_ATTACH_GLOBAL} - Memory can be accessed by any stream on any device</li>
     * <li>{@link #CU_MEM_ATTACH_HOST MEM_ATTACH_HOST} - Memory cannot be accessed by any stream on any device</li>
     * <li>{@link #CU_MEM_ATTACH_SINGLE MEM_ATTACH_SINGLE} - Memory can only be accessed by a single stream on the associated device</li>
     * </ul>
     */
    public static final int
        CU_MEM_ATTACH_GLOBAL = 0x1,
        CU_MEM_ATTACH_HOST   = 0x2,
        CU_MEM_ATTACH_SINGLE = 0x4;
    /**
     * Context creation flags. ({@code CUctx_flags})
     * 
     * <h5>Enum values:</h5>
     * 
     * <ul>
     * <li>{@link #CU_CTX_SCHED_AUTO CTX_SCHED_AUTO} - Automatic scheduling</li>
     * <li>{@link #CU_CTX_SCHED_SPIN CTX_SCHED_SPIN} - Set spin as default scheduling</li>
     * <li>{@link #CU_CTX_SCHED_YIELD CTX_SCHED_YIELD} - Set yield as default scheduling</li>
     * <li>{@link #CU_CTX_SCHED_BLOCKING_SYNC CTX_SCHED_BLOCKING_SYNC} - Set blocking synchronization as default scheduling</li>
     * <li>{@link #CU_CTX_BLOCKING_SYNC CTX_BLOCKING_SYNC} - Set blocking synchronization as default scheduling. This flag was deprecated as of CUDA 4.0 and was replaced with {@link #CU_CTX_SCHED_BLOCKING_SYNC CTX_SCHED_BLOCKING_SYNC}.</li>
     * <li>{@link #CU_CTX_SCHED_MASK CTX_SCHED_MASK}</li>
     * <li>{@link #CU_CTX_MAP_HOST CTX_MAP_HOST} - This flag was deprecated as of CUDA 11.0 and it no longer has any effect. All contexts as of CUDA 3.2 behave as though the flag is enabled.</li>
     * <li>{@link #CU_CTX_LMEM_RESIZE_TO_MAX CTX_LMEM_RESIZE_TO_MAX} - Keep local memory allocation after launch</li>
     * <li>{@link #CU_CTX_FLAGS_MASK CTX_FLAGS_MASK}</li>
     * </ul>
     */
    public static final int
        CU_CTX_SCHED_AUTO          = 0x0,
        CU_CTX_SCHED_SPIN          = 0x1,
        CU_CTX_SCHED_YIELD         = 0x2,
        CU_CTX_SCHED_BLOCKING_SYNC = 0x4,
        CU_CTX_BLOCKING_SYNC       = 0x4,
        CU_CTX_SCHED_MASK          = 0x7,
        CU_CTX_MAP_HOST            = 0x8,
        CU_CTX_LMEM_RESIZE_TO_MAX  = 0x10,
        CU_CTX_FLAGS_MASK          = 0x1F;
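    /**
     * A usage sketch, not part of the generated bindings: creates a context on {@code device} with the
     * {@link #CU_CTX_SCHED_BLOCKING_SYNC CTX_SCHED_BLOCKING_SYNC} scheduling flag above and destroys it again.
     * Assumes {@link #cuInit Init} has already succeeded and {@code device} is a valid {@code CUdevice}.
     */
    private static void contextFlagsSketch(int device) {
        try (MemoryStack stack = stackPush()) {
            PointerBuffer pctx = stack.mallocPointer(1);
            if (cuCtxCreate(pctx, CU_CTX_SCHED_BLOCKING_SYNC, device) != CUDA_SUCCESS) {
                throw new IllegalStateException("cuCtxCreate failed");
            }
            long context = pctx.get(0);
            // ... submit work while the context is current ...
            cuCtxSynchronize();    // the calling thread blocks (rather than spins) while waiting, per the flag
            cuCtxDestroy(context);
        }
    }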
    /**
     * Stream creation flags. ({@code CUstream_flags})
     * 
     * <h5>Enum values:</h5>
     * 
     * <ul>
     * <li>{@link #CU_STREAM_DEFAULT STREAM_DEFAULT} - Default stream flag</li>
     * <li>{@link #CU_STREAM_NON_BLOCKING STREAM_NON_BLOCKING} - Stream does not synchronize with stream 0 (the {@code NULL} stream)</li>
     * </ul>
     */
    public static final int
        CU_STREAM_DEFAULT      = 0x0,
        CU_STREAM_NON_BLOCKING = 0x1;
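    /**
     * A usage sketch, not part of the generated bindings: creates a stream with
     * {@link #CU_STREAM_NON_BLOCKING STREAM_NON_BLOCKING} so it does not synchronize with the {@code NULL} stream,
     * drains it and destroys it. Assumes a context is current on the calling thread.
     */
    private static void streamFlagsSketch() {
        try (MemoryStack stack = stackPush()) {
            PointerBuffer phStream = stack.mallocPointer(1);
            if (cuStreamCreate(phStream, CU_STREAM_NON_BLOCKING) != CUDA_SUCCESS) {
                throw new IllegalStateException("cuStreamCreate failed");
            }
            long stream = phStream.get(0);
            // ... enqueue asynchronous work on `stream` here ...
            cuStreamSynchronize(stream); // block until all work queued on the stream has completed
            cuStreamDestroy(stream);
        }
    }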

    /**
     * Legacy stream handle.
     * 
     * <p>Stream handle that can be passed as a {@code CUstream} to use an implicit stream with legacy synchronization behavior.</p>
     */
    public static final long CU_STREAM_LEGACY = 0x1L;

    /**
     * Per-thread stream handle.
     * 
     * <p>Stream handle that can be passed as a {@code CUstream} to use an implicit stream with per-thread synchronization behavior.</p>
     */
    public static final long CU_STREAM_PER_THREAD = 0x2L;
    /**
     * Event creation flags. ({@code CUevent_flags})
     * 
     * <h5>Enum values:</h5>
     * 
     * <ul>
     * <li>{@link #CU_EVENT_DEFAULT EVENT_DEFAULT} - Default event flag</li>
     * <li>{@link #CU_EVENT_BLOCKING_SYNC EVENT_BLOCKING_SYNC} - Event uses blocking synchronization</li>
     * <li>{@link #CU_EVENT_DISABLE_TIMING EVENT_DISABLE_TIMING} - Event will not record timing data</li>
     * <li>{@link #CU_EVENT_INTERPROCESS EVENT_INTERPROCESS} - Event is suitable for interprocess use. {@link #CU_EVENT_DISABLE_TIMING EVENT_DISABLE_TIMING} must be set</li>
     * </ul>
     */
    public static final int
        CU_EVENT_DEFAULT        = 0x0,
        CU_EVENT_BLOCKING_SYNC  = 0x1,
        CU_EVENT_DISABLE_TIMING = 0x2,
        CU_EVENT_INTERPROCESS   = 0x4;
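    /**
     * A usage sketch, not part of the generated bindings: measures elapsed GPU time between two events recorded
     * on {@code stream}. {@link #CU_EVENT_DEFAULT EVENT_DEFAULT} keeps timing enabled; events that are only used
     * for synchronization should pass {@link #CU_EVENT_DISABLE_TIMING EVENT_DISABLE_TIMING} instead.
     */
    private static float eventTimingSketch(long stream) {
        try (MemoryStack stack = stackPush()) {
            PointerBuffer pe = stack.mallocPointer(1);

            cuEventCreate(pe, CU_EVENT_DEFAULT);
            long start = pe.get(0);
            cuEventCreate(pe, CU_EVENT_DEFAULT);
            long stop = pe.get(0);

            cuEventRecord(start, stream);
            // ... enqueue the work to be timed on `stream` here ...
            cuEventRecord(stop, stream);
            cuEventSynchronize(stop); // wait until the stop event has actually been reached

            FloatBuffer ms = stack.mallocFloat(1);
            cuEventElapsedTime(ms, start, stop);

            cuEventDestroy(start);
            cuEventDestroy(stop);
            return ms.get(0);
        }
    }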
    /**
     * Event record flags. ({@code CUevent_record_flags})
     * 
     * <h5>Enum values:</h5>
     * 
     * <ul>
     * <li>{@link #CU_EVENT_RECORD_DEFAULT EVENT_RECORD_DEFAULT} - Default event record flag</li>
     * <li>{@link #CU_EVENT_RECORD_EXTERNAL EVENT_RECORD_EXTERNAL} - When using stream capture, create an event record node instead of the default behavior. This flag is invalid when used outside of capture.</li>
     * </ul>
     */
    public static final int
        CU_EVENT_RECORD_DEFAULT  = 0x0,
        CU_EVENT_RECORD_EXTERNAL = 0x1;
    /**
     * Event wait flags. ({@code CUevent_wait_flags})
     * 
     * <h5>Enum values:</h5>
     * 
     * <ul>
     * <li>{@link #CU_EVENT_WAIT_DEFAULT EVENT_WAIT_DEFAULT} - Default event wait flag</li>
     * <li>{@link #CU_EVENT_WAIT_EXTERNAL EVENT_WAIT_EXTERNAL} - When using stream capture, create an event wait node instead of the default behavior. This flag is invalid when used outside of capture.</li>
     * </ul>
     */
    public static final int
        CU_EVENT_WAIT_DEFAULT  = 0x0,
        CU_EVENT_WAIT_EXTERNAL = 0x1;
    /**
     * Flags for {@link #cuStreamWaitValue32 StreamWaitValue32} and {@link #cuStreamWaitValue64 StreamWaitValue64}. ({@code CUstreamWaitValue_flags})
     * 
     * <h5>Enum values:</h5>
     * 
     * <ul>
     * <li>{@link #CU_STREAM_WAIT_VALUE_GEQ STREAM_WAIT_VALUE_GEQ} - Wait until {@code (int32_t)(*addr - value) >= 0} (or {@code int64_t} for 64 bit values). Note this is a cyclic comparison which ignores wraparound. (Default behavior.)</li>
     * <li>{@link #CU_STREAM_WAIT_VALUE_EQ STREAM_WAIT_VALUE_EQ} - Wait until {@code *addr == value}.</li>
     * <li>{@link #CU_STREAM_WAIT_VALUE_AND STREAM_WAIT_VALUE_AND} - Wait until {@code (*addr & value) != 0}.</li>
     * <li>{@link #CU_STREAM_WAIT_VALUE_NOR STREAM_WAIT_VALUE_NOR} - Wait until {@code ~(*addr | value) != 0}. Support for this operation can be queried with {@link #cuDeviceGetAttribute DeviceGetAttribute} and {@link #CU_DEVICE_ATTRIBUTE_CAN_USE_STREAM_WAIT_VALUE_NOR DEVICE_ATTRIBUTE_CAN_USE_STREAM_WAIT_VALUE_NOR}.</li>
     * <li>{@link #CU_STREAM_WAIT_VALUE_FLUSH STREAM_WAIT_VALUE_FLUSH} - Follow the wait operation with a flush of outstanding remote writes. This means that, if a remote write operation is guaranteed to have reached the device before the wait can be satisfied, that write is guaranteed to be visible to downstream device work. The device is permitted to reorder remote writes internally. For example, this flag would be required if two remote writes arrive in a defined order, the wait is satisfied by the second write, and downstream work needs to observe the first write. Support for this operation is restricted to selected platforms and can be queried with {@code CU_DEVICE_ATTRIBUTE_CAN_USE_WAIT_VALUE_FLUSH}.</li>
     * </ul>
     */
    public static final int
        CU_STREAM_WAIT_VALUE_GEQ   = 0x0,
        CU_STREAM_WAIT_VALUE_EQ    = 0x1,
        CU_STREAM_WAIT_VALUE_AND   = 0x2,
        CU_STREAM_WAIT_VALUE_NOR   = 0x3,
        CU_STREAM_WAIT_VALUE_FLUSH = 1 << 30;
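    /**
     * A usage sketch, not part of the generated bindings: queries whether {@code device} supports the
     * {@link #CU_STREAM_WAIT_VALUE_NOR STREAM_WAIT_VALUE_NOR} operation before issuing it, as the list above recommends.
     */
    private static boolean supportsWaitValueNorSketch(int device) {
        try (MemoryStack stack = stackPush()) {
            IntBuffer pi = stack.mallocInt(1);
            cuDeviceGetAttribute(pi, CU_DEVICE_ATTRIBUTE_CAN_USE_STREAM_WAIT_VALUE_NOR, device);
            return pi.get(0) != 0;
        }
    }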
    /**
     * Flags for {@link #cuStreamWriteValue32 StreamWriteValue32}. ({@code CUstreamWriteValue_flags})
     * 
     * <h5>Enum values:</h5>
     * 
     * <ul>
     * <li>{@link #CU_STREAM_WRITE_VALUE_DEFAULT STREAM_WRITE_VALUE_DEFAULT} - Default behavior</li>
     * <li>{@link #CU_STREAM_WRITE_VALUE_NO_MEMORY_BARRIER STREAM_WRITE_VALUE_NO_MEMORY_BARRIER} - Permits the write to be reordered with writes which were issued before it, as a performance optimization. Normally, {@link #cuStreamWriteValue32 StreamWriteValue32} will provide a memory fence before the write, which has similar semantics to {@code __threadfence_system()} but is scoped to the stream rather than a CUDA thread.</li>
     * </ul>
     */
    public static final int
        CU_STREAM_WRITE_VALUE_DEFAULT           = 0x0,
        CU_STREAM_WRITE_VALUE_NO_MEMORY_BARRIER = 0x1;
    /**
     * Operations for {@link #cuStreamBatchMemOp StreamBatchMemOp}. ({@code CUstreamBatchMemOpType})
     * 
     * <h5>Enum values:</h5>
     * 
     * <ul>
     * <li>{@link #CU_STREAM_MEM_OP_WAIT_VALUE_32 STREAM_MEM_OP_WAIT_VALUE_32} - Represents a {@link #cuStreamWaitValue32 StreamWaitValue32} operation</li>
     * <li>{@link #CU_STREAM_MEM_OP_WRITE_VALUE_32 STREAM_MEM_OP_WRITE_VALUE_32} - Represents a {@link #cuStreamWriteValue32 StreamWriteValue32} operation</li>
     * <li>{@link #CU_STREAM_MEM_OP_WAIT_VALUE_64 STREAM_MEM_OP_WAIT_VALUE_64} - Represents a {@link #cuStreamWaitValue64 StreamWaitValue64} operation</li>
     * <li>{@link #CU_STREAM_MEM_OP_WRITE_VALUE_64 STREAM_MEM_OP_WRITE_VALUE_64} - Represents a {@link #cuStreamWriteValue64 StreamWriteValue64} operation</li>
     * <li>{@link #CU_STREAM_MEM_OP_FLUSH_REMOTE_WRITES STREAM_MEM_OP_FLUSH_REMOTE_WRITES} - This has the same effect as {@link #CU_STREAM_WAIT_VALUE_FLUSH STREAM_WAIT_VALUE_FLUSH}, but as a standalone operation.</li>
     * </ul>
     */
    public static final int
        CU_STREAM_MEM_OP_WAIT_VALUE_32       = 0x1,
        CU_STREAM_MEM_OP_WRITE_VALUE_32      = 0x2,
        CU_STREAM_MEM_OP_WAIT_VALUE_64       = 0x4,
        CU_STREAM_MEM_OP_WRITE_VALUE_64      = 0x5,
        CU_STREAM_MEM_OP_FLUSH_REMOTE_WRITES = 0x3;
    /**
     * Occupancy calculator flag. ({@code CUoccupancy_flags})
     * 
     * <h5>Enum values:</h5>
     * 
     * <ul>
     * <li>{@link #CU_OCCUPANCY_DEFAULT OCCUPANCY_DEFAULT} - Default behavior</li>
     * <li>{@link #CU_OCCUPANCY_DISABLE_CACHING_OVERRIDE OCCUPANCY_DISABLE_CACHING_OVERRIDE} - Assume global caching is enabled and cannot be automatically turned off</li>
     * </ul>
     */
    public static final int
        CU_OCCUPANCY_DEFAULT                  = 0x0,
        CU_OCCUPANCY_DISABLE_CACHING_OVERRIDE = 0x1;
    /**
     * Flags for {@link #cuStreamUpdateCaptureDependencies StreamUpdateCaptureDependencies}. ({@code CUstreamUpdateCaptureDependencies_flags})
     * 
     * <h5>Enum values:</h5>
     * 
     * <ul>
     * <li>{@link #CU_STREAM_ADD_CAPTURE_DEPENDENCIES STREAM_ADD_CAPTURE_DEPENDENCIES} - Add new nodes to the dependency set</li>
     * <li>{@link #CU_STREAM_SET_CAPTURE_DEPENDENCIES STREAM_SET_CAPTURE_DEPENDENCIES} - Replace the dependency set with the new nodes</li>
     * </ul>
     */
    public static final int
        CU_STREAM_ADD_CAPTURE_DEPENDENCIES = 0x0,
        CU_STREAM_SET_CAPTURE_DEPENDENCIES = 0x1;

    /**
     * Array formats. ({@code CUarray_format})
     * 
Enum values:
* *
    *
  • {@link #CU_AD_FORMAT_UNSIGNED_INT8 AD_FORMAT_UNSIGNED_INT8} - Unsigned 8-bit integers
  • *
  • {@link #CU_AD_FORMAT_UNSIGNED_INT16 AD_FORMAT_UNSIGNED_INT16} - Unsigned 16-bit integers
  • *
  • {@link #CU_AD_FORMAT_UNSIGNED_INT32 AD_FORMAT_UNSIGNED_INT32} - Unsigned 32-bit integers
  • *
  • {@link #CU_AD_FORMAT_SIGNED_INT8 AD_FORMAT_SIGNED_INT8} - Signed 8-bit integers
  • *
  • {@link #CU_AD_FORMAT_SIGNED_INT16 AD_FORMAT_SIGNED_INT16} - Signed 16-bit integers
  • *
  • {@link #CU_AD_FORMAT_SIGNED_INT32 AD_FORMAT_SIGNED_INT32} - Signed 32-bit integers
  • *
  • {@link #CU_AD_FORMAT_HALF AD_FORMAT_HALF} - 16-bit floating point
  • *
  • {@link #CU_AD_FORMAT_FLOAT AD_FORMAT_FLOAT} - 32-bit floating point
  • *
  • {@link #CU_AD_FORMAT_NV12 AD_FORMAT_NV12} - 8-bit YUV planar format, with 4:2:0 sampling
  • *
  • {@link #CU_AD_FORMAT_UNORM_INT8X1 AD_FORMAT_UNORM_INT8X1} - 1 channel unsigned 8-bit normalized integer
  • *
  • {@link #CU_AD_FORMAT_UNORM_INT8X2 AD_FORMAT_UNORM_INT8X2} - 2 channel unsigned 8-bit normalized integer
  • *
  • {@link #CU_AD_FORMAT_UNORM_INT8X4 AD_FORMAT_UNORM_INT8X4} - 4 channel unsigned 8-bit normalized integer
  • *
  • {@link #CU_AD_FORMAT_UNORM_INT16X1 AD_FORMAT_UNORM_INT16X1} - 1 channel unsigned 16-bit normalized integer
  • *
  • {@link #CU_AD_FORMAT_UNORM_INT16X2 AD_FORMAT_UNORM_INT16X2} - 2 channel unsigned 16-bit normalized integer
  • *
  • {@link #CU_AD_FORMAT_UNORM_INT16X4 AD_FORMAT_UNORM_INT16X4} - 4 channel unsigned 16-bit normalized integer
  • *
  • {@link #CU_AD_FORMAT_SNORM_INT8X1 AD_FORMAT_SNORM_INT8X1} - 1 channel signed 8-bit normalized integer
  • *
  • {@link #CU_AD_FORMAT_SNORM_INT8X2 AD_FORMAT_SNORM_INT8X2} - 2 channel signed 8-bit normalized integer
  • *
  • {@link #CU_AD_FORMAT_SNORM_INT8X4 AD_FORMAT_SNORM_INT8X4} - 4 channel signed 8-bit normalized integer
  • *
  • {@link #CU_AD_FORMAT_SNORM_INT16X1 AD_FORMAT_SNORM_INT16X1} - 1 channel signed 16-bit normalized integer
  • *
  • {@link #CU_AD_FORMAT_SNORM_INT16X2 AD_FORMAT_SNORM_INT16X2} - 2 channel signed 16-bit normalized integer
  • *
  • {@link #CU_AD_FORMAT_SNORM_INT16X4 AD_FORMAT_SNORM_INT16X4} - 4 channel signed 16-bit normalized integer
  • *
  • {@link #CU_AD_FORMAT_BC1_UNORM AD_FORMAT_BC1_UNORM} - 4 channel unsigned normalized block-compressed (BC1 compression) format
  • *
  • {@link #CU_AD_FORMAT_BC1_UNORM_SRGB AD_FORMAT_BC1_UNORM_SRGB} - 4 channel unsigned normalized block-compressed (BC1 compression) format with sRGB encoding
  • *
  • {@link #CU_AD_FORMAT_BC2_UNORM AD_FORMAT_BC2_UNORM} - 4 channel unsigned normalized block-compressed (BC2 compression) format
  • *
  • {@link #CU_AD_FORMAT_BC2_UNORM_SRGB AD_FORMAT_BC2_UNORM_SRGB} - 4 channel unsigned normalized block-compressed (BC2 compression) format with sRGB encoding
  • *
  • {@link #CU_AD_FORMAT_BC3_UNORM AD_FORMAT_BC3_UNORM} - 4 channel unsigned normalized block-compressed (BC3 compression) format
  • *
  • {@link #CU_AD_FORMAT_BC3_UNORM_SRGB AD_FORMAT_BC3_UNORM_SRGB} - 4 channel unsigned normalized block-compressed (BC3 compression) format with sRGB encoding
  • *
  • {@link #CU_AD_FORMAT_BC4_UNORM AD_FORMAT_BC4_UNORM} - 1 channel unsigned normalized block-compressed (BC4 compression) format
  • *
  • {@link #CU_AD_FORMAT_BC4_SNORM AD_FORMAT_BC4_SNORM} - 1 channel signed normalized block-compressed (BC4 compression) format
  • *
  • {@link #CU_AD_FORMAT_BC5_UNORM AD_FORMAT_BC5_UNORM} - 2 channel unsigned normalized block-compressed (BC5 compression) format
  • *
  • {@link #CU_AD_FORMAT_BC5_SNORM AD_FORMAT_BC5_SNORM} - 2 channel signed normalized block-compressed (BC5 compression) format
  • *
  • {@link #CU_AD_FORMAT_BC6H_UF16 AD_FORMAT_BC6H_UF16} - 3 channel unsigned half-float block-compressed (BC6H compression) format
  • *
  • {@link #CU_AD_FORMAT_BC6H_SF16 AD_FORMAT_BC6H_SF16} - 3 channel signed half-float block-compressed (BC6H compression) format
  • *
  • {@link #CU_AD_FORMAT_BC7_UNORM AD_FORMAT_BC7_UNORM} - 4 channel unsigned normalized block-compressed (BC7 compression) format
  • *
  • {@link #CU_AD_FORMAT_BC7_UNORM_SRGB AD_FORMAT_BC7_UNORM_SRGB} - 4 channel unsigned normalized block-compressed (BC7 compression) format with sRGB encoding
  • *
*/ public static final int CU_AD_FORMAT_UNSIGNED_INT8 = 0x01, CU_AD_FORMAT_UNSIGNED_INT16 = 0x02, CU_AD_FORMAT_UNSIGNED_INT32 = 0x03, CU_AD_FORMAT_SIGNED_INT8 = 0x08, CU_AD_FORMAT_SIGNED_INT16 = 0x09, CU_AD_FORMAT_SIGNED_INT32 = 0x0a, CU_AD_FORMAT_HALF = 0x10, CU_AD_FORMAT_FLOAT = 0x20, CU_AD_FORMAT_NV12 = 0xb0, CU_AD_FORMAT_UNORM_INT8X1 = 0xc0, CU_AD_FORMAT_UNORM_INT8X2 = 0xc1, CU_AD_FORMAT_UNORM_INT8X4 = 0xc2, CU_AD_FORMAT_UNORM_INT16X1 = 0xc3, CU_AD_FORMAT_UNORM_INT16X2 = 0xc4, CU_AD_FORMAT_UNORM_INT16X4 = 0xc5, CU_AD_FORMAT_SNORM_INT8X1 = 0xc6, CU_AD_FORMAT_SNORM_INT8X2 = 0xc7, CU_AD_FORMAT_SNORM_INT8X4 = 0xc8, CU_AD_FORMAT_SNORM_INT16X1 = 0xc9, CU_AD_FORMAT_SNORM_INT16X2 = 0xca, CU_AD_FORMAT_SNORM_INT16X4 = 0xcb, CU_AD_FORMAT_BC1_UNORM = 0x91, CU_AD_FORMAT_BC1_UNORM_SRGB = 0x92, CU_AD_FORMAT_BC2_UNORM = 0x93, CU_AD_FORMAT_BC2_UNORM_SRGB = 0x94, CU_AD_FORMAT_BC3_UNORM = 0x95, CU_AD_FORMAT_BC3_UNORM_SRGB = 0x96, CU_AD_FORMAT_BC4_UNORM = 0x97, CU_AD_FORMAT_BC4_SNORM = 0x98, CU_AD_FORMAT_BC5_UNORM = 0x99, CU_AD_FORMAT_BC5_SNORM = 0x9a, CU_AD_FORMAT_BC6H_UF16 = 0x9b, CU_AD_FORMAT_BC6H_SF16 = 0x9c, CU_AD_FORMAT_BC7_UNORM = 0x9d, CU_AD_FORMAT_BC7_UNORM_SRGB = 0x9e; /** * Texture reference addressing modes. ({@code CUaddress_mode}) * *
Enum values:
* *
    *
  • {@link #CU_TR_ADDRESS_MODE_WRAP TR_ADDRESS_MODE_WRAP} - Wrapping address mode
  • *
  • {@link #CU_TR_ADDRESS_MODE_CLAMP TR_ADDRESS_MODE_CLAMP} - Clamp to edge address mode
  • *
  • {@link #CU_TR_ADDRESS_MODE_MIRROR TR_ADDRESS_MODE_MIRROR} - Mirror address mode
  • *
  • {@link #CU_TR_ADDRESS_MODE_BORDER TR_ADDRESS_MODE_BORDER} - Border address mode
  • *
*/ public static final int CU_TR_ADDRESS_MODE_WRAP = 0x0, CU_TR_ADDRESS_MODE_CLAMP = 0x1, CU_TR_ADDRESS_MODE_MIRROR = 0x2, CU_TR_ADDRESS_MODE_BORDER = 0x3; /** * Texture reference filtering modes. ({@code CUfilter_mode}) * *
Enum values:
* *
    *
  • {@link #CU_TR_FILTER_MODE_POINT TR_FILTER_MODE_POINT} - Point filter mode
  • *
  • {@link #CU_TR_FILTER_MODE_LINEAR TR_FILTER_MODE_LINEAR} - Linear filter mode
  • *
*/ public static final int CU_TR_FILTER_MODE_POINT = 0x0, CU_TR_FILTER_MODE_LINEAR = 0x1; /** * Device properties. ({@code CUdevice_attribute}) * *
Enum values:
* *
    *
  • {@link #CU_DEVICE_ATTRIBUTE_MAX_THREADS_PER_BLOCK DEVICE_ATTRIBUTE_MAX_THREADS_PER_BLOCK} - Maximum number of threads per block
  • *
  • {@link #CU_DEVICE_ATTRIBUTE_MAX_BLOCK_DIM_X DEVICE_ATTRIBUTE_MAX_BLOCK_DIM_X} - Maximum block dimension X
  • *
  • {@link #CU_DEVICE_ATTRIBUTE_MAX_BLOCK_DIM_Y DEVICE_ATTRIBUTE_MAX_BLOCK_DIM_Y} - Maximum block dimension Y
  • *
  • {@link #CU_DEVICE_ATTRIBUTE_MAX_BLOCK_DIM_Z DEVICE_ATTRIBUTE_MAX_BLOCK_DIM_Z} - Maximum block dimension Z
  • *
  • {@link #CU_DEVICE_ATTRIBUTE_MAX_GRID_DIM_X DEVICE_ATTRIBUTE_MAX_GRID_DIM_X} - Maximum grid dimension X
  • *
  • {@link #CU_DEVICE_ATTRIBUTE_MAX_GRID_DIM_Y DEVICE_ATTRIBUTE_MAX_GRID_DIM_Y} - Maximum grid dimension Y
  • *
  • {@link #CU_DEVICE_ATTRIBUTE_MAX_GRID_DIM_Z DEVICE_ATTRIBUTE_MAX_GRID_DIM_Z} - Maximum grid dimension Z
  • *
  • {@link #CU_DEVICE_ATTRIBUTE_MAX_SHARED_MEMORY_PER_BLOCK DEVICE_ATTRIBUTE_MAX_SHARED_MEMORY_PER_BLOCK} - Maximum shared memory available per block in bytes
  • *
  • {@link #CU_DEVICE_ATTRIBUTE_SHARED_MEMORY_PER_BLOCK DEVICE_ATTRIBUTE_SHARED_MEMORY_PER_BLOCK} - Deprecated, use {@link #CU_DEVICE_ATTRIBUTE_MAX_SHARED_MEMORY_PER_BLOCK DEVICE_ATTRIBUTE_MAX_SHARED_MEMORY_PER_BLOCK}
  • *
  • {@link #CU_DEVICE_ATTRIBUTE_TOTAL_CONSTANT_MEMORY DEVICE_ATTRIBUTE_TOTAL_CONSTANT_MEMORY} - Memory available on device for __constant__ variables in a CUDA C kernel in bytes
  • *
  • {@link #CU_DEVICE_ATTRIBUTE_WARP_SIZE DEVICE_ATTRIBUTE_WARP_SIZE} - Warp size in threads
  • *
  • {@link #CU_DEVICE_ATTRIBUTE_MAX_PITCH DEVICE_ATTRIBUTE_MAX_PITCH} - Maximum pitch in bytes allowed by memory copies
  • *
  • {@link #CU_DEVICE_ATTRIBUTE_MAX_REGISTERS_PER_BLOCK DEVICE_ATTRIBUTE_MAX_REGISTERS_PER_BLOCK} - Maximum number of 32-bit registers available per block
  • *
  • {@link #CU_DEVICE_ATTRIBUTE_REGISTERS_PER_BLOCK DEVICE_ATTRIBUTE_REGISTERS_PER_BLOCK} - Deprecated, use {@link #CU_DEVICE_ATTRIBUTE_MAX_REGISTERS_PER_BLOCK DEVICE_ATTRIBUTE_MAX_REGISTERS_PER_BLOCK}
  • *
  • {@link #CU_DEVICE_ATTRIBUTE_CLOCK_RATE DEVICE_ATTRIBUTE_CLOCK_RATE} - Typical clock frequency in kilohertz
  • *
  • {@link #CU_DEVICE_ATTRIBUTE_TEXTURE_ALIGNMENT DEVICE_ATTRIBUTE_TEXTURE_ALIGNMENT} - Alignment requirement for textures
  • *
  • {@link #CU_DEVICE_ATTRIBUTE_GPU_OVERLAP DEVICE_ATTRIBUTE_GPU_OVERLAP} - Device can possibly copy memory and execute a kernel concurrently. Deprecated. Use instead {@link #CU_DEVICE_ATTRIBUTE_ASYNC_ENGINE_COUNT DEVICE_ATTRIBUTE_ASYNC_ENGINE_COUNT}.
  • *
  • {@link #CU_DEVICE_ATTRIBUTE_MULTIPROCESSOR_COUNT DEVICE_ATTRIBUTE_MULTIPROCESSOR_COUNT} - Number of multiprocessors on device
  • *
  • {@link #CU_DEVICE_ATTRIBUTE_KERNEL_EXEC_TIMEOUT DEVICE_ATTRIBUTE_KERNEL_EXEC_TIMEOUT} - Specifies whether there is a run time limit on kernels
  • *
  • {@link #CU_DEVICE_ATTRIBUTE_INTEGRATED DEVICE_ATTRIBUTE_INTEGRATED} - Device is integrated with host memory
  • *
  • {@link #CU_DEVICE_ATTRIBUTE_CAN_MAP_HOST_MEMORY DEVICE_ATTRIBUTE_CAN_MAP_HOST_MEMORY} - Device can map host memory into CUDA address space
  • *
  • {@link #CU_DEVICE_ATTRIBUTE_COMPUTE_MODE DEVICE_ATTRIBUTE_COMPUTE_MODE} - Compute mode (See {@code CUcomputemode} for details)
  • *
  • {@link #CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE1D_WIDTH DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE1D_WIDTH} - Maximum 1D texture width
  • *
  • {@link #CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE2D_WIDTH DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE2D_WIDTH} - Maximum 2D texture width
  • *
  • {@link #CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE2D_HEIGHT DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE2D_HEIGHT} - Maximum 2D texture height
  • *
  • {@link #CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE3D_WIDTH DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE3D_WIDTH} - Maximum 3D texture width
  • *
  • {@link #CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE3D_HEIGHT DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE3D_HEIGHT} - Maximum 3D texture height
  • *
  • {@link #CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE3D_DEPTH DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE3D_DEPTH} - Maximum 3D texture depth
  • *
  • {@link #CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE2D_LAYERED_WIDTH DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE2D_LAYERED_WIDTH} - Maximum 2D layered texture width
  • *
  • {@link #CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE2D_LAYERED_HEIGHT DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE2D_LAYERED_HEIGHT} - Maximum 2D layered texture height
  • *
  • {@link #CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE2D_LAYERED_LAYERS DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE2D_LAYERED_LAYERS} - Maximum layers in a 2D layered texture
  • *
  • {@link #CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE2D_ARRAY_WIDTH DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE2D_ARRAY_WIDTH} - Deprecated, use {@link #CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE2D_LAYERED_WIDTH DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE2D_LAYERED_WIDTH}
  • *
  • {@link #CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE2D_ARRAY_HEIGHT DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE2D_ARRAY_HEIGHT} - Deprecated, use {@link #CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE2D_LAYERED_HEIGHT DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE2D_LAYERED_HEIGHT}
  • *
  • {@link #CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE2D_ARRAY_NUMSLICES DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE2D_ARRAY_NUMSLICES} - Deprecated, use {@link #CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE2D_LAYERED_LAYERS DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE2D_LAYERED_LAYERS}
  • *
  • {@link #CU_DEVICE_ATTRIBUTE_SURFACE_ALIGNMENT DEVICE_ATTRIBUTE_SURFACE_ALIGNMENT} - Alignment requirement for surfaces
  • *
  • {@link #CU_DEVICE_ATTRIBUTE_CONCURRENT_KERNELS DEVICE_ATTRIBUTE_CONCURRENT_KERNELS} - Device can possibly execute multiple kernels concurrently
  • *
  • {@link #CU_DEVICE_ATTRIBUTE_ECC_ENABLED DEVICE_ATTRIBUTE_ECC_ENABLED} - Device has ECC support enabled
  • *
  • {@link #CU_DEVICE_ATTRIBUTE_PCI_BUS_ID DEVICE_ATTRIBUTE_PCI_BUS_ID} - PCI bus ID of the device
  • *
  • {@link #CU_DEVICE_ATTRIBUTE_PCI_DEVICE_ID DEVICE_ATTRIBUTE_PCI_DEVICE_ID} - PCI device ID of the device
  • *
  • {@link #CU_DEVICE_ATTRIBUTE_TCC_DRIVER DEVICE_ATTRIBUTE_TCC_DRIVER} - Device is using TCC driver model
  • *
  • {@link #CU_DEVICE_ATTRIBUTE_MEMORY_CLOCK_RATE DEVICE_ATTRIBUTE_MEMORY_CLOCK_RATE} - Peak memory clock frequency in kilohertz
  • *
  • {@link #CU_DEVICE_ATTRIBUTE_GLOBAL_MEMORY_BUS_WIDTH DEVICE_ATTRIBUTE_GLOBAL_MEMORY_BUS_WIDTH} - Global memory bus width in bits
  • *
  • {@link #CU_DEVICE_ATTRIBUTE_L2_CACHE_SIZE DEVICE_ATTRIBUTE_L2_CACHE_SIZE} - Size of L2 cache in bytes
  • *
  • {@link #CU_DEVICE_ATTRIBUTE_MAX_THREADS_PER_MULTIPROCESSOR DEVICE_ATTRIBUTE_MAX_THREADS_PER_MULTIPROCESSOR} - Maximum resident threads per multiprocessor
  • *
  • {@link #CU_DEVICE_ATTRIBUTE_ASYNC_ENGINE_COUNT DEVICE_ATTRIBUTE_ASYNC_ENGINE_COUNT} - Number of asynchronous engines
  • *
  • {@link #CU_DEVICE_ATTRIBUTE_UNIFIED_ADDRESSING DEVICE_ATTRIBUTE_UNIFIED_ADDRESSING} - Device shares a unified address space with the host
  • *
  • {@link #CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE1D_LAYERED_WIDTH DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE1D_LAYERED_WIDTH} - Maximum 1D layered texture width
  • *
  • {@link #CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE1D_LAYERED_LAYERS DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE1D_LAYERED_LAYERS} - Maximum layers in a 1D layered texture
  • *
  • {@link #CU_DEVICE_ATTRIBUTE_CAN_TEX2D_GATHER DEVICE_ATTRIBUTE_CAN_TEX2D_GATHER} - Deprecated, do not use.
  • *
  • {@link #CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE2D_GATHER_WIDTH DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE2D_GATHER_WIDTH} - Maximum 2D texture width if {@link #CUDA_ARRAY3D_TEXTURE_GATHER} is set
  • *
  • {@link #CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE2D_GATHER_HEIGHT DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE2D_GATHER_HEIGHT} - Maximum 2D texture height if {@link #CUDA_ARRAY3D_TEXTURE_GATHER} is set
  • *
  • {@link #CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE3D_WIDTH_ALTERNATE DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE3D_WIDTH_ALTERNATE} - Alternate maximum 3D texture width
  • *
  • {@link #CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE3D_HEIGHT_ALTERNATE DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE3D_HEIGHT_ALTERNATE} - Alternate maximum 3D texture height
  • *
  • {@link #CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE3D_DEPTH_ALTERNATE DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE3D_DEPTH_ALTERNATE} - Alternate maximum 3D texture depth
  • *
  • {@link #CU_DEVICE_ATTRIBUTE_PCI_DOMAIN_ID DEVICE_ATTRIBUTE_PCI_DOMAIN_ID} - PCI domain ID of the device
  • *
  • {@link #CU_DEVICE_ATTRIBUTE_TEXTURE_PITCH_ALIGNMENT DEVICE_ATTRIBUTE_TEXTURE_PITCH_ALIGNMENT} - Pitch alignment requirement for textures
  • *
  • {@link #CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURECUBEMAP_WIDTH DEVICE_ATTRIBUTE_MAXIMUM_TEXTURECUBEMAP_WIDTH} - Maximum cubemap texture width/height
  • *
  • {@link #CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURECUBEMAP_LAYERED_WIDTH DEVICE_ATTRIBUTE_MAXIMUM_TEXTURECUBEMAP_LAYERED_WIDTH} - Maximum cubemap layered texture width/height
  • *
  • {@link #CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURECUBEMAP_LAYERED_LAYERS DEVICE_ATTRIBUTE_MAXIMUM_TEXTURECUBEMAP_LAYERED_LAYERS} - Maximum layers in a cubemap layered texture
  • *
  • {@link #CU_DEVICE_ATTRIBUTE_MAXIMUM_SURFACE1D_WIDTH DEVICE_ATTRIBUTE_MAXIMUM_SURFACE1D_WIDTH} - Maximum 1D surface width
  • *
  • {@link #CU_DEVICE_ATTRIBUTE_MAXIMUM_SURFACE2D_WIDTH DEVICE_ATTRIBUTE_MAXIMUM_SURFACE2D_WIDTH} - Maximum 2D surface width
  • *
  • {@link #CU_DEVICE_ATTRIBUTE_MAXIMUM_SURFACE2D_HEIGHT DEVICE_ATTRIBUTE_MAXIMUM_SURFACE2D_HEIGHT} - Maximum 2D surface height
  • *
  • {@link #CU_DEVICE_ATTRIBUTE_MAXIMUM_SURFACE3D_WIDTH DEVICE_ATTRIBUTE_MAXIMUM_SURFACE3D_WIDTH} - Maximum 3D surface width
  • *
  • {@link #CU_DEVICE_ATTRIBUTE_MAXIMUM_SURFACE3D_HEIGHT DEVICE_ATTRIBUTE_MAXIMUM_SURFACE3D_HEIGHT} - Maximum 3D surface height
  • *
  • {@link #CU_DEVICE_ATTRIBUTE_MAXIMUM_SURFACE3D_DEPTH DEVICE_ATTRIBUTE_MAXIMUM_SURFACE3D_DEPTH} - Maximum 3D surface depth
  • *
  • {@link #CU_DEVICE_ATTRIBUTE_MAXIMUM_SURFACE1D_LAYERED_WIDTH DEVICE_ATTRIBUTE_MAXIMUM_SURFACE1D_LAYERED_WIDTH} - Maximum 1D layered surface width
  • *
  • {@link #CU_DEVICE_ATTRIBUTE_MAXIMUM_SURFACE1D_LAYERED_LAYERS DEVICE_ATTRIBUTE_MAXIMUM_SURFACE1D_LAYERED_LAYERS} - Maximum layers in a 1D layered surface
  • *
  • {@link #CU_DEVICE_ATTRIBUTE_MAXIMUM_SURFACE2D_LAYERED_WIDTH DEVICE_ATTRIBUTE_MAXIMUM_SURFACE2D_LAYERED_WIDTH} - Maximum 2D layered surface width
  • *
  • {@link #CU_DEVICE_ATTRIBUTE_MAXIMUM_SURFACE2D_LAYERED_HEIGHT DEVICE_ATTRIBUTE_MAXIMUM_SURFACE2D_LAYERED_HEIGHT} - Maximum 2D layered surface height
  • *
  • {@link #CU_DEVICE_ATTRIBUTE_MAXIMUM_SURFACE2D_LAYERED_LAYERS DEVICE_ATTRIBUTE_MAXIMUM_SURFACE2D_LAYERED_LAYERS} - Maximum layers in a 2D layered surface
  • *
  • {@link #CU_DEVICE_ATTRIBUTE_MAXIMUM_SURFACECUBEMAP_WIDTH DEVICE_ATTRIBUTE_MAXIMUM_SURFACECUBEMAP_WIDTH} - Maximum cubemap surface width
  • *
  • {@link #CU_DEVICE_ATTRIBUTE_MAXIMUM_SURFACECUBEMAP_LAYERED_WIDTH DEVICE_ATTRIBUTE_MAXIMUM_SURFACECUBEMAP_LAYERED_WIDTH} - Maximum cubemap layered surface width
  • *
  • {@link #CU_DEVICE_ATTRIBUTE_MAXIMUM_SURFACECUBEMAP_LAYERED_LAYERS DEVICE_ATTRIBUTE_MAXIMUM_SURFACECUBEMAP_LAYERED_LAYERS} - Maximum layers in a cubemap layered surface
  • *
  • {@link #CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE1D_LINEAR_WIDTH DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE1D_LINEAR_WIDTH} - Deprecated, do not use. Use {@code cudaDeviceGetTexture1DLinearMaxWidth()} or {@link #cuDeviceGetTexture1DLinearMaxWidth DeviceGetTexture1DLinearMaxWidth} instead.
  • *
  • {@link #CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE2D_LINEAR_WIDTH DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE2D_LINEAR_WIDTH} - Maximum 2D linear texture width
  • *
  • {@link #CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE2D_LINEAR_HEIGHT DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE2D_LINEAR_HEIGHT} - Maximum 2D linear texture height
  • *
  • {@link #CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE2D_LINEAR_PITCH DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE2D_LINEAR_PITCH} - Maximum 2D linear texture pitch in bytes
  • *
  • {@link #CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE2D_MIPMAPPED_WIDTH DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE2D_MIPMAPPED_WIDTH} - Maximum mipmapped 2D texture width
  • *
  • {@link #CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE2D_MIPMAPPED_HEIGHT DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE2D_MIPMAPPED_HEIGHT} - Maximum mipmapped 2D texture height
  • *
  • {@link #CU_DEVICE_ATTRIBUTE_COMPUTE_CAPABILITY_MAJOR DEVICE_ATTRIBUTE_COMPUTE_CAPABILITY_MAJOR} - Major compute capability version number
  • *
  • {@link #CU_DEVICE_ATTRIBUTE_COMPUTE_CAPABILITY_MINOR DEVICE_ATTRIBUTE_COMPUTE_CAPABILITY_MINOR} - Minor compute capability version number
  • *
  • {@link #CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE1D_MIPMAPPED_WIDTH DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE1D_MIPMAPPED_WIDTH} - Maximum mipmapped 1D texture width
  • *
  • {@link #CU_DEVICE_ATTRIBUTE_STREAM_PRIORITIES_SUPPORTED DEVICE_ATTRIBUTE_STREAM_PRIORITIES_SUPPORTED} - Device supports stream priorities
  • *
  • {@link #CU_DEVICE_ATTRIBUTE_GLOBAL_L1_CACHE_SUPPORTED DEVICE_ATTRIBUTE_GLOBAL_L1_CACHE_SUPPORTED} - Device supports caching globals in L1
  • *
  • {@link #CU_DEVICE_ATTRIBUTE_LOCAL_L1_CACHE_SUPPORTED DEVICE_ATTRIBUTE_LOCAL_L1_CACHE_SUPPORTED} - Device supports caching locals in L1
  • *
  • {@link #CU_DEVICE_ATTRIBUTE_MAX_SHARED_MEMORY_PER_MULTIPROCESSOR DEVICE_ATTRIBUTE_MAX_SHARED_MEMORY_PER_MULTIPROCESSOR} - Maximum shared memory available per multiprocessor in bytes
  • *
  • {@link #CU_DEVICE_ATTRIBUTE_MAX_REGISTERS_PER_MULTIPROCESSOR DEVICE_ATTRIBUTE_MAX_REGISTERS_PER_MULTIPROCESSOR} - Maximum number of 32-bit registers available per multiprocessor
  • *
  • {@link #CU_DEVICE_ATTRIBUTE_MANAGED_MEMORY DEVICE_ATTRIBUTE_MANAGED_MEMORY} - Device can allocate managed memory on this system
  • *
  • {@link #CU_DEVICE_ATTRIBUTE_MULTI_GPU_BOARD DEVICE_ATTRIBUTE_MULTI_GPU_BOARD} - Device is on a multi-GPU board
  • *
  • {@link #CU_DEVICE_ATTRIBUTE_MULTI_GPU_BOARD_GROUP_ID DEVICE_ATTRIBUTE_MULTI_GPU_BOARD_GROUP_ID} - Unique id for a group of devices on the same multi-GPU board
  • *
  • {@link #CU_DEVICE_ATTRIBUTE_HOST_NATIVE_ATOMIC_SUPPORTED DEVICE_ATTRIBUTE_HOST_NATIVE_ATOMIC_SUPPORTED} - Link between the device and the host supports native atomic operations (this is a placeholder attribute, and is not supported on any current hardware)
  • *
  • {@link #CU_DEVICE_ATTRIBUTE_SINGLE_TO_DOUBLE_PRECISION_PERF_RATIO DEVICE_ATTRIBUTE_SINGLE_TO_DOUBLE_PRECISION_PERF_RATIO} - Ratio of single precision performance (in floating-point operations per second) to double precision performance
  • *
  • {@link #CU_DEVICE_ATTRIBUTE_PAGEABLE_MEMORY_ACCESS DEVICE_ATTRIBUTE_PAGEABLE_MEMORY_ACCESS} - Device supports coherently accessing pageable memory without calling cudaHostRegister on it
  • *
  • {@link #CU_DEVICE_ATTRIBUTE_CONCURRENT_MANAGED_ACCESS DEVICE_ATTRIBUTE_CONCURRENT_MANAGED_ACCESS} - Device can coherently access managed memory concurrently with the CPU
  • *
  • {@link #CU_DEVICE_ATTRIBUTE_COMPUTE_PREEMPTION_SUPPORTED DEVICE_ATTRIBUTE_COMPUTE_PREEMPTION_SUPPORTED} - Device supports compute preemption.
  • *
  • {@link #CU_DEVICE_ATTRIBUTE_CAN_USE_HOST_POINTER_FOR_REGISTERED_MEM DEVICE_ATTRIBUTE_CAN_USE_HOST_POINTER_FOR_REGISTERED_MEM} - Device can access host registered memory at the same virtual address as the CPU
  • *
  • {@link #CU_DEVICE_ATTRIBUTE_CAN_USE_STREAM_MEM_OPS DEVICE_ATTRIBUTE_CAN_USE_STREAM_MEM_OPS} - {@link #cuStreamBatchMemOp StreamBatchMemOp} and related APIs are supported.
  • *
  • {@link #CU_DEVICE_ATTRIBUTE_CAN_USE_64_BIT_STREAM_MEM_OPS DEVICE_ATTRIBUTE_CAN_USE_64_BIT_STREAM_MEM_OPS} - 64-bit operations are supported in {@link #cuStreamBatchMemOp StreamBatchMemOp} and related APIs.
  • *
  • {@link #CU_DEVICE_ATTRIBUTE_CAN_USE_STREAM_WAIT_VALUE_NOR DEVICE_ATTRIBUTE_CAN_USE_STREAM_WAIT_VALUE_NOR} - {@link #CU_STREAM_WAIT_VALUE_NOR STREAM_WAIT_VALUE_NOR} is supported.
  • *
  • {@link #CU_DEVICE_ATTRIBUTE_COOPERATIVE_LAUNCH DEVICE_ATTRIBUTE_COOPERATIVE_LAUNCH} - Device supports launching cooperative kernels via {@link #cuLaunchCooperativeKernel LaunchCooperativeKernel}
  • *
  • {@link #CU_DEVICE_ATTRIBUTE_COOPERATIVE_MULTI_DEVICE_LAUNCH DEVICE_ATTRIBUTE_COOPERATIVE_MULTI_DEVICE_LAUNCH} - Deprecated, {@link #cuLaunchCooperativeKernelMultiDevice LaunchCooperativeKernelMultiDevice} is deprecated.
  • *
  • {@link #CU_DEVICE_ATTRIBUTE_MAX_SHARED_MEMORY_PER_BLOCK_OPTIN DEVICE_ATTRIBUTE_MAX_SHARED_MEMORY_PER_BLOCK_OPTIN} - Maximum optin shared memory per block
  • *
  • {@link #CU_DEVICE_ATTRIBUTE_CAN_FLUSH_REMOTE_WRITES DEVICE_ATTRIBUTE_CAN_FLUSH_REMOTE_WRITES} - The {@link #CU_STREAM_WAIT_VALUE_FLUSH STREAM_WAIT_VALUE_FLUSH} flag and the {@link #CU_STREAM_MEM_OP_FLUSH_REMOTE_WRITES STREAM_MEM_OP_FLUSH_REMOTE_WRITES} MemOp are supported on the device. See {@code CUDA_MEMOP} for additional details.
  • *
  • {@link #CU_DEVICE_ATTRIBUTE_HOST_REGISTER_SUPPORTED DEVICE_ATTRIBUTE_HOST_REGISTER_SUPPORTED} - Device supports host memory registration via {@code cudaHostRegister()}.
  • *
  • {@link #CU_DEVICE_ATTRIBUTE_PAGEABLE_MEMORY_ACCESS_USES_HOST_PAGE_TABLES DEVICE_ATTRIBUTE_PAGEABLE_MEMORY_ACCESS_USES_HOST_PAGE_TABLES} - Device accesses pageable memory via the host's page tables.
  • *
  • {@link #CU_DEVICE_ATTRIBUTE_DIRECT_MANAGED_MEM_ACCESS_FROM_HOST DEVICE_ATTRIBUTE_DIRECT_MANAGED_MEM_ACCESS_FROM_HOST} - The host can directly access managed memory on the device without migration.
  • *
  • {@link #CU_DEVICE_ATTRIBUTE_VIRTUAL_ADDRESS_MANAGEMENT_SUPPORTED DEVICE_ATTRIBUTE_VIRTUAL_ADDRESS_MANAGEMENT_SUPPORTED} - Deprecated, use {@link #CU_DEVICE_ATTRIBUTE_VIRTUAL_MEMORY_MANAGEMENT_SUPPORTED DEVICE_ATTRIBUTE_VIRTUAL_MEMORY_MANAGEMENT_SUPPORTED} instead
  • *
  • {@link #CU_DEVICE_ATTRIBUTE_VIRTUAL_MEMORY_MANAGEMENT_SUPPORTED DEVICE_ATTRIBUTE_VIRTUAL_MEMORY_MANAGEMENT_SUPPORTED} - Device supports virtual memory management APIs like {@link #cuMemAddressReserve MemAddressReserve}, {@link #cuMemCreate MemCreate}, {@link #cuMemMap MemMap} and related APIs
  • *
  • {@link #CU_DEVICE_ATTRIBUTE_HANDLE_TYPE_POSIX_FILE_DESCRIPTOR_SUPPORTED DEVICE_ATTRIBUTE_HANDLE_TYPE_POSIX_FILE_DESCRIPTOR_SUPPORTED} - Device supports exporting memory to a posix file descriptor with {@link #cuMemExportToShareableHandle MemExportToShareableHandle}, if requested via {@link #cuMemCreate MemCreate}
  • *
  • {@link #CU_DEVICE_ATTRIBUTE_HANDLE_TYPE_WIN32_HANDLE_SUPPORTED DEVICE_ATTRIBUTE_HANDLE_TYPE_WIN32_HANDLE_SUPPORTED} - Device supports exporting memory to a Win32 NT handle with {@link #cuMemExportToShareableHandle MemExportToShareableHandle}, if requested via {@link #cuMemCreate MemCreate}
  • *
  • {@link #CU_DEVICE_ATTRIBUTE_HANDLE_TYPE_WIN32_KMT_HANDLE_SUPPORTED DEVICE_ATTRIBUTE_HANDLE_TYPE_WIN32_KMT_HANDLE_SUPPORTED} - Device supports exporting memory to a Win32 KMT handle with {@link #cuMemExportToShareableHandle MemExportToShareableHandle}, if requested via {@link #cuMemCreate MemCreate}
  • *
  • {@link #CU_DEVICE_ATTRIBUTE_MAX_BLOCKS_PER_MULTIPROCESSOR DEVICE_ATTRIBUTE_MAX_BLOCKS_PER_MULTIPROCESSOR} - Maximum number of blocks per multiprocessor
  • *
  • {@link #CU_DEVICE_ATTRIBUTE_GENERIC_COMPRESSION_SUPPORTED DEVICE_ATTRIBUTE_GENERIC_COMPRESSION_SUPPORTED} - Device supports compression of memory
  • *
  • {@link #CU_DEVICE_ATTRIBUTE_MAX_PERSISTING_L2_CACHE_SIZE DEVICE_ATTRIBUTE_MAX_PERSISTING_L2_CACHE_SIZE} - Maximum L2 persisting lines capacity setting in bytes.
  • *
  • {@link #CU_DEVICE_ATTRIBUTE_MAX_ACCESS_POLICY_WINDOW_SIZE DEVICE_ATTRIBUTE_MAX_ACCESS_POLICY_WINDOW_SIZE} - Maximum value of {@link CUaccessPolicyWindow}{@code num_bytes}.
  • *
  • {@link #CU_DEVICE_ATTRIBUTE_GPU_DIRECT_RDMA_WITH_CUDA_VMM_SUPPORTED DEVICE_ATTRIBUTE_GPU_DIRECT_RDMA_WITH_CUDA_VMM_SUPPORTED} - Device supports specifying the GPUDirect RDMA flag with {@link #cuMemCreate MemCreate}
  • *
  • {@link #CU_DEVICE_ATTRIBUTE_RESERVED_SHARED_MEMORY_PER_BLOCK DEVICE_ATTRIBUTE_RESERVED_SHARED_MEMORY_PER_BLOCK} - Shared memory reserved by CUDA driver per block in bytes
  • *
  • {@link #CU_DEVICE_ATTRIBUTE_SPARSE_CUDA_ARRAY_SUPPORTED DEVICE_ATTRIBUTE_SPARSE_CUDA_ARRAY_SUPPORTED} - Device supports sparse CUDA arrays and sparse CUDA mipmapped arrays
  • *
  • {@link #CU_DEVICE_ATTRIBUTE_READ_ONLY_HOST_REGISTER_SUPPORTED DEVICE_ATTRIBUTE_READ_ONLY_HOST_REGISTER_SUPPORTED} - Device supports using the {@link #cuMemHostRegister MemHostRegister} flag {@link #CU_MEMHOSTREGISTER_READ_ONLY MEMHOSTREGISTER_READ_ONLY} to register memory that must be mapped as read-only to the GPU
  • *
  • {@link #CU_DEVICE_ATTRIBUTE_TIMELINE_SEMAPHORE_INTEROP_SUPPORTED DEVICE_ATTRIBUTE_TIMELINE_SEMAPHORE_INTEROP_SUPPORTED} - External timeline semaphore interop is supported on the device
  • *
  • {@link #CU_DEVICE_ATTRIBUTE_MEMORY_POOLS_SUPPORTED DEVICE_ATTRIBUTE_MEMORY_POOLS_SUPPORTED} - Device supports using the {@link #cuMemAllocAsync MemAllocAsync} and {@code cuMemPool*} family of APIs
  • *
  • {@link #CU_DEVICE_ATTRIBUTE_GPU_DIRECT_RDMA_SUPPORTED DEVICE_ATTRIBUTE_GPU_DIRECT_RDMA_SUPPORTED} - Device supports GPUDirect RDMA APIs, like nvidia_p2p_get_pages (see https://docs.nvidia.com/cuda/gpudirect-rdma for more information)
  • *
  • {@link #CU_DEVICE_ATTRIBUTE_GPU_DIRECT_RDMA_FLUSH_WRITES_OPTIONS DEVICE_ATTRIBUTE_GPU_DIRECT_RDMA_FLUSH_WRITES_OPTIONS} - The returned attribute shall be interpreted as a bitmask, where the individual bits are described by the {@code CUflushGPUDirectRDMAWritesOptions} enum
  • *
  • {@link #CU_DEVICE_ATTRIBUTE_GPU_DIRECT_RDMA_WRITES_ORDERING DEVICE_ATTRIBUTE_GPU_DIRECT_RDMA_WRITES_ORDERING} - GPUDirect RDMA writes to the device do not need to be flushed for consumers within the scope indicated by the returned attribute. See {@code CUGPUDirectRDMAWritesOrdering} for the numerical values returned here.
  • *
  • {@link #CU_DEVICE_ATTRIBUTE_MEMPOOL_SUPPORTED_HANDLE_TYPES DEVICE_ATTRIBUTE_MEMPOOL_SUPPORTED_HANDLE_TYPES} - Handle types supported with mempool based IPC
  • *
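A minimal sketch (Java, LWJGL-style usage of these bindings) of querying a few of the attributes listed above with {@link #cuDeviceGetAttribute DeviceGetAttribute}; the device ordinal 0 is an assumption and return-code checks are omitted for brevity:

    // assumed imports: java.nio.IntBuffer, org.lwjgl.system.MemoryStack,
    //                  static org.lwjgl.cuda.CU.*, static org.lwjgl.system.MemoryStack.*
    try (MemoryStack stack = stackPush()) {
        IntBuffer dev = stack.mallocInt(1);
        IntBuffer pi  = stack.mallocInt(1);

        cuInit(0);
        cuDeviceGet(dev, 0); // first device (assumption)

        cuDeviceGetAttribute(pi, CU_DEVICE_ATTRIBUTE_COMPUTE_CAPABILITY_MAJOR, dev.get(0));
        int major = pi.get(0);
        cuDeviceGetAttribute(pi, CU_DEVICE_ATTRIBUTE_COMPUTE_CAPABILITY_MINOR, dev.get(0));
        int minor = pi.get(0);
        cuDeviceGetAttribute(pi, CU_DEVICE_ATTRIBUTE_MAX_THREADS_PER_BLOCK, dev.get(0));

        System.out.println("compute capability " + major + "." + minor + ", max threads/block = " + pi.get(0));
    }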
*/ public static final int CU_DEVICE_ATTRIBUTE_MAX_THREADS_PER_BLOCK = 1, CU_DEVICE_ATTRIBUTE_MAX_BLOCK_DIM_X = 2, CU_DEVICE_ATTRIBUTE_MAX_BLOCK_DIM_Y = 3, CU_DEVICE_ATTRIBUTE_MAX_BLOCK_DIM_Z = 4, CU_DEVICE_ATTRIBUTE_MAX_GRID_DIM_X = 5, CU_DEVICE_ATTRIBUTE_MAX_GRID_DIM_Y = 6, CU_DEVICE_ATTRIBUTE_MAX_GRID_DIM_Z = 7, CU_DEVICE_ATTRIBUTE_MAX_SHARED_MEMORY_PER_BLOCK = 8, CU_DEVICE_ATTRIBUTE_SHARED_MEMORY_PER_BLOCK = 8, CU_DEVICE_ATTRIBUTE_TOTAL_CONSTANT_MEMORY = 9, CU_DEVICE_ATTRIBUTE_WARP_SIZE = 10, CU_DEVICE_ATTRIBUTE_MAX_PITCH = 11, CU_DEVICE_ATTRIBUTE_MAX_REGISTERS_PER_BLOCK = 12, CU_DEVICE_ATTRIBUTE_REGISTERS_PER_BLOCK = 12, CU_DEVICE_ATTRIBUTE_CLOCK_RATE = 13, CU_DEVICE_ATTRIBUTE_TEXTURE_ALIGNMENT = 14, CU_DEVICE_ATTRIBUTE_GPU_OVERLAP = 15, CU_DEVICE_ATTRIBUTE_MULTIPROCESSOR_COUNT = 16, CU_DEVICE_ATTRIBUTE_KERNEL_EXEC_TIMEOUT = 17, CU_DEVICE_ATTRIBUTE_INTEGRATED = 18, CU_DEVICE_ATTRIBUTE_CAN_MAP_HOST_MEMORY = 19, CU_DEVICE_ATTRIBUTE_COMPUTE_MODE = 20, CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE1D_WIDTH = 21, CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE2D_WIDTH = 22, CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE2D_HEIGHT = 23, CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE3D_WIDTH = 24, CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE3D_HEIGHT = 25, CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE3D_DEPTH = 26, CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE2D_LAYERED_WIDTH = 27, CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE2D_LAYERED_HEIGHT = 28, CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE2D_LAYERED_LAYERS = 29, CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE2D_ARRAY_WIDTH = 27, CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE2D_ARRAY_HEIGHT = 28, CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE2D_ARRAY_NUMSLICES = 29, CU_DEVICE_ATTRIBUTE_SURFACE_ALIGNMENT = 30, CU_DEVICE_ATTRIBUTE_CONCURRENT_KERNELS = 31, CU_DEVICE_ATTRIBUTE_ECC_ENABLED = 32, CU_DEVICE_ATTRIBUTE_PCI_BUS_ID = 33, CU_DEVICE_ATTRIBUTE_PCI_DEVICE_ID = 34, CU_DEVICE_ATTRIBUTE_TCC_DRIVER = 35, CU_DEVICE_ATTRIBUTE_MEMORY_CLOCK_RATE = 36, CU_DEVICE_ATTRIBUTE_GLOBAL_MEMORY_BUS_WIDTH = 37, CU_DEVICE_ATTRIBUTE_L2_CACHE_SIZE = 38, CU_DEVICE_ATTRIBUTE_MAX_THREADS_PER_MULTIPROCESSOR = 39, CU_DEVICE_ATTRIBUTE_ASYNC_ENGINE_COUNT = 40, CU_DEVICE_ATTRIBUTE_UNIFIED_ADDRESSING = 41, CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE1D_LAYERED_WIDTH = 42, CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE1D_LAYERED_LAYERS = 43, CU_DEVICE_ATTRIBUTE_CAN_TEX2D_GATHER = 44, CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE2D_GATHER_WIDTH = 45, CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE2D_GATHER_HEIGHT = 46, CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE3D_WIDTH_ALTERNATE = 47, CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE3D_HEIGHT_ALTERNATE = 48, CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE3D_DEPTH_ALTERNATE = 49, CU_DEVICE_ATTRIBUTE_PCI_DOMAIN_ID = 50, CU_DEVICE_ATTRIBUTE_TEXTURE_PITCH_ALIGNMENT = 51, CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURECUBEMAP_WIDTH = 52, CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURECUBEMAP_LAYERED_WIDTH = 53, CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURECUBEMAP_LAYERED_LAYERS = 54, CU_DEVICE_ATTRIBUTE_MAXIMUM_SURFACE1D_WIDTH = 55, CU_DEVICE_ATTRIBUTE_MAXIMUM_SURFACE2D_WIDTH = 56, CU_DEVICE_ATTRIBUTE_MAXIMUM_SURFACE2D_HEIGHT = 57, CU_DEVICE_ATTRIBUTE_MAXIMUM_SURFACE3D_WIDTH = 58, CU_DEVICE_ATTRIBUTE_MAXIMUM_SURFACE3D_HEIGHT = 59, CU_DEVICE_ATTRIBUTE_MAXIMUM_SURFACE3D_DEPTH = 60, CU_DEVICE_ATTRIBUTE_MAXIMUM_SURFACE1D_LAYERED_WIDTH = 61, CU_DEVICE_ATTRIBUTE_MAXIMUM_SURFACE1D_LAYERED_LAYERS = 62, CU_DEVICE_ATTRIBUTE_MAXIMUM_SURFACE2D_LAYERED_WIDTH = 63, CU_DEVICE_ATTRIBUTE_MAXIMUM_SURFACE2D_LAYERED_HEIGHT = 64, CU_DEVICE_ATTRIBUTE_MAXIMUM_SURFACE2D_LAYERED_LAYERS = 65, CU_DEVICE_ATTRIBUTE_MAXIMUM_SURFACECUBEMAP_WIDTH = 66, 
CU_DEVICE_ATTRIBUTE_MAXIMUM_SURFACECUBEMAP_LAYERED_WIDTH = 67, CU_DEVICE_ATTRIBUTE_MAXIMUM_SURFACECUBEMAP_LAYERED_LAYERS = 68, CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE1D_LINEAR_WIDTH = 69, CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE2D_LINEAR_WIDTH = 70, CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE2D_LINEAR_HEIGHT = 71, CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE2D_LINEAR_PITCH = 72, CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE2D_MIPMAPPED_WIDTH = 73, CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE2D_MIPMAPPED_HEIGHT = 74, CU_DEVICE_ATTRIBUTE_COMPUTE_CAPABILITY_MAJOR = 75, CU_DEVICE_ATTRIBUTE_COMPUTE_CAPABILITY_MINOR = 76, CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE1D_MIPMAPPED_WIDTH = 77, CU_DEVICE_ATTRIBUTE_STREAM_PRIORITIES_SUPPORTED = 78, CU_DEVICE_ATTRIBUTE_GLOBAL_L1_CACHE_SUPPORTED = 79, CU_DEVICE_ATTRIBUTE_LOCAL_L1_CACHE_SUPPORTED = 80, CU_DEVICE_ATTRIBUTE_MAX_SHARED_MEMORY_PER_MULTIPROCESSOR = 81, CU_DEVICE_ATTRIBUTE_MAX_REGISTERS_PER_MULTIPROCESSOR = 82, CU_DEVICE_ATTRIBUTE_MANAGED_MEMORY = 83, CU_DEVICE_ATTRIBUTE_MULTI_GPU_BOARD = 84, CU_DEVICE_ATTRIBUTE_MULTI_GPU_BOARD_GROUP_ID = 85, CU_DEVICE_ATTRIBUTE_HOST_NATIVE_ATOMIC_SUPPORTED = 86, CU_DEVICE_ATTRIBUTE_SINGLE_TO_DOUBLE_PRECISION_PERF_RATIO = 87, CU_DEVICE_ATTRIBUTE_PAGEABLE_MEMORY_ACCESS = 88, CU_DEVICE_ATTRIBUTE_CONCURRENT_MANAGED_ACCESS = 89, CU_DEVICE_ATTRIBUTE_COMPUTE_PREEMPTION_SUPPORTED = 90, CU_DEVICE_ATTRIBUTE_CAN_USE_HOST_POINTER_FOR_REGISTERED_MEM = 91, CU_DEVICE_ATTRIBUTE_CAN_USE_STREAM_MEM_OPS = 92, CU_DEVICE_ATTRIBUTE_CAN_USE_64_BIT_STREAM_MEM_OPS = 93, CU_DEVICE_ATTRIBUTE_CAN_USE_STREAM_WAIT_VALUE_NOR = 94, CU_DEVICE_ATTRIBUTE_COOPERATIVE_LAUNCH = 95, CU_DEVICE_ATTRIBUTE_COOPERATIVE_MULTI_DEVICE_LAUNCH = 96, CU_DEVICE_ATTRIBUTE_MAX_SHARED_MEMORY_PER_BLOCK_OPTIN = 97, CU_DEVICE_ATTRIBUTE_CAN_FLUSH_REMOTE_WRITES = 98, CU_DEVICE_ATTRIBUTE_HOST_REGISTER_SUPPORTED = 99, CU_DEVICE_ATTRIBUTE_PAGEABLE_MEMORY_ACCESS_USES_HOST_PAGE_TABLES = 100, CU_DEVICE_ATTRIBUTE_DIRECT_MANAGED_MEM_ACCESS_FROM_HOST = 101, CU_DEVICE_ATTRIBUTE_VIRTUAL_ADDRESS_MANAGEMENT_SUPPORTED = 102, CU_DEVICE_ATTRIBUTE_VIRTUAL_MEMORY_MANAGEMENT_SUPPORTED = 102, CU_DEVICE_ATTRIBUTE_HANDLE_TYPE_POSIX_FILE_DESCRIPTOR_SUPPORTED = 103, CU_DEVICE_ATTRIBUTE_HANDLE_TYPE_WIN32_HANDLE_SUPPORTED = 104, CU_DEVICE_ATTRIBUTE_HANDLE_TYPE_WIN32_KMT_HANDLE_SUPPORTED = 105, CU_DEVICE_ATTRIBUTE_MAX_BLOCKS_PER_MULTIPROCESSOR = 106, CU_DEVICE_ATTRIBUTE_GENERIC_COMPRESSION_SUPPORTED = 107, CU_DEVICE_ATTRIBUTE_MAX_PERSISTING_L2_CACHE_SIZE = 108, CU_DEVICE_ATTRIBUTE_MAX_ACCESS_POLICY_WINDOW_SIZE = 109, CU_DEVICE_ATTRIBUTE_GPU_DIRECT_RDMA_WITH_CUDA_VMM_SUPPORTED = 110, CU_DEVICE_ATTRIBUTE_RESERVED_SHARED_MEMORY_PER_BLOCK = 111, CU_DEVICE_ATTRIBUTE_SPARSE_CUDA_ARRAY_SUPPORTED = 112, CU_DEVICE_ATTRIBUTE_READ_ONLY_HOST_REGISTER_SUPPORTED = 113, CU_DEVICE_ATTRIBUTE_TIMELINE_SEMAPHORE_INTEROP_SUPPORTED = 114, CU_DEVICE_ATTRIBUTE_MEMORY_POOLS_SUPPORTED = 115, CU_DEVICE_ATTRIBUTE_GPU_DIRECT_RDMA_SUPPORTED = 116, CU_DEVICE_ATTRIBUTE_GPU_DIRECT_RDMA_FLUSH_WRITES_OPTIONS = 117, CU_DEVICE_ATTRIBUTE_GPU_DIRECT_RDMA_WRITES_ORDERING = 118, CU_DEVICE_ATTRIBUTE_MEMPOOL_SUPPORTED_HANDLE_TYPES = 119; /** * Pointer information. ({@code CUpointer_attribute}) * *
Enum values:
* *
    *
  • {@link #CU_POINTER_ATTRIBUTE_CONTEXT POINTER_ATTRIBUTE_CONTEXT} - The {@code CUcontext} on which a pointer was allocated or registered
  • *
  • {@link #CU_POINTER_ATTRIBUTE_MEMORY_TYPE POINTER_ATTRIBUTE_MEMORY_TYPE} - The {@code CUmemorytype} describing the physical location of a pointer
  • *
  • {@link #CU_POINTER_ATTRIBUTE_DEVICE_POINTER POINTER_ATTRIBUTE_DEVICE_POINTER} - The address at which a pointer's memory may be accessed on the device
  • *
  • {@link #CU_POINTER_ATTRIBUTE_HOST_POINTER POINTER_ATTRIBUTE_HOST_POINTER} - The address at which a pointer's memory may be accessed on the host
  • *
  • {@link #CU_POINTER_ATTRIBUTE_P2P_TOKENS POINTER_ATTRIBUTE_P2P_TOKENS} - A pair of tokens for use with the {@code nv-p2p.h} Linux kernel interface
  • *
  • {@link #CU_POINTER_ATTRIBUTE_SYNC_MEMOPS POINTER_ATTRIBUTE_SYNC_MEMOPS} - Synchronize every synchronous memory operation initiated on this region
  • *
  • {@link #CU_POINTER_ATTRIBUTE_BUFFER_ID POINTER_ATTRIBUTE_BUFFER_ID} - A process-wide unique ID for an allocated memory region
  • *
  • {@link #CU_POINTER_ATTRIBUTE_IS_MANAGED POINTER_ATTRIBUTE_IS_MANAGED} - Indicates if the pointer points to managed memory
  • *
  • {@link #CU_POINTER_ATTRIBUTE_DEVICE_ORDINAL POINTER_ATTRIBUTE_DEVICE_ORDINAL} - A device ordinal of a device on which a pointer was allocated or registered
  • *
  • {@link #CU_POINTER_ATTRIBUTE_IS_LEGACY_CUDA_IPC_CAPABLE POINTER_ATTRIBUTE_IS_LEGACY_CUDA_IPC_CAPABLE} - 1 if this pointer maps to an allocation that is suitable for {@code cudaIpcGetMemHandle()}, 0 otherwise
  • *
  • {@link #CU_POINTER_ATTRIBUTE_RANGE_START_ADDR POINTER_ATTRIBUTE_RANGE_START_ADDR} - Starting address for this requested pointer
  • *
  • {@link #CU_POINTER_ATTRIBUTE_RANGE_SIZE POINTER_ATTRIBUTE_RANGE_SIZE} - Size of the address range for this requested pointer
  • *
  • {@link #CU_POINTER_ATTRIBUTE_MAPPED POINTER_ATTRIBUTE_MAPPED} - 1 if this pointer is in a valid address range that is mapped to a backing allocation, 0 otherwise
  • *
  • {@link #CU_POINTER_ATTRIBUTE_ALLOWED_HANDLE_TYPES POINTER_ATTRIBUTE_ALLOWED_HANDLE_TYPES} - Bitmask of allowed {@code CUmemAllocationHandleType} for this allocation
  • *
  • {@link #CU_POINTER_ATTRIBUTE_IS_GPU_DIRECT_RDMA_CAPABLE POINTER_ATTRIBUTE_IS_GPU_DIRECT_RDMA_CAPABLE} - 1 if the memory this pointer is referencing can be used with the GPUDirect RDMA API
  • *
  • {@link #CU_POINTER_ATTRIBUTE_ACCESS_FLAGS POINTER_ATTRIBUTE_ACCESS_FLAGS} - Returns the access flags the device associated with the current context has on the corresponding memory referenced by the pointer given
  • *
  • {@link #CU_POINTER_ATTRIBUTE_MEMPOOL_HANDLE POINTER_ATTRIBUTE_MEMPOOL_HANDLE} - Returns the {@code mempool} handle for the allocation if it was allocated from a {@code mempool}. Otherwise returns {@code NULL}.
  • *
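A minimal sketch of classifying an allocation with {@link #cuPointerGetAttribute PointerGetAttribute}; a current context is assumed, as is the {@code IntBuffer} overload generated for the {@code void *} data parameter, and error checks are omitted:

    try (MemoryStack stack = stackPush()) {
        PointerBuffer dptr = stack.mallocPointer(1);
        cuMemAlloc(dptr, 1024); // 1 KiB device allocation (size is arbitrary)

        IntBuffer memType = stack.mallocInt(1);
        cuPointerGetAttribute(memType, CU_POINTER_ATTRIBUTE_MEMORY_TYPE, dptr.get(0));
        if (memType.get(0) == CU_MEMORYTYPE_DEVICE) {
            // the pointer refers to device memory
        }

        cuMemFree(dptr.get(0));
    }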
*/ public static final int CU_POINTER_ATTRIBUTE_CONTEXT = 1, CU_POINTER_ATTRIBUTE_MEMORY_TYPE = 2, CU_POINTER_ATTRIBUTE_DEVICE_POINTER = 3, CU_POINTER_ATTRIBUTE_HOST_POINTER = 4, CU_POINTER_ATTRIBUTE_P2P_TOKENS = 5, CU_POINTER_ATTRIBUTE_SYNC_MEMOPS = 6, CU_POINTER_ATTRIBUTE_BUFFER_ID = 7, CU_POINTER_ATTRIBUTE_IS_MANAGED = 8, CU_POINTER_ATTRIBUTE_DEVICE_ORDINAL = 9, CU_POINTER_ATTRIBUTE_IS_LEGACY_CUDA_IPC_CAPABLE = 10, CU_POINTER_ATTRIBUTE_RANGE_START_ADDR = 11, CU_POINTER_ATTRIBUTE_RANGE_SIZE = 12, CU_POINTER_ATTRIBUTE_MAPPED = 13, CU_POINTER_ATTRIBUTE_ALLOWED_HANDLE_TYPES = 14, CU_POINTER_ATTRIBUTE_IS_GPU_DIRECT_RDMA_CAPABLE = 15, CU_POINTER_ATTRIBUTE_ACCESS_FLAGS = 16, CU_POINTER_ATTRIBUTE_MEMPOOL_HANDLE = 17; /** * Function properties. ({@code CUfunction_attribute}) * *
Enum values:
* *
    *
  • {@link #CU_FUNC_ATTRIBUTE_MAX_THREADS_PER_BLOCK FUNC_ATTRIBUTE_MAX_THREADS_PER_BLOCK} - The maximum number of threads per block, beyond which a launch of the function would fail. This number depends on both the function and the device on which the function is currently loaded.
  • *
  • {@link #CU_FUNC_ATTRIBUTE_SHARED_SIZE_BYTES FUNC_ATTRIBUTE_SHARED_SIZE_BYTES} - The size in bytes of statically-allocated shared memory required by this function. This does not include dynamically-allocated shared memory requested by the user at runtime.
  • *
  • {@link #CU_FUNC_ATTRIBUTE_CONST_SIZE_BYTES FUNC_ATTRIBUTE_CONST_SIZE_BYTES} - The size in bytes of user-allocated constant memory required by this function.
  • *
  • {@link #CU_FUNC_ATTRIBUTE_LOCAL_SIZE_BYTES FUNC_ATTRIBUTE_LOCAL_SIZE_BYTES} - The size in bytes of local memory used by each thread of this function.
  • *
  • {@link #CU_FUNC_ATTRIBUTE_NUM_REGS FUNC_ATTRIBUTE_NUM_REGS} - The number of registers used by each thread of this function.
  • *
  • {@link #CU_FUNC_ATTRIBUTE_PTX_VERSION FUNC_ATTRIBUTE_PTX_VERSION} - * The PTX virtual architecture version for which the function was compiled. * *

    This value is the {@code major PTX version * 10 + the minor PTX version}, so a PTX version 1.3 function would return the value 13. Note that this may return the undefined value of 0 for cubins compiled prior to CUDA 3.0.

    *
  • *
  • {@link #CU_FUNC_ATTRIBUTE_BINARY_VERSION FUNC_ATTRIBUTE_BINARY_VERSION} - * The binary architecture version for which the function was compiled. * *

    This value is the {@code major binary version * 10 + the minor binary version}, so a binary version 1.3 function would return the value 13. Note that this will return a value of 10 for legacy cubins that do not have a properly-encoded binary architecture version.

    *
  • *
  • {@link #CU_FUNC_ATTRIBUTE_CACHE_MODE_CA FUNC_ATTRIBUTE_CACHE_MODE_CA} - The attribute to indicate whether the function has been compiled with the user-specified option {@code "-Xptxas --dlcm=ca"} set.
  • *
  • {@link #CU_FUNC_ATTRIBUTE_MAX_DYNAMIC_SHARED_SIZE_BYTES FUNC_ATTRIBUTE_MAX_DYNAMIC_SHARED_SIZE_BYTES} - * The maximum size in bytes of dynamically-allocated shared memory that can be used by this function. * *

    If the user-specified dynamic shared memory size is larger than this value, the launch will fail.

    *
  • *
  • {@link #CU_FUNC_ATTRIBUTE_PREFERRED_SHARED_MEMORY_CARVEOUT FUNC_ATTRIBUTE_PREFERRED_SHARED_MEMORY_CARVEOUT} - * On devices where the L1 cache and shared memory use the same hardware resources, this sets the shared memory carveout preference, in percent of the total shared memory. Refer to {@link #CU_DEVICE_ATTRIBUTE_MAX_SHARED_MEMORY_PER_MULTIPROCESSOR DEVICE_ATTRIBUTE_MAX_SHARED_MEMORY_PER_MULTIPROCESSOR}. * *

    This is only a hint, and the driver can choose a different ratio if required to execute the function.

    *
  • *
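A minimal sketch of reading and adjusting function attributes; {@code function} is an assumed {@code CUfunction} handle obtained with {@link #cuModuleGetFunction ModuleGetFunction}, and the 64 KiB figure is an arbitrary assumption that must not exceed the device's opt-in maximum:

    try (MemoryStack stack = stackPush()) {
        IntBuffer pi = stack.mallocInt(1);

        cuFuncGetAttribute(pi, CU_FUNC_ATTRIBUTE_MAX_THREADS_PER_BLOCK, function);
        int maxThreadsPerBlock = pi.get(0); // launch configurations above this would fail

        // opt in to 64 KiB of dynamic shared memory for this kernel
        cuFuncSetAttribute(function, CU_FUNC_ATTRIBUTE_MAX_DYNAMIC_SHARED_SIZE_BYTES, 64 * 1024);
    }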
*/ public static final int CU_FUNC_ATTRIBUTE_MAX_THREADS_PER_BLOCK = 0x0, CU_FUNC_ATTRIBUTE_SHARED_SIZE_BYTES = 0x1, CU_FUNC_ATTRIBUTE_CONST_SIZE_BYTES = 0x2, CU_FUNC_ATTRIBUTE_LOCAL_SIZE_BYTES = 0x3, CU_FUNC_ATTRIBUTE_NUM_REGS = 0x4, CU_FUNC_ATTRIBUTE_PTX_VERSION = 0x5, CU_FUNC_ATTRIBUTE_BINARY_VERSION = 0x6, CU_FUNC_ATTRIBUTE_CACHE_MODE_CA = 0x7, CU_FUNC_ATTRIBUTE_MAX_DYNAMIC_SHARED_SIZE_BYTES = 0x8, CU_FUNC_ATTRIBUTE_PREFERRED_SHARED_MEMORY_CARVEOUT = 0x9; /** * Function cache configurations. ({@code CUfunc_cache}) * *
Enum values:
* *
    *
  • {@link #CU_FUNC_CACHE_PREFER_NONE FUNC_CACHE_PREFER_NONE} - no preference for shared memory or L1 (default)
  • *
  • {@link #CU_FUNC_CACHE_PREFER_SHARED FUNC_CACHE_PREFER_SHARED} - prefer larger shared memory and smaller L1 cache
  • *
  • {@link #CU_FUNC_CACHE_PREFER_L1 FUNC_CACHE_PREFER_L1} - prefer larger L1 cache and smaller shared memory
  • *
  • {@link #CU_FUNC_CACHE_PREFER_EQUAL FUNC_CACHE_PREFER_EQUAL} - prefer equal sized L1 cache and shared memory
  • *
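For example, a context-wide preference can be expressed with {@link #cuCtxSetCacheConfig CtxSetCacheConfig} (a per-function preference is available via {@link #cuFuncSetCacheConfig FuncSetCacheConfig}); this is only a preference, which the driver is free to ignore:

    cuCtxSetCacheConfig(CU_FUNC_CACHE_PREFER_SHARED); // favor shared memory over L1 for subsequent launches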
*/ public static final int CU_FUNC_CACHE_PREFER_NONE = 0x0, CU_FUNC_CACHE_PREFER_SHARED = 0x1, CU_FUNC_CACHE_PREFER_L1 = 0x2, CU_FUNC_CACHE_PREFER_EQUAL = 0x3; /** * Shared memory configurations. ({@code CUsharedconfig}) * *
Enum values:
* *
    *
  • {@link #CU_SHARED_MEM_CONFIG_DEFAULT_BANK_SIZE SHARED_MEM_CONFIG_DEFAULT_BANK_SIZE} - set default shared memory bank size
  • *
  • {@link #CU_SHARED_MEM_CONFIG_FOUR_BYTE_BANK_SIZE SHARED_MEM_CONFIG_FOUR_BYTE_BANK_SIZE} - set shared memory bank width to four bytes
  • *
  • {@link #CU_SHARED_MEM_CONFIG_EIGHT_BYTE_BANK_SIZE SHARED_MEM_CONFIG_EIGHT_BYTE_BANK_SIZE} - set shared memory bank width to eight bytes
  • *
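For example, a kernel that accesses {@code double}-sized elements in shared memory can request eight-byte banks via {@link #cuCtxSetSharedMemConfig CtxSetSharedMemConfig} to reduce bank conflicts for 64-bit accesses:

    cuCtxSetSharedMemConfig(CU_SHARED_MEM_CONFIG_EIGHT_BYTE_BANK_SIZE);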
*/ public static final int CU_SHARED_MEM_CONFIG_DEFAULT_BANK_SIZE = 0x0, CU_SHARED_MEM_CONFIG_FOUR_BYTE_BANK_SIZE = 0x1, CU_SHARED_MEM_CONFIG_EIGHT_BYTE_BANK_SIZE = 0x2; /** * Shared memory carveout configurations. ({@code CUshared_carveout}) * *

These may be passed to {@link #cuFuncSetAttribute FuncSetAttribute}.

* *
Enum values:
* *
    *
  • {@link #CU_SHAREDMEM_CARVEOUT_DEFAULT SHAREDMEM_CARVEOUT_DEFAULT} - no preference for shared memory or L1 (default)
  • *
  • {@link #CU_SHAREDMEM_CARVEOUT_MAX_SHARED SHAREDMEM_CARVEOUT_MAX_SHARED} - prefer maximum available shared memory, minimum L1 cache
  • *
  • {@link #CU_SHAREDMEM_CARVEOUT_MAX_L1 SHAREDMEM_CARVEOUT_MAX_L1} - prefer maximum available L1 cache, minimum shared memory
  • *
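A carveout preference is applied per function through {@link #cuFuncSetAttribute FuncSetAttribute}; {@code function} is an assumed {@code CUfunction} handle and, as noted above, the value is only a hint to the driver:

    cuFuncSetAttribute(function, CU_FUNC_ATTRIBUTE_PREFERRED_SHARED_MEMORY_CARVEOUT, CU_SHAREDMEM_CARVEOUT_MAX_SHARED);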
*/ public static final int CU_SHAREDMEM_CARVEOUT_DEFAULT = 0xFFFFFFFF, CU_SHAREDMEM_CARVEOUT_MAX_SHARED = 0x64, CU_SHAREDMEM_CARVEOUT_MAX_L1 = 0x0; /** * Memory types. ({@code CUmemorytype}) * *
Enum values:
* *
    *
  • {@link #CU_MEMORYTYPE_HOST MEMORYTYPE_HOST} - Host memory
  • *
  • {@link #CU_MEMORYTYPE_DEVICE MEMORYTYPE_DEVICE} - Device memory
  • *
  • {@link #CU_MEMORYTYPE_ARRAY MEMORYTYPE_ARRAY} - Array memory
  • *
  • {@link #CU_MEMORYTYPE_UNIFIED MEMORYTYPE_UNIFIED} - Unified device or host memory
  • *
*/ public static final int CU_MEMORYTYPE_HOST = 0x1, CU_MEMORYTYPE_DEVICE = 0x2, CU_MEMORYTYPE_ARRAY = 0x3, CU_MEMORYTYPE_UNIFIED = 0x4; /** * Compute Modes. ({@code CUcomputemode}) * *
Enum values:
* *
    *
  • {@link #CU_COMPUTEMODE_DEFAULT COMPUTEMODE_DEFAULT} - Default compute mode (Multiple contexts allowed per device)
  • *
  • {@link #CU_COMPUTEMODE_PROHIBITED COMPUTEMODE_PROHIBITED} - Compute-prohibited mode (No contexts can be created on this device at this time)
  • *
  • {@link #CU_COMPUTEMODE_EXCLUSIVE_PROCESS COMPUTEMODE_EXCLUSIVE_PROCESS} - Compute-exclusive-process mode (Only one context used by a single process can be present on this device at a time)
  • *
*/ public static final int CU_COMPUTEMODE_DEFAULT = 0x0, CU_COMPUTEMODE_PROHIBITED = 0x2, CU_COMPUTEMODE_EXCLUSIVE_PROCESS = 0x3; /** * Memory advise values. ({@code CUmem_advise}) * *
Enum values:
* *
    *
  • {@link #CU_MEM_ADVISE_SET_READ_MOSTLY MEM_ADVISE_SET_READ_MOSTLY} - Data will mostly be read and only occasionally be written to
  • *
  • {@link #CU_MEM_ADVISE_UNSET_READ_MOSTLY MEM_ADVISE_UNSET_READ_MOSTLY} - Undo the effect of {@link #CU_MEM_ADVISE_SET_READ_MOSTLY MEM_ADVISE_SET_READ_MOSTLY}
  • *
  • {@link #CU_MEM_ADVISE_SET_PREFERRED_LOCATION MEM_ADVISE_SET_PREFERRED_LOCATION} - Set the preferred location for the data as the specified device
  • *
  • {@link #CU_MEM_ADVISE_UNSET_PREFERRED_LOCATION MEM_ADVISE_UNSET_PREFERRED_LOCATION} - Clear the preferred location for the data
  • *
  • {@link #CU_MEM_ADVISE_SET_ACCESSED_BY MEM_ADVISE_SET_ACCESSED_BY} - Data will be accessed by the specified device, so prevent page faults as much as possible
  • *
  • {@link #CU_MEM_ADVISE_UNSET_ACCESSED_BY MEM_ADVISE_UNSET_ACCESSED_BY} - Let the Unified Memory subsystem decide on the page faulting policy for the specified device
  • *
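A minimal sketch of advising the driver about a managed allocation; the device ordinal, the allocation size and the omitted error checks are assumptions:

    try (MemoryStack stack = stackPush()) {
        int  dev   = 0;       // device ordinal (assumption)
        long bytes = 1 << 20; // 1 MiB (assumption)

        PointerBuffer dptr = stack.mallocPointer(1);
        cuMemAllocManaged(dptr, bytes, CU_MEM_ATTACH_GLOBAL);

        // the range will mostly be read, so the driver may keep read-only copies on each accessing processor
        cuMemAdvise(dptr.get(0), bytes, CU_MEM_ADVISE_SET_READ_MOSTLY, dev);
    }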
*/ public static final int CU_MEM_ADVISE_SET_READ_MOSTLY = 0x1, CU_MEM_ADVISE_UNSET_READ_MOSTLY = 0x2, CU_MEM_ADVISE_SET_PREFERRED_LOCATION = 0x3, CU_MEM_ADVISE_UNSET_PREFERRED_LOCATION = 0x4, CU_MEM_ADVISE_SET_ACCESSED_BY = 0x5, CU_MEM_ADVISE_UNSET_ACCESSED_BY = 0x6; /** * Memory range attributes. ({@code CUmem_range_attribute}) * *
Enum values:
* *
    *
  • {@link #CU_MEM_RANGE_ATTRIBUTE_READ_MOSTLY MEM_RANGE_ATTRIBUTE_READ_MOSTLY} - Whether the range will mostly be read and only occasionally be written to
  • *
  • {@link #CU_MEM_RANGE_ATTRIBUTE_PREFERRED_LOCATION MEM_RANGE_ATTRIBUTE_PREFERRED_LOCATION} - The preferred location of the range
  • *
  • {@link #CU_MEM_RANGE_ATTRIBUTE_ACCESSED_BY MEM_RANGE_ATTRIBUTE_ACCESSED_BY} - Memory range has {@link #CU_MEM_ADVISE_SET_ACCESSED_BY MEM_ADVISE_SET_ACCESSED_BY} set for specified device
  • *
  • {@link #CU_MEM_RANGE_ATTRIBUTE_LAST_PREFETCH_LOCATION MEM_RANGE_ATTRIBUTE_LAST_PREFETCH_LOCATION} - The last location to which the range was prefetched
  • *
*/ public static final int CU_MEM_RANGE_ATTRIBUTE_READ_MOSTLY = 0x1, CU_MEM_RANGE_ATTRIBUTE_PREFERRED_LOCATION = 0x2, CU_MEM_RANGE_ATTRIBUTE_ACCESSED_BY = 0x3, CU_MEM_RANGE_ATTRIBUTE_LAST_PREFETCH_LOCATION = 0x4; /** * Online compiler and linker options. ({@code CUjit_option}) * *
Enum values:
* *
    *
  • {@link #CU_JIT_MAX_REGISTERS JIT_MAX_REGISTERS} - * Max number of registers that a thread may use. * *

    Option type: {@code unsigned int}. Applies to: compiler only

    *
  • *
  • {@link #CU_JIT_THREADS_PER_BLOCK JIT_THREADS_PER_BLOCK} - * IN: Specifies minimum number of threads per block to target compilation for * *

    OUT: Returns the number of threads the compiler actually targeted.

    * *

    This restricts the resource utilization of the compiler (e.g. max registers) such that a block with the given number of threads should be able to launch based on register limitations. Note, this option does not currently take into account any other resource limitations, such as shared memory utilization.

    * *

    Cannot be combined with {@link #CU_JIT_TARGET JIT_TARGET}. Option type: {@code unsigned int}. Applies to: compiler only

    *
  • *
  • {@link #CU_JIT_WALL_TIME JIT_WALL_TIME} - * Overwrites the option value with the total wall clock time, in milliseconds, spent in the compiler and linker. * *

    Option type: {@code float}. Applies to: compiler and linker

    *
  • *
  • {@link #CU_JIT_INFO_LOG_BUFFER JIT_INFO_LOG_BUFFER} - Pointer to a buffer in which to print any log messages that are informational in nature (the buffer size is specified via option {@link #CU_JIT_INFO_LOG_BUFFER_SIZE_BYTES JIT_INFO_LOG_BUFFER_SIZE_BYTES}).

    Option type: {@code char *}. Applies to: compiler and linker

    *
  • *
  • {@link #CU_JIT_INFO_LOG_BUFFER_SIZE_BYTES JIT_INFO_LOG_BUFFER_SIZE_BYTES} - * IN: Log buffer size in bytes. Log messages will be capped at this size (including null terminator). * *

    OUT: Amount of log buffer filled with messages.

    * *

    Option type: {@code unsigned int}. Applies to: compiler and linker

    *
  • *
  • {@link #CU_JIT_ERROR_LOG_BUFFER JIT_ERROR_LOG_BUFFER} - Pointer to a buffer in which to print any log messages that reflect errors (the buffer size is specified via option {@link #CU_JIT_ERROR_LOG_BUFFER_SIZE_BYTES JIT_ERROR_LOG_BUFFER_SIZE_BYTES}).

    Option type: {@code char *}. Applies to: compiler and linker

    *
  • *
  • {@link #CU_JIT_ERROR_LOG_BUFFER_SIZE_BYTES JIT_ERROR_LOG_BUFFER_SIZE_BYTES} - * IN: Log buffer size in bytes. Log messages will be capped at this size (including null terminator). * *

    OUT: Amount of log buffer filled with messages.

    * *

    Option type: {@code unsigned int}. Applies to: compiler and linker

    *
  • *
  • {@link #CU_JIT_OPTIMIZATION_LEVEL JIT_OPTIMIZATION_LEVEL} - * Level of optimizations to apply to generated code (0 - 4), with 4 being the default and highest level of optimizations. * *

    Option type: {@code unsigned int}. Applies to: compiler only

    *
  • *
  • {@link #CU_JIT_TARGET_FROM_CUCONTEXT JIT_TARGET_FROM_CUCONTEXT} - * No option value required. Determines the target based on the current attached context (default). * *

    Option type: No option value needed. Applies to: compiler and linker

    *
  • *
  • {@link #CU_JIT_TARGET JIT_TARGET} - * Target is chosen based on supplied {@code CUjit_target}. Cannot be combined with {@link #CU_JIT_THREADS_PER_BLOCK JIT_THREADS_PER_BLOCK}. * *

    Option type: {@code unsigned int} for enumerated type {@code CUjit_target}. Applies to: compiler and linker

    *
  • *
  • {@link #CU_JIT_FALLBACK_STRATEGY JIT_FALLBACK_STRATEGY} - * Specifies choice of fallback strategy if matching cubin is not found. * *

    Choice is based on supplied {@code CUjit_fallback}. This option cannot be used with {@code cuLink*} APIs as the linker requires exact matches.

    * *

    Option type: {@code unsigned int} for enumerated type {@code CUjit_fallback}. Applies to: compiler only

    *
  • *
  • {@link #CU_JIT_GENERATE_DEBUG_INFO JIT_GENERATE_DEBUG_INFO} - * Specifies whether to create debug information in output (-g) (0: false, default). * *

    Option type: {@code int}. Applies to: compiler and linker

    *
  • *
  • {@link #CU_JIT_LOG_VERBOSE JIT_LOG_VERBOSE} - * Generate verbose log messages (0: false, default). * *

    Option type: {@code int}. Applies to: compiler and linker

    *
  • *
  • {@link #CU_JIT_GENERATE_LINE_INFO JIT_GENERATE_LINE_INFO} - * Generate line number information (-lineinfo) (0: false, default). * *

    Option type: {@code int}. Applies to: compiler only

    *
  • *
  • {@link #CU_JIT_CACHE_MODE JIT_CACHE_MODE} - * Specifies whether to enable caching explicitly (-dlcm). Choice is based on supplied {@code CUjit_cacheMode_enum}. * *

    Option type: {@code unsigned int} for enumerated type {@code CUjit_cacheMode_enum}. Applies to: compiler only

    *
  • *
  • {@link #CU_JIT_NEW_SM3X_OPT JIT_NEW_SM3X_OPT} - Used for internal purposes only, in this version of CUDA.
  • *
  • {@link #CU_JIT_FAST_COMPILE JIT_FAST_COMPILE} - Used for internal purposes only, in this version of CUDA.
  • *
  • {@link #CU_JIT_GLOBAL_SYMBOL_NAMES JIT_GLOBAL_SYMBOL_NAMES} - Array of device symbol names that will be relocated to the corresponding host addresses stored in {@link #CU_JIT_GLOBAL_SYMBOL_ADDRESSES JIT_GLOBAL_SYMBOL_ADDRESSES}.

    Must contain {@link #CU_JIT_GLOBAL_SYMBOL_COUNT JIT_GLOBAL_SYMBOL_COUNT} entries. When loading a device module, the driver will relocate all encountered unresolved symbols to the host addresses. It is only allowed to register symbols that correspond to unresolved global variables. It is illegal to register the same device symbol at multiple addresses.

    * *

    Option type: {@code const char **}. Applies to: dynamic linker only

    *
  • *
  • {@link #CU_JIT_GLOBAL_SYMBOL_ADDRESSES JIT_GLOBAL_SYMBOL_ADDRESSES} - * Array of host addresses that will be used to relocate corresponding device symbols stored in {@link #CU_JIT_GLOBAL_SYMBOL_NAMES JIT_GLOBAL_SYMBOL_NAMES}. * *

    Must contain {@link #CU_JIT_GLOBAL_SYMBOL_COUNT JIT_GLOBAL_SYMBOL_COUNT} entries.

    * *

    Option type: {@code void **}. Applies to: dynamic linker only

    *
  • *
  • {@link #CU_JIT_GLOBAL_SYMBOL_COUNT JIT_GLOBAL_SYMBOL_COUNT} - * Number of entries in {@link #CU_JIT_GLOBAL_SYMBOL_NAMES JIT_GLOBAL_SYMBOL_NAMES} and {@link #CU_JIT_GLOBAL_SYMBOL_ADDRESSES JIT_GLOBAL_SYMBOL_ADDRESSES} arrays. * *

    Option type: {@code unsigned int}. Applies to: dynamic linker only

    *
  • *
  • {@link #CU_JIT_LTO JIT_LTO} - * Enable link-time optimization (-dlto) for device code (0: false, default) * *

    Option type: {@code int}. Applies to: compiler and linker

    *
  • *
  • {@link #CU_JIT_FTZ JIT_FTZ} - * Control single-precision denormals (-ftz) support (0: false, default). * *
      *
    • 1 : flushes denormal values to zero
    • *
    • 0 : preserves denormal values
    • *
    * *

    Option type: {@code int}. Applies to: link-time optimization specified with {@link #CU_JIT_LTO JIT_LTO}

    *
  • *
  • {@link #CU_JIT_PREC_DIV JIT_PREC_DIV} - * Control single-precision floating-point division and reciprocals (-prec-div) support (1: true, default). * *
      *
    • 1 : Enables the IEEE round-to-nearest mode
    • *
    • 0 : Enables the fast approximation mode
    • *
    * *

    Option type: {@code int}. Applies to: link-time optimization specified with {@link #CU_JIT_LTO JIT_LTO}

    *
  • *
  • {@link #CU_JIT_PREC_SQRT JIT_PREC_SQRT} - * Control single-precision floating-point square root (-prec-sqrt) support (1: true, default). * *
      *
    • 1 : Enables the IEEE round-to-nearest mode
    • *
    • 0 : Enables the fast approximation mode
    • *
    * *

    Option type: {@code int}. Applies to: link-time optimization specified with {@link #CU_JIT_LTO JIT_LTO}

    *
  • *
  • {@link #CU_JIT_FMA JIT_FMA} - Enable/Disable the contraction of floating-point multiplies and adds/subtracts into floating-point multiply-add (-fma) operations (1: Enable, default; 0: Disable).

    Option type: {@code int}. Applies to: link-time optimization specified with {@link #CU_JIT_LTO JIT_LTO}

    *
  • *
  • {@link #CU_JIT_NUM_OPTIONS JIT_NUM_OPTIONS} - The number of {@code CUjit_option} values. This is a count sentinel, not an option that can be passed to the compiler or linker.

    *
  • *
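A minimal sketch of passing JIT options when loading a module; {@code ptx} is an assumed null-terminated {@code ByteBuffer} holding PTX source, the {@code (PointerBuffer, ByteBuffer, IntBuffer, PointerBuffer)} overload of {@link #cuModuleLoadDataEx ModuleLoadDataEx} is assumed, integer-valued options travel through the {@code void *} slot of the values array, and error handling is omitted:

    // assumed imports additionally: java.nio.ByteBuffer, org.lwjgl.PointerBuffer, static org.lwjgl.system.MemoryUtil.*
    try (MemoryStack stack = stackPush()) {
        PointerBuffer module  = stack.mallocPointer(1);
        ByteBuffer    infoLog = stack.malloc(8192);

        IntBuffer options = stack.ints(
            CU_JIT_INFO_LOG_BUFFER,
            CU_JIT_INFO_LOG_BUFFER_SIZE_BYTES,
            CU_JIT_OPTIMIZATION_LEVEL
        );
        PointerBuffer values = stack.mallocPointer(3);
        values.put(0, memAddress(infoLog)); // pointer-valued option
        values.put(1, infoLog.remaining()); // integer-valued option, passed in the void * slot
        values.put(2, 4);                   // highest optimization level

        cuModuleLoadDataEx(module, ptx, options, values);

        // JIT_INFO_LOG_BUFFER_SIZE_BYTES is IN/OUT: on return it holds the amount of log written
        System.out.println(memASCII(memAddress(infoLog), (int)values.get(1)));
    }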
*/ public static final int CU_JIT_MAX_REGISTERS = 0x0, CU_JIT_THREADS_PER_BLOCK = 0x1, CU_JIT_WALL_TIME = 0x2, CU_JIT_INFO_LOG_BUFFER = 0x3, CU_JIT_INFO_LOG_BUFFER_SIZE_BYTES = 0x4, CU_JIT_ERROR_LOG_BUFFER = 0x5, CU_JIT_ERROR_LOG_BUFFER_SIZE_BYTES = 0x6, CU_JIT_OPTIMIZATION_LEVEL = 0x7, CU_JIT_TARGET_FROM_CUCONTEXT = 0x8, CU_JIT_TARGET = 0x9, CU_JIT_FALLBACK_STRATEGY = 0xA, CU_JIT_GENERATE_DEBUG_INFO = 0xB, CU_JIT_LOG_VERBOSE = 0xC, CU_JIT_GENERATE_LINE_INFO = 0xD, CU_JIT_CACHE_MODE = 0xE, CU_JIT_NEW_SM3X_OPT = 0xF, CU_JIT_FAST_COMPILE = 0x10, CU_JIT_GLOBAL_SYMBOL_NAMES = 0x11, CU_JIT_GLOBAL_SYMBOL_ADDRESSES = 0x12, CU_JIT_GLOBAL_SYMBOL_COUNT = 0x13, CU_JIT_LTO = 0x14, CU_JIT_FTZ = 0x15, CU_JIT_PREC_DIV = 0x16, CU_JIT_PREC_SQRT = 0x17, CU_JIT_FMA = 0x18, CU_JIT_NUM_OPTIONS = 0x19; /** * Online compilation targets. ({@code CUjit_target}) * *
Enum values:
* *
    *
  • {@link #CU_TARGET_COMPUTE_20 TARGET_COMPUTE_20} - Compute device class 2.0
  • *
  • {@link #CU_TARGET_COMPUTE_21 TARGET_COMPUTE_21} - Compute device class 2.1
  • *
  • {@link #CU_TARGET_COMPUTE_30 TARGET_COMPUTE_30} - Compute device class 3.0
  • *
  • {@link #CU_TARGET_COMPUTE_32 TARGET_COMPUTE_32} - Compute device class 3.2
  • *
  • {@link #CU_TARGET_COMPUTE_35 TARGET_COMPUTE_35} - Compute device class 3.5
  • *
  • {@link #CU_TARGET_COMPUTE_37 TARGET_COMPUTE_37} - Compute device class 3.7
  • *
  • {@link #CU_TARGET_COMPUTE_50 TARGET_COMPUTE_50} - Compute device class 5.0
  • *
  • {@link #CU_TARGET_COMPUTE_52 TARGET_COMPUTE_52} - Compute device class 5.2
  • *
  • {@link #CU_TARGET_COMPUTE_53 TARGET_COMPUTE_53} - Compute device class 5.3
  • *
  • {@link #CU_TARGET_COMPUTE_60 TARGET_COMPUTE_60} - Compute device class 6.0.
  • *
  • {@link #CU_TARGET_COMPUTE_61 TARGET_COMPUTE_61} - Compute device class 6.1.
  • *
  • {@link #CU_TARGET_COMPUTE_62 TARGET_COMPUTE_62} - Compute device class 6.2.
  • *
  • {@link #CU_TARGET_COMPUTE_70 TARGET_COMPUTE_70} - Compute device class 7.0.
  • *
  • {@link #CU_TARGET_COMPUTE_72 TARGET_COMPUTE_72} - Compute device class 7.2.
  • *
  • {@link #CU_TARGET_COMPUTE_75 TARGET_COMPUTE_75} - Compute device class 7.5.
  • *
  • {@link #CU_TARGET_COMPUTE_80 TARGET_COMPUTE_80} - Compute device class 8.0.
  • *
  • {@link #CU_TARGET_COMPUTE_86 TARGET_COMPUTE_86} - Compute device class 8.6.
  • *
*/ public static final int CU_TARGET_COMPUTE_20 = 20, CU_TARGET_COMPUTE_21 = 21, CU_TARGET_COMPUTE_30 = 30, CU_TARGET_COMPUTE_32 = 32, CU_TARGET_COMPUTE_35 = 35, CU_TARGET_COMPUTE_37 = 37, CU_TARGET_COMPUTE_50 = 50, CU_TARGET_COMPUTE_52 = 52, CU_TARGET_COMPUTE_53 = 53, CU_TARGET_COMPUTE_60 = 60, CU_TARGET_COMPUTE_61 = 61, CU_TARGET_COMPUTE_62 = 62, CU_TARGET_COMPUTE_70 = 70, CU_TARGET_COMPUTE_72 = 72, CU_TARGET_COMPUTE_75 = 75, CU_TARGET_COMPUTE_80 = 80, CU_TARGET_COMPUTE_86 = 86; /** * Cubin matching fallback strategies. ({@code CUjit_fallback}) * *
Enum values:
* *
    *
  • {@link #CU_PREFER_PTX PREFER_PTX} - Prefer to compile ptx if exact binary match not found
  • *
  • {@link #CU_PREFER_BINARY PREFER_BINARY} - Prefer to fall back to compatible binary code if exact match not found
  • *
*/ public static final int CU_PREFER_PTX = 0x0, CU_PREFER_BINARY = 0x1; /** * Caching modes for {@code dlcm}. ({@code CUjit_cacheMode}) * *
Enum values:
* *
    *
  • {@link #CU_JIT_CACHE_OPTION_NONE JIT_CACHE_OPTION_NONE} - Compile with no -dlcm flag specified
  • *
  • {@link #CU_JIT_CACHE_OPTION_CG JIT_CACHE_OPTION_CG} - Compile with L1 cache disabled
  • *
  • {@link #CU_JIT_CACHE_OPTION_CA JIT_CACHE_OPTION_CA} - Compile with L1 cache enabled
  • *
*/ public static final int CU_JIT_CACHE_OPTION_NONE = 0x0, CU_JIT_CACHE_OPTION_CG = 0x1, CU_JIT_CACHE_OPTION_CA = 0x2; /** * Device code formats. ({@code CUjitInputType}) * *
Enum values:
* *
    *
  • {@link #CU_JIT_INPUT_CUBIN JIT_INPUT_CUBIN} - * Compiled device-class-specific device code * *

    Applicable options: none

    *
  • *
  • {@link #CU_JIT_INPUT_PTX JIT_INPUT_PTX} - * PTX source code. * *

    Applicable options: PTX compiler options

    *
  • *
  • {@link #CU_JIT_INPUT_FATBINARY JIT_INPUT_FATBINARY} - * Bundle of multiple cubins and/or PTX of some device code. * *

    Applicable options: PTX compiler options, {@link #CU_JIT_FALLBACK_STRATEGY JIT_FALLBACK_STRATEGY}

    *
  • *
  • {@link #CU_JIT_INPUT_OBJECT JIT_INPUT_OBJECT} - * Host object with embedded device code. * *

    Applicable options: PTX compiler options, {@link #CU_JIT_FALLBACK_STRATEGY JIT_FALLBACK_STRATEGY}

    *
  • *
  • {@link #CU_JIT_INPUT_LIBRARY JIT_INPUT_LIBRARY} - * Archive of host objects with embedded device code. * *

    Applicable options: PTX compiler options, {@link #CU_JIT_FALLBACK_STRATEGY JIT_FALLBACK_STRATEGY}

    *
  • *
  • {@link #CU_JIT_INPUT_NVVM JIT_INPUT_NVVM} - * High-level intermediate code for link-time optimization. * *

    Applicable options: NVVM compiler options, PTX compiler options

    *
  • *
*/ public static final int CU_JIT_INPUT_CUBIN = 0, CU_JIT_INPUT_PTX = 1, CU_JIT_INPUT_FATBINARY = 2, CU_JIT_INPUT_OBJECT = 3, CU_JIT_INPUT_LIBRARY = 4, CU_JIT_INPUT_NVVM = 5; /** * Flags to register a graphics resource. ({@code CUgraphicsRegisterFlags}) * *
Enum values:
* *
    *
  • {@link #CU_GRAPHICS_REGISTER_FLAGS_NONE GRAPHICS_REGISTER_FLAGS_NONE}
  • *
  • {@link #CU_GRAPHICS_REGISTER_FLAGS_READ_ONLY GRAPHICS_REGISTER_FLAGS_READ_ONLY}
  • *
  • {@link #CU_GRAPHICS_REGISTER_FLAGS_WRITE_DISCARD GRAPHICS_REGISTER_FLAGS_WRITE_DISCARD}
  • *
  • {@link #CU_GRAPHICS_REGISTER_FLAGS_SURFACE_LDST GRAPHICS_REGISTER_FLAGS_SURFACE_LDST}
  • *
  • {@link #CU_GRAPHICS_REGISTER_FLAGS_TEXTURE_GATHER GRAPHICS_REGISTER_FLAGS_TEXTURE_GATHER}
  • *
*/ public static final int CU_GRAPHICS_REGISTER_FLAGS_NONE = 0x0, CU_GRAPHICS_REGISTER_FLAGS_READ_ONLY = 0x1, CU_GRAPHICS_REGISTER_FLAGS_WRITE_DISCARD = 0x2, CU_GRAPHICS_REGISTER_FLAGS_SURFACE_LDST = 0x4, CU_GRAPHICS_REGISTER_FLAGS_TEXTURE_GATHER = 0x8; /** * Flags for mapping and unmapping interop resources. ({@code CUgraphicsMapResourceFlags}) * *
Enum values:
* *
    *
  • {@link #CU_GRAPHICS_MAP_RESOURCE_FLAGS_NONE GRAPHICS_MAP_RESOURCE_FLAGS_NONE}
  • *
  • {@link #CU_GRAPHICS_MAP_RESOURCE_FLAGS_READ_ONLY GRAPHICS_MAP_RESOURCE_FLAGS_READ_ONLY}
  • *
  • {@link #CU_GRAPHICS_MAP_RESOURCE_FLAGS_WRITE_DISCARD GRAPHICS_MAP_RESOURCE_FLAGS_WRITE_DISCARD}
  • *
*/ public static final int CU_GRAPHICS_MAP_RESOURCE_FLAGS_NONE = 0x0, CU_GRAPHICS_MAP_RESOURCE_FLAGS_READ_ONLY = 0x1, CU_GRAPHICS_MAP_RESOURCE_FLAGS_WRITE_DISCARD = 0x2; /** * Array indices for cube faces. ({@code CUarray_cubemap_face}) * *
Enum values:
* *
    *
  • {@link #CU_CUBEMAP_FACE_POSITIVE_X CUBEMAP_FACE_POSITIVE_X} - Positive X face of cubemap
  • *
  • {@link #CU_CUBEMAP_FACE_NEGATIVE_X CUBEMAP_FACE_NEGATIVE_X} - Negative X face of cubemap
  • *
  • {@link #CU_CUBEMAP_FACE_POSITIVE_Y CUBEMAP_FACE_POSITIVE_Y} - Positive Y face of cubemap
  • *
  • {@link #CU_CUBEMAP_FACE_NEGATIVE_Y CUBEMAP_FACE_NEGATIVE_Y} - Negative Y face of cubemap
  • *
  • {@link #CU_CUBEMAP_FACE_POSITIVE_Z CUBEMAP_FACE_POSITIVE_Z} - Positive Z face of cubemap
  • *
  • {@link #CU_CUBEMAP_FACE_NEGATIVE_Z CUBEMAP_FACE_NEGATIVE_Z} - Negative Z face of cubemap
  • *
*/ public static final int CU_CUBEMAP_FACE_POSITIVE_X = 0x0, CU_CUBEMAP_FACE_NEGATIVE_X = 0x1, CU_CUBEMAP_FACE_POSITIVE_Y = 0x2, CU_CUBEMAP_FACE_NEGATIVE_Y = 0x3, CU_CUBEMAP_FACE_POSITIVE_Z = 0x4, CU_CUBEMAP_FACE_NEGATIVE_Z = 0x5; /** * Limits. ({@code CUlimit}) * *
Enum values:
* *
    *
  • {@link #CU_LIMIT_STACK_SIZE LIMIT_STACK_SIZE} - GPU thread stack size
  • *
  • {@link #CU_LIMIT_PRINTF_FIFO_SIZE LIMIT_PRINTF_FIFO_SIZE} - GPU printf FIFO size
  • *
  • {@link #CU_LIMIT_MALLOC_HEAP_SIZE LIMIT_MALLOC_HEAP_SIZE} - GPU malloc heap size
  • *
  • {@link #CU_LIMIT_DEV_RUNTIME_SYNC_DEPTH LIMIT_DEV_RUNTIME_SYNC_DEPTH} - GPU device runtime launch synchronize depth
  • *
  • {@link #CU_LIMIT_DEV_RUNTIME_PENDING_LAUNCH_COUNT LIMIT_DEV_RUNTIME_PENDING_LAUNCH_COUNT} - GPU device runtime pending launch count
  • *
  • {@link #CU_LIMIT_MAX_L2_FETCH_GRANULARITY LIMIT_MAX_L2_FETCH_GRANULARITY} - A value between 0 and 128 that indicates the maximum fetch granularity of L2 (in Bytes). This is a hint
  • *
  • {@link #CU_LIMIT_PERSISTING_L2_CACHE_SIZE LIMIT_PERSISTING_L2_CACHE_SIZE} - A size in bytes for L2 persisting lines cache size
  • *
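For example, the device-side {@code printf} FIFO can be resized with {@link #cuCtxSetLimit CtxSetLimit} and read back with {@link #cuCtxGetLimit CtxGetLimit}; a current context is required and the 4 MiB figure is an arbitrary assumption:

    try (MemoryStack stack = stackPush()) {
        cuCtxSetLimit(CU_LIMIT_PRINTF_FIFO_SIZE, 4 << 20);

        PointerBuffer pvalue = stack.mallocPointer(1);
        cuCtxGetLimit(pvalue, CU_LIMIT_PRINTF_FIFO_SIZE);
        System.out.println("printf FIFO: " + pvalue.get(0) + " bytes");
    }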
*/ public static final int CU_LIMIT_STACK_SIZE = 0x00, CU_LIMIT_PRINTF_FIFO_SIZE = 0x01, CU_LIMIT_MALLOC_HEAP_SIZE = 0x02, CU_LIMIT_DEV_RUNTIME_SYNC_DEPTH = 0x03, CU_LIMIT_DEV_RUNTIME_PENDING_LAUNCH_COUNT = 0x04, CU_LIMIT_MAX_L2_FETCH_GRANULARITY = 0x05, CU_LIMIT_PERSISTING_L2_CACHE_SIZE = 0x06; /** * Resource types. ({@code CUresourcetype}) * *
Enum values:
* *
    *
  • {@link #CU_RESOURCE_TYPE_ARRAY RESOURCE_TYPE_ARRAY} - Array resource
  • *
  • {@link #CU_RESOURCE_TYPE_MIPMAPPED_ARRAY RESOURCE_TYPE_MIPMAPPED_ARRAY} - Mipmapped array resource
  • *
  • {@link #CU_RESOURCE_TYPE_LINEAR RESOURCE_TYPE_LINEAR} - Linear resource
  • *
  • {@link #CU_RESOURCE_TYPE_PITCH2D RESOURCE_TYPE_PITCH2D} - Pitch 2D resource
  • *
*/ public static final int CU_RESOURCE_TYPE_ARRAY = 0x0, CU_RESOURCE_TYPE_MIPMAPPED_ARRAY = 0x1, CU_RESOURCE_TYPE_LINEAR = 0x2, CU_RESOURCE_TYPE_PITCH2D = 0x3; /** * Specifies performance hint with {@link CUaccessPolicyWindow} for {@code hitProp} and {@code missProp} members. ({@code CUaccessProperty}) * *
Enum values:
* *
    *
  • {@link #CU_ACCESS_PROPERTY_NORMAL ACCESS_PROPERTY_NORMAL} - Normal cache persistence.
  • *
  • {@link #CU_ACCESS_PROPERTY_STREAMING ACCESS_PROPERTY_STREAMING} - Streaming access is less likely to persist in the cache.
  • *
  • {@link #CU_ACCESS_PROPERTY_PERSISTING ACCESS_PROPERTY_PERSISTING} - Persisting access is more likely to persist in cache.
  • *
*/ public static final int CU_ACCESS_PROPERTY_NORMAL = 0, CU_ACCESS_PROPERTY_STREAMING = 1, CU_ACCESS_PROPERTY_PERSISTING = 2; /** * Graph node types. ({@code CUgraphNodeType}) * *
Enum values:
* *
    *
  • {@link #CU_GRAPH_NODE_TYPE_KERNEL GRAPH_NODE_TYPE_KERNEL} - GPU kernel node
  • *
  • {@link #CU_GRAPH_NODE_TYPE_MEMCPY GRAPH_NODE_TYPE_MEMCPY} - Memcpy node
  • *
  • {@link #CU_GRAPH_NODE_TYPE_MEMSET GRAPH_NODE_TYPE_MEMSET} - Memset node
  • *
  • {@link #CU_GRAPH_NODE_TYPE_HOST GRAPH_NODE_TYPE_HOST} - Host (executable) node
  • *
  • {@link #CU_GRAPH_NODE_TYPE_GRAPH GRAPH_NODE_TYPE_GRAPH} - Node which executes an embedded graph
  • *
  • {@link #CU_GRAPH_NODE_TYPE_EMPTY GRAPH_NODE_TYPE_EMPTY} - Empty (no-op) node
  • *
  • {@link #CU_GRAPH_NODE_TYPE_WAIT_EVENT GRAPH_NODE_TYPE_WAIT_EVENT} - External event wait node
  • *
  • {@link #CU_GRAPH_NODE_TYPE_EVENT_RECORD GRAPH_NODE_TYPE_EVENT_RECORD} - External event record node
  • *
  • {@link #CU_GRAPH_NODE_TYPE_EXT_SEMAS_SIGNAL GRAPH_NODE_TYPE_EXT_SEMAS_SIGNAL} - External semaphore signal node
  • *
  • {@link #CU_GRAPH_NODE_TYPE_EXT_SEMAS_WAIT GRAPH_NODE_TYPE_EXT_SEMAS_WAIT} - External semaphore wait node
  • *
  • {@link #CU_GRAPH_NODE_TYPE_MEM_ALLOC GRAPH_NODE_TYPE_MEM_ALLOC} - Memory Allocation Node
  • *
  • {@link #CU_GRAPH_NODE_TYPE_MEM_FREE GRAPH_NODE_TYPE_MEM_FREE} - Memory Free Node
  • *
*/ public static final int CU_GRAPH_NODE_TYPE_KERNEL = 0, CU_GRAPH_NODE_TYPE_MEMCPY = 1, CU_GRAPH_NODE_TYPE_MEMSET = 2, CU_GRAPH_NODE_TYPE_HOST = 3, CU_GRAPH_NODE_TYPE_GRAPH = 4, CU_GRAPH_NODE_TYPE_EMPTY = 5, CU_GRAPH_NODE_TYPE_WAIT_EVENT = 6, CU_GRAPH_NODE_TYPE_EVENT_RECORD = 7, CU_GRAPH_NODE_TYPE_EXT_SEMAS_SIGNAL = 8, CU_GRAPH_NODE_TYPE_EXT_SEMAS_WAIT = 9, CU_GRAPH_NODE_TYPE_MEM_ALLOC = 10, CU_GRAPH_NODE_TYPE_MEM_FREE = 11; /** * {@code CUsynchronizationPolicy} * *
Enum values:
* *
    *
  • {@link #CU_SYNC_POLICY_AUTO SYNC_POLICY_AUTO}
  • *
  • {@link #CU_SYNC_POLICY_SPIN SYNC_POLICY_SPIN}
  • *
  • {@link #CU_SYNC_POLICY_YIELD SYNC_POLICY_YIELD}
  • *
  • {@link #CU_SYNC_POLICY_BLOCKING_SYNC SYNC_POLICY_BLOCKING_SYNC}
  • *
*/ public static final int CU_SYNC_POLICY_AUTO = 1, CU_SYNC_POLICY_SPIN = 2, CU_SYNC_POLICY_YIELD = 3, CU_SYNC_POLICY_BLOCKING_SYNC = 4; /** * Graph kernel node Attributes ({@code CUkernelNodeAttrID}) * *
Enum values:
* *
    *
  • {@link #CU_KERNEL_NODE_ATTRIBUTE_ACCESS_POLICY_WINDOW KERNEL_NODE_ATTRIBUTE_ACCESS_POLICY_WINDOW} - Identifier for {@link CUkernelNodeAttrValue}{@code accessPolicyWindow}.
  • *
  • {@link #CU_KERNEL_NODE_ATTRIBUTE_COOPERATIVE KERNEL_NODE_ATTRIBUTE_COOPERATIVE} - Allows a kernel node to be cooperative (see {@link #cuLaunchCooperativeKernel LaunchCooperativeKernel}).
  • *
*/ public static final int CU_KERNEL_NODE_ATTRIBUTE_ACCESS_POLICY_WINDOW = 1, CU_KERNEL_NODE_ATTRIBUTE_COOPERATIVE = 2; /** * Possible stream capture statuses returned by {@link #cuStreamIsCapturing StreamIsCapturing}. ({@code CUstreamCaptureStatus}) * *
Enum values:
* *
    *
  • {@link #CU_STREAM_CAPTURE_STATUS_NONE STREAM_CAPTURE_STATUS_NONE} - Stream is not capturing
  • *
  • {@link #CU_STREAM_CAPTURE_STATUS_ACTIVE STREAM_CAPTURE_STATUS_ACTIVE} - Stream is actively capturing
  • *
  • {@link #CU_STREAM_CAPTURE_STATUS_INVALIDATED STREAM_CAPTURE_STATUS_INVALIDATED} - Stream is part of a capture sequence that has been invalidated, but not terminated
  • *
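A minimal sketch of checking the capture status of an assumed {@code CUstream} handle {@code stream} before submitting work that is illegal during capture; the error check is omitted:

    try (MemoryStack stack = stackPush()) {
        IntBuffer status = stack.mallocInt(1);
        cuStreamIsCapturing(stream, status);
        if (status.get(0) == CU_STREAM_CAPTURE_STATUS_ACTIVE) {
            // work submitted to `stream` is being recorded into a graph, not executed
        }
    }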
*/ public static final int CU_STREAM_CAPTURE_STATUS_NONE = 0x0, CU_STREAM_CAPTURE_STATUS_ACTIVE = 0x1, CU_STREAM_CAPTURE_STATUS_INVALIDATED = 0x2; /** * Possible modes for stream capture thread interactions. ({@code CUstreamCaptureMode}) * *

For more details see {@link #cuStreamBeginCapture StreamBeginCapture} and {@link #cuThreadExchangeStreamCaptureMode ThreadExchangeStreamCaptureMode}

* *
Enum values:
* *
    *
  • {@link #CU_STREAM_CAPTURE_MODE_GLOBAL STREAM_CAPTURE_MODE_GLOBAL}
  • *
  • {@link #CU_STREAM_CAPTURE_MODE_THREAD_LOCAL STREAM_CAPTURE_MODE_THREAD_LOCAL}
  • *
  • {@link #CU_STREAM_CAPTURE_MODE_RELAXED STREAM_CAPTURE_MODE_RELAXED}
  • *
*/ public static final int CU_STREAM_CAPTURE_MODE_GLOBAL = 0, CU_STREAM_CAPTURE_MODE_THREAD_LOCAL = 1, CU_STREAM_CAPTURE_MODE_RELAXED = 2; /** * Stream Attributes ({@code CUstreamAttrID}) * *
Enum values:
* *
    *
  • {@link #CU_STREAM_ATTRIBUTE_ACCESS_POLICY_WINDOW STREAM_ATTRIBUTE_ACCESS_POLICY_WINDOW} - Identifier for {@link CUstreamAttrValue}{@code accessPolicyWindow}.
  • *
  • {@link #CU_STREAM_ATTRIBUTE_SYNCHRONIZATION_POLICY STREAM_ATTRIBUTE_SYNCHRONIZATION_POLICY} - {@code CUsynchronizationPolicy} for work queued up in this stream
  • *
*/ public static final int CU_STREAM_ATTRIBUTE_ACCESS_POLICY_WINDOW = 1, CU_STREAM_ATTRIBUTE_SYNCHRONIZATION_POLICY = 3; /** * Flags to specify search options. For more details see {@link #cuGetProcAddress GetProcAddress}. ({@code CUdriverProcAddress_flags}) * *
Enum values:
* *
    *
  • {@link #CU_GET_PROC_ADDRESS_DEFAULT GET_PROC_ADDRESS_DEFAULT} - Default search mode for driver symbols.
  • *
  • {@link #CU_GET_PROC_ADDRESS_LEGACY_STREAM GET_PROC_ADDRESS_LEGACY_STREAM} - Search for legacy versions of driver symbols.
  • *
  • {@link #CU_GET_PROC_ADDRESS_PER_THREAD_DEFAULT_STREAM GET_PROC_ADDRESS_PER_THREAD_DEFAULT_STREAM} - Search for per-thread versions of driver symbols.
  • *
*/ public static final int CU_GET_PROC_ADDRESS_DEFAULT = 0, CU_GET_PROC_ADDRESS_LEGACY_STREAM = 1 << 0, CU_GET_PROC_ADDRESS_PER_THREAD_DEFAULT_STREAM = 1 << 1; /** * Execution Affinity Types * *

({@code CUexecAffinityType})

* *
Enum values:
* *
    *
  • {@link #CU_EXEC_AFFINITY_TYPE_SM_COUNT EXEC_AFFINITY_TYPE_SM_COUNT} - Create a context with limited SMs.
  • *
  • {@link #CU_EXEC_AFFINITY_TYPE_MAX EXEC_AFFINITY_TYPE_MAX}
  • *
*/ public static final int CU_EXEC_AFFINITY_TYPE_SM_COUNT = 0, CU_EXEC_AFFINITY_TYPE_MAX = 1; /** * Error codes. ({@code CUresult}) * *
Enum values:
* *
    *
  • {@link #CUDA_SUCCESS CUDA_SUCCESS} - * The API call returned with no errors. * *

    In the case of query calls, this also means that the operation being queried is complete (see {@link #cuEventQuery EventQuery} and {@link #cuStreamQuery StreamQuery}).

    *
  • *
  • {@link #CUDA_ERROR_INVALID_VALUE CUDA_ERROR_INVALID_VALUE} - This indicates that one or more of the parameters passed to the API call is not within an acceptable range of values.
  • *
  • {@link #CUDA_ERROR_OUT_OF_MEMORY CUDA_ERROR_OUT_OF_MEMORY} - The API call failed because it was unable to allocate enough memory to perform the requested operation.
  • *
  • {@link #CUDA_ERROR_NOT_INITIALIZED CUDA_ERROR_NOT_INITIALIZED} - This indicates that the CUDA driver has not been initialized with {@link #cuInit Init} or that initialization has failed.
  • *
  • {@link #CUDA_ERROR_DEINITIALIZED CUDA_ERROR_DEINITIALIZED} - This indicates that the CUDA driver is in the process of shutting down.
  • *
  • {@link #CUDA_ERROR_PROFILER_DISABLED CUDA_ERROR_PROFILER_DISABLED} - This indicates that the profiler is not initialized for this run. This can happen when the application is running with external profiling tools like the Visual Profiler.
  • *
  • {@link #CUDA_ERROR_PROFILER_NOT_INITIALIZED CUDA_ERROR_PROFILER_NOT_INITIALIZED} - Deprecated: This error return is deprecated as of CUDA 5.0. It is no longer an error to attempt to enable/disable profiling via {@link CUDAProfiler#cuProfilerStart ProfilerStart} or {@link CUDAProfiler#cuProfilerStop ProfilerStop} without initialization.
  • *
  • {@link #CUDA_ERROR_PROFILER_ALREADY_STARTED CUDA_ERROR_PROFILER_ALREADY_STARTED} - * Deprecated: This error return is deprecated as of CUDA 5.0. It is no longer an error to call {@link CUDAProfiler#cuProfilerStart ProfilerStart} when profiling is already enabled. *
  • *
  • {@link #CUDA_ERROR_PROFILER_ALREADY_STOPPED CUDA_ERROR_PROFILER_ALREADY_STOPPED} - * Deprecated: This error return is deprecated as of CUDA 5.0. It is no longer an error to call {@link CUDAProfiler#cuProfilerStop ProfilerStop} when profiling is already disabled. *
  • *
  • {@link #CUDA_ERROR_STUB_LIBRARY CUDA_ERROR_STUB_LIBRARY} - This indicates that the CUDA driver that the application has loaded is a stub library. Applications that run with the stub rather than a real driver will receive this error from CUDA API calls.
  • *
  • {@link #CUDA_ERROR_NO_DEVICE CUDA_ERROR_NO_DEVICE} - This indicates that no CUDA-capable devices were detected by the installed CUDA driver.
  • *
  • {@link #CUDA_ERROR_INVALID_DEVICE CUDA_ERROR_INVALID_DEVICE} - * This indicates that the device ordinal supplied by the user does not correspond to a valid CUDA device or that the action requested is invalid for * the specified device. *
  • *
  • {@link #CUDA_ERROR_DEVICE_NOT_LICENSED CUDA_ERROR_DEVICE_NOT_LICENSED} - This error indicates that the Grid license is not applied.
  • *
  • {@link #CUDA_ERROR_INVALID_IMAGE CUDA_ERROR_INVALID_IMAGE} - This indicates that the device kernel image is invalid. This can also indicate an invalid CUDA module.
  • *
  • {@link #CUDA_ERROR_INVALID_CONTEXT CUDA_ERROR_INVALID_CONTEXT} - * This most frequently indicates that there is no context bound to the current thread. This can also be returned if the context passed to an API call * is not a valid handle (such as a context that has had {@link #cuCtxDestroy CtxDestroy} invoked on it). This can also be returned if a user mixes different API * versions (i.e. 3010 context with 3020 API calls). See {@link #cuCtxGetApiVersion CtxGetApiVersion} for more details. *
  • *
  • {@link #CUDA_ERROR_CONTEXT_ALREADY_CURRENT CUDA_ERROR_CONTEXT_ALREADY_CURRENT} - * This indicated that the context being supplied as a parameter to the API call was already the active context.Deprecated: This error return is * deprecated as of CUDA 3.2. It is no longer an error to attempt to push the active context via {@link #cuCtxPushCurrent CtxPushCurrent}. *
  • *
  • {@link #CUDA_ERROR_MAP_FAILED CUDA_ERROR_MAP_FAILED} - This indicates that a map or register operation has failed.
  • *
  • {@link #CUDA_ERROR_UNMAP_FAILED CUDA_ERROR_UNMAP_FAILED} - This indicates that an unmap or unregister operation has failed.
  • *
  • {@link #CUDA_ERROR_ARRAY_IS_MAPPED CUDA_ERROR_ARRAY_IS_MAPPED} - This indicates that the specified array is currently mapped and thus cannot be destroyed.
  • *
  • {@link #CUDA_ERROR_ALREADY_MAPPED CUDA_ERROR_ALREADY_MAPPED} - This indicates that the resource is already mapped.
  • *
  • {@link #CUDA_ERROR_NO_BINARY_FOR_GPU CUDA_ERROR_NO_BINARY_FOR_GPU} - * This indicates that there is no kernel image available that is suitable for the device. This can occur when a user specifies code generation * options for a particular CUDA source file that do not include the corresponding device configuration. *
  • *
  • {@link #CUDA_ERROR_ALREADY_ACQUIRED CUDA_ERROR_ALREADY_ACQUIRED} - This indicates that a resource has already been acquired.
  • *
  • {@link #CUDA_ERROR_NOT_MAPPED CUDA_ERROR_NOT_MAPPED} - This indicates that a resource is not mapped.
  • *
  • {@link #CUDA_ERROR_NOT_MAPPED_AS_ARRAY CUDA_ERROR_NOT_MAPPED_AS_ARRAY} - This indicates that a mapped resource is not available for access as an array.
  • *
  • {@link #CUDA_ERROR_NOT_MAPPED_AS_POINTER CUDA_ERROR_NOT_MAPPED_AS_POINTER} - This indicates that a mapped resource is not available for access as a pointer.
  • *
  • {@link #CUDA_ERROR_ECC_UNCORRECTABLE CUDA_ERROR_ECC_UNCORRECTABLE} - This indicates that an uncorrectable ECC error was detected during execution.
  • *
  • {@link #CUDA_ERROR_UNSUPPORTED_LIMIT CUDA_ERROR_UNSUPPORTED_LIMIT} - This indicates that the {@code CUlimit} passed to the API call is not supported by the active device.
  • *
  • {@link #CUDA_ERROR_CONTEXT_ALREADY_IN_USE CUDA_ERROR_CONTEXT_ALREADY_IN_USE} - * This indicates that the {@code CUcontext} passed to the API call can only be bound to a single CPU thread at a time but is already bound to a CPU thread. *
  • *
  • {@link #CUDA_ERROR_PEER_ACCESS_UNSUPPORTED CUDA_ERROR_PEER_ACCESS_UNSUPPORTED} - This indicates that peer access is not supported across the given devices.
  • *
  • {@link #CUDA_ERROR_INVALID_PTX CUDA_ERROR_INVALID_PTX} - This indicates that a PTX JIT compilation failed.
  • *
  • {@link #CUDA_ERROR_INVALID_GRAPHICS_CONTEXT CUDA_ERROR_INVALID_GRAPHICS_CONTEXT} - This indicates an error with OpenGL or DirectX context.
  • *
  • {@link #CUDA_ERROR_NVLINK_UNCORRECTABLE CUDA_ERROR_NVLINK_UNCORRECTABLE} - This indicates that an uncorrectable NVLink error was detected during the execution.
  • *
  • {@link #CUDA_ERROR_JIT_COMPILER_NOT_FOUND CUDA_ERROR_JIT_COMPILER_NOT_FOUND} - This indicates that the PTX JIT compiler library was not found.
  • *
  • {@link #CUDA_ERROR_UNSUPPORTED_PTX_VERSION CUDA_ERROR_UNSUPPORTED_PTX_VERSION} - This indicates that the provided PTX was compiled with an unsupported toolchain.
  • *
  • {@link #CUDA_ERROR_JIT_COMPILATION_DISABLED CUDA_ERROR_JIT_COMPILATION_DISABLED} - This indicates that the PTX JIT compilation was disabled.
  • *
  • {@link #CUDA_ERROR_UNSUPPORTED_EXEC_AFFINITY CUDA_ERROR_UNSUPPORTED_EXEC_AFFINITY} - This indicates that the {@code CUexecAffinityType} passed to the API call is not supported by the active device.
  • *
  • {@link #CUDA_ERROR_INVALID_SOURCE CUDA_ERROR_INVALID_SOURCE} - This indicates that the device kernel source is invalid. This includes compilation/linker errors encountered in device code or user error.
  • *
  • {@link #CUDA_ERROR_FILE_NOT_FOUND CUDA_ERROR_FILE_NOT_FOUND} - This indicates that the file specified was not found.
  • *
  • {@link #CUDA_ERROR_SHARED_OBJECT_SYMBOL_NOT_FOUND CUDA_ERROR_SHARED_OBJECT_SYMBOL_NOT_FOUND} - This indicates that a link to a shared object failed to resolve.
  • *
  • {@link #CUDA_ERROR_SHARED_OBJECT_INIT_FAILED CUDA_ERROR_SHARED_OBJECT_INIT_FAILED} - This indicates that initialization of a shared object failed.
  • *
  • {@link #CUDA_ERROR_OPERATING_SYSTEM CUDA_ERROR_OPERATING_SYSTEM} - This indicates that an OS call failed.
  • *
  • {@link #CUDA_ERROR_INVALID_HANDLE CUDA_ERROR_INVALID_HANDLE} - * This indicates that a resource handle passed to the API call was not valid. Resource handles are opaque types like {@code CUstream} and * {@code CUevent}. *
  • *
  • {@link #CUDA_ERROR_ILLEGAL_STATE CUDA_ERROR_ILLEGAL_STATE} - This indicates that a resource required by the API call is not in a valid state to perform the requested operation.
  • *
  • {@link #CUDA_ERROR_NOT_FOUND CUDA_ERROR_NOT_FOUND} - * This indicates that a named symbol was not found. Examples of symbols are global/constant variable names, driver function names, texture names, and * surface names. *
  • *
  • {@link #CUDA_ERROR_NOT_READY CUDA_ERROR_NOT_READY} - * This indicates that asynchronous operations issued previously have not completed yet. This result is not actually an error, but must be indicated * differently than {@link #CUDA_SUCCESS} (which indicates completion). Calls that may return this value include {@link #cuEventQuery EventQuery} and {@link #cuStreamQuery StreamQuery}. *
  • *
  • {@link #CUDA_ERROR_ILLEGAL_ADDRESS CUDA_ERROR_ILLEGAL_ADDRESS} - * While executing a kernel, the device encountered a load or store instruction on an invalid memory address. This leaves the process in an * inconsistent state and any further CUDA work will return the same error. To continue using CUDA, the process must be terminated and relaunched. *
  • *
  • {@link #CUDA_ERROR_LAUNCH_OUT_OF_RESOURCES CUDA_ERROR_LAUNCH_OUT_OF_RESOURCES} - * This indicates that a launch did not occur because it did not have appropriate resources. This error usually indicates that the user has attempted * to pass too many arguments to the device kernel, or the kernel launch specifies too many threads for the kernel's register count. Passing arguments * of the wrong size (i.e. a 64-bit pointer when a 32-bit int is expected) is equivalent to passing too many arguments and can also result in this * error. *
  • *
  • {@link #CUDA_ERROR_LAUNCH_TIMEOUT CUDA_ERROR_LAUNCH_TIMEOUT} - * This indicates that the device kernel took too long to execute. This can only occur if timeouts are enabled - see the device attribute * {@link #CU_DEVICE_ATTRIBUTE_KERNEL_EXEC_TIMEOUT DEVICE_ATTRIBUTE_KERNEL_EXEC_TIMEOUT} for more information. This leaves the process in an inconsistent state and any further CUDA work will * return the same error. To continue using CUDA, the process must be terminated and relaunched. *
  • *
  • {@link #CUDA_ERROR_LAUNCH_INCOMPATIBLE_TEXTURING CUDA_ERROR_LAUNCH_INCOMPATIBLE_TEXTURING} - This error indicates a kernel launch that uses an incompatible texturing mode.
  • *
  • {@link #CUDA_ERROR_PEER_ACCESS_ALREADY_ENABLED CUDA_ERROR_PEER_ACCESS_ALREADY_ENABLED} - * This error indicates that a call to {@link #cuCtxEnablePeerAccess CtxEnablePeerAccess} is trying to re-enable peer access to a context which has already had peer access to * it enabled. *
  • *
  • {@link #CUDA_ERROR_PEER_ACCESS_NOT_ENABLED CUDA_ERROR_PEER_ACCESS_NOT_ENABLED} - * This error indicates that {@link #cuCtxDisablePeerAccess CtxDisablePeerAccess} is trying to disable peer access which has not been enabled yet via {@link #cuCtxEnablePeerAccess CtxEnablePeerAccess}. *
  • *
  • {@link #CUDA_ERROR_PRIMARY_CONTEXT_ACTIVE CUDA_ERROR_PRIMARY_CONTEXT_ACTIVE} - This error indicates that the primary context for the specified device has already been initialized.
  • *
  • {@link #CUDA_ERROR_CONTEXT_IS_DESTROYED CUDA_ERROR_CONTEXT_IS_DESTROYED} - * This error indicates that the context current to the calling thread has been destroyed using {@link #cuCtxDestroy CtxDestroy}, or is a primary context which has not * yet been initialized. *
  • *
  • {@link #CUDA_ERROR_ASSERT CUDA_ERROR_ASSERT} - * A device-side assert triggered during kernel execution. The context cannot be used anymore, and must be destroyed. All existing device memory * allocations from this context are invalid and must be reconstructed if the program is to continue using CUDA. *
  • *
  • {@link #CUDA_ERROR_TOO_MANY_PEERS CUDA_ERROR_TOO_MANY_PEERS} - * This error indicates that the hardware resources required to enable peer access have been exhausted for one or more of the devices passed to * {@link #cuCtxEnablePeerAccess CtxEnablePeerAccess}. *
  • *
  • {@link #CUDA_ERROR_HOST_MEMORY_ALREADY_REGISTERED CUDA_ERROR_HOST_MEMORY_ALREADY_REGISTERED} - This error indicates that the memory range passed to {@link #cuMemHostRegister MemHostRegister} has already been registered.
  • *
  • {@link #CUDA_ERROR_HOST_MEMORY_NOT_REGISTERED CUDA_ERROR_HOST_MEMORY_NOT_REGISTERED} - This error indicates that the pointer passed to {@link #cuMemHostUnregister MemHostUnregister} does not correspond to any currently registered memory region.
  • *
  • {@link #CUDA_ERROR_HARDWARE_STACK_ERROR CUDA_ERROR_HARDWARE_STACK_ERROR} - * While executing a kernel, the device encountered a stack error. This can be due to stack corruption or exceeding the stack size limit. This leaves * the process in an inconsistent state and any further CUDA work will return the same error. To continue using CUDA, the process must be terminated * and relaunched. *
  • *
  • {@link #CUDA_ERROR_ILLEGAL_INSTRUCTION CUDA_ERROR_ILLEGAL_INSTRUCTION} - * While executing a kernel, the device encountered an illegal instruction. This leaves the process in an inconsistent state and any further CUDA work * will return the same error. To continue using CUDA, the process must be terminated and relaunched. *
  • *
  • {@link #CUDA_ERROR_MISALIGNED_ADDRESS CUDA_ERROR_MISALIGNED_ADDRESS} - * While executing a kernel, the device encountered a load or store instruction on a memory address which is not aligned. This leaves the process in * an inconsistent state and any further CUDA work will return the same error. To continue using CUDA, the process must be terminated and relaunched. *
  • *
  • {@link #CUDA_ERROR_INVALID_ADDRESS_SPACE CUDA_ERROR_INVALID_ADDRESS_SPACE} - * While executing a kernel, the device encountered an instruction which can only operate on memory locations in certain address spaces (global, * shared, or local), but was supplied a memory address not belonging to an allowed address space. This leaves the process in an inconsistent state * and any further CUDA work will return the same error. To continue using CUDA, the process must be terminated and relaunched. *
  • *
  • {@link #CUDA_ERROR_INVALID_PC CUDA_ERROR_INVALID_PC} - * While executing a kernel, the device program counter wrapped its address space. This leaves the process in an inconsistent state and any further * CUDA work will return the same error. To continue using CUDA, the process must be terminated and relaunched. *
  • *
  • {@link #CUDA_ERROR_LAUNCH_FAILED CUDA_ERROR_LAUNCH_FAILED} - * An exception occurred on the device while executing a kernel. Common causes include dereferencing an invalid device pointer and accessing out of * bounds shared memory. Less common cases can be system specific - more information about these cases can be found in the system specific user guide. * This leaves the process in an inconsistent state and any further CUDA work will return the same error. To continue using CUDA, the process must be * terminated and relaunched. *
  • *
  • {@link #CUDA_ERROR_COOPERATIVE_LAUNCH_TOO_LARGE CUDA_ERROR_COOPERATIVE_LAUNCH_TOO_LARGE} - * This error indicates that the number of blocks launched per grid for a kernel that was launched via either {@link #cuLaunchCooperativeKernel LaunchCooperativeKernel} or * {@link #cuLaunchCooperativeKernelMultiDevice LaunchCooperativeKernelMultiDevice} exceeds the maximum number of blocks as allowed by {@link #cuOccupancyMaxActiveBlocksPerMultiprocessor OccupancyMaxActiveBlocksPerMultiprocessor} or * {@link #cuOccupancyMaxActiveBlocksPerMultiprocessorWithFlags OccupancyMaxActiveBlocksPerMultiprocessorWithFlags} times the number of multiprocessors as specified by the device attribute * {@link #CU_DEVICE_ATTRIBUTE_MULTIPROCESSOR_COUNT DEVICE_ATTRIBUTE_MULTIPROCESSOR_COUNT}. *
  • *
  • {@link #CUDA_ERROR_NOT_PERMITTED CUDA_ERROR_NOT_PERMITTED} - This error indicates that the attempted operation is not permitted.
  • *
  • {@link #CUDA_ERROR_NOT_SUPPORTED CUDA_ERROR_NOT_SUPPORTED} - This error indicates that the attempted operation is not supported on the current system or device.
  • *
  • {@link #CUDA_ERROR_SYSTEM_NOT_READY CUDA_ERROR_SYSTEM_NOT_READY} - * This error indicates that the system is not yet ready to start any CUDA work. To continue using CUDA, verify the system configuration is in a valid * state and all required driver daemons are actively running. More information about this error can be found in the system specific user guide. *
  • *
  • {@link #CUDA_ERROR_SYSTEM_DRIVER_MISMATCH CUDA_ERROR_SYSTEM_DRIVER_MISMATCH} - * This error indicates that there is a mismatch between the versions of the display driver and the CUDA driver. Refer to the compatibility * documentation for supported versions. *
  • *
  • {@link #CUDA_ERROR_COMPAT_NOT_SUPPORTED_ON_DEVICE CUDA_ERROR_COMPAT_NOT_SUPPORTED_ON_DEVICE} - * This error indicates that the system was upgraded to run with forward compatibility but the visible hardware detected by CUDA does not support this * configuration. Refer to the compatibility documentation for the supported hardware matrix or ensure that only supported hardware is visible during * initialization via the {@code CUDA_VISIBLE_DEVICES} environment variable. *
  • *
  • {@link #CUDA_ERROR_MPS_CONNECTION_FAILED CUDA_ERROR_MPS_CONNECTION_FAILED} - This error indicates that the MPS client failed to connect to the MPS control daemon or the MPS server.
  • *
  • {@link #CUDA_ERROR_MPS_RPC_FAILURE CUDA_ERROR_MPS_RPC_FAILURE} - This error indicates that the remote procedural call between the MPS server and the MPS client failed.
  • *
  • {@link #CUDA_ERROR_MPS_SERVER_NOT_READY CUDA_ERROR_MPS_SERVER_NOT_READY} - * This error indicates that the MPS server is not ready to accept new MPS client requests. This error can be returned when the MPS server is in the * process of recovering from a fatal failure. *
  • *
  • {@link #CUDA_ERROR_MPS_MAX_CLIENTS_REACHED CUDA_ERROR_MPS_MAX_CLIENTS_REACHED} - This error indicates that the hardware resources required to create MPS client have been exhausted.
  • *
  • {@link #CUDA_ERROR_MPS_MAX_CONNECTIONS_REACHED CUDA_ERROR_MPS_MAX_CONNECTIONS_REACHED} - This error indicates the the hardware resources required to support device connections have been exhausted.
  • *
  • {@link #CUDA_ERROR_STREAM_CAPTURE_UNSUPPORTED CUDA_ERROR_STREAM_CAPTURE_UNSUPPORTED} - This error indicates that the operation is not permitted when the stream is capturing.
  • *
  • {@link #CUDA_ERROR_STREAM_CAPTURE_INVALIDATED CUDA_ERROR_STREAM_CAPTURE_INVALIDATED} - This error indicates that the current capture sequence on the stream has been invalidated due to a previous error.
  • *
  • {@link #CUDA_ERROR_STREAM_CAPTURE_MERGE CUDA_ERROR_STREAM_CAPTURE_MERGE} - This error indicates that the operation would have resulted in a merge of two independent capture sequences.
  • *
  • {@link #CUDA_ERROR_STREAM_CAPTURE_UNMATCHED CUDA_ERROR_STREAM_CAPTURE_UNMATCHED} - This error indicates that the capture was not initiated in this stream.
  • *
  • {@link #CUDA_ERROR_STREAM_CAPTURE_UNJOINED CUDA_ERROR_STREAM_CAPTURE_UNJOINED} - This error indicates that the capture sequence contains a fork that was not joined to the primary stream.
  • *
  • {@link #CUDA_ERROR_STREAM_CAPTURE_ISOLATION CUDA_ERROR_STREAM_CAPTURE_ISOLATION} - * This error indicates that a dependency would have been created which crosses the capture sequence boundary. Only implicit in-stream ordering * dependencies are allowed to cross the boundary. *
  • *
  • {@link #CUDA_ERROR_STREAM_CAPTURE_IMPLICIT CUDA_ERROR_STREAM_CAPTURE_IMPLICIT} - This error indicates a disallowed implicit dependency on a current capture sequence from cudaStreamLegacy.
  • *
  • {@link #CUDA_ERROR_CAPTURED_EVENT CUDA_ERROR_CAPTURED_EVENT} - This error indicates that the operation is not permitted on an event which was last recorded in a capturing stream.
  • *
  • {@link #CUDA_ERROR_STREAM_CAPTURE_WRONG_THREAD CUDA_ERROR_STREAM_CAPTURE_WRONG_THREAD} - * A stream capture sequence not initiated with the {@link #CU_STREAM_CAPTURE_MODE_RELAXED STREAM_CAPTURE_MODE_RELAXED} argument to {@link #cuStreamBeginCapture StreamBeginCapture} was passed to {@link #cuStreamEndCapture StreamEndCapture} * in a different thread. *
  • *
  • {@link #CUDA_ERROR_TIMEOUT CUDA_ERROR_TIMEOUT} - This error indicates that the timeout specified for the wait operation has lapsed.
  • *
  • {@link #CUDA_ERROR_GRAPH_EXEC_UPDATE_FAILURE CUDA_ERROR_GRAPH_EXEC_UPDATE_FAILURE} - * This error indicates that the graph update was not performed because it included changes which violated constraints specific to instantiated graph * update. *
  • *
  • {@link #CUDA_ERROR_EXTERNAL_DEVICE CUDA_ERROR_EXTERNAL_DEVICE} - * This indicates that an async error has occurred in a device outside of CUDA. If CUDA was waiting for an external device's signal before consuming * shared data, the external device signaled an error indicating that the data is not valid for consumption. This leaves the process in an * inconsistent state and any further CUDA work will return the same error. To continue using CUDA, the process must be terminated and relaunched. *
  • *
  • {@link #CUDA_ERROR_UNKNOWN CUDA_ERROR_UNKNOWN} - This indicates that an unknown internal error has occurred.
  • *
 */
public static final int CUDA_SUCCESS = 0, CUDA_ERROR_INVALID_VALUE = 1, CUDA_ERROR_OUT_OF_MEMORY = 2, CUDA_ERROR_NOT_INITIALIZED = 3, CUDA_ERROR_DEINITIALIZED = 4, CUDA_ERROR_PROFILER_DISABLED = 5, CUDA_ERROR_PROFILER_NOT_INITIALIZED = 6, CUDA_ERROR_PROFILER_ALREADY_STARTED = 7, CUDA_ERROR_PROFILER_ALREADY_STOPPED = 8, CUDA_ERROR_STUB_LIBRARY = 34, CUDA_ERROR_NO_DEVICE = 100, CUDA_ERROR_INVALID_DEVICE = 101, CUDA_ERROR_DEVICE_NOT_LICENSED = 102, CUDA_ERROR_INVALID_IMAGE = 200, CUDA_ERROR_INVALID_CONTEXT = 201, CUDA_ERROR_CONTEXT_ALREADY_CURRENT = 202, CUDA_ERROR_MAP_FAILED = 205, CUDA_ERROR_UNMAP_FAILED = 206, CUDA_ERROR_ARRAY_IS_MAPPED = 207, CUDA_ERROR_ALREADY_MAPPED = 208, CUDA_ERROR_NO_BINARY_FOR_GPU = 209, CUDA_ERROR_ALREADY_ACQUIRED = 210, CUDA_ERROR_NOT_MAPPED = 211, CUDA_ERROR_NOT_MAPPED_AS_ARRAY = 212, CUDA_ERROR_NOT_MAPPED_AS_POINTER = 213, CUDA_ERROR_ECC_UNCORRECTABLE = 214, CUDA_ERROR_UNSUPPORTED_LIMIT = 215, CUDA_ERROR_CONTEXT_ALREADY_IN_USE = 216, CUDA_ERROR_PEER_ACCESS_UNSUPPORTED = 217, CUDA_ERROR_INVALID_PTX = 218, CUDA_ERROR_INVALID_GRAPHICS_CONTEXT = 219, CUDA_ERROR_NVLINK_UNCORRECTABLE = 220, CUDA_ERROR_JIT_COMPILER_NOT_FOUND = 221, CUDA_ERROR_UNSUPPORTED_PTX_VERSION = 222, CUDA_ERROR_JIT_COMPILATION_DISABLED = 223, CUDA_ERROR_UNSUPPORTED_EXEC_AFFINITY = 224, CUDA_ERROR_INVALID_SOURCE = 300, CUDA_ERROR_FILE_NOT_FOUND = 301, CUDA_ERROR_SHARED_OBJECT_SYMBOL_NOT_FOUND = 302, CUDA_ERROR_SHARED_OBJECT_INIT_FAILED = 303, CUDA_ERROR_OPERATING_SYSTEM = 304, CUDA_ERROR_INVALID_HANDLE = 400, CUDA_ERROR_ILLEGAL_STATE = 401, CUDA_ERROR_NOT_FOUND = 500, CUDA_ERROR_NOT_READY = 600, CUDA_ERROR_ILLEGAL_ADDRESS = 700, CUDA_ERROR_LAUNCH_OUT_OF_RESOURCES = 701, CUDA_ERROR_LAUNCH_TIMEOUT = 702, CUDA_ERROR_LAUNCH_INCOMPATIBLE_TEXTURING = 703, CUDA_ERROR_PEER_ACCESS_ALREADY_ENABLED = 704, CUDA_ERROR_PEER_ACCESS_NOT_ENABLED = 705, CUDA_ERROR_PRIMARY_CONTEXT_ACTIVE = 708, CUDA_ERROR_CONTEXT_IS_DESTROYED = 709, CUDA_ERROR_ASSERT = 710, CUDA_ERROR_TOO_MANY_PEERS = 711, CUDA_ERROR_HOST_MEMORY_ALREADY_REGISTERED = 712, CUDA_ERROR_HOST_MEMORY_NOT_REGISTERED = 713, CUDA_ERROR_HARDWARE_STACK_ERROR = 714, CUDA_ERROR_ILLEGAL_INSTRUCTION = 715, CUDA_ERROR_MISALIGNED_ADDRESS = 716, CUDA_ERROR_INVALID_ADDRESS_SPACE = 717, CUDA_ERROR_INVALID_PC = 718, CUDA_ERROR_LAUNCH_FAILED = 719, CUDA_ERROR_COOPERATIVE_LAUNCH_TOO_LARGE = 720, CUDA_ERROR_NOT_PERMITTED = 800, CUDA_ERROR_NOT_SUPPORTED = 801, CUDA_ERROR_SYSTEM_NOT_READY = 802, CUDA_ERROR_SYSTEM_DRIVER_MISMATCH = 803, CUDA_ERROR_COMPAT_NOT_SUPPORTED_ON_DEVICE = 804, CUDA_ERROR_MPS_CONNECTION_FAILED = 805, CUDA_ERROR_MPS_RPC_FAILURE = 806, CUDA_ERROR_MPS_SERVER_NOT_READY = 807, CUDA_ERROR_MPS_MAX_CLIENTS_REACHED = 808, CUDA_ERROR_MPS_MAX_CONNECTIONS_REACHED = 809, CUDA_ERROR_STREAM_CAPTURE_UNSUPPORTED = 900, CUDA_ERROR_STREAM_CAPTURE_INVALIDATED = 901, CUDA_ERROR_STREAM_CAPTURE_MERGE = 902, CUDA_ERROR_STREAM_CAPTURE_UNMATCHED = 903, CUDA_ERROR_STREAM_CAPTURE_UNJOINED = 904, CUDA_ERROR_STREAM_CAPTURE_ISOLATION = 905, CUDA_ERROR_STREAM_CAPTURE_IMPLICIT = 906, CUDA_ERROR_CAPTURED_EVENT = 907, CUDA_ERROR_STREAM_CAPTURE_WRONG_THREAD = 908, CUDA_ERROR_TIMEOUT = 909, CUDA_ERROR_GRAPH_EXEC_UPDATE_FAILURE = 910, CUDA_ERROR_EXTERNAL_DEVICE = 911, CUDA_ERROR_UNKNOWN = 999;
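Every {@code cu*} binding in this class returns one of the {@code CUresult} codes above. The helper below is a minimal sketch of a check, assuming only the {@code cuGetErrorName}/{@code cuGetErrorString} bindings in this class (driver-API shape: error code in, {@code const char**} out) and LWJGL's {@code MemoryStack}/{@code MemoryUtil} utilities; it is not part of the generated bindings.

import java.nio.*;
import org.lwjgl.PointerBuffer;
import org.lwjgl.system.MemoryStack;

import static org.lwjgl.cuda.CU.*;
import static org.lwjgl.system.MemoryUtil.*;

public final class CUChecks {

    private CUChecks() {}

    /** Returns {@code result} unchanged if it is CUDA_SUCCESS, otherwise throws with the error name and description. */
    public static int check(int result) {
        if (result != CUDA_SUCCESS) {
            try (MemoryStack stack = MemoryStack.stackPush()) {
                PointerBuffer name = stack.mallocPointer(1);
                PointerBuffer desc = stack.mallocPointer(1);
                // Both lookups can themselves fail (e.g. CUDA_ERROR_INVALID_VALUE for an unknown code).
                String n = cuGetErrorName(result, name)   == CUDA_SUCCESS ? memASCII(name.get(0)) : "CUDA_ERROR_<" + result + ">";
                String d = cuGetErrorString(result, desc) == CUDA_SUCCESS ? memASCII(desc.get(0)) : "unknown error";
                throw new IllegalStateException(n + " (" + result + "): " + d);
            }
        }
        return result;
    }
}

Typical use is simply {@code check(cuInit(0))}; the later sketches in this file reuse this hypothetical {@code check} helper.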

/**
 * P2P Attributes. ({@code CUdevice_P2PAttribute})
 *
 * Enum values:
 *
 * • {@link #CU_DEVICE_P2P_ATTRIBUTE_PERFORMANCE_RANK DEVICE_P2P_ATTRIBUTE_PERFORMANCE_RANK} - A relative value indicating the performance of the link between two devices
 * • {@link #CU_DEVICE_P2P_ATTRIBUTE_ACCESS_SUPPORTED DEVICE_P2P_ATTRIBUTE_ACCESS_SUPPORTED} - P2P Access is enabled
 * • {@link #CU_DEVICE_P2P_ATTRIBUTE_NATIVE_ATOMIC_SUPPORTED DEVICE_P2P_ATTRIBUTE_NATIVE_ATOMIC_SUPPORTED} - Atomic operation over the link supported
 * • {@link #CU_DEVICE_P2P_ATTRIBUTE_ACCESS_ACCESS_SUPPORTED DEVICE_P2P_ATTRIBUTE_ACCESS_ACCESS_SUPPORTED} - Deprecated, use {@link #CU_DEVICE_P2P_ATTRIBUTE_CUDA_ARRAY_ACCESS_SUPPORTED DEVICE_P2P_ATTRIBUTE_CUDA_ARRAY_ACCESS_SUPPORTED} instead
 * • {@link #CU_DEVICE_P2P_ATTRIBUTE_CUDA_ARRAY_ACCESS_SUPPORTED DEVICE_P2P_ATTRIBUTE_CUDA_ARRAY_ACCESS_SUPPORTED} - Accessing CUDA arrays over the link supported
 *
 * A query sketch follows the constant declarations below.
 */
public static final int CU_DEVICE_P2P_ATTRIBUTE_PERFORMANCE_RANK = 0x1, CU_DEVICE_P2P_ATTRIBUTE_ACCESS_SUPPORTED = 0x2, CU_DEVICE_P2P_ATTRIBUTE_NATIVE_ATOMIC_SUPPORTED = 0x3, CU_DEVICE_P2P_ATTRIBUTE_ACCESS_ACCESS_SUPPORTED = 0x4, CU_DEVICE_P2P_ATTRIBUTE_CUDA_ARRAY_ACCESS_SUPPORTED = 0x4;
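A sketch of how these attributes are typically consumed, assuming the {@code cuDeviceGetP2PAttribute} and {@code cuCtxEnablePeerAccess} bindings mirror the driver API, that {@code dev0}/{@code dev1} are device ordinals and {@code peerContext} is a context created on {@code dev1}, and reusing the hypothetical {@code check} helper from the error-code sketch above:

try (MemoryStack stack = MemoryStack.stackPush()) {
    IntBuffer value = stack.mallocInt(1);

    // Only enable peer access when the link actually supports it.
    check(cuDeviceGetP2PAttribute(value, CU_DEVICE_P2P_ATTRIBUTE_ACCESS_SUPPORTED, dev0, dev1));
    if (value.get(0) == 1) {
        check(cuCtxEnablePeerAccess(peerContext, 0)); // flags are currently reserved and must be 0
    }

    check(cuDeviceGetP2PAttribute(value, CU_DEVICE_P2P_ATTRIBUTE_PERFORMANCE_RANK, dev0, dev1));
    System.out.println("P2P performance rank dev0->dev1: " + value.get(0));
}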

/**
 * Flags for {@link #cuMemHostAlloc MemHostAlloc}.
 *
 * Enum values:
 *
 * • {@link #CU_MEMHOSTALLOC_PORTABLE MEMHOSTALLOC_PORTABLE} - If set, host memory is portable between CUDA contexts.
 * • {@link #CU_MEMHOSTALLOC_DEVICEMAP MEMHOSTALLOC_DEVICEMAP} - If set, host memory is mapped into CUDA address space and {@link #cuMemHostGetDevicePointer MemHostGetDevicePointer} may be called on the host pointer.
 * • {@link #CU_MEMHOSTALLOC_WRITECOMBINED MEMHOSTALLOC_WRITECOMBINED} - If set, host memory is allocated as write-combined - fast to write, faster to DMA, slow to read except via SSE4 streaming load instruction ({@code MOVNTDQA}).
 *
 * A usage sketch follows the constant declarations below.
 */
public static final int CU_MEMHOSTALLOC_PORTABLE = 0x1, CU_MEMHOSTALLOC_DEVICEMAP = 0x2, CU_MEMHOSTALLOC_WRITECOMBINED = 0x4;
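A sketch of how the {@code CU_MEMHOSTALLOC_*} flags combine, assuming the {@code cuMemHostAlloc}, {@code cuMemHostGetDevicePointer} and {@code cuMemFreeHost} bindings follow their driver-API shapes and reusing the hypothetical {@code check} helper from above:

try (MemoryStack stack = MemoryStack.stackPush()) {
    PointerBuffer pp    = stack.mallocPointer(1);
    PointerBuffer pdptr = stack.mallocPointer(1);

    int flags = CU_MEMHOSTALLOC_PORTABLE | CU_MEMHOSTALLOC_DEVICEMAP;
    check(cuMemHostAlloc(pp, 1 << 20, flags)); // 1 MiB of page-locked host memory
    long hostPtr = pp.get(0);

    // Valid because CU_MEMHOSTALLOC_DEVICEMAP was set; the Flags argument is currently reserved (0).
    check(cuMemHostGetDevicePointer(pdptr, hostPtr, 0));
    long devicePtr = pdptr.get(0);

    // ... use hostPtr on the CPU and devicePtr in kernels ...

    check(cuMemFreeHost(hostPtr));
}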

/**
 * Flags for {@link #cuMemHostRegister MemHostRegister}.
 *
 * Enum values:
 *
 * • {@link #CU_MEMHOSTREGISTER_PORTABLE MEMHOSTREGISTER_PORTABLE} - If set, host memory is portable between CUDA contexts.
 * • {@link #CU_MEMHOSTREGISTER_DEVICEMAP MEMHOSTREGISTER_DEVICEMAP} - If set, host memory is mapped into CUDA address space and {@link #cuMemHostGetDevicePointer MemHostGetDevicePointer} may be called on the host pointer.
 * • {@link #CU_MEMHOSTREGISTER_IOMEMORY MEMHOSTREGISTER_IOMEMORY} - If set, the passed memory pointer is treated as pointing to some memory-mapped I/O space, e.g. belonging to a third-party PCIe device. On Windows the flag is a no-op. On Linux that memory is marked as non cache-coherent for the GPU and is expected to be physically contiguous. It may return {@link #CUDA_ERROR_NOT_PERMITTED} if run as an unprivileged user, {@link #CUDA_ERROR_NOT_SUPPORTED} on older Linux kernel versions. On all other platforms, it is not supported and {@link #CUDA_ERROR_NOT_SUPPORTED} is returned.
 * • {@link #CU_MEMHOSTREGISTER_READ_ONLY MEMHOSTREGISTER_READ_ONLY} - If set, the passed memory pointer is treated as pointing to memory that is considered read-only by the device. On platforms without {@link #CU_DEVICE_ATTRIBUTE_PAGEABLE_MEMORY_ACCESS_USES_HOST_PAGE_TABLES DEVICE_ATTRIBUTE_PAGEABLE_MEMORY_ACCESS_USES_HOST_PAGE_TABLES}, this flag is required in order to register memory mapped to the CPU as read-only. Support for the use of this flag can be queried from the device attribute {@link #CU_DEVICE_ATTRIBUTE_READ_ONLY_HOST_REGISTER_SUPPORTED DEVICE_ATTRIBUTE_READ_ONLY_HOST_REGISTER_SUPPORTED}. Using this flag with a current context associated with a device that does not have this attribute set will cause {@link #cuMemHostRegister MemHostRegister} to error with {@link #CUDA_ERROR_NOT_SUPPORTED}.
 *
 * A usage sketch follows the constant declarations below.
 */
public static final int CU_MEMHOSTREGISTER_PORTABLE = 0x01, CU_MEMHOSTREGISTER_DEVICEMAP = 0x02, CU_MEMHOSTREGISTER_IOMEMORY = 0x04, CU_MEMHOSTREGISTER_READ_ONLY = 0x08;

/** Indicates that the layered sparse CUDA array or CUDA mipmapped array has a single mip tail region for all layers. */
public static final int CU_ARRAY_SPARSE_PROPERTIES_SINGLE_MIPTAIL = 0x1;
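The {@code CU_MEMHOSTREGISTER_*} flags apply to memory the application already owns. The sketch below is illustrative only; it assumes the {@code cuMemHostRegister}/{@code cuMemHostUnregister} bindings accept a {@code ByteBuffer} (with the size implied), that {@code device} is a device ordinal in scope, and it reuses the hypothetical {@code check} helper from above.

ByteBuffer data = memAlloc(4096); // regular (not yet page-locked) native memory
try (MemoryStack stack = MemoryStack.stackPush()) {
    IntBuffer supported = stack.mallocInt(1);
    check(cuDeviceGetAttribute(supported, CU_DEVICE_ATTRIBUTE_READ_ONLY_HOST_REGISTER_SUPPORTED, device));

    int flags = CU_MEMHOSTREGISTER_PORTABLE | CU_MEMHOSTREGISTER_DEVICEMAP;
    if (supported.get(0) == 1) {
        flags |= CU_MEMHOSTREGISTER_READ_ONLY; // only where the device advertises support
    }
    check(cuMemHostRegister(data, flags));

    // ... the buffer is now page-locked and accessible to the device ...

    check(cuMemHostUnregister(data));
}
memFree(data);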

/**
 * Resource view format. ({@code CUresourceViewFormat})
 *
 * Enum values:
 *
 * • {@link #CU_RES_VIEW_FORMAT_NONE RES_VIEW_FORMAT_NONE} - No resource view format (use underlying resource format)
 * • {@link #CU_RES_VIEW_FORMAT_UINT_1X8 RES_VIEW_FORMAT_UINT_1X8} - 1 channel unsigned 8-bit integers
 * • {@link #CU_RES_VIEW_FORMAT_UINT_2X8 RES_VIEW_FORMAT_UINT_2X8} - 2 channel unsigned 8-bit integers
 * • {@link #CU_RES_VIEW_FORMAT_UINT_4X8 RES_VIEW_FORMAT_UINT_4X8} - 4 channel unsigned 8-bit integers
 * • {@link #CU_RES_VIEW_FORMAT_SINT_1X8 RES_VIEW_FORMAT_SINT_1X8} - 1 channel signed 8-bit integers
 * • {@link #CU_RES_VIEW_FORMAT_SINT_2X8 RES_VIEW_FORMAT_SINT_2X8} - 2 channel signed 8-bit integers
 * • {@link #CU_RES_VIEW_FORMAT_SINT_4X8 RES_VIEW_FORMAT_SINT_4X8} - 4 channel signed 8-bit integers
 * • {@link #CU_RES_VIEW_FORMAT_UINT_1X16 RES_VIEW_FORMAT_UINT_1X16} - 1 channel unsigned 16-bit integers
 * • {@link #CU_RES_VIEW_FORMAT_UINT_2X16 RES_VIEW_FORMAT_UINT_2X16} - 2 channel unsigned 16-bit integers
 * • {@link #CU_RES_VIEW_FORMAT_UINT_4X16 RES_VIEW_FORMAT_UINT_4X16} - 4 channel unsigned 16-bit integers
 * • {@link #CU_RES_VIEW_FORMAT_SINT_1X16 RES_VIEW_FORMAT_SINT_1X16} - 1 channel signed 16-bit integers
 * • {@link #CU_RES_VIEW_FORMAT_SINT_2X16 RES_VIEW_FORMAT_SINT_2X16} - 2 channel signed 16-bit integers
 * • {@link #CU_RES_VIEW_FORMAT_SINT_4X16 RES_VIEW_FORMAT_SINT_4X16} - 4 channel signed 16-bit integers
 * • {@link #CU_RES_VIEW_FORMAT_UINT_1X32 RES_VIEW_FORMAT_UINT_1X32} - 1 channel unsigned 32-bit integers
 * • {@link #CU_RES_VIEW_FORMAT_UINT_2X32 RES_VIEW_FORMAT_UINT_2X32} - 2 channel unsigned 32-bit integers
 * • {@link #CU_RES_VIEW_FORMAT_UINT_4X32 RES_VIEW_FORMAT_UINT_4X32} - 4 channel unsigned 32-bit integers
 * • {@link #CU_RES_VIEW_FORMAT_SINT_1X32 RES_VIEW_FORMAT_SINT_1X32} - 1 channel signed 32-bit integers
 * • {@link #CU_RES_VIEW_FORMAT_SINT_2X32 RES_VIEW_FORMAT_SINT_2X32} - 2 channel signed 32-bit integers
 * • {@link #CU_RES_VIEW_FORMAT_SINT_4X32 RES_VIEW_FORMAT_SINT_4X32} - 4 channel signed 32-bit integers
 * • {@link #CU_RES_VIEW_FORMAT_FLOAT_1X16 RES_VIEW_FORMAT_FLOAT_1X16} - 1 channel 16-bit floating point
 * • {@link #CU_RES_VIEW_FORMAT_FLOAT_2X16 RES_VIEW_FORMAT_FLOAT_2X16} - 2 channel 16-bit floating point
 * • {@link #CU_RES_VIEW_FORMAT_FLOAT_4X16 RES_VIEW_FORMAT_FLOAT_4X16} - 4 channel 16-bit floating point
 * • {@link #CU_RES_VIEW_FORMAT_FLOAT_1X32 RES_VIEW_FORMAT_FLOAT_1X32} - 1 channel 32-bit floating point
 * • {@link #CU_RES_VIEW_FORMAT_FLOAT_2X32 RES_VIEW_FORMAT_FLOAT_2X32} - 2 channel 32-bit floating point
 * • {@link #CU_RES_VIEW_FORMAT_FLOAT_4X32 RES_VIEW_FORMAT_FLOAT_4X32} - 4 channel 32-bit floating point
 * • {@link #CU_RES_VIEW_FORMAT_UNSIGNED_BC1 RES_VIEW_FORMAT_UNSIGNED_BC1} - Block compressed 1
 * • {@link #CU_RES_VIEW_FORMAT_UNSIGNED_BC2 RES_VIEW_FORMAT_UNSIGNED_BC2} - Block compressed 2
 * • {@link #CU_RES_VIEW_FORMAT_UNSIGNED_BC3 RES_VIEW_FORMAT_UNSIGNED_BC3} - Block compressed 3
 * • {@link #CU_RES_VIEW_FORMAT_UNSIGNED_BC4 RES_VIEW_FORMAT_UNSIGNED_BC4} - Block compressed 4 unsigned
 * • {@link #CU_RES_VIEW_FORMAT_SIGNED_BC4 RES_VIEW_FORMAT_SIGNED_BC4} - Block compressed 4 signed
 * • {@link #CU_RES_VIEW_FORMAT_UNSIGNED_BC5 RES_VIEW_FORMAT_UNSIGNED_BC5} - Block compressed 5 unsigned
 * • {@link #CU_RES_VIEW_FORMAT_SIGNED_BC5 RES_VIEW_FORMAT_SIGNED_BC5} - Block compressed 5 signed
 * • {@link #CU_RES_VIEW_FORMAT_UNSIGNED_BC6H RES_VIEW_FORMAT_UNSIGNED_BC6H} - Block compressed 6 unsigned half-float
 * • {@link #CU_RES_VIEW_FORMAT_SIGNED_BC6H RES_VIEW_FORMAT_SIGNED_BC6H} - Block compressed 6 signed half-float
 * • {@link #CU_RES_VIEW_FORMAT_UNSIGNED_BC7 RES_VIEW_FORMAT_UNSIGNED_BC7} - Block compressed 7
 */
public static final int CU_RES_VIEW_FORMAT_NONE = 0x0, CU_RES_VIEW_FORMAT_UINT_1X8 = 0x1, CU_RES_VIEW_FORMAT_UINT_2X8 = 0x2, CU_RES_VIEW_FORMAT_UINT_4X8 = 0x3, CU_RES_VIEW_FORMAT_SINT_1X8 = 0x4, CU_RES_VIEW_FORMAT_SINT_2X8 = 0x5, CU_RES_VIEW_FORMAT_SINT_4X8 = 0x6, CU_RES_VIEW_FORMAT_UINT_1X16 = 0x7, CU_RES_VIEW_FORMAT_UINT_2X16 = 0x8, CU_RES_VIEW_FORMAT_UINT_4X16 = 0x9, CU_RES_VIEW_FORMAT_SINT_1X16 = 0xA, CU_RES_VIEW_FORMAT_SINT_2X16 = 0xB, CU_RES_VIEW_FORMAT_SINT_4X16 = 0xC, CU_RES_VIEW_FORMAT_UINT_1X32 = 0xD, CU_RES_VIEW_FORMAT_UINT_2X32 = 0xE, CU_RES_VIEW_FORMAT_UINT_4X32 = 0xF, CU_RES_VIEW_FORMAT_SINT_1X32 = 0x10, CU_RES_VIEW_FORMAT_SINT_2X32 = 0x11, CU_RES_VIEW_FORMAT_SINT_4X32 = 0x12, CU_RES_VIEW_FORMAT_FLOAT_1X16 = 0x13, CU_RES_VIEW_FORMAT_FLOAT_2X16 = 0x14, CU_RES_VIEW_FORMAT_FLOAT_4X16 = 0x15, CU_RES_VIEW_FORMAT_FLOAT_1X32 = 0x16, CU_RES_VIEW_FORMAT_FLOAT_2X32 = 0x17, CU_RES_VIEW_FORMAT_FLOAT_4X32 = 0x18, CU_RES_VIEW_FORMAT_UNSIGNED_BC1 = 0x19, CU_RES_VIEW_FORMAT_UNSIGNED_BC2 = 0x1A, CU_RES_VIEW_FORMAT_UNSIGNED_BC3 = 0x1B, CU_RES_VIEW_FORMAT_UNSIGNED_BC4 = 0x1C, CU_RES_VIEW_FORMAT_SIGNED_BC4 = 0x1D, CU_RES_VIEW_FORMAT_UNSIGNED_BC5 = 0x1E, CU_RES_VIEW_FORMAT_SIGNED_BC5 = 0x1F, CU_RES_VIEW_FORMAT_UNSIGNED_BC6H = 0x20, CU_RES_VIEW_FORMAT_SIGNED_BC6H = 0x21, CU_RES_VIEW_FORMAT_UNSIGNED_BC7 = 0x22;

/**
 * Access flags that specify the level of access the current context's device has on the memory referenced. ({@code CUDA_POINTER_ATTRIBUTE_ACCESS_FLAGS})
 *
 * Enum values:
 *
 * • {@link #CU_POINTER_ATTRIBUTE_ACCESS_FLAG_NONE POINTER_ATTRIBUTE_ACCESS_FLAG_NONE} - No access, meaning the device cannot access this memory at all, thus it must be staged through accessible memory in order to complete certain operations
 * • {@link #CU_POINTER_ATTRIBUTE_ACCESS_FLAG_READ POINTER_ATTRIBUTE_ACCESS_FLAG_READ} - Read-only access, meaning writes to this memory are considered invalid accesses and thus return an error in that case.
 * • {@link #CU_POINTER_ATTRIBUTE_ACCESS_FLAG_READWRITE POINTER_ATTRIBUTE_ACCESS_FLAG_READWRITE} - Read-write access, the device has full read-write access to the memory
 */
public static final int CU_POINTER_ATTRIBUTE_ACCESS_FLAG_NONE = 0x0, CU_POINTER_ATTRIBUTE_ACCESS_FLAG_READ = 0x1, CU_POINTER_ATTRIBUTE_ACCESS_FLAG_READWRITE = 0x3;

/**
 * External memory handle types. ({@code CUexternalMemoryHandleType})
 *
 * Enum values:
 *
 * • {@link #CU_EXTERNAL_MEMORY_HANDLE_TYPE_OPAQUE_FD EXTERNAL_MEMORY_HANDLE_TYPE_OPAQUE_FD} - Handle is an opaque file descriptor
 * • {@link #CU_EXTERNAL_MEMORY_HANDLE_TYPE_OPAQUE_WIN32 EXTERNAL_MEMORY_HANDLE_TYPE_OPAQUE_WIN32} - Handle is an opaque shared NT handle
 * • {@link #CU_EXTERNAL_MEMORY_HANDLE_TYPE_OPAQUE_WIN32_KMT EXTERNAL_MEMORY_HANDLE_TYPE_OPAQUE_WIN32_KMT} - Handle is an opaque, globally shared handle
 * • {@link #CU_EXTERNAL_MEMORY_HANDLE_TYPE_D3D12_HEAP EXTERNAL_MEMORY_HANDLE_TYPE_D3D12_HEAP} - Handle is a D3D12 heap object
 * • {@link #CU_EXTERNAL_MEMORY_HANDLE_TYPE_D3D12_RESOURCE EXTERNAL_MEMORY_HANDLE_TYPE_D3D12_RESOURCE} - Handle is a D3D12 committed resource
 * • {@link #CU_EXTERNAL_MEMORY_HANDLE_TYPE_D3D11_RESOURCE EXTERNAL_MEMORY_HANDLE_TYPE_D3D11_RESOURCE} - Handle is a shared NT handle to a D3D11 resource
 * • {@link #CU_EXTERNAL_MEMORY_HANDLE_TYPE_D3D11_RESOURCE_KMT EXTERNAL_MEMORY_HANDLE_TYPE_D3D11_RESOURCE_KMT} - Handle is a globally shared handle to a D3D11 resource
 * • {@link #CU_EXTERNAL_MEMORY_HANDLE_TYPE_NVSCIBUF EXTERNAL_MEMORY_HANDLE_TYPE_NVSCIBUF} - Handle is an NvSciBuf object
 */
public static final int CU_EXTERNAL_MEMORY_HANDLE_TYPE_OPAQUE_FD = 1, CU_EXTERNAL_MEMORY_HANDLE_TYPE_OPAQUE_WIN32 = 2, CU_EXTERNAL_MEMORY_HANDLE_TYPE_OPAQUE_WIN32_KMT = 3, CU_EXTERNAL_MEMORY_HANDLE_TYPE_D3D12_HEAP = 4, CU_EXTERNAL_MEMORY_HANDLE_TYPE_D3D12_RESOURCE = 5, CU_EXTERNAL_MEMORY_HANDLE_TYPE_D3D11_RESOURCE = 6, CU_EXTERNAL_MEMORY_HANDLE_TYPE_D3D11_RESOURCE_KMT = 7, CU_EXTERNAL_MEMORY_HANDLE_TYPE_NVSCIBUF = 8;

/** Indicates that the external memory object is a dedicated resource. */
public static final int CUDA_EXTERNAL_MEMORY_DEDICATED = 0x1;

/**
 * When the {@code flags} parameter of {@link CUDA_EXTERNAL_SEMAPHORE_SIGNAL_PARAMS} contains this flag, it indicates that signaling an external semaphore
 * object should skip performing appropriate memory synchronization operations over all the external memory objects that are imported as
 * {@link #CU_EXTERNAL_MEMORY_HANDLE_TYPE_NVSCIBUF EXTERNAL_MEMORY_HANDLE_TYPE_NVSCIBUF}, which otherwise are performed by default to ensure data coherency with other importers of the same
 * {@code NvSciBuf} memory objects.
 */
public static final int CUDA_EXTERNAL_SEMAPHORE_SIGNAL_SKIP_NVSCIBUF_MEMSYNC = 0x01;

/**
 * When the {@code flags} parameter of {@link CUDA_EXTERNAL_SEMAPHORE_WAIT_PARAMS} contains this flag, it indicates that waiting on an external
 * semaphore object should skip performing appropriate memory synchronization operations over all the external memory objects that are imported as
 * {@link #CU_EXTERNAL_MEMORY_HANDLE_TYPE_NVSCIBUF EXTERNAL_MEMORY_HANDLE_TYPE_NVSCIBUF}, which otherwise are performed by default to ensure data coherency with other importers of the same
 * {@code NvSciBuf} memory objects.
 */
public static final int CUDA_EXTERNAL_SEMAPHORE_WAIT_SKIP_NVSCIBUF_MEMSYNC = 0x02;

/**
 * When {@code flags} of {@link #cuDeviceGetNvSciSyncAttributes DeviceGetNvSciSyncAttributes} is set to this, it indicates that the application needs signaler specific
 * {@code NvSciSyncAttr} to be filled by {@code cuDeviceGetNvSciSyncAttributes}.
 */
public static final int CUDA_NVSCISYNC_ATTR_SIGNAL = 0x1;

/**
 * When {@code flags} of {@link #cuDeviceGetNvSciSyncAttributes DeviceGetNvSciSyncAttributes} is set to this, it indicates that the application needs waiter specific
 * {@code NvSciSyncAttr} to be filled by {@code cuDeviceGetNvSciSyncAttributes}.
 */
public static final int CUDA_NVSCISYNC_ATTR_WAIT = 0x2;

/**
 * External semaphore handle types. ({@code CUexternalSemaphoreHandleType})
 *
 * Enum values:
 *
 * • {@link #CU_EXTERNAL_SEMAPHORE_HANDLE_TYPE_OPAQUE_FD EXTERNAL_SEMAPHORE_HANDLE_TYPE_OPAQUE_FD} - Handle is an opaque file descriptor
 * • {@link #CU_EXTERNAL_SEMAPHORE_HANDLE_TYPE_OPAQUE_WIN32 EXTERNAL_SEMAPHORE_HANDLE_TYPE_OPAQUE_WIN32} - Handle is an opaque shared NT handle
 * • {@link #CU_EXTERNAL_SEMAPHORE_HANDLE_TYPE_OPAQUE_WIN32_KMT EXTERNAL_SEMAPHORE_HANDLE_TYPE_OPAQUE_WIN32_KMT} - Handle is an opaque, globally shared handle
 * • {@link #CU_EXTERNAL_SEMAPHORE_HANDLE_TYPE_D3D12_FENCE EXTERNAL_SEMAPHORE_HANDLE_TYPE_D3D12_FENCE} - Handle is a shared NT handle referencing a D3D12 fence object
 * • {@link #CU_EXTERNAL_SEMAPHORE_HANDLE_TYPE_D3D11_FENCE EXTERNAL_SEMAPHORE_HANDLE_TYPE_D3D11_FENCE} - Handle is a shared NT handle referencing a D3D11 fence object
 * • {@link #CU_EXTERNAL_SEMAPHORE_HANDLE_TYPE_NVSCISYNC EXTERNAL_SEMAPHORE_HANDLE_TYPE_NVSCISYNC} - Opaque handle to NvSciSync Object
 * • {@link #CU_EXTERNAL_SEMAPHORE_HANDLE_TYPE_D3D11_KEYED_MUTEX EXTERNAL_SEMAPHORE_HANDLE_TYPE_D3D11_KEYED_MUTEX} - Handle is a shared NT handle referencing a D3D11 keyed mutex object
 * • {@link #CU_EXTERNAL_SEMAPHORE_HANDLE_TYPE_D3D11_KEYED_MUTEX_KMT EXTERNAL_SEMAPHORE_HANDLE_TYPE_D3D11_KEYED_MUTEX_KMT} - Handle is a globally shared handle referencing a D3D11 keyed mutex object
 * • {@link #CU_EXTERNAL_SEMAPHORE_HANDLE_TYPE_TIMELINE_SEMAPHORE_FD EXTERNAL_SEMAPHORE_HANDLE_TYPE_TIMELINE_SEMAPHORE_FD} - Handle is an opaque file descriptor referencing a timeline semaphore
 * • {@link #CU_EXTERNAL_SEMAPHORE_HANDLE_TYPE_TIMELINE_SEMAPHORE_WIN32 EXTERNAL_SEMAPHORE_HANDLE_TYPE_TIMELINE_SEMAPHORE_WIN32} - Handle is an opaque shared NT handle referencing a timeline semaphore
 */
public static final int CU_EXTERNAL_SEMAPHORE_HANDLE_TYPE_OPAQUE_FD = 1, CU_EXTERNAL_SEMAPHORE_HANDLE_TYPE_OPAQUE_WIN32 = 2, CU_EXTERNAL_SEMAPHORE_HANDLE_TYPE_OPAQUE_WIN32_KMT = 3, CU_EXTERNAL_SEMAPHORE_HANDLE_TYPE_D3D12_FENCE = 4, CU_EXTERNAL_SEMAPHORE_HANDLE_TYPE_D3D11_FENCE = 5, CU_EXTERNAL_SEMAPHORE_HANDLE_TYPE_NVSCISYNC = 6, CU_EXTERNAL_SEMAPHORE_HANDLE_TYPE_D3D11_KEYED_MUTEX = 7, CU_EXTERNAL_SEMAPHORE_HANDLE_TYPE_D3D11_KEYED_MUTEX_KMT = 8, CU_EXTERNAL_SEMAPHORE_HANDLE_TYPE_TIMELINE_SEMAPHORE_FD = 9, CU_EXTERNAL_SEMAPHORE_HANDLE_TYPE_TIMELINE_SEMAPHORE_WIN32 = 10;

/**
 * Flags for specifying particular handle types. ({@code CUmemAllocationHandleType})
 *
 * Enum values:
 *
 * • {@link #CU_MEM_HANDLE_TYPE_NONE MEM_HANDLE_TYPE_NONE} - Does not allow any export mechanism.
 * • {@link #CU_MEM_HANDLE_TYPE_POSIX_FILE_DESCRIPTOR MEM_HANDLE_TYPE_POSIX_FILE_DESCRIPTOR} - Allows a file descriptor to be used for exporting. Permitted only on POSIX systems. ({@code int})
 * • {@link #CU_MEM_HANDLE_TYPE_WIN32 MEM_HANDLE_TYPE_WIN32} - Allows a Win32 NT handle to be used for exporting. ({@code HANDLE})
 * • {@link #CU_MEM_HANDLE_TYPE_WIN32_KMT MEM_HANDLE_TYPE_WIN32_KMT} - Allows a Win32 KMT handle to be used for exporting. ({@code D3DKMT_HANDLE})
 */
public static final int CU_MEM_HANDLE_TYPE_NONE = 0x0, CU_MEM_HANDLE_TYPE_POSIX_FILE_DESCRIPTOR = 0x1, CU_MEM_HANDLE_TYPE_WIN32 = 0x2, CU_MEM_HANDLE_TYPE_WIN32_KMT = 0x4;

/**
 * Specifies the memory protection flags for mapping. ({@code CUmemAccess_flags})
 *
 * Enum values:
 *
 * • {@link #CU_MEM_ACCESS_FLAGS_PROT_NONE MEM_ACCESS_FLAGS_PROT_NONE} - Default, make the address range not accessible
 * • {@link #CU_MEM_ACCESS_FLAGS_PROT_READ MEM_ACCESS_FLAGS_PROT_READ} - Make the address range read accessible
 * • {@link #CU_MEM_ACCESS_FLAGS_PROT_READWRITE MEM_ACCESS_FLAGS_PROT_READWRITE} - Make the address range read-write accessible
 */
public static final int CU_MEM_ACCESS_FLAGS_PROT_NONE = 0x0, CU_MEM_ACCESS_FLAGS_PROT_READ = 0x1, CU_MEM_ACCESS_FLAGS_PROT_READWRITE = 0x3;

/**
 * Specifies the type of location. ({@code CUmemLocationType})
 *
 * Enum values:
 *
 * • {@link #CU_MEM_LOCATION_TYPE_INVALID MEM_LOCATION_TYPE_INVALID}
 * • {@link #CU_MEM_LOCATION_TYPE_DEVICE MEM_LOCATION_TYPE_DEVICE} - Location is a device location, thus id is a device ordinal
 */
public static final int CU_MEM_LOCATION_TYPE_INVALID = 0x0, CU_MEM_LOCATION_TYPE_DEVICE = 0x1;

/**
 * Defines the allocation types available. ({@code CUmemAllocationType})
 *
 * Enum values:
 *
 * • {@link #CU_MEM_ALLOCATION_TYPE_INVALID MEM_ALLOCATION_TYPE_INVALID}
 * • {@link #CU_MEM_ALLOCATION_TYPE_PINNED MEM_ALLOCATION_TYPE_PINNED} - This allocation type is 'pinned', i.e. cannot migrate from its current location while the application is actively using it
 */
public static final int CU_MEM_ALLOCATION_TYPE_INVALID = 0x0, CU_MEM_ALLOCATION_TYPE_PINNED = 0x1;

/**
 * Flag for requesting different optimal and required granularities for an allocation. ({@code CUmemAllocationGranularity_flags})
 *
 * Enum values:
 *
 * • {@link #CU_MEM_ALLOC_GRANULARITY_MINIMUM MEM_ALLOC_GRANULARITY_MINIMUM} - Minimum required granularity for allocation
 * • {@link #CU_MEM_ALLOC_GRANULARITY_RECOMMENDED MEM_ALLOC_GRANULARITY_RECOMMENDED} - Recommended granularity for allocation for best performance
 *
 * A sketch of the virtual memory management flow that uses these flags follows the constant declarations below.
 */
public static final int CU_MEM_ALLOC_GRANULARITY_MINIMUM = 0x0, CU_MEM_ALLOC_GRANULARITY_RECOMMENDED = 0x1;
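The allocation type, location, handle type, granularity and access-flag enums above all feed the virtual memory management API. The following is an illustrative sketch only: it assumes the {@code cuMemGetAllocationGranularity}, {@code cuMemAddressReserve}, {@code cuMemCreate}, {@code cuMemMap} and {@code cuMemSetAccess} bindings and the {@code CUmemAllocationProp}/{@code CUmemAccessDesc} struct accessors follow LWJGL's usual generated shapes, that {@code device} is a device ordinal in scope, and it reuses the hypothetical {@code check} helper from above.

try (MemoryStack stack = MemoryStack.stackPush()) {
    CUmemAllocationProp prop = CUmemAllocationProp.calloc(stack)
        .type(CU_MEM_ALLOCATION_TYPE_PINNED)
        .requestedHandleTypes(CU_MEM_HANDLE_TYPE_NONE);
    prop.location()
        .type(CU_MEM_LOCATION_TYPE_DEVICE)
        .id(device);

    // Physical allocation sizes and mapping addresses must be multiples of the granularity.
    PointerBuffer granularity = stack.mallocPointer(1);
    check(cuMemGetAllocationGranularity(granularity, prop, CU_MEM_ALLOC_GRANULARITY_MINIMUM));
    long size = granularity.get(0);

    PointerBuffer ptr    = stack.mallocPointer(1); // reserved virtual address range
    LongBuffer    handle = stack.mallocLong(1);    // CUmemGenericAllocationHandle, assumed to map to long
    check(cuMemAddressReserve(ptr, size, 0, 0, 0));
    check(cuMemCreate(handle, size, prop, 0));
    check(cuMemMap(ptr.get(0), size, 0, handle.get(0), 0));

    // Mapped memory is inaccessible until access flags are applied.
    CUmemAccessDesc.Buffer access = CUmemAccessDesc.calloc(1, stack);
    access.location().type(CU_MEM_LOCATION_TYPE_DEVICE).id(device);
    access.flags(CU_MEM_ACCESS_FLAGS_PROT_READWRITE);
    check(cuMemSetAccess(ptr.get(0), size, access)); // count assumed to be derived from the buffer
}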

/**
 * Sparse subresource types. ({@code CUarraySparseSubresourceType})
 *
 * Enum values:
 *
 * • {@link #CU_ARRAY_SPARSE_SUBRESOURCE_TYPE_SPARSE_LEVEL ARRAY_SPARSE_SUBRESOURCE_TYPE_SPARSE_LEVEL}
 * • {@link #CU_ARRAY_SPARSE_SUBRESOURCE_TYPE_MIPTAIL ARRAY_SPARSE_SUBRESOURCE_TYPE_MIPTAIL}
 */
public static final int CU_ARRAY_SPARSE_SUBRESOURCE_TYPE_SPARSE_LEVEL = 0, CU_ARRAY_SPARSE_SUBRESOURCE_TYPE_MIPTAIL = 1;

/**
 * Memory operation types. ({@code CUmemOperationType})
 *
 * Enum values:
 *
 * • {@link #CU_MEM_OPERATION_TYPE_MAP MEM_OPERATION_TYPE_MAP}
 * • {@link #CU_MEM_OPERATION_TYPE_UNMAP MEM_OPERATION_TYPE_UNMAP}
 */
public static final int CU_MEM_OPERATION_TYPE_MAP = 1, CU_MEM_OPERATION_TYPE_UNMAP = 2;

/** Memory handle types ({@code CUmemHandleType}) */
public static final int CU_MEM_HANDLE_TYPE_GENERIC = 0;

/**
 * Specifies compression attribute for an allocation. ({@code CUmemAllocationCompType})
 *
 * Enum values:
 *
 * • {@link #CU_MEM_ALLOCATION_COMP_NONE MEM_ALLOCATION_COMP_NONE} - Allocating non-compressible memory
 * • {@link #CU_MEM_ALLOCATION_COMP_GENERIC MEM_ALLOCATION_COMP_GENERIC} - Allocating compressible memory
 */
public static final int CU_MEM_ALLOCATION_COMP_NONE = 0x0, CU_MEM_ALLOCATION_COMP_GENERIC = 0x1;

/** This flag if set indicates that the memory will be used as a tile pool. */
public static final int CU_MEM_CREATE_USAGE_TILE_POOL = 0x1;

/**
 * {@code CUgraphExecUpdateResult}
 *
 * Enum values:
 *
 * • {@link #CU_GRAPH_EXEC_UPDATE_SUCCESS GRAPH_EXEC_UPDATE_SUCCESS} - The update succeeded
 * • {@link #CU_GRAPH_EXEC_UPDATE_ERROR GRAPH_EXEC_UPDATE_ERROR} - The update failed for an unexpected reason which is described in the return value of the function
 * • {@link #CU_GRAPH_EXEC_UPDATE_ERROR_TOPOLOGY_CHANGED GRAPH_EXEC_UPDATE_ERROR_TOPOLOGY_CHANGED} - The update failed because the topology changed
 * • {@link #CU_GRAPH_EXEC_UPDATE_ERROR_NODE_TYPE_CHANGED GRAPH_EXEC_UPDATE_ERROR_NODE_TYPE_CHANGED} - The update failed because a node type changed
 * • {@link #CU_GRAPH_EXEC_UPDATE_ERROR_FUNCTION_CHANGED GRAPH_EXEC_UPDATE_ERROR_FUNCTION_CHANGED} - The update failed because the function of a kernel node changed (CUDA driver < 11.2)
 * • {@link #CU_GRAPH_EXEC_UPDATE_ERROR_PARAMETERS_CHANGED GRAPH_EXEC_UPDATE_ERROR_PARAMETERS_CHANGED} - The update failed because the parameters changed in a way that is not supported
 * • {@link #CU_GRAPH_EXEC_UPDATE_ERROR_NOT_SUPPORTED GRAPH_EXEC_UPDATE_ERROR_NOT_SUPPORTED} - The update failed because something about the node is not supported
 * • {@link #CU_GRAPH_EXEC_UPDATE_ERROR_UNSUPPORTED_FUNCTION_CHANGE GRAPH_EXEC_UPDATE_ERROR_UNSUPPORTED_FUNCTION_CHANGE} - The update failed because the function of a kernel node changed in an unsupported way
 *
 * A usage sketch follows the constant declarations below.
 */
public static final int CU_GRAPH_EXEC_UPDATE_SUCCESS = 0x0, CU_GRAPH_EXEC_UPDATE_ERROR = 0x1, CU_GRAPH_EXEC_UPDATE_ERROR_TOPOLOGY_CHANGED = 0x2, CU_GRAPH_EXEC_UPDATE_ERROR_NODE_TYPE_CHANGED = 0x3, CU_GRAPH_EXEC_UPDATE_ERROR_FUNCTION_CHANGED = 0x4, CU_GRAPH_EXEC_UPDATE_ERROR_PARAMETERS_CHANGED = 0x5, CU_GRAPH_EXEC_UPDATE_ERROR_NOT_SUPPORTED = 0x6, CU_GRAPH_EXEC_UPDATE_ERROR_UNSUPPORTED_FUNCTION_CHANGE = 0x7;
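These result codes are reported when an instantiated graph is updated in place. The fallback pattern below is a sketch only; it assumes the {@code cuGraphExecUpdate} binding exposes the classic {@code (hGraphExec, hGraph, phErrorNode_out, updateResult_out)} signature, that {@code graphExec}/{@code updatedGraph} are handles in scope, and it reuses the hypothetical {@code check} helper from above.

try (MemoryStack stack = MemoryStack.stackPush()) {
    PointerBuffer errorNode = stack.mallocPointer(1);
    IntBuffer     result    = stack.mallocInt(1);

    // Try to patch the existing executable graph; rebuild it if the update is rejected.
    if (cuGraphExecUpdate(graphExec, updatedGraph, errorNode, result) != CUDA_SUCCESS
            || result.get(0) != CU_GRAPH_EXEC_UPDATE_SUCCESS) {
        System.err.println("Graph exec update failed with result " + result.get(0) + ", re-instantiating");
        // A real application would call cuGraphExecDestroy and cuGraphInstantiate here.
    }
}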

/**
 * CUDA memory pool attributes ({@code CUmemPool_attribute})
 *
 * Enum values:
 *
 * • {@link #CU_MEMPOOL_ATTR_REUSE_FOLLOW_EVENT_DEPENDENCIES MEMPOOL_ATTR_REUSE_FOLLOW_EVENT_DEPENDENCIES} - Allow {@link #cuMemAllocAsync MemAllocAsync} to use memory asynchronously freed in another stream as long as a stream ordering dependency of the allocating stream on the free action exists. CUDA events and null stream interactions can create the required stream ordered dependencies. (value type = {@code int}, default enabled)
 * • {@link #CU_MEMPOOL_ATTR_REUSE_ALLOW_OPPORTUNISTIC MEMPOOL_ATTR_REUSE_ALLOW_OPPORTUNISTIC} - Allow reuse of already completed frees when there is no dependency between the free and allocation. (value type = {@code int}, default enabled)
 * • {@link #CU_MEMPOOL_ATTR_REUSE_ALLOW_INTERNAL_DEPENDENCIES MEMPOOL_ATTR_REUSE_ALLOW_INTERNAL_DEPENDENCIES} - Allow {@link #cuMemAllocAsync MemAllocAsync} to insert new stream dependencies in order to establish the stream ordering required to reuse a piece of memory released by {@link #cuMemFreeAsync MemFreeAsync}. (value type = {@code int}, default enabled)
 * • {@link #CU_MEMPOOL_ATTR_RELEASE_THRESHOLD MEMPOOL_ATTR_RELEASE_THRESHOLD} - Amount of reserved memory in bytes to hold onto before trying to release memory back to the OS. When more than the release threshold bytes of memory are held by the memory pool, the allocator will try to release memory back to the OS on the next call to stream, event or context synchronize. (value type = {@code cuuint64_t}, default 0)
 * • {@link #CU_MEMPOOL_ATTR_RESERVED_MEM_CURRENT MEMPOOL_ATTR_RESERVED_MEM_CURRENT} - Amount of backing memory currently allocated for the mempool. (value type = {@code cuuint64_t})
 * • {@link #CU_MEMPOOL_ATTR_RESERVED_MEM_HIGH MEMPOOL_ATTR_RESERVED_MEM_HIGH} - High watermark of backing memory allocated for the {@code mempool} since the last time it was reset. High watermark can only be reset to zero. (value type = {@code cuuint64_t})
 * • {@link #CU_MEMPOOL_ATTR_USED_MEM_CURRENT MEMPOOL_ATTR_USED_MEM_CURRENT} - Amount of memory from the pool that is currently in use by the application. (value type = {@code cuuint64_t})
 * • {@link #CU_MEMPOOL_ATTR_USED_MEM_HIGH MEMPOOL_ATTR_USED_MEM_HIGH} - High watermark of the amount of memory from the pool that was in use by the application since the last time it was reset. High watermark can only be reset to zero. (value type = {@code cuuint64_t})
 *
 * A tuning sketch follows the constant declarations below.
 */
public static final int CU_MEMPOOL_ATTR_REUSE_FOLLOW_EVENT_DEPENDENCIES = 1, CU_MEMPOOL_ATTR_REUSE_ALLOW_OPPORTUNISTIC = 2, CU_MEMPOOL_ATTR_REUSE_ALLOW_INTERNAL_DEPENDENCIES = 3, CU_MEMPOOL_ATTR_RELEASE_THRESHOLD = 4, CU_MEMPOOL_ATTR_RESERVED_MEM_CURRENT = 5, CU_MEMPOOL_ATTR_RESERVED_MEM_HIGH = 6, CU_MEMPOOL_ATTR_USED_MEM_CURRENT = 7, CU_MEMPOOL_ATTR_USED_MEM_HIGH = 8;
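A sketch of raising the release threshold of a device's default memory pool so that freed allocations stay cached instead of being returned to the OS on every synchronize. It assumes {@code cuDeviceGetDefaultMemPool} and a {@code cuMemPoolSetAttribute} overload that accepts a {@code LongBuffer} for {@code cuuint64_t}-valued attributes, that {@code device} is a device ordinal in scope, and it reuses the hypothetical {@code check} helper from above.

try (MemoryStack stack = MemoryStack.stackPush()) {
    PointerBuffer pool = stack.mallocPointer(1);
    check(cuDeviceGetDefaultMemPool(pool, device));

    // Keep up to 64 MiB of freed memory cached in the pool.
    LongBuffer threshold = stack.longs(64L << 20);
    check(cuMemPoolSetAttribute(pool.get(0), CU_MEMPOOL_ATTR_RELEASE_THRESHOLD, threshold));
}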

/**
 * {@code CUgraphMem_attribute}
 *
 * Enum values:
 *
 * • {@link #CU_GRAPH_MEM_ATTR_USED_MEM_CURRENT GRAPH_MEM_ATTR_USED_MEM_CURRENT} - Amount of memory, in bytes, currently associated with graphs. (value type = {@code cuuint64_t})
 * • {@link #CU_GRAPH_MEM_ATTR_USED_MEM_HIGH GRAPH_MEM_ATTR_USED_MEM_HIGH} - High watermark of memory, in bytes, associated with graphs since the last time it was reset. High watermark can only be reset to zero. (value type = {@code cuuint64_t})
 * • {@link #CU_GRAPH_MEM_ATTR_RESERVED_MEM_CURRENT GRAPH_MEM_ATTR_RESERVED_MEM_CURRENT} - Amount of memory, in bytes, currently allocated for use by the CUDA graphs asynchronous allocator. (value type = {@code cuuint64_t})
 * • {@link #CU_GRAPH_MEM_ATTR_RESERVED_MEM_HIGH GRAPH_MEM_ATTR_RESERVED_MEM_HIGH} - High watermark of memory, in bytes, currently allocated for use by the CUDA graphs asynchronous allocator. (value type = {@code cuuint64_t})
 */
public static final int CU_GRAPH_MEM_ATTR_USED_MEM_CURRENT = 0, CU_GRAPH_MEM_ATTR_USED_MEM_HIGH = 1, CU_GRAPH_MEM_ATTR_RESERVED_MEM_CURRENT = 2, CU_GRAPH_MEM_ATTR_RESERVED_MEM_HIGH = 3;

/**
 * Enum values:
 *
 * • {@link #CU_CUDA_COOPERATIVE_LAUNCH_MULTI_DEVICE_NO_PRE_LAUNCH_SYNC CUDA_COOPERATIVE_LAUNCH_MULTI_DEVICE_NO_PRE_LAUNCH_SYNC} - If set, each kernel launched as part of {@link #cuLaunchCooperativeKernelMultiDevice LaunchCooperativeKernelMultiDevice} only waits for prior work in the stream corresponding to that GPU to complete before the kernel begins execution.
 * • {@link #CU_CUDA_COOPERATIVE_LAUNCH_MULTI_DEVICE_NO_POST_LAUNCH_SYNC CUDA_COOPERATIVE_LAUNCH_MULTI_DEVICE_NO_POST_LAUNCH_SYNC} - If set, any subsequent work pushed in a stream that participated in a call to {@link #cuLaunchCooperativeKernelMultiDevice LaunchCooperativeKernelMultiDevice} will only wait for the kernel launched on the GPU corresponding to that stream to complete before it begins execution.
 */
public static final int CU_CUDA_COOPERATIVE_LAUNCH_MULTI_DEVICE_NO_PRE_LAUNCH_SYNC = 0x1, CU_CUDA_COOPERATIVE_LAUNCH_MULTI_DEVICE_NO_POST_LAUNCH_SYNC = 0x2;

/**
 * Enum values:
 *
 * • {@link #CUDA_ARRAY3D_LAYERED CUDA_ARRAY3D_LAYERED} - If set, the CUDA array is a collection of layers, where each layer is either a 1D or a 2D array and the Depth member of {@link CUDA_ARRAY3D_DESCRIPTOR} specifies the number of layers, not the depth of a 3D array.
 * • {@link #CUDA_ARRAY3D_2DARRAY CUDA_ARRAY3D_2DARRAY} - Deprecated, use {@link #CUDA_ARRAY3D_LAYERED}.
 * • {@link #CUDA_ARRAY3D_SURFACE_LDST CUDA_ARRAY3D_SURFACE_LDST} - This flag must be set in order to bind a surface reference to the CUDA array.
 * • {@link #CUDA_ARRAY3D_CUBEMAP CUDA_ARRAY3D_CUBEMAP} - If set, the CUDA array is a collection of six 2D arrays, representing faces of a cube. The width of such a CUDA array must be equal to its height, and Depth must be six. If the {@link #CUDA_ARRAY3D_LAYERED} flag is also set, then the CUDA array is a collection of cubemaps and Depth must be a multiple of six.
 * • {@link #CUDA_ARRAY3D_TEXTURE_GATHER CUDA_ARRAY3D_TEXTURE_GATHER} - This flag must be set in order to perform texture gather operations on a CUDA array.
 * • {@link #CUDA_ARRAY3D_DEPTH_TEXTURE CUDA_ARRAY3D_DEPTH_TEXTURE} - This flag if set indicates that the CUDA array is a DEPTH_TEXTURE.
 * • {@link #CUDA_ARRAY3D_COLOR_ATTACHMENT CUDA_ARRAY3D_COLOR_ATTACHMENT} - This flag indicates that the CUDA array may be bound as a color target in an external graphics API.
 * • {@link #CUDA_ARRAY3D_SPARSE CUDA_ARRAY3D_SPARSE} - This flag if set indicates that the CUDA array or CUDA mipmapped array is a sparse CUDA array or CUDA mipmapped array respectively
 *
 * A usage sketch follows the constant declarations below.
 */
public static final int CUDA_ARRAY3D_LAYERED = 0x01, CUDA_ARRAY3D_2DARRAY = 0x01, CUDA_ARRAY3D_SURFACE_LDST = 0x02, CUDA_ARRAY3D_CUBEMAP = 0x04, CUDA_ARRAY3D_TEXTURE_GATHER = 0x08, CUDA_ARRAY3D_DEPTH_TEXTURE = 0x10, CUDA_ARRAY3D_COLOR_ATTACHMENT = 0x20, CUDA_ARRAY3D_SPARSE = 0x40;
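A sketch of how the {@code CUDA_ARRAY3D_*} flags are combined in a {@link CUDA_ARRAY3D_DESCRIPTOR}. The struct accessor names, the {@code cuArray3DCreate} signature and the {@code CU_AD_FORMAT_FLOAT} format constant are assumed to follow LWJGL's generated conventions; the {@code check} helper is the hypothetical one from the error-code sketch above.

try (MemoryStack stack = MemoryStack.stackPush()) {
    // A 512x512 2D layered array with six layers (Depth = layer count, per CUDA_ARRAY3D_LAYERED),
    // also usable for surface load/store.
    CUDA_ARRAY3D_DESCRIPTOR desc = CUDA_ARRAY3D_DESCRIPTOR.calloc(stack)
        .Width(512)
        .Height(512)
        .Depth(6)
        .Format(CU_AD_FORMAT_FLOAT)
        .NumChannels(4)
        .Flags(CUDA_ARRAY3D_LAYERED | CUDA_ARRAY3D_SURFACE_LDST);

    PointerBuffer pHandle = stack.mallocPointer(1);
    check(cuArray3DCreate(pHandle, desc));
    long array = pHandle.get(0);
    // ... later: cuArrayDestroy(array);
}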

/**
 * Flag for {@link #cuTexRefSetArray TexRefSetArray}.
 *
 * Enum values:
 *
 * • {@link #CU_TRSA_OVERRIDE_FORMAT TRSA_OVERRIDE_FORMAT} - Override the {@code texref} format with a format inferred from the array.
 */
public static final int CU_TRSA_OVERRIDE_FORMAT = 0x1;

/**
 * Flag for {@link #cuTexRefSetFlags TexRefSetFlags}.
 *
 * Enum values:
 *
 * • {@link #CU_TRSF_READ_AS_INTEGER TRSF_READ_AS_INTEGER} - Read the texture as integers rather than promoting the values to floats in the range {@code [0,1]}.
 * • {@link #CU_TRSF_NORMALIZED_COORDINATES TRSF_NORMALIZED_COORDINATES} - Use normalized texture coordinates in the range {@code [0,1)} instead of {@code [0,dim)}.
 * • {@link #CU_TRSF_SRGB TRSF_SRGB} - Perform {@code sRGB->linear} conversion during texture read.
 * • {@link #CU_TRSF_DISABLE_TRILINEAR_OPTIMIZATION TRSF_DISABLE_TRILINEAR_OPTIMIZATION} - Disable any trilinear filtering optimizations.
 */
public static final int CU_TRSF_READ_AS_INTEGER = 0x01, CU_TRSF_NORMALIZED_COORDINATES = 0x02, CU_TRSF_SRGB = 0x10, CU_TRSF_DISABLE_TRILINEAR_OPTIMIZATION = 0x20;

/** End of array terminator for the {@code extra} parameter to {@link #cuLaunchKernel LaunchKernel}. */
public static final long CU_LAUNCH_PARAM_END = 0x0L;

/**
 * Indicator that the next value in the {@code extra} parameter to {@link #cuLaunchKernel LaunchKernel} will be a pointer to a buffer containing all kernel
 * parameters used for launching kernel {@code f}.
 *
 * This buffer needs to honor all alignment/padding requirements of the individual parameters. If {@link #CU_LAUNCH_PARAM_BUFFER_SIZE LAUNCH_PARAM_BUFFER_SIZE} is not also
 * specified in the {@code extra} array, then {@link #CU_LAUNCH_PARAM_BUFFER_POINTER LAUNCH_PARAM_BUFFER_POINTER} will have no effect.
 */
public static final long CU_LAUNCH_PARAM_BUFFER_POINTER = 0x1L;

/**
 * Indicator that the next value in the {@code extra} parameter to {@link #cuLaunchKernel LaunchKernel} will be a pointer to a {@code size_t} which contains the
 * size of the buffer specified with {@link #CU_LAUNCH_PARAM_BUFFER_POINTER LAUNCH_PARAM_BUFFER_POINTER}.
 *
 * It is required that {@code CU_LAUNCH_PARAM_BUFFER_POINTER} also be specified in the {@code extra} array if the value associated with
 * {@code CU_LAUNCH_PARAM_BUFFER_SIZE} is not zero.
 *
 * A launch sketch using these indicators follows the constant declarations below.
 */
public static final long CU_LAUNCH_PARAM_BUFFER_SIZE = 0x2L;

/** For texture references loaded into the module, use default texunit from texture reference. */
public static final int CU_PARAM_TR_DEFAULT = -1;

/** Device that represents the CPU. */
public static final int CU_DEVICE_CPU = -1;

/** Device that represents an invalid device. */
public static final int CU_DEVICE_INVALID = -2;
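A sketch of launching a kernel through the {@code extra} mechanism instead of {@code kernelParams}. It assumes the usual LWJGL {@code cuLaunchKernel} signature (function handle, grid/block dimensions, shared memory, stream, nullable {@code kernelParams}, nullable {@code extra}), a kernel whose only parameter is a single 64-bit device pointer, in-scope {@code function}, {@code devicePtr}, {@code gridDimX}, {@code blockDimX} and {@code stream} values, and the hypothetical {@code check} helper from above.

try (MemoryStack stack = MemoryStack.stackPush()) {
    // Pack the argument buffer manually; it must honor the parameters' alignment/padding rules.
    ByteBuffer args = stack.malloc(8); // one 64-bit device pointer
    args.putLong(0, devicePtr);

    // CU_LAUNCH_PARAM_BUFFER_SIZE expects a pointer to a size_t holding the buffer size.
    PointerBuffer argsSize = stack.mallocPointer(1).put(0, args.remaining());

    PointerBuffer extra = stack.mallocPointer(5)
        .put(0, CU_LAUNCH_PARAM_BUFFER_POINTER)
        .put(1, memAddress(args))
        .put(2, CU_LAUNCH_PARAM_BUFFER_SIZE)
        .put(3, memAddress(argsSize))
        .put(4, CU_LAUNCH_PARAM_END);

    check(cuLaunchKernel(function,
        gridDimX, 1, 1,   // grid dimensions
        blockDimX, 1, 1,  // block dimensions
        0, stream,        // dynamic shared memory, stream
        null,             // kernelParams is not used when extra is supplied
        extra));
}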

/**
 * Bitmasks for {@link #CU_DEVICE_ATTRIBUTE_GPU_DIRECT_RDMA_FLUSH_WRITES_OPTIONS DEVICE_ATTRIBUTE_GPU_DIRECT_RDMA_FLUSH_WRITES_OPTIONS}. ({@code CUflushGPUDirectRDMAWritesOptions})
 *
 * Enum values:
 *
 * • {@link #CU_FLUSH_GPU_DIRECT_RDMA_WRITES_OPTION_HOST FLUSH_GPU_DIRECT_RDMA_WRITES_OPTION_HOST} - {@link #cuFlushGPUDirectRDMAWrites FlushGPUDirectRDMAWrites} and its CUDA Runtime API counterpart are supported on the device.
 * • {@link #CU_FLUSH_GPU_DIRECT_RDMA_WRITES_OPTION_MEMOPS FLUSH_GPU_DIRECT_RDMA_WRITES_OPTION_MEMOPS} - The {@link #CU_STREAM_WAIT_VALUE_FLUSH STREAM_WAIT_VALUE_FLUSH} flag and the {@link #CU_STREAM_MEM_OP_FLUSH_REMOTE_WRITES STREAM_MEM_OP_FLUSH_REMOTE_WRITES} {@code MemOp} are supported on the device.
 */
public static final int CU_FLUSH_GPU_DIRECT_RDMA_WRITES_OPTION_HOST = 1<<0, CU_FLUSH_GPU_DIRECT_RDMA_WRITES_OPTION_MEMOPS = 1<<1;

/**
 * Platform native ordering for GPUDirect RDMA writes. ({@code CUGPUDirectRDMAWritesOrdering})
 *
 * Enum values:
 *
 * • {@link #CU_GPU_DIRECT_RDMA_WRITES_ORDERING_NONE GPU_DIRECT_RDMA_WRITES_ORDERING_NONE} - The device does not natively support ordering of remote writes. {@link #cuFlushGPUDirectRDMAWrites FlushGPUDirectRDMAWrites} can be leveraged if supported.
 * • {@link #CU_GPU_DIRECT_RDMA_WRITES_ORDERING_OWNER GPU_DIRECT_RDMA_WRITES_ORDERING_OWNER} - Natively, the device can consistently consume remote writes, although other CUDA devices may not.
 * • {@link #CU_GPU_DIRECT_RDMA_WRITES_ORDERING_ALL_DEVICES GPU_DIRECT_RDMA_WRITES_ORDERING_ALL_DEVICES} - Any CUDA device in the system can consistently consume remote writes to this device.
 */
public static final int CU_GPU_DIRECT_RDMA_WRITES_ORDERING_NONE = 0, CU_GPU_DIRECT_RDMA_WRITES_ORDERING_OWNER = 100, CU_GPU_DIRECT_RDMA_WRITES_ORDERING_ALL_DEVICES = 200;

/**
 * The scopes for {@link #cuFlushGPUDirectRDMAWrites FlushGPUDirectRDMAWrites} ({@code CUflushGPUDirectRDMAWritesScope})
 *
 * Enum values:
 *
 * • {@link #CU_FLUSH_GPU_DIRECT_RDMA_WRITES_TO_OWNER FLUSH_GPU_DIRECT_RDMA_WRITES_TO_OWNER} - Blocks until remote writes are visible to the CUDA device context owning the data.
 * • {@link #CU_FLUSH_GPU_DIRECT_RDMA_WRITES_TO_ALL_DEVICES FLUSH_GPU_DIRECT_RDMA_WRITES_TO_ALL_DEVICES} - Blocks until remote writes are visible to all CUDA device contexts.
 */
public static final int CU_FLUSH_GPU_DIRECT_RDMA_WRITES_TO_OWNER = 100, CU_FLUSH_GPU_DIRECT_RDMA_WRITES_TO_ALL_DEVICES = 200;
Enum values:

 • {@link #CU_FLUSH_GPU_DIRECT_RDMA_WRITES_TARGET_CURRENT_CTX FLUSH_GPU_DIRECT_RDMA_WRITES_TARGET_CURRENT_CTX} - Sets the target for {@code cuFlushGPUDirectRDMAWrites()} to the currently active CUDA device context.
*/ public static final int CU_FLUSH_GPU_DIRECT_RDMA_WRITES_TARGET_CURRENT_CTX = 0; /** * The additional write options for {@link #cuGraphDebugDotPrint GraphDebugDotPrint} ({@code CUgraphDebugDot_flags}) * *
Enum values:

 • {@link #CU_GRAPH_DEBUG_DOT_FLAGS_VERBOSE GRAPH_DEBUG_DOT_FLAGS_VERBOSE} - Output all debug data as if every debug flag is enabled
 • {@link #CU_GRAPH_DEBUG_DOT_FLAGS_RUNTIME_TYPES GRAPH_DEBUG_DOT_FLAGS_RUNTIME_TYPES} - Use CUDA Runtime structures for output
 • {@link #CU_GRAPH_DEBUG_DOT_FLAGS_KERNEL_NODE_PARAMS GRAPH_DEBUG_DOT_FLAGS_KERNEL_NODE_PARAMS} - Adds {@link CUDA_KERNEL_NODE_PARAMS} values to output
 • {@link #CU_GRAPH_DEBUG_DOT_FLAGS_MEMCPY_NODE_PARAMS GRAPH_DEBUG_DOT_FLAGS_MEMCPY_NODE_PARAMS} - Adds {@link CUDA_MEMCPY3D} values to output
 • {@link #CU_GRAPH_DEBUG_DOT_FLAGS_MEMSET_NODE_PARAMS GRAPH_DEBUG_DOT_FLAGS_MEMSET_NODE_PARAMS} - Adds {@link CUDA_MEMSET_NODE_PARAMS} values to output
 • {@link #CU_GRAPH_DEBUG_DOT_FLAGS_HOST_NODE_PARAMS GRAPH_DEBUG_DOT_FLAGS_HOST_NODE_PARAMS} - Adds {@link CUDA_HOST_NODE_PARAMS} values to output
 • {@link #CU_GRAPH_DEBUG_DOT_FLAGS_EVENT_NODE_PARAMS GRAPH_DEBUG_DOT_FLAGS_EVENT_NODE_PARAMS} - Adds {@code CUevent} handle from record and wait nodes to output
 • {@link #CU_GRAPH_DEBUG_DOT_FLAGS_EXT_SEMAS_SIGNAL_NODE_PARAMS GRAPH_DEBUG_DOT_FLAGS_EXT_SEMAS_SIGNAL_NODE_PARAMS} - Adds {@link CUDA_EXT_SEM_SIGNAL_NODE_PARAMS} values to output
 • {@link #CU_GRAPH_DEBUG_DOT_FLAGS_EXT_SEMAS_WAIT_NODE_PARAMS GRAPH_DEBUG_DOT_FLAGS_EXT_SEMAS_WAIT_NODE_PARAMS} - Adds {@link CUDA_EXT_SEM_WAIT_NODE_PARAMS} values to output
 • {@link #CU_GRAPH_DEBUG_DOT_FLAGS_KERNEL_NODE_ATTRIBUTES GRAPH_DEBUG_DOT_FLAGS_KERNEL_NODE_ATTRIBUTES} - Adds {@code CUkernelNodeAttrValue} values to output
 • {@link #CU_GRAPH_DEBUG_DOT_FLAGS_HANDLES GRAPH_DEBUG_DOT_FLAGS_HANDLES} - Adds node handles and every kernel function handle to output
 • {@link #CU_GRAPH_DEBUG_DOT_FLAGS_MEM_ALLOC_NODE_PARAMS GRAPH_DEBUG_DOT_FLAGS_MEM_ALLOC_NODE_PARAMS} - Adds memory alloc node parameters to output
 • {@link #CU_GRAPH_DEBUG_DOT_FLAGS_MEM_FREE_NODE_PARAMS GRAPH_DEBUG_DOT_FLAGS_MEM_FREE_NODE_PARAMS} - Adds memory free node parameters to output
*/ public static final int CU_GRAPH_DEBUG_DOT_FLAGS_VERBOSE = 1<<0, CU_GRAPH_DEBUG_DOT_FLAGS_RUNTIME_TYPES = 1<<1, CU_GRAPH_DEBUG_DOT_FLAGS_KERNEL_NODE_PARAMS = 1<<2, CU_GRAPH_DEBUG_DOT_FLAGS_MEMCPY_NODE_PARAMS = 1<<3, CU_GRAPH_DEBUG_DOT_FLAGS_MEMSET_NODE_PARAMS = 1<<4, CU_GRAPH_DEBUG_DOT_FLAGS_HOST_NODE_PARAMS = 1<<5, CU_GRAPH_DEBUG_DOT_FLAGS_EVENT_NODE_PARAMS = 1<<6, CU_GRAPH_DEBUG_DOT_FLAGS_EXT_SEMAS_SIGNAL_NODE_PARAMS = 1<<7, CU_GRAPH_DEBUG_DOT_FLAGS_EXT_SEMAS_WAIT_NODE_PARAMS = 1<<8, CU_GRAPH_DEBUG_DOT_FLAGS_KERNEL_NODE_ATTRIBUTES = 1<<9, CU_GRAPH_DEBUG_DOT_FLAGS_HANDLES = 1<<10, CU_GRAPH_DEBUG_DOT_FLAGS_MEM_ALLOC_NODE_PARAMS = 1<<11, CU_GRAPH_DEBUG_DOT_FLAGS_MEM_FREE_NODE_PARAMS = 1<<12; /** * Flags for user objects for graphs. ({@code CUuserObject_flags}) * *
Enum values:

 • {@link #CU_USER_OBJECT_NO_DESTRUCTOR_SYNC USER_OBJECT_NO_DESTRUCTOR_SYNC} - Indicates the destructor execution is not synchronized by any CUDA handle.
*/ public static final int CU_USER_OBJECT_NO_DESTRUCTOR_SYNC = 1; /** * Flags for retaining user object references for graphs. ({@code CUuserObjectRetain_flags}) * *
Enum values:

 • {@link #CU_GRAPH_USER_OBJECT_MOVE GRAPH_USER_OBJECT_MOVE} - Transfer references from the caller rather than creating new references.
*/ public static final int CU_GRAPH_USER_OBJECT_MOVE = 1; /** * Flags for instantiating a graph. ({@code CUgraphInstantiate_flags}) * *
Enum values:

 • {@link #CUDA_GRAPH_INSTANTIATE_FLAG_AUTO_FREE_ON_LAUNCH CUDA_GRAPH_INSTANTIATE_FLAG_AUTO_FREE_ON_LAUNCH} - Automatically free memory allocated in a graph before relaunching.
*/ public static final int CUDA_GRAPH_INSTANTIATE_FLAG_AUTO_FREE_ON_LAUNCH = 1; protected CU() { throw new UnsupportedOperationException(); } // --- [ cuGetErrorString ] --- /** Unsafe version of: {@link #cuGetErrorString GetErrorString} */ public static int ncuGetErrorString(int error, long pStr) { long __functionAddress = Functions.GetErrorString; return callPI(error, pStr, __functionAddress); } /** * Gets the string description of an error code. * *

Sets {@code *pStr} to the address of a NULL-terminated string description of the error code {@code error}. If the error code is not recognized, * {@link #CUDA_ERROR_INVALID_VALUE} will be returned and {@code *pStr} will be set to the {@code NULL} address.
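A minimal sketch of turning a {@code CUresult} into a readable message (the {@code err} value stands in for a result returned by an earlier call):

try (MemoryStack stack = stackPush()) {
    PointerBuffer pStr = stack.mallocPointer(1);
    if (cuGetErrorString(err, pStr) == CUDA_SUCCESS && pStr.get(0) != NULL) {
        System.err.println("CUDA error " + err + ": " + memASCII(pStr.get(0)));
    } else {
        System.err.println("Unrecognized CUDA error code: " + err);
    }
}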

* * @param error error code to convert to string * @param pStr address of the string pointer */ @NativeType("CUresult") public static int cuGetErrorString(@NativeType("CUresult") int error, @NativeType("char const **") PointerBuffer pStr) { if (CHECKS) { check(pStr, 1); } return ncuGetErrorString(error, memAddress(pStr)); } // --- [ cuGetErrorName ] --- /** Unsafe version of: {@link #cuGetErrorName GetErrorName} */ public static int ncuGetErrorName(int error, long pStr) { long __functionAddress = Functions.GetErrorName; return callPI(error, pStr, __functionAddress); } /** * Gets the string representation of an error code enum name. * *

Sets {@code *pStr} to the address of a NULL-terminated string representation of the name of the enum error code {@code error}. If the error code is not * recognized, {@link #CUDA_ERROR_INVALID_VALUE} will be returned and {@code *pStr} will be set to the {@code NULL} address.

* * @param error error code to convert to string * @param pStr address of the string pointer */ @NativeType("CUresult") public static int cuGetErrorName(@NativeType("CUresult") int error, @NativeType("char const **") PointerBuffer pStr) { if (CHECKS) { check(pStr, 1); } return ncuGetErrorName(error, memAddress(pStr)); } // --- [ cuInit ] --- /** * Initialize the CUDA driver API. * *

Initializes the driver API and must be called before any other function from the driver API. Currently, the {@code Flags} parameter must be 0. If * {@code cuInit()} has not been called, any function from the driver API will return {@link #CUDA_ERROR_NOT_INITIALIZED}.
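A typical bootstrap, sketched here (the exception type is only an illustrative choice):

int result = cuInit(0); // must run once, before any other driver API call in the process
if (result != CUDA_SUCCESS) {
    throw new IllegalStateException("cuInit failed with CUresult " + result);
}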

* * @param Flags initialization flag for CUDA */ @NativeType("CUresult") public static int cuInit(@NativeType("unsigned int") int Flags) { long __functionAddress = Functions.Init; return callI(Flags, __functionAddress); } // --- [ cuDriverGetVersion ] --- /** Unsafe version of: {@link #cuDriverGetVersion DriverGetVersion} */ public static int ncuDriverGetVersion(long driverVersion) { long __functionAddress = Functions.DriverGetVersion; return callPI(driverVersion, __functionAddress); } /** * Returns the latest CUDA version supported by driver. * *

Returns in {@code *driverVersion} the version of CUDA supported by the driver. The version is returned as ({@code 1000 × major + 10 × minor}). For * example, CUDA 9.2 would be represented by 9020.

* *

This function automatically returns {@link #CUDA_ERROR_INVALID_VALUE} if {@code driverVersion} is {@code NULL}.
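Given the {@code 1000 × major + 10 × minor} encoding described above, the version can be unpacked as in this sketch:

try (MemoryStack stack = stackPush()) {
    IntBuffer pVersion = stack.mallocInt(1);
    if (cuDriverGetVersion(pVersion) == CUDA_SUCCESS) {
        int v = pVersion.get(0);
        System.out.println("Driver supports CUDA " + (v / 1000) + "." + ((v % 1000) / 10)); // e.g. 9020 -> 9.2
    }
}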

* * @param driverVersion returns the CUDA driver version */ @NativeType("CUresult") public static int cuDriverGetVersion(@NativeType("int *") IntBuffer driverVersion) { if (CHECKS) { check(driverVersion, 1); } return ncuDriverGetVersion(memAddress(driverVersion)); } // --- [ cuDeviceGet ] --- /** Unsafe version of: {@link #cuDeviceGet DeviceGet} */ public static int ncuDeviceGet(long device, int ordinal) { long __functionAddress = Functions.DeviceGet; return callPI(device, ordinal, __functionAddress); } /** * Returns a handle to a compute device. * *

Returns in {@code *device} a device handle given an ordinal in the range {@code [0, cuDeviceGetCount()-1]}.

* * @param device returned device handle * @param ordinal device number to get handle for */ @NativeType("CUresult") public static int cuDeviceGet(@NativeType("CUdevice *") IntBuffer device, int ordinal) { if (CHECKS) { check(device, 1); } return ncuDeviceGet(memAddress(device), ordinal); } // --- [ cuDeviceGetCount ] --- /** Unsafe version of: {@link #cuDeviceGetCount DeviceGetCount} */ public static int ncuDeviceGetCount(long count) { long __functionAddress = Functions.DeviceGetCount; return callPI(count, __functionAddress); } /** * Returns the number of compute-capable devices. * *

Returns in {@code *count} the number of devices with compute capability greater than or equal to 2.0 that are available for execution. If there is no * such device, {@code cuDeviceGetCount()} returns 0.
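A short enumeration sketch that combines this with {@link #cuDeviceGet DeviceGet}:

try (MemoryStack stack = stackPush()) {
    IntBuffer pCount  = stack.mallocInt(1);
    IntBuffer pDevice = stack.mallocInt(1);
    if (cuDeviceGetCount(pCount) == CUDA_SUCCESS) {
        for (int i = 0; i < pCount.get(0); i++) {
            if (cuDeviceGet(pDevice, i) == CUDA_SUCCESS) {
                System.out.println("Ordinal " + i + " -> CUdevice handle " + pDevice.get(0));
            }
        }
    }
}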

* * @param count returned number of compute-capable devices */ @NativeType("CUresult") public static int cuDeviceGetCount(@NativeType("int *") IntBuffer count) { if (CHECKS) { check(count, 1); } return ncuDeviceGetCount(memAddress(count)); } // --- [ cuDeviceGetName ] --- /** * Unsafe version of: {@link #cuDeviceGetName DeviceGetName} * * @param len maximum length of string to store in {@code name} */ public static int ncuDeviceGetName(long name, int len, int dev) { long __functionAddress = Functions.DeviceGetName; return callPI(name, len, dev, __functionAddress); } /** * Returns an identifier string for the device. * *

Returns an ASCII string identifying the device {@code dev} in the NULL-terminated string pointed to by {@code name}. {@code len} specifies the maximum * length of the string that may be returned.
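For example, decoding the NUL-terminated name of a device handle {@code dev} obtained earlier (a sketch; 256 bytes is just a generous upper bound):

try (MemoryStack stack = stackPush()) {
    ByteBuffer name = stack.malloc(256);
    if (cuDeviceGetName(name, dev) == CUDA_SUCCESS) {
        System.out.println("Device name: " + memASCII(memAddress(name))); // decoding stops at the NUL terminator
    }
}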

* * @param name returned identifier string for the device * @param dev device to get identifier string for */ @NativeType("CUresult") public static int cuDeviceGetName(@NativeType("char *") ByteBuffer name, @NativeType("CUdevice") int dev) { return ncuDeviceGetName(memAddress(name), name.remaining(), dev); } // --- [ cuDeviceGetUuid ] --- /** Unsafe version of: {@link #cuDeviceGetUuid DeviceGetUuid} */ public static int ncuDeviceGetUuid(long uuid, int dev) { long __functionAddress = Functions.DeviceGetUuid; if (CHECKS) { check(__functionAddress); } return callPI(uuid, dev, __functionAddress); } /** * Return an UUID for the device. * *

Note there is a later version of this API, {@link #cuDeviceGetUuid_v2 DeviceGetUuid_v2}. It will supplant this version in 12.0; this version is retained for minor version compatibility.

* *

Returns 16 octets identifying the device {@code dev} in the structure pointed to by {@code uuid}.

* * @param uuid returned UUID * @param dev device to get identifier string for */ @NativeType("CUresult") public static int cuDeviceGetUuid(@NativeType("CUuuid *") CUuuid uuid, @NativeType("CUdevice") int dev) { return ncuDeviceGetUuid(uuid.address(), dev); } // --- [ cuDeviceGetUuid_v2 ] --- /** Unsafe version of: {@link #cuDeviceGetUuid_v2 DeviceGetUuid_v2} */ public static int ncuDeviceGetUuid_v2(long uuid, int dev) { long __functionAddress = Functions.DeviceGetUuid_v2; if (CHECKS) { check(__functionAddress); } return callPI(uuid, dev, __functionAddress); } /** * Return an UUID for the device (11.4+). * *

Returns 16 octets identifying the device {@code dev} in the structure pointed to by {@code uuid}. If the device is in MIG mode, returns its MIG UUID, which uniquely identifies the subscribed MIG compute instance.

* * @param uuid returned UUID * @param dev device to get identifier string for */ @NativeType("CUresult") public static int cuDeviceGetUuid_v2(@NativeType("CUuuid *") CUuuid uuid, @NativeType("CUdevice") int dev) { return ncuDeviceGetUuid_v2(uuid.address(), dev); } // --- [ cuDeviceGetLuid ] --- /** Unsafe version of: {@link #cuDeviceGetLuid DeviceGetLuid} */ public static int ncuDeviceGetLuid(long luid, long deviceNodeMask, int dev) { long __functionAddress = Functions.DeviceGetLuid; if (CHECKS) { check(__functionAddress); } return callPPI(luid, deviceNodeMask, dev, __functionAddress); } /** * Return an LUID and device node mask for the device * *

Return identifying information ({@code luid} and {@code deviceNodeMask}) to allow matching device with graphics APIs.

* * @param luid returned LUID * @param deviceNodeMask returned device node mask * @param dev device to get identifier string for */ @NativeType("CUresult") public static int cuDeviceGetLuid(@NativeType("char *") ByteBuffer luid, @NativeType("unsigned int *") IntBuffer deviceNodeMask, @NativeType("CUdevice") int dev) { if (CHECKS) { check(deviceNodeMask, 1); } return ncuDeviceGetLuid(memAddress(luid), memAddress(deviceNodeMask), dev); } // --- [ cuDeviceTotalMem ] --- /** Unsafe version of: {@link #cuDeviceTotalMem DeviceTotalMem} */ public static int ncuDeviceTotalMem(long bytes, int dev) { long __functionAddress = Functions.DeviceTotalMem; return callPI(bytes, dev, __functionAddress); } /** * Returns the total amount of memory on the device * *

Returns in {@code *bytes} the total amount of memory available on the device {@code dev} in bytes.
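Because the result is a {@code size_t}, it is read through a {@code PointerBuffer}; a brief sketch for a device handle {@code dev}:

try (MemoryStack stack = stackPush()) {
    PointerBuffer pBytes = stack.mallocPointer(1);
    if (cuDeviceTotalMem(pBytes, dev) == CUDA_SUCCESS) {
        System.out.println("Total device memory: " + (pBytes.get(0) >> 20) + " MiB");
    }
}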

* * @param bytes returned memory available on device in bytes * @param dev device handle */ @NativeType("CUresult") public static int cuDeviceTotalMem(@NativeType("size_t *") PointerBuffer bytes, @NativeType("CUdevice") int dev) { if (CHECKS) { check(bytes, 1); } return ncuDeviceTotalMem(memAddress(bytes), dev); } // --- [ cuDeviceGetTexture1DLinearMaxWidth ] --- /** Unsafe version of: {@link #cuDeviceGetTexture1DLinearMaxWidth DeviceGetTexture1DLinearMaxWidth} */ public static int ncuDeviceGetTexture1DLinearMaxWidth(long maxWidthInElements, int format, int numChannels, int dev) { long __functionAddress = Functions.DeviceGetTexture1DLinearMaxWidth; if (CHECKS) { check(__functionAddress); } return callPI(maxWidthInElements, format, numChannels, dev, __functionAddress); } /** * Returns the maximum number of elements allocatable in a 1D linear texture for a given texture element size. * *

Returns in {@code maxWidthInElements} the maximum number of texture elements allocatable in a 1D linear texture for given {@code format} and {@code numChannels}.

* * @param maxWidthInElements returned maximum number of texture elements allocatable for given {@code format} and {@code numChannels} * @param format texture format * @param numChannels number of channels per texture element * @param dev device handle */ @NativeType("CUresult") public static int cuDeviceGetTexture1DLinearMaxWidth(@NativeType("size_t *") PointerBuffer maxWidthInElements, @NativeType("CUarray_format") int format, @NativeType("unsigned int") int numChannels, @NativeType("CUdevice") int dev) { if (CHECKS) { check(maxWidthInElements, 1); } return ncuDeviceGetTexture1DLinearMaxWidth(memAddress(maxWidthInElements), format, numChannels, dev); } // --- [ cuDeviceGetAttribute ] --- /** Unsafe version of: {@link #cuDeviceGetAttribute DeviceGetAttribute} */ public static int ncuDeviceGetAttribute(long pi, int attrib, int dev) { long __functionAddress = Functions.DeviceGetAttribute; return callPI(pi, attrib, dev, __functionAddress); } /** * Returns information about the device. * *

Returns in {@code *pi} the integer value of the attribute {@code attrib} on device {@code dev}. The supported attributes are:
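For example, querying a single attribute for a device handle {@code dev} (the multiprocessor count is only an illustrative choice of attribute):

try (MemoryStack stack = stackPush()) {
    IntBuffer pi = stack.mallocInt(1);
    if (cuDeviceGetAttribute(pi, CU_DEVICE_ATTRIBUTE_MULTIPROCESSOR_COUNT, dev) == CUDA_SUCCESS) {
        System.out.println("Multiprocessors: " + pi.get(0));
    }
}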

* * @param pi returned device attribute value * @param attrib device attribute to query * @param dev device handle */ @NativeType("CUresult") public static int cuDeviceGetAttribute(@NativeType("int *") IntBuffer pi, @NativeType("CUdevice_attribute") int attrib, @NativeType("CUdevice") int dev) { if (CHECKS) { check(pi, 1); } return ncuDeviceGetAttribute(memAddress(pi), attrib, dev); } // --- [ cuDeviceGetNvSciSyncAttributes ] --- /** Unsafe version of: {@link #cuDeviceGetNvSciSyncAttributes DeviceGetNvSciSyncAttributes} */ public static int ncuDeviceGetNvSciSyncAttributes(long nvSciSyncAttrList, int dev, int flags) { long __functionAddress = Functions.DeviceGetNvSciSyncAttributes; if (CHECKS) { check(__functionAddress); } return callPI(nvSciSyncAttrList, dev, flags, __functionAddress); } /** * Return {@code NvSciSync} attributes that this device can support. * *

Returns in {@code nvSciSyncAttrList} the properties of {@code NvSciSync} that this CUDA device, {@code dev}, can support. The returned {@code nvSciSyncAttrList} can be used to create an {@code NvSciSync} object that matches this device's capabilities.

* *

If the {@code NvSciSyncAttrKey_RequiredPerm} field in {@code nvSciSyncAttrList} is already set, this API will return {@link #CUDA_ERROR_INVALID_VALUE}.

* *

The application should set {@code nvSciSyncAttrList} to a valid {@code NvSciSyncAttrList}; otherwise this API will return {@link #CUDA_ERROR_INVALID_HANDLE}.

* *

The {@code flags} parameter controls how the application intends to use the {@code NvSciSync} created from the {@code nvSciSyncAttrList}. The valid flags are:

* *
 • {@link #CUDA_NVSCISYNC_ATTR_SIGNAL}, specifies that the application intends to signal an {@code NvSciSync} on this CUDA device.
 • {@link #CUDA_NVSCISYNC_ATTR_WAIT}, specifies that the application intends to wait on an {@code NvSciSync} on this CUDA device.
* *

At least one of these flags must be set; otherwise the API returns {@link #CUDA_ERROR_INVALID_VALUE}. The two flags are orthogonal to one another: a developer may set both, which allows setting both wait- and signal-specific attributes in the same {@code nvSciSyncAttrList}.

* * @param nvSciSyncAttrList return NvSciSync attributes supported * @param dev valid Cuda Device to get {@code NvSciSync} attributes for * @param flags flags describing {@code NvSciSync} usage */ @NativeType("CUresult") public static int cuDeviceGetNvSciSyncAttributes(@NativeType("void *") ByteBuffer nvSciSyncAttrList, @NativeType("CUdevice") int dev, int flags) { return ncuDeviceGetNvSciSyncAttributes(memAddress(nvSciSyncAttrList), dev, flags); } // --- [ cuDeviceSetMemPool ] --- /** * Sets the current memory pool of a device * *

The memory pool must be local to the specified device. {@link #cuMemAllocAsync MemAllocAsync} allocates from the current mempool of the provided stream's device. By default, * a device's current memory pool is its default memory pool.

* *
Note
* *

Use {@link #cuMemAllocFromPoolAsync MemAllocFromPoolAsync} to specify asynchronous allocations from a device different than the one the stream runs on.

*/ @NativeType("CUresult") public static int cuDeviceSetMemPool(@NativeType("CUdevice") int dev, @NativeType("CUmemoryPool") long pool) { long __functionAddress = Functions.DeviceSetMemPool; if (CHECKS) { check(__functionAddress); check(pool); } return callPI(dev, pool, __functionAddress); } // --- [ cuDeviceGetMemPool ] --- /** Unsafe version of: {@link #cuDeviceGetMemPool DeviceGetMemPool} */ public static int ncuDeviceGetMemPool(long pool, int dev) { long __functionAddress = Functions.DeviceGetMemPool; if (CHECKS) { check(__functionAddress); } return callPI(pool, dev, __functionAddress); } /** * Gets the current mempool for a device. * *

Returns the last pool provided to {@link #cuDeviceSetMemPool DeviceSetMemPool} for this device or the device's default memory pool if {@link #cuDeviceSetMemPool DeviceSetMemPool} has never been called. * By default the current mempool is the default mempool for a device. Otherwise the returned pool must have been set with {@link #cuDeviceSetMemPool DeviceSetMemPool}.

*/ @NativeType("CUresult") public static int cuDeviceGetMemPool(@NativeType("CUmemoryPool *") PointerBuffer pool, @NativeType("CUdevice") int dev) { if (CHECKS) { check(pool, 1); } return ncuDeviceGetMemPool(memAddress(pool), dev); } // --- [ cuDeviceGetDefaultMemPool ] --- /** Unsafe version of: {@link #cuDeviceGetDefaultMemPool DeviceGetDefaultMemPool} */ public static int ncuDeviceGetDefaultMemPool(long pool_out, int dev) { long __functionAddress = Functions.DeviceGetDefaultMemPool; if (CHECKS) { check(__functionAddress); } return callPI(pool_out, dev, __functionAddress); } /** * Returns the default mempool of a device. * *

The default mempool of a device contains device memory from that device.

*/ @NativeType("CUresult") public static int cuDeviceGetDefaultMemPool(@NativeType("CUmemoryPool *") PointerBuffer pool_out, @NativeType("CUdevice") int dev) { if (CHECKS) { check(pool_out, 1); } return ncuDeviceGetDefaultMemPool(memAddress(pool_out), dev); } // --- [ cuFlushGPUDirectRDMAWrites ] --- /** * Blocks until remote writes are visible to the specified scope. * *

Blocks until GPUDirect RDMA writes to the target context via mappings created through APIs like nvidia_p2p_get_pages (see * https://docs.nvidia.com/cuda/gpudirect-rdma for more information), are visible to the specified scope.

* *

If the scope equals or lies within the scope indicated by {@link #CU_DEVICE_ATTRIBUTE_GPU_DIRECT_RDMA_WRITES_ORDERING DEVICE_ATTRIBUTE_GPU_DIRECT_RDMA_WRITES_ORDERING}, the call will be a no-op and can be safely * omitted for performance. This can be determined by comparing the numerical values between the two enums, with smaller scopes having smaller values.

* *

Users may query support for this API via {@link #CU_DEVICE_ATTRIBUTE_GPU_DIRECT_RDMA_FLUSH_WRITES_OPTIONS DEVICE_ATTRIBUTE_GPU_DIRECT_RDMA_FLUSH_WRITES_OPTIONS}.

* * @param target the target of the operation, see {@code CUflushGPUDirectRDMAWritesTarget} * @param scope the scope of the operation, see {@code CUflushGPUDirectRDMAWritesScope} */ @NativeType("CUresult") public static int cuFlushGPUDirectRDMAWrites(@NativeType("CUflushGPUDirectRDMAWritesTarget") int target, @NativeType("CUflushGPUDirectRDMAWritesScope") int scope) { long __functionAddress = Functions.FlushGPUDirectRDMAWrites; if (CHECKS) { check(__functionAddress); } return callI(target, scope, __functionAddress); } // --- [ cuDeviceGetProperties ] --- /** Unsafe version of: {@link #cuDeviceGetProperties DeviceGetProperties} */ public static int ncuDeviceGetProperties(long prop, int dev) { long __functionAddress = Functions.DeviceGetProperties; return callPI(prop, dev, __functionAddress); } /** * Returns properties for a selected device. * *

Deprecated: This function was deprecated as of CUDA 5.0 and replaced by {@link #cuDeviceGetAttribute DeviceGetAttribute}.

* *

Returns in {@code *prop} the properties of device {@code dev}.

* * @param prop returned properties of device * @param dev device to get properties for */ @NativeType("CUresult") public static int cuDeviceGetProperties(@NativeType("CUdevprop *") CUdevprop prop, @NativeType("CUdevice") int dev) { return ncuDeviceGetProperties(prop.address(), dev); } // --- [ cuDeviceComputeCapability ] --- /** Unsafe version of: {@link #cuDeviceComputeCapability DeviceComputeCapability} */ public static int ncuDeviceComputeCapability(long major, long minor, int dev) { long __functionAddress = Functions.DeviceComputeCapability; return callPPI(major, minor, dev, __functionAddress); } /** * Returns the compute capability of the device. * *

Deprecated: This function was deprecated as of CUDA 5.0 and its functionality superseded by {@link #cuDeviceGetAttribute DeviceGetAttribute}.

* *

Returns in {@code *major} and {@code *minor} the major and minor revision numbers that define the compute capability of the device {@code dev}.

* * @param major major revision number * @param minor minor revision number * @param dev device handle */ @NativeType("CUresult") public static int cuDeviceComputeCapability(@NativeType("int *") IntBuffer major, @NativeType("int *") IntBuffer minor, @NativeType("CUdevice") int dev) { if (CHECKS) { check(major, 1); check(minor, 1); } return ncuDeviceComputeCapability(memAddress(major), memAddress(minor), dev); } // --- [ cuDevicePrimaryCtxRetain ] --- /** Unsafe version of: {@link #cuDevicePrimaryCtxRetain DevicePrimaryCtxRetain} */ public static int ncuDevicePrimaryCtxRetain(long pctx, int dev) { long __functionAddress = Functions.DevicePrimaryCtxRetain; if (CHECKS) { check(__functionAddress); } return callPI(pctx, dev, __functionAddress); } /** * Retain the primary context on the GPU. * *

Retains the primary context on the device. Once the user successfully retains the primary context, the primary context will be active and available to * the user until the user releases it with {@link #cuDevicePrimaryCtxRelease DevicePrimaryCtxRelease} or resets it with {@link #cuDevicePrimaryCtxReset DevicePrimaryCtxReset}. Unlike {@link #cuCtxCreate CtxCreate} the newly retained * context is not pushed onto the stack.

* *

Retaining the primary context for the first time will fail with {@link #CUDA_ERROR_UNKNOWN} if the compute mode of the device is {@link #CU_COMPUTEMODE_PROHIBITED COMPUTEMODE_PROHIBITED}. The * function {@link #cuDeviceGetAttribute DeviceGetAttribute} can be used with {@link #CU_DEVICE_ATTRIBUTE_COMPUTE_MODE DEVICE_ATTRIBUTE_COMPUTE_MODE} to determine the compute mode of the device. The nvidia-smi tool * can be used to set the compute mode for devices. Documentation for nvidia-smi can be obtained by passing a -h option to it.

* *

Please note that the primary context always supports pinned allocations. Other flags can be specified by {@link #cuDevicePrimaryCtxSetFlags DevicePrimaryCtxSetFlags}.
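A common retain/release pattern, sketched for a hypothetical device handle {@code dev}:

try (MemoryStack stack = stackPush()) {
    PointerBuffer pctx = stack.mallocPointer(1);
    if (cuDevicePrimaryCtxRetain(pctx, dev) == CUDA_SUCCESS) {
        long ctx = pctx.get(0);
        cuCtxSetCurrent(ctx); // the retained context is not pushed onto the stack automatically
        try {
            // ... issue work against the primary context ...
            cuCtxSynchronize();
        } finally {
            cuDevicePrimaryCtxRelease(dev); // balances the retain above
        }
    }
}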

* * @param pctx returned context handle of the new context * @param dev device for which primary context is requested */ @NativeType("CUresult") public static int cuDevicePrimaryCtxRetain(@NativeType("CUcontext *") PointerBuffer pctx, @NativeType("CUdevice") int dev) { if (CHECKS) { check(pctx, 1); } return ncuDevicePrimaryCtxRetain(memAddress(pctx), dev); } // --- [ cuDevicePrimaryCtxRelease ] --- /** * Release the primary context on the GPU. * *

Releases the primary context interop on the device. A retained context should always be released once the user is done using it. The context is * automatically reset once the last reference to it is released. This behavior is different when the primary context was retained by the CUDA runtime * from CUDA 4.0 and earlier. In this case, the primary context remains always active.

* *

Releasing a primary context that has not been previously retained will fail with {@link #CUDA_ERROR_INVALID_CONTEXT}.

* *

Please note that unlike {@link #cuCtxDestroy CtxDestroy} this method does not pop the context from stack in any circumstances.

* * @param dev device which primary context is released */ @NativeType("CUresult") public static int cuDevicePrimaryCtxRelease(@NativeType("CUdevice") int dev) { long __functionAddress = Functions.DevicePrimaryCtxRelease; if (CHECKS) { check(__functionAddress); } return callI(dev, __functionAddress); } // --- [ cuDevicePrimaryCtxSetFlags ] --- /** * Set flags for the primary context. * *

Sets the flags for the primary context on the device, overwriting previously set ones.

* *

The three LSBs of the {@code flags} parameter can be used to control how the OS thread, which owns the CUDA context at the time of an API call, * interacts with the OS scheduler when waiting for results from the GPU. Only one of the scheduling flags can be set when creating a context:

* *
    *
  • {@link #CU_CTX_SCHED_SPIN CTX_SCHED_SPIN}: Instruct CUDA to actively spin when waiting for results from the GPU. This can decrease latency when waiting for the GPU, but * may lower the performance of CPU threads if they are performing work in parallel with the CUDA thread.
  • *
  • {@link #CU_CTX_SCHED_YIELD CTX_SCHED_YIELD}: Instruct CUDA to yield its thread when waiting for results from the GPU. This can increase latency when waiting for the GPU, * but can increase the performance of CPU threads performing work in parallel with the GPU.
  • *
  • {@link #CU_CTX_SCHED_BLOCKING_SYNC CTX_SCHED_BLOCKING_SYNC}: Instruct CUDA to block the CPU thread on a synchronization primitive when waiting for the GPU to finish work.
  • *
  • {@link #CU_CTX_BLOCKING_SYNC CTX_BLOCKING_SYNC}: Instruct CUDA to block the CPU thread on a synchronization primitive when waiting for the GPU to finish work. * *

    Deprecated: This flag was deprecated as of CUDA 4.0 and was replaced with {@link #CU_CTX_SCHED_BLOCKING_SYNC CTX_SCHED_BLOCKING_SYNC}.

  • *
  • {@link #CU_CTX_SCHED_AUTO CTX_SCHED_AUTO}: The default value if the {@code flags} parameter is zero, uses a heuristic based on the number of active CUDA contexts in the * process C and the number of logical processors in the system P. If C > P, then CUDA will yield to other OS * threads when waiting for the GPU ({@link #CU_CTX_SCHED_YIELD CTX_SCHED_YIELD}), otherwise CUDA will not yield while waiting for results and actively spin on the processor * ({@link #CU_CTX_SCHED_SPIN CTX_SCHED_SPIN}). Additionally, on Tegra devices, {@link #CU_CTX_SCHED_AUTO CTX_SCHED_AUTO} uses a heuristic based on the power profile of the platform and may choose * {@link #CU_CTX_SCHED_BLOCKING_SYNC CTX_SCHED_BLOCKING_SYNC} for low-powered devices.
  • *
  • {@link #CU_CTX_LMEM_RESIZE_TO_MAX CTX_LMEM_RESIZE_TO_MAX}: Instruct CUDA to not reduce local memory after resizing local memory for a kernel. This can prevent thrashing by local * memory allocations when launching many kernels with high local memory usage at the cost of potentially increased memory usage. * *

    Deprecated: This flag is deprecated and the behavior enabled by this flag is now the default and cannot be disabled.

  • *
* * @param dev device for which the primary context flags are set * @param flags new flags for the device */ @NativeType("CUresult") public static int cuDevicePrimaryCtxSetFlags(@NativeType("CUdevice") int dev, @NativeType("unsigned int") int flags) { long __functionAddress = Functions.DevicePrimaryCtxSetFlags; if (CHECKS) { check(__functionAddress); } return callI(dev, flags, __functionAddress); } // --- [ cuDevicePrimaryCtxGetState ] --- /** Unsafe version of: {@link #cuDevicePrimaryCtxGetState DevicePrimaryCtxGetState} */ public static int ncuDevicePrimaryCtxGetState(int dev, long flags, long active) { long __functionAddress = Functions.DevicePrimaryCtxGetState; if (CHECKS) { check(__functionAddress); } return callPPI(dev, flags, active, __functionAddress); } /** * Get the state of the primary context. * *

Returns in {@code *flags} the flags for the primary context of {@code dev}, and in {@code *active} whether it is active. See * {@link #cuDevicePrimaryCtxSetFlags DevicePrimaryCtxSetFlags} for flag values.

* * @param dev device to get primary context flags for * @param flags pointer to store flags * @param active pointer to store context state; 0 = inactive, 1 = active */ @NativeType("CUresult") public static int cuDevicePrimaryCtxGetState(@NativeType("CUdevice") int dev, @NativeType("unsigned int *") IntBuffer flags, @NativeType("int *") IntBuffer active) { if (CHECKS) { check(flags, 1); check(active, 1); } return ncuDevicePrimaryCtxGetState(dev, memAddress(flags), memAddress(active)); } // --- [ cuDevicePrimaryCtxReset ] --- /** * Destroy all allocations and reset all state on the primary context. * *

Explicitly destroys and cleans up all resources associated with the current device in the current process.

* *

Note that it is the responsibility of the calling function to ensure that no other module in the process is using the device any more. For that reason it is recommended to use {@link #cuDevicePrimaryCtxRelease DevicePrimaryCtxRelease} in most cases. However, it is safe for other modules to call {@code cuDevicePrimaryCtxRelease()} even after resetting the device. Resetting the primary context does not release it; an application that has retained the primary context should explicitly release its usage.

* * @param dev device for which primary context is destroyed */ @NativeType("CUresult") public static int cuDevicePrimaryCtxReset(@NativeType("CUdevice") int dev) { long __functionAddress = Functions.DevicePrimaryCtxReset; if (CHECKS) { check(__functionAddress); } return callI(dev, __functionAddress); } // --- [ cuDeviceGetExecAffinitySupport ] --- /** Unsafe version of: {@link #cuDeviceGetExecAffinitySupport DeviceGetExecAffinitySupport} */ public static int ncuDeviceGetExecAffinitySupport(long pi, int type, int dev) { long __functionAddress = Functions.DeviceGetExecAffinitySupport; if (CHECKS) { check(__functionAddress); } return callPI(pi, type, dev, __functionAddress); } /** * Returns information about the execution affinity support of the device. * *

Returns in {@code *pi} whether execution affinity type {@code type} is supported by device {@code dev}. The supported types are:

* *
    *
  • {@link #CU_EXEC_AFFINITY_TYPE_SM_COUNT EXEC_AFFINITY_TYPE_SM_COUNT}: 1 if context with limited SMs is supported by the device, or 0 if not;
  • *
* * @param pi 1 if the execution affinity type {@code type} is supported by the device, or 0 if not * @param type execution affinity type to query * @param dev device handle */ @NativeType("CUresult") public static int cuDeviceGetExecAffinitySupport(@NativeType("int *") IntBuffer pi, @NativeType("CUexecAffinityType") int type, @NativeType("CUdevice") int dev) { if (CHECKS) { check(pi, 1); } return ncuDeviceGetExecAffinitySupport(memAddress(pi), type, dev); } // --- [ cuCtxCreate ] --- /** Unsafe version of: {@link #cuCtxCreate CtxCreate} */ public static int ncuCtxCreate(long pctx, int flags, int dev) { long __functionAddress = Functions.CtxCreate; return callPI(pctx, flags, dev, __functionAddress); } /** * Create a CUDA context. * *
Note
* *

In most cases it is recommended to use {@link #cuDevicePrimaryCtxRetain DevicePrimaryCtxRetain}.

* *

Creates a new CUDA context and associates it with the calling thread. The {@code flags} parameter is described below. The context is created with a usage count of 1 and the caller of {@code cuCtxCreate()} must call {@link #cuCtxDestroy CtxDestroy} when done using the context. If a context is already current to the thread, it is supplanted by the newly created context and may be restored by a subsequent call to {@link #cuCtxPopCurrent CtxPopCurrent}.

* *

The three LSBs of the {@code flags} parameter can be used to control how the OS thread, which owns the CUDA context at the time of an API call, * interacts with the OS scheduler when waiting for results from the GPU. Only one of the scheduling flags can be set when creating a context:

* *
    *
  • {@link #CU_CTX_SCHED_SPIN CTX_SCHED_SPIN}: Instruct CUDA to actively spin when waiting for results from the GPU. This can decrease latency when waiting for the GPU, but may * lower the performance of CPU threads if they are performing work in parallel with the CUDA thread.
  • *
  • {@link #CU_CTX_SCHED_YIELD CTX_SCHED_YIELD}: Instruct CUDA to yield its thread when waiting for results from the GPU. This can increase latency when waiting for the GPU, but * can increase the performance of CPU threads performing work in parallel with the GPU.
  • *
  • {@link #CU_CTX_SCHED_BLOCKING_SYNC CTX_SCHED_BLOCKING_SYNC}: Instruct CUDA to block the CPU thread on a synchronization primitive when waiting for the GPU to finish work.
  • *
  • {@link #CU_CTX_BLOCKING_SYNC CTX_BLOCKING_SYNC}: Instruct CUDA to block the CPU thread on a synchronization primitive when waiting for the GPU to finish work. * *

    Deprecated: This flag was deprecated as of CUDA 4.0 and was replaced with {@link #CU_CTX_SCHED_BLOCKING_SYNC CTX_SCHED_BLOCKING_SYNC}.

  • *
  • {@link #CU_CTX_SCHED_AUTO CTX_SCHED_AUTO}: The default value if the {@code flags} parameter is zero, uses a heuristic based on the number of active CUDA contexts in the * process C and the number of logical processors in the system P. If C > P, then CUDA will yield to other OS * threads when waiting for the GPU ({@link #CU_CTX_SCHED_YIELD CTX_SCHED_YIELD}), otherwise CUDA will not yield while waiting for results and actively spin on the processor * ({@link #CU_CTX_SCHED_SPIN CTX_SCHED_SPIN}). Additionally, on Tegra devices, {@link #CU_CTX_SCHED_AUTO CTX_SCHED_AUTO} uses a heuristic based on the power profile of the platform and may choose * {@link #CU_CTX_SCHED_BLOCKING_SYNC CTX_SCHED_BLOCKING_SYNC} for low-powered devices.
  • *
  • {@link #CU_CTX_MAP_HOST CTX_MAP_HOST}: Instruct CUDA to support mapped pinned allocations. This flag must be set in order to allocate pinned host memory that is * accessible to the GPU.
  • *
  • {@link #CU_CTX_LMEM_RESIZE_TO_MAX CTX_LMEM_RESIZE_TO_MAX}: Instruct CUDA to not reduce local memory after resizing local memory for a kernel. This can prevent thrashing by local * memory allocations when launching many kernels with high local memory usage at the cost of potentially increased memory usage. * *

    Deprecated: This flag is deprecated and the behavior enabled by this flag is now the default and cannot be disabled. Instead, the * per-thread stack size can be controlled with {@link #cuCtxSetLimit CtxSetLimit}.

  • *
* *

Context creation will fail with {@link #CUDA_ERROR_UNKNOWN} if the compute mode of the device is {@link #CU_COMPUTEMODE_PROHIBITED COMPUTEMODE_PROHIBITED}. The function {@link #cuDeviceGetAttribute DeviceGetAttribute} can be used with {@link #CU_DEVICE_ATTRIBUTE_COMPUTE_MODE DEVICE_ATTRIBUTE_COMPUTE_MODE} to determine the compute mode of the device. The nvidia-smi tool can be used to set the compute mode for devices. Documentation for nvidia-smi can be obtained by passing a -h option to it.
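A minimal create/destroy sketch for a device handle {@code dev} (as noted above, retaining the primary context is usually preferable):

try (MemoryStack stack = stackPush()) {
    PointerBuffer pctx = stack.mallocPointer(1);
    if (cuCtxCreate(pctx, CU_CTX_SCHED_BLOCKING_SYNC, dev) == CUDA_SUCCESS) { // at most one scheduling flag
        long ctx = pctx.get(0); // the new context is already current to this thread
        try {
            // ... use the context ...
        } finally {
            cuCtxDestroy(ctx);
        }
    }
}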

* * @param pctx returned context handle of the new context * @param flags context creation flags * @param dev device to create context on */ @NativeType("CUresult") public static int cuCtxCreate(@NativeType("CUcontext *") PointerBuffer pctx, @NativeType("unsigned int") int flags, @NativeType("CUdevice") int dev) { if (CHECKS) { check(pctx, 1); } return ncuCtxCreate(memAddress(pctx), flags, dev); } // --- [ cuCtxCreate_v3 ] --- /** * Unsafe version of: {@link #cuCtxCreate_v3 CtxCreate_v3} * * @param numParams number of execution affinity parameters */ public static int ncuCtxCreate_v3(long pctx, long paramsArray, int numParams, int flags, int dev) { long __functionAddress = Functions.CtxCreate_v3; if (CHECKS) { check(__functionAddress); } return callPPI(pctx, paramsArray, numParams, flags, dev, __functionAddress); } /** * Create a CUDA context with execution affinity. * *

Creates a new CUDA context with execution affinity and associates it with the calling thread. The {@code paramsArray} and {@code flags} parameters are described below. The context is created with a usage count of 1 and the caller of {@link #cuCtxCreate CtxCreate} must call {@link #cuCtxDestroy CtxDestroy} when done using the context. If a context is already current to the thread, it is supplanted by the newly created context and may be restored by a subsequent call to {@link #cuCtxPopCurrent CtxPopCurrent}.

* *

The type and the amount of execution resource the context can use is limited by {@code paramsArray} and {@code numParams}. The {@code paramsArray} is * an array of {@code CUexecAffinityParam} and the {@code numParams} describes the size of the array. If two {@code CUexecAffinityParam} in the array have * the same type, the latter execution affinity parameter overrides the former execution affinity parameter. The supported execution affinity types are:

* *
    *
  • {@link #CU_EXEC_AFFINITY_TYPE_SM_COUNT EXEC_AFFINITY_TYPE_SM_COUNT} limits the portion of SMs that the context can use. The portion of SMs is specified as the number of SMs via {@link CUexecAffinitySmCount}. This limit will be internally rounded up to the next hardware-supported amount. Hence, it is imperative to query the actual execution affinity of the context via {@link #cuCtxGetExecAffinity CtxGetExecAffinity} after context creation. Currently, this attribute is only supported under Volta+ MPS.
  • *
* *

The three LSBs of the {@code flags} parameter can be used to control how the OS thread, which owns the CUDA context at the time of an API call, * interacts with the OS scheduler when waiting for results from the GPU. Only one of the scheduling flags can be set when creating a context:

* *
    *
  • {@link #CU_CTX_SCHED_SPIN CTX_SCHED_SPIN}: Instruct CUDA to actively spin when waiting for results from the GPU. This can decrease latency when waiting for the GPU, but may * lower the performance of CPU threads if they are performing work in parallel with the CUDA thread.
  • *
  • {@link #CU_CTX_SCHED_YIELD CTX_SCHED_YIELD}: Instruct CUDA to yield its thread when waiting for results from the GPU. This can increase latency when waiting for the GPU, but * can increase the performance of CPU threads performing work in parallel with the GPU.
  • *
  • {@link #CU_CTX_SCHED_BLOCKING_SYNC CTX_SCHED_BLOCKING_SYNC}: Instruct CUDA to block the CPU thread on a synchronization primitive when waiting for the GPU to finish work.
  • *
  • {@link #CU_CTX_BLOCKING_SYNC CTX_BLOCKING_SYNC}: Instruct CUDA to block the CPU thread on a synchronization primitive when waiting for the GPU to finish work. * *

    Deprecated: This flag was deprecated as of CUDA 4.0 and was replaced with {@link #CU_CTX_SCHED_BLOCKING_SYNC CTX_SCHED_BLOCKING_SYNC}.

  • *
  • {@link #CU_CTX_SCHED_AUTO CTX_SCHED_AUTO}: The default value if the {@code flags} parameter is zero, uses a heuristic based on the number of active CUDA contexts in the * process C and the number of logical processors in the system P. If C > P, then CUDA will yield to other OS * threads when waiting for the GPU ({@link #CU_CTX_SCHED_YIELD CTX_SCHED_YIELD}), otherwise CUDA will not yield while waiting for results and actively spin on the processor * ({@link #CU_CTX_SCHED_SPIN CTX_SCHED_SPIN}). Additionally, on Tegra devices, {@link #CU_CTX_SCHED_AUTO CTX_SCHED_AUTO} uses a heuristic based on the power profile of the platform and may choose * {@link #CU_CTX_SCHED_BLOCKING_SYNC CTX_SCHED_BLOCKING_SYNC} for low-powered devices.
  • *
  • {@link #CU_CTX_MAP_HOST CTX_MAP_HOST}: Instruct CUDA to support mapped pinned allocations. This flag must be set in order to allocate pinned host memory that is accessible * to the GPU.
  • *
  • {@link #CU_CTX_LMEM_RESIZE_TO_MAX CTX_LMEM_RESIZE_TO_MAX}: Instruct CUDA to not reduce local memory after resizing local memory for a kernel. This can prevent thrashing by local * memory allocations when launching many kernels with high local memory usage at the cost of potentially increased memory usage. * *

    Deprecated: This flag is deprecated and the behavior enabled by this flag is now the default and cannot be disabled. Instead, the * per-thread stack size can be controlled with {@link #cuCtxSetLimit CtxSetLimit}.

  • *
* *

Context creation will fail with {@link #CUDA_ERROR_UNKNOWN} if the compute mode of the device is {@link #CU_COMPUTEMODE_PROHIBITED COMPUTEMODE_PROHIBITED}. The function {@link #cuDeviceGetAttribute DeviceGetAttribute} can be used with {@link #CU_DEVICE_ATTRIBUTE_COMPUTE_MODE DEVICE_ATTRIBUTE_COMPUTE_MODE} to determine the compute mode of the device. The nvidia-smi tool can be used to set the compute mode for devices. Documentation for nvidia-smi can be obtained by passing a -h option to it.

* * @param pctx returned context handle of the new context * @param paramsArray execution affinity parameters * @param flags context creation flags * @param dev device to create context on */ @NativeType("CUresult") public static int cuCtxCreate_v3(@NativeType("CUcontext *") PointerBuffer pctx, @NativeType("CUexecAffinityParam *") CUexecAffinityParam.Buffer paramsArray, @NativeType("unsigned int") int flags, @NativeType("CUdevice") int dev) { if (CHECKS) { check(pctx, 1); } return ncuCtxCreate_v3(memAddress(pctx), paramsArray.address(), paramsArray.remaining(), flags, dev); } // --- [ cuCtxDestroy ] --- /** * Destroy a CUDA context. * *

Destroys the CUDA context specified by {@code ctx}. The context {@code ctx} will be destroyed regardless of how many threads it is current to. It is the responsibility of the calling function to ensure that no API call is issued using {@code ctx} while {@code cuCtxDestroy()} is executing.

* *

Destroys and cleans up all resources associated with the context. It is the caller's responsibility to ensure that the context or its resources are not * accessed or passed in subsequent API calls and doing so will result in undefined behavior. These resources include CUDA types such as {@code CUmodule}, * {@code CUfunction}, {@code CUstream}, {@code CUevent}, {@code CUarray}, {@code CUmipmappedArray}, {@code CUtexObject}, {@code CUsurfObject}, * {@code CUtexref}, {@code CUsurfref}, {@code CUgraphicsResource}, {@code CUlinkState}, {@code CUexternalMemory} and {@code CUexternalSemaphore}.

* *

If {@code ctx} is current to the calling thread then {@code ctx} will also be popped from the current thread's context stack (as though * {@link #cuCtxPopCurrent CtxPopCurrent} were called). If {@code ctx} is current to other threads, then {@code ctx} will remain current to those threads, and attempting to * access {@code ctx} from those threads will result in the error {@link #CUDA_ERROR_CONTEXT_IS_DESTROYED}.

* * @param ctx context to destroy */ @NativeType("CUresult") public static int cuCtxDestroy(@NativeType("CUcontext") long ctx) { long __functionAddress = Functions.CtxDestroy; if (CHECKS) { check(__functionAddress); check(ctx); } return callPI(ctx, __functionAddress); } // --- [ cuCtxPushCurrent ] --- /** * Pushes a context on the current CPU thread. * *

Pushes the given context {@code ctx} onto the CPU thread's stack of current contexts. The specified context becomes the CPU thread's current context, * so all CUDA functions that operate on the current context are affected.

* *

The previous current context may be made current again by calling {@link #cuCtxDestroy CtxDestroy} or {@link #cuCtxPopCurrent CtxPopCurrent}.

* * @param ctx context to push */ @NativeType("CUresult") public static int cuCtxPushCurrent(@NativeType("CUcontext") long ctx) { long __functionAddress = Functions.CtxPushCurrent; if (CHECKS) { check(__functionAddress); check(ctx); } return callPI(ctx, __functionAddress); } // --- [ cuCtxPopCurrent ] --- /** Unsafe version of: {@link #cuCtxPopCurrent CtxPopCurrent} */ public static int ncuCtxPopCurrent(long pctx) { long __functionAddress = Functions.CtxPopCurrent; if (CHECKS) { check(__functionAddress); } return callPI(pctx, __functionAddress); } /** * Pops the current CUDA context from the current CPU thread. * *

Pops the current CUDA context from the CPU thread and passes back the old context handle in {@code *pctx}. That context may then be made current to a * different CPU thread by calling {@link #cuCtxPushCurrent CtxPushCurrent}.

* *

If a context was current to the CPU thread before {@link #cuCtxCreate CtxCreate} or {@link #cuCtxPushCurrent CtxPushCurrent} was called, this function makes that context current to the CPU * thread again.
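A sketch of temporarily binding an existing context {@code ctx} on a worker thread with the push/pop pair:

cuCtxPushCurrent(ctx); // ctx becomes current on this thread
try {
    // ... issue work in ctx ...
} finally {
    try (MemoryStack stack = stackPush()) {
        PointerBuffer popped = stack.mallocPointer(1);
        cuCtxPopCurrent(popped); // popped.get(0) == ctx; it may now be pushed on another thread
    }
}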

* * @param pctx returned new context handle */ @NativeType("CUresult") public static int cuCtxPopCurrent(@NativeType("CUcontext *") PointerBuffer pctx) { if (CHECKS) { check(pctx, 1); } return ncuCtxPopCurrent(memAddress(pctx)); } // --- [ cuCtxSetCurrent ] --- /** * Binds the specified CUDA context to the calling CPU thread. * *

Binds the specified CUDA context to the calling CPU thread. If {@code ctx} is {@code NULL} then the CUDA context previously bound to the calling CPU thread is * unbound and {@link #CUDA_SUCCESS} is returned.

* *

If there exists a CUDA context stack on the calling CPU thread, this will replace the top of that stack with {@code ctx}. If {@code ctx} is {@code NULL} then * this will be equivalent to popping the top of the calling CPU thread's CUDA context stack (or a no-op if the calling CPU thread's CUDA context stack is * empty).

* * @param ctx context to bind to the calling CPU thread */ @NativeType("CUresult") public static int cuCtxSetCurrent(@NativeType("CUcontext") long ctx) { long __functionAddress = Functions.CtxSetCurrent; if (CHECKS) { check(__functionAddress); check(ctx); } return callPI(ctx, __functionAddress); } // --- [ cuCtxGetCurrent ] --- /** Unsafe version of: {@link #cuCtxGetCurrent CtxGetCurrent} */ public static int ncuCtxGetCurrent(long pctx) { long __functionAddress = Functions.CtxGetCurrent; if (CHECKS) { check(__functionAddress); } return callPI(pctx, __functionAddress); } /** * Returns the CUDA context bound to the calling CPU thread. * *

Returns in {@code *pctx} the CUDA context bound to the calling CPU thread. If no context is bound to the calling CPU thread then {@code *pctx} is set * to {@code NULL} and {@link #CUDA_SUCCESS} is returned.

* * @param pctx returned context handle */ @NativeType("CUresult") public static int cuCtxGetCurrent(@NativeType("CUcontext *") PointerBuffer pctx) { if (CHECKS) { check(pctx, 1); } return ncuCtxGetCurrent(memAddress(pctx)); } // --- [ cuCtxGetDevice ] --- /** Unsafe version of: {@link #cuCtxGetDevice CtxGetDevice} */ public static int ncuCtxGetDevice(long device) { long __functionAddress = Functions.CtxGetDevice; return callPI(device, __functionAddress); } /** * Returns the device ID for the current context. * *

Returns in {@code *device} the ordinal of the current context's device.

* * @param device returned device ID for the current context */ @NativeType("CUresult") public static int cuCtxGetDevice(@NativeType("CUdevice *") IntBuffer device) { if (CHECKS) { check(device, 1); } return ncuCtxGetDevice(memAddress(device)); } // --- [ cuCtxGetFlags ] --- /** Unsafe version of: {@link #cuCtxGetFlags CtxGetFlags} */ public static int ncuCtxGetFlags(long flags) { long __functionAddress = Functions.CtxGetFlags; if (CHECKS) { check(__functionAddress); } return callPI(flags, __functionAddress); } /** * Returns the flags for the current context. * *

Returns in {@code *flags} the flags of the current context. See {@link #cuCtxCreate CtxCreate} for flag values.

* * @param flags pointer to store flags of current context */ @NativeType("CUresult") public static int cuCtxGetFlags(@NativeType("unsigned int *") IntBuffer flags) { if (CHECKS) { check(flags, 1); } return ncuCtxGetFlags(memAddress(flags)); } // --- [ cuCtxSynchronize ] --- /** * Block for a context's tasks to complete. * *

Blocks until the device has completed all preceding requested tasks. {@code cuCtxSynchronize()} returns an error if one of the preceding tasks failed. * If the context was created with the {@link #CU_CTX_SCHED_BLOCKING_SYNC CTX_SCHED_BLOCKING_SYNC} flag, the CPU thread will block until the GPU context has finished its work.

*/ @NativeType("CUresult") public static int cuCtxSynchronize() { long __functionAddress = Functions.CtxSynchronize; return callI(__functionAddress); } // --- [ cuCtxSetLimit ] --- /** * Set resource limits. * *

Setting {@code limit} to {@code value} is a request by the application to update the current limit maintained by the context. The driver is free to * modify the requested value to meet h/w requirements (this could be clamping to minimum or maximum values, rounding up to nearest element size, etc). * The application can use {@link #cuCtxGetLimit CtxGetLimit} to find out exactly what the limit has been set to.

* *

Setting each {@code CUlimit} has its own specific restrictions, so each is discussed here.

* *
    *
  • {@link #CU_LIMIT_STACK_SIZE LIMIT_STACK_SIZE} controls the stack size in bytes of each GPU thread. The driver automatically increases the per-thread stack size for each kernel * launch as needed. This size isn't reset back to the original value after each launch. Setting this value will take effect immediately, and if * necessary, the device will block until all preceding requested tasks are complete.
  • *
  • {@link #CU_LIMIT_PRINTF_FIFO_SIZE LIMIT_PRINTF_FIFO_SIZE} controls the size in bytes of the FIFO used by the {@code printf()} device system call. Setting {@link #CU_LIMIT_PRINTF_FIFO_SIZE LIMIT_PRINTF_FIFO_SIZE} * must be performed before launching any kernel that uses the {@code printf()} device system call, otherwise {@link #CUDA_ERROR_INVALID_VALUE} will be * returned.
  • *
  • {@link #CU_LIMIT_MALLOC_HEAP_SIZE LIMIT_MALLOC_HEAP_SIZE} controls the size in bytes of the heap used by the {@code malloc()} and {@code free()} device system calls. Setting * {@code CU_LIMIT_MALLOC_HEAP_SIZE} must be performed before launching any kernel that uses the {@code malloc()} or {@code free()} device system * calls, otherwise {@link #CUDA_ERROR_INVALID_VALUE} will be returned.
  • *
  • {@link #CU_LIMIT_DEV_RUNTIME_SYNC_DEPTH LIMIT_DEV_RUNTIME_SYNC_DEPTH} controls the maximum nesting depth of a grid at which a thread can safely call {@code cudaDeviceSynchronize()}. Setting this limit must be performed before any launch of a kernel that uses the device runtime and calls {@code cudaDeviceSynchronize()} above the default sync depth, two levels of grids. Calls to {@code cudaDeviceSynchronize()} will fail with error code {@code cudaErrorSyncDepthExceeded} if the limitation is violated. This limit can be set smaller than the default or up to the maximum launch depth of 24. When setting this limit, keep in mind that additional levels of sync depth require the driver to reserve large amounts of device memory which can no longer be used for user allocations. If these reservations of device memory fail, {@code cuCtxSetLimit()} will return {@link #CUDA_ERROR_OUT_OF_MEMORY}, and the limit can be reset to a lower value. This limit is only applicable to devices of compute capability 3.5 and higher. Attempting to set this limit on devices of compute capability less than 3.5 will result in the error {@link #CUDA_ERROR_UNSUPPORTED_LIMIT} being returned.
  • *
  • {@link #CU_LIMIT_DEV_RUNTIME_PENDING_LAUNCH_COUNT LIMIT_DEV_RUNTIME_PENDING_LAUNCH_COUNT} controls the maximum number of outstanding device runtime launches that can be made from the current * context. A grid is outstanding from the point of launch up until the grid is known to have been completed. Device runtime launches which violate * this limitation fail and return {@code cudaErrorLaunchPendingCountExceeded} when {@code cudaGetLastError()} is called after launch. If more pending * launches than the default (2048 launches) are needed for a module using the device runtime, this limit can be increased. Keep in mind that being * able to sustain additional pending launches will require the driver to reserve larger amounts of device memory upfront which can no longer be used * for allocations. If these reservations fail, {@code cuCtxSetLimit()} will return {@link #CUDA_ERROR_OUT_OF_MEMORY}, and the limit can be reset to a lower * value. This limit is only applicable to devices of compute capability 3.5 and higher. Attempting to set this limit on devices of compute capability * less than 3.5 will result in the error {@link #CUDA_ERROR_UNSUPPORTED_LIMIT} being returned.
  • *
  • {@link #CU_LIMIT_MAX_L2_FETCH_GRANULARITY LIMIT_MAX_L2_FETCH_GRANULARITY} controls the L2 cache fetch granularity. Values can range from 0B to 128B. This is purely a performance hint and it can be ignored or clamped depending on the platform.
  • *
  • {@link #CU_LIMIT_PERSISTING_L2_CACHE_SIZE LIMIT_PERSISTING_L2_CACHE_SIZE} controls the size in bytes available for persisting L2 cache. This is purely a performance hint and it can be ignored or clamped depending on the platform.
  • *
* * @param limit limit to set * @param value size of limit */ @NativeType("CUresult") public static int cuCtxSetLimit(@NativeType("CUlimit") int limit, @NativeType("size_t") long value) { long __functionAddress = Functions.CtxSetLimit; return callPI(limit, value, __functionAddress); } // --- [ cuCtxGetLimit ] --- /** Unsafe version of: {@link #cuCtxGetLimit CtxGetLimit} */ public static int ncuCtxGetLimit(long pvalue, int limit) { long __functionAddress = Functions.CtxGetLimit; return callPI(pvalue, limit, __functionAddress); } /** * Returns resource limits. * *

Returns in {@code *pvalue} the current size of {@code limit}.
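A sketch that requests a larger per-thread stack via {@link #cuCtxSetLimit CtxSetLimit} and reads back the value the driver actually applied:

try (MemoryStack stack = stackPush()) {
    cuCtxSetLimit(CU_LIMIT_STACK_SIZE, 16 * 1024); // a request; the driver may clamp or round it
    PointerBuffer pValue = stack.mallocPointer(1);
    if (cuCtxGetLimit(pValue, CU_LIMIT_STACK_SIZE) == CUDA_SUCCESS) {
        System.out.println("Per-thread stack size: " + pValue.get(0) + " bytes");
    }
}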

* * @param pvalue returned size of limit * @param limit limit to query */ @NativeType("CUresult") public static int cuCtxGetLimit(@NativeType("size_t *") PointerBuffer pvalue, @NativeType("CUlimit") int limit) { if (CHECKS) { check(pvalue, 1); } return ncuCtxGetLimit(memAddress(pvalue), limit); } // --- [ cuCtxGetCacheConfig ] --- /** Unsafe version of: {@link #cuCtxGetCacheConfig CtxGetCacheConfig} */ public static int ncuCtxGetCacheConfig(long pconfig) { long __functionAddress = Functions.CtxGetCacheConfig; return callPI(pconfig, __functionAddress); } /** * Returns the preferred cache configuration for the current context. * *

On devices where the L1 cache and shared memory use the same hardware resources, this function returns through {@code pconfig} the preferred cache * configuration for the current context. This is only a preference. The driver will use the requested configuration if possible, but it is free to choose * a different configuration if required to execute functions.

* *

This will return a {@code pconfig} of {@link #CU_FUNC_CACHE_PREFER_NONE FUNC_CACHE_PREFER_NONE} on devices where the size of the L1 cache and shared memory are fixed.
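 A hedged usage sketch for the cache-configuration pair follows. It assumes the standard {@code CUfunc_cache} value {@code CU_FUNC_CACHE_PREFER_SHARED} is exposed by this class under its CUDA name (only {@code CU_FUNC_CACHE_PREFER_NONE} is referenced above), and that a context is current.

    import static org.lwjgl.cuda.CU.*;
    import static org.lwjgl.system.MemoryStack.stackPush;

    static void cacheConfigExample() {
        // Request more shared memory than L1 cache; the driver treats this as a preference only.
        cuCtxSetCacheConfig(CU_FUNC_CACHE_PREFER_SHARED);
        try (var stack = stackPush()) {
            var pconfig = stack.mallocInt(1);
            if (cuCtxGetCacheConfig(pconfig) == CUDA_SUCCESS) {
                // Devices with fixed L1/shared sizes report CU_FUNC_CACHE_PREFER_NONE here.
                System.out.println("Current cache preference: " + pconfig.get(0));
            }
        }
    }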

* * @param pconfig returned cache configuration */ @NativeType("CUresult") public static int cuCtxGetCacheConfig(@NativeType("CUfunc_cache *") IntBuffer pconfig) { if (CHECKS) { check(pconfig, 1); } return ncuCtxGetCacheConfig(memAddress(pconfig)); } // --- [ cuCtxSetCacheConfig ] --- /** * Sets the preferred cache configuration for the current context. * *

On devices where the L1 cache and shared memory use the same hardware resources, this sets through {@code config} the preferred cache configuration for * the current context. This is only a preference. The driver will use the requested configuration if possible, but it is free to choose a different * configuration if required to execute the function. Any function preference set via {@code cuFuncSetCacheConfig()} will be preferred over this * context-wide setting. Setting the context-wide cache configuration to {@link #CU_FUNC_CACHE_PREFER_NONE FUNC_CACHE_PREFER_NONE} will cause subsequent kernel launches to prefer to not * change the cache configuration unless required to launch the kernel.

* *

This setting does nothing on devices where the size of the L1 cache and shared memory are fixed.

* *

Launching a kernel with a different preference than the most recent preference setting may insert a device-side synchronization point.

* * @param config requested cache configuration */ @NativeType("CUresult") public static int cuCtxSetCacheConfig(@NativeType("CUfunc_cache") int config) { long __functionAddress = Functions.CtxSetCacheConfig; return callI(config, __functionAddress); } // --- [ cuCtxGetSharedMemConfig ] --- /** Unsafe version of: {@link #cuCtxGetSharedMemConfig CtxGetSharedMemConfig} */ public static int ncuCtxGetSharedMemConfig(long pConfig) { long __functionAddress = Functions.CtxGetSharedMemConfig; if (CHECKS) { check(__functionAddress); } return callPI(pConfig, __functionAddress); } /** * Returns the current shared memory configuration for the current context. * *

This function will return in {@code pConfig} the current size of shared memory banks in the current context. On devices with configurable shared memory * banks, {@link #cuCtxSetSharedMemConfig CtxSetSharedMemConfig} can be used to change this setting, so that all subsequent kernel launches will by default use the new bank size. * When {@code cuCtxGetSharedMemConfig} is called on devices without configurable shared memory, it will return the fixed bank size of the hardware.
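 A minimal sketch of the shared-memory-configuration pair, assuming the standard {@code CUsharedconfig} constant {@code CU_SHARED_MEM_CONFIG_EIGHT_BYTE_BANK_SIZE} is exposed by this class under its CUDA name and that a context is current:

    import static org.lwjgl.cuda.CU.*;
    import static org.lwjgl.system.MemoryStack.stackPush;

    static void sharedMemConfigExample() {
        try (var stack = stackPush()) {
            var pConfig = stack.mallocInt(1);
            if (cuCtxGetSharedMemConfig(pConfig) == CUDA_SUCCESS) {
                System.out.println("Current shared memory bank size setting: " + pConfig.get(0));
            }
        }
        // Prefer 8-byte banks for kernels dominated by double-precision shared memory traffic.
        // No effect on devices with a fixed bank size; may insert a device-side sync between launches.
        cuCtxSetSharedMemConfig(CU_SHARED_MEM_CONFIG_EIGHT_BYTE_BANK_SIZE);
    }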

* * @param pConfig returned shared memory configuration */ @NativeType("CUresult") public static int cuCtxGetSharedMemConfig(@NativeType("CUsharedconfig *") IntBuffer pConfig) { if (CHECKS) { check(pConfig, 1); } return ncuCtxGetSharedMemConfig(memAddress(pConfig)); } // --- [ cuCtxSetSharedMemConfig ] --- /** * Sets the shared memory configuration for the current context. * *

On devices with configurable shared memory banks, this function will set the context's shared memory bank size which is used for subsequent kernel * launches.

* *

 Changing the shared memory configuration between launches may insert a device-side synchronization point between those launches.

* *

Changing the shared memory bank size will not increase shared memory usage or affect occupancy of kernels, but may have major effects on performance. * Larger bank sizes will allow for greater potential bandwidth to shared memory, but will change what kinds of accesses to shared memory will result in * bank conflicts.

* *

This function will do nothing on devices with fixed shared memory bank size.

* * @param config requested shared memory configuration */ @NativeType("CUresult") public static int cuCtxSetSharedMemConfig(@NativeType("CUsharedconfig") int config) { long __functionAddress = Functions.CtxSetSharedMemConfig; if (CHECKS) { check(__functionAddress); } return callI(config, __functionAddress); } // --- [ cuCtxGetApiVersion ] --- /** Unsafe version of: {@link #cuCtxGetApiVersion CtxGetApiVersion} */ public static int ncuCtxGetApiVersion(long ctx, long version) { long __functionAddress = Functions.CtxGetApiVersion; return callPPI(ctx, version, __functionAddress); } /** * Gets the context's API version. * *

Returns a version number in {@code version} corresponding to the capabilities of the context (e.g. 3010 or 3020), which library developers can use to * direct callers to a specific API version. If {@code ctx} is {@code NULL}, returns the API version used to create the currently bound context.

* *

Note that new API versions are only introduced when context capabilities are changed that break binary compatibility, so the API version and driver * version may be different. For example, it is valid for the API version to be 3020 while the driver version is 4020.

* * @param ctx context to check * @param version pointer to version */ @NativeType("CUresult") public static int cuCtxGetApiVersion(@NativeType("CUcontext") long ctx, @NativeType("unsigned int *") IntBuffer version) { if (CHECKS) { check(version, 1); } return ncuCtxGetApiVersion(ctx, memAddress(version)); } // --- [ cuCtxGetStreamPriorityRange ] --- /** Unsafe version of: {@link #cuCtxGetStreamPriorityRange CtxGetStreamPriorityRange} */ public static int ncuCtxGetStreamPriorityRange(long leastPriority, long greatestPriority) { long __functionAddress = Functions.CtxGetStreamPriorityRange; return callPPI(leastPriority, greatestPriority, __functionAddress); } /** * Returns numerical values that correspond to the least and greatest stream priorities. * *

 Returns in {@code *leastPriority} and {@code *greatestPriority} the numerical values that correspond to the least and greatest stream priorities * respectively. Stream priorities follow a convention where lower numbers imply greater priorities. The range of meaningful stream priorities is given by * [{@code *greatestPriority}, {@code *leastPriority}]. If the user attempts to create a stream with a priority value that is outside the meaningful * range as specified by this API, the priority is automatically clamped down or up to either {@code *leastPriority} or {@code *greatestPriority} * respectively. See {@link #cuStreamCreateWithPriority StreamCreateWithPriority} for details on creating a priority stream. A {@code NULL} may be passed in for {@code *leastPriority} or {@code *greatestPriority} if the value is not desired.

* *

This function will return {@code 0} in both {@code *leastPriority} and {@code *greatestPriority} if the current context's device does not support * stream priorities (see {@link #cuDeviceGetAttribute DeviceGetAttribute}).
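 For example (a sketch, assuming a current context), the meaningful priority range can be queried as follows; both values come back as {@code 0} when the device does not support stream priorities:

    import static org.lwjgl.cuda.CU.*;
    import static org.lwjgl.system.MemoryStack.stackPush;

    static void streamPriorityRangeExample() {
        try (var stack = stackPush()) {
            var least    = stack.mallocInt(1);
            var greatest = stack.mallocInt(1);
            if (cuCtxGetStreamPriorityRange(least, greatest) == CUDA_SUCCESS) {
                // Lower numbers mean higher priority: the usable range is [greatest, least].
                System.out.println("Stream priorities: " + greatest.get(0) + " (highest) to " + least.get(0) + " (lowest)");
            }
        }
    }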

* * @param leastPriority pointer to an int in which the numerical value for least stream priority is returned * @param greatestPriority pointer to an int in which the numerical value for greatest stream priority is returned */ @NativeType("CUresult") public static int cuCtxGetStreamPriorityRange(@Nullable @NativeType("int *") IntBuffer leastPriority, @Nullable @NativeType("int *") IntBuffer greatestPriority) { if (CHECKS) { checkSafe(leastPriority, 1); checkSafe(greatestPriority, 1); } return ncuCtxGetStreamPriorityRange(memAddressSafe(leastPriority), memAddressSafe(greatestPriority)); } // --- [ cuCtxResetPersistingL2Cache ] --- /** * Resets all persisting lines in cache to normal status. * *

Takes effect on function return.

*/ @NativeType("CUresult") public static int cuCtxResetPersistingL2Cache() { long __functionAddress = Functions.CtxResetPersistingL2Cache; if (CHECKS) { check(__functionAddress); } return callI(__functionAddress); } // --- [ cuCtxGetExecAffinity ] --- /** Unsafe version of: {@link #cuCtxGetExecAffinity CtxGetExecAffinity} */ public static int ncuCtxGetExecAffinity(long pExecAffinity, int type) { long __functionAddress = Functions.CtxGetExecAffinity; if (CHECKS) { check(__functionAddress); } return callPI(pExecAffinity, type, __functionAddress); } /** * Returns the execution affinity setting for the current context. * *

Returns in {@code *pExecAffinity} the current value of {@code type}.

* * @param pExecAffinity returned execution affinity * @param type execution affinity type to query */ @NativeType("CUresult") public static int cuCtxGetExecAffinity(@NativeType("CUexecAffinityParam *") CUexecAffinityParam.Buffer pExecAffinity, @NativeType("CUexecAffinityType") int type) { if (CHECKS) { check(pExecAffinity, 1); } return ncuCtxGetExecAffinity(pExecAffinity.address(), type); } // --- [ cuCtxAttach ] --- /** Unsafe version of: {@link #cuCtxAttach CtxAttach} */ public static int ncuCtxAttach(long pctx, int flags) { long __functionAddress = Functions.CtxAttach; return callPI(pctx, flags, __functionAddress); } /** * Increment a context's usage-count. * *

Deprecated: Note that this function is deprecated and should not be used.

* *

Increments the usage count of the context and passes back a context handle in {@code *pctx} that must be passed to {@link #cuCtxDetach CtxDetach} when the application * is done with the context. {@code cuCtxAttach()} fails if there is no context current to the thread.

* *

Currently, the {@code flags} parameter must be 0.

* * @param pctx returned context handle of the current context * @param flags context attach flags (must be 0) */ @NativeType("CUresult") public static int cuCtxAttach(@NativeType("CUcontext *") PointerBuffer pctx, @NativeType("unsigned int") int flags) { if (CHECKS) { check(pctx, 1); } return ncuCtxAttach(memAddress(pctx), flags); } // --- [ cuCtxDetach ] --- /** * Decrement a context's usage-count * *

Deprecated: Note that this function is deprecated and should not be used.

* *

Decrements the usage count of the context {@code ctx}, and destroys the context if the usage count goes to 0. The context must be a handle that was * passed back by {@link #cuCtxCreate CtxCreate} or {@link #cuCtxAttach CtxAttach}, and must be current to the calling thread.

* * @param ctx context to destroy */ @NativeType("CUresult") public static int cuCtxDetach(@NativeType("CUcontext") long ctx) { long __functionAddress = Functions.CtxDetach; if (CHECKS) { check(ctx); } return callPI(ctx, __functionAddress); } // --- [ cuModuleLoad ] --- /** Unsafe version of: {@link #cuModuleLoad ModuleLoad} */ public static int ncuModuleLoad(long module, long fname) { long __functionAddress = Functions.ModuleLoad; return callPPI(module, fname, __functionAddress); } /** * Loads a compute module. * *

Takes a filename {@code fname} and loads the corresponding module {@code module} into the current context. The CUDA driver API does not attempt to * lazily allocate the resources needed by a module; if the memory for functions and data (constant and global) needed by the module cannot be allocated, * {@code cuModuleLoad()} fails. The file should be a cubin file as output by nvcc, or a PTX file either as output by nvcc * or handwritten, or a fatbin file as output by nvcc from toolchain 4.0 or later.

* * @param module returned module * @param fname filename of module to load */ @NativeType("CUresult") public static int cuModuleLoad(@NativeType("CUmodule *") PointerBuffer module, @NativeType("char const *") ByteBuffer fname) { if (CHECKS) { check(module, 1); checkNT1(fname); } return ncuModuleLoad(memAddress(module), memAddress(fname)); } /** * Loads a compute module. * *

Takes a filename {@code fname} and loads the corresponding module {@code module} into the current context. The CUDA driver API does not attempt to * lazily allocate the resources needed by a module; if the memory for functions and data (constant and global) needed by the module cannot be allocated, * {@code cuModuleLoad()} fails. The file should be a cubin file as output by nvcc, or a PTX file either as output by nvcc * or handwritten, or a fatbin file as output by nvcc from toolchain 4.0 or later.
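 A minimal sketch of loading a module from disk with the {@code CharSequence} overload; the file name is a placeholder and a context must already be current:

    import static org.lwjgl.cuda.CU.*;
    import static org.lwjgl.system.MemoryStack.stackPush;

    static long loadModuleExample() {
        try (var stack = stackPush()) {
            var pModule = stack.mallocPointer(1);
            // "kernels.ptx" is a placeholder; nvcc-produced cubin/fatbin files work as well.
            if (cuModuleLoad(pModule, "kernels.ptx") != CUDA_SUCCESS) {
                throw new IllegalStateException("cuModuleLoad failed");
            }
            return pModule.get(0); // CUmodule handle; release with cuModuleUnload when done
        }
    }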

* * @param module returned module * @param fname filename of module to load */ @NativeType("CUresult") public static int cuModuleLoad(@NativeType("CUmodule *") PointerBuffer module, @NativeType("char const *") CharSequence fname) { if (CHECKS) { check(module, 1); } MemoryStack stack = stackGet(); int stackPointer = stack.getPointer(); try { stack.nUTF8(fname, true); long fnameEncoded = stack.getPointerAddress(); return ncuModuleLoad(memAddress(module), fnameEncoded); } finally { stack.setPointer(stackPointer); } } // --- [ cuModuleLoadData ] --- /** Unsafe version of: {@link #cuModuleLoadData ModuleLoadData} */ public static int ncuModuleLoadData(long module, long image) { long __functionAddress = Functions.ModuleLoadData; return callPPI(module, image, __functionAddress); } /** * Load a module's data. * *

Takes a pointer {@code image} and loads the corresponding module {@code module} into the current context. The pointer may be obtained by mapping a * cubin or PTX or fatbin file, passing a cubin or PTX or fatbin file as a NULL-terminated text * string, or incorporating a cubin or fatbin object into the executable resources and using operating system calls such as Windows * {@code FindResource()} to obtain the pointer.
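 As a sketch, PTX source held in a Java string can be passed as a NUL-terminated image. {@code MemoryStack.UTF8} is used here on the assumption that it returns a NUL-terminated UTF-8 copy (LWJGL's usual behaviour); the PTX itself is supplied by the caller:

    import static org.lwjgl.cuda.CU.*;
    import static org.lwjgl.system.MemoryStack.stackPush;

    static long loadPtxExample(String ptxSource) {
        try (var stack = stackPush()) {
            var image   = stack.UTF8(ptxSource);  // NUL-terminated copy of the PTX text
            var pModule = stack.mallocPointer(1);
            if (cuModuleLoadData(pModule, image) != CUDA_SUCCESS) {
                throw new IllegalStateException("cuModuleLoadData failed");
            }
            return pModule.get(0);
        }
    }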

* * @param module returned module * @param image module data to load */ @NativeType("CUresult") public static int cuModuleLoadData(@NativeType("CUmodule *") PointerBuffer module, @NativeType("void const *") ByteBuffer image) { if (CHECKS) { check(module, 1); } return ncuModuleLoadData(memAddress(module), memAddress(image)); } // --- [ cuModuleLoadDataEx ] --- /** * Unsafe version of: {@link #cuModuleLoadDataEx ModuleLoadDataEx} * * @param numOptions number of options */ public static int ncuModuleLoadDataEx(long module, long image, int numOptions, long options, long optionValues) { long __functionAddress = Functions.ModuleLoadDataEx; return callPPPPI(module, image, numOptions, options, optionValues, __functionAddress); } /** * Load a module's data with options. * *

Takes a pointer {@code image} and loads the corresponding module {@code module} into the current context. The pointer may be obtained by mapping a * cubin or PTX or fatbin file, passing a cubin or PTX or fatbin file as a NULL-terminated text * string, or incorporating a cubin or fatbin object into the executable resources and using operating system calls such as Windows * {@code FindResource()} to obtain the pointer. Options are passed as an array via {@code options} and any corresponding parameters are passed in {@code * optionValues}. The number of total options is supplied via {@code numOptions}. Any outputs will be returned via {@code optionValues}.

* * @param module returned module * @param image module data to load * @param options options for JIT * @param optionValues option values for JIT */ @NativeType("CUresult") public static int cuModuleLoadDataEx(@NativeType("CUmodule *") PointerBuffer module, @NativeType("void const *") ByteBuffer image, @Nullable @NativeType("CUjit_option *") IntBuffer options, @Nullable @NativeType("void **") PointerBuffer optionValues) { if (CHECKS) { check(module, 1); checkSafe(optionValues, remainingSafe(options)); } return ncuModuleLoadDataEx(memAddress(module), memAddress(image), remainingSafe(options), memAddressSafe(options), memAddressSafe(optionValues)); } // --- [ cuModuleLoadFatBinary ] --- /** Unsafe version of: {@link #cuModuleLoadFatBinary ModuleLoadFatBinary} */ public static int ncuModuleLoadFatBinary(long module, long fatCubin) { long __functionAddress = Functions.ModuleLoadFatBinary; return callPPI(module, fatCubin, __functionAddress); } /** * Load a module's data. * *

Takes a pointer {@code fatCubin} and loads the corresponding module {@code module} into the current context. The pointer represents a fat binary * object, which is a collection of different cubin and/or PTX files, all representing the same device code, but compiled and optimized * for different architectures.

* *

 Prior to CUDA 4.0, there was no documented API for programmers to construct and use fat binary objects. Starting with CUDA 4.0, fat binary objects * can be constructed by providing the -fatbin option to nvcc. More information can be found in the nvcc documentation.

* * @param module returned module * @param fatCubin fat binary to load */ @NativeType("CUresult") public static int cuModuleLoadFatBinary(@NativeType("CUmodule *") PointerBuffer module, @NativeType("void const *") ByteBuffer fatCubin) { if (CHECKS) { check(module, 1); } return ncuModuleLoadFatBinary(memAddress(module), memAddress(fatCubin)); } // --- [ cuModuleUnload ] --- /** * Unloads a module. * *

Unloads a module {@code hmod} from the current context.

* * @param hmod module to unload */ @NativeType("CUresult") public static int cuModuleUnload(@NativeType("CUmodule") long hmod) { long __functionAddress = Functions.ModuleUnload; if (CHECKS) { check(hmod); } return callPI(hmod, __functionAddress); } // --- [ cuModuleGetFunction ] --- /** Unsafe version of: {@link #cuModuleGetFunction ModuleGetFunction} */ public static int ncuModuleGetFunction(long hfunc, long hmod, long name) { long __functionAddress = Functions.ModuleGetFunction; if (CHECKS) { check(hmod); } return callPPPI(hfunc, hmod, name, __functionAddress); } /** * Returns a function handle. * *

Returns in {@code *hfunc} the handle of the function of name {@code name} located in module {@code hmod}. If no function of that name exists, * {@code cuModuleGetFunction()} returns {@link #CUDA_ERROR_NOT_FOUND}.

* * @param hfunc returned function handle * @param hmod module to retrieve function from * @param name name of function to retrieve */ @NativeType("CUresult") public static int cuModuleGetFunction(@NativeType("CUfunction *") PointerBuffer hfunc, @NativeType("CUmodule") long hmod, @NativeType("char const *") ByteBuffer name) { if (CHECKS) { check(hfunc, 1); checkNT1(name); } return ncuModuleGetFunction(memAddress(hfunc), hmod, memAddress(name)); } /** * Returns a function handle. * *

Returns in {@code *hfunc} the handle of the function of name {@code name} located in module {@code hmod}. If no function of that name exists, * {@code cuModuleGetFunction()} returns {@link #CUDA_ERROR_NOT_FOUND}.
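 Building on a module handle obtained from one of the load functions above, a sketch of looking up a kernel with {@code cuModuleGetFunction()} and a device global with {@code cuModuleGetGlobal()} (documented below); the symbol names are placeholders:

    import static org.lwjgl.cuda.CU.*;
    import static org.lwjgl.system.MemoryStack.stackPush;

    static void lookupExample(long module) {
        try (var stack = stackPush()) {
            var hfunc = stack.mallocPointer(1);
            if (cuModuleGetFunction(hfunc, module, "axpy") != CUDA_SUCCESS) { // CUDA_ERROR_NOT_FOUND if absent
                throw new IllegalStateException("kernel not found");
            }
            var dptr  = stack.mallocPointer(1);
            var bytes = stack.mallocPointer(1);
            if (cuModuleGetGlobal(dptr, bytes, module, "scale") == CUDA_SUCCESS) {
                System.out.println("global 'scale' at 0x" + Long.toHexString(dptr.get(0)) + ", " + bytes.get(0) + " bytes");
            }
        }
    }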

* * @param hfunc returned function handle * @param hmod module to retrieve function from * @param name name of function to retrieve */ @NativeType("CUresult") public static int cuModuleGetFunction(@NativeType("CUfunction *") PointerBuffer hfunc, @NativeType("CUmodule") long hmod, @NativeType("char const *") CharSequence name) { if (CHECKS) { check(hfunc, 1); } MemoryStack stack = stackGet(); int stackPointer = stack.getPointer(); try { stack.nUTF8(name, true); long nameEncoded = stack.getPointerAddress(); return ncuModuleGetFunction(memAddress(hfunc), hmod, nameEncoded); } finally { stack.setPointer(stackPointer); } } // --- [ cuModuleGetGlobal ] --- /** Unsafe version of: {@link #cuModuleGetGlobal ModuleGetGlobal} */ public static int ncuModuleGetGlobal(long dptr, long bytes, long hmod, long name) { long __functionAddress = Functions.ModuleGetGlobal; if (CHECKS) { check(hmod); } return callPPPPI(dptr, bytes, hmod, name, __functionAddress); } /** * Returns a global pointer from a module. * *

Returns in {@code *dptr} and {@code *bytes} the base pointer and size of the global of name {@code name} located in module {@code hmod}. If no variable * of that name exists, {@code cuModuleGetGlobal()} returns {@link #CUDA_ERROR_NOT_FOUND}. Both parameters {@code dptr} and {@code bytes} are optional. If one of * them is {@code NULL}, it is ignored.

* * @param dptr returned global device pointer * @param bytes returned global size in bytes * @param hmod module to retrieve global from * @param name name of global to retrieve */ @NativeType("CUresult") public static int cuModuleGetGlobal(@Nullable @NativeType("CUdeviceptr *") PointerBuffer dptr, @Nullable @NativeType("size_t *") PointerBuffer bytes, @NativeType("CUmodule") long hmod, @NativeType("char const *") ByteBuffer name) { if (CHECKS) { checkSafe(dptr, 1); checkSafe(bytes, 1); checkNT1(name); } return ncuModuleGetGlobal(memAddressSafe(dptr), memAddressSafe(bytes), hmod, memAddress(name)); } /** * Returns a global pointer from a module. * *

Returns in {@code *dptr} and {@code *bytes} the base pointer and size of the global of name {@code name} located in module {@code hmod}. If no variable * of that name exists, {@code cuModuleGetGlobal()} returns {@link #CUDA_ERROR_NOT_FOUND}. Both parameters {@code dptr} and {@code bytes} are optional. If one of * them is {@code NULL}, it is ignored.

* * @param dptr returned global device pointer * @param bytes returned global size in bytes * @param hmod module to retrieve global from * @param name name of global to retrieve */ @NativeType("CUresult") public static int cuModuleGetGlobal(@Nullable @NativeType("CUdeviceptr *") PointerBuffer dptr, @Nullable @NativeType("size_t *") PointerBuffer bytes, @NativeType("CUmodule") long hmod, @NativeType("char const *") CharSequence name) { if (CHECKS) { checkSafe(dptr, 1); checkSafe(bytes, 1); } MemoryStack stack = stackGet(); int stackPointer = stack.getPointer(); try { stack.nUTF8(name, true); long nameEncoded = stack.getPointerAddress(); return ncuModuleGetGlobal(memAddressSafe(dptr), memAddressSafe(bytes), hmod, nameEncoded); } finally { stack.setPointer(stackPointer); } } // --- [ cuModuleGetTexRef ] --- /** Unsafe version of: {@link #cuModuleGetTexRef ModuleGetTexRef} */ public static int ncuModuleGetTexRef(long pTexRef, long hmod, long name) { long __functionAddress = Functions.ModuleGetTexRef; if (CHECKS) { check(hmod); } return callPPPI(pTexRef, hmod, name, __functionAddress); } /** * Returns a handle to a texture reference. * *

Returns in {@code *pTexRef} the handle of the texture reference of name {@code name} in the module {@code hmod}. If no texture reference of that name * exists, {@code cuModuleGetTexRef()} returns {@link #CUDA_ERROR_NOT_FOUND}. This texture reference handle should not be destroyed, since it will be destroyed * when the module is unloaded.

* * @param pTexRef returned texture reference * @param hmod module to retrieve texture reference from * @param name name of texture reference to retrieve */ @NativeType("CUresult") public static int cuModuleGetTexRef(@NativeType("CUtexref *") PointerBuffer pTexRef, @NativeType("CUmodule") long hmod, @NativeType("char const *") ByteBuffer name) { if (CHECKS) { check(pTexRef, 1); checkNT1(name); } return ncuModuleGetTexRef(memAddress(pTexRef), hmod, memAddress(name)); } /** * Returns a handle to a texture reference. * *

Returns in {@code *pTexRef} the handle of the texture reference of name {@code name} in the module {@code hmod}. If no texture reference of that name * exists, {@code cuModuleGetTexRef()} returns {@link #CUDA_ERROR_NOT_FOUND}. This texture reference handle should not be destroyed, since it will be destroyed * when the module is unloaded.

* * @param pTexRef returned texture reference * @param hmod module to retrieve texture reference from * @param name name of texture reference to retrieve */ @NativeType("CUresult") public static int cuModuleGetTexRef(@NativeType("CUtexref *") PointerBuffer pTexRef, @NativeType("CUmodule") long hmod, @NativeType("char const *") CharSequence name) { if (CHECKS) { check(pTexRef, 1); } MemoryStack stack = stackGet(); int stackPointer = stack.getPointer(); try { stack.nUTF8(name, true); long nameEncoded = stack.getPointerAddress(); return ncuModuleGetTexRef(memAddress(pTexRef), hmod, nameEncoded); } finally { stack.setPointer(stackPointer); } } // --- [ cuModuleGetSurfRef ] --- /** Unsafe version of: {@link #cuModuleGetSurfRef ModuleGetSurfRef} */ public static int ncuModuleGetSurfRef(long pSurfRef, long hmod, long name) { long __functionAddress = Functions.ModuleGetSurfRef; if (CHECKS) { check(hmod); } return callPPPI(pSurfRef, hmod, name, __functionAddress); } /** * Returns a handle to a surface reference. * *

Returns in {@code *pSurfRef} the handle of the surface reference of name {@code name} in the module {@code hmod}. If no surface reference of that name * exists, {@code cuModuleGetSurfRef()} returns {@link #CUDA_ERROR_NOT_FOUND}.

* * @param pSurfRef returned surface reference * @param hmod module to retrieve surface reference from * @param name name of surface reference to retrieve */ @NativeType("CUresult") public static int cuModuleGetSurfRef(@NativeType("CUsurfref *") PointerBuffer pSurfRef, @NativeType("CUmodule") long hmod, @NativeType("char const *") ByteBuffer name) { if (CHECKS) { check(pSurfRef, 1); checkNT1(name); } return ncuModuleGetSurfRef(memAddress(pSurfRef), hmod, memAddress(name)); } /** * Returns a handle to a surface reference. * *

Returns in {@code *pSurfRef} the handle of the surface reference of name {@code name} in the module {@code hmod}. If no surface reference of that name * exists, {@code cuModuleGetSurfRef()} returns {@link #CUDA_ERROR_NOT_FOUND}.

* * @param pSurfRef returned surface reference * @param hmod module to retrieve surface reference from * @param name name of surface reference to retrieve */ @NativeType("CUresult") public static int cuModuleGetSurfRef(@NativeType("CUsurfref *") PointerBuffer pSurfRef, @NativeType("CUmodule") long hmod, @NativeType("char const *") CharSequence name) { if (CHECKS) { check(pSurfRef, 1); } MemoryStack stack = stackGet(); int stackPointer = stack.getPointer(); try { stack.nUTF8(name, true); long nameEncoded = stack.getPointerAddress(); return ncuModuleGetSurfRef(memAddress(pSurfRef), hmod, nameEncoded); } finally { stack.setPointer(stackPointer); } } // --- [ cuLinkCreate ] --- /** * Unsafe version of: {@link #cuLinkCreate LinkCreate} * * @param numOptions size of options arrays */ public static int ncuLinkCreate(int numOptions, long options, long optionValues, long stateOut) { long __functionAddress = Functions.LinkCreate; if (CHECKS) { check(__functionAddress); } return callPPPI(numOptions, options, optionValues, stateOut, __functionAddress); } /** * Creates a pending JIT linker invocation. * *

If the call is successful, the caller owns the returned {@code CUlinkState}, which should eventually be destroyed with {@link #cuLinkDestroy LinkDestroy}. The device code * machine size (32 or 64 bit) will match the calling application.

* *

Both linker and compiler options may be specified. Compiler options will be applied to inputs to this linker action which must be compiled from PTX. * The options {@link #CU_JIT_WALL_TIME JIT_WALL_TIME}, {@link #CU_JIT_INFO_LOG_BUFFER_SIZE_BYTES JIT_INFO_LOG_BUFFER_SIZE_BYTES}, and {@link #CU_JIT_ERROR_LOG_BUFFER_SIZE_BYTES JIT_ERROR_LOG_BUFFER_SIZE_BYTES} will accumulate data until the {@code CUlinkState} is * destroyed.

* *

{@code optionValues} must remain valid for the life of the {@code CUlinkState} if output options are used. No other references to inputs are maintained * after this call returns.
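 A hedged sketch of the full JIT-linking flow (create, add PTX data, complete, load the resulting cubin, destroy). It assumes the standard {@code CUjitInputType} value {@code CU_JIT_INPUT_PTX} is exposed by this class under its CUDA name, and passes empty option buffers, which corresponds to {@code numOptions = 0}; the input name is a placeholder:

    import static org.lwjgl.cuda.CU.*;
    import static org.lwjgl.system.MemoryStack.stackPush;
    import static org.lwjgl.system.MemoryUtil.memByteBuffer;

    static long linkExample(String ptxSource) {
        try (var stack = stackPush()) {
            var noOptions = stack.mallocInt(0);     // zero options
            var noValues  = stack.mallocPointer(0);

            var pState = stack.mallocPointer(1);
            if (cuLinkCreate(noOptions, noValues, pState) != CUDA_SUCCESS) {
                throw new IllegalStateException("cuLinkCreate failed");
            }
            long state = pState.get(0);
            try {
                // PTX inputs must be NUL-terminated text.
                if (cuLinkAddData(state, CU_JIT_INPUT_PTX, stack.UTF8(ptxSource), "ptx-input", noOptions, noValues) != CUDA_SUCCESS) {
                    throw new IllegalStateException("cuLinkAddData failed");
                }
                var cubin = stack.mallocPointer(1);
                var size  = stack.mallocPointer(1);
                if (cuLinkComplete(state, cubin, size) != CUDA_SUCCESS) {
                    throw new IllegalStateException("cuLinkComplete failed");
                }
                // The cubin is owned by the link state: load it before cuLinkDestroy.
                var pModule = stack.mallocPointer(1);
                if (cuModuleLoadData(pModule, memByteBuffer(cubin.get(0), (int)size.get(0))) != CUDA_SUCCESS) {
                    throw new IllegalStateException("cuModuleLoadData failed");
                }
                return pModule.get(0);
            } finally {
                cuLinkDestroy(state);
            }
        }
    }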

* * @param options array of linker and compiler options * @param optionValues array of option values, each cast to void * * @param stateOut on success, this will contain a {@code CUlinkState} to specify and complete this action */ @NativeType("CUresult") public static int cuLinkCreate(@NativeType("CUjit_option *") IntBuffer options, @NativeType("void **") PointerBuffer optionValues, @NativeType("CUlinkState *") PointerBuffer stateOut) { if (CHECKS) { check(optionValues, options.remaining()); check(stateOut, 1); } return ncuLinkCreate(options.remaining(), memAddress(options), memAddress(optionValues), memAddress(stateOut)); } // --- [ cuLinkAddData ] --- /** * Unsafe version of: {@link #cuLinkAddData LinkAddData} * * @param size the length of the input data * @param numOptions size of options */ public static int ncuLinkAddData(long state, int type, long data, long size, long name, int numOptions, long options, long optionValues) { long __functionAddress = Functions.LinkAddData; if (CHECKS) { check(__functionAddress); check(state); } return callPPPPPPI(state, type, data, size, name, numOptions, options, optionValues, __functionAddress); } /** * Add an input to a pending linker invocation. * *

Ownership of {@code data} is retained by the caller. No reference is retained to any inputs after this call returns.

* *

This method accepts only compiler options, which are used if the data must be compiled from PTX, and does not accept any of {@link #CU_JIT_WALL_TIME JIT_WALL_TIME}, * {@link #CU_JIT_INFO_LOG_BUFFER JIT_INFO_LOG_BUFFER}, {@link #CU_JIT_ERROR_LOG_BUFFER JIT_ERROR_LOG_BUFFER}, {@link #CU_JIT_TARGET_FROM_CUCONTEXT JIT_TARGET_FROM_CUCONTEXT}, or {@link #CU_JIT_TARGET JIT_TARGET}.

* * @param state a pending linker action * @param type the type of the input data * @param data the input data. PTX must be NULL-terminated. * @param name an optional name for this input in log messages * @param options options to be applied only for this input (overrides options from {@link #cuLinkCreate LinkCreate}) * @param optionValues array of option values, each cast to void * */ @NativeType("CUresult") public static int cuLinkAddData(@NativeType("CUlinkState") long state, @NativeType("CUjitInputType") int type, @NativeType("void *") ByteBuffer data, @NativeType("char const *") ByteBuffer name, @NativeType("CUjit_option *") IntBuffer options, @NativeType("void **") PointerBuffer optionValues) { if (CHECKS) { checkNT1(name); check(optionValues, options.remaining()); } return ncuLinkAddData(state, type, memAddress(data), data.remaining(), memAddress(name), options.remaining(), memAddress(options), memAddress(optionValues)); } /** * Add an input to a pending linker invocation. * *

Ownership of {@code data} is retained by the caller. No reference is retained to any inputs after this call returns.

* *

This method accepts only compiler options, which are used if the data must be compiled from PTX, and does not accept any of {@link #CU_JIT_WALL_TIME JIT_WALL_TIME}, * {@link #CU_JIT_INFO_LOG_BUFFER JIT_INFO_LOG_BUFFER}, {@link #CU_JIT_ERROR_LOG_BUFFER JIT_ERROR_LOG_BUFFER}, {@link #CU_JIT_TARGET_FROM_CUCONTEXT JIT_TARGET_FROM_CUCONTEXT}, or {@link #CU_JIT_TARGET JIT_TARGET}.

* * @param state a pending linker action * @param type the type of the input data * @param data the input data. PTX must be NULL-terminated. * @param name an optional name for this input in log messages * @param options options to be applied only for this input (overrides options from {@link #cuLinkCreate LinkCreate}) * @param optionValues array of option values, each cast to void * */ @NativeType("CUresult") public static int cuLinkAddData(@NativeType("CUlinkState") long state, @NativeType("CUjitInputType") int type, @NativeType("void *") ByteBuffer data, @NativeType("char const *") CharSequence name, @NativeType("CUjit_option *") IntBuffer options, @NativeType("void **") PointerBuffer optionValues) { if (CHECKS) { check(optionValues, options.remaining()); } MemoryStack stack = stackGet(); int stackPointer = stack.getPointer(); try { stack.nUTF8(name, true); long nameEncoded = stack.getPointerAddress(); return ncuLinkAddData(state, type, memAddress(data), data.remaining(), nameEncoded, options.remaining(), memAddress(options), memAddress(optionValues)); } finally { stack.setPointer(stackPointer); } } // --- [ cuLinkAddFile ] --- /** * Unsafe version of: {@link #cuLinkAddFile LinkAddFile} * * @param numOptions size of options */ public static int ncuLinkAddFile(long state, int type, long path, int numOptions, long options, long optionValues) { long __functionAddress = Functions.LinkAddFile; if (CHECKS) { check(__functionAddress); check(state); } return callPPPPI(state, type, path, numOptions, options, optionValues, __functionAddress); } /** * Add a file input to a pending linker invocation. * *

No reference is retained to any inputs after this call returns.

* *

This method accepts only compiler options, which are used if the input must be compiled from PTX, and does not accept any of {@link #CU_JIT_WALL_TIME JIT_WALL_TIME}, * {@link #CU_JIT_INFO_LOG_BUFFER JIT_INFO_LOG_BUFFER}, {@link #CU_JIT_ERROR_LOG_BUFFER JIT_ERROR_LOG_BUFFER}, {@link #CU_JIT_TARGET_FROM_CUCONTEXT JIT_TARGET_FROM_CUCONTEXT}, or {@link #CU_JIT_TARGET JIT_TARGET}.

* *

This method is equivalent to invoking {@link #cuLinkAddData LinkAddData} on the contents of the file.

* * @param state a pending linker action * @param type the type of the input data * @param path path to the input file * @param options options to be applied only for this input (overrides options from {@link #cuLinkCreate LinkCreate}) * @param optionValues array of option values, each cast to void * */ @NativeType("CUresult") public static int cuLinkAddFile(@NativeType("CUlinkState") long state, @NativeType("CUjitInputType") int type, @NativeType("char const *") ByteBuffer path, @NativeType("CUjit_option *") IntBuffer options, @NativeType("void **") PointerBuffer optionValues) { if (CHECKS) { checkNT1(path); check(optionValues, options.remaining()); } return ncuLinkAddFile(state, type, memAddress(path), options.remaining(), memAddress(options), memAddress(optionValues)); } /** * Add a file input to a pending linker invocation. * *

No reference is retained to any inputs after this call returns.

* *

This method accepts only compiler options, which are used if the input must be compiled from PTX, and does not accept any of {@link #CU_JIT_WALL_TIME JIT_WALL_TIME}, * {@link #CU_JIT_INFO_LOG_BUFFER JIT_INFO_LOG_BUFFER}, {@link #CU_JIT_ERROR_LOG_BUFFER JIT_ERROR_LOG_BUFFER}, {@link #CU_JIT_TARGET_FROM_CUCONTEXT JIT_TARGET_FROM_CUCONTEXT}, or {@link #CU_JIT_TARGET JIT_TARGET}.

* *

This method is equivalent to invoking {@link #cuLinkAddData LinkAddData} on the contents of the file.

* * @param state a pending linker action * @param type the type of the input data * @param path path to the input file * @param options options to be applied only for this input (overrides options from {@link #cuLinkCreate LinkCreate}) * @param optionValues array of option values, each cast to void * */ @NativeType("CUresult") public static int cuLinkAddFile(@NativeType("CUlinkState") long state, @NativeType("CUjitInputType") int type, @NativeType("char const *") CharSequence path, @NativeType("CUjit_option *") IntBuffer options, @NativeType("void **") PointerBuffer optionValues) { if (CHECKS) { check(optionValues, options.remaining()); } MemoryStack stack = stackGet(); int stackPointer = stack.getPointer(); try { stack.nUTF8(path, true); long pathEncoded = stack.getPointerAddress(); return ncuLinkAddFile(state, type, pathEncoded, options.remaining(), memAddress(options), memAddress(optionValues)); } finally { stack.setPointer(stackPointer); } } // --- [ cuLinkComplete ] --- /** Unsafe version of: {@link #cuLinkComplete LinkComplete} */ public static int ncuLinkComplete(long state, long cubinOut, long sizeOut) { long __functionAddress = Functions.LinkComplete; if (CHECKS) { check(__functionAddress); check(state); } return callPPPI(state, cubinOut, sizeOut, __functionAddress); } /** * Complete a pending linker invocation. * *

Completes the pending linker action and returns the cubin image for the linked device code, which can be used with {@link #cuModuleLoadData ModuleLoadData}. The cubin is * owned by {@code state}, so it should be loaded before {@code state} is destroyed via {@link #cuLinkDestroy LinkDestroy}. This call does not destroy {@code state}.

* * @param state a pending linker invocation * @param cubinOut on success, this will point to the output image * @param sizeOut optional parameter to receive the size of the generated image */ @NativeType("CUresult") public static int cuLinkComplete(@NativeType("CUlinkState") long state, @NativeType("void **") PointerBuffer cubinOut, @NativeType("size_t *") PointerBuffer sizeOut) { if (CHECKS) { check(cubinOut, 1); check(sizeOut, 1); } return ncuLinkComplete(state, memAddress(cubinOut), memAddress(sizeOut)); } // --- [ cuLinkDestroy ] --- /** * Destroys state for a JIT linker invocation. * * @param state state object for the linker invocation */ @NativeType("CUresult") public static int cuLinkDestroy(@NativeType("CUlinkState") long state) { long __functionAddress = Functions.LinkDestroy; if (CHECKS) { check(__functionAddress); check(state); } return callPI(state, __functionAddress); } // --- [ cuMemGetInfo ] --- /** Unsafe version of: {@link #cuMemGetInfo MemGetInfo} */ public static int ncuMemGetInfo(long free, long total) { long __functionAddress = Functions.MemGetInfo; return callPPI(free, total, __functionAddress); } /** * Gets free and total memory. * *

 Returns in {@code *total} the total amount of memory available to the current context. Returns in {@code *free} the amount of memory on the device * that is free according to the OS. CUDA is not guaranteed to be able to allocate all of the memory that the OS reports as free.
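 A minimal query sketch (a context must be current):

    import static org.lwjgl.cuda.CU.*;
    import static org.lwjgl.system.MemoryStack.stackPush;

    static void memInfoExample() {
        try (var stack = stackPush()) {
            var free  = stack.mallocPointer(1);
            var total = stack.mallocPointer(1);
            if (cuMemGetInfo(free, total) == CUDA_SUCCESS) {
                System.out.printf("Device memory: %d MiB free of %d MiB%n", free.get(0) >> 20, total.get(0) >> 20);
            }
        }
    }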

* * @param free returned free memory in bytes * @param total returned total memory in bytes */ @NativeType("CUresult") public static int cuMemGetInfo(@NativeType("size_t *") PointerBuffer free, @NativeType("size_t *") PointerBuffer total) { if (CHECKS) { check(free, 1); check(total, 1); } return ncuMemGetInfo(memAddress(free), memAddress(total)); } // --- [ cuMemAlloc ] --- /** Unsafe version of: {@link #cuMemAlloc MemAlloc} */ public static int ncuMemAlloc(long dptr, long bytesize) { long __functionAddress = Functions.MemAlloc; return callPPI(dptr, bytesize, __functionAddress); } /** * Allocates device memory. * *

Allocates {@code bytesize} bytes of linear memory on the device and returns in {@code *dptr} a pointer to the allocated memory. The allocated memory is * suitably aligned for any kind of variable. The memory is not cleared. If {@code bytesize} is 0, {@code cuMemAlloc()} returns {@link #CUDA_ERROR_INVALID_VALUE}.
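 A sketch of the allocate/free pair; the size is arbitrary and a context must be current:

    import static org.lwjgl.cuda.CU.*;
    import static org.lwjgl.system.MemoryStack.stackPush;

    static void memAllocExample() {
        try (var stack = stackPush()) {
            var dptr = stack.mallocPointer(1);
            if (cuMemAlloc(dptr, 1024 * 1024) != CUDA_SUCCESS) { // 1 MiB; a size of 0 is invalid
                throw new IllegalStateException("cuMemAlloc failed");
            }
            long devicePtr = dptr.get(0);
            // ... copy data to/from devicePtr, launch kernels ...
            cuMemFree(devicePtr);
        }
    }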

* * @param dptr returned device pointer * @param bytesize requested allocation size in bytes */ @NativeType("CUresult") public static int cuMemAlloc(@NativeType("CUdeviceptr *") PointerBuffer dptr, @NativeType("size_t") long bytesize) { if (CHECKS) { check(dptr, 1); } return ncuMemAlloc(memAddress(dptr), bytesize); } // --- [ cuMemAllocPitch ] --- /** Unsafe version of: {@link #cuMemAllocPitch MemAllocPitch} */ public static int ncuMemAllocPitch(long dptr, long pPitch, long WidthInBytes, long Height, int ElementSizeBytes) { long __functionAddress = Functions.MemAllocPitch; return callPPPPI(dptr, pPitch, WidthInBytes, Height, ElementSizeBytes, __functionAddress); } /** * Allocates pitched device memory. * *

Allocates at least {@code WidthInBytes} * {@code Height} bytes of linear memory on the device and returns in {@code *dptr} a pointer to the allocated * memory. The function may pad the allocation to ensure that corresponding pointers in any given row will continue to meet the alignment requirements for * coalescing as the address is updated from row to row. {@code ElementSizeBytes} specifies the size of the largest reads and writes that will be * performed on the memory range. {@code ElementSizeBytes} may be 4, 8 or 16 (since coalesced memory transactions are not possible on other data sizes). * If {@code ElementSizeBytes} is smaller than the actual read/write size of a kernel, the kernel will run correctly, but possibly at reduced speed. The * pitch returned in {@code *pPitch} by {@code cuMemAllocPitch()} is the width in bytes of the allocation. The intended usage of pitch is as a separate * parameter of the allocation, used to compute addresses within the 2D array. Given the row and column of an array element of type T, the address * is computed as:

* *

     *    T* pElement = (T*)((char*)BaseAddress + Row * Pitch) + Column;
* *

The pitch returned by {@code cuMemAllocPitch()} is guaranteed to work with {@link #cuMemcpy2D Memcpy2D} under all circumstances. For allocations of 2D arrays, it is * recommended that programmers consider performing pitch allocations using {@code cuMemAllocPitch()}. Due to alignment restrictions in the hardware, this * is especially true if the application will be performing 2D memory copies between different regions of device memory (whether linear memory or CUDA * arrays).

* *

The byte alignment of the pitch returned by {@code cuMemAllocPitch()} is guaranteed to match or exceed the alignment requirement for texture binding * with {@link #cuTexRefSetAddress2D TexRefSetAddress2D}.
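 The Java analogue of the addressing formula above, sketched for a hypothetical allocation of 4-byte (float) elements:

    import static org.lwjgl.cuda.CU.*;
    import static org.lwjgl.system.MemoryStack.stackPush;

    static void pitchedAllocExample(int width, int height) {
        try (var stack = stackPush()) {
            var dptr   = stack.mallocPointer(1);
            var pPitch = stack.mallocPointer(1);
            // 4-byte elements; ElementSizeBytes must be 4, 8 or 16.
            if (cuMemAllocPitch(dptr, pPitch, (long)width * Float.BYTES, height, Float.BYTES) != CUDA_SUCCESS) {
                throw new IllegalStateException("cuMemAllocPitch failed");
            }
            long base  = dptr.get(0);
            long pitch = pPitch.get(0);
            // Address of element (row, column), mirroring the C formula above.
            int row = 3, column = 7;
            long element = base + row * pitch + (long)column * Float.BYTES;
            System.out.println("Element address: 0x" + Long.toHexString(element));
            cuMemFree(base);
        }
    }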

* * @param dptr returned device pointer * @param pPitch returned pitch of allocation in bytes * @param WidthInBytes requested allocation width in bytes * @param Height requested allocation height in rows * @param ElementSizeBytes size of largest reads/writes for range */ @NativeType("CUresult") public static int cuMemAllocPitch(@NativeType("CUdeviceptr *") PointerBuffer dptr, @NativeType("size_t *") PointerBuffer pPitch, @NativeType("size_t") long WidthInBytes, @NativeType("size_t") long Height, @NativeType("unsigned int") int ElementSizeBytes) { if (CHECKS) { check(dptr, 1); check(pPitch, 1); } return ncuMemAllocPitch(memAddress(dptr), memAddress(pPitch), WidthInBytes, Height, ElementSizeBytes); } // --- [ cuMemFree ] --- /** * Frees device memory. * *

Frees the memory space pointed to by {@code dptr}, which must have been returned by a previous call to {@link #cuMemAlloc MemAlloc} or {@link #cuMemAllocPitch MemAllocPitch}.

* * @param dptr pointer to memory to free */ @NativeType("CUresult") public static int cuMemFree(@NativeType("CUdeviceptr") long dptr) { long __functionAddress = Functions.MemFree; if (CHECKS) { check(dptr); } return callPI(dptr, __functionAddress); } // --- [ cuMemGetAddressRange ] --- /** Unsafe version of: {@link #cuMemGetAddressRange MemGetAddressRange} */ public static int ncuMemGetAddressRange(long pbase, long psize, long dptr) { long __functionAddress = Functions.MemGetAddressRange; if (CHECKS) { check(dptr); } return callPPPI(pbase, psize, dptr, __functionAddress); } /** * Get information on memory allocations. * *

Returns the base address in {@code *pbase} and size in {@code *psize} of the allocation by {@link #cuMemAlloc MemAlloc} or {@link #cuMemAllocPitch MemAllocPitch} that contains the input * pointer {@code dptr}. Both parameters {@code pbase} and {@code psize} are optional. If one of them is {@code NULL}, it is ignored.

* * @param pbase returned base address * @param psize returned size of device memory allocation * @param dptr device pointer to query */ @NativeType("CUresult") public static int cuMemGetAddressRange(@Nullable @NativeType("CUdeviceptr *") PointerBuffer pbase, @Nullable @NativeType("size_t *") PointerBuffer psize, @NativeType("CUdeviceptr") long dptr) { if (CHECKS) { checkSafe(pbase, 1); checkSafe(psize, 1); } return ncuMemGetAddressRange(memAddressSafe(pbase), memAddressSafe(psize), dptr); } // --- [ cuMemAllocHost ] --- /** Unsafe version of: {@link #cuMemAllocHost MemAllocHost} */ public static int ncuMemAllocHost(long pp, long bytesize) { long __functionAddress = Functions.MemAllocHost; return callPPI(pp, bytesize, __functionAddress); } /** * Allocates page-locked host memory. * *

Allocates {@code bytesize} bytes of host memory that is page-locked and accessible to the device. The driver tracks the virtual memory ranges allocated * with this function and automatically accelerates calls to functions such as {@link #cuMemcpy Memcpy}. Since the memory can be accessed directly by the device, it can * be read or written with much higher bandwidth than pageable memory obtained with functions such as {@code malloc()}. Allocating excessive amounts of * memory with {@code cuMemAllocHost()} may degrade system performance, since it reduces the amount of memory available to the system for paging. As a * result, this function is best used sparingly to allocate staging areas for data exchange between host and device.

* *

 Note that all host memory allocated using {@code cuMemAllocHost()} will automatically be immediately accessible to all contexts on all devices which support * unified addressing (as may be queried using {@link #CU_DEVICE_ATTRIBUTE_UNIFIED_ADDRESSING DEVICE_ATTRIBUTE_UNIFIED_ADDRESSING}). The device pointer that may be used to access this host memory from * those contexts is always equal to the returned host pointer {@code *pp}. See {@code CUDA_UNIFIED} for additional details.

* * @param pp returned host pointer to page-locked memory * @param bytesize requested allocation size in bytes */ @NativeType("CUresult") public static int cuMemAllocHost(@NativeType("void **") PointerBuffer pp, @NativeType("size_t") long bytesize) { if (CHECKS) { check(pp, 1); } return ncuMemAllocHost(memAddress(pp), bytesize); } // --- [ cuMemFreeHost ] --- /** Unsafe version of: {@link #cuMemFreeHost MemFreeHost} */ public static int ncuMemFreeHost(long p) { long __functionAddress = Functions.MemFreeHost; return callPI(p, __functionAddress); } /** * Frees page-locked host memory. * *

Frees the memory space pointed to by {@code p}, which must have been returned by a previous call to {@link #cuMemAllocHost MemAllocHost}.

* * @param p pointer to memory to free */ @NativeType("CUresult") public static int cuMemFreeHost(@NativeType("void *") ByteBuffer p) { return ncuMemFreeHost(memAddress(p)); } // --- [ cuMemHostAlloc ] --- /** Unsafe version of: {@link #cuMemHostAlloc MemHostAlloc} */ public static int ncuMemHostAlloc(long pp, long bytesize, int Flags) { long __functionAddress = Functions.MemHostAlloc; return callPPI(pp, bytesize, Flags, __functionAddress); } /** * Allocates page-locked host memory. * *

Allocates {@code bytesize} bytes of host memory that is page-locked and accessible to the device. The driver tracks the virtual memory ranges allocated * with this function and automatically accelerates calls to functions such as {@link #cuMemcpyHtoD MemcpyHtoD}. Since the memory can be accessed directly by the device, * it can be read or written with much higher bandwidth than pageable memory obtained with functions such as {@code malloc()}. Allocating excessive * amounts of pinned memory may degrade system performance, since it reduces the amount of memory available to the system for paging. As a result, this * function is best used sparingly to allocate staging areas for data exchange between host and device.

* *

The {@code Flags} parameter enables different options to be specified that affect the allocation, as follows:

* *
    *
  • {@link #CU_MEMHOSTALLOC_PORTABLE MEMHOSTALLOC_PORTABLE}: The memory returned by this call will be considered as pinned memory by all CUDA contexts, not just the one that performed * the allocation.
  • *
  • {@link #CU_MEMHOSTALLOC_DEVICEMAP MEMHOSTALLOC_DEVICEMAP}: Maps the allocation into the CUDA address space. The device pointer to the memory may be obtained by calling * {@link #cuMemHostGetDevicePointer MemHostGetDevicePointer}.
  • *
  • {@link #CU_MEMHOSTALLOC_WRITECOMBINED MEMHOSTALLOC_WRITECOMBINED}: Allocates the memory as write-combined (WC). WC memory can be transferred across the PCI Express bus more quickly on * some system configurations, but cannot be read efficiently by most CPUs. WC memory is a good option for buffers that will be written by the CPU and * read by the GPU via mapped pinned memory or host->device transfers.
  • *
* *

All of these flags are orthogonal to one another: a developer may allocate memory that is portable, mapped and/or write-combined with no restrictions.

* *

The {@link #CU_MEMHOSTALLOC_DEVICEMAP MEMHOSTALLOC_DEVICEMAP} flag may be specified on CUDA contexts for devices that do not support mapped pinned memory. The failure is deferred to * {@link #cuMemHostGetDevicePointer MemHostGetDevicePointer} because the memory may be mapped into other CUDA contexts via the {@link #CU_MEMHOSTALLOC_PORTABLE MEMHOSTALLOC_PORTABLE} flag.

* *

The memory allocated by this function must be freed with {@link #cuMemFreeHost MemFreeHost}.

* *

Note all host memory allocated using {@code cuMemHostAlloc()} will automatically be immediately accessible to all contexts on all devices which support * unified addressing (as may be queried using {@link #CU_DEVICE_ATTRIBUTE_UNIFIED_ADDRESSING DEVICE_ATTRIBUTE_UNIFIED_ADDRESSING}). Unless the flag {@link #CU_MEMHOSTALLOC_WRITECOMBINED MEMHOSTALLOC_WRITECOMBINED} is specified, the device * pointer that may be used to access this host memory from those contexts is always equal to the returned host pointer {@code *pp}. If the flag * {@link #CU_MEMHOSTALLOC_WRITECOMBINED MEMHOSTALLOC_WRITECOMBINED} is specified, then the function {@link #cuMemHostGetDevicePointer MemHostGetDevicePointer} must be used to query the device pointer, even if the context * supports unified addressing. See {@code CUDA_UNIFIED} for additional details.
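 A hedged sketch combining {@code cuMemHostAlloc()} with {@code cuMemHostGetDevicePointer()} (documented below); {@code memByteBuffer} from {@code MemoryUtil} is used to view the returned host pointer as a {@code ByteBuffer}:

    import static org.lwjgl.cuda.CU.*;
    import static org.lwjgl.system.MemoryStack.stackPush;
    import static org.lwjgl.system.MemoryUtil.memByteBuffer;

    static void pinnedMemoryExample() {
        int bytes = 1 << 20; // 1 MiB staging buffer
        try (var stack = stackPush()) {
            var pp = stack.mallocPointer(1);
            if (cuMemHostAlloc(pp, bytes, CU_MEMHOSTALLOC_PORTABLE | CU_MEMHOSTALLOC_DEVICEMAP) != CUDA_SUCCESS) {
                throw new IllegalStateException("cuMemHostAlloc failed");
            }
            var host  = memByteBuffer(pp.get(0), bytes); // page-locked host memory, accessible from Java
            var pdptr = stack.mallocPointer(1);
            if (cuMemHostGetDevicePointer(pdptr, host, 0) == CUDA_SUCCESS) { // Flags must be 0
                System.out.println("Mapped device pointer: 0x" + Long.toHexString(pdptr.get(0)));
            }
            cuMemFreeHost(host); // must be freed with cuMemFreeHost
        }
    }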

* * @param pp returned host pointer to page-locked memory * @param bytesize requested allocation size in bytes * @param Flags flags for allocation request */ @NativeType("CUresult") public static int cuMemHostAlloc(@NativeType("void **") PointerBuffer pp, @NativeType("size_t") long bytesize, @NativeType("unsigned int") int Flags) { if (CHECKS) { check(pp, 1); } return ncuMemHostAlloc(memAddress(pp), bytesize, Flags); } // --- [ cuMemHostGetDevicePointer ] --- /** Unsafe version of: {@link #cuMemHostGetDevicePointer MemHostGetDevicePointer} */ public static int ncuMemHostGetDevicePointer(long pdptr, long p, int Flags) { long __functionAddress = Functions.MemHostGetDevicePointer; return callPPI(pdptr, p, Flags, __functionAddress); } /** * Passes back device pointer of mapped pinned memory. * *

Passes back the device pointer {@code pdptr} corresponding to the mapped, pinned host buffer {@code p} allocated by {@link #cuMemHostAlloc MemHostAlloc}.

* *

{@code cuMemHostGetDevicePointer()} will fail if the {@link #CU_MEMHOSTALLOC_DEVICEMAP MEMHOSTALLOC_DEVICEMAP} flag was not specified at the time the memory was allocated, or if the * function is called on a GPU that does not support mapped pinned memory.

* *

 For devices that have a non-zero value for the device attribute {@link #CU_DEVICE_ATTRIBUTE_CAN_USE_HOST_POINTER_FOR_REGISTERED_MEM DEVICE_ATTRIBUTE_CAN_USE_HOST_POINTER_FOR_REGISTERED_MEM}, the memory can also be * accessed from the device using the host pointer {@code p}. The device pointer returned by {@code cuMemHostGetDevicePointer()} may or may not match the * original host pointer {@code p} and depends on the devices visible to the application. If all devices visible to the application have a non-zero value * for the device attribute, the device pointer returned by {@code cuMemHostGetDevicePointer()} will match the original pointer {@code p}. If any device * visible to the application has a zero value for the device attribute, the device pointer returned by {@code cuMemHostGetDevicePointer()} will not match * the original host pointer {@code p}, but it will be suitable for use on all devices provided Unified Virtual Addressing is enabled. In such systems, it * is valid to access the memory using either pointer on devices that have a non-zero value for the device attribute. Note however that such devices * should access the memory using only one of the two pointers and not both.

* *

 {@code Flags} is provided for future releases. For now, it must be set to 0.

* * @param pdptr returned device pointer * @param p host pointer * @param Flags options (must be 0) */ @NativeType("CUresult") public static int cuMemHostGetDevicePointer(@NativeType("CUdeviceptr *") PointerBuffer pdptr, @NativeType("void *") ByteBuffer p, @NativeType("unsigned int") int Flags) { if (CHECKS) { check(pdptr, 1); } return ncuMemHostGetDevicePointer(memAddress(pdptr), memAddress(p), Flags); } // --- [ cuMemHostGetFlags ] --- /** Unsafe version of: {@link #cuMemHostGetFlags MemHostGetFlags} */ public static int ncuMemHostGetFlags(long pFlags, long p) { long __functionAddress = Functions.MemHostGetFlags; return callPPI(pFlags, p, __functionAddress); } /** * Passes back flags that were used for a pinned allocation * *

Passes back the flags {@code pFlags} that were specified when allocating the pinned host buffer {@code p} allocated by {@link #cuMemHostAlloc MemHostAlloc}.

* *

{@code cuMemHostGetFlags()} will fail if the pointer does not reside in an allocation performed by {@link #cuMemAllocHost MemAllocHost} or {@code cuMemHostAlloc()}.

* * @param pFlags returned flags word * @param p host pointer */ @NativeType("CUresult") public static int cuMemHostGetFlags(@NativeType("unsigned int *") IntBuffer pFlags, @NativeType("void *") ByteBuffer p) { if (CHECKS) { check(pFlags, 1); } return ncuMemHostGetFlags(memAddress(pFlags), memAddress(p)); } // --- [ cuMemAllocManaged ] --- /** Unsafe version of: {@link #cuMemAllocManaged MemAllocManaged} */ public static int ncuMemAllocManaged(long dptr, long bytesize, int flags) { long __functionAddress = Functions.MemAllocManaged; if (CHECKS) { check(__functionAddress); } return callPPI(dptr, bytesize, flags, __functionAddress); } /** * Allocates memory that will be automatically managed by the Unified Memory system. * *

Allocates {@code bytesize} bytes of managed memory on the device and returns in {@code *dptr} a pointer to the allocated memory. If the device doesn't * support allocating managed memory, {@link #CUDA_ERROR_NOT_SUPPORTED} is returned. Support for managed memory can be queried using the device attribute * {@link #CU_DEVICE_ATTRIBUTE_MANAGED_MEMORY DEVICE_ATTRIBUTE_MANAGED_MEMORY}. The allocated memory is suitably aligned for any kind of variable. The memory is not cleared. If {@code bytesize} * is 0, {@link #cuMemAllocManaged MemAllocManaged} returns {@link #CUDA_ERROR_INVALID_VALUE}. The pointer is valid on the CPU and on all GPUs in the system that support managed memory. * All accesses to this pointer must obey the Unified Memory programming model.

* *

{@code flags} specifies the default stream association for this allocation. {@code flags} must be one of {@link #CU_MEM_ATTACH_GLOBAL MEM_ATTACH_GLOBAL} or {@link #CU_MEM_ATTACH_HOST MEM_ATTACH_HOST}. If * {@link #CU_MEM_ATTACH_GLOBAL MEM_ATTACH_GLOBAL} is specified, then this memory is accessible from any stream on any device. If {@link #CU_MEM_ATTACH_HOST MEM_ATTACH_HOST} is specified, then the allocation * should not be accessed from devices that have a zero value for the device attribute {@link #CU_DEVICE_ATTRIBUTE_CONCURRENT_MANAGED_ACCESS DEVICE_ATTRIBUTE_CONCURRENT_MANAGED_ACCESS}; an explicit call to * {@link #cuStreamAttachMemAsync StreamAttachMemAsync} will be required to enable access on such devices.

* *

 If the association is later changed via {@link #cuStreamAttachMemAsync StreamAttachMemAsync} to a single stream, the default association as specified during {@link #cuMemAllocManaged MemAllocManaged} is * restored when that stream is destroyed. For __managed__ variables, the default association is always {@link #CU_MEM_ATTACH_GLOBAL MEM_ATTACH_GLOBAL}. Note that destroying a stream * is an asynchronous operation, and as a result, the change to default association won't happen until all work in the stream has completed.

* *

Memory allocated with {@link #cuMemAllocManaged MemAllocManaged} should be released with {@link #cuMemFree MemFree}.

* *

Device memory oversubscription is possible for GPUs that have a non-zero value for the device attribute {@link #CU_DEVICE_ATTRIBUTE_CONCURRENT_MANAGED_ACCESS DEVICE_ATTRIBUTE_CONCURRENT_MANAGED_ACCESS}. * Managed memory on such GPUs may be evicted from device memory to host memory at any time by the Unified Memory driver in order to make room for other * allocations.

* *

In a multi-GPU system where all GPUs have a non-zero value for the device attribute {@link #CU_DEVICE_ATTRIBUTE_CONCURRENT_MANAGED_ACCESS DEVICE_ATTRIBUTE_CONCURRENT_MANAGED_ACCESS}, managed memory may * not be populated when this API returns and instead may be populated on access. In such systems, managed memory can migrate to any processor's memory at * any time. The Unified Memory driver will employ heuristics to maintain data locality and prevent excessive page faults to the extent possible. The * application can also guide the driver about memory usage patterns via {@link #cuMemAdvise MemAdvise}. The application can also explicitly migrate memory to a desired * processor's memory via {@link #cuMemPrefetchAsync MemPrefetchAsync}.

* *

In a multi-GPU system where all of the GPUs have a zero value for the device attribute {@link #CU_DEVICE_ATTRIBUTE_CONCURRENT_MANAGED_ACCESS DEVICE_ATTRIBUTE_CONCURRENT_MANAGED_ACCESS} and all the GPUs * have peer-to-peer support with each other, the physical storage for managed memory is created on the GPU which is active at the time * {@link #cuMemAllocManaged MemAllocManaged} is called. All other GPUs will reference the data at reduced bandwidth via peer mappings over the PCIe bus. The Unified Memory * driver does not migrate memory among such GPUs.

* *

In a multi-GPU system where not all GPUs have peer-to-peer support with each other and where the value of the device attribute * {@link #CU_DEVICE_ATTRIBUTE_CONCURRENT_MANAGED_ACCESS DEVICE_ATTRIBUTE_CONCURRENT_MANAGED_ACCESS} is zero for at least one of those GPUs, the location chosen for physical storage of managed memory is * system-dependent.

* *
  • On Linux, the location chosen will be device memory as long as the current set of active contexts are on devices that either have peer-to-peer * support with each other or have a non-zero value for the device attribute {@link #CU_DEVICE_ATTRIBUTE_CONCURRENT_MANAGED_ACCESS DEVICE_ATTRIBUTE_CONCURRENT_MANAGED_ACCESS}. If there is an active * context on a GPU that does not have a non-zero value for that device attribute and it does not have peer-to-peer support with the other devices * that have active contexts on them, then the location for physical storage will be 'zero-copy' or host memory. Note that this means that managed * memory that is located in device memory is migrated to host memory if a new context is created on a GPU that doesn't have a non-zero value for the * device attribute and does not support peer-to-peer with at least one of the other devices that has an active context. This in turn implies that * context creation may fail if there is insufficient host memory to migrate all managed allocations.
  • On Windows, the physical storage is always created in 'zero-copy' or host memory. All GPUs will reference the data at reduced bandwidth over the * PCIe bus. In these circumstances, use of the environment variable {@code CUDA_VISIBLE_DEVICES} is recommended to restrict CUDA to only use those * GPUs that have peer-to-peer support. Alternatively, users can also set {@code CUDA_MANAGED_FORCE_DEVICE_ALLOC} to a non-zero value to force the * driver to always use device memory for physical storage. When this environment variable is set to a non-zero value, all contexts created in that * process on devices that support managed memory have to be peer-to-peer compatible with each other. Context creation will fail if a context is * created on a device that supports managed memory and is not peer-to-peer compatible with any of the other managed memory supporting devices on * which contexts were previously created, even if those contexts have been destroyed. These environment variables are described in the CUDA * programming guide under the "CUDA environment variables" section.
  • On ARM, managed memory is not available on discrete GPUs with Drive PX-2.
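The following sketch is not part of the generated bindings; it illustrates the allocate/use/release pattern described above, assuming static imports of {@code org.lwjgl.cuda.CU.*} and {@code org.lwjgl.system.MemoryStack.*} and a CUDA context that is already current. Error handling is reduced to a single check.

    // Allocate 1 MiB of managed memory visible to all streams, then release it.
    try (MemoryStack stack = stackPush()) {
        PointerBuffer pDptr = stack.mallocPointer(1);
        int err = cuMemAllocManaged(pDptr, 1L << 20, CU_MEM_ATTACH_GLOBAL);
        if (err != CUDA_SUCCESS) {
            throw new IllegalStateException("cuMemAllocManaged failed: " + err);
        }
        long dptr = pDptr.get(0);
        // ... the pointer is valid on the CPU and on managed-memory capable GPUs ...
        cuMemFree(dptr);
    }
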
* * @param dptr returned device pointer * @param bytesize requested allocation size in bytes * @param flags must be one of {@link #CU_MEM_ATTACH_GLOBAL MEM_ATTACH_GLOBAL} or {@link #CU_MEM_ATTACH_HOST MEM_ATTACH_HOST} */ @NativeType("CUresult") public static int cuMemAllocManaged(@NativeType("CUdeviceptr *") PointerBuffer dptr, @NativeType("size_t") long bytesize, @NativeType("unsigned int") int flags) { if (CHECKS) { check(dptr, 1); } return ncuMemAllocManaged(memAddress(dptr), bytesize, flags); } // --- [ cuDeviceGetByPCIBusId ] --- /** Unsafe version of: {@link #cuDeviceGetByPCIBusId DeviceGetByPCIBusId} */ public static int ncuDeviceGetByPCIBusId(long dev, long pciBusId) { long __functionAddress = Functions.DeviceGetByPCIBusId; if (CHECKS) { check(__functionAddress); } return callPPI(dev, pciBusId, __functionAddress); } /** * Returns a handle to a compute device. * *

Returns in {@code *device} a device handle given a PCI bus ID string.

* * @param dev returned device handle * @param pciBusId string in one of the following forms: {@code [domain]:[bus]:[device].[function] [domain]:[bus]:[device] [bus]:[device].[function]} where * {@code domain}, {@code bus}, {@code device}, and {@code function} are all hexadecimal values */ @NativeType("CUresult") public static int cuDeviceGetByPCIBusId(@NativeType("CUdevice *") IntBuffer dev, @NativeType("char const *") ByteBuffer pciBusId) { if (CHECKS) { check(dev, 1); checkNT1(pciBusId); } return ncuDeviceGetByPCIBusId(memAddress(dev), memAddress(pciBusId)); } /** * Returns a handle to a compute device. * *

Returns in {@code *device} a device handle given a PCI bus ID string.

* * @param dev returned device handle * @param pciBusId string in one of the following forms: {@code [domain]:[bus]:[device].[function] [domain]:[bus]:[device] [bus]:[device].[function]} where * {@code domain}, {@code bus}, {@code device}, and {@code function} are all hexadecimal values */ @NativeType("CUresult") public static int cuDeviceGetByPCIBusId(@NativeType("CUdevice *") IntBuffer dev, @NativeType("char const *") CharSequence pciBusId) { if (CHECKS) { check(dev, 1); } MemoryStack stack = stackGet(); int stackPointer = stack.getPointer(); try { stack.nASCII(pciBusId, true); long pciBusIdEncoded = stack.getPointerAddress(); return ncuDeviceGetByPCIBusId(memAddress(dev), pciBusIdEncoded); } finally { stack.setPointer(stackPointer); } } // --- [ cuDeviceGetPCIBusId ] --- /** * Unsafe version of: {@link #cuDeviceGetPCIBusId DeviceGetPCIBusId} * * @param len maximum length of string to store in {@code name} */ public static int ncuDeviceGetPCIBusId(long pciBusId, int len, int dev) { long __functionAddress = Functions.DeviceGetPCIBusId; if (CHECKS) { check(__functionAddress); } return callPI(pciBusId, len, dev, __functionAddress); } /** * Returns a PCI Bus Id string for the device. * *

Returns an ASCII string identifying the device {@code dev} in the NULL-terminated string pointed to by {@code pciBusId}. {@code len} specifies the * maximum length of the string that may be returned.
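As a hedged illustration of the round trip between a device handle and its PCI bus ID string (assuming static imports of {@code org.lwjgl.cuda.CU.*}, {@code org.lwjgl.system.MemoryStack.*} and {@code org.lwjgl.system.MemoryUtil.*}, a successful {@code cuInit}, and unchecked return codes for brevity; the 64-byte buffer is just a comfortable upper bound):

    try (MemoryStack stack = stackPush()) {
        IntBuffer dev = stack.mallocInt(1);
        cuDeviceGet(dev, 0);                       // handle of the first device

        ByteBuffer pci = stack.malloc(64);         // 13 bytes suffice per the docs
        cuDeviceGetPCIBusId(pci, dev.get(0));      // e.g. "0000:01:00.0"
        String busId = memASCII(memAddress(pci));  // decode the NUL-terminated string

        IntBuffer sameDev = stack.mallocInt(1);
        cuDeviceGetByPCIBusId(sameDev, busId);     // CharSequence overload, yields the same handle
    }
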

* * @param pciBusId returned identifier string for the device in the following format {@code [domain]:[bus]:[device].[function]} where {@code domain}, {@code bus}, * {@code device}, and {@code function} are all hexadecimal values. {@code pciBusId} should be large enough to store 13 characters including the * NULL-terminator. * @param dev device to get identifier string for */ @NativeType("CUresult") public static int cuDeviceGetPCIBusId(@NativeType("char *") ByteBuffer pciBusId, @NativeType("CUdevice") int dev) { return ncuDeviceGetPCIBusId(memAddress(pciBusId), pciBusId.remaining(), dev); } // --- [ cuIpcGetEventHandle ] --- /** Unsafe version of: {@link #cuIpcGetEventHandle IpcGetEventHandle} */ public static int ncuIpcGetEventHandle(long pHandle, long event) { long __functionAddress = Functions.IpcGetEventHandle; if (CHECKS) { check(__functionAddress); check(event); } return callPPI(pHandle, event, __functionAddress); } /** * Gets an interprocess handle for a previously allocated event. * *

Takes as input a previously allocated event. This event must have been created with the {@link #CU_EVENT_INTERPROCESS EVENT_INTERPROCESS} and {@link #CU_EVENT_DISABLE_TIMING EVENT_DISABLE_TIMING} flags set. This * opaque handle may be copied into other processes and opened with {@link #cuIpcOpenEventHandle IpcOpenEventHandle} to allow efficient hardware * synchronization between GPU work in different processes.

* *

After the event has been opened in the importing process, {@link #cuEventRecord EventRecord}, {@link #cuEventSynchronize EventSynchronize}, {@link #cuStreamWaitEvent StreamWaitEvent} and {@link #cuEventQuery EventQuery} may be used in * either process. Performing operations on the imported event after the exported event has been freed with {@link #cuEventDestroy EventDestroy} will result in undefined * behavior.

* *

IPC functionality is restricted to devices with support for unified addressing on Linux and Windows operating systems. IPC functionality on Windows is * restricted to GPUs in TCC mode.
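An illustrative sketch of the exporting side (assuming a current context and static imports of {@code org.lwjgl.cuda.CU.*} and {@code org.lwjgl.system.MemoryStack.*}; how the opaque handle bytes reach the other process is left out, and return codes are not checked):

    try (MemoryStack stack = stackPush()) {
        PointerBuffer pEvent = stack.mallocPointer(1);
        cuEventCreate(pEvent, CU_EVENT_INTERPROCESS | CU_EVENT_DISABLE_TIMING);
        long event = pEvent.get(0);

        CUIPCEventHandle handle = CUIPCEventHandle.create();
        cuIpcGetEventHandle(handle, event);
        // copy the opaque handle bytes to the importing process, which opens
        // them with cuIpcOpenEventHandle and may then record/wait as usual
    }
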

* * @param pHandle pointer to a user allocated {@code CUipcEventHandle} in which to return the opaque event handle * @param event event allocated with {@link #CU_EVENT_INTERPROCESS EVENT_INTERPROCESS} and {@link #CU_EVENT_DISABLE_TIMING EVENT_DISABLE_TIMING} flags */ @NativeType("CUresult") public static int cuIpcGetEventHandle(@NativeType("CUipcEventHandle *") CUIPCEventHandle pHandle, @NativeType("CUevent") long event) { return ncuIpcGetEventHandle(pHandle.address(), event); } // --- [ cuIpcOpenEventHandle$Address ] --- @NativeType("CUresult") private static int cuIpcOpenEventHandle$Address() { long __functionAddress = Functions.IpcOpenEventHandle$Address; if (CHECKS) { check(__functionAddress); } return callI(__functionAddress); } // --- [ cuIpcGetMemHandle ] --- /** Unsafe version of: {@link #cuIpcGetMemHandle IpcGetMemHandle} */ public static int ncuIpcGetMemHandle(long pHandle, long dptr) { long __functionAddress = Functions.IpcGetMemHandle; if (CHECKS) { check(__functionAddress); check(dptr); } return callPPI(pHandle, dptr, __functionAddress); } /** * Gets an interprocess memory handle for an existing device memory allocation. * *

Takes a pointer to the base of an existing device memory allocation created with {@link #cuMemAlloc MemAlloc} and exports it for use in another process. This is a * lightweight operation and may be called multiple times on an allocation without adverse effects.

* *

If a region of memory is freed with {@link #cuMemFree MemFree} and a subsequent call to {@link #cuMemAlloc MemAlloc} returns memory with the same device address, {@link #cuIpcGetMemHandle IpcGetMemHandle} * will return a unique handle for the new memory.

* *

IPC functionality is restricted to devices with support for unified addressing on Linux and Windows operating systems. IPC functionality on Windows is * restricted to GPUs in TCC mode.
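A similar hedged sketch for exporting a device allocation (transport of the handle bytes and the importing side are omitted; return codes are not checked):

    try (MemoryStack stack = stackPush()) {
        PointerBuffer pDptr = stack.mallocPointer(1);
        cuMemAlloc(pDptr, 1L << 20);                 // allocation created with cuMemAlloc
        long dptr = pDptr.get(0);

        CUIPCMemHandle handle = CUIPCMemHandle.create();
        cuIpcGetMemHandle(handle, dptr);
        // the importing process maps the allocation with cuIpcOpenMemHandle and
        // releases its mapping with cuIpcCloseMemHandle; the exporter keeps ownership
    }
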

* * @param pHandle pointer to user allocated {@code CUipcMemHandle} to return the handle in * @param dptr base pointer to previously allocated device memory */ @NativeType("CUresult") public static int cuIpcGetMemHandle(@NativeType("CUipcMemHandle *") CUIPCMemHandle pHandle, @NativeType("CUdeviceptr") long dptr) { return ncuIpcGetMemHandle(pHandle.address(), dptr); } // --- [ cuIpcOpenMemHandle$Address ] --- @NativeType("CUresult") private static int cuIpcOpenMemHandle$Address() { long __functionAddress = Functions.IpcOpenMemHandle$Address; if (CHECKS) { check(__functionAddress); } return callI(__functionAddress); } // --- [ cuIpcCloseMemHandle ] --- /** * Attempts to close memory mapped with {@link #cuIpcOpenMemHandle IpcOpenMemHandle}. * *

Decrements the reference count of the memory returned by {@code cuIpcOpenMemHandle()} by 1. When the reference count reaches 0, this API unmaps the * memory. The original allocation in the exporting process as well as imported mappings in other processes will be unaffected.

* *

Any resources used to enable peer access will be freed if this is the last mapping using them.

* *

IPC functionality is restricted to devices with support for unified addressing on Linux and Windows operating systems. IPC functionality on Windows is * restricted to GPUs in TCC mode.

* * @param dptr device pointer returned by {@code cuIpcOpenMemHandle()} */ @NativeType("CUresult") public static int cuIpcCloseMemHandle(@NativeType("CUdeviceptr") long dptr) { long __functionAddress = Functions.IpcCloseMemHandle; if (CHECKS) { check(__functionAddress); check(dptr); } return callPI(dptr, __functionAddress); } // --- [ cuMemHostRegister ] --- /** * Unsafe version of: {@link #cuMemHostRegister MemHostRegister} * * @param bytesize size in bytes of the address range to page-lock */ public static int ncuMemHostRegister(long p, long bytesize, int Flags) { long __functionAddress = Functions.MemHostRegister; if (CHECKS) { check(__functionAddress); } return callPPI(p, bytesize, Flags, __functionAddress); } /** * Registers an existing host memory range for use by CUDA. * *

Page-locks the memory range specified by {@code p} and {@code bytesize} and maps it for the device(s) as specified by {@code Flags}. This memory range * also is added to the same tracking mechanism as {@link #cuMemHostAlloc MemHostAlloc} to automatically accelerate calls to functions such as {@link #cuMemcpyHtoD MemcpyHtoD}. Since the memory * can be accessed directly by the device, it can be read or written with much higher bandwidth than pageable memory that has not been registered. * Page-locking excessive amounts of memory may degrade system performance, since it reduces the amount of memory available to the system for paging. As a * result, this function is best used sparingly to register staging areas for data exchange between host and device.

* *

This function has limited support on Mac OS X. OS 10.7 or higher is required.

* *

All flags are orthogonal to one another: a developer may page-lock memory that is portable or mapped with no restrictions.

* *

The {@link #CU_MEMHOSTREGISTER_DEVICEMAP MEMHOSTREGISTER_DEVICEMAP} flag may be specified on CUDA contexts for devices that do not support mapped pinned memory. The failure is deferred to * {@link #cuMemHostGetDevicePointer MemHostGetDevicePointer} because the memory may be mapped into other CUDA contexts via the {@link #CU_MEMHOSTREGISTER_PORTABLE MEMHOSTREGISTER_PORTABLE} flag.

* *

For devices that have a non-zero value for the device attribute {@link #CU_DEVICE_ATTRIBUTE_CAN_USE_HOST_POINTER_FOR_REGISTERED_MEM DEVICE_ATTRIBUTE_CAN_USE_HOST_POINTER_FOR_REGISTERED_MEM}, the memory can also be * accessed from the device using the host pointer {@code p}. The device pointer returned by {@code cuMemHostGetDevicePointer()} may or may not match the * original host pointer {@code ptr} and depends on the devices visible to the application. If all devices visible to the application have a non-zero * value for the device attribute, the device pointer returned by {@code cuMemHostGetDevicePointer()} will match the original pointer {@code ptr}. If any * device visible to the application has a zero value for the device attribute, the device pointer returned by {@code cuMemHostGetDevicePointer()} will * not match the original host pointer {@code ptr}, but it will be suitable for use on all devices provided Unified Virtual Addressing is enabled. In such * systems, it is valid to access the memory using either pointer on devices that have a non-zero value for the device attribute. Note however that such * devices should access the memory using only one of the two pointers and not both.

* *

The memory page-locked by this function must be unregistered with {@link #cuMemHostUnregister MemHostUnregister}.
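A minimal sketch of the register/use/unregister pattern (assuming a current context and static imports of {@code org.lwjgl.cuda.CU.*} and {@code org.lwjgl.system.MemoryUtil.*}):

    // Page-lock an existing off-heap buffer to use it as a fast staging area.
    ByteBuffer staging = memAlloc(1 << 20);
    cuMemHostRegister(staging, CU_MEMHOSTREGISTER_DEVICEMAP);
    try {
        // ... use the buffer with cuMemcpyHtoD / cuMemcpyDtoH ...
    } finally {
        cuMemHostUnregister(staging);   // make the range pageable again
        memFree(staging);
    }
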

* * @param p host pointer to memory to page-lock * @param Flags flags for allocation request. One or more of:
{@link #CU_MEMHOSTREGISTER_PORTABLE MEMHOSTREGISTER_PORTABLE}, {@link #CU_MEMHOSTREGISTER_DEVICEMAP MEMHOSTREGISTER_DEVICEMAP}, {@link #CU_MEMHOSTREGISTER_IOMEMORY MEMHOSTREGISTER_IOMEMORY}, {@link #CU_MEMHOSTREGISTER_READ_ONLY MEMHOSTREGISTER_READ_ONLY}
*/ @NativeType("CUresult") public static int cuMemHostRegister(@NativeType("void *") ByteBuffer p, @NativeType("unsigned int") int Flags) { return ncuMemHostRegister(memAddress(p), p.remaining(), Flags); } // --- [ cuMemHostUnregister ] --- /** Unsafe version of: {@link #cuMemHostUnregister MemHostUnregister} */ public static int ncuMemHostUnregister(long p) { long __functionAddress = Functions.MemHostUnregister; if (CHECKS) { check(__functionAddress); } return callPI(p, __functionAddress); } /** * Unregisters a memory range that was registered with {@link #cuMemHostRegister MemHostRegister}. * *

Unmaps the memory range whose base address is specified by {@code p}, and makes it pageable again.

* *

The base address must be the same one specified to {@link #cuMemHostRegister MemHostRegister}.

* * @param p host pointer to memory to unregister */ @NativeType("CUresult") public static int cuMemHostUnregister(@NativeType("void *") ByteBuffer p) { return ncuMemHostUnregister(memAddress(p)); } // --- [ cuMemcpy ] --- /** * Copies memory. * *

Copies data between two pointers. {@code dst} and {@code src} are base pointers of the destination and source, respectively. {@code ByteCount} * specifies the number of bytes to copy. Note that this function infers the type of the transfer (host to host, host to device, device to device, or * device to host) from the pointer values. This function is only allowed in contexts which support unified addressing.
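A short sketch, assuming unified addressing is available in the current context (return codes unchecked):

    try (MemoryStack stack = stackPush()) {
        PointerBuffer pA = stack.mallocPointer(1);
        PointerBuffer pB = stack.mallocPointer(1);
        cuMemAlloc(pA, 4096);
        cuMemAlloc(pB, 4096);

        cuMemcpy(pB.get(0), pA.get(0), 4096);   // direction inferred from the pointer values
        cuMemFree(pA.get(0));
        cuMemFree(pB.get(0));
    }
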

* * @param dst destination unified virtual address space pointer * @param src source unified virtual address space pointer * @param ByteCount size of memory copy in bytes */ @NativeType("CUresult") public static int cuMemcpy(@NativeType("CUdeviceptr") long dst, @NativeType("CUdeviceptr") long src, @NativeType("size_t") long ByteCount) { long __functionAddress = Functions.Memcpy; if (CHECKS) { check(__functionAddress); check(dst); check(src); } return callPPPI(dst, src, ByteCount, __functionAddress); } // --- [ cuMemcpyPeer ] --- /** * Copies device memory between two contexts. * *

Copies from device memory in one context to device memory in another context. {@code dstDevice} is the base device pointer of the destination memory * and {@code dstContext} is the destination context. {@code srcDevice} is the base device pointer of the source memory and {@code srcContext} is the * source context. {@code ByteCount} specifies the number of bytes to copy.

* * @param dstDevice destination device pointer * @param dstContext destination context * @param srcDevice source device pointer * @param srcContext source context * @param ByteCount size of memory copy in bytes */ @NativeType("CUresult") public static int cuMemcpyPeer(@NativeType("CUdeviceptr") long dstDevice, @NativeType("CUcontext") long dstContext, @NativeType("CUdeviceptr") long srcDevice, @NativeType("CUcontext") long srcContext, @NativeType("size_t") long ByteCount) { long __functionAddress = Functions.MemcpyPeer; if (CHECKS) { check(__functionAddress); check(dstDevice); check(dstContext); check(srcDevice); check(srcContext); } return callPPPPPI(dstDevice, dstContext, srcDevice, srcContext, ByteCount, __functionAddress); } // --- [ cuMemcpyHtoD ] --- /** * Unsafe version of: {@link #cuMemcpyHtoD MemcpyHtoD} * * @param ByteCount size of memory copy in bytes */ public static int ncuMemcpyHtoD(long dstDevice, long srcHost, long ByteCount) { long __functionAddress = Functions.MemcpyHtoD; if (CHECKS) { check(dstDevice); } return callPPPI(dstDevice, srcHost, ByteCount, __functionAddress); } /** * Copies memory from Host to Device. * *

Copies from host memory to device memory. {@code dstDevice} and {@code srcHost} are the base addresses of the destination and source, respectively. * {@code ByteCount} specifies the number of bytes to copy.
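A hedged round-trip sketch using the typed overloads of this function and of {@link #cuMemcpyDtoH MemcpyDtoH}, which derive the byte count from the buffer's remaining elements (assumes a current context and static imports of {@code org.lwjgl.cuda.CU.*} and {@code org.lwjgl.system.MemoryStack.*}):

    try (MemoryStack stack = stackPush()) {
        FloatBuffer host = stack.floats(1.0f, 2.0f, 3.0f, 4.0f);

        PointerBuffer pDptr = stack.mallocPointer(1);
        cuMemAlloc(pDptr, host.remaining() * Float.BYTES);
        long dptr = pDptr.get(0);

        cuMemcpyHtoD(dptr, host);                                  // host -> device
        FloatBuffer readBack = stack.mallocFloat(host.remaining());
        cuMemcpyDtoH(readBack, dptr);                              // device -> host
        cuMemFree(dptr);
    }
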

* * @param dstDevice destination device pointer * @param srcHost source host pointer */ @NativeType("CUresult") public static int cuMemcpyHtoD(@NativeType("CUdeviceptr") long dstDevice, @NativeType("void const *") ByteBuffer srcHost) { return ncuMemcpyHtoD(dstDevice, memAddress(srcHost), srcHost.remaining()); } /** * Copies memory from Host to Device. * *

Copies from host memory to device memory. {@code dstDevice} and {@code srcHost} are the base addresses of the destination and source, respectively. * {@code ByteCount} specifies the number of bytes to copy.

* * @param dstDevice destination device pointer * @param srcHost source host pointer */ @NativeType("CUresult") public static int cuMemcpyHtoD(@NativeType("CUdeviceptr") long dstDevice, @NativeType("void const *") ShortBuffer srcHost) { return ncuMemcpyHtoD(dstDevice, memAddress(srcHost), Integer.toUnsignedLong(srcHost.remaining()) << 1); } /** * Copies memory from Host to Device. * *

Copies from host memory to device memory. {@code dstDevice} and {@code srcHost} are the base addresses of the destination and source, respectively. * {@code ByteCount} specifies the number of bytes to copy.

* * @param dstDevice destination device pointer * @param srcHost source host pointer */ @NativeType("CUresult") public static int cuMemcpyHtoD(@NativeType("CUdeviceptr") long dstDevice, @NativeType("void const *") IntBuffer srcHost) { return ncuMemcpyHtoD(dstDevice, memAddress(srcHost), Integer.toUnsignedLong(srcHost.remaining()) << 2); } /** * Copies memory from Host to Device. * *

Copies from host memory to device memory. {@code dstDevice} and {@code srcHost} are the base addresses of the destination and source, respectively. * {@code ByteCount} specifies the number of bytes to copy.

* * @param dstDevice destination device pointer * @param srcHost source host pointer */ @NativeType("CUresult") public static int cuMemcpyHtoD(@NativeType("CUdeviceptr") long dstDevice, @NativeType("void const *") LongBuffer srcHost) { return ncuMemcpyHtoD(dstDevice, memAddress(srcHost), Integer.toUnsignedLong(srcHost.remaining()) << 3); } /** * Copies memory from Host to Device. * *

Copies from host memory to device memory. {@code dstDevice} and {@code srcHost} are the base addresses of the destination and source, respectively. * {@code ByteCount} specifies the number of bytes to copy.

* * @param dstDevice destination device pointer * @param srcHost source host pointer */ @NativeType("CUresult") public static int cuMemcpyHtoD(@NativeType("CUdeviceptr") long dstDevice, @NativeType("void const *") FloatBuffer srcHost) { return ncuMemcpyHtoD(dstDevice, memAddress(srcHost), Integer.toUnsignedLong(srcHost.remaining()) << 2); } /** * Copies memory from Host to Device. * *

Copies from host memory to device memory. {@code dstDevice} and {@code srcHost} are the base addresses of the destination and source, respectively. * {@code ByteCount} specifies the number of bytes to copy.

* * @param dstDevice destination device pointer * @param srcHost source host pointer */ @NativeType("CUresult") public static int cuMemcpyHtoD(@NativeType("CUdeviceptr") long dstDevice, @NativeType("void const *") DoubleBuffer srcHost) { return ncuMemcpyHtoD(dstDevice, memAddress(srcHost), Integer.toUnsignedLong(srcHost.remaining()) << 3); } /** * Copies memory from Host to Device. * *

Copies from host memory to device memory. {@code dstDevice} and {@code srcHost} are the base addresses of the destination and source, respectively. * {@code ByteCount} specifies the number of bytes to copy.

* * @param dstDevice destination device pointer * @param srcHost source host pointer */ @NativeType("CUresult") public static int cuMemcpyHtoD(@NativeType("CUdeviceptr") long dstDevice, @NativeType("void const *") PointerBuffer srcHost) { return ncuMemcpyHtoD(dstDevice, memAddress(srcHost), Integer.toUnsignedLong(srcHost.remaining()) << POINTER_SHIFT); } // --- [ cuMemcpyDtoH ] --- /** * Unsafe version of: {@link #cuMemcpyDtoH MemcpyDtoH} * * @param ByteCount size of memory copy in bytes */ public static int ncuMemcpyDtoH(long dstHost, long srcDevice, long ByteCount) { long __functionAddress = Functions.MemcpyDtoH; if (CHECKS) { check(srcDevice); } return callPPPI(dstHost, srcDevice, ByteCount, __functionAddress); } /** * Copies memory from Device to Host. * *

Copies from device to host memory. {@code dstHost} and {@code srcDevice} specify the base pointers of the destination and source, respectively. {@code * ByteCount} specifies the number of bytes to copy.

* * @param dstHost destination host pointer * @param srcDevice source device pointer */ @NativeType("CUresult") public static int cuMemcpyDtoH(@NativeType("void *") ByteBuffer dstHost, @NativeType("CUdeviceptr") long srcDevice) { return ncuMemcpyDtoH(memAddress(dstHost), srcDevice, dstHost.remaining()); } /** * Copies memory from Device to Host. * *

Copies from device to host memory. {@code dstHost} and {@code srcDevice} specify the base pointers of the destination and source, respectively. {@code * ByteCount} specifies the number of bytes to copy.

* * @param dstHost destination host pointer * @param srcDevice source device pointer */ @NativeType("CUresult") public static int cuMemcpyDtoH(@NativeType("void *") ShortBuffer dstHost, @NativeType("CUdeviceptr") long srcDevice) { return ncuMemcpyDtoH(memAddress(dstHost), srcDevice, Integer.toUnsignedLong(dstHost.remaining()) << 1); } /** * Copies memory from Device to Host. * *

Copies from device to host memory. {@code dstHost} and {@code srcDevice} specify the base pointers of the destination and source, respectively. {@code * ByteCount} specifies the number of bytes to copy.

* * @param dstHost destination host pointer * @param srcDevice source device pointer */ @NativeType("CUresult") public static int cuMemcpyDtoH(@NativeType("void *") IntBuffer dstHost, @NativeType("CUdeviceptr") long srcDevice) { return ncuMemcpyDtoH(memAddress(dstHost), srcDevice, Integer.toUnsignedLong(dstHost.remaining()) << 2); } /** * Copies memory from Device to Host. * *

Copies from device to host memory. {@code dstHost} and {@code srcDevice} specify the base pointers of the destination and source, respectively. {@code * ByteCount} specifies the number of bytes to copy.

* * @param dstHost destination host pointer * @param srcDevice source device pointer */ @NativeType("CUresult") public static int cuMemcpyDtoH(@NativeType("void *") LongBuffer dstHost, @NativeType("CUdeviceptr") long srcDevice) { return ncuMemcpyDtoH(memAddress(dstHost), srcDevice, Integer.toUnsignedLong(dstHost.remaining()) << 3); } /** * Copies memory from Device to Host. * *

Copies from device to host memory. {@code dstHost} and {@code srcDevice} specify the base pointers of the destination and source, respectively. {@code * ByteCount} specifies the number of bytes to copy.

* * @param dstHost destination host pointer * @param srcDevice source device pointer */ @NativeType("CUresult") public static int cuMemcpyDtoH(@NativeType("void *") FloatBuffer dstHost, @NativeType("CUdeviceptr") long srcDevice) { return ncuMemcpyDtoH(memAddress(dstHost), srcDevice, Integer.toUnsignedLong(dstHost.remaining()) << 2); } /** * Copies memory from Device to Host. * *

Copies from device to host memory. {@code dstHost} and {@code srcDevice} specify the base pointers of the destination and source, respectively. {@code * ByteCount} specifies the number of bytes to copy.

* * @param dstHost destination host pointer * @param srcDevice source device pointer */ @NativeType("CUresult") public static int cuMemcpyDtoH(@NativeType("void *") DoubleBuffer dstHost, @NativeType("CUdeviceptr") long srcDevice) { return ncuMemcpyDtoH(memAddress(dstHost), srcDevice, Integer.toUnsignedLong(dstHost.remaining()) << 3); } /** * Copies memory from Device to Host. * *

Copies from device to host memory. {@code dstHost} and {@code srcDevice} specify the base pointers of the destination and source, respectively. {@code * ByteCount} specifies the number of bytes to copy.

* * @param dstHost destination host pointer * @param srcDevice source device pointer */ @NativeType("CUresult") public static int cuMemcpyDtoH(@NativeType("void *") PointerBuffer dstHost, @NativeType("CUdeviceptr") long srcDevice) { return ncuMemcpyDtoH(memAddress(dstHost), srcDevice, Integer.toUnsignedLong(dstHost.remaining()) << POINTER_SHIFT); } // --- [ cuMemcpyDtoD ] --- /** * Copies memory from Device to Device. * *

Copies from device memory to device memory. {@code dstDevice} and {@code srcDevice} are the base pointers of the destination and source, respectively. * {@code ByteCount} specifies the number of bytes to copy.

* * @param dstDevice destination device pointer * @param srcDevice source device pointer * @param ByteCount size of memory copy in bytes */ @NativeType("CUresult") public static int cuMemcpyDtoD(@NativeType("CUdeviceptr") long dstDevice, @NativeType("CUdeviceptr") long srcDevice, @NativeType("size_t") long ByteCount) { long __functionAddress = Functions.MemcpyDtoD; if (CHECKS) { check(dstDevice); check(srcDevice); } return callPPPI(dstDevice, srcDevice, ByteCount, __functionAddress); } // --- [ cuMemcpyDtoA ] --- /** * Copies memory from Device to Array. * *

Copies from device memory to a 1D CUDA array. {@code dstArray} and {@code dstOffset} specify the CUDA array handle and starting index of the * destination data. {@code srcDevice} specifies the base pointer of the source. {@code ByteCount} specifies the number of bytes to copy.

* * @param dstArray destination array * @param dstOffset offset in bytes of destination array * @param srcDevice source device pointer * @param ByteCount size of memory copy in bytes */ @NativeType("CUresult") public static int cuMemcpyDtoA(@NativeType("CUarray") long dstArray, @NativeType("size_t") long dstOffset, @NativeType("CUdeviceptr") long srcDevice, @NativeType("size_t") long ByteCount) { long __functionAddress = Functions.MemcpyDtoA; if (CHECKS) { check(dstArray); check(srcDevice); } return callPPPPI(dstArray, dstOffset, srcDevice, ByteCount, __functionAddress); } // --- [ cuMemcpyAtoD ] --- /** * Copies memory from Array to Device. * *

Copies from one 1D CUDA array to device memory. {@code dstDevice} specifies the base pointer of the destination and must be naturally aligned with the * CUDA array elements. {@code srcArray} and {@code srcOffset} specify the CUDA array handle and the offset in bytes into the array where the copy is to * begin. {@code ByteCount} specifies the number of bytes to copy and must be evenly divisible by the array element size.

* * @param dstDevice destination device pointer * @param srcArray source array * @param srcOffset offset in bytes of source array * @param ByteCount size of memory copy in bytes */ @NativeType("CUresult") public static int cuMemcpyAtoD(@NativeType("CUdeviceptr") long dstDevice, @NativeType("CUarray") long srcArray, @NativeType("size_t") long srcOffset, @NativeType("size_t") long ByteCount) { long __functionAddress = Functions.MemcpyAtoD; if (CHECKS) { check(dstDevice); check(srcArray); } return callPPPPI(dstDevice, srcArray, srcOffset, ByteCount, __functionAddress); } // --- [ cuMemcpyHtoA ] --- /** * Unsafe version of: {@link #cuMemcpyHtoA MemcpyHtoA} * * @param ByteCount size of memory copy in bytes */ public static int ncuMemcpyHtoA(long dstArray, long dstOffset, long srcHost, long ByteCount) { long __functionAddress = Functions.MemcpyHtoA; if (CHECKS) { check(dstArray); } return callPPPPI(dstArray, dstOffset, srcHost, ByteCount, __functionAddress); } /** * Copies memory from Host to Array. * *

Copies from host memory to a 1D CUDA array. {@code dstArray} and {@code dstOffset} specify the CUDA array handle and starting offset in bytes of the * destination data. {@code srcHost} specifies the base address of the source. {@code ByteCount} specifies the number of bytes to copy.
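An illustrative sketch that pairs this copy with {@code cuArrayCreate}/{@code cuArrayDestroy} (declared elsewhere in these bindings but not shown in this excerpt); the {@code CUDA_ARRAY_DESCRIPTOR} setter names assume LWJGL's usual field-name convention and should be verified against that class:

    // Create a 1D CUDA array of 256 floats and fill it from host memory.
    try (MemoryStack stack = stackPush()) {
        CUDA_ARRAY_DESCRIPTOR desc = CUDA_ARRAY_DESCRIPTOR.create()
            .Width(256)
            .Height(1)                    // height 1 => 1D array
            .Format(CU_AD_FORMAT_FLOAT)
            .NumChannels(1);

        PointerBuffer pArray = stack.mallocPointer(1);
        cuArrayCreate(pArray, desc);
        long array = pArray.get(0);

        FloatBuffer host = stack.mallocFloat(256);   // source data (zeroed here)
        cuMemcpyHtoA(array, 0, host);                // offset 0, byte count from remaining()
        cuArrayDestroy(array);
    }
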

* * @param dstArray destination array * @param dstOffset offset in bytes of destination array * @param srcHost source host pointer */ @NativeType("CUresult") public static int cuMemcpyHtoA(@NativeType("CUarray") long dstArray, @NativeType("size_t") long dstOffset, @NativeType("void const *") ByteBuffer srcHost) { return ncuMemcpyHtoA(dstArray, dstOffset, memAddress(srcHost), srcHost.remaining()); } /** * Copies memory from Host to Array. * *

Copies from host memory to a 1D CUDA array. {@code dstArray} and {@code dstOffset} specify the CUDA array handle and starting offset in bytes of the * destination data. {@code srcHost} specifies the base address of the source. {@code ByteCount} specifies the number of bytes to copy.

* * @param dstArray destination array * @param dstOffset offset in bytes of destination array * @param srcHost source host pointer */ @NativeType("CUresult") public static int cuMemcpyHtoA(@NativeType("CUarray") long dstArray, @NativeType("size_t") long dstOffset, @NativeType("void const *") ShortBuffer srcHost) { return ncuMemcpyHtoA(dstArray, dstOffset, memAddress(srcHost), Integer.toUnsignedLong(srcHost.remaining()) << 1); } /** * Copies memory from Host to Array. * *

Copies from host memory to a 1D CUDA array. {@code dstArray} and {@code dstOffset} specify the CUDA array handle and starting offset in bytes of the * destination data. {@code srcHost} specifies the base address of the source. {@code ByteCount} specifies the number of bytes to copy.

* * @param dstArray destination array * @param dstOffset offset in bytes of destination array * @param srcHost source host pointer */ @NativeType("CUresult") public static int cuMemcpyHtoA(@NativeType("CUarray") long dstArray, @NativeType("size_t") long dstOffset, @NativeType("void const *") IntBuffer srcHost) { return ncuMemcpyHtoA(dstArray, dstOffset, memAddress(srcHost), Integer.toUnsignedLong(srcHost.remaining()) << 2); } /** * Copies memory from Host to Array. * *

Copies from host memory to a 1D CUDA array. {@code dstArray} and {@code dstOffset} specify the CUDA array handle and starting offset in bytes of the * destination data. {@code srcHost} specifies the base address of the source. {@code ByteCount} specifies the number of bytes to copy.

* * @param dstArray destination array * @param dstOffset offset in bytes of destination array * @param srcHost source host pointer */ @NativeType("CUresult") public static int cuMemcpyHtoA(@NativeType("CUarray") long dstArray, @NativeType("size_t") long dstOffset, @NativeType("void const *") LongBuffer srcHost) { return ncuMemcpyHtoA(dstArray, dstOffset, memAddress(srcHost), Integer.toUnsignedLong(srcHost.remaining()) << 3); } /** * Copies memory from Host to Array. * *

Copies from host memory to a 1D CUDA array. {@code dstArray} and {@code dstOffset} specify the CUDA array handle and starting offset in bytes of the * destination data. {@code srcHost} specifies the base address of the source. {@code ByteCount} specifies the number of bytes to copy.

* * @param dstArray destination array * @param dstOffset offset in bytes of destination array * @param srcHost source host pointer */ @NativeType("CUresult") public static int cuMemcpyHtoA(@NativeType("CUarray") long dstArray, @NativeType("size_t") long dstOffset, @NativeType("void const *") FloatBuffer srcHost) { return ncuMemcpyHtoA(dstArray, dstOffset, memAddress(srcHost), Integer.toUnsignedLong(srcHost.remaining()) << 2); } /** * Copies memory from Host to Array. * *

Copies from host memory to a 1D CUDA array. {@code dstArray} and {@code dstOffset} specify the CUDA array handle and starting offset in bytes of the * destination data. {@code srcHost} specifies the base address of the source. {@code ByteCount} specifies the number of bytes to copy.

* * @param dstArray destination array * @param dstOffset offset in bytes of destination array * @param srcHost source host pointer */ @NativeType("CUresult") public static int cuMemcpyHtoA(@NativeType("CUarray") long dstArray, @NativeType("size_t") long dstOffset, @NativeType("void const *") DoubleBuffer srcHost) { return ncuMemcpyHtoA(dstArray, dstOffset, memAddress(srcHost), Integer.toUnsignedLong(srcHost.remaining()) << 3); } /** * Copies memory from Host to Array. * *

Copies from host memory to a 1D CUDA array. {@code dstArray} and {@code dstOffset} specify the CUDA array handle and starting offset in bytes of the * destination data. {@code srcHost} specifies the base address of the source. {@code ByteCount} specifies the number of bytes to copy.

* * @param dstArray destination array * @param dstOffset offset in bytes of destination array * @param srcHost source host pointer */ @NativeType("CUresult") public static int cuMemcpyHtoA(@NativeType("CUarray") long dstArray, @NativeType("size_t") long dstOffset, @NativeType("void const *") PointerBuffer srcHost) { return ncuMemcpyHtoA(dstArray, dstOffset, memAddress(srcHost), Integer.toUnsignedLong(srcHost.remaining()) << POINTER_SHIFT); } // --- [ cuMemcpyAtoH ] --- /** * Unsafe version of: {@link #cuMemcpyAtoH MemcpyAtoH} * * @param ByteCount size of memory copy in bytes */ public static int ncuMemcpyAtoH(long dstHost, long srcArray, long srcOffset, long ByteCount) { long __functionAddress = Functions.MemcpyAtoH; if (CHECKS) { check(srcArray); } return callPPPPI(dstHost, srcArray, srcOffset, ByteCount, __functionAddress); } /** * Copies memory from Array to Host. * *

Copies from one 1D CUDA array to host memory. {@code dstHost} specifies the base pointer of the destination. {@code srcArray} and {@code srcOffset} * specify the CUDA array handle and starting offset in bytes of the source data. {@code ByteCount} specifies the number of bytes to copy.

* * @param dstHost destination device pointer * @param srcArray source array * @param srcOffset offset in bytes of source array */ @NativeType("CUresult") public static int cuMemcpyAtoH(@NativeType("void *") ByteBuffer dstHost, @NativeType("CUarray") long srcArray, @NativeType("size_t") long srcOffset) { return ncuMemcpyAtoH(memAddress(dstHost), srcArray, srcOffset, dstHost.remaining()); } /** * Copies memory from Array to Host. * *

Copies from one 1D CUDA array to host memory. {@code dstHost} specifies the base pointer of the destination. {@code srcArray} and {@code srcOffset} * specify the CUDA array handle and starting offset in bytes of the source data. {@code ByteCount} specifies the number of bytes to copy.

* * @param dstHost destination device pointer * @param srcArray source array * @param srcOffset offset in bytes of source array */ @NativeType("CUresult") public static int cuMemcpyAtoH(@NativeType("void *") ShortBuffer dstHost, @NativeType("CUarray") long srcArray, @NativeType("size_t") long srcOffset) { return ncuMemcpyAtoH(memAddress(dstHost), srcArray, srcOffset, Integer.toUnsignedLong(dstHost.remaining()) << 1); } /** * Copies memory from Array to Host. * *

Copies from one 1D CUDA array to host memory. {@code dstHost} specifies the base pointer of the destination. {@code srcArray} and {@code srcOffset} * specify the CUDA array handle and starting offset in bytes of the source data. {@code ByteCount} specifies the number of bytes to copy.

* * @param dstHost destination device pointer * @param srcArray source array * @param srcOffset offset in bytes of source array */ @NativeType("CUresult") public static int cuMemcpyAtoH(@NativeType("void *") IntBuffer dstHost, @NativeType("CUarray") long srcArray, @NativeType("size_t") long srcOffset) { return ncuMemcpyAtoH(memAddress(dstHost), srcArray, srcOffset, Integer.toUnsignedLong(dstHost.remaining()) << 2); } /** * Copies memory from Array to Host. * *

Copies from one 1D CUDA array to host memory. {@code dstHost} specifies the base pointer of the destination. {@code srcArray} and {@code srcOffset} * specify the CUDA array handle and starting offset in bytes of the source data. {@code ByteCount} specifies the number of bytes to copy.

* * @param dstHost destination device pointer * @param srcArray source array * @param srcOffset offset in bytes of source array */ @NativeType("CUresult") public static int cuMemcpyAtoH(@NativeType("void *") LongBuffer dstHost, @NativeType("CUarray") long srcArray, @NativeType("size_t") long srcOffset) { return ncuMemcpyAtoH(memAddress(dstHost), srcArray, srcOffset, Integer.toUnsignedLong(dstHost.remaining()) << 3); } /** * Copies memory from Array to Host. * *

Copies from one 1D CUDA array to host memory. {@code dstHost} specifies the base pointer of the destination. {@code srcArray} and {@code srcOffset} * specify the CUDA array handle and starting offset in bytes of the source data. {@code ByteCount} specifies the number of bytes to copy.

* * @param dstHost destination device pointer * @param srcArray source array * @param srcOffset offset in bytes of source array */ @NativeType("CUresult") public static int cuMemcpyAtoH(@NativeType("void *") FloatBuffer dstHost, @NativeType("CUarray") long srcArray, @NativeType("size_t") long srcOffset) { return ncuMemcpyAtoH(memAddress(dstHost), srcArray, srcOffset, Integer.toUnsignedLong(dstHost.remaining()) << 2); } /** * Copies memory from Array to Host. * *

Copies from one 1D CUDA array to host memory. {@code dstHost} specifies the base pointer of the destination. {@code srcArray} and {@code srcOffset} * specify the CUDA array handle and starting offset in bytes of the source data. {@code ByteCount} specifies the number of bytes to copy.

* * @param dstHost destination device pointer * @param srcArray source array * @param srcOffset offset in bytes of source array */ @NativeType("CUresult") public static int cuMemcpyAtoH(@NativeType("void *") DoubleBuffer dstHost, @NativeType("CUarray") long srcArray, @NativeType("size_t") long srcOffset) { return ncuMemcpyAtoH(memAddress(dstHost), srcArray, srcOffset, Integer.toUnsignedLong(dstHost.remaining()) << 3); } /** * Copies memory from Array to Host. * *

Copies from one 1D CUDA array to host memory. {@code dstHost} specifies the base pointer of the destination. {@code srcArray} and {@code srcOffset} * specify the CUDA array handle and starting offset in bytes of the source data. {@code ByteCount} specifies the number of bytes to copy.

* * @param dstHost destination device pointer * @param srcArray source array * @param srcOffset offset in bytes of source array */ @NativeType("CUresult") public static int cuMemcpyAtoH(@NativeType("void *") PointerBuffer dstHost, @NativeType("CUarray") long srcArray, @NativeType("size_t") long srcOffset) { return ncuMemcpyAtoH(memAddress(dstHost), srcArray, srcOffset, Integer.toUnsignedLong(dstHost.remaining()) << POINTER_SHIFT); } // --- [ cuMemcpyAtoA ] --- /** * Copies memory from Array to Array. * *

Copies from one 1D CUDA array to another. {@code dstArray} and {@code srcArray} specify the handles of the destination and source CUDA arrays for the * copy, respectively. {@code dstOffset} and {@code srcOffset} specify the destination and source offsets in bytes into the CUDA arrays. {@code ByteCount} * is the number of bytes to be copied. The elements in the CUDA arrays need not have the same format, but they must have the same size; * and {@code ByteCount} must be evenly divisible by that size.

* * @param dstArray destination array * @param dstOffset offset in bytes of destination array * @param srcArray source array * @param srcOffset offset in bytes of source array * @param ByteCount size of memory copy in bytes */ @NativeType("CUresult") public static int cuMemcpyAtoA(@NativeType("CUarray") long dstArray, @NativeType("size_t") long dstOffset, @NativeType("CUarray") long srcArray, @NativeType("size_t") long srcOffset, @NativeType("size_t") long ByteCount) { long __functionAddress = Functions.MemcpyAtoA; if (CHECKS) { check(dstArray); check(srcArray); } return callPPPPPI(dstArray, dstOffset, srcArray, srcOffset, ByteCount, __functionAddress); } // --- [ cuMemcpy2D ] --- /** Unsafe version of: {@link #cuMemcpy2D Memcpy2D} */ public static int ncuMemcpy2D(long pCopy) { long __functionAddress = Functions.Memcpy2D; return callPI(pCopy, __functionAddress); } /** * Copies memory for 2D arrays. * *

Perform a 2D memory copy according to the parameters specified in {@code pCopy}.

* *

If {@code srcMemoryType} is {@link #CU_MEMORYTYPE_UNIFIED MEMORYTYPE_UNIFIED}, {@code srcDevice} and {@code srcPitch} specify the (unified virtual address space) base address of the * source data and the bytes per row to apply. {@code srcArray} is ignored. This value may be used only if unified addressing is supported in the calling * context.

* *

If {@code srcMemoryType} is {@link #CU_MEMORYTYPE_HOST MEMORYTYPE_HOST}, {@code srcHost} and {@code srcPitch} specify the (host) base address of the source data and the bytes per * row to apply. {@code srcArray} is ignored.

* *

If {@code srcMemoryType} is {@link #CU_MEMORYTYPE_DEVICE MEMORYTYPE_DEVICE}, {@code srcDevice} and {@code srcPitch} specify the (device) base address of the source data and the * bytes per row to apply. {@code srcArray} is ignored.

* *

If {@code srcMemoryType} is {@link #CU_MEMORYTYPE_ARRAY MEMORYTYPE_ARRAY}, {@code srcArray} specifies the handle of the source data. {@code srcHost}, {@code srcDevice} and * {@code srcPitch} are ignored.

* *

If {@code dstMemoryType} is {@link #CU_MEMORYTYPE_HOST MEMORYTYPE_HOST}, {@code dstHost} and {@code dstPitch} specify the (host) base address of the destination data and the * bytes per row to apply. {@code dstArray} is ignored.

* *

If {@code dstMemoryType} is {@link #CU_MEMORYTYPE_UNIFIED MEMORYTYPE_UNIFIED}, {@code dstDevice} and {@code dstPitch} specify the (unified virtual address space) base address of the * destination data and the bytes per row to apply. {@code dstArray} is ignored. This value may be used only if unified addressing is supported in the calling * context.

* *

If {@code dstMemoryType} is {@link #CU_MEMORYTYPE_DEVICE MEMORYTYPE_DEVICE}, {@code dstDevice} and {@code dstPitch} specify the (device) base address of the destination data and * the bytes per row to apply. {@code dstArray} is ignored.

* *

If {@code dstMemoryType} is {@link #CU_MEMORYTYPE_ARRAY MEMORYTYPE_ARRAY}, {@code dstArray} specifies the handle of the destination data. {@code dstHost}, {@code dstDevice} and * {@code dstPitch} are ignored.

* *

{@code srcXInBytes} and {@code srcY} specify the base address of the source data for the copy.

* *

For host pointers, the starting address is

* *

     *   void* Start = (void*)((char*)srcHost+srcY*srcPitch + srcXInBytes);
* *

For device pointers, the starting address is

* *

     * CUdeviceptr Start = srcDevice+srcY*srcPitch+srcXInBytes;
* *

For CUDA arrays, {@code srcXInBytes} must be evenly divisible by the array element size.

* *

{@code dstXInBytes} and {@code dstY} specify the base address of the destination data for the copy.

* *

For host pointers, the base address is

* *

     *   void* dstStart = (void*)((char*)dstHost+dstY*dstPitch + dstXInBytes);
* *

For device pointers, the starting address is

* *

     * CUdeviceptr dstStart = dstDevice+dstY*dstPitch+dstXInBytes;
* *

For CUDA arrays, {@code dstXInBytes} must be evenly divisible by the array element size.

* *

{@code WidthInBytes} and {@code Height} specify the width (in bytes) and height of the 2D copy being performed.

* *

If specified, {@code srcPitch} must be greater than or equal to {@code WidthInBytes} + {@code srcXInBytes}, and {@code dstPitch} must be greater than * or equal to {@code WidthInBytes} + {@code dstXInBytes}.

* *

{@code cuMemcpy2D()} returns an error if any pitch is greater than the maximum allowed ({@link #CU_DEVICE_ATTRIBUTE_MAX_PITCH DEVICE_ATTRIBUTE_MAX_PITCH}). {@code cuMemAllocPitch()} passes back * pitches that always work with {@code cuMemcpy2D()}. On intra-device memory copies (device to device, CUDA array to device, CUDA array to CUDA array), * {@code cuMemcpy2D()} may fail for pitches not computed by {@link #cuMemAllocPitch MemAllocPitch}. {@link #cuMemcpy2DUnaligned Memcpy2DUnaligned} does not have this restriction, but may run * significantly slower in the cases where {@code cuMemcpy2D()} would have returned an error code.
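A hedged sketch of a host-to-device pitched copy that combines this function with {@link #cuMemAllocPitch MemAllocPitch}; the {@code CUDA_MEMCPY2D} setter names assume LWJGL's field-name convention, and members left unset rely on the struct being zero-initialized:

    try (MemoryStack stack = stackPush()) {
        int width = 64, height = 64;                       // a 64x64 byte image
        ByteBuffer src = stack.malloc(width * height);

        PointerBuffer pDst   = stack.mallocPointer(1);
        PointerBuffer pPitch = stack.mallocPointer(1);
        cuMemAllocPitch(pDst, pPitch, width, height, 4);   // driver-chosen pitch

        CUDA_MEMCPY2D copy = CUDA_MEMCPY2D.create()
            .srcMemoryType(CU_MEMORYTYPE_HOST)
            .srcHost(src)
            .srcPitch(width)
            .dstMemoryType(CU_MEMORYTYPE_DEVICE)
            .dstDevice(pDst.get(0))
            .dstPitch(pPitch.get(0))
            .WidthInBytes(width)
            .Height(height);

        cuMemcpy2D(copy);
        cuMemFree(pDst.get(0));
    }
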

* * @param pCopy parameters for the memory copy */ @NativeType("CUresult") public static int cuMemcpy2D(@NativeType("CUDA_MEMCPY2D const *") CUDA_MEMCPY2D pCopy) { return ncuMemcpy2D(pCopy.address()); } // --- [ cuMemcpy2DUnaligned ] --- /** Unsafe version of: {@link #cuMemcpy2DUnaligned Memcpy2DUnaligned} */ public static int ncuMemcpy2DUnaligned(long pCopy) { long __functionAddress = Functions.Memcpy2DUnaligned; return callPI(pCopy, __functionAddress); } /** * Copies memory for 2D arrays. * *

Perform a 2D memory copy according to the parameters specified in {@code pCopy}.

* *

If {@code srcMemoryType} is {@link #CU_MEMORYTYPE_UNIFIED MEMORYTYPE_UNIFIED}, {@code srcDevice} and {@code srcPitch} specify the (unified virtual address space) base address of the * source data and the bytes per row to apply. {@code srcArray} is ignored. This value may be used only if unified addressing is supported in the calling * context.

* *

If {@code srcMemoryType} is {@link #CU_MEMORYTYPE_HOST MEMORYTYPE_HOST}, {@code srcHost} and {@code srcPitch} specify the (host) base address of the source data and the bytes per * row to apply. {@code srcArray} is ignored.

* *

If {@code srcMemoryType} is {@link #CU_MEMORYTYPE_DEVICE MEMORYTYPE_DEVICE}, {@code srcDevice} and {@code srcPitch} specify the (device) base address of the source data and the * bytes per row to apply. {@code srcArray} is ignored.

* *

If {@code srcMemoryType} is {@link #CU_MEMORYTYPE_ARRAY MEMORYTYPE_ARRAY}, {@code srcArray} specifies the handle of the source data. {@code srcHost}, {@code srcDevice} and * {@code srcPitch} are ignored.

* *

If {@code dstMemoryType} is {@link #CU_MEMORYTYPE_UNIFIED MEMORYTYPE_UNIFIED}, {@code dstDevice} and {@code dstPitch} specify the (unified virtual address space) base address of the * destination data and the bytes per row to apply. {@code dstArray} is ignored. This value may be used only if unified addressing is supported in the calling * context.

* *

If {@code dstMemoryType} is {@link #CU_MEMORYTYPE_HOST MEMORYTYPE_HOST}, {@code dstHost} and {@code dstPitch} specify the (host) base address of the destination data and the * bytes per row to apply. {@code dstArray} is ignored.

* *

If {@code dstMemoryType} is {@link #CU_MEMORYTYPE_DEVICE MEMORYTYPE_DEVICE}, {@code dstDevice} and {@code dstPitch} specify the (device) base address of the destination data and * the bytes per row to apply. {@code dstArray} is ignored.

* *

If {@code dstMemoryType} is {@link #CU_MEMORYTYPE_ARRAY MEMORYTYPE_ARRAY}, {@code dstArray} specifies the handle of the destination data. {@code dstHost}, {@code dstDevice} and * {@code dstPitch} are ignored.

* *

{@code srcXInBytes} and {@code srcY} specify the base address of the source data for the copy.

* *

For host pointers, the starting address is

* *

     *   void* Start = (void*)((char*)srcHost+srcY*srcPitch + srcXInBytes);
* *

For device pointers, the starting address is

* *

     *   CUdeviceptr Start = srcDevice+srcY*srcPitch+srcXInBytes;
* *

For CUDA arrays, {@code srcXInBytes} must be evenly divisible by the array element size.

* *

{@code dstXInBytes} and {@code dstY} specify the base address of the destination data for the copy.

* *

For host pointers, the base address is

* *

     *   void* dstStart = (void*)((char*)dstHost+dstY*dstPitch + dstXInBytes);
* *

For device pointers, the starting address is

* *

     *   CUdeviceptr dstStart = dstDevice+dstY*dstPitch+dstXInBytes;
* *

For CUDA arrays, {@code dstXInBytes} must be evenly divisible by the array element size.

* *

{@code WidthInBytes} and {@code Height} specify the width (in bytes) and height of the 2D copy being performed.

* *

If specified, {@code srcPitch} must be greater than or equal to {@code WidthInBytes} + {@code srcXInBytes}, and {@code dstPitch} must be greater than * or equal to {@code WidthInBytes} + {@code dstXInBytes}.

* *

{@link #cuMemcpy2D Memcpy2D} returns an error if any pitch is greater than the maximum allowed ({@link #CU_DEVICE_ATTRIBUTE_MAX_PITCH DEVICE_ATTRIBUTE_MAX_PITCH}). {@link #cuMemAllocPitch MemAllocPitch} passes back pitches that * always work with {@code cuMemcpy2D()}. On intra-device memory copies (device to device, CUDA array to device, CUDA array to CUDA array), * {@code cuMemcpy2D()} may fail for pitches not computed by {@code cuMemAllocPitch()}. {@code cuMemcpy2DUnaligned()} does not have this restriction, but * may run significantly slower in the cases where {@code cuMemcpy2D()} would have returned an error code.

* * @param pCopy parameters for the memory copy */ @NativeType("CUresult") public static int cuMemcpy2DUnaligned(@NativeType("CUDA_MEMCPY2D const *") CUDA_MEMCPY2D pCopy) { return ncuMemcpy2DUnaligned(pCopy.address()); } // --- [ cuMemcpy3D ] --- /** Unsafe version of: {@link #cuMemcpy3D Memcpy3D} */ public static int ncuMemcpy3D(long pCopy) { long __functionAddress = Functions.Memcpy3D; return callPI(pCopy, __functionAddress); } /** * Copies memory for 3D arrays. * *

Perform a 3D memory copy according to the parameters specified in {@code pCopy}.

* *

If {@code srcMemoryType} is {@link #CU_MEMORYTYPE_UNIFIED MEMORYTYPE_UNIFIED}, {@code srcDevice} and {@code srcPitch} specify the (unified virtual address space) base address of the * source data and the bytes per row to apply. {@code srcArray} is ignored. This value may be used only if unified addressing is supported in the calling * context.

* *

If {@code srcMemoryType} is {@link #CU_MEMORYTYPE_HOST MEMORYTYPE_HOST}, {@code srcHost}, {@code srcPitch} and {@code srcHeight} specify the (host) base address of the source * data, the bytes per row, and the height of each 2D slice of the 3D array. {@code srcArray} is ignored.

* *

If {@code srcMemoryType} is {@link #CU_MEMORYTYPE_DEVICE MEMORYTYPE_DEVICE}, {@code srcDevice}, {@code srcPitch} and {@code srcHeight} specify the (device) base address of the * source data, the bytes per row, and the height of each 2D slice of the 3D array. {@code srcArray} is ignored.

* *

If {@code srcMemoryType} is {@link #CU_MEMORYTYPE_ARRAY MEMORYTYPE_ARRAY}, {@code srcArray} specifies the handle of the source data. {@code srcHost}, {@code srcDevice}, * {@code srcPitch} and {@code srcHeight} are ignored.

* *

If {@code dstMemoryType} is {@link #CU_MEMORYTYPE_UNIFIED MEMORYTYPE_UNIFIED}, {@code dstDevice} and {@code dstPitch} specify the (unified virtual address space) base address of the * destination data and the bytes per row to apply. {@code dstArray} is ignored. This value may be used only if unified addressing is supported in the calling * context.

* *

If {@code dstMemoryType} is {@link #CU_MEMORYTYPE_HOST MEMORYTYPE_HOST}, {@code dstHost}, {@code dstPitch} and {@code dstHeight} specify the (host) base address of the destination data, the bytes * per row, and the height of each 2D slice of the 3D array. {@code dstArray} is ignored.

* *

If {@code dstMemoryType} is {@link #CU_MEMORYTYPE_DEVICE MEMORYTYPE_DEVICE}, {@code dstDevice}, {@code dstPitch} and {@code dstHeight} specify the (device) base address of the destination data, the * bytes per row, and the height of each 2D slice of the 3D array. {@code dstArray} is ignored.

* *

If {@code dstMemoryType} is {@link #CU_MEMORYTYPE_ARRAY MEMORYTYPE_ARRAY}, {@code dstArray} specifies the handle of the destination data. {@code dstHost}, {@code dstDevice}, * {@code dstPitch} and {@code dstHeight} are ignored.

* *

{@code srcXInBytes}, {@code srcY} and {@code srcZ} specify the base address of the source data for the copy.

* *

For host pointers, the starting address is

* *

     *   void* Start = (void*)((char*)srcHost+(srcZ*srcHeight+srcY)*srcPitch + srcXInBytes);
* *

For device pointers, the starting address is

* *

     *   CUdeviceptr Start = srcDevice+(srcZ*srcHeight+srcY)*srcPitch+srcXInBytes;
* *

For CUDA arrays, {@code srcXInBytes} must be evenly divisible by the array element size.

* *

{@code dstXInBytes}, {@code dstY} and {@code dstZ} specify the base address of the destination data for the copy.

* *

For host pointers, the base address is

* *

     *   void* dstStart = (void*)((char*)dstHost+(dstZ*dstHeight+dstY)*dstPitch + dstXInBytes);
* *

For device pointers, the starting address is

* *

     *   CUdeviceptr dstStart = dstDevice+(dstZ*dstHeight+dstY)*dstPitch+dstXInBytes;
* *

For CUDA arrays, {@code dstXInBytes} must be evenly divisible by the array element size.

* *

{@code WidthInBytes}, {@code Height} and {@code Depth} specify the width (in bytes), height and depth of the 3D copy being performed.

* *

If specified, {@code srcPitch} must be greater than or equal to {@code WidthInBytes} + {@code srcXInBytes}, and {@code dstPitch} must be greater than * or equal to {@code WidthInBytes} + {@code dstXInBytes}.

* *

If specified, {@code srcHeight} must be greater than or equal to {@code Height} + {@code srcY}, and {@code dstHeight} must be greater than or equal to * {@code Height} + {@code dstY}.

* *

{@link #cuMemcpy3D Memcpy3D} returns an error if any pitch is greater than the maximum allowed ({@link #CU_DEVICE_ATTRIBUTE_MAX_PITCH DEVICE_ATTRIBUTE_MAX_PITCH}).

* *

The {@code srcLOD} and {@code dstLOD} members of the {@code CUDA_MEMCPY3D} structure must be set to 0.

* *
Note

This function exhibits synchronous behavior for most use cases.

* * @param pCopy parameters for the memory copy */ @NativeType("CUresult") public static int cuMemcpy3D(@NativeType("CUDA_MEMCPY3D const *") CUDA_MEMCPY3D pCopy) { return ncuMemcpy3D(pCopy.address()); } // --- [ cuMemcpy3DPeer ] --- /** Unsafe version of: {@link #cuMemcpy3DPeer Memcpy3DPeer} */ public static int ncuMemcpy3DPeer(long pCopy) { long __functionAddress = Functions.Memcpy3DPeer; if (CHECKS) { check(__functionAddress); } return callPI(pCopy, __functionAddress); } /** * Copies memory between contexts. * *

Perform a 3D memory copy according to the parameters specified in {@code pCopy}.

* * @param pCopy parameters for the memory copy */ @NativeType("CUresult") public static int cuMemcpy3DPeer(@NativeType("CUDA_MEMCPY3D_PEER const *") CUDA_MEMCPY3D_PEER pCopy) { return ncuMemcpy3DPeer(pCopy.address()); } // --- [ cuMemcpyAsync ] --- /** * Copies memory asynchronously. * *

Copies data between two pointers. {@code dst} and {@code src} are base pointers of the destination and source, respectively. {@code ByteCount} * specifies the number of bytes to copy. Note that this function infers the type of the transfer (host to host, host to device, device to device, or * device to host) from the pointer values. This function is only allowed in contexts which support unified addressing.

* * @param dst destination unified virtual address space pointer * @param src source unified virtual address space pointer * @param ByteCount size of memory copy in bytes * @param hStream stream identifier */ @NativeType("CUresult") public static int cuMemcpyAsync(@NativeType("CUdeviceptr") long dst, @NativeType("CUdeviceptr") long src, @NativeType("size_t") long ByteCount, @NativeType("CUstream") long hStream) { long __functionAddress = Functions.MemcpyAsync; if (CHECKS) { check(__functionAddress); check(dst); check(src); } return callPPPPI(dst, src, ByteCount, hStream, __functionAddress); } // --- [ cuMemcpyPeerAsync ] --- /** * Copies device memory between two contexts asynchronously. * *

Copies from device memory in one context to device memory in another context. {@code dstDevice} is the base device pointer of the destination memory and {@code dstContext} is the destination context. {@code srcDevice} is the base device pointer of the source memory and {@code srcContext} is the source context. {@code ByteCount} specifies the number of bytes to copy.
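
A minimal sketch, assuming the two contexts and device pointers were created beforehand (all names below are placeholders):

    int err = cuMemcpyPeerAsync(dstDevice, dstContext, srcDevice, srcContext, byteCount, hStream);
    if (err != CUDA_SUCCESS) {
        throw new IllegalStateException("cuMemcpyPeerAsync failed: " + err);
    }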

* * @param dstDevice destination device pointer * @param dstContext destination context * @param srcDevice source device pointer * @param srcContext source context * @param ByteCount size of memory copy in bytes * @param hStream stream identifier */ @NativeType("CUresult") public static int cuMemcpyPeerAsync(@NativeType("CUdeviceptr") long dstDevice, @NativeType("CUcontext") long dstContext, @NativeType("CUdeviceptr") long srcDevice, @NativeType("CUcontext") long srcContext, @NativeType("size_t") long ByteCount, @NativeType("CUstream") long hStream) { long __functionAddress = Functions.MemcpyPeerAsync; if (CHECKS) { check(__functionAddress); check(dstDevice); check(dstContext); check(srcDevice); check(srcContext); } return callPPPPPPI(dstDevice, dstContext, srcDevice, srcContext, ByteCount, hStream, __functionAddress); } // --- [ cuMemcpyHtoDAsync ] --- /** * Unsafe version of: {@link #cuMemcpyHtoDAsync MemcpyHtoDAsync} * * @param ByteCount size of memory copy in bytes */ public static int ncuMemcpyHtoDAsync(long dstDevice, long srcHost, long ByteCount, long hStream) { long __functionAddress = Functions.MemcpyHtoDAsync; if (CHECKS) { check(dstDevice); } return callPPPPI(dstDevice, srcHost, ByteCount, hStream, __functionAddress); } /** * Copies memory from Host to Device. * *

Copies from host memory to device memory. {@code dstDevice} and {@code srcHost} are the base addresses of the destination and source, respectively. {@code ByteCount} specifies the number of bytes to copy.
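
For the buffer overloads below, the byte count is taken from the buffer's {@code remaining()} bytes. A minimal sketch, assuming {@code dDst} is a device allocation and {@code hStream} an existing stream (both placeholders); for the copy to be truly asynchronous the host buffer would normally be page-locked:

    ByteBuffer hSrc = memAlloc(1 << 20);              // 1 MiB of host data; fill it before the copy
    int err = cuMemcpyHtoDAsync(dDst, hSrc, hStream); // ByteCount == hSrc.remaining()
    // hSrc must stay valid (not freed) until the copy has completed on hStream.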

* * @param dstDevice destination device pointer * @param srcHost source host pointer * @param hStream stream identifier */ @NativeType("CUresult") public static int cuMemcpyHtoDAsync(@NativeType("CUdeviceptr") long dstDevice, @NativeType("void const *") ByteBuffer srcHost, @NativeType("CUstream") long hStream) { return ncuMemcpyHtoDAsync(dstDevice, memAddress(srcHost), srcHost.remaining(), hStream); } /** * Copies memory from Host to Device. * *

Copies from host memory to device memory. {@code dstDevice} and {@code srcHost} are the base addresses of the destination and source, respectively. * {@code ByteCount} specifies the number of bytes to copy.

* * @param dstDevice destination device pointer * @param srcHost source host pointer * @param hStream stream identifier */ @NativeType("CUresult") public static int cuMemcpyHtoDAsync(@NativeType("CUdeviceptr") long dstDevice, @NativeType("void const *") ShortBuffer srcHost, @NativeType("CUstream") long hStream) { return ncuMemcpyHtoDAsync(dstDevice, memAddress(srcHost), Integer.toUnsignedLong(srcHost.remaining()) << 1, hStream); } /** * Copies memory from Host to Device. * *

Copies from host memory to device memory. {@code dstDevice} and {@code srcHost} are the base addresses of the destination and source, respectively. * {@code ByteCount} specifies the number of bytes to copy.

* * @param dstDevice destination device pointer * @param srcHost source host pointer * @param hStream stream identifier */ @NativeType("CUresult") public static int cuMemcpyHtoDAsync(@NativeType("CUdeviceptr") long dstDevice, @NativeType("void const *") IntBuffer srcHost, @NativeType("CUstream") long hStream) { return ncuMemcpyHtoDAsync(dstDevice, memAddress(srcHost), Integer.toUnsignedLong(srcHost.remaining()) << 2, hStream); } /** * Copies memory from Host to Device. * *

Copies from host memory to device memory. {@code dstDevice} and {@code srcHost} are the base addresses of the destination and source, respectively. * {@code ByteCount} specifies the number of bytes to copy.

* * @param dstDevice destination device pointer * @param srcHost source host pointer * @param hStream stream identifier */ @NativeType("CUresult") public static int cuMemcpyHtoDAsync(@NativeType("CUdeviceptr") long dstDevice, @NativeType("void const *") LongBuffer srcHost, @NativeType("CUstream") long hStream) { return ncuMemcpyHtoDAsync(dstDevice, memAddress(srcHost), Integer.toUnsignedLong(srcHost.remaining()) << 3, hStream); } /** * Copies memory from Host to Device. * *

Copies from host memory to device memory. {@code dstDevice} and {@code srcHost} are the base addresses of the destination and source, respectively. * {@code ByteCount} specifies the number of bytes to copy.

* * @param dstDevice destination device pointer * @param srcHost source host pointer * @param hStream stream identifier */ @NativeType("CUresult") public static int cuMemcpyHtoDAsync(@NativeType("CUdeviceptr") long dstDevice, @NativeType("void const *") FloatBuffer srcHost, @NativeType("CUstream") long hStream) { return ncuMemcpyHtoDAsync(dstDevice, memAddress(srcHost), Integer.toUnsignedLong(srcHost.remaining()) << 2, hStream); } /** * Copies memory from Host to Device. * *

Copies from host memory to device memory. {@code dstDevice} and {@code srcHost} are the base addresses of the destination and source, respectively. * {@code ByteCount} specifies the number of bytes to copy.

* * @param dstDevice destination device pointer * @param srcHost source host pointer * @param hStream stream identifier */ @NativeType("CUresult") public static int cuMemcpyHtoDAsync(@NativeType("CUdeviceptr") long dstDevice, @NativeType("void const *") DoubleBuffer srcHost, @NativeType("CUstream") long hStream) { return ncuMemcpyHtoDAsync(dstDevice, memAddress(srcHost), Integer.toUnsignedLong(srcHost.remaining()) << 3, hStream); } /** * Copies memory from Host to Device. * *

Copies from host memory to device memory. {@code dstDevice} and {@code srcHost} are the base addresses of the destination and source, respectively. * {@code ByteCount} specifies the number of bytes to copy.

* * @param dstDevice destination device pointer * @param srcHost source host pointer * @param hStream stream identifier */ @NativeType("CUresult") public static int cuMemcpyHtoDAsync(@NativeType("CUdeviceptr") long dstDevice, @NativeType("void const *") PointerBuffer srcHost, @NativeType("CUstream") long hStream) { return ncuMemcpyHtoDAsync(dstDevice, memAddress(srcHost), Integer.toUnsignedLong(srcHost.remaining()) << POINTER_SHIFT, hStream); } // --- [ cuMemcpyDtoHAsync ] --- /** * Unsafe version of: {@link #cuMemcpyDtoHAsync MemcpyDtoHAsync} * * @param ByteCount size of memory copy in bytes */ public static int ncuMemcpyDtoHAsync(long dstHost, long srcDevice, long ByteCount, long hStream) { long __functionAddress = Functions.MemcpyDtoHAsync; if (CHECKS) { check(srcDevice); } return callPPPPI(dstHost, srcDevice, ByteCount, hStream, __functionAddress); } /** * Copies memory from Device to Host. * *

Copies from device to host memory. {@code dstHost} and {@code srcDevice} specify the base pointers of the destination and source, respectively. {@code ByteCount} specifies the number of bytes to copy.
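
A minimal sketch of an asynchronous read-back, assuming {@code dSrc} and {@code hStream} are placeholders created elsewhere; the host buffer is only safe to read after the stream has been synchronized:

    ByteBuffer hDst = memAlloc(1 << 20);
    int err = cuMemcpyDtoHAsync(hDst, dSrc, hStream); // ByteCount == hDst.remaining()
    if (err == CUDA_SUCCESS) {
        cuStreamSynchronize(hStream);                 // wait before touching hDst
    }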

* * @param dstHost destination host pointer * @param srcDevice source device pointer * @param hStream stream identifier */ @NativeType("CUresult") public static int cuMemcpyDtoHAsync(@NativeType("void *") ByteBuffer dstHost, @NativeType("CUdeviceptr") long srcDevice, @NativeType("CUstream") long hStream) { return ncuMemcpyDtoHAsync(memAddress(dstHost), srcDevice, dstHost.remaining(), hStream); } /** * Copies memory from Device to Host. * *

Copies from device to host memory. {@code dstHost} and {@code srcDevice} specify the base pointers of the destination and source, respectively. {@code * ByteCount} specifies the number of bytes to copy.

* * @param dstHost destination host pointer * @param srcDevice source device pointer * @param hStream stream identifier */ @NativeType("CUresult") public static int cuMemcpyDtoHAsync(@NativeType("void *") ShortBuffer dstHost, @NativeType("CUdeviceptr") long srcDevice, @NativeType("CUstream") long hStream) { return ncuMemcpyDtoHAsync(memAddress(dstHost), srcDevice, Integer.toUnsignedLong(dstHost.remaining()) << 1, hStream); } /** * Copies memory from Device to Host. * *

Copies from device to host memory. {@code dstHost} and {@code srcDevice} specify the base pointers of the destination and source, respectively. {@code * ByteCount} specifies the number of bytes to copy.

* * @param dstHost destination host pointer * @param srcDevice source device pointer * @param hStream stream identifier */ @NativeType("CUresult") public static int cuMemcpyDtoHAsync(@NativeType("void *") IntBuffer dstHost, @NativeType("CUdeviceptr") long srcDevice, @NativeType("CUstream") long hStream) { return ncuMemcpyDtoHAsync(memAddress(dstHost), srcDevice, Integer.toUnsignedLong(dstHost.remaining()) << 2, hStream); } /** * Copies memory from Device to Host. * *

Copies from device to host memory. {@code dstHost} and {@code srcDevice} specify the base pointers of the destination and source, respectively. {@code * ByteCount} specifies the number of bytes to copy.

* * @param dstHost destination host pointer * @param srcDevice source device pointer * @param hStream stream identifier */ @NativeType("CUresult") public static int cuMemcpyDtoHAsync(@NativeType("void *") LongBuffer dstHost, @NativeType("CUdeviceptr") long srcDevice, @NativeType("CUstream") long hStream) { return ncuMemcpyDtoHAsync(memAddress(dstHost), srcDevice, Integer.toUnsignedLong(dstHost.remaining()) << 3, hStream); } /** * Copies memory from Device to Host. * *

Copies from device to host memory. {@code dstHost} and {@code srcDevice} specify the base pointers of the destination and source, respectively. {@code * ByteCount} specifies the number of bytes to copy.

* * @param dstHost destination host pointer * @param srcDevice source device pointer * @param hStream stream identifier */ @NativeType("CUresult") public static int cuMemcpyDtoHAsync(@NativeType("void *") FloatBuffer dstHost, @NativeType("CUdeviceptr") long srcDevice, @NativeType("CUstream") long hStream) { return ncuMemcpyDtoHAsync(memAddress(dstHost), srcDevice, Integer.toUnsignedLong(dstHost.remaining()) << 2, hStream); } /** * Copies memory from Device to Host. * *

Copies from device to host memory. {@code dstHost} and {@code srcDevice} specify the base pointers of the destination and source, respectively. {@code * ByteCount} specifies the number of bytes to copy.

* * @param dstHost destination host pointer * @param srcDevice source device pointer * @param hStream stream identifier */ @NativeType("CUresult") public static int cuMemcpyDtoHAsync(@NativeType("void *") DoubleBuffer dstHost, @NativeType("CUdeviceptr") long srcDevice, @NativeType("CUstream") long hStream) { return ncuMemcpyDtoHAsync(memAddress(dstHost), srcDevice, Integer.toUnsignedLong(dstHost.remaining()) << 3, hStream); } /** * Copies memory from Device to Host. * *

Copies from device to host memory. {@code dstHost} and {@code srcDevice} specify the base pointers of the destination and source, respectively. {@code * ByteCount} specifies the number of bytes to copy.

* * @param dstHost destination host pointer * @param srcDevice source device pointer * @param hStream stream identifier */ @NativeType("CUresult") public static int cuMemcpyDtoHAsync(@NativeType("void *") PointerBuffer dstHost, @NativeType("CUdeviceptr") long srcDevice, @NativeType("CUstream") long hStream) { return ncuMemcpyDtoHAsync(memAddress(dstHost), srcDevice, Integer.toUnsignedLong(dstHost.remaining()) << POINTER_SHIFT, hStream); } // --- [ cuMemcpyDtoDAsync ] --- /** * Copies memory from Device to Device. * *

Copies from device memory to device memory. {@code dstDevice} and {@code srcDevice} are the base pointers of the destination and source, respectively. {@code ByteCount} specifies the number of bytes to copy.
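
A minimal sketch, assuming both device pointers and the stream are placeholders created elsewhere:

    int err = cuMemcpyDtoDAsync(dDst, dSrc, byteCount, hStream);
    if (err != CUDA_SUCCESS) {
        throw new IllegalStateException("cuMemcpyDtoDAsync failed: " + err);
    }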

* * @param dstDevice destination device pointer * @param srcDevice source device pointer * @param ByteCount size of memory copy in bytes * @param hStream stream identifier */ @NativeType("CUresult") public static int cuMemcpyDtoDAsync(@NativeType("CUdeviceptr") long dstDevice, @NativeType("CUdeviceptr") long srcDevice, @NativeType("size_t") long ByteCount, @NativeType("CUstream") long hStream) { long __functionAddress = Functions.MemcpyDtoDAsync; if (CHECKS) { check(dstDevice); check(srcDevice); } return callPPPPI(dstDevice, srcDevice, ByteCount, hStream, __functionAddress); } // --- [ cuMemcpyHtoAAsync ] --- /** * Unsafe version of: {@link #cuMemcpyHtoAAsync MemcpyHtoAAsync} * * @param ByteCount size of memory copy in bytes */ public static int ncuMemcpyHtoAAsync(long dstArray, long dstOffset, long srcHost, long ByteCount, long hStream) { long __functionAddress = Functions.MemcpyHtoAAsync; if (CHECKS) { check(dstArray); } return callPPPPPI(dstArray, dstOffset, srcHost, ByteCount, hStream, __functionAddress); } /** * Copies memory from Host to Array. * *

Copies from host memory to a 1D CUDA array. {@code dstArray} and {@code dstOffset} specify the CUDA array handle and starting offset in bytes of the destination data. {@code srcHost} specifies the base address of the source. {@code ByteCount} specifies the number of bytes to copy.
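
A minimal sketch, assuming {@code hArray} is a 1D CUDA array and {@code hStream} an existing stream (both placeholders); the buffer overloads infer the byte count from {@code remaining()}:

    ByteBuffer pixels = memAlloc(4096);                       // host data to upload
    int err = cuMemcpyHtoAAsync(hArray, 0L, pixels, hStream); // copy into the array at byte offset 0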

* * @param dstArray destination array * @param dstOffset offset in bytes of destination array * @param srcHost source host pointer * @param hStream stream identifier */ @NativeType("CUresult") public static int cuMemcpyHtoAAsync(@NativeType("CUarray") long dstArray, @NativeType("size_t") long dstOffset, @NativeType("void const *") ByteBuffer srcHost, @NativeType("CUstream") long hStream) { return ncuMemcpyHtoAAsync(dstArray, dstOffset, memAddress(srcHost), srcHost.remaining(), hStream); } /** * Copies memory from Host to Array. * *

Copies from host memory to a 1D CUDA array. {@code dstArray} and {@code dstOffset} specify the CUDA array handle and starting offset in bytes of the * destination data. {@code srcHost} specifies the base address of the source. {@code ByteCount} specifies the number of bytes to copy.

* * @param dstArray destination array * @param dstOffset offset in bytes of destination array * @param srcHost source host pointer * @param hStream stream identifier */ @NativeType("CUresult") public static int cuMemcpyHtoAAsync(@NativeType("CUarray") long dstArray, @NativeType("size_t") long dstOffset, @NativeType("void const *") ShortBuffer srcHost, @NativeType("CUstream") long hStream) { return ncuMemcpyHtoAAsync(dstArray, dstOffset, memAddress(srcHost), Integer.toUnsignedLong(srcHost.remaining()) << 1, hStream); } /** * Copies memory from Host to Array. * *

Copies from host memory to a 1D CUDA array. {@code dstArray} and {@code dstOffset} specify the CUDA array handle and starting offset in bytes of the * destination data. {@code srcHost} specifies the base address of the source. {@code ByteCount} specifies the number of bytes to copy.

* * @param dstArray destination array * @param dstOffset offset in bytes of destination array * @param srcHost source host pointer * @param hStream stream identifier */ @NativeType("CUresult") public static int cuMemcpyHtoAAsync(@NativeType("CUarray") long dstArray, @NativeType("size_t") long dstOffset, @NativeType("void const *") IntBuffer srcHost, @NativeType("CUstream") long hStream) { return ncuMemcpyHtoAAsync(dstArray, dstOffset, memAddress(srcHost), Integer.toUnsignedLong(srcHost.remaining()) << 2, hStream); } /** * Copies memory from Host to Array. * *

Copies from host memory to a 1D CUDA array. {@code dstArray} and {@code dstOffset} specify the CUDA array handle and starting offset in bytes of the * destination data. {@code srcHost} specifies the base address of the source. {@code ByteCount} specifies the number of bytes to copy.

* * @param dstArray destination array * @param dstOffset offset in bytes of destination array * @param srcHost source host pointer * @param hStream stream identifier */ @NativeType("CUresult") public static int cuMemcpyHtoAAsync(@NativeType("CUarray") long dstArray, @NativeType("size_t") long dstOffset, @NativeType("void const *") LongBuffer srcHost, @NativeType("CUstream") long hStream) { return ncuMemcpyHtoAAsync(dstArray, dstOffset, memAddress(srcHost), Integer.toUnsignedLong(srcHost.remaining()) << 3, hStream); } /** * Copies memory from Host to Array. * *

Copies from host memory to a 1D CUDA array. {@code dstArray} and {@code dstOffset} specify the CUDA array handle and starting offset in bytes of the * destination data. {@code srcHost} specifies the base address of the source. {@code ByteCount} specifies the number of bytes to copy.

* * @param dstArray destination array * @param dstOffset offset in bytes of destination array * @param srcHost source host pointer * @param hStream stream identifier */ @NativeType("CUresult") public static int cuMemcpyHtoAAsync(@NativeType("CUarray") long dstArray, @NativeType("size_t") long dstOffset, @NativeType("void const *") FloatBuffer srcHost, @NativeType("CUstream") long hStream) { return ncuMemcpyHtoAAsync(dstArray, dstOffset, memAddress(srcHost), Integer.toUnsignedLong(srcHost.remaining()) << 2, hStream); } /** * Copies memory from Host to Array. * *

Copies from host memory to a 1D CUDA array. {@code dstArray} and {@code dstOffset} specify the CUDA array handle and starting offset in bytes of the * destination data. {@code srcHost} specifies the base address of the source. {@code ByteCount} specifies the number of bytes to copy.

* * @param dstArray destination array * @param dstOffset offset in bytes of destination array * @param srcHost source host pointer * @param hStream stream identifier */ @NativeType("CUresult") public static int cuMemcpyHtoAAsync(@NativeType("CUarray") long dstArray, @NativeType("size_t") long dstOffset, @NativeType("void const *") DoubleBuffer srcHost, @NativeType("CUstream") long hStream) { return ncuMemcpyHtoAAsync(dstArray, dstOffset, memAddress(srcHost), Integer.toUnsignedLong(srcHost.remaining()) << 3, hStream); } /** * Copies memory from Host to Array. * *

Copies from host memory to a 1D CUDA array. {@code dstArray} and {@code dstOffset} specify the CUDA array handle and starting offset in bytes of the * destination data. {@code srcHost} specifies the base address of the source. {@code ByteCount} specifies the number of bytes to copy.

* * @param dstArray destination array * @param dstOffset offset in bytes of destination array * @param srcHost source host pointer * @param hStream stream identifier */ @NativeType("CUresult") public static int cuMemcpyHtoAAsync(@NativeType("CUarray") long dstArray, @NativeType("size_t") long dstOffset, @NativeType("void const *") PointerBuffer srcHost, @NativeType("CUstream") long hStream) { return ncuMemcpyHtoAAsync(dstArray, dstOffset, memAddress(srcHost), Integer.toUnsignedLong(srcHost.remaining()) << POINTER_SHIFT, hStream); } // --- [ cuMemcpyAtoHAsync ] --- /** * Unsafe version of: {@link #cuMemcpyAtoHAsync MemcpyAtoHAsync} * * @param ByteCount size of memory copy in bytes */ public static int ncuMemcpyAtoHAsync(long dstHost, long srcArray, long srcOffset, long ByteCount, long hStream) { long __functionAddress = Functions.MemcpyAtoHAsync; if (CHECKS) { check(srcArray); } return callPPPPPI(dstHost, srcArray, srcOffset, ByteCount, hStream, __functionAddress); } /** * Copies memory from Array to Host. * *

Copies from one 1D CUDA array to host memory. {@code dstHost} specifies the base pointer of the destination. {@code srcArray} and {@code srcOffset} specify the CUDA array handle and starting offset in bytes of the source data. {@code ByteCount} specifies the number of bytes to copy.
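
A minimal sketch, assuming {@code hArray} and {@code hStream} are placeholders created elsewhere:

    ByteBuffer hDst = memAlloc(4096);
    int err = cuMemcpyAtoHAsync(hDst, hArray, 0L, hStream); // ByteCount == hDst.remaining()
    cuStreamSynchronize(hStream);                           // hDst is valid only after the stream completes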

* * @param dstHost destination pointer * @param srcArray source array * @param srcOffset offset in bytes of source array * @param hStream stream identifier */ @NativeType("CUresult") public static int cuMemcpyAtoHAsync(@NativeType("void *") ByteBuffer dstHost, @NativeType("CUarray") long srcArray, @NativeType("size_t") long srcOffset, @NativeType("CUstream") long hStream) { return ncuMemcpyAtoHAsync(memAddress(dstHost), srcArray, srcOffset, dstHost.remaining(), hStream); } /** * Copies memory from Array to Host. * *

Copies from one 1D CUDA array to host memory. {@code dstHost} specifies the base pointer of the destination. {@code srcArray} and {@code srcOffset} * specify the CUDA array handle and starting offset in bytes of the source data. {@code ByteCount} specifies the number of bytes to copy.

* * @param dstHost destination pointer * @param srcArray source array * @param srcOffset offset in bytes of source array * @param hStream stream identifier */ @NativeType("CUresult") public static int cuMemcpyAtoHAsync(@NativeType("void *") ShortBuffer dstHost, @NativeType("CUarray") long srcArray, @NativeType("size_t") long srcOffset, @NativeType("CUstream") long hStream) { return ncuMemcpyAtoHAsync(memAddress(dstHost), srcArray, srcOffset, Integer.toUnsignedLong(dstHost.remaining()) << 1, hStream); } /** * Copies memory from Array to Host. * *

Copies from one 1D CUDA array to host memory. {@code dstHost} specifies the base pointer of the destination. {@code srcArray} and {@code srcOffset} * specify the CUDA array handle and starting offset in bytes of the source data. {@code ByteCount} specifies the number of bytes to copy.

* * @param dstHost destination pointer * @param srcArray source array * @param srcOffset offset in bytes of source array * @param hStream stream identifier */ @NativeType("CUresult") public static int cuMemcpyAtoHAsync(@NativeType("void *") IntBuffer dstHost, @NativeType("CUarray") long srcArray, @NativeType("size_t") long srcOffset, @NativeType("CUstream") long hStream) { return ncuMemcpyAtoHAsync(memAddress(dstHost), srcArray, srcOffset, Integer.toUnsignedLong(dstHost.remaining()) << 2, hStream); } /** * Copies memory from Array to Host. * *

Copies from one 1D CUDA array to host memory. {@code dstHost} specifies the base pointer of the destination. {@code srcArray} and {@code srcOffset} * specify the CUDA array handle and starting offset in bytes of the source data. {@code ByteCount} specifies the number of bytes to copy.

* * @param dstHost destination pointer * @param srcArray source array * @param srcOffset offset in bytes of source array * @param hStream stream identifier */ @NativeType("CUresult") public static int cuMemcpyAtoHAsync(@NativeType("void *") LongBuffer dstHost, @NativeType("CUarray") long srcArray, @NativeType("size_t") long srcOffset, @NativeType("CUstream") long hStream) { return ncuMemcpyAtoHAsync(memAddress(dstHost), srcArray, srcOffset, Integer.toUnsignedLong(dstHost.remaining()) << 3, hStream); } /** * Copies memory from Array to Host. * *

Copies from one 1D CUDA array to host memory. {@code dstHost} specifies the base pointer of the destination. {@code srcArray} and {@code srcOffset} * specify the CUDA array handle and starting offset in bytes of the source data. {@code ByteCount} specifies the number of bytes to copy.

* * @param dstHost destination pointer * @param srcArray source array * @param srcOffset offset in bytes of source array * @param hStream stream identifier */ @NativeType("CUresult") public static int cuMemcpyAtoHAsync(@NativeType("void *") FloatBuffer dstHost, @NativeType("CUarray") long srcArray, @NativeType("size_t") long srcOffset, @NativeType("CUstream") long hStream) { return ncuMemcpyAtoHAsync(memAddress(dstHost), srcArray, srcOffset, Integer.toUnsignedLong(dstHost.remaining()) << 2, hStream); } /** * Copies memory from Array to Host. * *

Copies from one 1D CUDA array to host memory. {@code dstHost} specifies the base pointer of the destination. {@code srcArray} and {@code srcOffset} * specify the CUDA array handle and starting offset in bytes of the source data. {@code ByteCount} specifies the number of bytes to copy.

* * @param dstHost destination pointer * @param srcArray source array * @param srcOffset offset in bytes of source array * @param hStream stream identifier */ @NativeType("CUresult") public static int cuMemcpyAtoHAsync(@NativeType("void *") DoubleBuffer dstHost, @NativeType("CUarray") long srcArray, @NativeType("size_t") long srcOffset, @NativeType("CUstream") long hStream) { return ncuMemcpyAtoHAsync(memAddress(dstHost), srcArray, srcOffset, Integer.toUnsignedLong(dstHost.remaining()) << 3, hStream); } /** * Copies memory from Array to Host. * *

Copies from one 1D CUDA array to host memory. {@code dstHost} specifies the base pointer of the destination. {@code srcArray} and {@code srcOffset} * specify the CUDA array handle and starting offset in bytes of the source data. {@code ByteCount} specifies the number of bytes to copy.

* * @param dstHost destination pointer * @param srcArray source array * @param srcOffset offset in bytes of source array * @param hStream stream identifier */ @NativeType("CUresult") public static int cuMemcpyAtoHAsync(@NativeType("void *") PointerBuffer dstHost, @NativeType("CUarray") long srcArray, @NativeType("size_t") long srcOffset, @NativeType("CUstream") long hStream) { return ncuMemcpyAtoHAsync(memAddress(dstHost), srcArray, srcOffset, Integer.toUnsignedLong(dstHost.remaining()) << POINTER_SHIFT, hStream); } // --- [ cuMemcpy2DAsync ] --- /** Unsafe version of: {@link #cuMemcpy2DAsync Memcpy2DAsync} */ public static int ncuMemcpy2DAsync(long pCopy, long hStream) { long __functionAddress = Functions.Memcpy2DAsync; return callPPI(pCopy, hStream, __functionAddress); } /** * Copies memory for 2D arrays. * *

Perform a 2D memory copy according to the parameters specified in {@code pCopy}.

If {@code srcMemoryType} is {@link #CU_MEMORYTYPE_HOST MEMORYTYPE_HOST}, {@code srcHost} and {@code srcPitch} specify the (host) base address of the source data and the bytes per row to apply. {@code srcArray} is ignored.

If {@code srcMemoryType} is {@link #CU_MEMORYTYPE_UNIFIED MEMORYTYPE_UNIFIED}, {@code srcDevice} and {@code srcPitch} specify the (unified virtual address space) base address of the source data and the bytes per row to apply. {@code srcArray} is ignored. This value may be used only if unified addressing is supported in the calling context.

If {@code srcMemoryType} is {@link #CU_MEMORYTYPE_DEVICE MEMORYTYPE_DEVICE}, {@code srcDevice} and {@code srcPitch} specify the (device) base address of the source data and the bytes per row to apply. {@code srcArray} is ignored.

If {@code srcMemoryType} is {@link #CU_MEMORYTYPE_ARRAY MEMORYTYPE_ARRAY}, {@code srcArray} specifies the handle of the source data. {@code srcHost}, {@code srcDevice} and {@code srcPitch} are ignored.

If {@code dstMemoryType} is {@link #CU_MEMORYTYPE_UNIFIED MEMORYTYPE_UNIFIED}, {@code dstDevice} and {@code dstPitch} specify the (unified virtual address space) base address of the destination data and the bytes per row to apply. {@code dstArray} is ignored. This value may be used only if unified addressing is supported in the calling context.

If {@code dstMemoryType} is {@link #CU_MEMORYTYPE_HOST MEMORYTYPE_HOST}, {@code dstHost} and {@code dstPitch} specify the (host) base address of the destination data and the bytes per row to apply. {@code dstArray} is ignored.

If {@code dstMemoryType} is {@link #CU_MEMORYTYPE_DEVICE MEMORYTYPE_DEVICE}, {@code dstDevice} and {@code dstPitch} specify the (device) base address of the destination data and the bytes per row to apply. {@code dstArray} is ignored.

If {@code dstMemoryType} is {@link #CU_MEMORYTYPE_ARRAY MEMORYTYPE_ARRAY}, {@code dstArray} specifies the handle of the destination data. {@code dstHost}, {@code dstDevice} and {@code dstPitch} are ignored.

{@code srcXInBytes} and {@code srcY} specify the base address of the source data for the copy.

For host pointers, the starting address is

     void* Start = (void*)((char*)srcHost+srcY*srcPitch + srcXInBytes);

For device pointers, the starting address is

     CUdeviceptr Start = srcDevice+srcY*srcPitch+srcXInBytes;

For CUDA arrays, {@code srcXInBytes} must be evenly divisible by the array element size.

{@code dstXInBytes} and {@code dstY} specify the base address of the destination data for the copy.

For host pointers, the base address is

     void* dstStart = (void*)((char*)dstHost+dstY*dstPitch + dstXInBytes);

For device pointers, the starting address is

     CUdeviceptr dstStart = dstDevice+dstY*dstPitch+dstXInBytes;

For CUDA arrays, {@code dstXInBytes} must be evenly divisible by the array element size.

{@code WidthInBytes} and {@code Height} specify the width (in bytes) and height of the 2D copy being performed.

If specified, {@code srcPitch} must be greater than or equal to {@code WidthInBytes} + {@code srcXInBytes}, and {@code dstPitch} must be greater than or equal to {@code WidthInBytes} + {@code dstXInBytes}.

If specified, {@code srcHeight} must be greater than or equal to {@code Height} + {@code srcY}, and {@code dstHeight} must be greater than or equal to {@code Height} + {@code dstY}.

{@code cuMemcpy2DAsync()} returns an error if any pitch is greater than the maximum allowed ({@link #CU_DEVICE_ATTRIBUTE_MAX_PITCH DEVICE_ATTRIBUTE_MAX_PITCH}). {@link #cuMemAllocPitch MemAllocPitch} passes back pitches that always work with {@link #cuMemcpy2D Memcpy2D}. On intra-device memory copies (device to device, CUDA array to device, CUDA array to CUDA array), {@code cuMemcpy2DAsync()} may fail for pitches not computed by {@code cuMemAllocPitch()}.
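
A minimal host-to-device sketch of filling the descriptor from Java, assuming the LWJGL {@code CUDA_MEMCPY2D} struct exposes fluent setters named after its C fields (as LWJGL structs generally do) and that {@code hSrc}, {@code dDst}, {@code pitch}, {@code widthInBytes}, {@code height} and {@code hStream} are placeholders created elsewhere:

    try (MemoryStack stack = stackPush()) {
        CUDA_MEMCPY2D copy = CUDA_MEMCPY2D.calloc(stack)
            .srcMemoryType(CU_MEMORYTYPE_HOST)
            .srcHost(hSrc)                   // direct ByteBuffer holding the source rows
            .srcPitch(widthInBytes)
            .dstMemoryType(CU_MEMORYTYPE_DEVICE)
            .dstDevice(dDst)
            .dstPitch(pitch)                 // e.g. the pitch returned by cuMemAllocPitch
            .WidthInBytes(widthInBytes)
            .Height(height);
        int err = cuMemcpy2DAsync(copy, hStream);
    }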

* * @param pCopy parameters for the memory copy * @param hStream stream identifier */ @NativeType("CUresult") public static int cuMemcpy2DAsync(@NativeType("CUDA_MEMCPY2D const *") CUDA_MEMCPY2D pCopy, @NativeType("CUstream") long hStream) { return ncuMemcpy2DAsync(pCopy.address(), hStream); } // --- [ cuMemcpy3DAsync ] --- /** Unsafe version of: {@link #cuMemcpy3DAsync Memcpy3DAsync} */ public static int ncuMemcpy3DAsync(long pCopy, long hStream) { long __functionAddress = Functions.Memcpy3DAsync; return callPPI(pCopy, hStream, __functionAddress); } /** * Copies memory for 3D arrays. * *

Perform a 3D memory copy according to the parameters specified in {@code pCopy}.

If {@code srcMemoryType} is {@link #CU_MEMORYTYPE_UNIFIED MEMORYTYPE_UNIFIED}, {@code srcDevice} and {@code srcPitch} specify the (unified virtual address space) base address of the source data and the bytes per row to apply. {@code srcArray} is ignored. This value may be used only if unified addressing is supported in the calling context.

If {@code srcMemoryType} is {@link #CU_MEMORYTYPE_HOST MEMORYTYPE_HOST}, {@code srcHost}, {@code srcPitch} and {@code srcHeight} specify the (host) base address of the source data, the bytes per row, and the height of each 2D slice of the 3D array. {@code srcArray} is ignored.

If {@code srcMemoryType} is {@link #CU_MEMORYTYPE_DEVICE MEMORYTYPE_DEVICE}, {@code srcDevice}, {@code srcPitch} and {@code srcHeight} specify the (device) base address of the source data, the bytes per row, and the height of each 2D slice of the 3D array. {@code srcArray} is ignored.

If {@code srcMemoryType} is {@link #CU_MEMORYTYPE_ARRAY MEMORYTYPE_ARRAY}, {@code srcArray} specifies the handle of the source data. {@code srcHost}, {@code srcDevice}, {@code srcPitch} and {@code srcHeight} are ignored.

If {@code dstMemoryType} is {@link #CU_MEMORYTYPE_UNIFIED MEMORYTYPE_UNIFIED}, {@code dstDevice} and {@code dstPitch} specify the (unified virtual address space) base address of the destination data and the bytes per row to apply. {@code dstArray} is ignored. This value may be used only if unified addressing is supported in the calling context.

If {@code dstMemoryType} is {@link #CU_MEMORYTYPE_HOST MEMORYTYPE_HOST}, {@code dstHost}, {@code dstPitch} and {@code dstHeight} specify the (host) base address of the destination data, the bytes per row, and the height of each 2D slice of the 3D array. {@code dstArray} is ignored.

If {@code dstMemoryType} is {@link #CU_MEMORYTYPE_DEVICE MEMORYTYPE_DEVICE}, {@code dstDevice}, {@code dstPitch} and {@code dstHeight} specify the (device) base address of the destination data, the bytes per row, and the height of each 2D slice of the 3D array. {@code dstArray} is ignored.

If {@code dstMemoryType} is {@link #CU_MEMORYTYPE_ARRAY MEMORYTYPE_ARRAY}, {@code dstArray} specifies the handle of the destination data. {@code dstHost}, {@code dstDevice}, {@code dstPitch} and {@code dstHeight} are ignored.

{@code srcXInBytes}, {@code srcY} and {@code srcZ} specify the base address of the source data for the copy.

For host pointers, the starting address is

     void* Start = (void*)((char*)srcHost+(srcZ*srcHeight+srcY)*srcPitch + srcXInBytes);

For device pointers, the starting address is

     CUdeviceptr Start = srcDevice+(srcZ*srcHeight+srcY)*srcPitch+srcXInBytes;

For CUDA arrays, {@code srcXInBytes} must be evenly divisible by the array element size.

{@code dstXInBytes}, {@code dstY} and {@code dstZ} specify the base address of the destination data for the copy.

For host pointers, the base address is

     void* dstStart = (void*)((char*)dstHost+(dstZ*dstHeight+dstY)*dstPitch + dstXInBytes);

For device pointers, the starting address is

     CUdeviceptr dstStart = dstDevice+(dstZ*dstHeight+dstY)*dstPitch+dstXInBytes;

For CUDA arrays, {@code dstXInBytes} must be evenly divisible by the array element size.

{@code WidthInBytes}, {@code Height} and {@code Depth} specify the width (in bytes), height and depth of the 3D copy being performed.

If specified, {@code srcPitch} must be greater than or equal to {@code WidthInBytes} + {@code srcXInBytes}, and {@code dstPitch} must be greater than or equal to {@code WidthInBytes} + {@code dstXInBytes}.

If specified, {@code srcHeight} must be greater than or equal to {@code Height} + {@code srcY}, and {@code dstHeight} must be greater than or equal to {@code Height} + {@code dstY}.

{@link #cuMemcpy3DAsync Memcpy3DAsync} returns an error if any pitch is greater than the maximum allowed ({@link #CU_DEVICE_ATTRIBUTE_MAX_PITCH DEVICE_ATTRIBUTE_MAX_PITCH}).

The {@code srcLOD} and {@code dstLOD} members of the {@code CUDA_MEMCPY3D} structure must be set to 0.
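
A minimal host-to-device sketch, under the same assumption that the LWJGL {@code CUDA_MEMCPY3D} struct exposes fluent setters named after its C fields; all handles and sizes below are placeholders. Allocating the struct with {@code calloc} zero-initializes it, so {@code srcLOD} and {@code dstLOD} are already 0 as required:

    try (MemoryStack stack = stackPush()) {
        CUDA_MEMCPY3D copy = CUDA_MEMCPY3D.calloc(stack)
            .srcMemoryType(CU_MEMORYTYPE_HOST)
            .srcHost(hSrc)
            .srcPitch(widthInBytes)
            .srcHeight(height)
            .dstMemoryType(CU_MEMORYTYPE_DEVICE)
            .dstDevice(dDst)
            .dstPitch(pitch)
            .dstHeight(height)
            .WidthInBytes(widthInBytes)
            .Height(height)
            .Depth(depth);
        int err = cuMemcpy3DAsync(copy, hStream);
    }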

* * @param pCopy parameters for the memory copy * @param hStream stream identifier */ @NativeType("CUresult") public static int cuMemcpy3DAsync(@NativeType("CUDA_MEMCPY3D const *") CUDA_MEMCPY3D pCopy, @NativeType("CUstream") long hStream) { return ncuMemcpy3DAsync(pCopy.address(), hStream); } // --- [ cuMemcpy3DPeerAsync ] --- /** Unsafe version of: {@link #cuMemcpy3DPeerAsync Memcpy3DPeerAsync} */ public static int ncuMemcpy3DPeerAsync(long pCopy, long hStream) { long __functionAddress = Functions.Memcpy3DPeerAsync; if (CHECKS) { check(__functionAddress); } return callPPI(pCopy, hStream, __functionAddress); } /** * Copies memory between contexts asynchronously. * *

Perform a 3D memory copy according to the parameters specified in {@code pCopy}.

* * @param pCopy parameters for the memory copy * @param hStream stream identifier */ @NativeType("CUresult") public static int cuMemcpy3DPeerAsync(@NativeType("CUDA_MEMCPY3D_PEER const *") CUDA_MEMCPY3D_PEER pCopy, @NativeType("CUstream") long hStream) { return ncuMemcpy3DPeerAsync(pCopy.address(), hStream); } // --- [ cuMemsetD8 ] --- /** * Initializes device memory. * *

Sets the memory range of {@code N} 8-bit values to the specified value {@code uc}.
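
A minimal sketch, assuming {@code dPtr} is a device allocation of at least {@code byteCount} bytes (both placeholders):

    int err = cuMemsetD8(dPtr, (byte)0, byteCount); // zero the whole range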

* * @param dstDevice destination device pointer * @param uc value to set * @param N number of elements */ @NativeType("CUresult") public static int cuMemsetD8(@NativeType("CUdeviceptr") long dstDevice, @NativeType("unsigned char") byte uc, @NativeType("size_t") long N) { long __functionAddress = Functions.MemsetD8; if (CHECKS) { check(dstDevice); } return callPPI(dstDevice, uc, N, __functionAddress); } // --- [ cuMemsetD16 ] --- /** * Initializes device memory. * *

Sets the memory range of {@code N} 16-bit values to the specified value {@code us}. The {@code dstDevice} pointer must be two byte aligned.

* * @param dstDevice destination device pointer * @param us value to set * @param N number of elements */ @NativeType("CUresult") public static int cuMemsetD16(@NativeType("CUdeviceptr") long dstDevice, @NativeType("unsigned short") short us, @NativeType("size_t") long N) { long __functionAddress = Functions.MemsetD16; if (CHECKS) { check(dstDevice); } return callPPI(dstDevice, us, N, __functionAddress); } // --- [ cuMemsetD32 ] --- /** * Initializes device memory * *

Sets the memory range of {@code N} 32-bit values to the specified value {@code ui}. The {@code dstDevice} pointer must be four byte aligned.

* * @param dstDevice destination device pointer * @param ui value to set * @param N number of elements */ @NativeType("CUresult") public static int cuMemsetD32(@NativeType("CUdeviceptr") long dstDevice, @NativeType("unsigned int") int ui, @NativeType("size_t") long N) { long __functionAddress = Functions.MemsetD32; if (CHECKS) { check(dstDevice); } return callPPI(dstDevice, ui, N, __functionAddress); } // --- [ cuMemsetD2D8 ] --- /** * Initializes device memory. * *

Sets the 2D memory range of {@code Width} 8-bit values to the specified value {@code uc}. {@code Height} specifies the number of rows to set, and {@code dstPitch} specifies the number of bytes between each row. This function performs fastest when the pitch is one that has been passed back by {@link #cuMemAllocPitch MemAllocPitch}.
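
A minimal sketch, assuming {@code dPtr} and {@code pitch} came from an earlier {@link #cuMemAllocPitch MemAllocPitch} call and the remaining names are placeholders:

    int err = cuMemsetD2D8(dPtr, pitch, (byte)0xFF, widthInBytes, height); // fill Width bytes in each of Height rows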

* * @param dstDevice destination device pointer * @param dstPitch pitch of destination device pointer(Unused if {@code Height} is 1) * @param uc value to set * @param Width width of row * @param Height number of rows */ @NativeType("CUresult") public static int cuMemsetD2D8(@NativeType("CUdeviceptr") long dstDevice, @NativeType("size_t") long dstPitch, @NativeType("unsigned char") byte uc, @NativeType("size_t") long Width, @NativeType("size_t") long Height) { long __functionAddress = Functions.MemsetD2D8; if (CHECKS) { check(dstDevice); } return callPPPPI(dstDevice, dstPitch, uc, Width, Height, __functionAddress); } // --- [ cuMemsetD2D16 ] --- /** * Initializes device memory. * *

Sets the 2D memory range of {@code Width} 16-bit values to the specified value {@code us}. {@code Height} specifies the number of rows to set, and {@code dstPitch} specifies the number of bytes between each row. The {@code dstDevice} pointer and {@code dstPitch} offset must be two byte aligned. This function performs fastest when the pitch is one that has been passed back by {@link #cuMemAllocPitch MemAllocPitch}.

* * @param dstDevice destination device pointer * @param dstPitch pitch of destination device pointer(Unused if {@code Height} is 1) * @param us value to set * @param Width width of row * @param Height number of rows */ @NativeType("CUresult") public static int cuMemsetD2D16(@NativeType("CUdeviceptr") long dstDevice, @NativeType("size_t") long dstPitch, @NativeType("unsigned short") short us, @NativeType("size_t") long Width, @NativeType("size_t") long Height) { long __functionAddress = Functions.MemsetD2D16; if (CHECKS) { check(dstDevice); } return callPPPPI(dstDevice, dstPitch, us, Width, Height, __functionAddress); } // --- [ cuMemsetD2D32 ] --- /** * Initializes device memory. * *

Sets the 2D memory range of {@code Width} 32-bit values to the specified value {@code ui}. {@code Height} specifies the number of rows to set, and {@code dstPitch} specifies the number of bytes between each row. The {@code dstDevice} pointer and {@code dstPitch} offset must be four byte aligned. This function performs fastest when the pitch is one that has been passed back by {@link #cuMemAllocPitch MemAllocPitch}.

* * @param dstDevice destination device pointer * @param dstPitch pitch of destination device pointer(Unused if {@code Height} is 1) * @param ui value to set * @param Width width of row * @param Height number of rows */ @NativeType("CUresult") public static int cuMemsetD2D32(@NativeType("CUdeviceptr") long dstDevice, @NativeType("size_t") long dstPitch, @NativeType("unsigned int") int ui, @NativeType("size_t") long Width, @NativeType("size_t") long Height) { long __functionAddress = Functions.MemsetD2D32; if (CHECKS) { check(dstDevice); } return callPPPPI(dstDevice, dstPitch, ui, Width, Height, __functionAddress); } // --- [ cuMemsetD8Async ] --- /** * Sets device memory * *

Sets the memory range of {@code N} 8-bit values to the specified value {@code uc}.

* * @param dstDevice destination device pointer * @param uc value to set * @param N number of elements * @param hStream stream identifier */ @NativeType("CUresult") public static int cuMemsetD8Async(@NativeType("CUdeviceptr") long dstDevice, @NativeType("unsigned char") byte uc, @NativeType("size_t") long N, @NativeType("CUstream") long hStream) { long __functionAddress = Functions.MemsetD8Async; if (CHECKS) { check(dstDevice); } return callPPPI(dstDevice, uc, N, hStream, __functionAddress); } // --- [ cuMemsetD16Async ] --- /** * Sets device memory * *

Sets the memory range of {@code N} 16-bit values to the specified value {@code us}. The {@code dstDevice} pointer must be two byte aligned.

* * @param dstDevice destination device pointer * @param us value to set * @param N number of elements * @param hStream stream identifier */ @NativeType("CUresult") public static int cuMemsetD16Async(@NativeType("CUdeviceptr") long dstDevice, @NativeType("unsigned short") short us, @NativeType("size_t") long N, @NativeType("CUstream") long hStream) { long __functionAddress = Functions.MemsetD16Async; if (CHECKS) { check(dstDevice); } return callPPPI(dstDevice, us, N, hStream, __functionAddress); } // --- [ cuMemsetD32Async ] --- /** * Sets device memory. * *

Sets the memory range of {@code N} 32-bit values to the specified value {@code ui}. The {@code dstDevice} pointer must be four byte aligned.
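
A minimal sketch, assuming {@code dPtr} is a 4-byte aligned device pointer and {@code hStream} an existing stream (placeholders):

    int err = cuMemsetD32Async(dPtr, 0xDEADBEEF, n, hStream); // fill n 32-bit words without blocking the host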

* * @param dstDevice destination device pointer * @param ui value to set * @param N number of elements * @param hStream stream identifier */ @NativeType("CUresult") public static int cuMemsetD32Async(@NativeType("CUdeviceptr") long dstDevice, @NativeType("unsigned int") int ui, @NativeType("size_t") long N, @NativeType("CUstream") long hStream) { long __functionAddress = Functions.MemsetD32Async; if (CHECKS) { check(dstDevice); } return callPPPI(dstDevice, ui, N, hStream, __functionAddress); } // --- [ cuMemsetD2D8Async ] --- /** * Sets device memory. * *

Sets the 2D memory range of {@code Width} 8-bit values to the specified value {@code uc}. {@code Height} specifies the number of rows to set, and {@code dstPitch} specifies the number of bytes between each row. This function performs fastest when the pitch is one that has been passed back by {@link #cuMemAllocPitch MemAllocPitch}.

* * @param dstDevice destination device pointer * @param dstPitch pitch of destination device pointer(Unused if {@code Height} is 1) * @param uc value to set * @param Width width of row * @param Height number of rows * @param hStream stream identifier */ @NativeType("CUresult") public static int cuMemsetD2D8Async(@NativeType("CUdeviceptr") long dstDevice, @NativeType("size_t") long dstPitch, @NativeType("unsigned char") byte uc, @NativeType("size_t") long Width, @NativeType("size_t") long Height, @NativeType("CUstream") long hStream) { long __functionAddress = Functions.MemsetD2D8Async; if (CHECKS) { check(dstDevice); } return callPPPPPI(dstDevice, dstPitch, uc, Width, Height, hStream, __functionAddress); } // --- [ cuMemsetD2D16Async ] --- /** * Sets device memory. * *

Sets the 2D memory range of {@code Width} 16-bit values to the specified value {@code us}. {@code Height} specifies the number of rows to set, and {@code dstPitch} specifies the number of bytes between each row. The {@code dstDevice} pointer and {@code dstPitch} offset must be two byte aligned. This function performs fastest when the pitch is one that has been passed back by {@link #cuMemAllocPitch MemAllocPitch}.

* * @param dstDevice destination device pointer * @param dstPitch pitch of destination device pointer(Unused if {@code Height} is 1) * @param us value to set * @param Width width of row * @param Height number of rows * @param hStream stream identifier */ @NativeType("CUresult") public static int cuMemsetD2D16Async(@NativeType("CUdeviceptr") long dstDevice, @NativeType("size_t") long dstPitch, @NativeType("unsigned short") short us, @NativeType("size_t") long Width, @NativeType("size_t") long Height, @NativeType("CUstream") long hStream) { long __functionAddress = Functions.MemsetD2D16Async; if (CHECKS) { check(dstDevice); } return callPPPPPI(dstDevice, dstPitch, us, Width, Height, hStream, __functionAddress); } // --- [ cuMemsetD2D32Async ] --- /** * Sets device memory. * *

Sets the 2D memory range of {@code Width} 32-bit values to the specified value {@code ui}. {@code Height} specifies the number of rows to set, and {@code dstPitch} specifies the number of bytes between each row. The {@code dstDevice} pointer and {@code dstPitch} offset must be four byte aligned. This function performs fastest when the pitch is one that has been passed back by {@link #cuMemAllocPitch MemAllocPitch}.

* * @param dstDevice destination device pointer * @param dstPitch pitch of destination device pointer(Unused if {@code Height} is 1) * @param ui value to set * @param Width width of row * @param Height number of rows * @param hStream stream identifier */ @NativeType("CUresult") public static int cuMemsetD2D32Async(@NativeType("CUdeviceptr") long dstDevice, @NativeType("size_t") long dstPitch, @NativeType("unsigned int") int ui, @NativeType("size_t") long Width, @NativeType("size_t") long Height, @NativeType("CUstream") long hStream) { long __functionAddress = Functions.MemsetD2D32Async; if (CHECKS) { check(dstDevice); } return callPPPPPI(dstDevice, dstPitch, ui, Width, Height, hStream, __functionAddress); } // --- [ cuArrayCreate ] --- /** Unsafe version of: {@link #cuArrayCreate ArrayCreate} */ public static int ncuArrayCreate(long pHandle, long pAllocateArray) { long __functionAddress = Functions.ArrayCreate; return callPPI(pHandle, pAllocateArray, __functionAddress); } /** * Creates a 1D or 2D CUDA array. * *

Creates a CUDA array according to the {@code CUDA_ARRAY_DESCRIPTOR} structure {@code pAllocateArray} and returns a handle to the new CUDA array in {@code *pHandle}.
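
A minimal sketch of creating a 2D array, assuming the LWJGL {@code CUDA_ARRAY_DESCRIPTOR} struct exposes fluent setters named after its C fields (as LWJGL structs generally do):

    try (MemoryStack stack = stackPush()) {
        CUDA_ARRAY_DESCRIPTOR desc = CUDA_ARRAY_DESCRIPTOR.calloc(stack)
            .Width(1024)
            .Height(1024)
            .Format(CU_AD_FORMAT_UNSIGNED_INT8)
            .NumChannels(4);                    // e.g. an RGBA8-style 2D array
        PointerBuffer pHandle = stack.mallocPointer(1);
        int err = cuArrayCreate(pHandle, desc);
        long hArray = pHandle.get(0);
    }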

* * @param pHandle returned array * @param pAllocateArray array descriptor */ @NativeType("CUresult") public static int cuArrayCreate(@NativeType("CUarray *") PointerBuffer pHandle, @NativeType("CUDA_ARRAY_DESCRIPTOR const *") CUDA_ARRAY_DESCRIPTOR pAllocateArray) { if (CHECKS) { check(pHandle, 1); } return ncuArrayCreate(memAddress(pHandle), pAllocateArray.address()); } // --- [ cuArrayGetDescriptor ] --- /** Unsafe version of: {@link #cuArrayGetDescriptor ArrayGetDescriptor} */ public static int ncuArrayGetDescriptor(long pArrayDescriptor, long hArray) { long __functionAddress = Functions.ArrayGetDescriptor; if (CHECKS) { check(hArray); } return callPPI(pArrayDescriptor, hArray, __functionAddress); } /** * Get a 1D or 2D CUDA array descriptor. * *

Returns in {@code *pArrayDescriptor} a descriptor containing information on the format and dimensions of the CUDA array {@code hArray}. It is useful for subroutines that have been passed a CUDA array, but need to know the CUDA array parameters for validation or other purposes.

* * @param pArrayDescriptor returned array descriptor * @param hArray array to get descriptor of */ @NativeType("CUresult") public static int cuArrayGetDescriptor(@NativeType("CUDA_ARRAY_DESCRIPTOR *") CUDA_ARRAY_DESCRIPTOR pArrayDescriptor, @NativeType("CUarray") long hArray) { return ncuArrayGetDescriptor(pArrayDescriptor.address(), hArray); } // --- [ cuArrayGetSparseProperties ] --- /** Unsafe version of: {@link #cuArrayGetSparseProperties ArrayGetSparseProperties} */ public static int ncuArrayGetSparseProperties(long sparseProperties, long array) { long __functionAddress = Functions.ArrayGetSparseProperties; if (CHECKS) { check(__functionAddress); check(array); } return callPPI(sparseProperties, array, __functionAddress); } /** * Returns the layout properties of a sparse CUDA array. * *

Returns the layout properties of a sparse CUDA array in {@code sparseProperties}. If the CUDA array is not allocated with flag {@link #CUDA_ARRAY3D_SPARSE}, {@link #CUDA_ERROR_INVALID_VALUE} will be returned.

If the returned value in {@link CUDA_ARRAY_SPARSE_PROPERTIES}{@code ::flags} contains {@link #CU_ARRAY_SPARSE_PROPERTIES_SINGLE_MIPTAIL ARRAY_SPARSE_PROPERTIES_SINGLE_MIPTAIL}, then {@code CUDA_ARRAY_SPARSE_PROPERTIES::miptailSize} represents the total size of the array. Otherwise, it will be zero. Also, the returned value in {@code CUDA_ARRAY_SPARSE_PROPERTIES::miptailFirstLevel} is always zero. Note that the {@code array} must have been allocated using {@link #cuArrayCreate ArrayCreate} or {@link #cuArray3DCreate Array3DCreate}. For CUDA arrays obtained using {@link #cuMipmappedArrayGetLevel MipmappedArrayGetLevel}, {@link #CUDA_ERROR_INVALID_VALUE} will be returned. Instead, {@link #cuMipmappedArrayGetSparseProperties MipmappedArrayGetSparseProperties} must be used to obtain the sparse properties of the entire CUDA mipmapped array to which {@code array} belongs.
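
A minimal query sketch, assuming {@code hArray} was allocated with {@link #CUDA_ARRAY3D_SPARSE} and that the LWJGL struct getters mirror the C field names:

    try (MemoryStack stack = stackPush()) {
        CUDA_ARRAY_SPARSE_PROPERTIES props = CUDA_ARRAY_SPARSE_PROPERTIES.calloc(stack);
        if (cuArrayGetSparseProperties(props, hArray) == CUDA_SUCCESS) {
            long miptailSize = props.miptailSize();
        }
    }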

* * @param sparseProperties pointer to {@code CUDA_ARRAY_SPARSE_PROPERTIES} * @param array CUDA array to get the sparse properties of */ @NativeType("CUresult") public static int cuArrayGetSparseProperties(@NativeType("CUDA_ARRAY_SPARSE_PROPERTIES *") CUDA_ARRAY_SPARSE_PROPERTIES sparseProperties, @NativeType("CUarray") long array) { return ncuArrayGetSparseProperties(sparseProperties.address(), array); } // --- [ cuMipmappedArrayGetSparseProperties ] --- /** Unsafe version of: {@link #cuMipmappedArrayGetSparseProperties MipmappedArrayGetSparseProperties} */ public static int ncuMipmappedArrayGetSparseProperties(long sparseProperties, long mipmap) { long __functionAddress = Functions.MipmappedArrayGetSparseProperties; if (CHECKS) { check(__functionAddress); check(mipmap); } return callPPI(sparseProperties, mipmap, __functionAddress); } /** * Returns the layout properties of a sparse CUDA mipmapped array. * *

Returns the sparse array layout properties in {@code sparseProperties}. If the CUDA mipmapped array is not allocated with flag {@link #CUDA_ARRAY3D_SPARSE}, {@link #CUDA_ERROR_INVALID_VALUE} will be returned.

For non-layered CUDA mipmapped arrays, {@link CUDA_ARRAY_SPARSE_PROPERTIES}{@code ::miptailSize} returns the size of the mip tail region. The mip tail region includes all mip levels whose width, height or depth is less than that of the tile. For layered CUDA mipmapped arrays, if {@code CUDA_ARRAY_SPARSE_PROPERTIES::flags} contains {@link #CU_ARRAY_SPARSE_PROPERTIES_SINGLE_MIPTAIL ARRAY_SPARSE_PROPERTIES_SINGLE_MIPTAIL}, then {@code CUDA_ARRAY_SPARSE_PROPERTIES::miptailSize} specifies the size of the mip tail of all layers combined. Otherwise, {@code CUDA_ARRAY_SPARSE_PROPERTIES::miptailSize} specifies mip tail size per layer. The returned value of {@code CUDA_ARRAY_SPARSE_PROPERTIES::miptailFirstLevel} is valid only if {@code CUDA_ARRAY_SPARSE_PROPERTIES::miptailSize} is non-zero.

* * @param sparseProperties pointer to {@code CUDA_ARRAY_SPARSE_PROPERTIES} * @param mipmap CUDA mipmapped array to get the sparse properties of */ @NativeType("CUresult") public static int cuMipmappedArrayGetSparseProperties(@NativeType("CUDA_ARRAY_SPARSE_PROPERTIES *") CUDA_ARRAY_SPARSE_PROPERTIES sparseProperties, @NativeType("CUmipmappedArray") long mipmap) { return ncuMipmappedArrayGetSparseProperties(sparseProperties.address(), mipmap); } // --- [ cuArrayGetPlane ] --- /** Unsafe version of: {@link #cuArrayGetPlane ArrayGetPlane} */ public static int ncuArrayGetPlane(long pPlaneArray, long hArray, int planeIdx) { long __functionAddress = Functions.ArrayGetPlane; if (CHECKS) { check(__functionAddress); check(hArray); } return callPPI(pPlaneArray, hArray, planeIdx, __functionAddress); } /** * Gets a CUDA array plane from a CUDA array. * *

Returns in {@code pPlaneArray} a CUDA array that represents a single format plane of the CUDA array {@code hArray}.

If {@code planeIdx} is greater than the maximum number of planes in this array or if the array does not have a multi-planar format (e.g. {@link #CU_AD_FORMAT_NV12 AD_FORMAT_NV12}), then {@link #CUDA_ERROR_INVALID_VALUE} is returned.

Note that if {@code hArray} has format {@link #CU_AD_FORMAT_NV12 AD_FORMAT_NV12}, then passing in 0 for {@code planeIdx} returns a CUDA array of the same size as {@code hArray} but with one channel and {@link #CU_AD_FORMAT_UNSIGNED_INT8 AD_FORMAT_UNSIGNED_INT8} as its format. If 1 is passed for {@code planeIdx}, then the returned CUDA array has half the height and width of {@code hArray} with two channels and {@link #CU_AD_FORMAT_UNSIGNED_INT8 AD_FORMAT_UNSIGNED_INT8} as its format.
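
A minimal sketch, assuming {@code hNv12Array} is an existing {@link #CU_AD_FORMAT_NV12 AD_FORMAT_NV12} array (a placeholder handle):

    try (MemoryStack stack = stackPush()) {
        PointerBuffer pPlane = stack.mallocPointer(1);
        int err = cuArrayGetPlane(pPlane, hNv12Array, 0); // plane 0 = the single-channel luma plane
        long lumaArray = pPlane.get(0);
    }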

* * @param pPlaneArray returned CUDA array referenced by the {@code planeIdx} * @param hArray multiplanar CUDA array * @param planeIdx plane index */ @NativeType("CUresult") public static int cuArrayGetPlane(@NativeType("CUarray *") PointerBuffer pPlaneArray, @NativeType("CUarray") long hArray, @NativeType("unsigned int") int planeIdx) { if (CHECKS) { check(pPlaneArray, 1); } return ncuArrayGetPlane(memAddress(pPlaneArray), hArray, planeIdx); } // --- [ cuArrayDestroy ] --- /** * Destroys a CUDA array. * *

Destroys the CUDA array {@code hArray}.

* * @param hArray array to destroy */ @NativeType("CUresult") public static int cuArrayDestroy(@NativeType("CUarray") long hArray) { long __functionAddress = Functions.ArrayDestroy; if (CHECKS) { check(hArray); } return callPI(hArray, __functionAddress); } // --- [ cuArray3DCreate ] --- /** Unsafe version of: {@link #cuArray3DCreate Array3DCreate} */ public static int ncuArray3DCreate(long pHandle, long pAllocateArray) { long __functionAddress = Functions.Array3DCreate; return callPPI(pHandle, pAllocateArray, __functionAddress); } /** * Creates a 3D CUDA array. * *

Creates a CUDA array according to the {@link CUDA_ARRAY3D_DESCRIPTOR} structure {@code pAllocateArray} and returns a handle to the new CUDA array in {@code *pHandle}.

  • {@code Width}, {@code Height}, and {@code Depth} are the width, height, and depth of the CUDA array (in elements); the following types of CUDA arrays can be allocated:
      • A 1D array is allocated if {@code Height} and {@code Depth} extents are both zero.
      • A 2D array is allocated if only {@code Depth} extent is zero.
      • A 3D array is allocated if all three extents are non-zero.
      • A 1D layered CUDA array is allocated if only {@code Height} is zero and the {@link #CUDA_ARRAY3D_LAYERED} flag is set. Each layer is a 1D array. The number of layers is determined by the depth extent.
      • A 2D layered CUDA array is allocated if all three extents are non-zero and the {@link #CUDA_ARRAY3D_LAYERED} flag is set. Each layer is a 2D array. The number of layers is determined by the depth extent.
      • A cubemap CUDA array is allocated if all three extents are non-zero and the {@link #CUDA_ARRAY3D_CUBEMAP} flag is set. {@code Width} must be equal to {@code Height}, and {@code Depth} must be six. A cubemap is a special type of 2D layered CUDA array, where the six layers represent the six faces of a cube. The order of the six layers in memory is the same as that listed in {@code CUarray_cubemap_face}.
      • A cubemap layered CUDA array is allocated if all three extents are non-zero, and both the {@link #CUDA_ARRAY3D_CUBEMAP} and {@link #CUDA_ARRAY3D_LAYERED} flags are set. {@code Width} must be equal to {@code Height}, and {@code Depth} must be a multiple of six. A cubemap layered CUDA array is a special type of 2D layered CUDA array that consists of a collection of cubemaps. The first six layers represent the first cubemap, the next six layers form the second cubemap, and so on.
  • {@code Format} specifies the format of the elements.
  • {@code NumChannels} specifies the number of packed components per CUDA array element; it may be 1, 2, or 4.
  • {@code Flags} may be set to
      • {@link #CUDA_ARRAY3D_LAYERED} to enable creation of layered CUDA arrays. If this flag is set, {@code Depth} specifies the number of layers, not the depth of a 3D array.
      • {@link #CUDA_ARRAY3D_SURFACE_LDST} to enable surface references to be bound to the CUDA array. If this flag is not set, {@link #cuSurfRefSetArray SurfRefSetArray} will fail when attempting to bind the CUDA array to a surface reference.
      • {@link #CUDA_ARRAY3D_CUBEMAP} to enable creation of cubemaps. If this flag is set, {@code Width} must be equal to {@code Height}, and {@code Depth} must be six. If the {@link #CUDA_ARRAY3D_LAYERED} flag is also set, then {@code Depth} must be a multiple of six.
      • {@link #CUDA_ARRAY3D_TEXTURE_GATHER} to indicate that the CUDA array will be used for texture gather. Texture gather can only be performed on 2D CUDA arrays.

{@code Width}, {@code Height} and {@code Depth} must meet certain size requirements as listed in the following table. All values are specified in elements. Note that for brevity's sake, the full name of the device attribute is not specified. For example, TEXTURE1D_WIDTH refers to the device attribute {@link #CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE1D_WIDTH DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE1D_WIDTH}.

* *

Note that 2D CUDA arrays have different size requirements if the {@link #CUDA_ARRAY3D_TEXTURE_GATHER} flag is set. {@code Width} and {@code Height} must not * be greater than {@link #CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE2D_GATHER_WIDTH DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE2D_GATHER_WIDTH} and {@link #CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE2D_GATHER_HEIGHT DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE2D_GATHER_HEIGHT} respectively, in that * case.

CUDA array type | Valid extents that must always be met {(width range in elements), (height range), (depth range)} | Valid extents with CUDA_ARRAY3D_SURFACE_LDST set {(width range in elements), (height range), (depth range)}
1D              | { (1,TEXTURE1D_WIDTH), 0, 0 } | { (1,SURFACE1D_WIDTH), 0, 0 }
2D              | { (1,TEXTURE2D_WIDTH), (1,TEXTURE2D_HEIGHT), 0 } | { (1,SURFACE2D_WIDTH), (1,SURFACE2D_HEIGHT), 0 }
3D              | { (1,TEXTURE3D_WIDTH), (1,TEXTURE3D_HEIGHT), (1,TEXTURE3D_DEPTH) } OR { (1,TEXTURE3D_WIDTH_ALTERNATE), (1,TEXTURE3D_HEIGHT_ALTERNATE), (1,TEXTURE3D_DEPTH_ALTERNATE) } | { (1,SURFACE3D_WIDTH), (1,SURFACE3D_HEIGHT), (1,SURFACE3D_DEPTH) }
1D Layered      | { (1,TEXTURE1D_LAYERED_WIDTH), 0, (1,TEXTURE1D_LAYERED_LAYERS) } | { (1,SURFACE1D_LAYERED_WIDTH), 0, (1,SURFACE1D_LAYERED_LAYERS) }
2D Layered      | { (1,TEXTURE2D_LAYERED_WIDTH), (1,TEXTURE2D_LAYERED_HEIGHT), (1,TEXTURE2D_LAYERED_LAYERS) } | { (1,SURFACE2D_LAYERED_WIDTH), (1,SURFACE2D_LAYERED_HEIGHT), (1,SURFACE2D_LAYERED_LAYERS) }
Cubemap         | { (1,TEXTURECUBEMAP_WIDTH), (1,TEXTURECUBEMAP_WIDTH), 6 } | { (1,SURFACECUBEMAP_WIDTH), (1,SURFACECUBEMAP_WIDTH), 6 }
Cubemap Layered | { (1,TEXTURECUBEMAP_LAYERED_WIDTH), (1,TEXTURECUBEMAP_LAYERED_WIDTH), (1,TEXTURECUBEMAP_LAYERED_LAYERS) } | { (1,SURFACECUBEMAP_LAYERED_WIDTH), (1,SURFACECUBEMAP_LAYERED_WIDTH), (1,SURFACECUBEMAP_LAYERED_LAYERS) }
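As a concrete illustration, here is a minimal LWJGL-style sketch of creating a plain 3D array with {@code cuArray3DCreate}. It assumes a current CUDA context already exists ({@code cuInit} plus {@code cuCtxCreate}), a static import of {@code org.lwjgl.cuda.CU} and the usual {@code org.lwjgl} imports, and a small hypothetical {@code checkCU} helper that throws unless a call returns {@code CUDA_SUCCESS}; the extents and format are arbitrary example values, not requirements.

    // Hypothetical helper reused by the sketches below.
    static void checkCU(int err) {
        if (err != CUDA_SUCCESS) throw new IllegalStateException("CUDA error " + err);
    }

    static long createVolume() {
        try (MemoryStack stack = MemoryStack.stackPush()) {
            CUDA_ARRAY3D_DESCRIPTOR desc = CUDA_ARRAY3D_DESCRIPTOR.calloc(stack)
                .Width(256).Height(256).Depth(64)      // all three extents non-zero => 3D array
                .Format(CU_AD_FORMAT_UNSIGNED_INT8)
                .NumChannels(1)
                .Flags(0);

            PointerBuffer pHandle = stack.mallocPointer(1);
            checkCU(cuArray3DCreate(pHandle, desc));
            return pHandle.get(0);                     // CUarray handle; free later with cuArrayDestroy
        }
    }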
* * @param pHandle returned array * @param pAllocateArray 3D array descriptor */ @NativeType("CUresult") public static int cuArray3DCreate(@NativeType("CUarray *") PointerBuffer pHandle, @NativeType("CUDA_ARRAY3D_DESCRIPTOR const *") CUDA_ARRAY3D_DESCRIPTOR pAllocateArray) { if (CHECKS) { check(pHandle, 1); } return ncuArray3DCreate(memAddress(pHandle), pAllocateArray.address()); } // --- [ cuArray3DGetDescriptor ] --- /** Unsafe version of: {@link #cuArray3DGetDescriptor Array3DGetDescriptor} */ public static int ncuArray3DGetDescriptor(long pArrayDescriptor, long hArray) { long __functionAddress = Functions.Array3DGetDescriptor; if (CHECKS) { check(hArray); } return callPPI(pArrayDescriptor, hArray, __functionAddress); } /** * Get a 3D CUDA array descriptor. * *

Returns in {@code *pArrayDescriptor} a descriptor containing information on the format and dimensions of the CUDA array {@code hArray}. It is useful * for subroutines that have been passed a CUDA array, but need to know the CUDA array parameters for validation or other purposes.

* *

This function may be called on 1D and 2D arrays, in which case the {@code Height} and/or {@code Depth} members of the descriptor struct will be set to * 0.
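A short sketch of querying an existing array, under the same assumptions as the earlier sketch (the hypothetical {@code checkCU} helper, a current context, and an existing {@code hArray} handle):

    try (MemoryStack stack = MemoryStack.stackPush()) {
        CUDA_ARRAY3D_DESCRIPTOR desc = CUDA_ARRAY3D_DESCRIPTOR.calloc(stack);
        checkCU(cuArray3DGetDescriptor(desc, hArray));
        // Height()/Depth() come back as 0 for 1D/2D arrays, as described above.
        System.out.printf("array %d x %d x %d, format=%d, channels=%d%n",
            desc.Width(), desc.Height(), desc.Depth(), desc.Format(), desc.NumChannels());
    }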

* * @param pArrayDescriptor returned 3D array descriptor * @param hArray 3D array to get descriptor of */ @NativeType("CUresult") public static int cuArray3DGetDescriptor(@NativeType("CUDA_ARRAY3D_DESCRIPTOR *") CUDA_ARRAY3D_DESCRIPTOR pArrayDescriptor, @NativeType("CUarray") long hArray) { return ncuArray3DGetDescriptor(pArrayDescriptor.address(), hArray); } // --- [ cuMipmappedArrayCreate ] --- /** Unsafe version of: {@link #cuMipmappedArrayCreate MipmappedArrayCreate} */ public static int ncuMipmappedArrayCreate(long pHandle, long pMipmappedArrayDesc, int numMipmapLevels) { long __functionAddress = Functions.MipmappedArrayCreate; if (CHECKS) { check(__functionAddress); } return callPPI(pHandle, pMipmappedArrayDesc, numMipmapLevels, __functionAddress); } /** * Creates a CUDA mipmapped array. * *

Creates a CUDA mipmapped array according to the {@link CUDA_ARRAY3D_DESCRIPTOR} structure {@code pMipmappedArrayDesc} and returns a handle to the new CUDA * mipmapped array in {@code *pHandle}. {@code numMipmapLevels} specifies the number of mipmap levels to be allocated. This value is clamped to the range * {@code [1, 1 + floor(log2(max(width, height, depth)))]}.

• {@code Width}, {@code Height}, and {@code Depth} are the width, height, and depth of the CUDA array (in elements); the following types of CUDA arrays can be allocated:
  • A 1D mipmapped array is allocated if {@code Height} and {@code Depth} extents are both zero.
  • A 2D mipmapped array is allocated if only {@code Depth} extent is zero.
  • A 3D mipmapped array is allocated if all three extents are non-zero.
  • A 1D layered CUDA mipmapped array is allocated if only {@code Height} is zero and the {@link #CUDA_ARRAY3D_LAYERED} flag is set. Each layer is a 1D array. The number of layers is determined by the depth extent.
  • A 2D layered CUDA mipmapped array is allocated if all three extents are non-zero and the {@link #CUDA_ARRAY3D_LAYERED} flag is set. Each layer is a 2D array. The number of layers is determined by the depth extent.
  • A cubemap CUDA mipmapped array is allocated if all three extents are non-zero and the {@link #CUDA_ARRAY3D_CUBEMAP} flag is set. {@code Width} must be equal to {@code Height}, and {@code Depth} must be six. A cubemap is a special type of 2D layered CUDA array, where the six layers represent the six faces of a cube. The order of the six layers in memory is the same as that listed in {@code CUarray_cubemap_face}.
  • A cubemap layered CUDA mipmapped array is allocated if all three extents are non-zero and both the {@link #CUDA_ARRAY3D_CUBEMAP} and {@link #CUDA_ARRAY3D_LAYERED} flags are set. {@code Width} must be equal to {@code Height}, and {@code Depth} must be a multiple of six. A cubemap layered CUDA array is a special type of 2D layered CUDA array that consists of a collection of cubemaps. The first six layers represent the first cubemap, the next six layers form the second cubemap, and so on.
• {@code Format} specifies the format of the elements.
• {@code NumChannels} specifies the number of packed components per CUDA array element; it may be 1, 2, or 4.
• {@code Flags} may be set to:
  • {@link #CUDA_ARRAY3D_LAYERED} to enable creation of layered CUDA mipmapped arrays. If this flag is set, {@code Depth} specifies the number of layers, not the depth of a 3D array.
  • {@link #CUDA_ARRAY3D_SURFACE_LDST} to enable surface references to be bound to individual mipmap levels of the CUDA mipmapped array. If this flag is not set, {@link #cuSurfRefSetArray SurfRefSetArray} will fail when attempting to bind a mipmap level of the CUDA mipmapped array to a surface reference.
  • {@link #CUDA_ARRAY3D_CUBEMAP} to enable creation of mipmapped cubemaps. If this flag is set, {@code Width} must be equal to {@code Height}, and {@code Depth} must be six. If the {@link #CUDA_ARRAY3D_LAYERED} flag is also set, then {@code Depth} must be a multiple of six.
  • {@link #CUDA_ARRAY3D_TEXTURE_GATHER} to indicate that the CUDA mipmapped array will be used for texture gather. Texture gather can only be performed on 2D CUDA mipmapped arrays.

{@code Width}, {@code Height} and {@code Depth} must meet certain size requirements as listed in the following table. All values are specified in elements. Note that for brevity's sake, the full name of the device attribute is not specified. For example, {@code TEXTURE1D_MIPMAPPED_WIDTH} refers to the device attribute {@link #CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE1D_MIPMAPPED_WIDTH DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE1D_MIPMAPPED_WIDTH}.

CUDA array type | Valid extents that must always be met {(width range in elements), (height range), (depth range)} | Valid extents with CUDA_ARRAY3D_SURFACE_LDST set {(width range in elements), (height range), (depth range)}
1D              | { (1,TEXTURE1D_MIPMAPPED_WIDTH), 0, 0 } | { (1,SURFACE1D_WIDTH), 0, 0 }
2D              | { (1,TEXTURE2D_MIPMAPPED_WIDTH), (1,TEXTURE2D_MIPMAPPED_HEIGHT), 0 } | { (1,SURFACE2D_WIDTH), (1,SURFACE2D_HEIGHT), 0 }
3D              | { (1,TEXTURE3D_WIDTH), (1,TEXTURE3D_HEIGHT), (1,TEXTURE3D_DEPTH) } OR { (1,TEXTURE3D_WIDTH_ALTERNATE), (1,TEXTURE3D_HEIGHT_ALTERNATE), (1,TEXTURE3D_DEPTH_ALTERNATE) } | { (1,SURFACE3D_WIDTH), (1,SURFACE3D_HEIGHT), (1,SURFACE3D_DEPTH) }
1D Layered      | { (1,TEXTURE1D_LAYERED_WIDTH), 0, (1,TEXTURE1D_LAYERED_LAYERS) } | { (1,SURFACE1D_LAYERED_WIDTH), 0, (1,SURFACE1D_LAYERED_LAYERS) }
2D Layered      | { (1,TEXTURE2D_LAYERED_WIDTH), (1,TEXTURE2D_LAYERED_HEIGHT), (1,TEXTURE2D_LAYERED_LAYERS) } | { (1,SURFACE2D_LAYERED_WIDTH), (1,SURFACE2D_LAYERED_HEIGHT), (1,SURFACE2D_LAYERED_LAYERS) }
Cubemap         | { (1,TEXTURECUBEMAP_WIDTH), (1,TEXTURECUBEMAP_WIDTH), 6 } | { (1,SURFACECUBEMAP_WIDTH), (1,SURFACECUBEMAP_WIDTH), 6 }
Cubemap Layered | { (1,TEXTURECUBEMAP_LAYERED_WIDTH), (1,TEXTURECUBEMAP_LAYERED_WIDTH), (1,TEXTURECUBEMAP_LAYERED_LAYERS) } | { (1,SURFACECUBEMAP_LAYERED_WIDTH), (1,SURFACECUBEMAP_LAYERED_WIDTH), (1,SURFACECUBEMAP_LAYERED_LAYERS) }
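The following sketch creates a 2D mipmapped array and fetches its base level. It reuses the hypothetical {@code checkCU} helper and assumes a current context; the extents, format and level count are arbitrary example values.

    try (MemoryStack stack = MemoryStack.stackPush()) {
        CUDA_ARRAY3D_DESCRIPTOR desc = CUDA_ARRAY3D_DESCRIPTOR.calloc(stack)
            .Width(512).Height(512).Depth(0)          // Depth == 0 => 2D mipmapped array
            .Format(CU_AD_FORMAT_UNSIGNED_INT8)
            .NumChannels(4)
            .Flags(0);

        PointerBuffer pMipmapped = stack.mallocPointer(1);
        checkCU(cuMipmappedArrayCreate(pMipmapped, desc, 10)); // 10 = 1 + log2(512) levels
        long hMipmapped = pMipmapped.get(0);

        PointerBuffer pLevel = stack.mallocPointer(1);
        checkCU(cuMipmappedArrayGetLevel(pLevel, hMipmapped, 0)); // base level as a regular CUarray
        long level0 = pLevel.get(0);
        // ... fill or sample level0 ...

        checkCU(cuMipmappedArrayDestroy(hMipmapped));
    }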
* * @param pHandle returned mipmapped array * @param pMipmappedArrayDesc mipmapped array descriptor * @param numMipmapLevels number of mipmap levels */ @NativeType("CUresult") public static int cuMipmappedArrayCreate(@NativeType("CUmipmappedArray *") PointerBuffer pHandle, @NativeType("CUDA_ARRAY3D_DESCRIPTOR const *") CUDA_ARRAY3D_DESCRIPTOR pMipmappedArrayDesc, @NativeType("unsigned int") int numMipmapLevels) { if (CHECKS) { check(pHandle, 1); } return ncuMipmappedArrayCreate(memAddress(pHandle), pMipmappedArrayDesc.address(), numMipmapLevels); } // --- [ cuMipmappedArrayGetLevel ] --- /** Unsafe version of: {@link #cuMipmappedArrayGetLevel MipmappedArrayGetLevel} */ public static int ncuMipmappedArrayGetLevel(long pLevelArray, long hMipmappedArray, int level) { long __functionAddress = Functions.MipmappedArrayGetLevel; if (CHECKS) { check(__functionAddress); check(hMipmappedArray); } return callPPI(pLevelArray, hMipmappedArray, level, __functionAddress); } /** * Gets a mipmap level of a CUDA mipmapped array. * *

Returns in {@code *pLevelArray} a CUDA array that represents a single mipmap level of the CUDA mipmapped array {@code hMipmappedArray}.

* *

If {@code level} is greater than the maximum number of levels in this mipmapped array, {@link #CUDA_ERROR_INVALID_VALUE} is returned.

* * @param pLevelArray returned mipmap level CUDA array * @param hMipmappedArray CUDA mipmapped array * @param level mipmap level */ @NativeType("CUresult") public static int cuMipmappedArrayGetLevel(@NativeType("CUarray *") PointerBuffer pLevelArray, @NativeType("CUmipmappedArray") long hMipmappedArray, @NativeType("unsigned int") int level) { if (CHECKS) { check(pLevelArray, 1); } return ncuMipmappedArrayGetLevel(memAddress(pLevelArray), hMipmappedArray, level); } // --- [ cuMipmappedArrayDestroy ] --- /** * Destroys a CUDA mipmapped array. * *

Destroys the CUDA mipmapped array {@code hMipmappedArray}.

* * @param hMipmappedArray mipmapped array to destroy */ @NativeType("CUresult") public static int cuMipmappedArrayDestroy(@NativeType("CUmipmappedArray") long hMipmappedArray) { long __functionAddress = Functions.MipmappedArrayDestroy; if (CHECKS) { check(__functionAddress); check(hMipmappedArray); } return callPI(hMipmappedArray, __functionAddress); } // --- [ cuMemAddressReserve ] --- /** Unsafe version of: {@link #cuMemAddressReserve MemAddressReserve} */ public static int ncuMemAddressReserve(long ptr, long size, long alignment, long addr, long flags) { long __functionAddress = Functions.MemAddressReserve; if (CHECKS) { check(__functionAddress); check(addr); } return callPPPPJI(ptr, size, alignment, addr, flags, __functionAddress); } /** * Allocate an address range reservation. * *

Reserves a virtual address range based on the given parameters, giving the starting address of the range in {@code ptr}. This API requires a system * that supports UVA. The size and address parameters must be a multiple of the host page size and the alignment must be a power of two or zero for * default alignment.
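A minimal sketch of reserving a virtual address range, under the same assumptions as the earlier sketches ({@code checkCU}, a current context). The 2 MiB size is only an example and in practice must be a multiple of the granularity reported by {@code cuMemGetAllocationGranularity}.

    try (MemoryStack stack = MemoryStack.stackPush()) {
        long size = 2L << 20;                            // example size, assumed granularity-aligned
        PointerBuffer pPtr = stack.mallocPointer(1);
        checkCU(cuMemAddressReserve(pPtr, size, 0 /* default alignment */, 0 /* no fixed address */, 0));
        long reservedPtr = pPtr.get(0);
        // reservedPtr is only a reservation: back it with cuMemCreate + cuMemMap before use,
        // and release it with cuMemAddressFree(reservedPtr, size) once everything is unmapped.
    }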

* * @param ptr resulting pointer to start of virtual address range allocated * @param size size of the reserved virtual address range requested * @param alignment alignment of the reserved virtual address range requested * @param addr fixed starting address range requested * @param flags currently unused, must be zero */ @NativeType("CUresult") public static int cuMemAddressReserve(@NativeType("CUdeviceptr *") PointerBuffer ptr, @NativeType("size_t") long size, @NativeType("size_t") long alignment, @NativeType("CUdeviceptr") long addr, @NativeType("unsigned long long") long flags) { if (CHECKS) { check(ptr, 1); } return ncuMemAddressReserve(memAddress(ptr), size, alignment, addr, flags); } // --- [ cuMemAddressFree ] --- /** * Free an address range reservation. * *

Frees a virtual address range reserved by {@link #cuMemAddressReserve MemAddressReserve}. The {@code size} must match what was given to {@link #cuMemAddressReserve MemAddressReserve} and the {@code ptr} given must match what was returned from {@link #cuMemAddressReserve MemAddressReserve}.

* * @param ptr starting address of the virtual address range to free * @param size size of the virtual address region to free */ @NativeType("CUresult") public static int cuMemAddressFree(@NativeType("CUdeviceptr") long ptr, @NativeType("size_t") long size) { long __functionAddress = Functions.MemAddressFree; if (CHECKS) { check(__functionAddress); check(ptr); } return callPPI(ptr, size, __functionAddress); } // --- [ cuMemCreate ] --- /** Unsafe version of: {@link #cuMemCreate MemCreate} */ public static int ncuMemCreate(long handle, long size, long prop, long flags) { long __functionAddress = Functions.MemCreate; if (CHECKS) { check(__functionAddress); } return callPPPJI(handle, size, prop, flags, __functionAddress); } /** * Create a CUDA memory handle representing a memory allocation of a given size described by the given properties. * *

This creates a memory allocation on the target device specified through the {@code prop} structure. The created allocation will not have any device or host mappings. The generic memory {@code handle} for the allocation can be mapped to the address space of the calling process via {@link #cuMemMap MemMap}. This handle cannot be transmitted directly to other processes (see {@link #cuMemExportToShareableHandle MemExportToShareableHandle}). On Windows, the caller must also pass an {@code LPSECURITYATTRIBUTE} in {@code prop} to be associated with this handle, which limits or allows access to this handle for a recipient process (see {@link CUmemAllocationProp}{@code ::win32HandleMetaData} for more). The {@code size} of this allocation must be a multiple of the value given via {@link #cuMemGetAllocationGranularity MemGetAllocationGranularity} with the {@link #CU_MEM_ALLOC_GRANULARITY_MINIMUM MEM_ALLOC_GRANULARITY_MINIMUM} flag. If {@link CUmemAllocationProp}{@code ::allocFlags::usage} contains the {@link #CU_MEM_CREATE_USAGE_TILE_POOL MEM_CREATE_USAGE_TILE_POOL} flag, then the memory allocation is intended only to be used as a backing tile pool for sparse CUDA arrays and sparse CUDA mipmapped arrays (see {@link #cuMemMapArrayAsync MemMapArrayAsync}).
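A sketch of creating a physical allocation with {@code cuMemCreate}, sized to the minimum granularity. It assumes the hypothetical {@code checkCU} helper, a current context, and a device ordinal {@code device}; the allocation-type and location-type constant names used here are the usual driver-API enum names and are assumed to be exposed by these bindings.

    try (MemoryStack stack = MemoryStack.stackPush()) {
        CUmemAllocationProp prop = CUmemAllocationProp.calloc(stack)
            .type(CU_MEM_ALLOCATION_TYPE_PINNED);                  // assumed constant name
        prop.location().type(CU_MEM_LOCATION_TYPE_DEVICE).id(device);

        PointerBuffer pGranularity = stack.mallocPointer(1);
        checkCU(cuMemGetAllocationGranularity(pGranularity, prop, CU_MEM_ALLOC_GRANULARITY_MINIMUM));
        long granularity = pGranularity.get(0);
        long size = ((1L << 20) + granularity - 1) / granularity * granularity; // round 1 MiB up

        LongBuffer pHandle = stack.mallocLong(1);
        checkCU(cuMemCreate(pHandle, size, prop, 0));
        long allocHandle = pHandle.get(0);   // physical memory only; nothing is mapped yet
    }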

* * @param handle value of handle returned. All operations on this allocation are to be performed using this handle. * @param size size of the allocation requested * @param prop properties of the allocation to create * @param flags flags for future use, must be zero now */ @NativeType("CUresult") public static int cuMemCreate(@NativeType("CUmemGenericAllocationHandle *") LongBuffer handle, @NativeType("size_t") long size, @NativeType("CUmemAllocationProp const *") CUmemAllocationProp prop, @NativeType("unsigned long long") long flags) { if (CHECKS) { check(handle, 1); } return ncuMemCreate(memAddress(handle), size, prop.address(), flags); } // --- [ cuMemRelease ] --- /** * Release a memory handle representing a memory allocation which was previously allocated through {@link #cuMemCreate MemCreate}. * *

Frees the memory that was allocated on a device through {@code cuMemCreate}.

* *

The memory allocation will be freed when all outstanding mappings to the memory are unmapped and when all outstanding references to the handle (including its shareable counterparts) are also released. The generic memory handle can be freed when there are still outstanding mappings made with this handle. Each time a recipient process imports a shareable handle, it needs to pair it with {@link #cuMemRelease MemRelease} for the handle to be freed. If {@code handle} is not a valid handle, the behavior is undefined.

* * @param handle value of handle which was returned previously by {@code cuMemCreate} */ @NativeType("CUresult") public static int cuMemRelease(@NativeType("CUmemGenericAllocationHandle") long handle) { long __functionAddress = Functions.MemRelease; if (CHECKS) { check(__functionAddress); } return callJI(handle, __functionAddress); } // --- [ cuMemMap ] --- /** * Maps an allocation handle to a reserved virtual address range. * *

Maps {@code size} bytes of the memory represented by {@code handle}, starting from byte {@code offset}, to the address range [{@code addr}, {@code addr} + {@code size}]. This range must be an address reservation previously reserved with {@link #cuMemAddressReserve MemAddressReserve}, and {@code offset} + {@code size} must be less than the size of the memory allocation. {@code ptr}, {@code size}, and {@code offset} must each be a multiple of the value given via {@link #cuMemGetAllocationGranularity MemGetAllocationGranularity} with the {@link #CU_MEM_ALLOC_GRANULARITY_MINIMUM MEM_ALLOC_GRANULARITY_MINIMUM} flag.

* *

Please note calling {@link #cuMemMap MemMap} does not make the address accessible, the caller needs to update accessibility of a contiguous mapped VA range by * calling {@link #cuMemSetAccess MemSetAccess}.

* *

Once a recipient process obtains a shareable memory handle from {@link #cuMemImportFromShareableHandle MemImportFromShareableHandle}, the process must use {@link #cuMemMap MemMap} to map the memory * into its address ranges before setting accessibility with {@link #cuMemSetAccess MemSetAccess}.

* *

{@link #cuMemMap MemMap} can only create mappings on VA range reservations that are not currently mapped.
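Continuing the hypothetical {@code reservedPtr}, {@code allocHandle}, {@code size} and {@code device} values from the sketches above, mapping the allocation and then enabling access might look like the following; the access-flag constant name is assumed to match the usual driver-API enum.

    try (MemoryStack stack = MemoryStack.stackPush()) {
        checkCU(cuMemMap(reservedPtr, size, 0 /* offset, currently must be 0 */, allocHandle, 0));

        CUmemAccessDesc.Buffer access = CUmemAccessDesc.calloc(1, stack);
        access.get(0)
              .flags(CU_MEM_ACCESS_FLAGS_PROT_READWRITE)           // assumed constant name
              .location().type(CU_MEM_LOCATION_TYPE_DEVICE).id(device);
        checkCU(cuMemSetAccess(reservedPtr, size, access));

        // reservedPtr now behaves like an ordinary device pointer of `size` bytes.
    }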

* * @param ptr address where memory will be mapped * @param size size of the memory mapping * @param offset offset into the memory represented by - {@code handle} from which to start mapping - Note: currently must be zero * @param handle handle to a shareable memory * @param flags flags for future use, must be zero now */ @NativeType("CUresult") public static int cuMemMap(@NativeType("CUdeviceptr") long ptr, @NativeType("size_t") long size, @NativeType("size_t") long offset, @NativeType("CUmemGenericAllocationHandle") long handle, @NativeType("unsigned long long") long flags) { long __functionAddress = Functions.MemMap; if (CHECKS) { check(__functionAddress); check(ptr); } return callPPPJJI(ptr, size, offset, handle, flags, __functionAddress); } // --- [ cuMemMapArrayAsync ] --- /** * Unsafe version of: {@link #cuMemMapArrayAsync MemMapArrayAsync} * * @param count count of {@code CUarrayMapInfo} in {@code mapInfoList} */ public static int ncuMemMapArrayAsync(long mapInfoList, int count, long hStream) { long __functionAddress = Functions.MemMapArrayAsync; if (CHECKS) { check(__functionAddress); } return callPPI(mapInfoList, count, hStream, __functionAddress); } /** * Maps or unmaps subregions of sparse CUDA arrays and sparse CUDA mipmapped arrays. * *

Performs map or unmap operations on subregions of sparse CUDA arrays and sparse CUDA mipmapped arrays. Each operation is specified by a * {@link CUarrayMapInfo} entry in the {@code mapInfoList} array of size {@code count}.

* *

Here, {@code CUarrayMapInfo::resourceType} specifies the type of resource to be operated on. If {@code CUarrayMapInfo::resourceType} is set to {@link #CU_RESOURCE_TYPE_ARRAY RESOURCE_TYPE_ARRAY} then {@code CUarrayMapInfo::resource::array} must be set to a valid sparse CUDA array handle. The CUDA array must be either a 2D, 2D layered or 3D CUDA array and must have been allocated using {@link #cuArrayCreate ArrayCreate} or {@link #cuArray3DCreate Array3DCreate} with the flag {@link #CUDA_ARRAY3D_SPARSE}. For CUDA arrays obtained using {@link #cuMipmappedArrayGetLevel MipmappedArrayGetLevel}, {@link #CUDA_ERROR_INVALID_VALUE} will be returned. If {@code CUarrayMapInfo::resourceType} is set to {@link #CU_RESOURCE_TYPE_MIPMAPPED_ARRAY RESOURCE_TYPE_MIPMAPPED_ARRAY} then {@code CUarrayMapInfo::resource::mipmap} must be set to a valid sparse CUDA mipmapped array handle. The CUDA mipmapped array must be either a 2D, 2D layered or 3D CUDA mipmapped array and must have been allocated using {@link #cuMipmappedArrayCreate MipmappedArrayCreate} with the flag {@link #CUDA_ARRAY3D_SPARSE}.

* *

{@code CUarrayMapInfo::subresourceType} specifies the type of subresource within the resource.

* *

Here, {@link #CU_ARRAY_SPARSE_SUBRESOURCE_TYPE_SPARSE_LEVEL ARRAY_SPARSE_SUBRESOURCE_TYPE_SPARSE_LEVEL} indicates a sparse miplevel which spans at least one tile in every dimension. The remaining miplevels, which are too small to span at least one tile in any dimension, constitute the mip tail region, as indicated by the {@link #CU_ARRAY_SPARSE_SUBRESOURCE_TYPE_MIPTAIL ARRAY_SPARSE_SUBRESOURCE_TYPE_MIPTAIL} subresource type.

* *

If {@code CUarrayMapInfo::subresourceType} is set to {@link #CU_ARRAY_SPARSE_SUBRESOURCE_TYPE_SPARSE_LEVEL ARRAY_SPARSE_SUBRESOURCE_TYPE_SPARSE_LEVEL} then * {@code CUarrayMapInfo::subresource::sparseLevel} struct must contain valid array subregion offsets and extents. The * {@code CUarrayMapInfo::subresource::sparseLevel::offsetX}, {@code CUarrayMapInfo::subresource::sparseLevel::offsetY} and * {@code CUarrayMapInfo::subresource::sparseLevel::offsetZ} must specify valid X, Y and Z offsets respectively. The * {@code CUarrayMapInfo::subresource::sparseLevel::extentWidth}, {@code CUarrayMapInfo::subresource::sparseLevel::extentHeight} and * {@code CUarrayMapInfo::subresource::sparseLevel::extentDepth} must specify valid width, height and depth extents respectively. These offsets and * extents must be aligned to the corresponding tile dimension. For CUDA mipmapped arrays {@code CUarrayMapInfo::subresource::sparseLevel::level} must * specify a valid mip level index. Otherwise, must be zero. For layered CUDA arrays and layered CUDA mipmapped arrays * {@code CUarrayMapInfo::subresource::sparseLevel::layer} must specify a valid layer index. Otherwise, must be zero. * {@code CUarrayMapInfo::subresource::sparseLevel::offsetZ} must be zero and {@code CUarrayMapInfo::subresource::sparseLevel::extentDepth} must be set to * 1 for 2D and 2D layered CUDA arrays and CUDA mipmapped arrays. Tile extents can be obtained by calling {@link #cuArrayGetSparseProperties ArrayGetSparseProperties} and * {@link #cuMipmappedArrayGetSparseProperties MipmappedArrayGetSparseProperties}

* *

If {@code CUarrayMapInfo::subresourceType} is set to {@link #CU_ARRAY_SPARSE_SUBRESOURCE_TYPE_MIPTAIL ARRAY_SPARSE_SUBRESOURCE_TYPE_MIPTAIL} then {@code CUarrayMapInfo::subresource::miptail} struct * must contain valid mip tail offset in {@code CUarrayMapInfo::subresource::miptail::offset} and size in * {@code CUarrayMapInfo::subresource::miptail::size}. Both, mip tail offset and mip tail size must be aligned to the tile size. For layered CUDA * mipmapped arrays which don't have the flag {@link #CU_ARRAY_SPARSE_PROPERTIES_SINGLE_MIPTAIL ARRAY_SPARSE_PROPERTIES_SINGLE_MIPTAIL} set in {@link CUDA_ARRAY_SPARSE_PROPERTIES}{@code ::flags} as returned by * {@link #cuMipmappedArrayGetSparseProperties MipmappedArrayGetSparseProperties}, {@code CUarrayMapInfo::subresource::miptail::layer} must specify a valid layer index. Otherwise, must be zero.

* *

{@code CUarrayMapInfo::memOperationType} specifies the type of operation.

* *

If {@code CUarrayMapInfo::memOperationType} is set to {@link #CU_MEM_OPERATION_TYPE_MAP MEM_OPERATION_TYPE_MAP} then the subresource will be mapped onto the tile pool memory specified * by {@code CUarrayMapInfo::memHandle} at offset {@code CUarrayMapInfo::offset}. The tile pool allocation has to be created by specifying the * {@link #CU_MEM_CREATE_USAGE_TILE_POOL MEM_CREATE_USAGE_TILE_POOL} flag when calling {@link #cuMemCreate MemCreate}. Also, {@code CUarrayMapInfo::memHandleType} must be set to {@link #CU_MEM_HANDLE_TYPE_GENERIC MEM_HANDLE_TYPE_GENERIC}.

* *

If {@code CUarrayMapInfo::memOperationType} is set to {@link #CU_MEM_OPERATION_TYPE_UNMAP MEM_OPERATION_TYPE_UNMAP} then an unmapping operation is performed. * {@code CUarrayMapInfo::memHandle} must be NULL.

* *

{@code CUarrayMapInfo::deviceBitMask} specifies the list of devices that must map or unmap physical memory. Currently, this mask must have exactly one * bit set, and the corresponding device must match the device associated with the stream. If {@code CUarrayMapInfo::memOperationType} is set to * {@link #CU_MEM_OPERATION_TYPE_MAP MEM_OPERATION_TYPE_MAP}, the device must also match the device associated with the tile pool memory allocation as specified by * {@code CUarrayMapInfo::memHandle}.

* *

{@code CUarrayMapInfo::flags} and {@code CUarrayMapInfo::reserved[]} are unused and must be set to zero.

* * @param mapInfoList list of {@code CUarrayMapInfo} * @param hStream stream identifier for the stream to use for map or unmap operations */ @NativeType("CUresult") public static int cuMemMapArrayAsync(@NativeType("CUarrayMapInfo *") CUarrayMapInfo.Buffer mapInfoList, @NativeType("CUstream") long hStream) { return ncuMemMapArrayAsync(mapInfoList.address(), mapInfoList.remaining(), hStream); } // --- [ cuMemUnmap ] --- /** * Unmap the backing memory of a given address range. * *

The range must be the entire contiguous address range that was mapped to. In other words, {@link #cuMemUnmap MemUnmap} cannot unmap a sub-range of an address range * mapped by {@link #cuMemCreate MemCreate} / {@link #cuMemMap MemMap}. Any backing memory allocations will be freed if there are no existing mappings and there are no unreleased memory * handles.

* *

When {@link #cuMemUnmap MemUnmap} returns successfully the address range is converted to an address reservation and can be used for a future calls to {@link #cuMemMap MemMap}. Any * new mapping to this virtual address will need to have access granted through {@link #cuMemSetAccess MemSetAccess}, as all mappings start with no accessibility setup.
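Tearing down the mapping from the previous sketches would then follow the reverse order: unmap the whole range, release the handle, and free the reservation. This reuses the hypothetical {@code checkCU}, {@code reservedPtr}, {@code allocHandle} and {@code size} from above.

    checkCU(cuMemUnmap(reservedPtr, size));        // must cover the entire mapped range
    checkCU(cuMemRelease(allocHandle));            // backing memory is freed once nothing references it
    checkCU(cuMemAddressFree(reservedPtr, size));  // return the VA reservation itself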

* * @param ptr starting address for the virtual address range to unmap * @param size size of the virtual address range to unmap */ @NativeType("CUresult") public static int cuMemUnmap(@NativeType("CUdeviceptr") long ptr, @NativeType("size_t") long size) { long __functionAddress = Functions.MemUnmap; if (CHECKS) { check(__functionAddress); check(ptr); } return callPPI(ptr, size, __functionAddress); } // --- [ cuMemSetAccess ] --- /** * Unsafe version of: {@link #cuMemSetAccess MemSetAccess} * * @param count number of {@code CUmemAccessDesc} in {@code desc} */ public static int ncuMemSetAccess(long ptr, long size, long desc, long count) { long __functionAddress = Functions.MemSetAccess; if (CHECKS) { check(__functionAddress); check(ptr); } return callPPPPI(ptr, size, desc, count, __functionAddress); } /** * Set the access flags for each location specified in {@code desc} for the given virtual address range. * *

Given the virtual address range via {@code ptr} and {@code size}, and the locations in the array given by {@code desc} and {@code count}, set the * access flags for the target locations. The range must be a fully mapped address range containing all allocations created by {@link #cuMemMap MemMap} / {@link #cuMemCreate MemCreate}.

* * @param ptr starting address for the virtual address range * @param size length of the virtual address range * @param desc array of {@code CUmemAccessDesc} that describe how to change the - mapping for each location specified */ @NativeType("CUresult") public static int cuMemSetAccess(@NativeType("CUdeviceptr") long ptr, @NativeType("size_t") long size, @NativeType("CUmemAccessDesc const *") CUmemAccessDesc.Buffer desc) { return ncuMemSetAccess(ptr, size, desc.address(), desc.remaining()); } // --- [ cuMemGetAccess ] --- /** Unsafe version of: {@link #cuMemGetAccess MemGetAccess} */ public static int ncuMemGetAccess(long flags, long location, long ptr) { long __functionAddress = Functions.MemGetAccess; if (CHECKS) { check(__functionAddress); check(ptr); } return callPPPI(flags, location, ptr, __functionAddress); } /** * Get the access {@code flags} set for the given {@code location} and {@code ptr}. * * @param flags flags set for this location * @param location location in which to check the flags for * @param ptr address in which to check the access flags for */ @NativeType("CUresult") public static int cuMemGetAccess(@NativeType("unsigned long long *") LongBuffer flags, @NativeType("CUmemLocation const *") CUmemLocation location, @NativeType("CUdeviceptr") long ptr) { if (CHECKS) { check(flags, 1); } return ncuMemGetAccess(memAddress(flags), location.address(), ptr); } // --- [ cuMemExportToShareableHandle ] --- /** Unsafe version of: {@link #cuMemExportToShareableHandle MemExportToShareableHandle} */ public static int ncuMemExportToShareableHandle(long shareableHandle, long handle, int handleType, long flags) { long __functionAddress = Functions.MemExportToShareableHandle; if (CHECKS) { check(__functionAddress); } return callPJJI(shareableHandle, handle, handleType, flags, __functionAddress); } /** * Exports an allocation to a requested shareable handle type. * *

Given a CUDA memory handle, create a shareable memory allocation handle that can be used to share the memory with other processes. The recipient * process can convert the shareable handle back into a CUDA memory handle using {@link #cuMemImportFromShareableHandle MemImportFromShareableHandle} and map it with {@link #cuMemMap MemMap}. The * implementation of what this handle is and how it can be transferred is defined by the requested handle type in {@code handleType}.

* *

Once all shareable handles are closed and the allocation is released, the allocated memory referenced will be released back to the OS and uses of the * CUDA handle afterward will lead to undefined behavior.

* *

This API can also be used in conjunction with other APIs (e.g. Vulkan, OpenGL) that support importing memory from the shareable type
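As a hedged sketch only: on Linux, exporting the allocation from the earlier {@code cuMemCreate} sketch as a POSIX file descriptor might look like the following. It assumes the allocation was created with {@code requestedHandleTypes} including the POSIX FD type, that the corresponding constant is exposed under its usual driver-API name, and that the descriptor is transferred to the recipient process by some OS mechanism (for example a Unix domain socket), which is outside the scope of these bindings.

    try (MemoryStack stack = MemoryStack.stackPush()) {
        ByteBuffer fdOut = stack.malloc(Integer.BYTES);   // a POSIX FD shareable handle is an int
        checkCU(cuMemExportToShareableHandle(fdOut, allocHandle,
            CU_MEM_HANDLE_TYPE_POSIX_FILE_DESCRIPTOR, 0)); // assumed constant name
        int fd = fdOut.getInt(0);

        // In the recipient process, the received fd is converted back into a CUDA handle:
        LongBuffer imported = stack.mallocLong(1);
        checkCU(cuMemImportFromShareableHandle(imported, fd, CU_MEM_HANDLE_TYPE_POSIX_FILE_DESCRIPTOR));
    }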

* * @param shareableHandle pointer to the location in which to store the requested handle type * @param handle CUDA handle for the memory allocation * @param handleType type of shareable handle requested (defines type and size of the {@code shareableHandle} output parameter) * @param flags reserved, must be zero */ @NativeType("CUresult") public static int cuMemExportToShareableHandle(@NativeType("void *") ByteBuffer shareableHandle, @NativeType("CUmemGenericAllocationHandle") long handle, @NativeType("CUmemAllocationHandleType") int handleType, @NativeType("unsigned long long") long flags) { return ncuMemExportToShareableHandle(memAddress(shareableHandle), handle, handleType, flags); } /** * Exports an allocation to a requested shareable handle type. * *

Given a CUDA memory handle, create a shareable memory allocation handle that can be used to share the memory with other processes. The recipient * process can convert the shareable handle back into a CUDA memory handle using {@link #cuMemImportFromShareableHandle MemImportFromShareableHandle} and map it with {@link #cuMemMap MemMap}. The * implementation of what this handle is and how it can be transferred is defined by the requested handle type in {@code handleType}.

* *

Once all shareable handles are closed and the allocation is released, the allocated memory referenced will be released back to the OS and uses of the * CUDA handle afterward will lead to undefined behavior.

* *

This API can also be used in conjunction with other APIs (e.g. Vulkan, OpenGL) that support importing memory from the shareable type

* * @param shareableHandle pointer to the location in which to store the requested handle type * @param handle CUDA handle for the memory allocation * @param handleType type of shareable handle requested (defines type and size of the {@code shareableHandle} output parameter) * @param flags reserved, must be zero */ @NativeType("CUresult") public static int cuMemExportToShareableHandle(@NativeType("void *") PointerBuffer shareableHandle, @NativeType("CUmemGenericAllocationHandle") long handle, @NativeType("CUmemAllocationHandleType") int handleType, @NativeType("unsigned long long") long flags) { return ncuMemExportToShareableHandle(memAddress(shareableHandle), handle, handleType, flags); } // --- [ cuMemImportFromShareableHandle ] --- /** Unsafe version of: {@link #cuMemImportFromShareableHandle MemImportFromShareableHandle} */ public static int ncuMemImportFromShareableHandle(long handle, long osHandle, int shHandleType) { long __functionAddress = Functions.MemImportFromShareableHandle; if (CHECKS) { check(__functionAddress); check(osHandle); } return callPPI(handle, osHandle, shHandleType, __functionAddress); } /** * Imports an allocation from a requested shareable handle type. * *

If the current process cannot support the memory described by this shareable handle, this API will error as {@link #CUDA_ERROR_NOT_SUPPORTED}.

* *
Note
* *

Importing shareable handles exported from some graphics APIs (Vulkan, OpenGL, etc.) created on devices under an SLI group may not be supported, and thus this API will return {@link #CUDA_ERROR_NOT_SUPPORTED}. There is no guarantee that the contents of {@code handle} will be the same CUDA memory handle for the same given OS shareable handle, or the same underlying allocation.

*
* * @param handle CUDA Memory handle for the memory allocation * @param osHandle shareable Handle representing the memory allocation that is to be imported * @param shHandleType handle type of the exported handle {@code CUmemAllocationHandleType} */ @NativeType("CUresult") public static int cuMemImportFromShareableHandle(@NativeType("CUmemGenericAllocationHandle *") LongBuffer handle, @NativeType("void *") long osHandle, @NativeType("CUmemAllocationHandleType") int shHandleType) { if (CHECKS) { check(handle, 1); } return ncuMemImportFromShareableHandle(memAddress(handle), osHandle, shHandleType); } // --- [ cuMemGetAllocationGranularity ] --- /** Unsafe version of: {@link #cuMemGetAllocationGranularity MemGetAllocationGranularity} */ public static int ncuMemGetAllocationGranularity(long granularity, long prop, int option) { long __functionAddress = Functions.MemGetAllocationGranularity; if (CHECKS) { check(__functionAddress); } return callPPI(granularity, prop, option, __functionAddress); } /** * Calculates either the minimal or recommended granularity. * *

Calculates either the minimal or recommended granularity for a given allocation specification and returns it in granularity. This granularity can be * used as a multiple for alignment, size, or address mapping.

* * @param granularity returned granularity * @param prop property for which to determine the granularity for * @param option determines which granularity to return */ @NativeType("CUresult") public static int cuMemGetAllocationGranularity(@NativeType("size_t *") PointerBuffer granularity, @NativeType("CUmemAllocationProp const *") CUmemAllocationProp prop, @NativeType("CUmemAllocationGranularity_flags") int option) { if (CHECKS) { check(granularity, 1); } return ncuMemGetAllocationGranularity(memAddress(granularity), prop.address(), option); } // --- [ cuMemGetAllocationPropertiesFromHandle ] --- /** Unsafe version of: {@link #cuMemGetAllocationPropertiesFromHandle MemGetAllocationPropertiesFromHandle} */ public static int ncuMemGetAllocationPropertiesFromHandle(long prop, long handle) { long __functionAddress = Functions.MemGetAllocationPropertiesFromHandle; if (CHECKS) { check(__functionAddress); } return callPJI(prop, handle, __functionAddress); } /** * Retrieve the contents of the property structure defining properties for this handle. * * @param prop pointer to a properties structure which will hold the information about this handle * @param handle handle which to perform the query on */ @NativeType("CUresult") public static int cuMemGetAllocationPropertiesFromHandle(@NativeType("CUmemAllocationProp *") CUmemAllocationProp prop, @NativeType("CUmemGenericAllocationHandle") long handle) { return ncuMemGetAllocationPropertiesFromHandle(prop.address(), handle); } // --- [ cuMemRetainAllocationHandle ] --- /** Unsafe version of: {@link #cuMemRetainAllocationHandle MemRetainAllocationHandle} */ public static int ncuMemRetainAllocationHandle(long handle, long addr) { long __functionAddress = Functions.MemRetainAllocationHandle; if (CHECKS) { check(__functionAddress); } return callPPI(handle, addr, __functionAddress); } /** * Given an address {@code addr}, returns the allocation handle of the backing memory allocation. * *

The handle is guaranteed to be the same handle value used to map the memory. If the address requested is not mapped, the function will fail. The * returned handle must be released with corresponding number of calls to {@link #cuMemRelease MemRelease}.

* *
Note
* *

The address {@code addr} can be any address in a range previously mapped by {@link #cuMemMap MemMap}, and not necessarily the start address.

* * @param handle CUDA Memory handle for the backing memory allocation * @param addr memory address to query, that has been mapped previously */ @NativeType("CUresult") public static int cuMemRetainAllocationHandle(@NativeType("CUmemGenericAllocationHandle *") LongBuffer handle, @NativeType("void *") ByteBuffer addr) { if (CHECKS) { check(handle, 1); } return ncuMemRetainAllocationHandle(memAddress(handle), memAddress(addr)); } // --- [ cuMemFreeAsync ] --- /** * Frees memory with stream ordered semantics. * *

Inserts a free operation into {@code hStream}. The allocation must not be accessed after stream execution reaches the free. After this API returns, * accessing the memory from any subsequent work launched on the GPU or querying its pointer attributes results in undefined behavior.

* *
Note
* *

During stream capture, this function results in the creation of a free node and must therefore be passed the address of a graph * allocation.

* * @param dptr memory to free * @param hStream the stream establishing the stream ordering contract */ @NativeType("CUresult") public static int cuMemFreeAsync(@NativeType("CUdeviceptr") long dptr, @NativeType("CUstream") long hStream) { long __functionAddress = Functions.MemFreeAsync; if (CHECKS) { check(__functionAddress); check(dptr); } return callPPI(dptr, hStream, __functionAddress); } // --- [ cuMemAllocAsync ] --- /** Unsafe version of: {@link #cuMemAllocAsync MemAllocAsync} */ public static int ncuMemAllocAsync(long dptr, long bytesize, long hStream) { long __functionAddress = Functions.MemAllocAsync; if (CHECKS) { check(__functionAddress); } return callPPPI(dptr, bytesize, hStream, __functionAddress); } /** * Allocates memory with stream ordered semantics * *

Inserts an allocation operation into {@code hStream}. A pointer to the allocated memory is returned immediately in {@code *dptr}. The allocation must not be accessed until the allocation operation completes. The allocation comes from the memory pool current to the stream's device.
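A minimal stream-ordered allocation sketch, under the same assumptions as the earlier ones (the hypothetical {@code checkCU} helper and a current context); the stream flag and the 1 MiB size are arbitrary example choices.

    try (MemoryStack stack = MemoryStack.stackPush()) {
        PointerBuffer pStream = stack.mallocPointer(1);
        checkCU(cuStreamCreate(pStream, CU_STREAM_DEFAULT));
        long stream = pStream.get(0);

        PointerBuffer dptr = stack.mallocPointer(1);
        checkCU(cuMemAllocAsync(dptr, 1L << 20, stream));  // allocation becomes valid in stream order
        long devPtr = dptr.get(0);

        // ... launch kernels / copies that use devPtr on the same stream ...

        checkCU(cuMemFreeAsync(devPtr, stream));           // ordered after the work above
        checkCU(cuStreamSynchronize(stream));              // observe completion on the host
        checkCU(cuStreamDestroy(stream));
    }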

* *
Note
* *

The default memory pool of a device contains device memory from that device.

* *
Note
* *

Basic stream ordering allows future work submitted into the same stream to use the allocation. Stream query, stream synchronize, and CUDA * events can be used to guarantee that the allocation operation completes before work submitted in a separate stream runs.

* *
Note
* *

During stream capture, this function results in the creation of an allocation node. In this case, the allocation is owned by the graph * instead of the memory pool. The memory pool's properties are used to set the node's creation parameters.

* * @param dptr returned device pointer * @param bytesize number of bytes to allocate * @param hStream the stream establishing the stream ordering contract and the memory pool to allocate from */ @NativeType("CUresult") public static int cuMemAllocAsync(@NativeType("CUdeviceptr *") PointerBuffer dptr, @NativeType("size_t") long bytesize, @NativeType("CUstream") long hStream) { if (CHECKS) { check(dptr, 1); } return ncuMemAllocAsync(memAddress(dptr), bytesize, hStream); } // --- [ cuMemPoolTrimTo ] --- /** * Tries to release memory back to the OS. * *

Releases memory back to the OS until the pool contains fewer than {@code minBytesToKeep} reserved bytes, or there is no more memory that the allocator * can safely release. The allocator cannot release OS allocations that back outstanding asynchronous allocations. The OS allocations may happen at * different granularity from the user allocations.
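For example, raising the release threshold of a device's default pool and trimming it later might look like this sketch. It reuses the hypothetical {@code checkCU} helper and {@code device} ordinal, assumes the default-pool query is available at runtime (it is a CUDA 11.2+ entry point), and the 64 MiB / 8 MiB figures are arbitrary.

    try (MemoryStack stack = MemoryStack.stackPush()) {
        PointerBuffer pPool = stack.mallocPointer(1);
        checkCU(cuDeviceGetDefaultMemPool(pPool, device));
        long pool = pPool.get(0);

        // Let the pool keep up to 64 MiB cached instead of returning it to the OS eagerly.
        checkCU(cuMemPoolSetAttribute(pool, CU_MEMPOOL_ATTR_RELEASE_THRESHOLD, stack.longs(64L << 20)));

        // Later, shrink the pool's reserved memory down to at most 8 MiB.
        checkCU(cuMemPoolTrimTo(pool, 8L << 20));
    }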

* *
Note
* *

Allocations that have not been freed count as outstanding.

* *
Note
* *

Allocations that have been asynchronously freed but whose completion has not been observed on the host (eg. by a synchronize) can count as * outstanding.

* * @param pool the memory pool to trim * @param minBytesToKeep if the pool has less than {@code minBytesToKeep} reserved, the {@code TrimTo} operation is a no-op. Otherwise the pool will be guaranteed to have * at least {@code minBytesToKeep} bytes reserved after the operation. */ @NativeType("CUresult") public static int cuMemPoolTrimTo(@NativeType("CUmemoryPool") long pool, @NativeType("size_t") long minBytesToKeep) { long __functionAddress = Functions.MemPoolTrimTo; if (CHECKS) { check(__functionAddress); check(pool); } return callPPI(pool, minBytesToKeep, __functionAddress); } // --- [ cuMemPoolSetAttribute ] --- /** Unsafe version of: {@link #cuMemPoolSetAttribute MemPoolSetAttribute} */ public static int ncuMemPoolSetAttribute(long pool, int attr, long value) { long __functionAddress = Functions.MemPoolSetAttribute; if (CHECKS) { check(__functionAddress); check(pool); } return callPPI(pool, attr, value, __functionAddress); } /** * Sets attributes of a memory pool. * * @param pool the memory pool to modify * @param attr the attribute to modify. One of:
{@link #CU_MEMPOOL_ATTR_REUSE_FOLLOW_EVENT_DEPENDENCIES MEMPOOL_ATTR_REUSE_FOLLOW_EVENT_DEPENDENCIES}{@link #CU_MEMPOOL_ATTR_REUSE_ALLOW_OPPORTUNISTIC MEMPOOL_ATTR_REUSE_ALLOW_OPPORTUNISTIC}
{@link #CU_MEMPOOL_ATTR_REUSE_ALLOW_INTERNAL_DEPENDENCIES MEMPOOL_ATTR_REUSE_ALLOW_INTERNAL_DEPENDENCIES}{@link #CU_MEMPOOL_ATTR_RELEASE_THRESHOLD MEMPOOL_ATTR_RELEASE_THRESHOLD}
{@link #CU_MEMPOOL_ATTR_RESERVED_MEM_CURRENT MEMPOOL_ATTR_RESERVED_MEM_CURRENT}{@link #CU_MEMPOOL_ATTR_RESERVED_MEM_HIGH MEMPOOL_ATTR_RESERVED_MEM_HIGH}
{@link #CU_MEMPOOL_ATTR_USED_MEM_CURRENT MEMPOOL_ATTR_USED_MEM_CURRENT}{@link #CU_MEMPOOL_ATTR_USED_MEM_HIGH MEMPOOL_ATTR_USED_MEM_HIGH}
* @param value pointer to the value to assign */ @NativeType("CUresult") public static int cuMemPoolSetAttribute(@NativeType("CUmemoryPool") long pool, @NativeType("CUmemPool_attribute") int attr, @NativeType("void *") ByteBuffer value) { return ncuMemPoolSetAttribute(pool, attr, memAddress(value)); } /** * Sets attributes of a memory pool. * * @param pool the memory pool to modify * @param attr the attribute to modify. One of:
{@link #CU_MEMPOOL_ATTR_REUSE_FOLLOW_EVENT_DEPENDENCIES MEMPOOL_ATTR_REUSE_FOLLOW_EVENT_DEPENDENCIES}{@link #CU_MEMPOOL_ATTR_REUSE_ALLOW_OPPORTUNISTIC MEMPOOL_ATTR_REUSE_ALLOW_OPPORTUNISTIC}
{@link #CU_MEMPOOL_ATTR_REUSE_ALLOW_INTERNAL_DEPENDENCIES MEMPOOL_ATTR_REUSE_ALLOW_INTERNAL_DEPENDENCIES}{@link #CU_MEMPOOL_ATTR_RELEASE_THRESHOLD MEMPOOL_ATTR_RELEASE_THRESHOLD}
{@link #CU_MEMPOOL_ATTR_RESERVED_MEM_CURRENT MEMPOOL_ATTR_RESERVED_MEM_CURRENT}{@link #CU_MEMPOOL_ATTR_RESERVED_MEM_HIGH MEMPOOL_ATTR_RESERVED_MEM_HIGH}
{@link #CU_MEMPOOL_ATTR_USED_MEM_CURRENT MEMPOOL_ATTR_USED_MEM_CURRENT}{@link #CU_MEMPOOL_ATTR_USED_MEM_HIGH MEMPOOL_ATTR_USED_MEM_HIGH}
* @param value pointer to the value to assign */ @NativeType("CUresult") public static int cuMemPoolSetAttribute(@NativeType("CUmemoryPool") long pool, @NativeType("CUmemPool_attribute") int attr, @NativeType("void *") IntBuffer value) { return ncuMemPoolSetAttribute(pool, attr, memAddress(value)); } /** * Sets attributes of a memory pool. * * @param pool the memory pool to modify * @param attr the attribute to modify. One of:
{@link #CU_MEMPOOL_ATTR_REUSE_FOLLOW_EVENT_DEPENDENCIES MEMPOOL_ATTR_REUSE_FOLLOW_EVENT_DEPENDENCIES}{@link #CU_MEMPOOL_ATTR_REUSE_ALLOW_OPPORTUNISTIC MEMPOOL_ATTR_REUSE_ALLOW_OPPORTUNISTIC}
{@link #CU_MEMPOOL_ATTR_REUSE_ALLOW_INTERNAL_DEPENDENCIES MEMPOOL_ATTR_REUSE_ALLOW_INTERNAL_DEPENDENCIES}{@link #CU_MEMPOOL_ATTR_RELEASE_THRESHOLD MEMPOOL_ATTR_RELEASE_THRESHOLD}
{@link #CU_MEMPOOL_ATTR_RESERVED_MEM_CURRENT MEMPOOL_ATTR_RESERVED_MEM_CURRENT}{@link #CU_MEMPOOL_ATTR_RESERVED_MEM_HIGH MEMPOOL_ATTR_RESERVED_MEM_HIGH}
{@link #CU_MEMPOOL_ATTR_USED_MEM_CURRENT MEMPOOL_ATTR_USED_MEM_CURRENT}{@link #CU_MEMPOOL_ATTR_USED_MEM_HIGH MEMPOOL_ATTR_USED_MEM_HIGH}
* @param value pointer to the value to assign */ @NativeType("CUresult") public static int cuMemPoolSetAttribute(@NativeType("CUmemoryPool") long pool, @NativeType("CUmemPool_attribute") int attr, @NativeType("void *") LongBuffer value) { return ncuMemPoolSetAttribute(pool, attr, memAddress(value)); } // --- [ cuMemPoolGetAttribute ] --- /** Unsafe version of: {@link #cuMemPoolGetAttribute MemPoolGetAttribute} */ public static int ncuMemPoolGetAttribute(long pool, int attr, long value) { long __functionAddress = Functions.MemPoolGetAttribute; if (CHECKS) { check(__functionAddress); check(pool); } return callPPI(pool, attr, value, __functionAddress); } /** * Gets attributes of a memory pool. * * @param pool the memory pool to get attributes of * @param attr the attribute to get. One of:
{@link #CU_MEMPOOL_ATTR_REUSE_FOLLOW_EVENT_DEPENDENCIES MEMPOOL_ATTR_REUSE_FOLLOW_EVENT_DEPENDENCIES}{@link #CU_MEMPOOL_ATTR_REUSE_ALLOW_OPPORTUNISTIC MEMPOOL_ATTR_REUSE_ALLOW_OPPORTUNISTIC}
{@link #CU_MEMPOOL_ATTR_REUSE_ALLOW_INTERNAL_DEPENDENCIES MEMPOOL_ATTR_REUSE_ALLOW_INTERNAL_DEPENDENCIES}{@link #CU_MEMPOOL_ATTR_RELEASE_THRESHOLD MEMPOOL_ATTR_RELEASE_THRESHOLD}
{@link #CU_MEMPOOL_ATTR_RESERVED_MEM_CURRENT MEMPOOL_ATTR_RESERVED_MEM_CURRENT}{@link #CU_MEMPOOL_ATTR_RESERVED_MEM_HIGH MEMPOOL_ATTR_RESERVED_MEM_HIGH}
{@link #CU_MEMPOOL_ATTR_USED_MEM_CURRENT MEMPOOL_ATTR_USED_MEM_CURRENT}{@link #CU_MEMPOOL_ATTR_USED_MEM_HIGH MEMPOOL_ATTR_USED_MEM_HIGH}
* @param value retrieved value */ @NativeType("CUresult") public static int cuMemPoolGetAttribute(@NativeType("CUmemoryPool") long pool, @NativeType("CUmemPool_attribute") int attr, @NativeType("void *") ByteBuffer value) { return ncuMemPoolGetAttribute(pool, attr, memAddress(value)); } /** * Gets attributes of a memory pool. * * @param pool the memory pool to get attributes of * @param attr the attribute to get. One of:
{@link #CU_MEMPOOL_ATTR_REUSE_FOLLOW_EVENT_DEPENDENCIES MEMPOOL_ATTR_REUSE_FOLLOW_EVENT_DEPENDENCIES}{@link #CU_MEMPOOL_ATTR_REUSE_ALLOW_OPPORTUNISTIC MEMPOOL_ATTR_REUSE_ALLOW_OPPORTUNISTIC}
{@link #CU_MEMPOOL_ATTR_REUSE_ALLOW_INTERNAL_DEPENDENCIES MEMPOOL_ATTR_REUSE_ALLOW_INTERNAL_DEPENDENCIES}{@link #CU_MEMPOOL_ATTR_RELEASE_THRESHOLD MEMPOOL_ATTR_RELEASE_THRESHOLD}
{@link #CU_MEMPOOL_ATTR_RESERVED_MEM_CURRENT MEMPOOL_ATTR_RESERVED_MEM_CURRENT}{@link #CU_MEMPOOL_ATTR_RESERVED_MEM_HIGH MEMPOOL_ATTR_RESERVED_MEM_HIGH}
{@link #CU_MEMPOOL_ATTR_USED_MEM_CURRENT MEMPOOL_ATTR_USED_MEM_CURRENT}{@link #CU_MEMPOOL_ATTR_USED_MEM_HIGH MEMPOOL_ATTR_USED_MEM_HIGH}
* @param value retrieved value */ @NativeType("CUresult") public static int cuMemPoolGetAttribute(@NativeType("CUmemoryPool") long pool, @NativeType("CUmemPool_attribute") int attr, @NativeType("void *") IntBuffer value) { return ncuMemPoolGetAttribute(pool, attr, memAddress(value)); } /** * Gets attributes of a memory pool. * * @param pool the memory pool to get attributes of * @param attr the attribute to get. One of:
{@link #CU_MEMPOOL_ATTR_REUSE_FOLLOW_EVENT_DEPENDENCIES MEMPOOL_ATTR_REUSE_FOLLOW_EVENT_DEPENDENCIES}{@link #CU_MEMPOOL_ATTR_REUSE_ALLOW_OPPORTUNISTIC MEMPOOL_ATTR_REUSE_ALLOW_OPPORTUNISTIC}
{@link #CU_MEMPOOL_ATTR_REUSE_ALLOW_INTERNAL_DEPENDENCIES MEMPOOL_ATTR_REUSE_ALLOW_INTERNAL_DEPENDENCIES}{@link #CU_MEMPOOL_ATTR_RELEASE_THRESHOLD MEMPOOL_ATTR_RELEASE_THRESHOLD}
{@link #CU_MEMPOOL_ATTR_RESERVED_MEM_CURRENT MEMPOOL_ATTR_RESERVED_MEM_CURRENT}{@link #CU_MEMPOOL_ATTR_RESERVED_MEM_HIGH MEMPOOL_ATTR_RESERVED_MEM_HIGH}
{@link #CU_MEMPOOL_ATTR_USED_MEM_CURRENT MEMPOOL_ATTR_USED_MEM_CURRENT}{@link #CU_MEMPOOL_ATTR_USED_MEM_HIGH MEMPOOL_ATTR_USED_MEM_HIGH}
* @param value retrieved value */ @NativeType("CUresult") public static int cuMemPoolGetAttribute(@NativeType("CUmemoryPool") long pool, @NativeType("CUmemPool_attribute") int attr, @NativeType("void *") LongBuffer value) { return ncuMemPoolGetAttribute(pool, attr, memAddress(value)); } // --- [ cuMemPoolSetAccess ] --- /** * Unsafe version of: {@link #cuMemPoolSetAccess MemPoolSetAccess} * * @param count number of descriptors in the map array */ public static int ncuMemPoolSetAccess(long pool, long map, long count) { long __functionAddress = Functions.MemPoolSetAccess; if (CHECKS) { check(__functionAddress); check(pool); } return callPPPI(pool, map, count, __functionAddress); } /** * Controls visibility of pools between devices. * * @param pool the pool being modified * @param map array of access descriptors. Each descriptor instructs the access to enable for a single gpu. */ @NativeType("CUresult") public static int cuMemPoolSetAccess(@NativeType("CUmemoryPool") long pool, @NativeType("CUmemAccessDesc const *") CUmemAccessDesc.Buffer map) { return ncuMemPoolSetAccess(pool, map.address(), map.remaining()); } // --- [ cuMemPoolGetAccess ] --- /** Unsafe version of: {@link #cuMemPoolGetAccess MemPoolGetAccess} */ public static int ncuMemPoolGetAccess(long flags, long memPool, long location) { long __functionAddress = Functions.MemPoolGetAccess; if (CHECKS) { check(__functionAddress); check(memPool); } return callPPPI(flags, memPool, location, __functionAddress); } /** * Returns the accessibility of a pool from a device. * *

Returns the accessibility of the pool's memory from the specified location.

* * @param flags the accessibility of the pool from the specified location * @param memPool the pool being queried * @param location the location accessing the pool */ @NativeType("CUresult") public static int cuMemPoolGetAccess(@NativeType("CUmemAccess_flags *") IntBuffer flags, @NativeType("CUmemoryPool") long memPool, @NativeType("CUmemLocation *") CUmemLocation location) { if (CHECKS) { check(flags, 1); } return ncuMemPoolGetAccess(memAddress(flags), memPool, location.address()); } // --- [ cuMemPoolCreate ] --- /** Unsafe version of: {@link #cuMemPoolCreate MemPoolCreate} */ public static int ncuMemPoolCreate(long pool, long poolProps) { long __functionAddress = Functions.MemPoolCreate; if (CHECKS) { check(__functionAddress); } return callPPI(pool, poolProps, __functionAddress); } /** * Creates a memory pool. * *

Creates a CUDA memory pool and returns the handle in {@code pool}. The {@code poolProps} determines the properties of the pool such as the backing * device and IPC capabilities.

* *

By default, the pool's memory will be accessible from the device it is allocated on.

* *
Note
* *

Specifying {@link #CU_MEM_HANDLE_TYPE_NONE MEM_HANDLE_TYPE_NONE} creates a memory pool that will not support IPC.
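A sketch of an explicit (non-IPC) pool, reusing the hypothetical {@code checkCU} helper, a {@code device} ordinal and a {@code stream} as in the earlier sketches; the member and constant names are the usual driver-API ones and are assumed to be exposed by these bindings.

    try (MemoryStack stack = MemoryStack.stackPush()) {
        CUmemPoolProps props = CUmemPoolProps.calloc(stack)
            .allocType(CU_MEM_ALLOCATION_TYPE_PINNED)     // assumed constant name
            .handleTypes(CU_MEM_HANDLE_TYPE_NONE);        // no IPC support
        props.location().type(CU_MEM_LOCATION_TYPE_DEVICE).id(device);

        PointerBuffer pPool = stack.mallocPointer(1);
        checkCU(cuMemPoolCreate(pPool, props));
        long pool = pPool.get(0);

        PointerBuffer dptr = stack.mallocPointer(1);
        checkCU(cuMemAllocFromPoolAsync(dptr, 1L << 20, pool, stream));
        checkCU(cuMemFreeAsync(dptr.get(0), stream));
        checkCU(cuStreamSynchronize(stream));

        checkCU(cuMemPoolDestroy(pool));
    }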

*/ @NativeType("CUresult") public static int cuMemPoolCreate(@NativeType("CUmemoryPool *") PointerBuffer pool, @NativeType("CUmemPoolProps const *") CUmemPoolProps poolProps) { if (CHECKS) { check(pool, 1); } return ncuMemPoolCreate(memAddress(pool), poolProps.address()); } // --- [ cuMemPoolDestroy ] --- /** * Destroys the specified memory pool. * *

If any pointers obtained from this pool haven't been freed or the pool has free operations that haven't completed when {@link #cuMemPoolDestroy MemPoolDestroy} is invoked, * the function will return immediately and the resources associated with the pool will be released automatically once there are no more outstanding * allocations.

* *

Destroying the current mempool of a device sets the default mempool of that device as the current mempool for that device.

* *
Note
* *

A device's default memory pool cannot be destroyed.

*/ @NativeType("CUresult") public static int cuMemPoolDestroy(@NativeType("CUmemoryPool") long pool) { long __functionAddress = Functions.MemPoolDestroy; if (CHECKS) { check(__functionAddress); check(pool); } return callPI(pool, __functionAddress); } // --- [ cuMemAllocFromPoolAsync ] --- /** Unsafe version of: {@link #cuMemAllocFromPoolAsync MemAllocFromPoolAsync} */ public static int ncuMemAllocFromPoolAsync(long dptr, long bytesize, long pool, long hStream) { long __functionAddress = Functions.MemAllocFromPoolAsync; if (CHECKS) { check(__functionAddress); check(pool); } return callPPPPI(dptr, bytesize, pool, hStream, __functionAddress); } /** * Allocates memory from a specified pool with stream ordered semantics. * *

Inserts an allocation operation into {@code hStream}. A pointer to the allocated memory is returned immediately in {@code *dptr}. The allocation must not be accessed until the allocation operation completes. The allocation comes from the specified memory pool.

* *
Note
* *

The specified memory pool may be from a device different than that of the specified {@code hStream}.

* *
    *
  • Basic stream ordering allows future work submitted into the same stream to use the allocation. Stream query, stream synchronize, and CUDA events * can be used to guarantee that the allocation operation completes before work submitted in a separate stream runs.
  • *
* *
Note
* *

During stream capture, this function results in the creation of an allocation node. In this case, the allocation is owned by the graph * instead of the memory pool. The memory pool's properties are used to set the node's creation parameters.

* * @param dptr returned device pointer * @param bytesize number of bytes to allocate * @param pool the pool to allocate from * @param hStream the stream establishing the stream ordering semantic */ @NativeType("CUresult") public static int cuMemAllocFromPoolAsync(@NativeType("CUdeviceptr *") PointerBuffer dptr, @NativeType("size_t") long bytesize, @NativeType("CUmemoryPool") long pool, @NativeType("CUstream") long hStream) { if (CHECKS) { check(dptr, 1); } return ncuMemAllocFromPoolAsync(memAddress(dptr), bytesize, pool, hStream); } // --- [ cuMemPoolExportToShareableHandle ] --- /** Unsafe version of: {@link #cuMemPoolExportToShareableHandle MemPoolExportToShareableHandle} */ public static int ncuMemPoolExportToShareableHandle(long handle_out, long pool, int handleType, long flags) { long __functionAddress = Functions.MemPoolExportToShareableHandle; if (CHECKS) { check(__functionAddress); check(pool); } return callPPJI(handle_out, pool, handleType, flags, __functionAddress); } /** * Exports a memory pool to the requested handle type. * *

Given an IPC capable mempool, create an OS handle to share the pool with another process. A recipient process can convert the shareable handle into a * mempool with {@link #cuMemPoolImportFromShareableHandle MemPoolImportFromShareableHandle}. Individual pointers can then be shared with the {@link #cuMemPoolExportPointer MemPoolExportPointer} and * {@link #cuMemPoolImportPointer MemPoolImportPointer} APIs. The implementation of what the shareable handle is and how it can be transferred is defined by the requested handle * type.

Note: To create an IPC capable mempool, create a mempool with a {@code CUmemAllocationHandleType} other than {@link #CU_MEM_HANDLE_TYPE_NONE MEM_HANDLE_TYPE_NONE}.

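For illustration, a sketch of the exporting side on Linux, assuming {@code pool} was created with its {@code handleTypes} set to the POSIX file descriptor handle type; the 8-byte output buffer and the int-sized read are assumptions about that handle type:

try (MemoryStack stack = stackPush()) {
    ByteBuffer handleOut = stack.calloc(8);  // receives the OS handle (an int fd for the POSIX type)
    int err = cuMemPoolExportToShareableHandle(handleOut, pool, CU_MEM_HANDLE_TYPE_POSIX_FILE_DESCRIPTOR, 0);
    if (err != CUDA_SUCCESS) {
        throw new IllegalStateException("cuMemPoolExportToShareableHandle failed: " + err);
    }
    int fd = handleOut.getInt(0);
    // send `fd` to the recipient process, e.g. over a UNIX domain socket with SCM_RIGHTS
}
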
* * @param handle_out returned OS handle * @param pool pool to export * @param handleType the type of handle to create * @param flags must be 0 */ @NativeType("CUresult") public static int cuMemPoolExportToShareableHandle(@NativeType("void *") ByteBuffer handle_out, @NativeType("CUmemoryPool") long pool, @NativeType("CUmemAllocationHandleType") int handleType, @NativeType("unsigned long long") long flags) { return ncuMemPoolExportToShareableHandle(memAddress(handle_out), pool, handleType, flags); } /** * Exports a memory pool to the requested handle type. * *

Given an IPC capable mempool, create an OS handle to share the pool with another process. A recipient process can convert the shareable handle into a mempool with {@link #cuMemPoolImportFromShareableHandle MemPoolImportFromShareableHandle}. Individual pointers can then be shared with the {@link #cuMemPoolExportPointer MemPoolExportPointer} and {@link #cuMemPoolImportPointer MemPoolImportPointer} APIs. The implementation of what the shareable handle is and how it can be transferred is defined by the requested handle type.

Note: To create an IPC capable mempool, create a mempool with a {@code CUmemAllocationHandleType} other than {@link #CU_MEM_HANDLE_TYPE_NONE MEM_HANDLE_TYPE_NONE}.

* * @param handle_out returned OS handle * @param pool pool to export * @param handleType the type of handle to create * @param flags must be 0 */ @NativeType("CUresult") public static int cuMemPoolExportToShareableHandle(@NativeType("void *") PointerBuffer handle_out, @NativeType("CUmemoryPool") long pool, @NativeType("CUmemAllocationHandleType") int handleType, @NativeType("unsigned long long") long flags) { return ncuMemPoolExportToShareableHandle(memAddress(handle_out), pool, handleType, flags); } // --- [ cuMemPoolImportFromShareableHandle ] --- /** Unsafe version of: {@link #cuMemPoolImportFromShareableHandle MemPoolImportFromShareableHandle} */ public static int ncuMemPoolImportFromShareableHandle(long pool_out, long handle, int handleType, long flags) { long __functionAddress = Functions.MemPoolImportFromShareableHandle; if (CHECKS) { check(__functionAddress); } return callPPJI(pool_out, handle, handleType, flags, __functionAddress); } /** * Imports a memory pool from a shared handle. * *

Specific allocations can be imported from the imported pool with {@link #cuMemPoolImportPointer MemPoolImportPointer}.

Note: Imported memory pools do not support creating new allocations. As such, imported memory pools may not be used in {@link #cuDeviceSetMemPool DeviceSetMemPool} or {@link #cuMemAllocFromPoolAsync MemAllocFromPoolAsync} calls.

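On the receiving side, a sketch that turns the transferred descriptor {@code fd} back into a pool (same handle-type assumption as above; the convention that the handle buffer holds the received descriptor is also an assumption):

try (MemoryStack stack = stackPush()) {
    PointerBuffer pPool  = stack.mallocPointer(1);
    ByteBuffer    handle = stack.calloc(8).putInt(0, fd);  // the descriptor received over IPC (assumed layout)
    int err = cuMemPoolImportFromShareableHandle(pPool, handle, CU_MEM_HANDLE_TYPE_POSIX_FILE_DESCRIPTOR, 0);
    if (err != CUDA_SUCCESS) {
        throw new IllegalStateException("cuMemPoolImportFromShareableHandle failed: " + err);
    }
    long importedPool = pPool.get(0);  // usable with cuMemPoolImportPointer, not for new allocations
}
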
* * @param pool_out returned memory pool * @param handle OS handle of the pool to open * @param handleType the type of handle being imported * @param flags must be 0 */ @NativeType("CUresult") public static int cuMemPoolImportFromShareableHandle(@NativeType("CUmemoryPool *") PointerBuffer pool_out, @NativeType("void *") ByteBuffer handle, @NativeType("CUmemAllocationHandleType") int handleType, @NativeType("unsigned long long") long flags) { if (CHECKS) { check(pool_out, 1); } return ncuMemPoolImportFromShareableHandle(memAddress(pool_out), memAddress(handle), handleType, flags); } /** * Imports a memory pool from a shared handle. * *

Specific allocations can be imported from the imported pool with {@link #cuMemPoolImportPointer MemPoolImportPointer}.

Note: Imported memory pools do not support creating new allocations. As such, imported memory pools may not be used in {@link #cuDeviceSetMemPool DeviceSetMemPool} or {@link #cuMemAllocFromPoolAsync MemAllocFromPoolAsync} calls.

* * @param pool_out returned memory pool * @param handle OS handle of the pool to open * @param handleType the type of handle being imported * @param flags must be 0 */ @NativeType("CUresult") public static int cuMemPoolImportFromShareableHandle(@NativeType("CUmemoryPool *") PointerBuffer pool_out, @NativeType("void *") PointerBuffer handle, @NativeType("CUmemAllocationHandleType") int handleType, @NativeType("unsigned long long") long flags) { if (CHECKS) { check(pool_out, 1); } return ncuMemPoolImportFromShareableHandle(memAddress(pool_out), memAddress(handle), handleType, flags); } // --- [ cuMemPoolExportPointer ] --- /** Unsafe version of: {@link #cuMemPoolExportPointer MemPoolExportPointer} */ public static int ncuMemPoolExportPointer(long shareData_out, long ptr) { long __functionAddress = Functions.MemPoolExportPointer; if (CHECKS) { check(__functionAddress); check(ptr); } return callPPI(shareData_out, ptr, __functionAddress); } /** * Export data to share a memory pool allocation between processes. * *

Constructs {@code shareData_out} for sharing a specific allocation from an already shared memory pool. The recipient process can import the allocation with the {@link #cuMemPoolImportPointer MemPoolImportPointer} API. The data is not a handle and may be shared through any IPC mechanism.

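A minimal sketch of the exporting side, assuming {@code dptr} was allocated from an already shared pool via {@link #cuMemAllocFromPoolAsync MemAllocFromPoolAsync}:

try (MemoryStack stack = stackPush()) {
    CUmemPoolPtrExportData exportData = CUmemPoolPtrExportData.calloc(stack);
    int err = cuMemPoolExportPointer(exportData, dptr);
    if (err != CUDA_SUCCESS) {
        throw new IllegalStateException("cuMemPoolExportPointer failed: " + err);
    }
    // exportData is opaque, fixed-size data rather than an OS handle; its
    // CUmemPoolPtrExportData.SIZEOF bytes can be copied into any IPC channel
}
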
* * @param shareData_out returned export data * @param ptr pointer to memory being exported */ @NativeType("CUresult") public static int cuMemPoolExportPointer(@NativeType("CUmemPoolPtrExportData *") CUmemPoolPtrExportData shareData_out, @NativeType("CUdeviceptr") long ptr) { return ncuMemPoolExportPointer(shareData_out.address(), ptr); } // --- [ cuMemPoolImportPointer ] --- /** Unsafe version of: {@link #cuMemPoolImportPointer MemPoolImportPointer} */ public static int ncuMemPoolImportPointer(long ptr_out, long pool, long shareData) { long __functionAddress = Functions.MemPoolImportPointer; if (CHECKS) { check(__functionAddress); check(pool); } return callPPPI(ptr_out, pool, shareData, __functionAddress); } /** * Import a memory pool allocation from another process. * *

Returns in {@code ptr_out} a pointer to the imported memory. The imported memory must not be accessed before the allocation operation completes in the exporting process. The imported memory must be freed from all importing processes before being freed in the exporting process. The pointer may be freed with {@link #cuMemFree MemFree} or {@link #cuMemFreeAsync MemFreeAsync}. If {@code cuMemFreeAsync} is used, the free must be completed on the importing process before the free operation on the exporting process.

Note: The {@code cuMemFreeAsync} API may be used in the exporting process before the {@code cuMemFreeAsync} operation completes in its stream, as long as the {@code cuMemFreeAsync} in the exporting process specifies a stream with a stream dependency on the importing process's {@code cuMemFreeAsync}.

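And the importing side, assuming {@code importedPool} came from {@link #cuMemPoolImportFromShareableHandle MemPoolImportFromShareableHandle}, {@code shareData} is a {@link CUmemPoolPtrExportData} filled with the received bytes, and {@code stream} is a local stream:

try (MemoryStack stack = stackPush()) {
    PointerBuffer pPtr = stack.mallocPointer(1);
    int err = cuMemPoolImportPointer(pPtr, importedPool, shareData);
    if (err != CUDA_SUCCESS) {
        throw new IllegalStateException("cuMemPoolImportPointer failed: " + err);
    }
    long importedPtr = pPtr.get(0);
    // ... use importedPtr in kernels launched on `stream` ...
    cuMemFreeAsync(importedPtr, stream);  // must complete before the exporting process frees it
}
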
* * @param ptr_out pointer to imported memory * @param pool pool from which to import * @param shareData data specifying the memory to import */ @NativeType("CUresult") public static int cuMemPoolImportPointer(@NativeType("CUdeviceptr *") PointerBuffer ptr_out, @NativeType("CUmemoryPool") long pool, @NativeType("CUmemPoolPtrExportData *") CUmemPoolPtrExportData shareData) { if (CHECKS) { check(ptr_out, 1); } return ncuMemPoolImportPointer(memAddress(ptr_out), pool, shareData.address()); } // --- [ cuPointerGetAttribute ] --- /** Unsafe version of: {@link #cuPointerGetAttribute PointerGetAttribute} */ public static int ncuPointerGetAttribute(long data, int attribute, long ptr) { long __functionAddress = Functions.PointerGetAttribute; if (CHECKS) { check(__functionAddress); check(ptr); } return callPPI(data, attribute, ptr, __functionAddress); } /** * Returns information about a pointer. * *

The supported attributes are:

* *
    *
  • {@link #CU_POINTER_ATTRIBUTE_CONTEXT POINTER_ATTRIBUTE_CONTEXT}: Returns in {@code *data} the {@code CUcontext} in which {@code ptr} was allocated or registered. The type of * {@code data} must be {@code CUcontext *}. * *

    If {@code ptr} was not allocated by, mapped by, or registered with a {@code CUcontext} which uses unified virtual addressing then * {@link #CUDA_ERROR_INVALID_VALUE} is returned.

  • *
  • {@link #CU_POINTER_ATTRIBUTE_MEMORY_TYPE POINTER_ATTRIBUTE_MEMORY_TYPE}: * *

    Returns in {@code *data} the physical memory type of the memory that {@code ptr} addresses as a {@code CUmemorytype} enumerated value. The type of * {@code data} must be unsigned int.

    * *

    If {@code ptr} addresses device memory then {@code *data} is set to {@link #CU_MEMORYTYPE_DEVICE MEMORYTYPE_DEVICE}. The particular {@code CUdevice} on which the memory resides * is the {@code CUdevice} of the {@code CUcontext} returned by the {@link #CU_POINTER_ATTRIBUTE_CONTEXT POINTER_ATTRIBUTE_CONTEXT} attribute of {@code ptr}.

    * *

    If {@code ptr} addresses host memory then {@code *data} is set to {@link #CU_MEMORYTYPE_HOST MEMORYTYPE_HOST}.

    * *

    If {@code ptr} was not allocated by, mapped by, or registered with a {@code CUcontext} which uses unified virtual addressing then * {@link #CUDA_ERROR_INVALID_VALUE} is returned.

    * *

    If the current {@code CUcontext} does not support unified virtual addressing then {@link #CUDA_ERROR_INVALID_CONTEXT} is returned.

  • *
  • {@link #CU_POINTER_ATTRIBUTE_DEVICE_POINTER POINTER_ATTRIBUTE_DEVICE_POINTER}: Returns in {@code *data} the device pointer value through which {@code ptr} may be accessed by kernels running * in the current {@code CUcontext}. The type of {@code data} must be {@code CUdeviceptr *}. * *

    If there exists no device pointer value through which kernels running in the current {@code CUcontext} may access {@code ptr} then * {@link #CUDA_ERROR_INVALID_VALUE} is returned.

    * *

    If there is no current {@code CUcontext} then {@link #CUDA_ERROR_INVALID_CONTEXT} is returned.

    * *

    Except in the exceptional disjoint addressing cases discussed below, the value returned in {@code *data} will equal the input value {@code ptr}.

  • *
  • {@link #CU_POINTER_ATTRIBUTE_HOST_POINTER POINTER_ATTRIBUTE_HOST_POINTER}: Returns in {@code *data} the host pointer value through which {@code ptr} may be accessed by the host program. The type of {@code data} must be {@code void **}. If there exists no host pointer value through which the host program may directly access {@code ptr} then {@link #CUDA_ERROR_INVALID_VALUE} is returned.

    Except in the exceptional disjoint addressing cases discussed below, the value returned in {@code *data} will equal the input value {@code ptr}.

  • *
  • {@link #CU_POINTER_ATTRIBUTE_P2P_TOKENS POINTER_ATTRIBUTE_P2P_TOKENS}: Returns in {@code *data} two tokens for use with the nv-p2p.h Linux kernel interface. {@code data} must be a struct * of type {@link CUDA_POINTER_ATTRIBUTE_P2P_TOKENS}. * *

    {@code ptr} must be a pointer to memory obtained from {@link #cuMemAlloc MemAlloc}. Note that {@code p2pToken} and {@code vaSpaceToken} are only valid for the * lifetime of the source allocation. A subsequent allocation at the same address may return completely different tokens. Querying this attribute has * a side effect of setting the attribute {@link #CU_POINTER_ATTRIBUTE_SYNC_MEMOPS POINTER_ATTRIBUTE_SYNC_MEMOPS} for the region of memory that {@code ptr} points to.

  • *
  • {@link #CU_POINTER_ATTRIBUTE_SYNC_MEMOPS POINTER_ATTRIBUTE_SYNC_MEMOPS}: * *

    A boolean attribute which when set, ensures that synchronous memory operations initiated on the region of memory that {@code ptr} points to will * always synchronize. See further documentation in the section titled "API synchronization behavior" to learn more about cases when synchronous * memory operations can exhibit asynchronous behavior.

  • *
  • {@link #CU_POINTER_ATTRIBUTE_BUFFER_ID POINTER_ATTRIBUTE_BUFFER_ID}: Returns in {@code *data} a buffer ID which is guaranteed to be unique within the process. {@code data} must point to * an unsigned long long. * *

    {@code ptr} must be a pointer to memory obtained from a CUDA memory allocation API. Every memory allocation from any of the CUDA memory allocation * APIs will have a unique ID over a process lifetime. Subsequent allocations do not reuse IDs from previous freed allocations. IDs are only unique * within a single process.

  • *
  • {@link #CU_POINTER_ATTRIBUTE_IS_MANAGED POINTER_ATTRIBUTE_IS_MANAGED}: Returns in {@code *data} a boolean that indicates whether the pointer points to managed memory or not. * *

    If {@code ptr} is not a valid CUDA pointer then {@link #CUDA_ERROR_INVALID_VALUE} is returned.

  • *
  • {@link #CU_POINTER_ATTRIBUTE_DEVICE_ORDINAL POINTER_ATTRIBUTE_DEVICE_ORDINAL}: Returns in {@code *data} an integer representing a device ordinal of a device against which the memory was * allocated or registered.
  • *
  • {@link #CU_POINTER_ATTRIBUTE_IS_LEGACY_CUDA_IPC_CAPABLE POINTER_ATTRIBUTE_IS_LEGACY_CUDA_IPC_CAPABLE}: Returns in {@code *data} a boolean that indicates if this pointer maps to an allocation that is * suitable for {@code cudaIpcGetMemHandle()}.
  • *
  • {@link #CU_POINTER_ATTRIBUTE_RANGE_START_ADDR POINTER_ATTRIBUTE_RANGE_START_ADDR}: Returns in {@code *data} the starting address for the allocation referenced by the device pointer {@code ptr}. * Note that this is not necessarily the address of the mapped region, but the address of the mappable address range {@code ptr} references (e.g. from * {@link #cuMemAddressReserve MemAddressReserve}).
  • *
  • {@link #CU_POINTER_ATTRIBUTE_RANGE_SIZE POINTER_ATTRIBUTE_RANGE_SIZE}: Returns in {@code *data} the size for the allocation referenced by the device pointer {@code ptr}. Note that this is * not necessarily the size of the mapped region, but the size of the mappable address range {@code ptr} references (e.g. from {@link #cuMemAddressReserve MemAddressReserve}). * To retrieve the size of the mapped region, see {@link #cuMemGetAddressRange MemGetAddressRange}.
  • *
  • {@link #CU_POINTER_ATTRIBUTE_MAPPED POINTER_ATTRIBUTE_MAPPED}: Returns in {@code *data} a boolean that indicates if this pointer is in a valid address range that is mapped to a * backing allocation.
  • *
  • {@link #CU_POINTER_ATTRIBUTE_ALLOWED_HANDLE_TYPES POINTER_ATTRIBUTE_ALLOWED_HANDLE_TYPES}: Returns a bitmask of the allowed handle types for an allocation that may be passed to * {@link #cuMemExportToShareableHandle MemExportToShareableHandle}.
  • *
  • {@link #CU_POINTER_ATTRIBUTE_MEMPOOL_HANDLE POINTER_ATTRIBUTE_MEMPOOL_HANDLE}: Returns in {@code *data} the handle to the mempool that the allocation was obtained from.
  • *
* *

Note that for most allocations in the unified virtual address space the host and device pointer for accessing the allocation will be the same. The exceptions to this are user memory registered using {@link #cuMemHostRegister MemHostRegister} and host memory allocated using {@link #cuMemHostAlloc MemHostAlloc} with the {@link #CU_MEMHOSTALLOC_WRITECOMBINED MEMHOSTALLOC_WRITECOMBINED} flag. For these types of allocation there will exist separate, disjoint host and device addresses for accessing the allocation. In particular:

* *
    *
  • The host address will correspond to an invalid unmapped device address (which will result in an exception if accessed from the device)
  • *
  • The device address will correspond to an invalid unmapped host address (which will result in an exception if accessed from the host).
  • *
* *

For these types of allocations, querying {@link #CU_POINTER_ATTRIBUTE_HOST_POINTER POINTER_ATTRIBUTE_HOST_POINTER} and {@link #CU_POINTER_ATTRIBUTE_DEVICE_POINTER POINTER_ATTRIBUTE_DEVICE_POINTER} may be used to retrieve the host and * device addresses from either address.

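As an illustration, a minimal sketch that queries {@link #CU_POINTER_ATTRIBUTE_MEMORY_TYPE POINTER_ATTRIBUTE_MEMORY_TYPE} for an arbitrary pointer {@code ptr}, assuming a current context with unified virtual addressing:

try (MemoryStack stack = stackPush()) {
    IntBuffer type = stack.mallocInt(1);  // this attribute is written as an unsigned int
    int err = cuPointerGetAttribute(type, CU_POINTER_ATTRIBUTE_MEMORY_TYPE, ptr);
    if (err == CUDA_SUCCESS) {
        boolean onDevice = type.get(0) == CU_MEMORYTYPE_DEVICE;
        System.out.println(onDevice ? "device memory" : "host memory");
    } else if (err == CUDA_ERROR_INVALID_VALUE) {
        System.out.println("ptr is not known to the CUDA driver");
    }
}
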
* * @param data returned pointer attribute value * @param attribute pointer attribute to query * @param ptr pointer */ @NativeType("CUresult") public static int cuPointerGetAttribute(@NativeType("void *") ByteBuffer data, @NativeType("CUpointer_attribute") int attribute, @NativeType("CUdeviceptr") long ptr) { return ncuPointerGetAttribute(memAddress(data), attribute, ptr); } /** * Returns information about a pointer. * *

The supported attributes are:

* *
    *
  • {@link #CU_POINTER_ATTRIBUTE_CONTEXT POINTER_ATTRIBUTE_CONTEXT}: Returns in {@code *data} the {@code CUcontext} in which {@code ptr} was allocated or registered. The type of * {@code data} must be {@code CUcontext *}. * *

    If {@code ptr} was not allocated by, mapped by, or registered with a {@code CUcontext} which uses unified virtual addressing then * {@link #CUDA_ERROR_INVALID_VALUE} is returned.

  • *
  • {@link #CU_POINTER_ATTRIBUTE_MEMORY_TYPE POINTER_ATTRIBUTE_MEMORY_TYPE}: * *

    Returns in {@code *data} the physical memory type of the memory that {@code ptr} addresses as a {@code CUmemorytype} enumerated value. The type of * {@code data} must be unsigned int.

    * *

    If {@code ptr} addresses device memory then {@code *data} is set to {@link #CU_MEMORYTYPE_DEVICE MEMORYTYPE_DEVICE}. The particular {@code CUdevice} on which the memory resides * is the {@code CUdevice} of the {@code CUcontext} returned by the {@link #CU_POINTER_ATTRIBUTE_CONTEXT POINTER_ATTRIBUTE_CONTEXT} attribute of {@code ptr}.

    * *

    If {@code ptr} addresses host memory then {@code *data} is set to {@link #CU_MEMORYTYPE_HOST MEMORYTYPE_HOST}.

    * *

    If {@code ptr} was not allocated by, mapped by, or registered with a {@code CUcontext} which uses unified virtual addressing then * {@link #CUDA_ERROR_INVALID_VALUE} is returned.

    * *

    If the current {@code CUcontext} does not support unified virtual addressing then {@link #CUDA_ERROR_INVALID_CONTEXT} is returned.

  • *
  • {@link #CU_POINTER_ATTRIBUTE_DEVICE_POINTER POINTER_ATTRIBUTE_DEVICE_POINTER}: Returns in {@code *data} the device pointer value through which {@code ptr} may be accessed by kernels running * in the current {@code CUcontext}. The type of {@code data} must be {@code CUdeviceptr *}. * *

    If there exists no device pointer value through which kernels running in the current {@code CUcontext} may access {@code ptr} then * {@link #CUDA_ERROR_INVALID_VALUE} is returned.

    * *

    If there is no current {@code CUcontext} then {@link #CUDA_ERROR_INVALID_CONTEXT} is returned.

    * *

    Except in the exceptional disjoint addressing cases discussed below, the value returned in {@code *data} will equal the input value {@code ptr}.

  • *
  • {@link #CU_POINTER_ATTRIBUTE_HOST_POINTER POINTER_ATTRIBUTE_HOST_POINTER}: Returns in {@code *data} the host pointer value through which {@code ptr} may be accessed by the host program. The type of {@code data} must be {@code void **}. If there exists no host pointer value through which the host program may directly access {@code ptr} then {@link #CUDA_ERROR_INVALID_VALUE} is returned.

    Except in the exceptional disjoint addressing cases discussed below, the value returned in {@code *data} will equal the input value {@code ptr}.

  • *
  • {@link #CU_POINTER_ATTRIBUTE_P2P_TOKENS POINTER_ATTRIBUTE_P2P_TOKENS}: Returns in {@code *data} two tokens for use with the nv-p2p.h Linux kernel interface. {@code data} must be a struct * of type {@link CUDA_POINTER_ATTRIBUTE_P2P_TOKENS}. * *

    {@code ptr} must be a pointer to memory obtained from {@link #cuMemAlloc MemAlloc}. Note that {@code p2pToken} and {@code vaSpaceToken} are only valid for the * lifetime of the source allocation. A subsequent allocation at the same address may return completely different tokens. Querying this attribute has * a side effect of setting the attribute {@link #CU_POINTER_ATTRIBUTE_SYNC_MEMOPS POINTER_ATTRIBUTE_SYNC_MEMOPS} for the region of memory that {@code ptr} points to.

  • *
  • {@link #CU_POINTER_ATTRIBUTE_SYNC_MEMOPS POINTER_ATTRIBUTE_SYNC_MEMOPS}: * *

    A boolean attribute which when set, ensures that synchronous memory operations initiated on the region of memory that {@code ptr} points to will * always synchronize. See further documentation in the section titled "API synchronization behavior" to learn more about cases when synchronous * memory operations can exhibit asynchronous behavior.

  • *
  • {@link #CU_POINTER_ATTRIBUTE_BUFFER_ID POINTER_ATTRIBUTE_BUFFER_ID}: Returns in {@code *data} a buffer ID which is guaranteed to be unique within the process. {@code data} must point to * an unsigned long long. * *

    {@code ptr} must be a pointer to memory obtained from a CUDA memory allocation API. Every memory allocation from any of the CUDA memory allocation * APIs will have a unique ID over a process lifetime. Subsequent allocations do not reuse IDs from previous freed allocations. IDs are only unique * within a single process.

  • *
  • {@link #CU_POINTER_ATTRIBUTE_IS_MANAGED POINTER_ATTRIBUTE_IS_MANAGED}: Returns in {@code *data} a boolean that indicates whether the pointer points to managed memory or not. * *

    If {@code ptr} is not a valid CUDA pointer then {@link #CUDA_ERROR_INVALID_VALUE} is returned.

  • *
  • {@link #CU_POINTER_ATTRIBUTE_DEVICE_ORDINAL POINTER_ATTRIBUTE_DEVICE_ORDINAL}: Returns in {@code *data} an integer representing a device ordinal of a device against which the memory was * allocated or registered.
  • *
  • {@link #CU_POINTER_ATTRIBUTE_IS_LEGACY_CUDA_IPC_CAPABLE POINTER_ATTRIBUTE_IS_LEGACY_CUDA_IPC_CAPABLE}: Returns in {@code *data} a boolean that indicates if this pointer maps to an allocation that is * suitable for {@code cudaIpcGetMemHandle()}.
  • *
  • {@link #CU_POINTER_ATTRIBUTE_RANGE_START_ADDR POINTER_ATTRIBUTE_RANGE_START_ADDR}: Returns in {@code *data} the starting address for the allocation referenced by the device pointer {@code ptr}. * Note that this is not necessarily the address of the mapped region, but the address of the mappable address range {@code ptr} references (e.g. from * {@link #cuMemAddressReserve MemAddressReserve}).
  • *
  • {@link #CU_POINTER_ATTRIBUTE_RANGE_SIZE POINTER_ATTRIBUTE_RANGE_SIZE}: Returns in {@code *data} the size for the allocation referenced by the device pointer {@code ptr}. Note that this is * not necessarily the size of the mapped region, but the size of the mappable address range {@code ptr} references (e.g. from {@link #cuMemAddressReserve MemAddressReserve}). * To retrieve the size of the mapped region, see {@link #cuMemGetAddressRange MemGetAddressRange}.
  • *
  • {@link #CU_POINTER_ATTRIBUTE_MAPPED POINTER_ATTRIBUTE_MAPPED}: Returns in {@code *data} a boolean that indicates if this pointer is in a valid address range that is mapped to a * backing allocation.
  • *
  • {@link #CU_POINTER_ATTRIBUTE_ALLOWED_HANDLE_TYPES POINTER_ATTRIBUTE_ALLOWED_HANDLE_TYPES}: Returns a bitmask of the allowed handle types for an allocation that may be passed to * {@link #cuMemExportToShareableHandle MemExportToShareableHandle}.
  • *
  • {@link #CU_POINTER_ATTRIBUTE_MEMPOOL_HANDLE POINTER_ATTRIBUTE_MEMPOOL_HANDLE}: Returns in {@code *data} the handle to the mempool that the allocation was obtained from.
  • *
* *

Note that for most allocations in the unified virtual address space the host and device pointer for accessing the allocation will be the same. The exceptions to this are user memory registered using {@link #cuMemHostRegister MemHostRegister} and host memory allocated using {@link #cuMemHostAlloc MemHostAlloc} with the {@link #CU_MEMHOSTALLOC_WRITECOMBINED MEMHOSTALLOC_WRITECOMBINED} flag. For these types of allocation there will exist separate, disjoint host and device addresses for accessing the allocation. In particular:

* *
    *
  • The host address will correspond to an invalid unmapped device address (which will result in an exception if accessed from the device)
  • *
  • The device address will correspond to an invalid unmapped host address (which will result in an exception if accessed from the host).
  • *
* *

For these types of allocations, querying {@link #CU_POINTER_ATTRIBUTE_HOST_POINTER POINTER_ATTRIBUTE_HOST_POINTER} and {@link #CU_POINTER_ATTRIBUTE_DEVICE_POINTER POINTER_ATTRIBUTE_DEVICE_POINTER} may be used to retrieve the host and * device addresses from either address.

* * @param data returned pointer attribute value * @param attribute pointer attribute to query * @param ptr pointer */ @NativeType("CUresult") public static int cuPointerGetAttribute(@NativeType("void *") PointerBuffer data, @NativeType("CUpointer_attribute") int attribute, @NativeType("CUdeviceptr") long ptr) { return ncuPointerGetAttribute(memAddress(data), attribute, ptr); } /** * Returns information about a pointer. * *

The supported attributes are:

* *
    *
  • {@link #CU_POINTER_ATTRIBUTE_CONTEXT POINTER_ATTRIBUTE_CONTEXT}: Returns in {@code *data} the {@code CUcontext} in which {@code ptr} was allocated or registered. The type of * {@code data} must be {@code CUcontext *}. * *

    If {@code ptr} was not allocated by, mapped by, or registered with a {@code CUcontext} which uses unified virtual addressing then * {@link #CUDA_ERROR_INVALID_VALUE} is returned.

  • *
  • {@link #CU_POINTER_ATTRIBUTE_MEMORY_TYPE POINTER_ATTRIBUTE_MEMORY_TYPE}: * *

    Returns in {@code *data} the physical memory type of the memory that {@code ptr} addresses as a {@code CUmemorytype} enumerated value. The type of * {@code data} must be unsigned int.

    * *

    If {@code ptr} addresses device memory then {@code *data} is set to {@link #CU_MEMORYTYPE_DEVICE MEMORYTYPE_DEVICE}. The particular {@code CUdevice} on which the memory resides * is the {@code CUdevice} of the {@code CUcontext} returned by the {@link #CU_POINTER_ATTRIBUTE_CONTEXT POINTER_ATTRIBUTE_CONTEXT} attribute of {@code ptr}.

    * *

    If {@code ptr} addresses host memory then {@code *data} is set to {@link #CU_MEMORYTYPE_HOST MEMORYTYPE_HOST}.

    * *

    If {@code ptr} was not allocated by, mapped by, or registered with a {@code CUcontext} which uses unified virtual addressing then * {@link #CUDA_ERROR_INVALID_VALUE} is returned.

    * *

    If the current {@code CUcontext} does not support unified virtual addressing then {@link #CUDA_ERROR_INVALID_CONTEXT} is returned.

  • *
  • {@link #CU_POINTER_ATTRIBUTE_DEVICE_POINTER POINTER_ATTRIBUTE_DEVICE_POINTER}: Returns in {@code *data} the device pointer value through which {@code ptr} may be accessed by kernels running * in the current {@code CUcontext}. The type of {@code data} must be {@code CUdeviceptr *}. * *

    If there exists no device pointer value through which kernels running in the current {@code CUcontext} may access {@code ptr} then * {@link #CUDA_ERROR_INVALID_VALUE} is returned.

    * *

    If there is no current {@code CUcontext} then {@link #CUDA_ERROR_INVALID_CONTEXT} is returned.

    * *

    Except in the exceptional disjoint addressing cases discussed below, the value returned in {@code *data} will equal the input value {@code ptr}.

  • *
  • {@link #CU_POINTER_ATTRIBUTE_HOST_POINTER POINTER_ATTRIBUTE_HOST_POINTER}: Returns in {@code *data} the host pointer value through which {@code ptr} may be accessed by the host program. The type of {@code data} must be {@code void **}. If there exists no host pointer value through which the host program may directly access {@code ptr} then {@link #CUDA_ERROR_INVALID_VALUE} is returned.

    Except in the exceptional disjoint addressing cases discussed below, the value returned in {@code *data} will equal the input value {@code ptr}.

  • *
  • {@link #CU_POINTER_ATTRIBUTE_P2P_TOKENS POINTER_ATTRIBUTE_P2P_TOKENS}: Returns in {@code *data} two tokens for use with the nv-p2p.h Linux kernel interface. {@code data} must be a struct * of type {@link CUDA_POINTER_ATTRIBUTE_P2P_TOKENS}. * *

    {@code ptr} must be a pointer to memory obtained from {@link #cuMemAlloc MemAlloc}. Note that {@code p2pToken} and {@code vaSpaceToken} are only valid for the * lifetime of the source allocation. A subsequent allocation at the same address may return completely different tokens. Querying this attribute has * a side effect of setting the attribute {@link #CU_POINTER_ATTRIBUTE_SYNC_MEMOPS POINTER_ATTRIBUTE_SYNC_MEMOPS} for the region of memory that {@code ptr} points to.

  • *
  • {@link #CU_POINTER_ATTRIBUTE_SYNC_MEMOPS POINTER_ATTRIBUTE_SYNC_MEMOPS}: * *

    A boolean attribute which when set, ensures that synchronous memory operations initiated on the region of memory that {@code ptr} points to will * always synchronize. See further documentation in the section titled "API synchronization behavior" to learn more about cases when synchronous * memory operations can exhibit asynchronous behavior.

  • *
  • {@link #CU_POINTER_ATTRIBUTE_BUFFER_ID POINTER_ATTRIBUTE_BUFFER_ID}: Returns in {@code *data} a buffer ID which is guaranteed to be unique within the process. {@code data} must point to * an unsigned long long. * *

    {@code ptr} must be a pointer to memory obtained from a CUDA memory allocation API. Every memory allocation from any of the CUDA memory allocation * APIs will have a unique ID over a process lifetime. Subsequent allocations do not reuse IDs from previous freed allocations. IDs are only unique * within a single process.

  • *
  • {@link #CU_POINTER_ATTRIBUTE_IS_MANAGED POINTER_ATTRIBUTE_IS_MANAGED}: Returns in {@code *data} a boolean that indicates whether the pointer points to managed memory or not. * *

    If {@code ptr} is not a valid CUDA pointer then {@link #CUDA_ERROR_INVALID_VALUE} is returned.

  • *
  • {@link #CU_POINTER_ATTRIBUTE_DEVICE_ORDINAL POINTER_ATTRIBUTE_DEVICE_ORDINAL}: Returns in {@code *data} an integer representing a device ordinal of a device against which the memory was * allocated or registered.
  • *
  • {@link #CU_POINTER_ATTRIBUTE_IS_LEGACY_CUDA_IPC_CAPABLE POINTER_ATTRIBUTE_IS_LEGACY_CUDA_IPC_CAPABLE}: Returns in {@code *data} a boolean that indicates if this pointer maps to an allocation that is * suitable for {@code cudaIpcGetMemHandle()}.
  • *
  • {@link #CU_POINTER_ATTRIBUTE_RANGE_START_ADDR POINTER_ATTRIBUTE_RANGE_START_ADDR}: Returns in {@code *data} the starting address for the allocation referenced by the device pointer {@code ptr}. * Note that this is not necessarily the address of the mapped region, but the address of the mappable address range {@code ptr} references (e.g. from * {@link #cuMemAddressReserve MemAddressReserve}).
  • *
  • {@link #CU_POINTER_ATTRIBUTE_RANGE_SIZE POINTER_ATTRIBUTE_RANGE_SIZE}: Returns in {@code *data} the size for the allocation referenced by the device pointer {@code ptr}. Note that this is * not necessarily the size of the mapped region, but the size of the mappable address range {@code ptr} references (e.g. from {@link #cuMemAddressReserve MemAddressReserve}). * To retrieve the size of the mapped region, see {@link #cuMemGetAddressRange MemGetAddressRange}.
  • *
  • {@link #CU_POINTER_ATTRIBUTE_MAPPED POINTER_ATTRIBUTE_MAPPED}: Returns in {@code *data} a boolean that indicates if this pointer is in a valid address range that is mapped to a * backing allocation.
  • *
  • {@link #CU_POINTER_ATTRIBUTE_ALLOWED_HANDLE_TYPES POINTER_ATTRIBUTE_ALLOWED_HANDLE_TYPES}: Returns a bitmask of the allowed handle types for an allocation that may be passed to * {@link #cuMemExportToShareableHandle MemExportToShareableHandle}.
  • *
  • {@link #CU_POINTER_ATTRIBUTE_MEMPOOL_HANDLE POINTER_ATTRIBUTE_MEMPOOL_HANDLE}: Returns in {@code *data} the handle to the mempool that the allocation was obtained from.
  • *
* *

Note that for most allocations in the unified virtual address space the host and device pointer for accessing the allocation will be the same. The exceptions to this are user memory registered using {@link #cuMemHostRegister MemHostRegister} and host memory allocated using {@link #cuMemHostAlloc MemHostAlloc} with the {@link #CU_MEMHOSTALLOC_WRITECOMBINED MEMHOSTALLOC_WRITECOMBINED} flag. For these types of allocation there will exist separate, disjoint host and device addresses for accessing the allocation. In particular:

* *
    *
  • The host address will correspond to an invalid unmapped device address (which will result in an exception if accessed from the device)
  • *
  • The device address will correspond to an invalid unmapped host address (which will result in an exception if accessed from the host).
  • *
* *

For these types of allocations, querying {@link #CU_POINTER_ATTRIBUTE_HOST_POINTER POINTER_ATTRIBUTE_HOST_POINTER} and {@link #CU_POINTER_ATTRIBUTE_DEVICE_POINTER POINTER_ATTRIBUTE_DEVICE_POINTER} may be used to retrieve the host and * device addresses from either address.

* * @param data returned pointer attribute value * @param attribute pointer attribute to query * @param ptr pointer */ @NativeType("CUresult") public static int cuPointerGetAttribute(@NativeType("void *") IntBuffer data, @NativeType("CUpointer_attribute") int attribute, @NativeType("CUdeviceptr") long ptr) { return ncuPointerGetAttribute(memAddress(data), attribute, ptr); } /** * Returns information about a pointer. * *

The supported attributes are:

* *
    *
  • {@link #CU_POINTER_ATTRIBUTE_CONTEXT POINTER_ATTRIBUTE_CONTEXT}: Returns in {@code *data} the {@code CUcontext} in which {@code ptr} was allocated or registered. The type of * {@code data} must be {@code CUcontext *}. * *

    If {@code ptr} was not allocated by, mapped by, or registered with a {@code CUcontext} which uses unified virtual addressing then * {@link #CUDA_ERROR_INVALID_VALUE} is returned.

  • *
  • {@link #CU_POINTER_ATTRIBUTE_MEMORY_TYPE POINTER_ATTRIBUTE_MEMORY_TYPE}: * *

    Returns in {@code *data} the physical memory type of the memory that {@code ptr} addresses as a {@code CUmemorytype} enumerated value. The type of * {@code data} must be unsigned int.

    * *

    If {@code ptr} addresses device memory then {@code *data} is set to {@link #CU_MEMORYTYPE_DEVICE MEMORYTYPE_DEVICE}. The particular {@code CUdevice} on which the memory resides * is the {@code CUdevice} of the {@code CUcontext} returned by the {@link #CU_POINTER_ATTRIBUTE_CONTEXT POINTER_ATTRIBUTE_CONTEXT} attribute of {@code ptr}.

    * *

    If {@code ptr} addresses host memory then {@code *data} is set to {@link #CU_MEMORYTYPE_HOST MEMORYTYPE_HOST}.

    * *

    If {@code ptr} was not allocated by, mapped by, or registered with a {@code CUcontext} which uses unified virtual addressing then * {@link #CUDA_ERROR_INVALID_VALUE} is returned.

    * *

    If the current {@code CUcontext} does not support unified virtual addressing then {@link #CUDA_ERROR_INVALID_CONTEXT} is returned.

  • *
  • {@link #CU_POINTER_ATTRIBUTE_DEVICE_POINTER POINTER_ATTRIBUTE_DEVICE_POINTER}: Returns in {@code *data} the device pointer value through which {@code ptr} may be accessed by kernels running * in the current {@code CUcontext}. The type of {@code data} must be {@code CUdeviceptr *}. * *

    If there exists no device pointer value through which kernels running in the current {@code CUcontext} may access {@code ptr} then * {@link #CUDA_ERROR_INVALID_VALUE} is returned.

    * *

    If there is no current {@code CUcontext} then {@link #CUDA_ERROR_INVALID_CONTEXT} is returned.

    * *

    Except in the exceptional disjoint addressing cases discussed below, the value returned in {@code *data} will equal the input value {@code ptr}.

  • *
  • {@link #CU_POINTER_ATTRIBUTE_HOST_POINTER POINTER_ATTRIBUTE_HOST_POINTER}: Returns in {@code *data} the host pointer value through which {@code ptr} may be accessed by the host program. The type of {@code data} must be {@code void **}. If there exists no host pointer value through which the host program may directly access {@code ptr} then {@link #CUDA_ERROR_INVALID_VALUE} is returned.

    Except in the exceptional disjoint addressing cases discussed below, the value returned in {@code *data} will equal the input value {@code ptr}.

  • *
  • {@link #CU_POINTER_ATTRIBUTE_P2P_TOKENS POINTER_ATTRIBUTE_P2P_TOKENS}: Returns in {@code *data} two tokens for use with the nv-p2p.h Linux kernel interface. {@code data} must be a struct * of type {@link CUDA_POINTER_ATTRIBUTE_P2P_TOKENS}. * *

    {@code ptr} must be a pointer to memory obtained from {@link #cuMemAlloc MemAlloc}. Note that {@code p2pToken} and {@code vaSpaceToken} are only valid for the * lifetime of the source allocation. A subsequent allocation at the same address may return completely different tokens. Querying this attribute has * a side effect of setting the attribute {@link #CU_POINTER_ATTRIBUTE_SYNC_MEMOPS POINTER_ATTRIBUTE_SYNC_MEMOPS} for the region of memory that {@code ptr} points to.

  • *
  • {@link #CU_POINTER_ATTRIBUTE_SYNC_MEMOPS POINTER_ATTRIBUTE_SYNC_MEMOPS}: * *

    A boolean attribute which when set, ensures that synchronous memory operations initiated on the region of memory that {@code ptr} points to will * always synchronize. See further documentation in the section titled "API synchronization behavior" to learn more about cases when synchronous * memory operations can exhibit asynchronous behavior.

  • *
  • {@link #CU_POINTER_ATTRIBUTE_BUFFER_ID POINTER_ATTRIBUTE_BUFFER_ID}: Returns in {@code *data} a buffer ID which is guaranteed to be unique within the process. {@code data} must point to * an unsigned long long. * *

    {@code ptr} must be a pointer to memory obtained from a CUDA memory allocation API. Every memory allocation from any of the CUDA memory allocation * APIs will have a unique ID over a process lifetime. Subsequent allocations do not reuse IDs from previous freed allocations. IDs are only unique * within a single process.

  • *
  • {@link #CU_POINTER_ATTRIBUTE_IS_MANAGED POINTER_ATTRIBUTE_IS_MANAGED}: Returns in {@code *data} a boolean that indicates whether the pointer points to managed memory or not. * *

    If {@code ptr} is not a valid CUDA pointer then {@link #CUDA_ERROR_INVALID_VALUE} is returned.

  • *
  • {@link #CU_POINTER_ATTRIBUTE_DEVICE_ORDINAL POINTER_ATTRIBUTE_DEVICE_ORDINAL}: Returns in {@code *data} an integer representing a device ordinal of a device against which the memory was * allocated or registered.
  • *
  • {@link #CU_POINTER_ATTRIBUTE_IS_LEGACY_CUDA_IPC_CAPABLE POINTER_ATTRIBUTE_IS_LEGACY_CUDA_IPC_CAPABLE}: Returns in {@code *data} a boolean that indicates if this pointer maps to an allocation that is * suitable for {@code cudaIpcGetMemHandle()}.
  • *
  • {@link #CU_POINTER_ATTRIBUTE_RANGE_START_ADDR POINTER_ATTRIBUTE_RANGE_START_ADDR}: Returns in {@code *data} the starting address for the allocation referenced by the device pointer {@code ptr}. * Note that this is not necessarily the address of the mapped region, but the address of the mappable address range {@code ptr} references (e.g. from * {@link #cuMemAddressReserve MemAddressReserve}).
  • *
  • {@link #CU_POINTER_ATTRIBUTE_RANGE_SIZE POINTER_ATTRIBUTE_RANGE_SIZE}: Returns in {@code *data} the size for the allocation referenced by the device pointer {@code ptr}. Note that this is * not necessarily the size of the mapped region, but the size of the mappable address range {@code ptr} references (e.g. from {@link #cuMemAddressReserve MemAddressReserve}). * To retrieve the size of the mapped region, see {@link #cuMemGetAddressRange MemGetAddressRange}.
  • *
  • {@link #CU_POINTER_ATTRIBUTE_MAPPED POINTER_ATTRIBUTE_MAPPED}: Returns in {@code *data} a boolean that indicates if this pointer is in a valid address range that is mapped to a * backing allocation.
  • *
  • {@link #CU_POINTER_ATTRIBUTE_ALLOWED_HANDLE_TYPES POINTER_ATTRIBUTE_ALLOWED_HANDLE_TYPES}: Returns a bitmask of the allowed handle types for an allocation that may be passed to * {@link #cuMemExportToShareableHandle MemExportToShareableHandle}.
  • *
  • {@link #CU_POINTER_ATTRIBUTE_MEMPOOL_HANDLE POINTER_ATTRIBUTE_MEMPOOL_HANDLE}: Returns in {@code *data} the handle to the mempool that the allocation was obtained from.
  • *
* *

Note that for most allocations in the unified virtual address space the host and device pointer for accessing the allocation will be the same. The exceptions to this are user memory registered using {@link #cuMemHostRegister MemHostRegister} and host memory allocated using {@link #cuMemHostAlloc MemHostAlloc} with the {@link #CU_MEMHOSTALLOC_WRITECOMBINED MEMHOSTALLOC_WRITECOMBINED} flag. For these types of allocation there will exist separate, disjoint host and device addresses for accessing the allocation. In particular:

* *
    *
  • The host address will correspond to an invalid unmapped device address (which will result in an exception if accessed from the device)
  • *
  • The device address will correspond to an invalid unmapped host address (which will result in an exception if accessed from the host).
  • *
* *

For these types of allocations, querying {@link #CU_POINTER_ATTRIBUTE_HOST_POINTER POINTER_ATTRIBUTE_HOST_POINTER} and {@link #CU_POINTER_ATTRIBUTE_DEVICE_POINTER POINTER_ATTRIBUTE_DEVICE_POINTER} may be used to retrieve the host and * device addresses from either address.

* * @param data returned pointer attribute value * @param attribute pointer attribute to query * @param ptr pointer */ @NativeType("CUresult") public static int cuPointerGetAttribute(@NativeType("void *") LongBuffer data, @NativeType("CUpointer_attribute") int attribute, @NativeType("CUdeviceptr") long ptr) { return ncuPointerGetAttribute(memAddress(data), attribute, ptr); } // --- [ cuMemPrefetchAsync ] --- /** * Prefetches memory to the specified destination device. * *

Prefetches memory to the specified destination device. {@code devPtr} is the base device pointer of the memory to be prefetched and {@code dstDevice} is the destination device. {@code count} specifies the number of bytes to copy. {@code hStream} is the stream in which the operation is enqueued. The memory range must refer to managed memory allocated via {@link #cuMemAllocManaged MemAllocManaged} or declared via __managed__ variables.

* *

Passing in {@link #CU_DEVICE_CPU DEVICE_CPU} for {@code dstDevice} will prefetch the data to host memory. If {@code dstDevice} is a GPU, then the device attribute * {@link #CU_DEVICE_ATTRIBUTE_CONCURRENT_MANAGED_ACCESS DEVICE_ATTRIBUTE_CONCURRENT_MANAGED_ACCESS} must be non-zero. Additionally, {@code hStream} must be associated with a device that has a non-zero * value for the device attribute {@link #CU_DEVICE_ATTRIBUTE_CONCURRENT_MANAGED_ACCESS DEVICE_ATTRIBUTE_CONCURRENT_MANAGED_ACCESS}.

* *

The start address and end address of the memory range will be rounded down and rounded up respectively to be aligned to CPU page size before the * prefetch operation is enqueued in the stream.

* *

If no physical memory has been allocated for this region, then this memory region will be populated and mapped on the destination device. If there's * insufficient memory to prefetch the desired region, the Unified Memory driver may evict pages from other {@link #cuMemAllocManaged MemAllocManaged} allocations to host memory * in order to make room. Device memory allocated using {@link #cuMemAlloc MemAlloc} or {@link #cuArrayCreate ArrayCreate} will not be evicted.

* *

By default, any mappings to the previous location of the migrated pages are removed and mappings for the new location are only set up on {@code dstDevice}. The exact behavior however also depends on the settings applied to this memory range via {@link #cuMemAdvise MemAdvise} as described below:

* *

If {@link #CU_MEM_ADVISE_SET_READ_MOSTLY MEM_ADVISE_SET_READ_MOSTLY} was set on any subset of this memory range, then that subset will create a read-only copy of the pages on {@code dstDevice}.

* *

If {@link #CU_MEM_ADVISE_SET_PREFERRED_LOCATION MEM_ADVISE_SET_PREFERRED_LOCATION} was called on any subset of this memory range, then the pages will be migrated to {@code dstDevice} even if * {@code dstDevice} is not the preferred location of any pages in the memory range.

* *

If {@link #CU_MEM_ADVISE_SET_ACCESSED_BY MEM_ADVISE_SET_ACCESSED_BY} was called on any subset of this memory range, then mappings to those pages from all the appropriate processors are * updated to refer to the new location if establishing such a mapping is possible. Otherwise, those mappings are cleared.

* *

Note that this API is not required for functionality and only serves to improve performance by allowing the application to migrate data to a suitable * location before it is accessed. Memory accesses to this range are always coherent and are allowed even when the data is actively being migrated.

* *

Note that this function is asynchronous with respect to the host and all work on other devices.

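A minimal sketch that allocates managed memory and prefetches it to a GPU and back, assuming {@code dev} reports a non-zero {@link #CU_DEVICE_ATTRIBUTE_CONCURRENT_MANAGED_ACCESS DEVICE_ATTRIBUTE_CONCURRENT_MANAGED_ACCESS} and {@code stream} is associated with it; {@code checkErr} is again the hypothetical error-checking helper:

try (MemoryStack stack = stackPush()) {
    PointerBuffer pManaged = stack.mallocPointer(1);
    long bytes = 4L << 20;  // 4 MiB
    checkErr(cuMemAllocManaged(pManaged, bytes, CU_MEM_ATTACH_GLOBAL));
    long managed = pManaged.get(0);

    checkErr(cuMemPrefetchAsync(managed, bytes, dev, stream));           // migrate to the GPU
    // ... launch kernels in `stream` that touch `managed` ...
    checkErr(cuMemPrefetchAsync(managed, bytes, CU_DEVICE_CPU, stream)); // bring it back for host access
    checkErr(cuStreamSynchronize(stream));
}
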
* * @param devPtr pointer to be prefetched * @param count size in bytes * @param dstDevice destination device to prefetch to * @param hStream stream to enqueue prefetch operation */ @NativeType("CUresult") public static int cuMemPrefetchAsync(@NativeType("CUdeviceptr") long devPtr, @NativeType("size_t") long count, @NativeType("CUdevice") int dstDevice, @NativeType("CUstream") long hStream) { long __functionAddress = Functions.MemPrefetchAsync; if (CHECKS) { check(__functionAddress); check(devPtr); } return callPPPI(devPtr, count, dstDevice, hStream, __functionAddress); } // --- [ cuMemAdvise ] --- /** * Advise about the usage of a given memory range. * *

Advise the Unified Memory subsystem about the usage pattern for the memory range starting at {@code devPtr} with a size of {@code count} bytes. The start address and end address of the memory range will be rounded down and rounded up respectively to be aligned to CPU page size before the advice is applied. The memory range must refer to managed memory allocated via {@link #cuMemAllocManaged MemAllocManaged} or declared via __managed__ variables. The memory range could also refer to system-allocated pageable memory provided it represents a valid, host-accessible region of memory and all additional constraints imposed by {@code advice} as outlined below are also satisfied. Specifying an invalid system-allocated pageable memory range results in an error being returned. A short usage sketch follows the list of advice values below.

* *

The {@code advice} parameter can take the following values:

* *
    *
  • {@link #CU_MEM_ADVISE_SET_READ_MOSTLY MEM_ADVISE_SET_READ_MOSTLY}: This implies that the data is mostly going to be read from and only occasionally written to. Any read accesses * from any processor to this region will create a read-only copy of at least the accessed pages in that processor's memory. Additionally, if * {@link #cuMemPrefetchAsync MemPrefetchAsync} is called on this region, it will create a read-only copy of the data on the destination processor. If any processor writes to * this region, all copies of the corresponding page will be invalidated except for the one where the write occurred. The {@code device} argument is * ignored for this advice. Note that for a page to be read-duplicated, the accessing processor must either be the CPU or a GPU that has a non-zero * value for the device attribute {@link #CU_DEVICE_ATTRIBUTE_CONCURRENT_MANAGED_ACCESS DEVICE_ATTRIBUTE_CONCURRENT_MANAGED_ACCESS}. Also, if a context is created on a device that does not have the * device attribute {@link #CU_DEVICE_ATTRIBUTE_CONCURRENT_MANAGED_ACCESS DEVICE_ATTRIBUTE_CONCURRENT_MANAGED_ACCESS} set, then read-duplication will not occur until all such contexts are destroyed. * If the memory region refers to valid system-allocated pageable memory, then the accessing device must have a non-zero value for the device * attribute {@link #CU_DEVICE_ATTRIBUTE_PAGEABLE_MEMORY_ACCESS DEVICE_ATTRIBUTE_PAGEABLE_MEMORY_ACCESS} for a read-only copy to be created on that device. Note however that if the accessing device * also has a non-zero value for the device attribute {@link #CU_DEVICE_ATTRIBUTE_PAGEABLE_MEMORY_ACCESS_USES_HOST_PAGE_TABLES DEVICE_ATTRIBUTE_PAGEABLE_MEMORY_ACCESS_USES_HOST_PAGE_TABLES}, then setting this advice * will not create a read-only copy when that device accesses this memory region.
  • *
  • {@link #CU_MEM_ADVISE_UNSET_READ_MOSTLY MEM_ADVISE_UNSET_READ_MOSTLY}: Undoes the effect of {@link #CU_MEM_ADVISE_SET_READ_MOSTLY MEM_ADVISE_SET_READ_MOSTLY} and also prevents the Unified Memory driver from attempting * heuristic read-duplication on the memory range. Any read-duplicated copies of the data will be collapsed into a single copy. The location for the * collapsed copy will be the preferred location if the page has a preferred location and one of the read-duplicated copies was resident at that * location. Otherwise, the location chosen is arbitrary.
  • *
  • {@link #CU_MEM_ADVISE_SET_PREFERRED_LOCATION MEM_ADVISE_SET_PREFERRED_LOCATION}: This advice sets the preferred location for the data to be the memory belonging to {@code device}. Passing * in CU_DEVICE_CPU for {@code device} sets the preferred location as host memory. If {@code device} is a GPU, then it must have a non-zero value for * the device attribute {@link #CU_DEVICE_ATTRIBUTE_CONCURRENT_MANAGED_ACCESS DEVICE_ATTRIBUTE_CONCURRENT_MANAGED_ACCESS}. Setting the preferred location does not cause data to migrate to that * location immediately. Instead, it guides the migration policy when a fault occurs on that memory region. If the data is already in its preferred * location and the faulting processor can establish a mapping without requiring the data to be migrated, then data migration will be avoided. On the * other hand, if the data is not in its preferred location or if a direct mapping cannot be established, then it will be migrated to the processor * accessing it. It is important to note that setting the preferred location does not prevent data prefetching done using {@link #cuMemPrefetchAsync MemPrefetchAsync}. Having * a preferred location can override the page thrash detection and resolution logic in the Unified Memory driver. Normally, if a page is detected to * be constantly thrashing between for example host and device memory, the page may eventually be pinned to host memory by the Unified Memory driver. * But if the preferred location is set as device memory, then the page will continue to thrash indefinitely. If {@link #CU_MEM_ADVISE_SET_READ_MOSTLY MEM_ADVISE_SET_READ_MOSTLY} is * also set on this memory region or any subset of it, then the policies associated with that advice will override the policies of this advice, unless * read accesses from {@code device} will not result in a read-only copy being created on that device as outlined in description for the advice * {@link #CU_MEM_ADVISE_SET_READ_MOSTLY MEM_ADVISE_SET_READ_MOSTLY}. If the memory region refers to valid system-allocated pageable memory, then {@code device} must have a non-zero * value for the device attribute {@link #CU_DEVICE_ATTRIBUTE_PAGEABLE_MEMORY_ACCESS DEVICE_ATTRIBUTE_PAGEABLE_MEMORY_ACCESS}. Additionally, if {@code device} has a non-zero value for the device * attribute {@link #CU_DEVICE_ATTRIBUTE_PAGEABLE_MEMORY_ACCESS_USES_HOST_PAGE_TABLES DEVICE_ATTRIBUTE_PAGEABLE_MEMORY_ACCESS_USES_HOST_PAGE_TABLES}, then this call has no effect. Note however that this behavior may * change in the future.
  • *
  • {@link #CU_MEM_ADVISE_UNSET_PREFERRED_LOCATION MEM_ADVISE_UNSET_PREFERRED_LOCATION}: Undoes the effect of {@link #CU_MEM_ADVISE_SET_PREFERRED_LOCATION MEM_ADVISE_SET_PREFERRED_LOCATION} and changes the preferred location to none.
  • *
  • {@link #CU_MEM_ADVISE_SET_ACCESSED_BY MEM_ADVISE_SET_ACCESSED_BY}: This advice implies that the data will be accessed by {@code device}. Passing in {@link #CU_DEVICE_CPU DEVICE_CPU} for {@code * device} will set the advice for the CPU. If {@code device} is a GPU, then the device attribute {@link #CU_DEVICE_ATTRIBUTE_CONCURRENT_MANAGED_ACCESS DEVICE_ATTRIBUTE_CONCURRENT_MANAGED_ACCESS} must * be non-zero. This advice does not cause data migration and has no impact on the location of the data per se. Instead, it causes the data to always * be mapped in the specified processor's page tables, as long as the location of the data permits a mapping to be established. If the data gets * migrated for any reason, the mappings are updated accordingly. This advice is recommended in scenarios where data locality is not important, but * avoiding faults is. Consider for example a system containing multiple GPUs with peer-to-peer access enabled, where the data located on one GPU is * occasionally accessed by peer GPUs. In such scenarios, migrating data over to the other GPUs is not as important because the accesses are * infrequent and the overhead of migration may be too high. But preventing faults can still help improve performance, and so having a mapping set up * in advance is useful. Note that on CPU access of this data, the data may be migrated to host memory because the CPU typically cannot access device * memory directly. Any GPU that had the {@link #CU_MEM_ADVISE_SET_ACCESSED_BY MEM_ADVISE_SET_ACCESSED_BY} flag set for this data will now have its mapping updated to point to the page * in host memory. If {@link #CU_MEM_ADVISE_SET_READ_MOSTLY MEM_ADVISE_SET_READ_MOSTLY} is also set on this memory region or any subset of it, then the policies associated with that * advice will override the policies of this advice. Additionally, if the preferred location of this memory region or any subset of it is also {@code * device}, then the policies associated with {@link #CU_MEM_ADVISE_SET_PREFERRED_LOCATION MEM_ADVISE_SET_PREFERRED_LOCATION} will override the policies of this advice. If the memory region * refers to valid system-allocated pageable memory, then {@code device} must have a non-zero value for the device attribute * {@link #CU_DEVICE_ATTRIBUTE_PAGEABLE_MEMORY_ACCESS DEVICE_ATTRIBUTE_PAGEABLE_MEMORY_ACCESS}. Additionally, if {@code device} has a non-zero value for the device attribute * {@link #CU_DEVICE_ATTRIBUTE_PAGEABLE_MEMORY_ACCESS_USES_HOST_PAGE_TABLES DEVICE_ATTRIBUTE_PAGEABLE_MEMORY_ACCESS_USES_HOST_PAGE_TABLES}, then this call has no effect.
  • *
  • {@link #CU_MEM_ADVISE_UNSET_ACCESSED_BY MEM_ADVISE_UNSET_ACCESSED_BY}: Undoes the effect of {@link #CU_MEM_ADVISE_SET_ACCESSED_BY MEM_ADVISE_SET_ACCESSED_BY}. Any mappings to the data from {@code device} may be removed at any time causing accesses to result in non-fatal page faults. If the memory region refers to valid system-allocated pageable memory, then {@code device} must have a non-zero value for the device attribute {@link #CU_DEVICE_ATTRIBUTE_PAGEABLE_MEMORY_ACCESS DEVICE_ATTRIBUTE_PAGEABLE_MEMORY_ACCESS}. Additionally, if {@code device} has a non-zero value for the device attribute {@link #CU_DEVICE_ATTRIBUTE_PAGEABLE_MEMORY_ACCESS_USES_HOST_PAGE_TABLES DEVICE_ATTRIBUTE_PAGEABLE_MEMORY_ACCESS_USES_HOST_PAGE_TABLES}, then this call has no effect.
  • *
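A minimal sketch applying some of the advice values listed above, assuming {@code managed} and {@code bytes} describe a {@link #cuMemAllocManaged MemAllocManaged} allocation, {@code dev} supports concurrent managed access, and {@code checkErr} is the hypothetical helper from the earlier sketches:

// Read-mostly data: every accessing processor keeps its own read-only copy
// (the device argument is ignored for this particular advice).
checkErr(cuMemAdvise(managed, bytes, CU_MEM_ADVISE_SET_READ_MOSTLY, dev));

// Prefer keeping the pages on `dev`, but keep the CPU's mapping updated as pages migrate.
checkErr(cuMemAdvise(managed, bytes, CU_MEM_ADVISE_SET_PREFERRED_LOCATION, dev));
checkErr(cuMemAdvise(managed, bytes, CU_MEM_ADVISE_SET_ACCESSED_BY, CU_DEVICE_CPU));
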
* * @param devPtr pointer to memory to set the advice for * @param count size in bytes of the memory range * @param advice advice to be applied for the specified memory range * @param device device to apply the advice for */ @NativeType("CUresult") public static int cuMemAdvise(@NativeType("CUdeviceptr") long devPtr, @NativeType("size_t") long count, @NativeType("CUmem_advise") int advice, @NativeType("CUdevice") int device) { long __functionAddress = Functions.MemAdvise; if (CHECKS) { check(__functionAddress); check(devPtr); } return callPPI(devPtr, count, advice, device, __functionAddress); } // --- [ cuMemRangeGetAttribute ] --- /** * Unsafe version of: {@link #cuMemRangeGetAttribute MemRangeGetAttribute} * * @param dataSize the size of {@code data} */ public static int ncuMemRangeGetAttribute(long data, long dataSize, int attribute, long devPtr, long count) { long __functionAddress = Functions.MemRangeGetAttribute; if (CHECKS) { check(__functionAddress); check(devPtr); } return callPPPPI(data, dataSize, attribute, devPtr, count, __functionAddress); } /** * Query an attribute of a given memory range. * *

Query an attribute about the memory range starting at {@code devPtr} with a size of {@code count} bytes. The memory range must refer to managed memory allocated via {@link #cuMemAllocManaged MemAllocManaged} or declared via __managed__ variables. A short usage sketch follows the list of attribute values below.

* *

The {@code attribute} parameter can take the following values:

* *
    *
  • {@link #CU_MEM_RANGE_ATTRIBUTE_READ_MOSTLY MEM_RANGE_ATTRIBUTE_READ_MOSTLY}: If this attribute is specified, {@code data} will be interpreted as a 32-bit integer, and {@code dataSize} * must be 4. The result returned will be 1 if all pages in the given memory range have read-duplication enabled, or 0 otherwise.
  • *
  • {@link #CU_MEM_RANGE_ATTRIBUTE_PREFERRED_LOCATION MEM_RANGE_ATTRIBUTE_PREFERRED_LOCATION}: If this attribute is specified, {@code data} will be interpreted as a 32-bit integer, and {@code dataSize} must be 4. The result returned will be a GPU device id if all pages in the memory range have that GPU as their preferred location, or it will be CU_DEVICE_CPU if all pages in the memory range have the CPU as their preferred location, or it will be CU_DEVICE_INVALID if either all the pages don't have the same preferred location or some of the pages don't have a preferred location at all. Note that the actual location of the pages in the memory range at the time of the query may be different from the preferred location.
  • *
  • {@link #CU_MEM_RANGE_ATTRIBUTE_ACCESSED_BY MEM_RANGE_ATTRIBUTE_ACCESSED_BY}: If this attribute is specified, {@code data} will be interpreted as an array of 32-bit integers, and {@code * dataSize} must be a non-zero multiple of 4. The result returned will be a list of device ids that had {@link #CU_MEM_ADVISE_SET_ACCESSED_BY MEM_ADVISE_SET_ACCESSED_BY} set for that * entire memory range. If any device does not have that advice set for the entire memory range, that device will not be included. If {@code data} is * larger than the number of devices that have that advice set for that memory range, CU_DEVICE_INVALID will be returned in all the extra space * provided. For ex., if {@code dataSize} is 12 (i.e. {@code data} has 3 elements) and only device 0 has the advice set, then the result returned will * be { 0, CU_DEVICE_INVALID, CU_DEVICE_INVALID }. If {@code data} is smaller than the number of devices that have that advice set, then only as many * devices will be returned as can fit in the array. There is no guarantee on which specific devices will be returned, however.
  • *
  • {@link #CU_MEM_RANGE_ATTRIBUTE_LAST_PREFETCH_LOCATION MEM_RANGE_ATTRIBUTE_LAST_PREFETCH_LOCATION}: If this attribute is specified, {@code data} will be interpreted as a 32-bit integer, and {@code * dataSize} must be 4. The result returned will be the last location to which all pages in the memory range were prefetched explicitly via * {@link #cuMemPrefetchAsync MemPrefetchAsync}. This will either be a GPU id or CU_DEVICE_CPU depending on whether the last location for prefetch was a GPU or the CPU * respectively. If any page in the memory range was never explicitly prefetched or if all pages were not prefetched to the same location, * CU_DEVICE_INVALID will be returned. Note that this simply returns the last location that the application requested to prefetch the memory range to. * It gives no indication as to whether the prefetch operation to that location has completed or even begun.
  • *
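  A minimal sketch of querying one of the attributes above; the helper name is hypothetical and {@code devPtr}/{@code bytes} are assumed to describe a managed range (static imports of org.lwjgl.cuda.CU.* and org.lwjgl.system.MemoryStack.* assumed):

      static boolean isReadMostly(long devPtr, long bytes) {
          try (MemoryStack stack = stackPush()) {
              ByteBuffer data = stack.malloc(4); // dataSize must be 4 for this attribute
              int err = cuMemRangeGetAttribute(data, CU_MEM_RANGE_ATTRIBUTE_READ_MOSTLY, devPtr, bytes);
              return err == CUDA_SUCCESS && data.getInt(0) == 1;
          }
      }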
* * @param data a pointers to a memory location where the result of each attribute query will be written to * @param attribute the attribute to query * @param devPtr start of the range to query * @param count size of the range to query */ @NativeType("CUresult") public static int cuMemRangeGetAttribute(@NativeType("void *") ByteBuffer data, @NativeType("CUmem_range_attribute") int attribute, @NativeType("CUdeviceptr") long devPtr, @NativeType("size_t") long count) { return ncuMemRangeGetAttribute(memAddress(data), data.remaining(), attribute, devPtr, count); } // --- [ cuMemRangeGetAttributes ] --- /** * Unsafe version of: {@link #cuMemRangeGetAttributes MemRangeGetAttributes} * * @param numAttributes number of attributes to query */ public static int ncuMemRangeGetAttributes(long data, long dataSizes, long attributes, long numAttributes, long devPtr, long count) { long __functionAddress = Functions.MemRangeGetAttributes; if (CHECKS) { check(__functionAddress); check(devPtr); } return callPPPPPPI(data, dataSizes, attributes, numAttributes, devPtr, count, __functionAddress); } /** * Query attributes of a given memory range. * *

Query attributes of the memory range starting at {@code devPtr} with a size of {@code count} bytes. The memory range must refer to managed memory * allocated via {@link #cuMemAllocManaged MemAllocManaged} or declared via __managed__ variables. The {@code attributes} array will be interpreted to have {@code numAttributes} * entries. The {@code dataSizes} array will also be interpreted to have {@code numAttributes} entries. The results of the query will be stored in {@code data}.

* *

The list of supported attributes are given below. Please refer to {@link #cuMemRangeGetAttribute MemRangeGetAttribute} for attribute descriptions and restrictions.

* *
    *
  • {@link #CU_MEM_RANGE_ATTRIBUTE_READ_MOSTLY MEM_RANGE_ATTRIBUTE_READ_MOSTLY}
  • *
  • {@link #CU_MEM_RANGE_ATTRIBUTE_PREFERRED_LOCATION MEM_RANGE_ATTRIBUTE_PREFERRED_LOCATION}
  • *
  • {@link #CU_MEM_RANGE_ATTRIBUTE_ACCESSED_BY MEM_RANGE_ATTRIBUTE_ACCESSED_BY}
  • *
  • {@link #CU_MEM_RANGE_ATTRIBUTE_LAST_PREFETCH_LOCATION MEM_RANGE_ATTRIBUTE_LAST_PREFETCH_LOCATION}
  • *
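  A minimal sketch querying two of the attributes above in a single call (hypothetical helper; static imports of org.lwjgl.cuda.CU.*, org.lwjgl.system.MemoryStack.* and org.lwjgl.system.MemoryUtil.* assumed):

      static void queryRange(long devPtr, long bytes) {
          try (MemoryStack stack = stackPush()) {
              IntBuffer attrs = stack.ints(
                  CU_MEM_RANGE_ATTRIBUTE_READ_MOSTLY,
                  CU_MEM_RANGE_ATTRIBUTE_PREFERRED_LOCATION);

              IntBuffer readMostly = stack.mallocInt(1);
              IntBuffer preferred  = stack.mallocInt(1);

              PointerBuffer data = stack.mallocPointer(2);
              data.put(0, memAddress(readMostly)).put(1, memAddress(preferred));

              PointerBuffer dataSizes = stack.mallocPointer(2);
              dataSizes.put(0, 4).put(1, 4); // both results are 32-bit integers

              cuMemRangeGetAttributes(data, dataSizes, attrs, devPtr, bytes);
          }
      }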
* * @param data a two-dimensional array containing pointers to memory locations where the result of each attribute query will be written to * @param dataSizes array containing the sizes of each result * @param attributes an array of attributes to query (numAttributes and the number of attributes in this array should match). One of:
{@link #CU_MEM_RANGE_ATTRIBUTE_READ_MOSTLY MEM_RANGE_ATTRIBUTE_READ_MOSTLY}, {@link #CU_MEM_RANGE_ATTRIBUTE_PREFERRED_LOCATION MEM_RANGE_ATTRIBUTE_PREFERRED_LOCATION},
{@link #CU_MEM_RANGE_ATTRIBUTE_ACCESSED_BY MEM_RANGE_ATTRIBUTE_ACCESSED_BY}, {@link #CU_MEM_RANGE_ATTRIBUTE_LAST_PREFETCH_LOCATION MEM_RANGE_ATTRIBUTE_LAST_PREFETCH_LOCATION}
* @param devPtr start of the range to query * @param count size of the range to query */ @NativeType("CUresult") public static int cuMemRangeGetAttributes(@NativeType("void **") PointerBuffer data, @NativeType("size_t *") PointerBuffer dataSizes, @NativeType("CUmem_range_attribute *") IntBuffer attributes, @NativeType("CUdeviceptr") long devPtr, @NativeType("size_t") long count) { if (CHECKS) { check(data, attributes.remaining()); check(dataSizes, attributes.remaining()); } return ncuMemRangeGetAttributes(memAddress(data), memAddress(dataSizes), memAddress(attributes), attributes.remaining(), devPtr, count); } // --- [ cuPointerSetAttribute ] --- /** Unsafe version of: {@link #cuPointerSetAttribute PointerSetAttribute} */ public static int ncuPointerSetAttribute(long value, int attribute, long ptr) { long __functionAddress = Functions.PointerSetAttribute; if (CHECKS) { check(__functionAddress); check(ptr); } return callPPI(value, attribute, ptr, __functionAddress); } /** * Set attributes on a previously allocated memory region. * *

The supported attributes are:

* *
    *
  • {@link #CU_POINTER_ATTRIBUTE_SYNC_MEMOPS POINTER_ATTRIBUTE_SYNC_MEMOPS}: A boolean attribute that can either be set (1) or unset (0). * *

    When set, the region of memory that {@code ptr} points to is guaranteed to always synchronize memory operations that are synchronous. If there are * some previously initiated synchronous memory operations that are pending when this attribute is set, the function does not return until those * memory operations are complete. See further documentation in the section titled "API synchronization behavior" to learn more about cases when * synchronous memory operations can exhibit asynchronous behavior. {@code value} will be considered as a pointer to an unsigned integer to which this * attribute is to be set.

  • *
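  A minimal sketch that sets this attribute on an allocation (hypothetical helper; same assumed static imports as the earlier sketches):

      static void enableSyncMemops(long dptr) {
          try (MemoryStack stack = stackPush()) {
              ByteBuffer value = stack.malloc(4);
              value.putInt(0, 1); // 1 = set, 0 = unset
              cuPointerSetAttribute(value, CU_POINTER_ATTRIBUTE_SYNC_MEMOPS, dptr);
          }
      }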
* * @param value pointer to memory containing the value to be set * @param attribute pointer attribute to set * @param ptr pointer to a memory region allocated using CUDA memory allocation APIs */ @NativeType("CUresult") public static int cuPointerSetAttribute(@NativeType("void const *") ByteBuffer value, @NativeType("CUpointer_attribute") int attribute, @NativeType("CUdeviceptr") long ptr) { return ncuPointerSetAttribute(memAddress(value), attribute, ptr); } // --- [ cuPointerGetAttributes ] --- /** * Unsafe version of: {@link #cuPointerGetAttributes PointerGetAttributes} * * @param numAttributes number of attributes to query */ public static int ncuPointerGetAttributes(int numAttributes, long attributes, long data, long ptr) { long __functionAddress = Functions.PointerGetAttributes; if (CHECKS) { check(__functionAddress); check(ptr); } return callPPPI(numAttributes, attributes, data, ptr, __functionAddress); } /** * Returns information about a pointer. * *

Unlike {@link #cuPointerGetAttribute PointerGetAttribute}, this function will not return an error when the {@code ptr} encountered is not a valid CUDA pointer. Instead, the * attributes are assigned default {@code NULL} values and {@link #CUDA_SUCCESS} is returned.

* *

If {@code ptr} was not allocated by, mapped by, or registered with a {@code CUcontext} which uses UVA (Unified Virtual Addressing), * {@link #CUDA_ERROR_INVALID_CONTEXT} is returned.

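  A minimal sketch querying a couple of attributes of an arbitrary pointer (hypothetical helper; assumed static imports of the CU, MemoryStack and MemoryUtil classes):

      static void describePointer(long ptr) {
          try (MemoryStack stack = stackPush()) {
              IntBuffer attrs = stack.ints(CU_POINTER_ATTRIBUTE_MEMORY_TYPE, CU_POINTER_ATTRIBUTE_IS_MANAGED);

              IntBuffer memType   = stack.mallocInt(1);
              IntBuffer isManaged = stack.mallocInt(1);

              PointerBuffer data = stack.mallocPointer(2);
              data.put(0, memAddress(memType)).put(1, memAddress(isManaged));

              if (cuPointerGetAttributes(attrs, data, ptr) == CUDA_SUCCESS) {
                  System.out.println("memory type=" + memType.get(0) + ", managed=" + isManaged.get(0));
              }
          }
      }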
* * @param attributes an array of attributes to query (numAttributes and the number of attributes in this array should match). One of:
{@link #CU_POINTER_ATTRIBUTE_CONTEXT POINTER_ATTRIBUTE_CONTEXT}, {@link #CU_POINTER_ATTRIBUTE_MEMORY_TYPE POINTER_ATTRIBUTE_MEMORY_TYPE},
{@link #CU_POINTER_ATTRIBUTE_DEVICE_POINTER POINTER_ATTRIBUTE_DEVICE_POINTER}, {@link #CU_POINTER_ATTRIBUTE_HOST_POINTER POINTER_ATTRIBUTE_HOST_POINTER},
{@link #CU_POINTER_ATTRIBUTE_P2P_TOKENS POINTER_ATTRIBUTE_P2P_TOKENS}, {@link #CU_POINTER_ATTRIBUTE_SYNC_MEMOPS POINTER_ATTRIBUTE_SYNC_MEMOPS},
{@link #CU_POINTER_ATTRIBUTE_BUFFER_ID POINTER_ATTRIBUTE_BUFFER_ID}, {@link #CU_POINTER_ATTRIBUTE_IS_MANAGED POINTER_ATTRIBUTE_IS_MANAGED},
{@link #CU_POINTER_ATTRIBUTE_DEVICE_ORDINAL POINTER_ATTRIBUTE_DEVICE_ORDINAL}, {@link #CU_POINTER_ATTRIBUTE_IS_LEGACY_CUDA_IPC_CAPABLE POINTER_ATTRIBUTE_IS_LEGACY_CUDA_IPC_CAPABLE},
{@link #CU_POINTER_ATTRIBUTE_RANGE_START_ADDR POINTER_ATTRIBUTE_RANGE_START_ADDR}, {@link #CU_POINTER_ATTRIBUTE_RANGE_SIZE POINTER_ATTRIBUTE_RANGE_SIZE},
{@link #CU_POINTER_ATTRIBUTE_MAPPED POINTER_ATTRIBUTE_MAPPED}, {@link #CU_POINTER_ATTRIBUTE_ALLOWED_HANDLE_TYPES POINTER_ATTRIBUTE_ALLOWED_HANDLE_TYPES},
{@link #CU_POINTER_ATTRIBUTE_IS_GPU_DIRECT_RDMA_CAPABLE POINTER_ATTRIBUTE_IS_GPU_DIRECT_RDMA_CAPABLE}, {@link #CU_POINTER_ATTRIBUTE_ACCESS_FLAGS POINTER_ATTRIBUTE_ACCESS_FLAGS},
{@link #CU_POINTER_ATTRIBUTE_MEMPOOL_HANDLE POINTER_ATTRIBUTE_MEMPOOL_HANDLE}, {@link #CU_POINTER_ATTRIBUTE_ACCESS_FLAG_NONE POINTER_ATTRIBUTE_ACCESS_FLAG_NONE},
{@link #CU_POINTER_ATTRIBUTE_ACCESS_FLAG_READ POINTER_ATTRIBUTE_ACCESS_FLAG_READ}, {@link #CU_POINTER_ATTRIBUTE_ACCESS_FLAG_READWRITE POINTER_ATTRIBUTE_ACCESS_FLAG_READWRITE}
* @param data a two-dimensional array containing pointers to memory locations where the result of each attribute query will be written to * @param ptr pointer to query */ @NativeType("CUresult") public static int cuPointerGetAttributes(@NativeType("CUpointer_attribute *") IntBuffer attributes, @NativeType("void **") PointerBuffer data, @NativeType("CUdeviceptr") long ptr) { if (CHECKS) { check(data, attributes.remaining()); } return ncuPointerGetAttributes(attributes.remaining(), memAddress(attributes), memAddress(data), ptr); } // --- [ cuStreamCreate ] --- /** Unsafe version of: {@link #cuStreamCreate StreamCreate} */ public static int ncuStreamCreate(long phStream, int Flags) { long __functionAddress = Functions.StreamCreate; return callPI(phStream, Flags, __functionAddress); } /** * Create a stream. * *

Creates a stream and returns a handle in {@code phStream}. The {@code Flags} argument determines behaviors of the stream.

* *

Valid values for {@code Flags} are:

* *
    *
  • {@link #CU_STREAM_DEFAULT STREAM_DEFAULT}: Default stream creation flag.
  • *
  • {@link #CU_STREAM_NON_BLOCKING STREAM_NON_BLOCKING}: Specifies that work running in the created stream may run concurrently with work in stream 0 (the NULL stream), and that * the created stream should perform no implicit synchronization with stream 0.
  • *
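  A minimal sketch of creating and disposing of a non-blocking stream (driver initialization and a current context are assumed; static imports of org.lwjgl.cuda.CU.* and org.lwjgl.system.MemoryStack.*):

      try (MemoryStack stack = stackPush()) {
          PointerBuffer phStream = stack.mallocPointer(1);
          if (cuStreamCreate(phStream, CU_STREAM_NON_BLOCKING) != CUDA_SUCCESS) {
              throw new IllegalStateException("cuStreamCreate failed");
          }
          long stream = phStream.get(0);
          // ... submit asynchronous work to the stream ...
          cuStreamSynchronize(stream);
          cuStreamDestroy(stream);
      }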
* * @param phStream returned newly created stream * @param Flags parameters for stream creation */ @NativeType("CUresult") public static int cuStreamCreate(@NativeType("CUstream *") PointerBuffer phStream, @NativeType("unsigned int") int Flags) { if (CHECKS) { check(phStream, 1); } return ncuStreamCreate(memAddress(phStream), Flags); } // --- [ cuStreamCreateWithPriority ] --- /** Unsafe version of: {@link #cuStreamCreateWithPriority StreamCreateWithPriority} */ public static int ncuStreamCreateWithPriority(long phStream, int flags, int priority) { long __functionAddress = Functions.StreamCreateWithPriority; return callPI(phStream, flags, priority, __functionAddress); } /** * Create a stream with the given priority. * *

Creates a stream with the specified priority and returns a handle in {@code phStream}. This API alters the scheduler priority of work in the stream. * Work in a higher priority stream may preempt work already executing in a low priority stream.

* *

{@code priority} follows a convention where lower numbers represent higher priorities. {@code 0} represents default priority. The range of meaningful * numerical priorities can be queried using {@link #cuCtxGetStreamPriorityRange CtxGetStreamPriorityRange}. If the specified priority is outside the numerical range returned by * {@link #cuCtxGetStreamPriorityRange CtxGetStreamPriorityRange}, it will automatically be clamped to the lowest or the highest number in the range.

* *
Note
* *

Stream priorities are supported only on GPUs with compute capability 3.5 or higher.

* *
Note
* *

In the current implementation, only compute kernels launched in priority streams are affected by the stream's priority. Stream priorities * have no effect on host-to-device and device-to-host memory operations.

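  A minimal sketch of creating a stream at the highest meaningful priority; {@link #cuCtxGetStreamPriorityRange CtxGetStreamPriorityRange} is assumed here to take a (leastPriority, greatestPriority) pair of IntBuffers, with the same assumed setup and static imports as the earlier sketches:

      try (MemoryStack stack = stackPush()) {
          IntBuffer least    = stack.mallocInt(1);
          IntBuffer greatest = stack.mallocInt(1);
          cuCtxGetStreamPriorityRange(least, greatest); // lower numbers = higher priority

          PointerBuffer phStream = stack.mallocPointer(1);
          cuStreamCreateWithPriority(phStream, CU_STREAM_NON_BLOCKING, greatest.get(0));
          long highPriorityStream = phStream.get(0);
          // ... use the stream and eventually cuStreamDestroy(highPriorityStream) ...
      }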
* * @param phStream returned newly created stream * @param flags flags for stream creation. See {@link #cuStreamCreate StreamCreate} for a list of valid flags * @param priority stream priority. Lower numbers represent higher priorities. See {@link #cuCtxGetStreamPriorityRange CtxGetStreamPriorityRange} for more information about meaningful stream * priorities that can be passed. */ @NativeType("CUresult") public static int cuStreamCreateWithPriority(@NativeType("CUstream *") PointerBuffer phStream, @NativeType("unsigned int") int flags, int priority) { if (CHECKS) { check(phStream, 1); } return ncuStreamCreateWithPriority(memAddress(phStream), flags, priority); } // --- [ cuStreamGetPriority ] --- /** Unsafe version of: {@link #cuStreamGetPriority StreamGetPriority} */ public static int ncuStreamGetPriority(long hStream, long priority) { long __functionAddress = Functions.StreamGetPriority; return callPPI(hStream, priority, __functionAddress); } /** * Query the priority of a given stream. * *

Query the priority of a stream created using {@link #cuStreamCreate StreamCreate} or {@link #cuStreamCreateWithPriority StreamCreateWithPriority} and return the priority in {@code priority}. Note that if * the stream was created with a priority outside the numerical range returned by {@link #cuCtxGetStreamPriorityRange CtxGetStreamPriorityRange}, this function returns the clamped * priority. See {@link #cuStreamCreateWithPriority StreamCreateWithPriority} for details about priority clamping.

* * @param hStream handle to the stream to be queried * @param priority pointer to a signed integer in which the stream's priority is returned */ @NativeType("CUresult") public static int cuStreamGetPriority(@NativeType("CUstream") long hStream, @NativeType("int *") IntBuffer priority) { if (CHECKS) { check(priority, 1); } return ncuStreamGetPriority(hStream, memAddress(priority)); } // --- [ cuStreamGetFlags ] --- /** Unsafe version of: {@link #cuStreamGetFlags StreamGetFlags} */ public static int ncuStreamGetFlags(long hStream, long flags) { long __functionAddress = Functions.StreamGetFlags; return callPPI(hStream, flags, __functionAddress); } /** * Query the flags of a given stream. * *

Query the flags of a stream created using {@link #cuStreamCreate StreamCreate} or {@link #cuStreamCreateWithPriority StreamCreateWithPriority} and return the flags in {@code flags}.

* * @param hStream handle to the stream to be queried * @param flags pointer to an unsigned integer in which the stream's flags are returned The value returned in {@code flags} is a logical 'OR' of all flags that * were used while creating this stream. See {@link #cuStreamCreate StreamCreate} for the list of valid flags. */ @NativeType("CUresult") public static int cuStreamGetFlags(@NativeType("CUstream") long hStream, @NativeType("unsigned int *") IntBuffer flags) { if (CHECKS) { check(flags, 1); } return ncuStreamGetFlags(hStream, memAddress(flags)); } // --- [ cuStreamGetCtx ] --- /** Unsafe version of: {@link #cuStreamGetCtx StreamGetCtx} */ public static int ncuStreamGetCtx(long hStream, long pctx) { long __functionAddress = Functions.StreamGetCtx; if (CHECKS) { check(__functionAddress); } return callPPI(hStream, pctx, __functionAddress); } /** * Query the context associated with a stream. * *

Returns the CUDA context that the stream is associated with.

* *

The stream handle {@code hStream} can refer to any of the following:

* *
    *
  • a stream created via any of the CUDA driver APIs such as {@link #cuStreamCreate StreamCreate} and {@link #cuStreamCreateWithPriority StreamCreateWithPriority}, or their runtime API equivalents such as * {@code cudaStreamCreate()}, {@code cudaStreamCreateWithFlags()} and {@code cudaStreamCreateWithPriority()}. The returned context is the context * that was active in the calling thread when the stream was created. Passing an invalid handle will result in undefined behavior.
  • *
  • any of the special streams such as the {@code NULL} stream, {@link #CU_STREAM_LEGACY STREAM_LEGACY} and {@link #CU_STREAM_PER_THREAD STREAM_PER_THREAD}. The runtime API equivalents of these are also accepted, * which are {@code NULL}, {@code cudaStreamLegacy()} and {@code cudaStreamPerThread()} respectively. Specifying any of the special handles will return the * context current to the calling thread. If no context is current to the calling thread, {@link #CUDA_ERROR_INVALID_CONTEXT} is returned.
  • *
* * @param hStream handle to the stream to be queried * @param pctx returned context associated with the stream */ @NativeType("CUresult") public static int cuStreamGetCtx(@NativeType("CUstream") long hStream, @NativeType("CUcontext *") PointerBuffer pctx) { if (CHECKS) { check(pctx, 1); } return ncuStreamGetCtx(hStream, memAddress(pctx)); } // --- [ cuStreamWaitEvent ] --- /** * Make a compute stream wait on an event. * *

Makes all future work submitted to {@code hStream} wait for all work captured in {@code hEvent}. See {@link #cuEventRecord EventRecord} for details on what is captured * by an event. The synchronization will be performed efficiently on the device when applicable. {@code hEvent} may be from a different context or device * than {@code hStream}.

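  A minimal sketch of the common producer/consumer pattern (hypothetical helper; {@code hEvent} is assumed to be a previously created event):

      static void join(long producer, long consumer, long hEvent) {
          cuEventRecord(hEvent, producer);                            // capture the producer's current work
          cuStreamWaitEvent(consumer, hEvent, CU_EVENT_WAIT_DEFAULT); // later work in `consumer` waits for it
      }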
* * @param hStream stream to wait * @param hEvent event to wait on (may not be {@code NULL}). One of:
{@link #CU_EVENT_WAIT_DEFAULT EVENT_WAIT_DEFAULT}, {@link #CU_EVENT_WAIT_EXTERNAL EVENT_WAIT_EXTERNAL}
* @param Flags see {@code CUevent_capture_flags} */ @NativeType("CUresult") public static int cuStreamWaitEvent(@NativeType("CUstream") long hStream, @NativeType("CUevent") long hEvent, @NativeType("unsigned int") int Flags) { long __functionAddress = Functions.StreamWaitEvent; if (CHECKS) { check(hEvent); } return callPPI(hStream, hEvent, Flags, __functionAddress); } // --- [ cuStreamAddCallback ] --- /** Unsafe version of: {@link #cuStreamAddCallback StreamAddCallback} */ public static int ncuStreamAddCallback(long hStream, long callback, long userData, int flags) { long __functionAddress = Functions.StreamAddCallback; if (CHECKS) { check(__functionAddress); check(userData); } return callPPPI(hStream, callback, userData, flags, __functionAddress); } /** * Add a callback to a compute stream. * *
Note
* *

This function is slated for eventual deprecation and removal. If you do not require the callback to execute in case of a device error, * consider using {@link #cuLaunchHostFunc LaunchHostFunc}. Additionally, this function is not supported with {@link #cuStreamBeginCapture StreamBeginCapture} and {@link #cuStreamEndCapture StreamEndCapture}, unlike * {@link #cuLaunchHostFunc LaunchHostFunc}.

* *

Adds a callback to be called on the host after all currently enqueued items in the stream have completed. For each {@code cuStreamAddCallback} call, * the callback will be executed exactly once. The callback will block later work in the stream until it is finished.

* *

The callback may be passed {@link #CUDA_SUCCESS} or an error code. In the event of a device error, all subsequently executed callbacks will receive an * appropriate {@code CUresult}.

* *

Callbacks must not make any CUDA API calls. Attempting to use a CUDA API will result in {@link #CUDA_ERROR_NOT_PERMITTED}. Callbacks must not perform any * synchronization that may depend on outstanding device work or other callbacks that are not mandated to run earlier. Callbacks without a mandated order * (in independent streams) execute in undefined order and may be serialized.

* *

For the purposes of Unified Memory, callback execution makes a number of guarantees:

* *
    *
  • The callback stream is considered idle for the duration of the callback. Thus, for example, a callback may always use memory attached to the * callback stream.
  • *
  • The start of execution of a callback has the same effect as synchronizing an event recorded in the same stream immediately prior to the callback. * It thus synchronizes streams which have been "joined" prior to the callback.
  • *
  • Adding device work to any stream does not have the effect of making the stream active until all preceding host functions and stream callbacks have * executed. Thus, for example, a callback might use global attached memory even if work has been added to another stream, if the work has been * ordered behind the callback with an event.
  • *
  • Completion of a callback does not cause a stream to become active except as described above. The callback stream will remain idle if no device work * follows the callback, and will remain idle across consecutive callbacks without device work in between. Thus, for example, stream synchronization * can be done by signaling from a callback at the end of the stream.
  • *
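  A minimal sketch, assuming the generated {@code CUstreamCallback} helper class is used to wrap the lambda and the usual static imports (CU, MemoryStack, MemoryUtil) are in place; the helper name is hypothetical:

      static void notifyWhenDone(long stream) {
          CUstreamCallback callback = CUstreamCallback.create((hStream, status, userData) -> {
              // Runs on a driver thread once all previously enqueued work has completed.
              // No CUDA API calls are allowed in here.
              System.out.println("stream work finished, status=" + status);
          });
          ByteBuffer userBuf = memAlloc(1); // dummy non-NULL user pointer (the binding validates it)
          cuStreamAddCallback(stream, callback, memAddress(userBuf), 0); // flags must be 0
          cuStreamSynchronize(stream); // ensure the callback has fired before releasing resources
          callback.free();
          memFree(userBuf);
      }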
* * @param hStream stream to add callback to * @param callback the function to call once preceding stream operations are complete * @param userData user specified data to be passed to the callback function * @param flags reserved for future use, must be 0 */ @NativeType("CUresult") public static int cuStreamAddCallback(@NativeType("CUstream") long hStream, @NativeType("void (*) (CUstream, CUresult, void *)") CUstreamCallbackI callback, @NativeType("void *") long userData, @NativeType("unsigned int") int flags) { return ncuStreamAddCallback(hStream, callback.address(), userData, flags); } // --- [ cuStreamBeginCapture ] --- /** * Begins graph capture on a stream. * *

Begin graph capture on {@code hStream}. When a stream is in capture mode, all operations pushed into the stream will not be executed, but will instead * be captured into a graph, which will be returned via {@link #cuStreamEndCapture StreamEndCapture}. Capture may not be initiated if {@code stream} is {@link #CU_STREAM_LEGACY STREAM_LEGACY}. Capture * must be ended on the same stream in which it was initiated, and it may only be initiated if the stream is not already in capture mode. The capture mode * may be queried via {@link #cuStreamIsCapturing StreamIsCapturing}. A unique id representing the capture sequence may be queried via {@link #cuStreamGetCaptureInfo StreamGetCaptureInfo}.

* *
Note
* *

Kernels captured using this API must not use texture and surface references. Reading or writing through any texture or surface reference is * undefined behavior. This restriction does not apply to texture and surface objects.

* * @param hStream stream in which to initiate capture */ @NativeType("CUresult") public static int cuStreamBeginCapture(@NativeType("CUstream") long hStream) { long __functionAddress = Functions.StreamBeginCapture; if (CHECKS) { check(__functionAddress); } return callPI(hStream, __functionAddress); } // --- [ cuStreamBeginCapture_v2 ] --- /** * Begins graph capture on a stream. * *

Begin graph capture on {@code hStream}. When a stream is in capture mode, all operations pushed into the stream will not be executed, but will instead * be captured into a graph, which will be returned via {@link #cuStreamEndCapture StreamEndCapture}. Capture may not be initiated if {@code stream} is {@link #CU_STREAM_LEGACY STREAM_LEGACY}. Capture * must be ended on the same stream in which it was initiated, and it may only be initiated if the stream is not already in capture mode. The capture mode * may be queried via {@link #cuStreamIsCapturing StreamIsCapturing}. A unique id representing the capture sequence may be queried via {@link #cuStreamGetCaptureInfo StreamGetCaptureInfo}.

* *

If {@code mode} is not {@link #CU_STREAM_CAPTURE_MODE_RELAXED STREAM_CAPTURE_MODE_RELAXED}, {@link #cuStreamEndCapture StreamEndCapture} must be called on this stream from the same thread.

* *
Note
* *

Kernels captured using this API must not use texture and surface references. Reading or writing through any texture or surface reference is * undefined behavior. This restriction does not apply to texture and surface objects.

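  A minimal capture sketch (hypothetical helper; same assumed setup and static imports as the earlier sketches):

      static long captureToGraph(long stream) {
          try (MemoryStack stack = stackPush()) {
              cuStreamBeginCapture_v2(stream, CU_STREAM_CAPTURE_MODE_GLOBAL);

              // ... enqueue asynchronous work into `stream`; it is recorded, not executed ...

              PointerBuffer phGraph = stack.mallocPointer(1);
              if (cuStreamEndCapture(stream, phGraph) != CUDA_SUCCESS) {
                  throw new IllegalStateException("stream capture failed or was invalidated");
              }
              return phGraph.get(0); // CUgraph handle (NULL if the capture was invalidated)
          }
      }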
* * @param hStream stream in which to initiate capture * @param mode controls the interaction of this capture sequence with other API calls that are potentially unsafe. For more details see * {@link #cuThreadExchangeStreamCaptureMode ThreadExchangeStreamCaptureMode}. */ @NativeType("CUresult") public static int cuStreamBeginCapture_v2(@NativeType("CUstream") long hStream, @NativeType("CUstreamCaptureMode") int mode) { long __functionAddress = Functions.StreamBeginCapture_v2; if (CHECKS) { check(__functionAddress); } return callPI(hStream, mode, __functionAddress); } // --- [ cuThreadExchangeStreamCaptureMode ] --- /** Unsafe version of: {@link #cuThreadExchangeStreamCaptureMode ThreadExchangeStreamCaptureMode} */ public static int ncuThreadExchangeStreamCaptureMode(long mode) { long __functionAddress = Functions.ThreadExchangeStreamCaptureMode; if (CHECKS) { check(__functionAddress); } return callPI(mode, __functionAddress); } /** * Swaps the stream capture interaction mode for a thread. * *

Sets the calling thread's stream capture interaction mode to the value contained in {@code *mode}, and overwrites {@code *mode} with the previous mode * for the thread. To facilitate deterministic behavior across function or module boundaries, callers are encouraged to use this API in a push-pop * fashion:

* *

     * CUstreamCaptureMode mode = desiredMode;
     * cuThreadExchangeStreamCaptureMode(&mode);
     * ...
     * cuThreadExchangeStreamCaptureMode(&mode); // restore previous mode
* *

During stream capture (see {@link #cuStreamBeginCapture StreamBeginCapture}), some actions, such as a call to {@code cudaMalloc}, may be unsafe. In the case of {@code cudaMalloc}, * the operation is not enqueued asynchronously to a stream, and is not observed by stream capture. Therefore, if the sequence of operations captured via * {@link #cuStreamBeginCapture StreamBeginCapture} depended on the allocation being replayed whenever the graph is launched, the captured graph would be invalid.

* *

Therefore, stream capture places restrictions on API calls that can be made within or concurrently to a {@link #cuStreamBeginCapture StreamBeginCapture}-{@link #cuStreamEndCapture StreamEndCapture} * sequence. This behavior can be controlled via this API and flags to {@code cuStreamBeginCapture}.

* *

A thread's mode is one of the following:

* *
    *
  • {@link #CU_STREAM_CAPTURE_MODE_GLOBAL STREAM_CAPTURE_MODE_GLOBAL}: This is the default mode. * *

    If the local thread has an ongoing capture sequence that was not initiated with {@link #CU_STREAM_CAPTURE_MODE_RELAXED STREAM_CAPTURE_MODE_RELAXED} at {@link #cuStreamBeginCapture StreamBeginCapture}, or if any * other thread has a concurrent capture sequence initiated with {@link #CU_STREAM_CAPTURE_MODE_GLOBAL STREAM_CAPTURE_MODE_GLOBAL}, this thread is prohibited from potentially unsafe API * calls.

  • *
  • {@link #CU_STREAM_CAPTURE_MODE_THREAD_LOCAL STREAM_CAPTURE_MODE_THREAD_LOCAL}: If the local thread has an ongoing capture sequence not initiated with {@code CU_STREAM_CAPTURE_MODE_RELAXED}, * it is prohibited from potentially unsafe API calls. Concurrent capture sequences in other threads are ignored.
  • *
  • {@link #CU_STREAM_CAPTURE_MODE_RELAXED STREAM_CAPTURE_MODE_RELAXED}: The local thread is not prohibited from potentially unsafe API calls. Note that the thread is still prohibited from * API calls which necessarily conflict with stream capture, for example, attempting {@link #cuEventQuery EventQuery} on an event that was last recorded inside a capture * sequence.
  • *
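  The push-pop idiom above, expressed through these bindings (assumed static imports of org.lwjgl.cuda.CU.* and org.lwjgl.system.MemoryStack.*):

      try (MemoryStack stack = stackPush()) {
          IntBuffer mode = stack.ints(CU_STREAM_CAPTURE_MODE_RELAXED); // desired mode
          cuThreadExchangeStreamCaptureMode(mode);  // push: previous mode is now in mode.get(0)
          // ... code that must run under the relaxed capture mode ...
          cuThreadExchangeStreamCaptureMode(mode);  // pop: restore the previous mode
      }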
* * @param mode pointer to mode value to swap with the current mode */ @NativeType("CUresult") public static int cuThreadExchangeStreamCaptureMode(@NativeType("CUstreamCaptureMode *") IntBuffer mode) { if (CHECKS) { check(mode, 1); } return ncuThreadExchangeStreamCaptureMode(memAddress(mode)); } // --- [ cuStreamEndCapture ] --- /** Unsafe version of: {@link #cuStreamEndCapture StreamEndCapture} */ public static int ncuStreamEndCapture(long hStream, long phGraph) { long __functionAddress = Functions.StreamEndCapture; if (CHECKS) { check(__functionAddress); } return callPPI(hStream, phGraph, __functionAddress); } /** * Ends capture on a stream, returning the captured graph. * *

End capture on {@code hStream}, returning the captured graph via {@code phGraph}. Capture must have been initiated on {@code hStream} via a call to * {@link #cuStreamBeginCapture StreamBeginCapture}. If capture was invalidated, due to a violation of the rules of stream capture, then a NULL graph will be returned.

* *

If the {@code mode} argument to {@link #cuStreamBeginCapture StreamBeginCapture} was not {@link #CU_STREAM_CAPTURE_MODE_RELAXED STREAM_CAPTURE_MODE_RELAXED}, this call must be from the same thread as * {@link #cuStreamBeginCapture StreamBeginCapture}.

* * @param hStream stream to query * @param phGraph the captured graph */ @NativeType("CUresult") public static int cuStreamEndCapture(@NativeType("CUstream") long hStream, @NativeType("CUgraph *") PointerBuffer phGraph) { if (CHECKS) { check(phGraph, 1); } return ncuStreamEndCapture(hStream, memAddress(phGraph)); } // --- [ cuStreamIsCapturing ] --- /** Unsafe version of: {@link #cuStreamIsCapturing StreamIsCapturing} */ public static int ncuStreamIsCapturing(long hStream, long captureStatus) { long __functionAddress = Functions.StreamIsCapturing; if (CHECKS) { check(__functionAddress); } return callPPI(hStream, captureStatus, __functionAddress); } /** * Returns a stream's capture status. * *

Return the capture status of {@code hStream} via {@code captureStatus}. After a successful call, {@code *captureStatus} will contain one of the * following:

* *
    *
  • {@link #CU_STREAM_CAPTURE_STATUS_NONE STREAM_CAPTURE_STATUS_NONE}: The stream is not capturing.
  • *
  • {@link #CU_STREAM_CAPTURE_STATUS_ACTIVE STREAM_CAPTURE_STATUS_ACTIVE}: The stream is capturing.
  • *
  • {@link #CU_STREAM_CAPTURE_STATUS_INVALIDATED STREAM_CAPTURE_STATUS_INVALIDATED}: The stream was capturing but an error has invalidated the capture sequence. The capture sequence must be * terminated with {@link #cuStreamEndCapture StreamEndCapture} on the stream where it was initiated in order to continue using {@code hStream}.
  • *
* *

Note that, if this is called on {@link #CU_STREAM_LEGACY STREAM_LEGACY} (the "null stream") while a blocking stream in the same context is capturing, it will return * {@link #CUDA_ERROR_STREAM_CAPTURE_IMPLICIT} and {@code *captureStatus} is unspecified after the call. The blocking stream capture is not invalidated.

* *

When a blocking stream is capturing, the legacy stream is in an unusable state until the blocking stream capture is terminated. The legacy stream is * not supported for stream capture, but attempted use would have an implicit dependency on the capturing stream(s).

* * @param hStream stream to query * @param captureStatus returns the stream's capture status */ @NativeType("CUresult") public static int cuStreamIsCapturing(@NativeType("CUstream") long hStream, @NativeType("CUstreamCaptureStatus *") IntBuffer captureStatus) { if (CHECKS) { check(captureStatus, 1); } return ncuStreamIsCapturing(hStream, memAddress(captureStatus)); } // --- [ cuStreamGetCaptureInfo ] --- /** Unsafe version of: {@link #cuStreamGetCaptureInfo StreamGetCaptureInfo} */ public static int ncuStreamGetCaptureInfo(long hStream, long captureStatus, long id) { long __functionAddress = Functions.StreamGetCaptureInfo; if (CHECKS) { check(__functionAddress); } return callPPPI(hStream, captureStatus, id, __functionAddress); } /** * Query capture status of a stream. * *

 Query the capture status of a stream and get an id for the capture sequence, which is unique over the lifetime of the process.

* *

If called on {@link #CU_STREAM_LEGACY STREAM_LEGACY} (the "null stream") while a stream not created with {@link #CU_STREAM_NON_BLOCKING STREAM_NON_BLOCKING} is capturing, returns * {@link #CUDA_ERROR_STREAM_CAPTURE_IMPLICIT}.

* *

A valid id is returned only if both of the following are true:

* *
    *
  • the call returns {@link #CUDA_SUCCESS}
  • *
  • {@code captureStatus} is set to {@link #CU_STREAM_CAPTURE_STATUS_ACTIVE STREAM_CAPTURE_STATUS_ACTIVE}
  • *
*/ @NativeType("CUresult") public static int cuStreamGetCaptureInfo(@NativeType("CUstream") long hStream, @NativeType("CUstreamCaptureStatus *") IntBuffer captureStatus, @NativeType("cuuint64_t *") LongBuffer id) { if (CHECKS) { check(captureStatus, 1); check(id, 1); } return ncuStreamGetCaptureInfo(hStream, memAddress(captureStatus), memAddress(id)); } // --- [ cuStreamGetCaptureInfo_v2 ] --- /** Unsafe version of: {@link #cuStreamGetCaptureInfo_v2 StreamGetCaptureInfo_v2} */ public static int ncuStreamGetCaptureInfo_v2(long hStream, long captureStatus_out, long id_out, long graph_out, long dependencies_out, long numDependencies_out) { long __functionAddress = Functions.StreamGetCaptureInfo_v2; if (CHECKS) { check(__functionAddress); } return callPPPPPPI(hStream, captureStatus_out, id_out, graph_out, dependencies_out, numDependencies_out, __functionAddress); } /** * Query a stream's capture state (11.3+). * *

Query stream state related to stream capture.

* *

If called on {@link #CU_STREAM_LEGACY STREAM_LEGACY} (the "null stream") while a stream not created with {@link #CU_STREAM_NON_BLOCKING STREAM_NON_BLOCKING} is capturing, returns * {@link #CUDA_ERROR_STREAM_CAPTURE_IMPLICIT}.

* *

Valid data (other than capture status) is returned only if both of the following are true:

* *
    *
  • the call returns CUDA_SUCCESS
  • *
  • the returned capture status is {@link #CU_STREAM_CAPTURE_STATUS_ACTIVE STREAM_CAPTURE_STATUS_ACTIVE}
  • *
* *

This version of {@code cuStreamGetCaptureInfo} is introduced in CUDA 11.3 and will supplant the previous version in 12.0. Developers requiring * compatibility across minor versions to CUDA 11.0 (driver version 445) should use {@link #cuStreamGetCaptureInfo StreamGetCaptureInfo} or include a fallback path.

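  A minimal sketch that queries only the required status output and the capture id, leaving the optional outputs {@code NULL} (hypothetical helper):

      static boolean isActivelyCapturing(long stream) {
          try (MemoryStack stack = stackPush()) {
              IntBuffer  status = stack.mallocInt(1);
              LongBuffer id     = stack.mallocLong(1);
              int err = cuStreamGetCaptureInfo_v2(stream, status, id, null, null, null);
              return err == CUDA_SUCCESS && status.get(0) == CU_STREAM_CAPTURE_STATUS_ACTIVE;
          }
      }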
* * @param hStream the stream to query * @param captureStatus_out location to return the capture status of the stream; required * @param id_out optional location to return an id for the capture sequence, which is unique over the lifetime of the process * @param graph_out optional location to return the graph being captured into. * *

All operations other than destroy and node removal are permitted on the graph while the capture sequence is in progress. This API does not transfer * ownership of the graph, which is transferred or destroyed at {@link #cuStreamEndCapture StreamEndCapture}. Note that the graph handle may be invalidated before end of * capture for certain errors. Nodes that are or become unreachable from the original stream at {@link #cuStreamEndCapture StreamEndCapture} due to direct actions on the graph * do not trigger {@link #CUDA_ERROR_STREAM_CAPTURE_UNJOINED}.

* @param dependencies_out optional location to store a pointer to an array of nodes. * *

The next node to be captured in the stream will depend on this set of nodes, absent operations such as event wait which modify this set. The array * pointer is valid until the next API call which operates on the stream or until end of capture. The node handles may be copied out and are valid * until they or the graph is destroyed. The driver-owned array may also be passed directly to APIs that operate on the graph (not the stream) without * copying.

* @param numDependencies_out optional location to store the size of the array returned in {@code dependencies_out} */ @NativeType("CUresult") public static int cuStreamGetCaptureInfo_v2(@NativeType("CUstream") long hStream, @NativeType("CUstreamCaptureStatus *") IntBuffer captureStatus_out, @Nullable @NativeType("cuuint64_t *") LongBuffer id_out, @Nullable @NativeType("CUgraph *") PointerBuffer graph_out, @Nullable @NativeType("CUgraphNode const **") PointerBuffer dependencies_out, @Nullable @NativeType("size_t *") PointerBuffer numDependencies_out) { if (CHECKS) { check(captureStatus_out, 1); checkSafe(id_out, 1); checkSafe(graph_out, 1); checkSafe(dependencies_out, 1); checkSafe(numDependencies_out, 1); } return ncuStreamGetCaptureInfo_v2(hStream, memAddress(captureStatus_out), memAddressSafe(id_out), memAddressSafe(graph_out), memAddressSafe(dependencies_out), memAddressSafe(numDependencies_out)); } // --- [ cuStreamUpdateCaptureDependencies ] --- /** Unsafe version of: {@link #cuStreamUpdateCaptureDependencies StreamUpdateCaptureDependencies} */ public static int ncuStreamUpdateCaptureDependencies(long hStream, long dependencies, long numDependencies, int flags) { long __functionAddress = Functions.StreamUpdateCaptureDependencies; if (CHECKS) { check(__functionAddress); } return callPPPI(hStream, dependencies, numDependencies, flags, __functionAddress); } /** * Update the set of dependencies in a capturing stream (11.3+). * *

Modifies the dependency set of a capturing stream. The dependency set is the set of nodes that the next captured node in the stream will depend on.

* *

Valid flags are {@link #CU_STREAM_ADD_CAPTURE_DEPENDENCIES STREAM_ADD_CAPTURE_DEPENDENCIES} and {@link #CU_STREAM_SET_CAPTURE_DEPENDENCIES STREAM_SET_CAPTURE_DEPENDENCIES}. These control whether the set passed to the API is added * to the existing set or replaces it. A flags value of 0 defaults to {@link #CU_STREAM_ADD_CAPTURE_DEPENDENCIES STREAM_ADD_CAPTURE_DEPENDENCIES}.

* *

Nodes that are removed from the dependency set via this API do not result in {@link #CUDA_ERROR_STREAM_CAPTURE_UNJOINED} if they are unreachable from the * stream at {@link #cuStreamEndCapture StreamEndCapture}.

* *

Returns {@link #CUDA_ERROR_ILLEGAL_STATE} if the stream is not capturing.

* *

This API is new in CUDA 11.3. Developers requiring compatibility across minor versions to CUDA 11.0 should not use this API or provide a fallback.

*/ @NativeType("CUresult") public static int cuStreamUpdateCaptureDependencies(@NativeType("CUstream") long hStream, @NativeType("CUgraphNode *") PointerBuffer dependencies, @NativeType("unsigned int") int flags) { return ncuStreamUpdateCaptureDependencies(hStream, memAddress(dependencies), dependencies.remaining(), flags); } // --- [ cuStreamAttachMemAsync ] --- /** * Attach memory to a stream asynchronously. * *

Enqueues an operation in {@code hStream} to specify stream association of {@code length} bytes of memory starting from {@code dptr}. This function is a * stream-ordered operation, meaning that it is dependent on, and will only take effect when, previous work in stream has completed. Any previous * association is automatically replaced.

* *

{@code dptr} must point to one of the following types of memories:

* *
    *
  • managed memory declared using the __managed__ keyword or allocated with {@link #cuMemAllocManaged MemAllocManaged}.
  • *
  • a valid host-accessible region of system-allocated pageable memory. This type of memory may only be specified if the device associated with the * stream reports a non-zero value for the device attribute {@link #CU_DEVICE_ATTRIBUTE_PAGEABLE_MEMORY_ACCESS DEVICE_ATTRIBUTE_PAGEABLE_MEMORY_ACCESS}.
  • *
* *

For managed allocations, {@code length} must be either zero or the entire allocation's size. Both indicate that the entire allocation's stream * association is being changed. Currently, it is not possible to change stream association for a portion of a managed allocation.

* *

For pageable host allocations, {@code length} must be non-zero.

* *

The stream association is specified using {@code flags} which must be one of {@code CUmemAttach_flags}. If the {@link #CU_MEM_ATTACH_GLOBAL MEM_ATTACH_GLOBAL} flag is specified, * the memory can be accessed by any stream on any device. If the {@link #CU_MEM_ATTACH_HOST MEM_ATTACH_HOST} flag is specified, the program makes a guarantee that it won't access * the memory on the device from any stream on a device that has a zero value for the device attribute {@link #CU_DEVICE_ATTRIBUTE_CONCURRENT_MANAGED_ACCESS DEVICE_ATTRIBUTE_CONCURRENT_MANAGED_ACCESS}. If * the {@link #CU_MEM_ATTACH_SINGLE MEM_ATTACH_SINGLE} flag is specified and {@code hStream} is associated with a device that has a zero value for the device attribute * {@link #CU_DEVICE_ATTRIBUTE_CONCURRENT_MANAGED_ACCESS DEVICE_ATTRIBUTE_CONCURRENT_MANAGED_ACCESS}, the program makes a guarantee that it will only access the memory on the device from {@code hStream}. * It is illegal to attach singly to the NULL stream, because the NULL stream is a virtual global stream and not a specific stream. An error will be * returned in this case.

* *

When memory is associated with a single stream, the Unified Memory system will allow CPU access to this memory region so long as all operations in * {@code hStream} have completed, regardless of whether other streams are active. In effect, this constrains exclusive ownership of the managed memory * region by an active GPU to per-stream activity instead of whole-GPU activity.

* *

Accessing memory on the device from streams that are not associated with it will produce undefined results. No error checking is performed by the * Unified Memory system to ensure that kernels launched into other streams do not access this region.

* *

It is a program's responsibility to order calls to {@link #cuStreamAttachMemAsync StreamAttachMemAsync} via events, synchronization or other means to ensure legal access to memory * at all times. Data visibility and coherency will be changed appropriately for all kernels which follow a stream-association change.

* *

If {@code hStream} is destroyed while data is associated with it, the association is removed and the association reverts to the default visibility of * the allocation as specified at {@link #cuMemAllocManaged MemAllocManaged}. For __managed__ variables, the default association is always {@link #CU_MEM_ATTACH_GLOBAL MEM_ATTACH_GLOBAL}. Note that * destroying a stream is an asynchronous operation, and as a result, the change to default association won't happen until all work in the stream has * completed.

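  A minimal sketch of attaching an entire managed allocation to a single stream (hypothetical helper):

      static void attachToStream(long stream, long dptr) {
          // length 0 = the entire managed allocation; the attach takes effect in stream order
          cuStreamAttachMemAsync(stream, dptr, 0, CU_MEM_ATTACH_SINGLE);
      }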
* * @param hStream stream in which to enqueue the attach operation * @param dptr pointer to memory (must be a pointer to managed memory or to a valid host-accessible region of system-allocated pageable memory) * @param length length of memory * @param flags must be one of {@code CUmemAttach_flags} */ @NativeType("CUresult") public static int cuStreamAttachMemAsync(@NativeType("CUstream") long hStream, @NativeType("CUdeviceptr") long dptr, @NativeType("size_t") long length, @NativeType("unsigned int") int flags) { long __functionAddress = Functions.StreamAttachMemAsync; if (CHECKS) { check(__functionAddress); check(dptr); } return callPPPI(hStream, dptr, length, flags, __functionAddress); } // --- [ cuStreamQuery ] --- /** * Determine status of a compute stream. * *

Returns {@link #CUDA_SUCCESS} if all operations in the stream specified by {@code hStream} have completed, or {@link #CUDA_ERROR_NOT_READY} if not.

* *

For the purposes of Unified Memory, a return value of {@link #CUDA_SUCCESS} is equivalent to having called {@link #cuStreamSynchronize StreamSynchronize}.

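  A minimal polling sketch (hypothetical helper):

      static void pollUntilDone(long stream) {
          while (cuStreamQuery(stream) == CUDA_ERROR_NOT_READY) {
              // ... do other host-side work between polls ...
              Thread.onSpinWait();
          }
      }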
* * @param hStream stream to query status of */ @NativeType("CUresult") public static int cuStreamQuery(@NativeType("CUstream") long hStream) { long __functionAddress = Functions.StreamQuery; return callPI(hStream, __functionAddress); } // --- [ cuStreamSynchronize ] --- /** * Wait until a stream's tasks are completed. * *

Waits until the device has completed all operations in the stream specified by {@code hStream}. If the context was created with the * {@link #CU_CTX_SCHED_BLOCKING_SYNC CTX_SCHED_BLOCKING_SYNC} flag, the CPU thread will block until the stream is finished with all of its tasks.

* * @param hStream stream to wait for */ @NativeType("CUresult") public static int cuStreamSynchronize(@NativeType("CUstream") long hStream) { long __functionAddress = Functions.StreamSynchronize; return callPI(hStream, __functionAddress); } // --- [ cuStreamDestroy ] --- /** * Destroys a stream. * *

Destroys the stream specified by {@code hStream}.

* *

In case the device is still doing work in the stream {@code hStream} when {@link #cuStreamDestroy StreamDestroy} is called, the function will return immediately and the * resources associated with {@code hStream} will be released automatically once the device has completed all work in {@code hStream}.

* * @param hStream stream to destroy */ @NativeType("CUresult") public static int cuStreamDestroy(@NativeType("CUstream") long hStream) { long __functionAddress = Functions.StreamDestroy; if (CHECKS) { check(__functionAddress); } return callPI(hStream, __functionAddress); } // --- [ cuStreamCopyAttributes ] --- /** * Copies attributes from source stream to destination stream. * *

Copies attributes from source stream {@code src} to destination stream {@code dst}. Both streams must have the same context.

* * @param dst destination stream * @param src source stream For list of attributes see {@code CUstreamAttrID} */ @NativeType("CUresult") public static int cuStreamCopyAttributes(@NativeType("CUstream") long dst, @NativeType("CUstream") long src) { long __functionAddress = Functions.StreamCopyAttributes; if (CHECKS) { check(__functionAddress); } return callPPI(dst, src, __functionAddress); } // --- [ cuStreamGetAttribute ] --- /** Unsafe version of: {@link #cuStreamGetAttribute StreamGetAttribute} */ public static int ncuStreamGetAttribute(long hStream, int attr, long value_out) { long __functionAddress = Functions.StreamGetAttribute; if (CHECKS) { check(__functionAddress); } return callPPI(hStream, attr, value_out, __functionAddress); } /** * Queries stream attribute. * *

Queries attribute {@code attr} from {@code hStream} and stores it in corresponding member of {@code value_out}.

*/ @NativeType("CUresult") public static int cuStreamGetAttribute(@NativeType("CUstream") long hStream, @NativeType("CUstreamAttrID") int attr, @NativeType("CUstreamAttrValue *") CUstreamAttrValue value_out) { return ncuStreamGetAttribute(hStream, attr, value_out.address()); } // --- [ cuStreamSetAttribute ] --- /** Unsafe version of: {@link #cuStreamSetAttribute StreamSetAttribute} */ public static int ncuStreamSetAttribute(long hStream, int attr, long value) { long __functionAddress = Functions.StreamSetAttribute; if (CHECKS) { check(__functionAddress); } return callPPI(hStream, attr, value, __functionAddress); } /** * Sets stream attribute. * *

Sets attribute {@code attr} on {@code hStream} from corresponding attribute of {@code value}. The updated attribute will be applied to subsequent work * submitted to the stream. It will not affect previously submitted work.

*/ @NativeType("CUresult") public static int cuStreamSetAttribute(@NativeType("CUstream") long hStream, @NativeType("CUstreamAttrID") int attr, @NativeType("CUstreamAttrValue const *") CUstreamAttrValue value) { return ncuStreamSetAttribute(hStream, attr, value.address()); } // --- [ cuEventCreate ] --- /** Unsafe version of: {@link #cuEventCreate EventCreate} */ public static int ncuEventCreate(long phEvent, int Flags) { long __functionAddress = Functions.EventCreate; return callPI(phEvent, Flags, __functionAddress); } /** * Creates an event. * *

Creates an event {@code *phEvent} for the current context with the flags specified via {@code Flags}. Valid flags include:

* *
    *
  • {@link #CU_EVENT_DEFAULT EVENT_DEFAULT}: Default event creation flag.
  • *
  • {@link #CU_EVENT_BLOCKING_SYNC EVENT_BLOCKING_SYNC}: Specifies that the created event should use blocking synchronization. A CPU thread that uses {@link #cuEventSynchronize EventSynchronize} to * wait on an event created with this flag will block until the event has actually been recorded.
  • *
  • {@link #CU_EVENT_DISABLE_TIMING EVENT_DISABLE_TIMING}: Specifies that the created event does not need to record timing data. Events created with this flag specified and the * {@link #CU_EVENT_BLOCKING_SYNC EVENT_BLOCKING_SYNC} flag not specified will provide the best performance when used with {@link #cuStreamWaitEvent StreamWaitEvent} and {@link #cuEventQuery EventQuery}.
  • *
  • {@link #CU_EVENT_INTERPROCESS EVENT_INTERPROCESS}: Specifies that the created event may be used as an interprocess event by {@link #cuIpcGetEventHandle IpcGetEventHandle}. {@link #CU_EVENT_INTERPROCESS EVENT_INTERPROCESS} * must be specified along with {@link #CU_EVENT_DISABLE_TIMING EVENT_DISABLE_TIMING}.
  • *
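  A minimal sketch of creating a synchronization-only event (same assumed setup and static imports as the earlier sketches):

      try (MemoryStack stack = stackPush()) {
          PointerBuffer phEvent = stack.mallocPointer(1);
          cuEventCreate(phEvent, CU_EVENT_BLOCKING_SYNC | CU_EVENT_DISABLE_TIMING);
          long syncEvent = phEvent.get(0);
          // ... cuEventRecord / cuStreamWaitEvent / cuEventSynchronize ...
          cuEventDestroy(syncEvent);
      }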
* * @param phEvent returns newly created event * @param Flags event creation flags */ @NativeType("CUresult") public static int cuEventCreate(@NativeType("CUevent *") PointerBuffer phEvent, @NativeType("unsigned int") int Flags) { if (CHECKS) { check(phEvent, 1); } return ncuEventCreate(memAddress(phEvent), Flags); } // --- [ cuEventRecord ] --- /** * Records an event. * *

Captures in {@code hEvent} the contents of {@code hStream} at the time of this call. {@code hEvent} and {@code hStream} must be from the same context. * Calls such as {@link #cuEventQuery EventQuery} or {@link #cuStreamWaitEvent StreamWaitEvent} will then examine or wait for completion of the work that was captured. Uses of {@code hStream} * after this call do not modify {@code hEvent}. See note on default stream behavior for what is captured in the default case.

* *

{@link #cuEventRecord EventRecord} can be called multiple times on the same event and will overwrite the previously captured state. Other APIs such as * {@link #cuStreamWaitEvent StreamWaitEvent} use the most recently captured state at the time of the API call, and are not affected by later calls to {@link #cuEventRecord EventRecord}. * Before the first call to {@link #cuEventRecord EventRecord}, an event represents an empty set of work, so for example {@link #cuEventQuery EventQuery} would return {@link #CUDA_SUCCESS}.

* * @param hEvent event to record * @param hStream stream to record event for */ @NativeType("CUresult") public static int cuEventRecord(@NativeType("CUevent") long hEvent, @NativeType("CUstream") long hStream) { long __functionAddress = Functions.EventRecord; if (CHECKS) { check(hEvent); } return callPPI(hEvent, hStream, __functionAddress); } // --- [ cuEventRecordWithFlags ] --- /** * Records an event. * *

Captures in {@code hEvent} the contents of {@code hStream} at the time of this call. {@code hEvent} and {@code hStream} must be from the same context. * Calls such as {@link #cuEventQuery EventQuery} or {@link #cuStreamWaitEvent StreamWaitEvent} will then examine or wait for completion of the work that was captured. Uses of {@code hStream} * after this call do not modify {@code hEvent}. See note on default stream behavior for what is captured in the default case.

* *

{@link #cuEventRecordWithFlags EventRecordWithFlags} can be called multiple times on the same event and will overwrite the previously captured state. Other APIs such as * {@link #cuStreamWaitEvent StreamWaitEvent} use the most recently captured state at the time of the API call, and are not affected by later calls to * {@link #cuEventRecordWithFlags EventRecordWithFlags}. Before the first call to {@link #cuEventRecordWithFlags EventRecordWithFlags}, an event represents an empty set of work, so for example * {@link #cuEventQuery EventQuery} would return {@link #CUDA_SUCCESS}.

* *

Valid flags include:

* *
    *
  • {@link #CU_EVENT_RECORD_DEFAULT EVENT_RECORD_DEFAULT}: Default event creation flag.
  • *
  • {@link #CU_EVENT_RECORD_EXTERNAL EVENT_RECORD_EXTERNAL}: Event is captured in the graph as an external event node when performing stream capture. This flag is invalid outside * of stream capture.
  • *
* * @param hEvent event to record * @param hStream stream to record event for * @param flags see {@code CUevent_capture_flags} */ @NativeType("CUresult") public static int cuEventRecordWithFlags(@NativeType("CUevent") long hEvent, @NativeType("CUstream") long hStream, @NativeType("unsigned int") int flags) { long __functionAddress = Functions.EventRecordWithFlags; if (CHECKS) { check(__functionAddress); check(hEvent); } return callPPI(hEvent, hStream, flags, __functionAddress); } // --- [ cuEventQuery ] --- /** * Queries an event's status. * *

Queries the status of all work currently captured by {@code hEvent}. See {@link #cuEventRecord EventRecord} for details on what is captured by an event.

* *

Returns {@link #CUDA_SUCCESS} if all captured work has been completed, or {@link #CUDA_ERROR_NOT_READY} if any captured work is incomplete.

* *

For the purposes of Unified Memory, a return value of {@link #CUDA_SUCCESS} is equivalent to having called {@link #cuEventSynchronize EventSynchronize}.

* * @param hEvent event to query */ @NativeType("CUresult") public static int cuEventQuery(@NativeType("CUevent") long hEvent) { long __functionAddress = Functions.EventQuery; if (CHECKS) { check(hEvent); } return callPI(hEvent, __functionAddress); } // --- [ cuEventSynchronize ] --- /** * Waits for an event to complete. * *

Waits until the completion of all work currently captured in {@code hEvent}. See {@link #cuEventRecord EventRecord} for details on what is captured by an event.

* *

Waiting for an event that was created with the {@link #CU_EVENT_BLOCKING_SYNC EVENT_BLOCKING_SYNC} flag will cause the calling CPU thread to block until the event has been * completed by the device. If the {@link #CU_EVENT_BLOCKING_SYNC EVENT_BLOCKING_SYNC} flag has not been set, then the CPU thread will busy-wait until the event has been completed * by the device.

* * @param hEvent event to wait for */ @NativeType("CUresult") public static int cuEventSynchronize(@NativeType("CUevent") long hEvent) { long __functionAddress = Functions.EventSynchronize; if (CHECKS) { check(hEvent); } return callPI(hEvent, __functionAddress); } // --- [ cuEventDestroy ] --- /** * Destroys an event. * *

Destroys the event specified by {@code hEvent}.

* *

An event may be destroyed before it is complete (i.e., while {@link #cuEventQuery EventQuery} would return {@link #CUDA_ERROR_NOT_READY}). In this case, the call does not * block on completion of the event, and any associated resources will automatically be released asynchronously at completion.

* * @param hEvent event to destroy */ @NativeType("CUresult") public static int cuEventDestroy(@NativeType("CUevent") long hEvent) { long __functionAddress = Functions.EventDestroy; if (CHECKS) { check(__functionAddress); check(hEvent); } return callPI(hEvent, __functionAddress); } // --- [ cuEventElapsedTime ] --- /** Unsafe version of: {@link #cuEventElapsedTime EventElapsedTime} */ public static int ncuEventElapsedTime(long pMilliseconds, long hStart, long hEnd) { long __functionAddress = Functions.EventElapsedTime; if (CHECKS) { check(hStart); check(hEnd); } return callPPPI(pMilliseconds, hStart, hEnd, __functionAddress); } /** * Computes the elapsed time between two events. * *

Computes the elapsed time between two events (in milliseconds with a resolution of around 0.5 microseconds).

* *

If either event was last recorded in a non-{@code NULL} stream, the resulting time may be greater than expected (even if both used the same stream handle). * This happens because the {@link #cuEventRecord EventRecord} operation takes place asynchronously and there is no guarantee that the measured latency is actually just * between the two events. Any number of other different stream operations could execute in between the two measured events, thus altering the timing in a * significant way.

* *

If {@link #cuEventRecord EventRecord} has not been called on either event then {@link #CUDA_ERROR_INVALID_HANDLE} is returned. If {@link #cuEventRecord EventRecord} has been called on both * events but one or both of them has not yet been completed (that is, {@link #cuEventQuery EventQuery} would return {@link #CUDA_ERROR_NOT_READY} on at least one of the * events), {@link #CUDA_ERROR_NOT_READY} is returned. If either event was created with the {@link #CU_EVENT_DISABLE_TIMING EVENT_DISABLE_TIMING} flag, then this function will return * {@link #CUDA_ERROR_INVALID_HANDLE}.

* * @param pMilliseconds time between {@code hStart} and {@code hEnd} in ms * @param hStart starting event * @param hEnd ending event */ @NativeType("CUresult") public static int cuEventElapsedTime(@NativeType("float *") FloatBuffer pMilliseconds, @NativeType("CUevent") long hStart, @NativeType("CUevent") long hEnd) { if (CHECKS) { check(pMilliseconds, 1); } return ncuEventElapsedTime(memAddress(pMilliseconds), hStart, hEnd); } // --- [ cuImportExternalMemory ] --- /** Unsafe version of: {@link #cuImportExternalMemory ImportExternalMemory} */ public static int ncuImportExternalMemory(long extMem_out, long memHandleDesc) { long __functionAddress = Functions.ImportExternalMemory; if (CHECKS) { check(__functionAddress); } return callPPI(extMem_out, memHandleDesc, __functionAddress); } /** * Imports an external memory object. * *

Imports an externally allocated memory object and returns a handle to that in {@code extMem_out}.

* *

The properties of the handle being imported must be described in {@code memHandleDesc}.

* *

If {@code ::type} is {@link #CU_EXTERNAL_MEMORY_HANDLE_TYPE_OPAQUE_FD EXTERNAL_MEMORY_HANDLE_TYPE_OPAQUE_FD}, then {@code ::handle::fd} must be a valid file descriptor referencing a memory object. * Ownership of the file descriptor is transferred to the CUDA driver when the handle is imported successfully. Performing any operations on the file * descriptor after it is imported results in undefined behavior.

* *

If {@code ::type} is {@link #CU_EXTERNAL_MEMORY_HANDLE_TYPE_OPAQUE_WIN32 EXTERNAL_MEMORY_HANDLE_TYPE_OPAQUE_WIN32}, then exactly one of {@code ::handle::win32::handle} and {@code ::handle::win32::name} * must not be {@code NULL}. If {@code ::handle::win32::handle} is not {@code NULL}, then it must represent a valid shared NT handle that references a memory object. * Ownership of this handle is not transferred to CUDA after the import operation, so the application must release the handle using the appropriate system * call. If {@code ::handle::win32::name} is not NULL, then it must point to a NULL-terminated array of UTF-16 characters that * refers to a memory object.

* *

If {@code ::type} is {@link #CU_EXTERNAL_MEMORY_HANDLE_TYPE_OPAQUE_WIN32_KMT EXTERNAL_MEMORY_HANDLE_TYPE_OPAQUE_WIN32_KMT}, then {@code ::handle::win32::handle} must be non-{@code NULL} and * {@code ::handle::win32::name} must be {@code NULL}. The handle specified must be a globally shared KMT handle. This handle does not hold a reference to the * underlying object, and thus will be invalid when all references to the memory object are destroyed.

* *

If {@code ::type} is {@link #CU_EXTERNAL_MEMORY_HANDLE_TYPE_D3D12_HEAP EXTERNAL_MEMORY_HANDLE_TYPE_D3D12_HEAP}, then exactly one of {@code ::handle::win32::handle} and {@code ::handle::win32::name} * must not be {@code NULL}. If {@code ::handle::win32::handle} is not {@code NULL}, then it must represent a valid shared NT handle that is returned by * {@code ID3D12Device::CreateSharedHandle} when referring to a {@code ID3D12Heap} object. This handle holds a reference to the underlying object. If * {@code ::handle::win32::name} is not {@code NULL}, then it must point to a {@code NULL}-terminated array of UTF-16 characters that refers to a {@code ID3D12Heap} * object.

* *

If {@code ::type} is {@link #CU_EXTERNAL_MEMORY_HANDLE_TYPE_D3D12_RESOURCE EXTERNAL_MEMORY_HANDLE_TYPE_D3D12_RESOURCE}, then exactly one of {@code ::handle::win32::handle} and {@code ::handle::win32::name} * must not be NULL. If {@code ::handle::win32::handle} is not {@code NULL}, then it must represent a valid shared NT handle that is returned by * {@code ID3D12Device::CreateSharedHandle} when referring to a {@code ID3D12Resource} object. This handle holds a reference to the underlying object. If * {@code ::handle::win32::name} is not {@code NULL}, then it must point to a {@code NULL}-terminated array of UTF-16 characters that refers to a {@code ID3D12Resource} * object.

* *

If {@code ::type} is {@link #CU_EXTERNAL_MEMORY_HANDLE_TYPE_D3D11_RESOURCE EXTERNAL_MEMORY_HANDLE_TYPE_D3D11_RESOURCE}, then {@code ::handle::win32::handle} must represent a valid shared NT handle that is * returned by {@code IDXGIResource1::CreateSharedHandle} when referring to a {@code ID3D11Resource} object. If {@code ::handle::win32::name} is not * {@code NULL}, then it must point to a {@code NULL}-terminated array of UTF-16 characters that refers to a {@code ID3D11Resource} object.

* *

If {@code ::type} is {@link #CU_EXTERNAL_MEMORY_HANDLE_TYPE_D3D11_RESOURCE_KMT EXTERNAL_MEMORY_HANDLE_TYPE_D3D11_RESOURCE_KMT}, then {@code ::handle::win32::handle} must represent a valid shared KMT handle * that is returned by {@code IDXGIResource::GetSharedHandle} when referring to a {@code ID3D11Resource} object and {@code ::handle::win32::name} must be * {@code NULL}.

* *

If {@code ::type} is {@link #CU_EXTERNAL_MEMORY_HANDLE_TYPE_NVSCIBUF EXTERNAL_MEMORY_HANDLE_TYPE_NVSCIBUF}, then {@code ::handle::nvSciBufObject} must be non-{@code NULL} and reference a valid * {@code NvSciBuf} object. If the {@code NvSciBuf} object imported into CUDA is also mapped by other drivers, then the application must use * {@link #cuWaitExternalSemaphoresAsync WaitExternalSemaphoresAsync} or {@link #cuSignalExternalSemaphoresAsync SignalExternalSemaphoresAsync} as appropriate barriers to maintain coherence between CUDA and the other drivers. * See {@link CUDA_EXTERNAL_SEMAPHORE_SIGNAL_SKIP_NVSCIBUF_MEMSYNC} and {@link CUDA_EXTERNAL_SEMAPHORE_WAIT_SKIP_NVSCIBUF_MEMSYNC} for memory synchronization.

* *

The size of the memory object must be specified in {@code ::size}.

* *

Specifying the flag {@link #CUDA_EXTERNAL_MEMORY_DEDICATED} in {@code ::flags} indicates that the resource is a dedicated resource. The definition of what * constitutes a dedicated resource is outside the scope of this extension. This flag must be set if {@code ::type} is one of the following:

* *
    *
  • {@link #CU_EXTERNAL_MEMORY_HANDLE_TYPE_D3D12_RESOURCE EXTERNAL_MEMORY_HANDLE_TYPE_D3D12_RESOURCE}
  • *
  • {@link #CU_EXTERNAL_MEMORY_HANDLE_TYPE_D3D11_RESOURCE EXTERNAL_MEMORY_HANDLE_TYPE_D3D11_RESOURCE}
  • *
  • {@link #CU_EXTERNAL_MEMORY_HANDLE_TYPE_D3D11_RESOURCE_KMT EXTERNAL_MEMORY_HANDLE_TYPE_D3D11_RESOURCE_KMT}
  • *
* *
Note
* *

If the Vulkan memory imported into CUDA is mapped on the CPU, then the application must use * {@code vkInvalidateMappedMemoryRanges}/{@code vkFlushMappedMemoryRanges} as well as appropriate Vulkan pipeline barriers to maintain coherence between * CPU and GPU. For more information on these APIs, please refer to the "Synchronization and Cache Control" chapter of the Vulkan specification.

* * @param extMem_out returned handle to an external memory object * @param memHandleDesc memory import handle descriptor */ @NativeType("CUresult") public static int cuImportExternalMemory(@NativeType("CUexternalMemory *") PointerBuffer extMem_out, @NativeType("CUDA_EXTERNAL_MEMORY_HANDLE_DESC const *") CUDA_EXTERNAL_MEMORY_HANDLE_DESC memHandleDesc) { if (CHECKS) { check(extMem_out, 1); } return ncuImportExternalMemory(memAddress(extMem_out), memHandleDesc.address()); } // --- [ cuExternalMemoryGetMappedBuffer ] --- /** Unsafe version of: {@link #cuExternalMemoryGetMappedBuffer ExternalMemoryGetMappedBuffer} */ public static int ncuExternalMemoryGetMappedBuffer(long devPtr, long extMem, long bufferDesc) { long __functionAddress = Functions.ExternalMemoryGetMappedBuffer; if (CHECKS) { check(__functionAddress); check(extMem); } return callPPPI(devPtr, extMem, bufferDesc, __functionAddress); } /** * Maps a buffer onto an imported memory object. * *

Maps a buffer onto an imported memory object and returns a device pointer in {@code devPtr}.

* *

The properties of the buffer being mapped must be described in {@code bufferDesc}.

* *

The offset and size have to be suitably aligned to match the requirements of the external API. Mapping two buffers whose ranges overlap may or may not * result in the same virtual address being returned for the overlapped portion. In such cases, the application must ensure that all accesses to that * region from the GPU are volatile. Otherwise writes made via one address are not guaranteed to be visible via the other address, even if they're issued * by the same thread. It is recommended that applications map the combined range instead of mapping separate buffers and then apply the appropriate * offsets to the returned pointer to derive the individual buffers.

* *

The returned pointer {@code devPtr} must be freed using {@link #cuMemFree MemFree}.
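
A hedged sketch of mapping a sub-range of an imported memory object, assuming {@code extMem} is a hypothetical handle returned by {@link #cuImportExternalMemory ImportExternalMemory}, {@code bufferBytes} is the hypothetical size of the range, and that the generated {@code CUDA_EXTERNAL_MEMORY_BUFFER_DESC} struct class follows LWJGL's usual {@code calloc(MemoryStack)} factory and field-named setters:

    // assumes: import static org.lwjgl.cuda.CU.*; import static org.lwjgl.system.MemoryStack.*;
    try (MemoryStack stack = stackPush()) {
        CUDA_EXTERNAL_MEMORY_BUFFER_DESC desc = CUDA_EXTERNAL_MEMORY_BUFFER_DESC.calloc(stack)
            .offset(0)          // byte offset into the imported allocation
            .size(bufferBytes)  // size of the range to map
            .flags(0);

        PointerBuffer pDevPtr = stack.mallocPointer(1);
        if (cuExternalMemoryGetMappedBuffer(pDevPtr, extMem, desc) == CUDA_SUCCESS) {
            long devPtr = pDevPtr.get(0);
            // ... use devPtr in kernels or memory copies ...
            cuMemFree(devPtr); // the mapped range must be released explicitly
        }
    }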

* * @param devPtr returned device pointer to buffer * @param extMem handle to external memory object * @param bufferDesc buffer descriptor */ @NativeType("CUresult") public static int cuExternalMemoryGetMappedBuffer(@NativeType("CUdeviceptr *") PointerBuffer devPtr, @NativeType("CUexternalMemory") long extMem, @NativeType("CUDA_EXTERNAL_MEMORY_BUFFER_DESC const *") CUDA_EXTERNAL_MEMORY_BUFFER_DESC bufferDesc) { if (CHECKS) { check(devPtr, 1); } return ncuExternalMemoryGetMappedBuffer(memAddress(devPtr), extMem, bufferDesc.address()); } // --- [ cuExternalMemoryGetMappedMipmappedArray ] --- /** Unsafe version of: {@link #cuExternalMemoryGetMappedMipmappedArray ExternalMemoryGetMappedMipmappedArray} */ public static int ncuExternalMemoryGetMappedMipmappedArray(long mipmap, long extMem, long mipmapDesc) { long __functionAddress = Functions.ExternalMemoryGetMappedMipmappedArray; if (CHECKS) { check(__functionAddress); check(extMem); } return callPPPI(mipmap, extMem, mipmapDesc, __functionAddress); } /** * Maps a CUDA mipmapped array onto an external memory object. * *

Maps a CUDA mipmapped array onto an external object and returns a handle to it in {@code mipmap}.

* *

The properties of the CUDA mipmapped array being mapped must be described in {@code mipmapDesc}.

* *

{@code ::offset} is the offset in the memory object where the base level of the mipmap chain is. {@code ::arrayDesc} describes the format, dimensions * and type of the base level of the mipmap chain. For further details on these parameters, please refer to the documentation for {@link #cuMipmappedArrayCreate MipmappedArrayCreate}. * Note that if the mipmapped array is bound as a color target in the graphics API, then the flag {@link #CUDA_ARRAY3D_COLOR_ATTACHMENT} must be specified in * {@code ::arrayDesc::Flags}. {@code ::numLevels} specifies the total number of levels in the mipmap chain.

* *

If {@code extMem} was imported from a handle of type {@link #CU_EXTERNAL_MEMORY_HANDLE_TYPE_NVSCIBUF EXTERNAL_MEMORY_HANDLE_TYPE_NVSCIBUF}, then {@code ::numLevels} must be equal to 1.

* *

The returned CUDA mipmapped array must be freed using {@link #cuMipmappedArrayDestroy MipmappedArrayDestroy}.

* * @param mipmap returned CUDA mipmapped array * @param extMem handle to external memory object * @param mipmapDesc CUDA array descriptor */ @NativeType("CUresult") public static int cuExternalMemoryGetMappedMipmappedArray(@NativeType("CUmipmappedArray *") PointerBuffer mipmap, @NativeType("CUexternalMemory") long extMem, @NativeType("CUDA_EXTERNAL_MEMORY_MIPMAPPED_ARRAY_DESC const *") CUDA_EXTERNAL_MEMORY_MIPMAPPED_ARRAY_DESC mipmapDesc) { if (CHECKS) { check(mipmap, 1); } return ncuExternalMemoryGetMappedMipmappedArray(memAddress(mipmap), extMem, mipmapDesc.address()); } // --- [ cuDestroyExternalMemory ] --- /** * Destroys an external memory object. * *

Destroys the specified external memory object. Any existing buffers and CUDA mipmapped arrays mapped onto this object must no longer be used and must * be explicitly freed using {@link #cuMemFree MemFree} and {@link #cuMipmappedArrayDestroy MipmappedArrayDestroy} respectively.

* * @param extMem external memory object to be destroyed */ @NativeType("CUresult") public static int cuDestroyExternalMemory(@NativeType("CUexternalMemory") long extMem) { long __functionAddress = Functions.DestroyExternalMemory; if (CHECKS) { check(__functionAddress); check(extMem); } return callPI(extMem, __functionAddress); } // --- [ cuImportExternalSemaphore ] --- /** Unsafe version of: {@link #cuImportExternalSemaphore ImportExternalSemaphore} */ public static int ncuImportExternalSemaphore(long extSem_out, long semHandleDesc) { long __functionAddress = Functions.ImportExternalSemaphore; if (CHECKS) { check(__functionAddress); } return callPPI(extSem_out, semHandleDesc, __functionAddress); } /** * Imports an external semaphore. * *

Imports an externally allocated synchronization object and returns a handle to that in {@code extSem_out}.

* *

The properties of the handle being imported must be described in {@code semHandleDesc}.

* *

If {@code ::type} is {@link #CU_EXTERNAL_SEMAPHORE_HANDLE_TYPE_OPAQUE_FD EXTERNAL_SEMAPHORE_HANDLE_TYPE_OPAQUE_FD}, then {@code ::handle::fd} must be a valid file descriptor referencing a synchronization * object. Ownership of the file descriptor is transferred to the CUDA driver when the handle is imported successfully. Performing any operations on the * file descriptor after it is imported results in undefined behavior.

* *

If {@code ::type} is {@link #CU_EXTERNAL_SEMAPHORE_HANDLE_TYPE_OPAQUE_WIN32 EXTERNAL_SEMAPHORE_HANDLE_TYPE_OPAQUE_WIN32}, then exactly one of {@code ::handle::win32::handle} and * {@code ::handle::win32::name} must not be {@code NULL}. If {@code ::handle::win32::handle} is not {@code NULL}, then it must represent a valid shared NT handle that * references a synchronization object. Ownership of this handle is not transferred to CUDA after the import operation, so the application must release * the handle using the appropriate system call. If {@code ::handle::win32::name} is not {@code NULL}, then it must name a valid synchronization object.

* *

If {@code ::type} is {@link #CU_EXTERNAL_SEMAPHORE_HANDLE_TYPE_OPAQUE_WIN32_KMT EXTERNAL_SEMAPHORE_HANDLE_TYPE_OPAQUE_WIN32_KMT}, then {@code ::handle::win32::handle} must be non-NULL and * {@code ::handle::win32::name} must be {@code NULL}. The handle specified must be a globally shared KMT handle. This handle does not hold a reference to the * underlying object, and thus will be invalid when all references to the synchronization object are destroyed.

* *

If {@code ::type} is {@link #CU_EXTERNAL_SEMAPHORE_HANDLE_TYPE_D3D12_FENCE EXTERNAL_SEMAPHORE_HANDLE_TYPE_D3D12_FENCE}, then exactly one of {@code ::handle::win32::handle} and {@code ::handle::win32::name} * must not be {@code NULL}. If {@code ::handle::win32::handle} is not {@code NULL}, then it must represent a valid shared NT handle that is returned by * {@code ID3D12Device::CreateSharedHandle} when referring to a {@code ID3D12Fence} object. This handle holds a reference to the underlying object. If * {@code ::handle::win32::name} is not {@code NULL}, then it must name a valid synchronization object that refers to a valid {@code ID3D12Fence} object.

* *

If {@code ::type} is {@link #CU_EXTERNAL_SEMAPHORE_HANDLE_TYPE_D3D11_FENCE EXTERNAL_SEMAPHORE_HANDLE_TYPE_D3D11_FENCE}, then {@code ::handle::win32::handle} represents a valid shared NT handle that is * returned by {@code ID3D11Fence::CreateSharedHandle}. If {@code ::handle::win32::name} is not {@code NULL}, then it must name a valid synchronization object * that refers to a valid {@code ID3D11Fence} object.

* *

If {@code ::type} is {@link #CU_EXTERNAL_SEMAPHORE_HANDLE_TYPE_NVSCISYNC EXTERNAL_SEMAPHORE_HANDLE_TYPE_NVSCISYNC}, then {@code ::handle::nvSciSyncObj} represents a valid {@code NvSciSyncObj}.

* *

If {@code ::type} is {@link #CU_EXTERNAL_SEMAPHORE_HANDLE_TYPE_D3D11_KEYED_MUTEX EXTERNAL_SEMAPHORE_HANDLE_TYPE_D3D11_KEYED_MUTEX}, then {@code ::handle::win32::handle} represents a valid shared NT handle that is returned by * {@code IDXGIResource1::CreateSharedHandle} when referring to a {@code IDXGIKeyedMutex} object. If {@code ::handle::win32::name} is not {@code NULL}, then it * must name a valid synchronization object that refers to a valid {@code IDXGIKeyedMutex} object.

* *

If {@code ::type} is {@link #CU_EXTERNAL_SEMAPHORE_HANDLE_TYPE_D3D11_KEYED_MUTEX_KMT EXTERNAL_SEMAPHORE_HANDLE_TYPE_D3D11_KEYED_MUTEX_KMT}, then {@code ::handle::win32::handle} represents a valid shared KMT handle * that is returned by {@code IDXGIResource::GetSharedHandle} when referring to a {@code IDXGIKeyedMutex} object and {@code ::handle::win32::name} must be * {@code NULL}.

* *

If {@code ::type} is {@link #CU_EXTERNAL_SEMAPHORE_HANDLE_TYPE_TIMELINE_SEMAPHORE_FD EXTERNAL_SEMAPHORE_HANDLE_TYPE_TIMELINE_SEMAPHORE_FD}, then {@code ::handle::fd} must be a valid file descriptor referencing a * synchronization object. Ownership of the file descriptor is transferred to the CUDA driver when the handle is imported successfully. Performing any * operations on the file descriptor after it is imported results in undefined behavior.

* *

If {@code ::type} is {@link #CU_EXTERNAL_SEMAPHORE_HANDLE_TYPE_TIMELINE_SEMAPHORE_WIN32 EXTERNAL_SEMAPHORE_HANDLE_TYPE_TIMELINE_SEMAPHORE_WIN32}, then exactly one of {@code ::handle::win32::handle} and * {@code ::handle::win32::name} must not be {@code NULL}. If {@code ::handle::win32::handle} is not {@code NULL}, then it must represent a valid shared NT handle that * references a synchronization object. Ownership of this handle is not transferred to CUDA after the import operation, so the application must release * the handle using the appropriate system call. If {@code ::handle::win32::name} is not {@code NULL}, then it must name a valid synchronization object.

* * @param extSem_out returned handle to an external semaphore * @param semHandleDesc semaphore import handle descriptor */ @NativeType("CUresult") public static int cuImportExternalSemaphore(@NativeType("CUexternalSemaphore *") PointerBuffer extSem_out, @NativeType("CUDA_EXTERNAL_SEMAPHORE_HANDLE_DESC const *") CUDA_EXTERNAL_SEMAPHORE_HANDLE_DESC semHandleDesc) { if (CHECKS) { check(extSem_out, 1); } return ncuImportExternalSemaphore(memAddress(extSem_out), semHandleDesc.address()); } // --- [ cuSignalExternalSemaphoresAsync ] --- /** * Unsafe version of: {@link #cuSignalExternalSemaphoresAsync SignalExternalSemaphoresAsync} * * @param numExtSems number of semaphores to signal */ public static int ncuSignalExternalSemaphoresAsync(long extSemArray, long paramsArray, int numExtSems, long stream) { long __functionAddress = Functions.SignalExternalSemaphoresAsync; if (CHECKS) { check(__functionAddress); } return callPPPI(extSemArray, paramsArray, numExtSems, stream, __functionAddress); } /** * Signals a set of external semaphore objects, * *

Enqueues a signal operation on a set of externally allocated semaphore objects in the specified stream. The operations will be executed when all prior * operations in the stream complete.

* *

The exact semantics of signaling a semaphore depends on the type of the object.

* *

If the semaphore object is any one of the following types: {@link #CU_EXTERNAL_SEMAPHORE_HANDLE_TYPE_OPAQUE_FD EXTERNAL_SEMAPHORE_HANDLE_TYPE_OPAQUE_FD}, * {@link #CU_EXTERNAL_SEMAPHORE_HANDLE_TYPE_OPAQUE_WIN32 EXTERNAL_SEMAPHORE_HANDLE_TYPE_OPAQUE_WIN32}, {@link #CU_EXTERNAL_SEMAPHORE_HANDLE_TYPE_OPAQUE_WIN32_KMT EXTERNAL_SEMAPHORE_HANDLE_TYPE_OPAQUE_WIN32_KMT} then signaling the semaphore will set it to the * signaled state.

* *

If the semaphore object is any one of the following types: {@link #CU_EXTERNAL_SEMAPHORE_HANDLE_TYPE_D3D12_FENCE EXTERNAL_SEMAPHORE_HANDLE_TYPE_D3D12_FENCE}, * {@link #CU_EXTERNAL_SEMAPHORE_HANDLE_TYPE_D3D11_FENCE EXTERNAL_SEMAPHORE_HANDLE_TYPE_D3D11_FENCE}, {@link #CU_EXTERNAL_SEMAPHORE_HANDLE_TYPE_TIMELINE_SEMAPHORE_FD EXTERNAL_SEMAPHORE_HANDLE_TYPE_TIMELINE_SEMAPHORE_FD}, * {@link #CU_EXTERNAL_SEMAPHORE_HANDLE_TYPE_TIMELINE_SEMAPHORE_WIN32 EXTERNAL_SEMAPHORE_HANDLE_TYPE_TIMELINE_SEMAPHORE_WIN32} then the semaphore will be set to the value specified in {@code ::params::fence::value}.

* *

If the semaphore object is of the type {@link #CU_EXTERNAL_SEMAPHORE_HANDLE_TYPE_NVSCISYNC EXTERNAL_SEMAPHORE_HANDLE_TYPE_NVSCISYNC} this API sets * {@code ::params::nvSciSync::fence} to a value that can be used by subsequent waiters of the same {@code NvSciSync} object to order operations with * those currently submitted in {@code stream}. Such an update will overwrite previous contents of {@code ::params::nvSciSync::fence}. By default, * signaling such an external semaphore object causes appropriate memory synchronization operations to be performed over all external memory objects that * are imported as {@link #CU_EXTERNAL_MEMORY_HANDLE_TYPE_NVSCIBUF EXTERNAL_MEMORY_HANDLE_TYPE_NVSCIBUF}. This ensures that any subsequent accesses made by other importers of the same set of NvSciBuf * memory object(s) are coherent. These operations can be skipped by specifying the flag {@link #CUDA_EXTERNAL_SEMAPHORE_SIGNAL_SKIP_NVSCIBUF_MEMSYNC}, which can * be used as a performance optimization when data coherency is not required. But specifying this flag in scenarios where data coherency is required * results in undefined behavior. Also, for semaphore object of the type {@link #CU_EXTERNAL_SEMAPHORE_HANDLE_TYPE_NVSCISYNC EXTERNAL_SEMAPHORE_HANDLE_TYPE_NVSCISYNC}, if the {@code NvSciSyncAttrList} used * to create the {@code NvSciSyncObj} had not set the flags in {@link #cuDeviceGetNvSciSyncAttributes DeviceGetNvSciSyncAttributes} to {@link #CUDA_NVSCISYNC_ATTR_SIGNAL}, this API will return * {@link #CUDA_ERROR_NOT_SUPPORTED}.

* *

If the semaphore object is any one of the following types: {@link #CU_EXTERNAL_SEMAPHORE_HANDLE_TYPE_D3D11_KEYED_MUTEX EXTERNAL_SEMAPHORE_HANDLE_TYPE_D3D11_KEYED_MUTEX}, * {@link #CU_EXTERNAL_SEMAPHORE_HANDLE_TYPE_D3D11_KEYED_MUTEX_KMT EXTERNAL_SEMAPHORE_HANDLE_TYPE_D3D11_KEYED_MUTEX_KMT} then the keyed mutex will be released with the key specified in {@code ::params::keyedmutex::key}.

* * @param extSemArray set of external semaphores to be signaled * @param paramsArray array of semaphore parameters * @param stream stream to enqueue the signal operations in */ @NativeType("CUresult") public static int cuSignalExternalSemaphoresAsync(@NativeType("CUexternalSemaphore const *") PointerBuffer extSemArray, @NativeType("CUDA_EXTERNAL_SEMAPHORE_SIGNAL_PARAMS const *") CUDA_EXTERNAL_SEMAPHORE_SIGNAL_PARAMS.Buffer paramsArray, @NativeType("CUstream") long stream) { if (CHECKS) { check(paramsArray, extSemArray.remaining()); } return ncuSignalExternalSemaphoresAsync(memAddress(extSemArray), paramsArray.address(), extSemArray.remaining(), stream); } // --- [ cuWaitExternalSemaphoresAsync ] --- /** * Unsafe version of: {@link #cuWaitExternalSemaphoresAsync WaitExternalSemaphoresAsync} * * @param numExtSems number of semaphores to wait on */ public static int ncuWaitExternalSemaphoresAsync(long extSemArray, long paramsArray, int numExtSems, long stream) { long __functionAddress = Functions.WaitExternalSemaphoresAsync; if (CHECKS) { check(__functionAddress); } return callPPPI(extSemArray, paramsArray, numExtSems, stream, __functionAddress); } /** * Waits on a set of external semaphore objects. * *

Enqueues a wait operation on a set of externally allocated semaphore objects in the specified stream. The operations will be executed when all prior * operations in the stream complete.

* *

The exact semantics of waiting on a semaphore depends on the type of the object.

* *

If the semaphore object is any one of the following types: {@link #CU_EXTERNAL_SEMAPHORE_HANDLE_TYPE_OPAQUE_FD EXTERNAL_SEMAPHORE_HANDLE_TYPE_OPAQUE_FD}, * {@link #CU_EXTERNAL_SEMAPHORE_HANDLE_TYPE_OPAQUE_WIN32 EXTERNAL_SEMAPHORE_HANDLE_TYPE_OPAQUE_WIN32}, {@link #CU_EXTERNAL_SEMAPHORE_HANDLE_TYPE_OPAQUE_WIN32_KMT EXTERNAL_SEMAPHORE_HANDLE_TYPE_OPAQUE_WIN32_KMT} then waiting on the semaphore will wait until * the semaphore reaches the signaled state. The semaphore will then be reset to the unsignaled state. Therefore for every signal operation, there can * only be one wait operation.

* *

If the semaphore object is any one of the following types: {@link #CU_EXTERNAL_SEMAPHORE_HANDLE_TYPE_D3D12_FENCE EXTERNAL_SEMAPHORE_HANDLE_TYPE_D3D12_FENCE}, * {@link #CU_EXTERNAL_SEMAPHORE_HANDLE_TYPE_D3D11_FENCE EXTERNAL_SEMAPHORE_HANDLE_TYPE_D3D11_FENCE}, {@link #CU_EXTERNAL_SEMAPHORE_HANDLE_TYPE_TIMELINE_SEMAPHORE_FD EXTERNAL_SEMAPHORE_HANDLE_TYPE_TIMELINE_SEMAPHORE_FD}, * {@link #CU_EXTERNAL_SEMAPHORE_HANDLE_TYPE_TIMELINE_SEMAPHORE_WIN32 EXTERNAL_SEMAPHORE_HANDLE_TYPE_TIMELINE_SEMAPHORE_WIN32} then waiting on the semaphore will wait until the value of the semaphore is greater than * or equal to {@code ::params::fence::value}.

* *

If the semaphore object is of the type {@link #CU_EXTERNAL_SEMAPHORE_HANDLE_TYPE_NVSCISYNC EXTERNAL_SEMAPHORE_HANDLE_TYPE_NVSCISYNC} then, waiting on the semaphore will wait until the * {@code ::params::nvSciSync::fence} is signaled by the signaler of the NvSciSyncObj that was associated with this semaphore object. By default, waiting * on such an external semaphore object causes appropriate memory synchronization operations to be performed over all external memory objects that are * imported as {@link #CU_EXTERNAL_MEMORY_HANDLE_TYPE_NVSCIBUF EXTERNAL_MEMORY_HANDLE_TYPE_NVSCIBUF}. This ensures that any subsequent accesses made by other importers of the same set of * {@code NvSciBuf} memory object(s) are coherent. These operations can be skipped by specifying the flag * {@link #CUDA_EXTERNAL_SEMAPHORE_WAIT_SKIP_NVSCIBUF_MEMSYNC}, which can be used as a performance optimization when data coherency is not required. But * specifying this flag in scenarios where data coherency is required results in undefined behavior. Also, for semaphore object of the type * {@link #CU_EXTERNAL_SEMAPHORE_HANDLE_TYPE_NVSCISYNC EXTERNAL_SEMAPHORE_HANDLE_TYPE_NVSCISYNC}, if the {@code NvSciSyncAttrList} used to create the {@code NvSciSyncObj} had not set the flags in * {@link #cuDeviceGetNvSciSyncAttributes DeviceGetNvSciSyncAttributes} to {@link #CUDA_NVSCISYNC_ATTR_WAIT}, this API will return {@link #CUDA_ERROR_NOT_SUPPORTED}.

* *

If the semaphore object is any one of the following types: {@link #CU_EXTERNAL_SEMAPHORE_HANDLE_TYPE_D3D11_KEYED_MUTEX EXTERNAL_SEMAPHORE_HANDLE_TYPE_D3D11_KEYED_MUTEX}, * {@link #CU_EXTERNAL_SEMAPHORE_HANDLE_TYPE_D3D11_KEYED_MUTEX_KMT EXTERNAL_SEMAPHORE_HANDLE_TYPE_D3D11_KEYED_MUTEX_KMT} then the keyed mutex will be acquired when it is released with the key specified in * {@code ::params::keyedmutex::key} or until the timeout specified by {@code ::params::keyedmutex::timeoutMs} has lapsed. The timeout interval can either * be a finite value specified in milliseconds or an infinite value. In case an infinite value is specified, the timeout never elapses. The Windows * {@code INFINITE} macro must be used to specify an infinite timeout.

* * @param extSemArray external semaphores to be waited on * @param paramsArray array of semaphore parameters * @param stream stream to enqueue the wait operations in */ @NativeType("CUresult") public static int cuWaitExternalSemaphoresAsync(@NativeType("CUexternalSemaphore const *") PointerBuffer extSemArray, @NativeType("CUDA_EXTERNAL_SEMAPHORE_WAIT_PARAMS const *") CUDA_EXTERNAL_SEMAPHORE_WAIT_PARAMS.Buffer paramsArray, @NativeType("CUstream") long stream) { if (CHECKS) { check(paramsArray, extSemArray.remaining()); } return ncuWaitExternalSemaphoresAsync(memAddress(extSemArray), paramsArray.address(), extSemArray.remaining(), stream); } // --- [ cuDestroyExternalSemaphore ] --- /** * Destroys an external semaphore. * *

Destroys an external semaphore object and releases any references to the underlying resource. Any outstanding signals or waits must have completed * before the semaphore is destroyed.

* * @param extSem external semaphore to be destroyed */ @NativeType("CUresult") public static int cuDestroyExternalSemaphore(@NativeType("CUexternalSemaphore") long extSem) { long __functionAddress = Functions.DestroyExternalSemaphore; if (CHECKS) { check(__functionAddress); check(extSem); } return callPI(extSem, __functionAddress); } // --- [ cuStreamWaitValue32 ] --- /** * Wait on a memory location. * *

Enqueues a synchronization of the stream on the given memory location. Work ordered after the operation will block until the given condition on the * memory is satisfied. By default, the condition is to wait for {@code (int32_t)(*addr - value) >= 0}, a cyclic greater-or-equal. Other condition types * can be specified via {@code flags}.

* *

If the memory was registered via {@link #cuMemHostRegister MemHostRegister}, the device pointer should be obtained with {@link #cuMemHostGetDevicePointer MemHostGetDevicePointer}. This function cannot be * used with managed memory ({@link #cuMemAllocManaged MemAllocManaged}).

* *

Support for this can be queried with {@link #cuDeviceGetAttribute DeviceGetAttribute} and {@link #CU_DEVICE_ATTRIBUTE_CAN_USE_STREAM_MEM_OPS DEVICE_ATTRIBUTE_CAN_USE_STREAM_MEM_OPS}.

* *

Support for {@link #CU_STREAM_WAIT_VALUE_NOR STREAM_WAIT_VALUE_NOR} can be queried with {@link #cuDeviceGetAttribute DeviceGetAttribute} and {@link #CU_DEVICE_ATTRIBUTE_CAN_USE_STREAM_WAIT_VALUE_NOR DEVICE_ATTRIBUTE_CAN_USE_STREAM_WAIT_VALUE_NOR}.
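
A minimal sketch of the wait pattern, assuming {@code stream} and {@code hostMem} (a hypothetical page-locked allocation already registered with {@link #cuMemHostRegister MemHostRegister}) are placeholders, the usual {@code cuMemHostGetDevicePointer(PointerBuffer, long, int)} binding from this class is available, and passing {@code flags = 0} selects the default greater-or-equal condition described above:

    // assumes: import static org.lwjgl.cuda.CU.*; import static org.lwjgl.system.MemoryStack.*;
    try (MemoryStack stack = stackPush()) {
        PointerBuffer pDev = stack.mallocPointer(1);
        cuMemHostGetDevicePointer(pDev, hostMem, 0);
        long addr = pDev.get(0);

        // Work enqueued on `stream` after this call will not run until the
        // 32-bit value at `addr` satisfies the (default) >= 42 condition.
        cuStreamWaitValue32(stream, addr, 42, 0);
        // ... enqueue dependent work on `stream` here ...
    }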

* * @param stream the stream to synchronize on the memory location * @param addr the memory location to wait on * @param value the value to compare with the memory location * @param flags see {@code CUstreamWaitValue_flags} */ @NativeType("CUresult") public static int cuStreamWaitValue32(@NativeType("CUstream") long stream, @NativeType("CUdeviceptr") long addr, @NativeType("cuuint32_t") int value, @NativeType("unsigned int") int flags) { long __functionAddress = Functions.StreamWaitValue32; if (CHECKS) { check(__functionAddress); check(addr); } return callPPI(stream, addr, value, flags, __functionAddress); } // --- [ cuStreamWaitValue64 ] --- /** * Wait on a memory location. * *

Enqueues a synchronization of the stream on the given memory location. Work ordered after the operation will block until the given condition on the * memory is satisfied. By default, the condition is to wait for {@code (int64_t)(*addr - value) >= 0}, a cyclic greater-or-equal. Other condition types * can be specified via {@code flags}.

* *

If the memory was registered via {@link #cuMemHostRegister MemHostRegister}, the device pointer should be obtained with {@link #cuMemHostGetDevicePointer MemHostGetDevicePointer}.

* *

Support for this can be queried with {@link #cuDeviceGetAttribute DeviceGetAttribute} and {@link #CU_DEVICE_ATTRIBUTE_CAN_USE_64_BIT_STREAM_MEM_OPS DEVICE_ATTRIBUTE_CAN_USE_64_BIT_STREAM_MEM_OPS}.

* * @param stream the stream to synchronize on the memory location * @param addr the memory location to wait on * @param value the value to compare with the memory location * @param flags see {@code CUstreamWaitValue_flags} */ @NativeType("CUresult") public static int cuStreamWaitValue64(@NativeType("CUstream") long stream, @NativeType("CUdeviceptr") long addr, @NativeType("cuuint64_t") long value, @NativeType("unsigned int") int flags) { long __functionAddress = Functions.StreamWaitValue64; if (CHECKS) { check(__functionAddress); check(addr); } return callPPJI(stream, addr, value, flags, __functionAddress); } // --- [ cuStreamWriteValue32 ] --- /** * Write a value to memory. * *

Write a value to memory. Unless the {@link #CU_STREAM_WRITE_VALUE_NO_MEMORY_BARRIER STREAM_WRITE_VALUE_NO_MEMORY_BARRIER} flag is passed, the write is preceded by a system-wide memory fence, * equivalent to a {@code __threadfence_system()} but scoped to the stream rather than a CUDA thread.

* *

If the memory was registered via {@link #cuMemHostRegister MemHostRegister}, the device pointer should be obtained with {@link #cuMemHostGetDevicePointer MemHostGetDevicePointer}. This function cannot * be used with managed memory ({@link #cuMemAllocManaged MemAllocManaged}).

* *

Support for this can be queried with {@link #cuDeviceGetAttribute DeviceGetAttribute} and {@link #CU_DEVICE_ATTRIBUTE_CAN_USE_STREAM_MEM_OPS DEVICE_ATTRIBUTE_CAN_USE_STREAM_MEM_OPS}.
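
Paired with {@link #cuStreamWaitValue32 StreamWaitValue32}, this makes a lightweight cross-stream flag; a sketch, with {@code producerStream}, {@code consumerStream} and {@code addr} (obtained as in the wait example above) all hypothetical:

    // assumes: import static org.lwjgl.cuda.CU.*;
    cuStreamWaitValue32(consumerStream, addr, 1, 0);  // consumer blocks until the flag reaches 1
    // ... enqueue producer work on producerStream ...
    cuStreamWriteValue32(producerStream, addr, 1, 0); // default flags: memory fence, then write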

* * @param stream the stream to do the write in * @param addr the device address to write to * @param value the value to write * @param flags see {@code CUstreamWriteValue_flags} */ @NativeType("CUresult") public static int cuStreamWriteValue32(@NativeType("CUstream") long stream, @NativeType("CUdeviceptr") long addr, @NativeType("cuuint32_t") int value, @NativeType("unsigned int") int flags) { long __functionAddress = Functions.StreamWriteValue32; if (CHECKS) { check(__functionAddress); check(addr); } return callPPI(stream, addr, value, flags, __functionAddress); } // --- [ cuStreamWriteValue64 ] --- /** * Write a value to memory. * *

Write a value to memory. Unless the {@link #CU_STREAM_WRITE_VALUE_NO_MEMORY_BARRIER STREAM_WRITE_VALUE_NO_MEMORY_BARRIER} flag is passed, the write is preceded by a system-wide memory fence, * equivalent to a {@code __threadfence_system()} but scoped to the stream rather than a CUDA thread.

* *

If the memory was registered via {@link #cuMemHostRegister MemHostRegister}, the device pointer should be obtained with {@link #cuMemHostGetDevicePointer MemHostGetDevicePointer}.

* *

Support for this can be queried with {@link #cuDeviceGetAttribute DeviceGetAttribute} and {@link #CU_DEVICE_ATTRIBUTE_CAN_USE_64_BIT_STREAM_MEM_OPS DEVICE_ATTRIBUTE_CAN_USE_64_BIT_STREAM_MEM_OPS}.

* * @param stream the stream to do the write in * @param addr the device address to write to * @param value the value to write * @param flags see {@code CUstreamWriteValue_flags} */ @NativeType("CUresult") public static int cuStreamWriteValue64(@NativeType("CUstream") long stream, @NativeType("CUdeviceptr") long addr, @NativeType("cuuint64_t") long value, @NativeType("unsigned int") int flags) { long __functionAddress = Functions.StreamWriteValue64; if (CHECKS) { check(__functionAddress); check(addr); } return callPPJI(stream, addr, value, flags, __functionAddress); } // --- [ cuStreamBatchMemOp ] --- /** * Unsafe version of: {@link #cuStreamBatchMemOp StreamBatchMemOp} * * @param count the number of operations in the array. Must be less than 256. */ public static int ncuStreamBatchMemOp(long stream, int count, long paramArray, int flags) { long __functionAddress = Functions.StreamBatchMemOp; if (CHECKS) { check(__functionAddress); } return callPPI(stream, count, paramArray, flags, __functionAddress); } /** * Batch operations to synchronize the stream via memory operations. * *

This is a batch version of {@link #cuStreamWaitValue32 StreamWaitValue32} and {@link #cuStreamWriteValue32 StreamWriteValue32}. Batching operations may avoid some performance overhead in both the * API call and the device execution versus adding them to the stream in separate API calls. The operations are enqueued in the order they appear in the * array.

* *

See {@code CUstreamBatchMemOpType} for the full set of supported operations, and {@link #cuStreamWaitValue32 StreamWaitValue32}, {@link #cuStreamWaitValue64 StreamWaitValue64}, {@link #cuStreamWriteValue32 StreamWriteValue32}, * and {@link #cuStreamWriteValue64 StreamWriteValue64} for details of specific operations.

* *

Basic support for this can be queried with {@link #cuDeviceGetAttribute DeviceGetAttribute} and {@link #CU_DEVICE_ATTRIBUTE_CAN_USE_STREAM_MEM_OPS DEVICE_ATTRIBUTE_CAN_USE_STREAM_MEM_OPS}. See related APIs for details on * querying support for specific operations.

* * @param stream the stream to enqueue the operations in * @param paramArray the types and parameters of the individual operations * @param flags reserved for future expansion; must be 0 */ @NativeType("CUresult") public static int cuStreamBatchMemOp(@NativeType("CUstream") long stream, @NativeType("CUstreamBatchMemOpParams *") CUstreamBatchMemOpParams.Buffer paramArray, @NativeType("unsigned int") int flags) { return ncuStreamBatchMemOp(stream, paramArray.remaining(), paramArray.address(), flags); } // --- [ cuFuncGetAttribute ] --- /** Unsafe version of: {@link #cuFuncGetAttribute FuncGetAttribute} */ public static int ncuFuncGetAttribute(long pi, int attrib, long hfunc) { long __functionAddress = Functions.FuncGetAttribute; if (CHECKS) { check(hfunc); } return callPPI(pi, attrib, hfunc, __functionAddress); } /** * Returns information about a function. * *

Returns in {@code *pi} the integer value of the attribute {@code attrib} on the kernel given by {@code hfunc}. The supported attributes are:

* *
    *
  • {@link #CU_FUNC_ATTRIBUTE_MAX_THREADS_PER_BLOCK FUNC_ATTRIBUTE_MAX_THREADS_PER_BLOCK}: The maximum number of threads per block, beyond which a launch of the function would fail. This number * depends on both the function and the device on which the function is currently loaded.
  • *
  • {@link #CU_FUNC_ATTRIBUTE_SHARED_SIZE_BYTES FUNC_ATTRIBUTE_SHARED_SIZE_BYTES}: The size in bytes of statically-allocated shared memory per block required by this function. This does not * include dynamically-allocated shared memory requested by the user at runtime.
  • *
  • {@link #CU_FUNC_ATTRIBUTE_CONST_SIZE_BYTES FUNC_ATTRIBUTE_CONST_SIZE_BYTES}: The size in bytes of user-allocated constant memory required by this function.
  • *
  • {@link #CU_FUNC_ATTRIBUTE_LOCAL_SIZE_BYTES FUNC_ATTRIBUTE_LOCAL_SIZE_BYTES}: The size in bytes of local memory used by each thread of this function.
  • *
  • {@link #CU_FUNC_ATTRIBUTE_NUM_REGS FUNC_ATTRIBUTE_NUM_REGS}: The number of registers used by each thread of this function.
  • *
  • {@link #CU_FUNC_ATTRIBUTE_PTX_VERSION FUNC_ATTRIBUTE_PTX_VERSION}: The PTX virtual architecture version for which the function was compiled. This value is the major PTX version * 10 + the minor PTX version, so a PTX version 1.3 function would return the value 13. Note that this may return the undefined value of 0 for cubins * compiled prior to CUDA 3.0.
  • *
  • {@link #CU_FUNC_ATTRIBUTE_BINARY_VERSION FUNC_ATTRIBUTE_BINARY_VERSION}: The binary architecture version for which the function was compiled. This value is the major binary version * 10 + the minor binary version, so a binary version 1.3 function would return the value 13. Note that this will return a value of 10 for legacy * cubins that do not have a properly-encoded binary architecture version.
  • *
  • {@link #CU_FUNC_ATTRIBUTE_CACHE_MODE_CA FUNC_ATTRIBUTE_CACHE_MODE_CA}: The attribute to indicate whether the function has been compiled with user specified option "-Xptxas --dlcm=ca" * set.
  • *
  • {@link #CU_FUNC_ATTRIBUTE_MAX_DYNAMIC_SHARED_SIZE_BYTES FUNC_ATTRIBUTE_MAX_DYNAMIC_SHARED_SIZE_BYTES}: The maximum size in bytes of dynamically-allocated shared memory.
  • *
  • {@link #CU_FUNC_ATTRIBUTE_PREFERRED_SHARED_MEMORY_CARVEOUT FUNC_ATTRIBUTE_PREFERRED_SHARED_MEMORY_CARVEOUT}: Preferred shared memory-L1 cache split ratio in percent of total shared memory.
  • *
* * @param pi returned attribute value * @param attrib attribute requested * @param hfunc function to query attribute of */ @NativeType("CUresult") public static int cuFuncGetAttribute(@NativeType("int *") IntBuffer pi, @NativeType("CUfunction_attribute") int attrib, @NativeType("CUfunction") long hfunc) { if (CHECKS) { check(pi, 1); } return ncuFuncGetAttribute(memAddress(pi), attrib, hfunc); } // --- [ cuFuncSetAttribute ] --- /** * Sets information about a function. * *

This call sets the value of a specified attribute {@code attrib} on the kernel given by {@code hfunc} to an integer value specified by {@code val}. This * function returns {@link #CUDA_SUCCESS} if the new value of the attribute could be successfully set. If the set fails, this call will return an error. Not all * attributes can have values set. Attempting to set a value on a read-only attribute will result in an error ({@link #CUDA_ERROR_INVALID_VALUE}).
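
A short sketch of querying one attribute and raising the dynamic shared memory limit, assuming {@code function} is a hypothetical {@code CUfunction} handle and the 64 KiB figure is purely illustrative:

    // assumes: import static org.lwjgl.cuda.CU.*; import static org.lwjgl.system.MemoryStack.*;
    try (MemoryStack stack = stackPush()) {
        IntBuffer pi = stack.mallocInt(1);
        cuFuncGetAttribute(pi, CU_FUNC_ATTRIBUTE_MAX_THREADS_PER_BLOCK, function);
        int maxThreadsPerBlock = pi.get(0);

        // Opt in to a larger dynamic shared memory carve-out for this kernel.
        int err = cuFuncSetAttribute(function, CU_FUNC_ATTRIBUTE_MAX_DYNAMIC_SHARED_SIZE_BYTES, 64 * 1024);
        if (err != CUDA_SUCCESS) {
            // e.g. CUDA_ERROR_INVALID_VALUE for a read-only attribute or an out-of-range value
        }
    }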

* *

Supported attributes for the cuFuncSetAttribute call are:

* *
    *
  • {@link #CU_FUNC_ATTRIBUTE_MAX_DYNAMIC_SHARED_SIZE_BYTES FUNC_ATTRIBUTE_MAX_DYNAMIC_SHARED_SIZE_BYTES}: The maximum size in bytes of dynamically-allocated shared memory. The value should contain the * requested maximum size of dynamically-allocated shared memory. The sum of this value and the function attribute * {@link #CU_FUNC_ATTRIBUTE_SHARED_SIZE_BYTES FUNC_ATTRIBUTE_SHARED_SIZE_BYTES} cannot exceed the device attribute {@link #CU_DEVICE_ATTRIBUTE_MAX_SHARED_MEMORY_PER_BLOCK_OPTIN DEVICE_ATTRIBUTE_MAX_SHARED_MEMORY_PER_BLOCK_OPTIN}. The maximal size * of requestable dynamic shared memory may differ by GPU architecture.
  • *
  • {@link #CU_FUNC_ATTRIBUTE_PREFERRED_SHARED_MEMORY_CARVEOUT FUNC_ATTRIBUTE_PREFERRED_SHARED_MEMORY_CARVEOUT}: On devices where the L1 cache and shared memory use the same hardware resources, this sets * the shared memory carveout preference, in percent of the total shared memory. See {@link #CU_DEVICE_ATTRIBUTE_MAX_SHARED_MEMORY_PER_MULTIPROCESSOR DEVICE_ATTRIBUTE_MAX_SHARED_MEMORY_PER_MULTIPROCESSOR}. This * is only a hint, and the driver can choose a different ratio if required to execute the function.
  • *
* * @param hfunc function to query attribute of * @param attrib attribute requested * @param value the value to set */ @NativeType("CUresult") public static int cuFuncSetAttribute(@NativeType("CUfunction") long hfunc, @NativeType("CUfunction_attribute") int attrib, int value) { long __functionAddress = Functions.FuncSetAttribute; if (CHECKS) { check(__functionAddress); check(hfunc); } return callPI(hfunc, attrib, value, __functionAddress); } // --- [ cuFuncSetCacheConfig ] --- /** * Sets the preferred cache configuration for a device function. * *

On devices where the L1 cache and shared memory use the same hardware resources, this sets through {@code config} the preferred cache configuration for * the device function {@code hfunc}. This is only a preference. The driver will use the requested configuration if possible, but it is free to choose a * different configuration if required to execute {@code hfunc}. Any context-wide preference set via {@link #cuCtxSetCacheConfig CtxSetCacheConfig} will be overridden by this * per-function setting unless the per-function setting is {@link #CU_FUNC_CACHE_PREFER_NONE FUNC_CACHE_PREFER_NONE}. In that case, the current context-wide setting will be used.

* *

This setting does nothing on devices where the size of the L1 cache and shared memory are fixed.

* *

Launching a kernel with a different preference than the most recent preference setting may insert a device-side synchronization point.
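
For illustration, a one-line sketch with a hypothetical {@code function} handle; the driver treats this strictly as a preference:

    // assumes: import static org.lwjgl.cuda.CU.*;
    cuFuncSetCacheConfig(function, CU_FUNC_CACHE_PREFER_SHARED); // favour shared memory over L1 for this kernel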

* *

The supported cache configurations are:

* *
    *
  • {@link #CU_FUNC_CACHE_PREFER_NONE FUNC_CACHE_PREFER_NONE}: no preference for shared memory or L1 (default)
  • *
  • {@link #CU_FUNC_CACHE_PREFER_SHARED FUNC_CACHE_PREFER_SHARED}: prefer larger shared memory and smaller L1 cache
  • *
  • {@link #CU_FUNC_CACHE_PREFER_L1 FUNC_CACHE_PREFER_L1}: prefer larger L1 cache and smaller shared memory
  • *
  • {@link #CU_FUNC_CACHE_PREFER_EQUAL FUNC_CACHE_PREFER_EQUAL}: prefer equal sized L1 cache and shared memory
  • *
* * @param hfunc kernel to configure cache for * @param config requested cache configuration */ @NativeType("CUresult") public static int cuFuncSetCacheConfig(@NativeType("CUfunction") long hfunc, @NativeType("CUfunc_cache") int config) { long __functionAddress = Functions.FuncSetCacheConfig; if (CHECKS) { check(hfunc); } return callPI(hfunc, config, __functionAddress); } // --- [ cuFuncSetSharedMemConfig ] --- /** * Sets the shared memory configuration for a device function. * *

On devices with configurable shared memory banks, this function will force all subsequent launches of the specified device function to have the given * shared memory bank size configuration. On any given launch of the function, the shared memory configuration of the device will be temporarily changed * if needed to suit the function's preferred configuration. Changes in shared memory configuration between subsequent launches of functions may * introduce a device-side synchronization point.

* *

Any per-function setting of shared memory bank size set via {@link #cuFuncSetSharedMemConfig FuncSetSharedMemConfig} will override the context wide setting set with * {@link #cuCtxSetSharedMemConfig CtxSetSharedMemConfig}.

* *

Changing the shared memory bank size will not increase shared memory usage or affect occupancy of kernels, but may have major effects on performance. * Larger bank sizes will allow for greater potential bandwidth to shared memory, but will change what kinds of accesses to shared memory will result in * bank conflicts.

* *

This function will do nothing on devices with fixed shared memory bank size.

* *

The supported bank configurations are:

* *
    *
  • {@link #CU_SHARED_MEM_CONFIG_DEFAULT_BANK_SIZE SHARED_MEM_CONFIG_DEFAULT_BANK_SIZE}: use the context's shared memory configuration when launching this function.
  • *
  • {@link #CU_SHARED_MEM_CONFIG_FOUR_BYTE_BANK_SIZE SHARED_MEM_CONFIG_FOUR_BYTE_BANK_SIZE}: set shared memory bank width to be natively four bytes when launching this function.
  • *
  • {@link #CU_SHARED_MEM_CONFIG_EIGHT_BYTE_BANK_SIZE SHARED_MEM_CONFIG_EIGHT_BYTE_BANK_SIZE}: set shared memory bank width to be natively eight bytes when launching this function.
  • *
* * @param hfunc kernel to be given a shared memory config * @param config requested shared memory configuration */ @NativeType("CUresult") public static int cuFuncSetSharedMemConfig(@NativeType("CUfunction") long hfunc, @NativeType("CUsharedconfig") int config) { long __functionAddress = Functions.FuncSetSharedMemConfig; if (CHECKS) { check(__functionAddress); check(hfunc); } return callPI(hfunc, config, __functionAddress); } // --- [ cuFuncGetModule ] --- /** Unsafe version of: {@link #cuFuncGetModule FuncGetModule} */ public static int ncuFuncGetModule(long hmod, long hfunc) { long __functionAddress = Functions.FuncGetModule; if (CHECKS) { check(__functionAddress); check(hfunc); } return callPPI(hmod, hfunc, __functionAddress); } /** * Returns a module handle. * *

Returns in {@code *hmod} the handle of the module that function {@code hfunc} is located in. The lifetime of the module corresponds to the lifetime of * the context it was loaded in or until the module is explicitly unloaded.

* *

The CUDA runtime manages its own modules loaded into the primary context. If the handle returned by this API refers to a module loaded by the CUDA * runtime, calling {@link #cuModuleUnload ModuleUnload} on that module will result in undefined behavior.
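
A minimal sketch, with {@code function} a hypothetical {@code CUfunction} handle:

    // assumes: import static org.lwjgl.cuda.CU.*; import static org.lwjgl.system.MemoryStack.*;
    try (MemoryStack stack = stackPush()) {
        PointerBuffer pMod = stack.mallocPointer(1);
        if (cuFuncGetModule(pMod, function) == CUDA_SUCCESS) {
            long hmod = pMod.get(0);
            // Inspect the owning module here; do not cuModuleUnload(hmod) if it may
            // belong to the CUDA runtime's primary context (see the note above).
        }
    }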

* * @param hmod returned module handle * @param hfunc function to retrieve module for */ @NativeType("CUresult") public static int cuFuncGetModule(@NativeType("CUmodule *") PointerBuffer hmod, @NativeType("CUfunction") long hfunc) { if (CHECKS) { check(hmod, 1); } return ncuFuncGetModule(memAddress(hmod), hfunc); } // --- [ cuLaunchKernel ] --- /** Unsafe version of: {@link #cuLaunchKernel LaunchKernel} */ public static int ncuLaunchKernel(long f, int gridDimX, int gridDimY, int gridDimZ, int blockDimX, int blockDimY, int blockDimZ, int sharedMemBytes, long hStream, long kernelParams, long extra) { long __functionAddress = Functions.LaunchKernel; if (CHECKS) { check(__functionAddress); check(f); } return callPPPPI(f, gridDimX, gridDimY, gridDimZ, blockDimX, blockDimY, blockDimZ, sharedMemBytes, hStream, kernelParams, extra, __functionAddress); } /** * Launches a CUDA function. * *

Invokes the kernel {@code f} on a {@code gridDimX} x {@code gridDimY} x {@code gridDimZ} grid of blocks. Each block contains {@code blockDimX} x {@code * blockDimY} x {@code blockDimZ} threads.

* *

{@code sharedMemBytes} sets the amount of dynamic shared memory that will be available to each thread block.

* *

Kernel parameters to {@code f} can be specified in one of two ways:

* *
    *
  1. Kernel parameters can be specified via {@code kernelParams}. * *

    If {@code f} has N parameters, then {@code kernelParams} needs to be an array of N pointers. Each of {@code kernelParams[0]} through * {@code kernelParams[N-1]} must point to a region of memory from which the actual kernel parameter will be copied. The number of kernel parameters * and their offsets and sizes do not need to be specified as that information is retrieved directly from the kernel's image.

  2. *
  3. Kernel parameters can also be packaged by the application into a single buffer that is passed in via the {@code extra} parameter. * *

    This places the burden on the application of knowing each kernel parameter's size and alignment/padding within the buffer. Here is an example of * using the {@code extra} parameter in this manner:

    * *
    
         * size_t argBufferSize;
         * char argBuffer[256];
         * 
         * // populate argBuffer and argBufferSize
         * 
         * void *config[] = {
         *     CU_LAUNCH_PARAM_BUFFER_POINTER, argBuffer,
         *     CU_LAUNCH_PARAM_BUFFER_SIZE,    &argBufferSize,
         *     CU_LAUNCH_PARAM_END
         * };
         * status = cuLaunchKernel(f, gx, gy, gz, bx, by, bz, sh, s, NULL, config);
  4. *
* *

The {@code extra} parameter exists to allow {@code cuLaunchKernel()} to take additional less commonly used arguments. {@code extra} specifies a list of * names of extra settings and their corresponding values. Each extra setting name is immediately followed by the corresponding value. The list must be * terminated with either {@code NULL} or {@link #CU_LAUNCH_PARAM_END LAUNCH_PARAM_END}.

* *
    *
  • {@link #CU_LAUNCH_PARAM_END LAUNCH_PARAM_END}, which indicates the end of the {@code extra} array
  • *
  • {@link #CU_LAUNCH_PARAM_BUFFER_POINTER LAUNCH_PARAM_BUFFER_POINTER}, which specifies that the next value in {@code extra} will be a pointer to a buffer containing all the kernel * parameters for launching kernel {@code f}
  • *
  • {@link #CU_LAUNCH_PARAM_BUFFER_SIZE LAUNCH_PARAM_BUFFER_SIZE}, which specifies that the next value in {@code extra} will be a pointer to a size_t containing the size of the buffer * specified with {@link #CU_LAUNCH_PARAM_BUFFER_POINTER LAUNCH_PARAM_BUFFER_POINTER}
  • *
* *

The error {@link #CUDA_ERROR_INVALID_VALUE} will be returned if kernel parameters are specified with both {@code kernelParams} and {@code extra} (i.e. both * {@code kernelParams} and {@code extra} are non-{@code NULL}).

* *

Calling {@code cuLaunchKernel()} invalidates the persistent function state set through the following deprecated APIs: {@link #cuFuncSetBlockShape FuncSetBlockShape}, * {@link #cuFuncSetSharedSize FuncSetSharedSize}, {@link #cuParamSetSize ParamSetSize}, {@link #cuParamSeti ParamSeti}, {@link #cuParamSetf ParamSetf}, {@link #cuParamSetv ParamSetv}.

* *

Note that to use {@link #cuLaunchKernel LaunchKernel}, the kernel {@code f} must either have been compiled with toolchain version 3.2 or later so that it will contain * kernel parameter information, or have no kernel parameters. If either of these conditions is not met, then {@link #cuLaunchKernel LaunchKernel} will return * {@link #CUDA_ERROR_INVALID_IMAGE}.
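
As a Java counterpart to the C {@code extra}-buffer snippet above, a sketch of the simpler {@code kernelParams} path, assuming a hypothetical kernel handle {@code function} taking {@code (float* data, int n)}, a device pointer {@code dData}, an element count {@code n} and a stream handle {@code stream}:

    // assumes: import static org.lwjgl.cuda.CU.*;
    //          import static org.lwjgl.system.MemoryStack.*; import static org.lwjgl.system.MemoryUtil.*;
    try (MemoryStack stack = stackPush()) {
        // One pointer per kernel parameter, each pointing at the parameter's value.
        PointerBuffer kernelParams = stack.mallocPointer(2);
        kernelParams.put(0, memAddress(stack.longs(dData))); // float* data (CUdeviceptr)
        kernelParams.put(1, memAddress(stack.ints(n)));      // int n

        int blockDim = 256;
        int gridDim  = (n + blockDim - 1) / blockDim;

        int err = cuLaunchKernel(function,
            gridDim, 1, 1,   // grid dimensions
            blockDim, 1, 1,  // block dimensions
            0,               // dynamic shared memory bytes
            stream,
            kernelParams,
            null);           // `extra` must be NULL when kernelParams is used
        if (err != CUDA_SUCCESS) {
            throw new IllegalStateException("cuLaunchKernel failed: " + err);
        }
    }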

* * @param f kernel to launch * @param gridDimX width of grid in blocks * @param gridDimY height of grid in blocks * @param gridDimZ depth of grid in blocks * @param blockDimX x dimension of each thread block * @param blockDimY y dimension of each thread block * @param blockDimZ z dimension of each thread block * @param sharedMemBytes dynamic shared-memory size per thread block in bytes * @param hStream stream identifier * @param kernelParams array of pointers to kernel parameters * @param extra extra options */ @NativeType("CUresult") public static int cuLaunchKernel(@NativeType("CUfunction") long f, @NativeType("unsigned int") int gridDimX, @NativeType("unsigned int") int gridDimY, @NativeType("unsigned int") int gridDimZ, @NativeType("unsigned int") int blockDimX, @NativeType("unsigned int") int blockDimY, @NativeType("unsigned int") int blockDimZ, @NativeType("unsigned int") int sharedMemBytes, @NativeType("CUstream") long hStream, @Nullable @NativeType("void **") PointerBuffer kernelParams, @Nullable @NativeType("void **") PointerBuffer extra) { return ncuLaunchKernel(f, gridDimX, gridDimY, gridDimZ, blockDimX, blockDimY, blockDimZ, sharedMemBytes, hStream, memAddressSafe(kernelParams), memAddressSafe(extra)); } // --- [ cuLaunchCooperativeKernel ] --- /** Unsafe version of: {@link #cuLaunchCooperativeKernel LaunchCooperativeKernel} */ public static int ncuLaunchCooperativeKernel(long f, int gridDimX, int gridDimY, int gridDimZ, int blockDimX, int blockDimY, int blockDimZ, int sharedMemBytes, long hStream, long kernelParams) { long __functionAddress = Functions.LaunchCooperativeKernel; if (CHECKS) { check(__functionAddress); check(f); } return callPPPI(f, gridDimX, gridDimY, gridDimZ, blockDimX, blockDimY, blockDimZ, sharedMemBytes, hStream, kernelParams, __functionAddress); } /** * Launches a CUDA function where thread blocks can cooperate and synchronize as they execute. * *

Invokes the kernel {@code f} on a {@code gridDimX} x {@code gridDimY} x {@code gridDimZ} grid of blocks. Each block contains {@code blockDimX} x {@code * blockDimY} x {@code blockDimZ} threads.

* *

{@code sharedMemBytes} sets the amount of dynamic shared memory that will be available to each thread block.

* *

The device on which this kernel is invoked must have a non-zero value for the device attribute {@link #CU_DEVICE_ATTRIBUTE_COOPERATIVE_LAUNCH DEVICE_ATTRIBUTE_COOPERATIVE_LAUNCH}.

* *

The total number of blocks launched cannot exceed the maximum number of blocks per multiprocessor as returned by * {@link #cuOccupancyMaxActiveBlocksPerMultiprocessor OccupancyMaxActiveBlocksPerMultiprocessor} (or {@link #cuOccupancyMaxActiveBlocksPerMultiprocessorWithFlags OccupancyMaxActiveBlocksPerMultiprocessorWithFlags}) times the number of multiprocessors as * specified by the device attribute {@link #CU_DEVICE_ATTRIBUTE_MULTIPROCESSOR_COUNT DEVICE_ATTRIBUTE_MULTIPROCESSOR_COUNT}.
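
A hedged sketch of sizing a cooperative grid against that limit, assuming hypothetical {@code function}, {@code blockDim}, {@code device}, {@code stream}, {@code kernelParams} and {@code desiredGridDim}, and the usual {@code cuOccupancyMaxActiveBlocksPerMultiprocessor(IntBuffer, long, int, long)} binding from this class:

    // assumes: import static org.lwjgl.cuda.CU.*; import static org.lwjgl.system.MemoryStack.*;
    try (MemoryStack stack = stackPush()) {
        IntBuffer pi = stack.mallocInt(1);

        cuOccupancyMaxActiveBlocksPerMultiprocessor(pi, function, blockDim, 0);
        int blocksPerSM = pi.get(0);

        cuDeviceGetAttribute(pi, CU_DEVICE_ATTRIBUTE_MULTIPROCESSOR_COUNT, device);
        int smCount = pi.get(0);

        int maxCooperativeBlocks = blocksPerSM * smCount; // upper bound for the whole grid
        int gridDim = Math.min(desiredGridDim, maxCooperativeBlocks);

        cuLaunchCooperativeKernel(function,
            gridDim, 1, 1,
            blockDim, 1, 1,
            0, stream, kernelParams);
    }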

* *

The kernel cannot make use of CUDA dynamic parallelism.

* *

Kernel parameters must be specified via {@code kernelParams}. If {@code f} has N parameters, then {@code kernelParams} needs to be an array of N * pointers. Each of {@code kernelParams[0]} through {@code kernelParams[N-1]} must point to a region of memory from which the actual kernel parameter * will be copied. The number of kernel parameters and their offsets and sizes do not need to be specified as that information is retrieved directly from * the kernel's image.

* *

Calling {@link #cuLaunchCooperativeKernel LaunchCooperativeKernel} sets persistent function state that is the same as function state set through the {@link #cuLaunchKernel LaunchKernel} API.

* *

When the kernel {@code f} is launched via {@link #cuLaunchCooperativeKernel LaunchCooperativeKernel}, the previous block shape, shared size and parameter info associated with * {@code f} is overwritten.

* *

Note that to use {@link #cuLaunchCooperativeKernel LaunchCooperativeKernel}, the kernel {@code f} must either have been compiled with toolchain version 3.2 or later so that it will * contain kernel parameter information, or have no kernel parameters. If either of these conditions is not met, then {@link #cuLaunchCooperativeKernel LaunchCooperativeKernel} will * return {@link #CUDA_ERROR_INVALID_IMAGE}.

* * @param f kernel to launch * @param gridDimX width of grid in blocks * @param gridDimY height of grid in blocks * @param gridDimZ depth of grid in blocks * @param blockDimX x dimension of each thread block * @param blockDimY y dimension of each thread block * @param blockDimZ z dimension of each thread block * @param sharedMemBytes dynamic shared-memory size per thread block in bytes * @param hStream stream identifier * @param kernelParams array of pointers to kernel parameters */ @NativeType("CUresult") public static int cuLaunchCooperativeKernel(@NativeType("CUfunction") long f, @NativeType("unsigned int") int gridDimX, @NativeType("unsigned int") int gridDimY, @NativeType("unsigned int") int gridDimZ, @NativeType("unsigned int") int blockDimX, @NativeType("unsigned int") int blockDimY, @NativeType("unsigned int") int blockDimZ, @NativeType("unsigned int") int sharedMemBytes, @NativeType("CUstream") long hStream, @Nullable @NativeType("void **") PointerBuffer kernelParams) { return ncuLaunchCooperativeKernel(f, gridDimX, gridDimY, gridDimZ, blockDimX, blockDimY, blockDimZ, sharedMemBytes, hStream, memAddressSafe(kernelParams)); } // --- [ cuLaunchCooperativeKernelMultiDevice ] --- /** * Unsafe version of: {@link #cuLaunchCooperativeKernelMultiDevice LaunchCooperativeKernelMultiDevice} * * @param numDevices size of the {@code launchParamsList} array */ public static int ncuLaunchCooperativeKernelMultiDevice(long launchParamsList, int numDevices, int flags) { long __functionAddress = Functions.LaunchCooperativeKernelMultiDevice; if (CHECKS) { check(__functionAddress); } return callPI(launchParamsList, numDevices, flags, __functionAddress); } /** * Launches CUDA functions on multiple devices where thread blocks can cooperate and synchronize as they executeDeprecated: This function is deprecated as * of CUDA 11.3. * *

Invokes kernels as specified in the {@code launchParamsList} array where each element of the array specifies all the parameters required to perform a * single kernel launch. These kernels can cooperate and synchronize as they execute. The size of the array is specified by {@code numDevices}.

* *

No two kernels can be launched on the same device. All the devices targeted by this multi-device launch must be identical. All devices must have a * non-zero value for the device attribute {@link #CU_DEVICE_ATTRIBUTE_COOPERATIVE_MULTI_DEVICE_LAUNCH DEVICE_ATTRIBUTE_COOPERATIVE_MULTI_DEVICE_LAUNCH}.

* *

All kernels launched must be identical with respect to the compiled code. Note that any {@code __device__}, {@code __constant__} or {@code __managed__} variables present * in the module that owns the kernel launched on each device are independently instantiated on every device. It is the application's responsibility to * ensure these variables are initialized and used appropriately.

* *

The size of the grids as specified in blocks, the size of the blocks themselves and the amount of shared memory used by each thread block must also * match across all launched kernels.

* *

The streams used to launch these kernels must have been created via either {@link #cuStreamCreate StreamCreate} or {@link #cuStreamCreateWithPriority StreamCreateWithPriority}. The {@code NULL} stream or * {@link #CU_STREAM_LEGACY STREAM_LEGACY} or {@link #CU_STREAM_PER_THREAD STREAM_PER_THREAD} cannot be used.

* *

The total number of blocks launched per kernel cannot exceed the maximum number of blocks per multiprocessor as returned by * {@link #cuOccupancyMaxActiveBlocksPerMultiprocessor OccupancyMaxActiveBlocksPerMultiprocessor} (or {@link #cuOccupancyMaxActiveBlocksPerMultiprocessorWithFlags OccupancyMaxActiveBlocksPerMultiprocessorWithFlags}) times the number of multiprocessors as * specified by the device attribute {@link #CU_DEVICE_ATTRIBUTE_MULTIPROCESSOR_COUNT DEVICE_ATTRIBUTE_MULTIPROCESSOR_COUNT}. Since the total number of blocks launched per device has to match across * all devices, the maximum number of blocks that can be launched per device will be limited by the device with the least number of multiprocessors.

* *

The kernels cannot make use of CUDA dynamic parallelism.

* *
    *
  • {@code CUDA_LAUNCH_PARAMS::function} specifies the kernel to be launched. All functions must be identical with respect to the compiled code.
  • *
  • {@code CUDA_LAUNCH_PARAMS::gridDimX} is the width of the grid in blocks. This must match across all kernels launched.
  • *
  • {@code CUDA_LAUNCH_PARAMS::gridDimY} is the height of the grid in blocks. This must match across all kernels launched.
  • *
  • {@code CUDA_LAUNCH_PARAMS::gridDimZ} is the depth of the grid in blocks. This must match across all kernels launched.
  • *
  • {@code CUDA_LAUNCH_PARAMS::blockDimX} is the X dimension of each thread block. This must match across all kernels launched.
  • *
  • {@code CUDA_LAUNCH_PARAMS::blockDimY} is the Y dimension of each thread block. This must match across all kernels launched.
  • *
  • {@code CUDA_LAUNCH_PARAMS::blockDimZ} is the Z dimension of each thread block. This must match across all kernels launched.
  • *
  • {@code CUDA_LAUNCH_PARAMS::sharedMemBytes} is the dynamic shared-memory size per thread block in bytes. This must match across all kernels * launched.
  • *
  • {@code CUDA_LAUNCH_PARAMS::hStream} is the handle to the stream to perform the launch in. This cannot be the {@code NULL} stream or {@link #CU_STREAM_LEGACY STREAM_LEGACY} or * {@link #CU_STREAM_PER_THREAD STREAM_PER_THREAD}. The CUDA context associated with this stream must match that associated with {@code CUDA_LAUNCH_PARAMS::function}.
  • *
  • {@code CUDA_LAUNCH_PARAMS::kernelParams} is an array of pointers to kernel parameters. If {@code ::function} has N parameters, then * {@code ::kernelParams} needs to be an array of N pointers. Each of {@code ::kernelParams[0]} through {@code ::kernelParams[N-1]} must point to a * region of memory from which the actual kernel parameter will be copied. The number of kernel parameters and their offsets and sizes do not need to * be specified as that information is retrieved directly from the kernel's image.
  • *
* *

By default, the kernel won't begin execution on any GPU until all prior work in all the specified streams has completed. This behavior can be * overridden by specifying the flag {@link #CU_CUDA_COOPERATIVE_LAUNCH_MULTI_DEVICE_NO_PRE_LAUNCH_SYNC CUDA_COOPERATIVE_LAUNCH_MULTI_DEVICE_NO_PRE_LAUNCH_SYNC}. When this flag is specified, each kernel will only wait * for prior work in the stream corresponding to that GPU to complete before it begins execution.

* *

Similarly, by default, any subsequent work pushed in any of the specified streams will not begin execution until the kernels on all GPUs have * completed. This behavior can be overridden by specifying the flag {@link #CU_CUDA_COOPERATIVE_LAUNCH_MULTI_DEVICE_NO_POST_LAUNCH_SYNC CUDA_COOPERATIVE_LAUNCH_MULTI_DEVICE_NO_POST_LAUNCH_SYNC}. When this flag is * specified, any subsequent work pushed in any of the specified streams will only wait for the kernel launched on the GPU corresponding to that stream to * complete before it begins execution.

* *

Calling {@link #cuLaunchCooperativeKernelMultiDevice LaunchCooperativeKernelMultiDevice} sets persistent function state that is the same as function state set through {@link #cuLaunchKernel LaunchKernel} API * when called individually for each element in {@code launchParamsList}.

* *

When kernels are launched via {@link #cuLaunchCooperativeKernelMultiDevice LaunchCooperativeKernelMultiDevice}, the previous block shape, shared size and parameter info associated with each * {@code CUDA_LAUNCH_PARAMS::function} in {@code launchParamsList} is overwritten.

* *

Note that to use {@link #cuLaunchCooperativeKernelMultiDevice LaunchCooperativeKernelMultiDevice}, the kernels must either have been compiled with toolchain version 3.2 or later so that they * will contain kernel parameter information, or have no kernel parameters. If either of these conditions is not met, then * {@link #cuLaunchCooperativeKernelMultiDevice LaunchCooperativeKernelMultiDevice} will return {@link #CUDA_ERROR_INVALID_IMAGE}.
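Purely as an illustration of how this deprecated multi-device path is driven from LWJGL, the following hedged sketch fills one {@code CUDA_LAUNCH_PARAMS} element per device; {@code numDevices}, {@code functions[i]}, {@code streams[i]} and {@code params[i]} are hypothetical, and the fluent struct setters are assumed to mirror the CUDA field names listed above.

// Hypothetical sketch (deprecated as of CUDA 11.3): one CUDA_LAUNCH_PARAMS entry per target device.
try (MemoryStack stack = MemoryStack.stackPush()) {
    CUDA_LAUNCH_PARAMS.Buffer launchParams = CUDA_LAUNCH_PARAMS.calloc(numDevices, stack);
    for (int i = 0; i < numDevices; i++) {
        launchParams.get(i)
            .function(functions[i])                // identical compiled code on every device
            .gridDimX(40).gridDimY(1).gridDimZ(1)  // grid/block shape must match across devices
            .blockDimX(256).blockDimY(1).blockDimZ(1)
            .sharedMemBytes(0)
            .hStream(streams[i])                   // created with cuStreamCreate; never the NULL stream
            .kernelParams(params[i]);              // PointerBuffer of per-device kernel arguments
    }
    int err = cuLaunchCooperativeKernelMultiDevice(launchParams, 0);
}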

* * @param launchParamsList list of launch parameters, one per device * @param flags flags to control launch behavior */ @NativeType("CUresult") public static int cuLaunchCooperativeKernelMultiDevice(@NativeType("CUDA_LAUNCH_PARAMS *") CUDA_LAUNCH_PARAMS.Buffer launchParamsList, @NativeType("unsigned int") int flags) { return ncuLaunchCooperativeKernelMultiDevice(launchParamsList.address(), launchParamsList.remaining(), flags); } // --- [ cuLaunchHostFunc ] --- /** Unsafe version of: {@link #cuLaunchHostFunc LaunchHostFunc} */ public static int ncuLaunchHostFunc(long hStream, long fn, long userData) { long __functionAddress = Functions.LaunchHostFunc; if (CHECKS) { check(__functionAddress); check(userData); } return callPPPI(hStream, fn, userData, __functionAddress); } /** * Enqueues a host function call in a stream. * *

Enqueues a host function to run in a stream. The function will be called after currently enqueued work and will block work added after it.

* *

The host function must not make any CUDA API calls. Attempting to use a CUDA API may result in {@link #CUDA_ERROR_NOT_PERMITTED}, but this is not required. * The host function must not perform any synchronization that may depend on outstanding CUDA work not mandated to run earlier. Host functions without a * mandated order (such as in independent streams) execute in undefined order and may be serialized.

* *

For the purposes of Unified Memory, execution makes a number of guarantees:

* *
    *
  • The stream is considered idle for the duration of the function's execution. Thus, for example, the function may always use memory attached to the * stream it was enqueued in.
  • *
  • The start of execution of the function has the same effect as synchronizing an event recorded in the same stream immediately prior to the function. * It thus synchronizes streams which have been "joined" prior to the function.
  • *
  • Adding device work to any stream does not have the effect of making the stream active until all preceding host functions and stream callbacks have * executed. Thus, for example, a function might use global attached memory even if work has been added to another stream, if the work has been * ordered behind the function call with an event.
  • *
  • Completion of the function does not cause a stream to become active except as described above. The stream will remain idle if no device work * follows the function, and will remain idle across consecutive host functions or stream callbacks without device work in between. Thus, for example, * stream synchronization can be done by signaling from a host function at the end of the stream.
  • *
* *

Note that, in contrast to {@link #cuStreamAddCallback StreamAddCallback}, the function will not be called in the event of an error in the CUDA context.
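As an illustrative, hedged example (the callback class name follows LWJGL's usual {@code CUhostFn}/{@code CUhostFnI} pattern; {@code stream} is assumed to exist), a host function can be enqueued like this:

// Hypothetical sketch: run a host callback once all work currently in `stream` has completed.
// The callback must not call back into the CUDA API.
CUhostFn callback = CUhostFn.create(ud -> System.out.println("preceding GPU work is done"));
long userData = 1L; // opaque value forwarded to the callback (non-zero here only to satisfy debug checks)
int err = cuLaunchHostFunc(stream, callback, userData);
// Free the native callback only once it can no longer fire (e.g. after cuStreamSynchronize):
// callback.free();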

* * @param hStream stream to enqueue function call in * @param fn the function to call once preceding stream operations are complete * @param userData user-specified data to be passed to the function */ @NativeType("CUresult") public static int cuLaunchHostFunc(@NativeType("CUstream") long hStream, @NativeType("void (*) (void *)") CUhostFnI fn, @NativeType("void *") long userData) { return ncuLaunchHostFunc(hStream, fn.address(), userData); } // --- [ cuFuncSetBlockShape ] --- /** * Sets the block-dimensions for the function. (Deprecated) * *

Specifies the {@code x}, {@code y}, and {@code z} dimensions of the thread blocks that are created when the kernel given by {@code hfunc} is launched.

* * @param hfunc kernel to specify dimensions of * @param x x dimension * @param y y dimension * @param z z dimension */ @NativeType("CUresult") public static int cuFuncSetBlockShape(@NativeType("CUfunction") long hfunc, int x, int y, int z) { long __functionAddress = Functions.FuncSetBlockShape; if (CHECKS) { check(hfunc); } return callPI(hfunc, x, y, z, __functionAddress); } // --- [ cuFuncSetSharedSize ] --- /** * Sets the dynamic shared-memory size for the function. (Deprecated) * *

Sets through {@code bytes} the amount of dynamic shared memory that will be available to each thread block when the kernel given by {@code hfunc} is * launched.

* * @param hfunc kernel to specify dynamic shared-memory size for * @param bytes dynamic shared-memory size per thread block in bytes */ @NativeType("CUresult") public static int cuFuncSetSharedSize(@NativeType("CUfunction") long hfunc, @NativeType("unsigned int") int bytes) { long __functionAddress = Functions.FuncSetSharedSize; if (CHECKS) { check(hfunc); } return callPI(hfunc, bytes, __functionAddress); } // --- [ cuParamSetSize ] --- /** * Sets the parameter size for the function. (Deprecated) * *

Sets through {@code numbytes} the total size in bytes needed by the function parameters of the kernel corresponding to {@code hfunc}.

* * @param hfunc kernel to set parameter size for * @param numbytes size of parameter list in bytes */ @NativeType("CUresult") public static int cuParamSetSize(@NativeType("CUfunction") long hfunc, @NativeType("unsigned int") int numbytes) { long __functionAddress = Functions.ParamSetSize; if (CHECKS) { check(hfunc); } return callPI(hfunc, numbytes, __functionAddress); } // --- [ cuParamSeti ] --- /** * Adds an integer parameter to the function's argument list. (Deprecated) * *

Sets an integer parameter that will be specified the next time the kernel corresponding to {@code hfunc} will be invoked. {@code offset} is a byte * offset.

* * @param hfunc kernel to add parameter to * @param offset offset to add parameter to argument list * @param value value of parameter */ @NativeType("CUresult") public static int cuParamSeti(@NativeType("CUfunction") long hfunc, int offset, @NativeType("unsigned int") int value) { long __functionAddress = Functions.ParamSeti; if (CHECKS) { check(hfunc); } return callPI(hfunc, offset, value, __functionAddress); } // --- [ cuParamSetf ] --- /** * Adds a floating-point parameter to the function's argument list. (Deprecated) * *

Sets a floating-point parameter that will be specified the next time the kernel corresponding to {@code hfunc} will be invoked. {@code offset} is a * byte offset.

* * @param hfunc kernel to add parameter to * @param offset offset to add parameter to argument list * @param value value of parameter */ @NativeType("CUresult") public static int cuParamSetf(@NativeType("CUfunction") long hfunc, int offset, float value) { long __functionAddress = Functions.ParamSetf; if (CHECKS) { check(hfunc); } return callPI(hfunc, offset, value, __functionAddress); } // --- [ cuParamSetv ] --- /** * Unsafe version of: {@link #cuParamSetv ParamSetv} * * @param numbytes size of data to copy in bytes */ public static int ncuParamSetv(long hfunc, int offset, long ptr, int numbytes) { long __functionAddress = Functions.ParamSetv; if (CHECKS) { check(hfunc); } return callPPI(hfunc, offset, ptr, numbytes, __functionAddress); } /** * Adds arbitrary data to the function's argument list. (Deprecated) * *

Copies an arbitrary amount of data (specified in {@code numbytes}) from {@code ptr} into the parameter space of the kernel corresponding to * {@code hfunc}. {@code offset} is a byte offset.

* * @param hfunc kernel to add data to * @param offset offset to add data to argument list * @param ptr pointer to arbitrary data */ @NativeType("CUresult") public static int cuParamSetv(@NativeType("CUfunction") long hfunc, int offset, @NativeType("void *") ByteBuffer ptr) { return ncuParamSetv(hfunc, offset, memAddress(ptr), ptr.remaining()); } // --- [ cuLaunch ] --- /** * Launches a CUDA function. (Deprecated) * *

Invokes the kernel {@code f} on a 1 x 1 x 1 grid of blocks. The block contains the number of threads specified by a previous call to * {@link #cuFuncSetBlockShape FuncSetBlockShape}.

* *

The block shape, dynamic shared memory size, and parameter information must be set using {@link #cuFuncSetBlockShape FuncSetBlockShape}, {@link #cuFuncSetSharedSize FuncSetSharedSize}, * {@link #cuParamSetSize ParamSetSize}, {@link #cuParamSeti ParamSeti}, {@link #cuParamSetf ParamSetf}, and {@link #cuParamSetv ParamSetv} prior to calling this function.

* *

Launching a function via {@link #cuLaunchKernel LaunchKernel} invalidates the function's block shape, dynamic shared memory size, and parameter information. After * launching via cuLaunchKernel, this state must be re-initialized prior to calling this function. Failure to do so results in undefined behavior.
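For illustration, a hedged sketch of this deprecated launch sequence for a kernel taking a device pointer and an {@code int} (with {@code function} and {@code dPtr} assumed to exist, and parameter offsets chosen by the caller) might be:

// Hypothetical sketch of the legacy (deprecated) launch path.
try (MemoryStack stack = MemoryStack.stackPush()) {
    ByteBuffer ptrParam = stack.malloc(8).putLong(0, dPtr); // raw bytes of the CUdeviceptr argument
    cuFuncSetBlockShape(function, 256, 1, 1); // threads per block
    cuFuncSetSharedSize(function, 0);         // dynamic shared memory per block, in bytes
    cuParamSetSize(function, 12);             // 8-byte pointer + 4-byte int
    cuParamSetv(function, 0, ptrParam);       // pointer argument at offset 0
    cuParamSeti(function, 8, 1 << 20);        // int argument at offset 8
    cuLaunch(function);                       // 1 x 1 x 1 grid; use cuLaunchGrid for larger grids
}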

* * @param f kernel to launch */ @NativeType("CUresult") public static int cuLaunch(@NativeType("CUfunction") long f) { long __functionAddress = Functions.Launch; if (CHECKS) { check(f); } return callPI(f, __functionAddress); } // --- [ cuLaunchGrid ] --- /** * Launches a CUDA function. (Deprecated) * *

Invokes the kernel {@code f} on a {@code grid_width} x {@code grid_height} grid of blocks. Each block contains the number of threads specified by a * previous call to {@link #cuFuncSetBlockShape FuncSetBlockShape}.

* *

The block shape, dynamic shared memory size, and parameter information must be set using {@link #cuFuncSetBlockShape FuncSetBlockShape}, {@link #cuFuncSetSharedSize FuncSetSharedSize}, * {@link #cuParamSetSize ParamSetSize}, {@link #cuParamSeti ParamSeti}, {@link #cuParamSetf ParamSetf}, and {@link #cuParamSetv ParamSetv} prior to calling this function.

* *

Launching a function via {@link #cuLaunchKernel LaunchKernel} invalidates the function's block shape, dynamic shared memory size, and parameter information. After * launching via cuLaunchKernel, this state must be re-initialized prior to calling this function. Failure to do so results in undefined behavior.

* * @param f kernel to launch * @param grid_width width of grid in blocks * @param grid_height height of grid in blocks */ @NativeType("CUresult") public static int cuLaunchGrid(@NativeType("CUfunction") long f, int grid_width, int grid_height) { long __functionAddress = Functions.LaunchGrid; if (CHECKS) { check(f); } return callPI(f, grid_width, grid_height, __functionAddress); } // --- [ cuLaunchGridAsync ] --- /** * Launches a CUDA function. (Deprecated) * *

Invokes the kernel {@code f} on a {@code grid_width} x {@code grid_height} grid of blocks. Each block contains the number of threads specified by a * previous call to {@link #cuFuncSetBlockShape FuncSetBlockShape}.

* *

The block shape, dynamic shared memory size, and parameter information must be set using {@link #cuFuncSetBlockShape FuncSetBlockShape}, {@link #cuFuncSetSharedSize FuncSetSharedSize}, * {@link #cuParamSetSize ParamSetSize}, {@link #cuParamSeti ParamSeti}, {@link #cuParamSetf ParamSetf}, and {@link #cuParamSetv ParamSetv} prior to calling this function.

* *

Launching a function via {@link #cuLaunchKernel LaunchKernel} invalidates the function's block shape, dynamic shared memory size, and parameter information. After * launching via cuLaunchKernel, this state must be re-initialized prior to calling this function. Failure to do so results in undefined behavior.

* *
Note
* *

In certain cases where cubins are created with no ABI (i.e., using {@code ptxas} {@code --abi-compile} {@code no}), this function may * serialize kernel launches. The CUDA driver retains asynchronous behavior by growing the per-thread stack as needed per launch and not shrinking it * afterwards.

* * @param f kernel to launch * @param grid_width width of grid in blocks * @param grid_height height of grid in blocks * @param hStream stream identifier */ @NativeType("CUresult") public static int cuLaunchGridAsync(@NativeType("CUfunction") long f, int grid_width, int grid_height, @NativeType("CUstream") long hStream) { long __functionAddress = Functions.LaunchGridAsync; if (CHECKS) { check(f); } return callPPI(f, grid_width, grid_height, hStream, __functionAddress); } // --- [ cuParamSetTexRef ] --- /** * Adds a texture-reference to the function's argument list. (Deprecated) * *

Makes the CUDA array or linear memory bound to the texture reference {@code hTexRef} available to a device program as a texture. In this version of * CUDA, the texture-reference must be obtained via {@link #cuModuleGetTexRef ModuleGetTexRef} and the {@code texunit} parameter must be set to {@link #CU_PARAM_TR_DEFAULT PARAM_TR_DEFAULT}.

* * @param hfunc kernel to add texture-reference to * @param texunit texture unit (must be {@link #CU_PARAM_TR_DEFAULT PARAM_TR_DEFAULT}) * @param hTexRef texture-reference to add to argument list */ @NativeType("CUresult") public static int cuParamSetTexRef(@NativeType("CUfunction") long hfunc, int texunit, @NativeType("CUtexref") long hTexRef) { long __functionAddress = Functions.ParamSetTexRef; if (CHECKS) { check(hfunc); check(hTexRef); } return callPPI(hfunc, texunit, hTexRef, __functionAddress); } // --- [ cuGraphCreate ] --- /** Unsafe version of: {@link #cuGraphCreate GraphCreate} */ public static int ncuGraphCreate(long phGraph, int flags) { long __functionAddress = Functions.GraphCreate; if (CHECKS) { check(__functionAddress); } return callPI(phGraph, flags, __functionAddress); } /** * Creates a graph. * *

Creates an empty graph, which is returned via {@code phGraph}.

* * @param phGraph returns newly created graph * @param flags graph creation flags, must be 0 */ @NativeType("CUresult") public static int cuGraphCreate(@NativeType("CUgraph *") PointerBuffer phGraph, @NativeType("unsigned int") int flags) { if (CHECKS) { check(phGraph, 1); } return ncuGraphCreate(memAddress(phGraph), flags); } // --- [ cuGraphAddKernelNode ] --- /** * Unsafe version of: {@link #cuGraphAddKernelNode GraphAddKernelNode} * * @param numDependencies number of dependencies */ public static int ncuGraphAddKernelNode(long phGraphNode, long hGraph, long dependencies, long numDependencies, long nodeParams) { long __functionAddress = Functions.GraphAddKernelNode; if (CHECKS) { check(__functionAddress); check(hGraph); CUDA_KERNEL_NODE_PARAMS.validate(nodeParams); } return callPPPPPI(phGraphNode, hGraph, dependencies, numDependencies, nodeParams, __functionAddress); } /** * Creates a kernel execution node and adds it to a graph. * *

Creates a new kernel execution node and adds it to {@code hGraph} with {@code numDependencies} dependencies specified via {@code dependencies} and * arguments specified in {@code nodeParams}. It is possible for {@code numDependencies} to be 0, in which case the node will be placed at the root of the * graph. {@code dependencies} may not have any duplicate entries. A handle to the new node will be returned in {@code phGraphNode}.

* *

When the graph is launched, the node will invoke kernel {@code func} on a ({@code gridDimX} x {@code gridDimY} x {@code gridDimZ}) grid of blocks. Each * block contains ({@code blockDimX} x {@code blockDimY} x {@code blockDimZ}) threads.

* *

{@code sharedMemBytes} sets the amount of dynamic shared memory that will be available to each thread block.

* *

Kernel parameters to {@code func} can be specified in one of two ways:

* *
    *
  1. Kernel parameters can be specified via {@code kernelParams}. If the kernel has N parameters, then {@code kernelParams} needs to be an array of N * pointers. Each pointer, from {@code kernelParams[0]} to {@code kernelParams[N-1]}, points to the region of memory from which the actual parameter * will be copied. The number of kernel parameters and their offsets and sizes do not need to be specified as that information is retrieved directly * from the kernel's image.
  2. *
  3. Kernel parameters for non-cooperative kernels can also be packaged by the application into a single buffer that is passed in via {@code extra}. * This places the burden on the application of knowing each kernel parameter's size and alignment/padding within the buffer. The {@code extra} * parameter exists to allow this function to take additional less commonly used arguments. {@code extra} specifies a list of names of extra settings * and their corresponding values. Each extra setting name is immediately followed by the corresponding value. The list must be terminated with either * {@code NULL} or {@link #CU_LAUNCH_PARAM_END LAUNCH_PARAM_END}. * *
      *
    • {@link #CU_LAUNCH_PARAM_END LAUNCH_PARAM_END}, which indicates the end of the {@code extra} array;
    • *
    • {@link #CU_LAUNCH_PARAM_BUFFER_POINTER LAUNCH_PARAM_BUFFER_POINTER}, which specifies that the next value in {@code extra} will be a pointer to a buffer containing all the kernel * parameters for launching kernel {@code func};
    • *
    • {@link #CU_LAUNCH_PARAM_BUFFER_SIZE LAUNCH_PARAM_BUFFER_SIZE}, which specifies that the next value in {@code extra} will be a pointer to a size_t containing the size of the buffer * specified with {@link #CU_LAUNCH_PARAM_BUFFER_POINTER LAUNCH_PARAM_BUFFER_POINTER};
    • *
  4. *
* *

The error {@link #CUDA_ERROR_INVALID_VALUE} will be returned if kernel parameters are specified with both {@code kernelParams} and {@code extra} (i.e. both * {@code kernelParams} and {@code extra} are non-NULL). {@link #CUDA_ERROR_INVALID_VALUE} will be returned if {@code extra} is used for a cooperative kernel.

* *

The {@code kernelParams} or {@code extra} array, as well as the argument values it points to, are copied during this call.

* *
Note
* *

Kernels launched using graphs must not use texture and surface references. Reading or writing through any texture or surface reference is * undefined behavior. This restriction does not apply to texture and surface objects.
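As a hedged illustration (the struct allocator and fluent setter names are assumed to follow LWJGL's usual pattern; {@code function} is a hypothetical kernel handle), a single-node graph could be assembled like this:

// Hypothetical sketch: create a graph and add one kernel node taking a single int parameter.
try (MemoryStack stack = MemoryStack.stackPush()) {
    PointerBuffer pGraph = stack.mallocPointer(1);
    cuGraphCreate(pGraph, 0);
    long graph = pGraph.get(0);

    IntBuffer count = stack.ints(42);
    PointerBuffer kernelParams = stack.mallocPointer(1).put(0, memAddress(count));

    CUDA_KERNEL_NODE_PARAMS nodeParams = CUDA_KERNEL_NODE_PARAMS.calloc(stack)
        .func(function)
        .gridDimX(1).gridDimY(1).gridDimZ(1)
        .blockDimX(128).blockDimY(1).blockDimZ(1)
        .sharedMemBytes(0)
        .kernelParams(kernelParams); // copied by the call, per the note above

    PointerBuffer pNode = stack.mallocPointer(1);
    cuGraphAddKernelNode(pNode, graph, null, nodeParams); // no dependencies: a root node
}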

* * @param phGraphNode returns newly created node * @param hGraph graph to which to add the node * @param dependencies dependencies of the node * @param nodeParams parameters for the GPU execution node */ @NativeType("CUresult") public static int cuGraphAddKernelNode(@NativeType("CUgraphNode *") PointerBuffer phGraphNode, @NativeType("CUgraph") long hGraph, @Nullable @NativeType("CUgraphNode const *") PointerBuffer dependencies, @NativeType("CUDA_KERNEL_NODE_PARAMS const *") CUDA_KERNEL_NODE_PARAMS nodeParams) { if (CHECKS) { check(phGraphNode, 1); } return ncuGraphAddKernelNode(memAddress(phGraphNode), hGraph, memAddressSafe(dependencies), remainingSafe(dependencies), nodeParams.address()); } // --- [ cuGraphKernelNodeGetParams ] --- /** Unsafe version of: {@link #cuGraphKernelNodeGetParams GraphKernelNodeGetParams} */ public static int ncuGraphKernelNodeGetParams(long hNode, long nodeParams) { long __functionAddress = Functions.GraphKernelNodeGetParams; if (CHECKS) { check(__functionAddress); check(hNode); } return callPPI(hNode, nodeParams, __functionAddress); } /** * Returns a kernel node's parameters. * *

Returns the parameters of kernel node {@code hNode} in {@code nodeParams}. The {@code kernelParams} or {@code extra} array returned in * {@code nodeParams}, as well as the argument values it points to, are owned by the node. This memory remains valid until the node is destroyed or its * parameters are modified, and should not be modified directly. Use {@link #cuGraphKernelNodeSetParams GraphKernelNodeSetParams} to update the parameters of this node.

* *

The params will contain either {@code kernelParams} or {@code extra}, according to which of these was most recently set on the node.

* * @param hNode node to get the parameters for * @param nodeParams pointer to return the parameters */ @NativeType("CUresult") public static int cuGraphKernelNodeGetParams(@NativeType("CUgraphNode") long hNode, @NativeType("CUDA_KERNEL_NODE_PARAMS *") CUDA_KERNEL_NODE_PARAMS nodeParams) { return ncuGraphKernelNodeGetParams(hNode, nodeParams.address()); } // --- [ cuGraphKernelNodeSetParams ] --- /** Unsafe version of: {@link #cuGraphKernelNodeSetParams GraphKernelNodeSetParams} */ public static int ncuGraphKernelNodeSetParams(long hNode, long nodeParams) { long __functionAddress = Functions.GraphKernelNodeSetParams; if (CHECKS) { check(__functionAddress); check(hNode); CUDA_KERNEL_NODE_PARAMS.validate(nodeParams); } return callPPI(hNode, nodeParams, __functionAddress); } /** * Sets a kernel node's parameters. * *

Sets the parameters of kernel node {@code hNode} to {@code nodeParams}.

* * @param hNode node to set the parameters for * @param nodeParams parameters to copy */ @NativeType("CUresult") public static int cuGraphKernelNodeSetParams(@NativeType("CUgraphNode") long hNode, @NativeType("CUDA_KERNEL_NODE_PARAMS const *") CUDA_KERNEL_NODE_PARAMS nodeParams) { return ncuGraphKernelNodeSetParams(hNode, nodeParams.address()); } // --- [ cuGraphAddMemcpyNode ] --- /** * Unsafe version of: {@link #cuGraphAddMemcpyNode GraphAddMemcpyNode} * * @param numDependencies number of dependencies */ public static int ncuGraphAddMemcpyNode(long phGraphNode, long hGraph, long dependencies, long numDependencies, long copyParams, long ctx) { long __functionAddress = Functions.GraphAddMemcpyNode; if (CHECKS) { check(__functionAddress); check(hGraph); check(ctx); } return callPPPPPPI(phGraphNode, hGraph, dependencies, numDependencies, copyParams, ctx, __functionAddress); } /** * Creates a memcpy node and adds it to a graph. * *

Creates a new memcpy node and adds it to {@code hGraph} with {@code numDependencies} dependencies specified via {@code dependencies}. It is possible * for {@code numDependencies} to be 0, in which case the node will be placed at the root of the graph. {@code dependencies} may not have any duplicate * entries. A handle to the new node will be returned in {@code phGraphNode}.

* *

When the graph is launched, the node will perform the memcpy described by {@code copyParams}. See {@link #cuMemcpy3D Memcpy3D} for a description of the structure and * its restrictions.

* *

Memcpy nodes have some additional restrictions with regards to managed memory, if the system contains at least one device which has a zero value for * the device attribute {@link #CU_DEVICE_ATTRIBUTE_CONCURRENT_MANAGED_ACCESS DEVICE_ATTRIBUTE_CONCURRENT_MANAGED_ACCESS}. If one or more of the operands refer to managed memory, then using the memory * type {@link #CU_MEMORYTYPE_UNIFIED MEMORYTYPE_UNIFIED} is disallowed for those operand(s). The managed memory will be treated as residing on either the host or the device, * depending on which memory type is specified.
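A hedged sketch of a 1D device-to-device copy expressed as a memcpy node ({@code graph}, {@code ctx}, {@code srcPtr}, {@code dstPtr} and {@code bytes} are assumed to exist, and the {@code CUDA_MEMCPY3D} setters are assumed to mirror the CUDA field names):

// Hypothetical sketch: copy `bytes` bytes of linear device memory inside a graph.
try (MemoryStack stack = MemoryStack.stackPush()) {
    CUDA_MEMCPY3D copyParams = CUDA_MEMCPY3D.calloc(stack)
        .srcMemoryType(CU_MEMORYTYPE_DEVICE).srcDevice(srcPtr).srcPitch(bytes).srcHeight(1)
        .dstMemoryType(CU_MEMORYTYPE_DEVICE).dstDevice(dstPtr).dstPitch(bytes).dstHeight(1)
        .WidthInBytes(bytes).Height(1).Depth(1);

    PointerBuffer pNode = stack.mallocPointer(1);
    cuGraphAddMemcpyNode(pNode, graph, null, copyParams, ctx);
}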

* * @param phGraphNode returns newly created node * @param hGraph graph to which to add the node * @param dependencies dependencies of the node * @param copyParams parameters for the memory copy * @param ctx context on which to run the node */ @NativeType("CUresult") public static int cuGraphAddMemcpyNode(@NativeType("CUgraphNode *") PointerBuffer phGraphNode, @NativeType("CUgraph") long hGraph, @Nullable @NativeType("CUgraphNode const *") PointerBuffer dependencies, @NativeType("CUDA_MEMCPY3D const *") CUDA_MEMCPY3D copyParams, @NativeType("CUcontext") long ctx) { if (CHECKS) { check(phGraphNode, 1); } return ncuGraphAddMemcpyNode(memAddress(phGraphNode), hGraph, memAddressSafe(dependencies), remainingSafe(dependencies), copyParams.address(), ctx); } // --- [ cuGraphMemcpyNodeGetParams ] --- /** Unsafe version of: {@link #cuGraphMemcpyNodeGetParams GraphMemcpyNodeGetParams} */ public static int ncuGraphMemcpyNodeGetParams(long hNode, long nodeParams) { long __functionAddress = Functions.GraphMemcpyNodeGetParams; if (CHECKS) { check(__functionAddress); check(hNode); } return callPPI(hNode, nodeParams, __functionAddress); } /** * Returns a memcpy node's parameters. * *

Returns the parameters of memcpy node {@code hNode} in {@code nodeParams}.

* * @param hNode node to get the parameters for * @param nodeParams pointer to return the parameters */ @NativeType("CUresult") public static int cuGraphMemcpyNodeGetParams(@NativeType("CUgraphNode") long hNode, @NativeType("CUDA_MEMCPY3D *") CUDA_MEMCPY3D nodeParams) { return ncuGraphMemcpyNodeGetParams(hNode, nodeParams.address()); } // --- [ cuGraphMemcpyNodeSetParams ] --- /** Unsafe version of: {@link #cuGraphMemcpyNodeSetParams GraphMemcpyNodeSetParams} */ public static int ncuGraphMemcpyNodeSetParams(long hNode, long nodeParams) { long __functionAddress = Functions.GraphMemcpyNodeSetParams; if (CHECKS) { check(__functionAddress); check(hNode); } return callPPI(hNode, nodeParams, __functionAddress); } /** * Sets a memcpy node's parameters. * *

Sets the parameters of memcpy node {@code hNode} to {@code nodeParams}.

* * @param hNode node to set the parameters for * @param nodeParams parameters to copy */ @NativeType("CUresult") public static int cuGraphMemcpyNodeSetParams(@NativeType("CUgraphNode") long hNode, @NativeType("CUDA_MEMCPY3D const *") CUDA_MEMCPY3D nodeParams) { return ncuGraphMemcpyNodeSetParams(hNode, nodeParams.address()); } // --- [ cuGraphAddMemsetNode ] --- /** * Unsafe version of: {@link #cuGraphAddMemsetNode GraphAddMemsetNode} * * @param numDependencies number of dependencies */ public static int ncuGraphAddMemsetNode(long phGraphNode, long hGraph, long dependencies, long numDependencies, long memsetParams, long ctx) { long __functionAddress = Functions.GraphAddMemsetNode; if (CHECKS) { check(__functionAddress); check(hGraph); CUDA_MEMSET_NODE_PARAMS.validate(memsetParams); check(ctx); } return callPPPPPPI(phGraphNode, hGraph, dependencies, numDependencies, memsetParams, ctx, __functionAddress); } /** * Creates a memset node and adds it to a graph. * *

Creates a new memset node and adds it to {@code hGraph} with {@code numDependencies} dependencies specified via {@code dependencies}. It is possible * for {@code numDependencies} to be 0, in which case the node will be placed at the root of the graph. {@code dependencies} may not have any duplicate * entries. A handle to the new node will be returned in {@code phGraphNode}.

* *

The element size must be 1, 2, or 4 bytes. When the graph is launched, the node will perform the memset described by {@code memsetParams}.
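A hedged sketch of a byte-wise zero fill as a memset node ({@code graph}, {@code ctx}, {@code dPtr} and {@code bytes} are assumed to exist; setter names are assumed to mirror the CUDA fields):

// Hypothetical sketch: zero `bytes` bytes at device pointer `dPtr` when the graph runs.
try (MemoryStack stack = MemoryStack.stackPush()) {
    CUDA_MEMSET_NODE_PARAMS memsetParams = CUDA_MEMSET_NODE_PARAMS.calloc(stack)
        .dst(dPtr)
        .value(0)
        .elementSize(1)  // 1, 2 or 4 bytes per element
        .width(bytes)    // elements per row
        .height(1)       // rows; pitch is unused when height == 1
        .pitch(0);
    PointerBuffer pNode = stack.mallocPointer(1);
    cuGraphAddMemsetNode(pNode, graph, null, memsetParams, ctx);
}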

* * @param phGraphNode returns newly created node * @param hGraph graph to which to add the node * @param dependencies dependencies of the node * @param memsetParams parameters for the memory set * @param ctx context on which to run the node */ @NativeType("CUresult") public static int cuGraphAddMemsetNode(@NativeType("CUgraphNode *") PointerBuffer phGraphNode, @NativeType("CUgraph") long hGraph, @Nullable @NativeType("CUgraphNode const *") PointerBuffer dependencies, @NativeType("CUDA_MEMSET_NODE_PARAMS const *") CUDA_MEMSET_NODE_PARAMS memsetParams, @NativeType("CUcontext") long ctx) { if (CHECKS) { check(phGraphNode, 1); } return ncuGraphAddMemsetNode(memAddress(phGraphNode), hGraph, memAddressSafe(dependencies), remainingSafe(dependencies), memsetParams.address(), ctx); } // --- [ cuGraphMemsetNodeGetParams ] --- /** Unsafe version of: {@link #cuGraphMemsetNodeGetParams GraphMemsetNodeGetParams} */ public static int ncuGraphMemsetNodeGetParams(long hNode, long nodeParams) { long __functionAddress = Functions.GraphMemsetNodeGetParams; if (CHECKS) { check(__functionAddress); check(hNode); } return callPPI(hNode, nodeParams, __functionAddress); } /** * Returns a memset node's parameters. * *

Returns the parameters of memset node {@code hNode} in {@code nodeParams}.

* * @param hNode node to get the parameters for * @param nodeParams pointer to return the parameters */ @NativeType("CUresult") public static int cuGraphMemsetNodeGetParams(@NativeType("CUgraphNode") long hNode, @NativeType("CUDA_MEMSET_NODE_PARAMS *") CUDA_MEMSET_NODE_PARAMS nodeParams) { return ncuGraphMemsetNodeGetParams(hNode, nodeParams.address()); } // --- [ cuGraphMemsetNodeSetParams ] --- /** Unsafe version of: {@link #cuGraphMemsetNodeSetParams GraphMemsetNodeSetParams} */ public static int ncuGraphMemsetNodeSetParams(long hNode, long nodeParams) { long __functionAddress = Functions.GraphMemsetNodeSetParams; if (CHECKS) { check(__functionAddress); check(hNode); CUDA_MEMSET_NODE_PARAMS.validate(nodeParams); } return callPPI(hNode, nodeParams, __functionAddress); } /** * Sets a memset node's parameters. * *

Sets the parameters of memset node {@code hNode} to {@code nodeParams}.

* * @param hNode node to set the parameters for * @param nodeParams parameters to copy */ @NativeType("CUresult") public static int cuGraphMemsetNodeSetParams(@NativeType("CUgraphNode") long hNode, @NativeType("CUDA_MEMSET_NODE_PARAMS const *") CUDA_MEMSET_NODE_PARAMS nodeParams) { return ncuGraphMemsetNodeSetParams(hNode, nodeParams.address()); } // --- [ cuGraphAddHostNode ] --- /** * Unsafe version of: {@link #cuGraphAddHostNode GraphAddHostNode} * * @param numDependencies number of dependencies */ public static int ncuGraphAddHostNode(long phGraphNode, long hGraph, long dependencies, long numDependencies, long nodeParams) { long __functionAddress = Functions.GraphAddHostNode; if (CHECKS) { check(__functionAddress); check(hGraph); CUDA_HOST_NODE_PARAMS.validate(nodeParams); } return callPPPPPI(phGraphNode, hGraph, dependencies, numDependencies, nodeParams, __functionAddress); } /** * Creates a host execution node and adds it to a graph. * *

Creates a new CPU execution node and adds it to {@code hGraph} with {@code numDependencies} dependencies specified via {@code dependencies} and * arguments specified in {@code nodeParams}. It is possible for {@code numDependencies} to be 0, in which case the node will be placed at the root of the * graph. {@code dependencies} may not have any duplicate entries. A handle to the new node will be returned in {@code phGraphNode}.

* *

When the graph is launched, the node will invoke the specified CPU function. Host nodes are not supported under MPS with pre-Volta GPUs.
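A hedged sketch of adding a host node ({@code graph} is assumed to exist; the {@code CUDA_HOST_NODE_PARAMS} setters are assumed to mirror the CUDA field names):

// Hypothetical sketch: execute a host callback as part of the graph; it must not call the CUDA API.
CUhostFn hostFn = CUhostFn.create(ud -> System.out.println("host node executed"));
try (MemoryStack stack = MemoryStack.stackPush()) {
    CUDA_HOST_NODE_PARAMS hostParams = CUDA_HOST_NODE_PARAMS.calloc(stack)
        .fn(hostFn)
        .userData(1L); // opaque value handed to the callback
    PointerBuffer pNode = stack.mallocPointer(1);
    cuGraphAddHostNode(pNode, graph, null, hostParams);
}
// hostFn must stay alive (not freed) for as long as the graph can still execute.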

* * @param phGraphNode returns newly created node * @param hGraph graph to which to add the node * @param dependencies dependencies of the node * @param nodeParams parameters for the host node */ @NativeType("CUresult") public static int cuGraphAddHostNode(@NativeType("CUgraphNode *") PointerBuffer phGraphNode, @NativeType("CUgraph") long hGraph, @Nullable @NativeType("CUgraphNode const *") PointerBuffer dependencies, @NativeType("CUDA_HOST_NODE_PARAMS const *") CUDA_HOST_NODE_PARAMS nodeParams) { if (CHECKS) { check(phGraphNode, 1); } return ncuGraphAddHostNode(memAddress(phGraphNode), hGraph, memAddressSafe(dependencies), remainingSafe(dependencies), nodeParams.address()); } // --- [ cuGraphHostNodeGetParams ] --- /** Unsafe version of: {@link #cuGraphHostNodeGetParams GraphHostNodeGetParams} */ public static int ncuGraphHostNodeGetParams(long hNode, long nodeParams) { long __functionAddress = Functions.GraphHostNodeGetParams; if (CHECKS) { check(__functionAddress); check(hNode); } return callPPI(hNode, nodeParams, __functionAddress); } /** * Returns a host node's parameters. * *

Returns the parameters of host node {@code hNode} in {@code nodeParams}.

* * @param hNode node to get the parameters for * @param nodeParams pointer to return the parameters */ @NativeType("CUresult") public static int cuGraphHostNodeGetParams(@NativeType("CUgraphNode") long hNode, @NativeType("CUDA_HOST_NODE_PARAMS *") CUDA_HOST_NODE_PARAMS nodeParams) { return ncuGraphHostNodeGetParams(hNode, nodeParams.address()); } // --- [ cuGraphHostNodeSetParams ] --- /** Unsafe version of: {@link #cuGraphHostNodeSetParams GraphHostNodeSetParams} */ public static int ncuGraphHostNodeSetParams(long hNode, long nodeParams) { long __functionAddress = Functions.GraphHostNodeSetParams; if (CHECKS) { check(__functionAddress); check(hNode); CUDA_HOST_NODE_PARAMS.validate(nodeParams); } return callPPI(hNode, nodeParams, __functionAddress); } /** * Sets a host node's parameters. * *

Sets the parameters of host node {@code hNode} to {@code nodeParams}.

* * @param hNode node to set the parameters for * @param nodeParams parameters to copy */ @NativeType("CUresult") public static int cuGraphHostNodeSetParams(@NativeType("CUgraphNode") long hNode, @NativeType("CUDA_HOST_NODE_PARAMS const *") CUDA_HOST_NODE_PARAMS nodeParams) { return ncuGraphHostNodeSetParams(hNode, nodeParams.address()); } // --- [ cuGraphAddChildGraphNode ] --- /** * Unsafe version of: {@link #cuGraphAddChildGraphNode GraphAddChildGraphNode} * * @param numDependencies number of dependencies */ public static int ncuGraphAddChildGraphNode(long phGraphNode, long hGraph, long dependencies, long numDependencies, long childGraph) { long __functionAddress = Functions.GraphAddChildGraphNode; if (CHECKS) { check(__functionAddress); check(hGraph); check(childGraph); } return callPPPPPI(phGraphNode, hGraph, dependencies, numDependencies, childGraph, __functionAddress); } /** * Creates a child graph node and adds it to a graph. * *

Creates a new node which executes an embedded graph, and adds it to {@code hGraph} with {@code numDependencies} dependencies specified via {@code * dependencies}. It is possible for {@code numDependencies} to be 0, in which case the node will be placed at the root of the graph. {@code dependencies} * may not have any duplicate entries. A handle to the new node will be returned in {@code phGraphNode}.

* *

If {@code hGraph} contains allocation or free nodes, this call will return an error.

* *

The node executes an embedded child graph. The child graph is cloned in this call.
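For illustration ({@code graph} and {@code childGraph} are assumed to exist already):

// Hypothetical sketch: embed `childGraph` into `graph`; the child is cloned at this point,
// so later edits to `childGraph` do not affect the node.
try (MemoryStack stack = MemoryStack.stackPush()) {
    PointerBuffer pNode = stack.mallocPointer(1);
    cuGraphAddChildGraphNode(pNode, graph, null, childGraph);
    long childNode = pNode.get(0);
}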

* * @param phGraphNode returns newly created node * @param hGraph graph to which to add the node * @param dependencies dependencies of the node * @param childGraph the graph to clone into this node */ @NativeType("CUresult") public static int cuGraphAddChildGraphNode(@NativeType("CUgraphNode *") PointerBuffer phGraphNode, @NativeType("CUgraph") long hGraph, @Nullable @NativeType("CUgraphNode const *") PointerBuffer dependencies, @NativeType("CUgraph") long childGraph) { if (CHECKS) { check(phGraphNode, 1); } return ncuGraphAddChildGraphNode(memAddress(phGraphNode), hGraph, memAddressSafe(dependencies), remainingSafe(dependencies), childGraph); } // --- [ cuGraphChildGraphNodeGetGraph ] --- /** Unsafe version of: {@link #cuGraphChildGraphNodeGetGraph GraphChildGraphNodeGetGraph} */ public static int ncuGraphChildGraphNodeGetGraph(long hNode, long phGraph) { long __functionAddress = Functions.GraphChildGraphNodeGetGraph; if (CHECKS) { check(__functionAddress); check(hNode); } return callPPI(hNode, phGraph, __functionAddress); } /** * Gets a handle to the embedded graph of a child graph node. * *

Gets a handle to the embedded graph in a child graph node. This call does not clone the graph. Changes to the graph will be reflected in the node, and * the node retains ownership of the graph.

* *

Allocation and free nodes cannot be added to the returned graph. Attempting to do so will return an error.

* * @param hNode node to get the embedded graph for * @param phGraph location to store a handle to the graph */ @NativeType("CUresult") public static int cuGraphChildGraphNodeGetGraph(@NativeType("CUgraphNode") long hNode, @NativeType("CUgraph *") PointerBuffer phGraph) { if (CHECKS) { check(phGraph, 1); } return ncuGraphChildGraphNodeGetGraph(hNode, memAddress(phGraph)); } // --- [ cuGraphAddEmptyNode ] --- /** * Unsafe version of: {@link #cuGraphAddEmptyNode GraphAddEmptyNode} * * @param numDependencies number of dependencies */ public static int ncuGraphAddEmptyNode(long phGraphNode, long hGraph, long dependencies, long numDependencies) { long __functionAddress = Functions.GraphAddEmptyNode; if (CHECKS) { check(__functionAddress); check(hGraph); } return callPPPPI(phGraphNode, hGraph, dependencies, numDependencies, __functionAddress); } /** * Creates an empty node and adds it to a graph. * *

Creates a new node which performs no operation, and adds it to {@code hGraph} with {@code numDependencies} dependencies specified via {@code * dependencies}. It is possible for {@code numDependencies} to be 0, in which case the node will be placed at the root of the graph. {@code dependencies} * may not have any duplicate entries. A handle to the new node will be returned in {@code phGraphNode}.

* *

An empty node performs no operation during execution, but can be used for transitive ordering. For example, a phased execution graph with 2 groups of n * nodes with a barrier between them can be represented using an empty node and 2*n dependency edges, rather than no empty node and n^2 dependency edges.
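A hedged sketch of that barrier pattern, where {@code graph} and {@code n} are hypothetical, {@code firstGroupNodes} is a hypothetical {@code PointerBuffer} holding the n group-1 node handles, and {@code secondGroupParams} holds per-node kernel parameters:

// Hypothetical sketch: a single empty node acts as a barrier between two groups of n nodes,
// which needs 2*n dependency edges instead of n^2 direct edges.
try (MemoryStack stack = MemoryStack.stackPush()) {
    PointerBuffer pBarrier = stack.mallocPointer(1);
    cuGraphAddEmptyNode(pBarrier, graph, firstGroupNodes); // depends on every node of group 1
    long barrier = pBarrier.get(0);

    PointerBuffer barrierDep = stack.mallocPointer(1).put(0, barrier);
    PointerBuffer pNode      = stack.mallocPointer(1);
    for (int i = 0; i < n; i++) {
        cuGraphAddKernelNode(pNode, graph, barrierDep, secondGroupParams[i]); // group 2, behind the barrier
    }
}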

* * @param phGraphNode returns newly created node * @param hGraph graph to which to add the node * @param dependencies dependencies of the node */ @NativeType("CUresult") public static int cuGraphAddEmptyNode(@NativeType("CUgraphNode *") PointerBuffer phGraphNode, @NativeType("CUgraph") long hGraph, @Nullable @NativeType("CUgraphNode const *") PointerBuffer dependencies) { if (CHECKS) { check(phGraphNode, 1); } return ncuGraphAddEmptyNode(memAddress(phGraphNode), hGraph, memAddressSafe(dependencies), remainingSafe(dependencies)); } // --- [ cuGraphAddEventRecordNode ] --- /** * Unsafe version of: {@link #cuGraphAddEventRecordNode GraphAddEventRecordNode} * * @param numDependencies number of dependencies */ public static int ncuGraphAddEventRecordNode(long phGraphNode, long hGraph, long dependencies, long numDependencies, long event) { long __functionAddress = Functions.GraphAddEventRecordNode; if (CHECKS) { check(__functionAddress); check(hGraph); check(event); } return callPPPPPI(phGraphNode, hGraph, dependencies, numDependencies, event, __functionAddress); } /** * Creates an event record node and adds it to a graph. * *

Creates a new event record node and adds it to {@code hGraph} with {@code numDependencies} dependencies specified via {@code dependencies} and event * specified in {@code event}. It is possible for {@code numDependencies} to be 0, in which case the node will be placed at the root of the graph. {@code * dependencies} may not have any duplicate entries. A handle to the new node will be returned in {@code phGraphNode}.

* *

Each launch of the graph will record {@code event} to capture execution of the node's dependencies.

* * @param phGraphNode returns newly created node * @param hGraph graph to which to add the node * @param dependencies dependencies of the node * @param event event for the node */ @NativeType("CUresult") public static int cuGraphAddEventRecordNode(@NativeType("CUgraphNode *") PointerBuffer phGraphNode, @NativeType("CUgraph") long hGraph, @Nullable @NativeType("CUgraphNode const *") PointerBuffer dependencies, @NativeType("CUevent") long event) { if (CHECKS) { check(phGraphNode, 1); } return ncuGraphAddEventRecordNode(memAddress(phGraphNode), hGraph, memAddressSafe(dependencies), remainingSafe(dependencies), event); } // --- [ cuGraphEventRecordNodeGetEvent ] --- /** Unsafe version of: {@link #cuGraphEventRecordNodeGetEvent GraphEventRecordNodeGetEvent} */ public static int ncuGraphEventRecordNodeGetEvent(long hNode, long event_out) { long __functionAddress = Functions.GraphEventRecordNodeGetEvent; if (CHECKS) { check(__functionAddress); check(hNode); } return callPPI(hNode, event_out, __functionAddress); } /** * Returns the event associated with an event record node. * *

Returns the event of event record node {@code hNode} in {@code event_out}.

* * @param hNode node to get the event for * @param event_out pointer to return the event */ @NativeType("CUresult") public static int cuGraphEventRecordNodeGetEvent(@NativeType("CUgraphNode") long hNode, @NativeType("CUevent *") PointerBuffer event_out) { if (CHECKS) { check(event_out, 1); } return ncuGraphEventRecordNodeGetEvent(hNode, memAddress(event_out)); } // --- [ cuGraphEventRecordNodeSetEvent ] --- /** * Sets an event record node's event. * *

Sets the event of event record node {@code hNode} to {@code event}.

* * @param hNode node to set the event for * @param event event to use */ @NativeType("CUresult") public static int cuGraphEventRecordNodeSetEvent(@NativeType("CUgraphNode") long hNode, @NativeType("CUevent") long event) { long __functionAddress = Functions.GraphEventRecordNodeSetEvent; if (CHECKS) { check(__functionAddress); check(hNode); check(event); } return callPPI(hNode, event, __functionAddress); } // --- [ cuGraphAddEventWaitNode ] --- /** * Unsafe version of: {@link #cuGraphAddEventWaitNode GraphAddEventWaitNode} * * @param numDependencies number of dependencies */ public static int ncuGraphAddEventWaitNode(long phGraphNode, long hGraph, long dependencies, long numDependencies, long event) { long __functionAddress = Functions.GraphAddEventWaitNode; if (CHECKS) { check(__functionAddress); check(hGraph); check(event); } return callPPPPPI(phGraphNode, hGraph, dependencies, numDependencies, event, __functionAddress); } /** * Creates an event wait node and adds it to a graph. * *

Creates a new event wait node and adds it to {@code hGraph} with {@code numDependencies} dependencies specified via {@code dependencies} and event * specified in {@code event}. It is possible for {@code numDependencies} to be 0, in which case the node will be placed at the root of the graph. {@code * dependencies} may not have any duplicate entries. A handle to the new node will be returned in {@code phGraphNode}.

* *

The graph node will wait for all work captured in {@code event}. See {@link #cuEventRecord EventRecord} for details on what is captured by an event. {@code event} may * be from a different context or device than the launch stream.
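A hedged sketch of using a wait node to gate part of a graph on work recorded elsewhere ({@code graph}, {@code event} and {@code producerStream} are hypothetical; the event is assumed to come from {@link #cuEventCreate EventCreate}):

// Hypothetical sketch: gate part of a graph on work recorded in an ordinary stream.
try (MemoryStack stack = MemoryStack.stackPush()) {
    PointerBuffer pWait = stack.mallocPointer(1);
    cuGraphAddEventWaitNode(pWait, graph, null, event); // root node (no dependencies)
    long waitNode = pWait.get(0);
    // ... add downstream nodes with `waitNode` among their dependencies ...
}
// Elsewhere, before the graph launch reaches the wait node:
// cuEventRecord(event, producerStream);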

* * @param phGraphNode returns newly created node * @param hGraph graph to which to add the node * @param dependencies dependencies of the node * @param event event for the node */ @NativeType("CUresult") public static int cuGraphAddEventWaitNode(@NativeType("CUgraphNode *") PointerBuffer phGraphNode, @NativeType("CUgraph") long hGraph, @Nullable @NativeType("CUgraphNode const *") PointerBuffer dependencies, @NativeType("CUevent") long event) { if (CHECKS) { check(phGraphNode, 1); } return ncuGraphAddEventWaitNode(memAddress(phGraphNode), hGraph, memAddressSafe(dependencies), remainingSafe(dependencies), event); } // --- [ cuGraphEventWaitNodeGetEvent ] --- /** Unsafe version of: {@link #cuGraphEventWaitNodeGetEvent GraphEventWaitNodeGetEvent} */ public static int ncuGraphEventWaitNodeGetEvent(long hNode, long event_out) { long __functionAddress = Functions.GraphEventWaitNodeGetEvent; if (CHECKS) { check(__functionAddress); check(hNode); } return callPPI(hNode, event_out, __functionAddress); } /** * Returns the event associated with an event wait node. * *

Returns the event of event wait node {@code hNode} in {@code event_out}.

* * @param hNode node to get the event for * @param event_out pointer to return the event */ @NativeType("CUresult") public static int cuGraphEventWaitNodeGetEvent(@NativeType("CUgraphNode") long hNode, @NativeType("CUevent *") PointerBuffer event_out) { if (CHECKS) { check(event_out, 1); } return ncuGraphEventWaitNodeGetEvent(hNode, memAddress(event_out)); } // --- [ cuGraphEventWaitNodeSetEvent ] --- /** * Sets an event wait node's event. * *

Sets the event of event wait node {@code hNode} to {@code event}.

* * @param hNode node to set the event for * @param event event to use */ @NativeType("CUresult") public static int cuGraphEventWaitNodeSetEvent(@NativeType("CUgraphNode") long hNode, @NativeType("CUevent") long event) { long __functionAddress = Functions.GraphEventWaitNodeSetEvent; if (CHECKS) { check(__functionAddress); check(hNode); check(event); } return callPPI(hNode, event, __functionAddress); } // --- [ cuGraphAddExternalSemaphoresSignalNode ] --- /** * Unsafe version of: {@link #cuGraphAddExternalSemaphoresSignalNode GraphAddExternalSemaphoresSignalNode} * * @param numDependencies number of dependencies */ public static int ncuGraphAddExternalSemaphoresSignalNode(long phGraphNode, long hGraph, long dependencies, long numDependencies, long nodeParams) { long __functionAddress = Functions.GraphAddExternalSemaphoresSignalNode; if (CHECKS) { check(__functionAddress); check(hGraph); CUDA_EXT_SEM_SIGNAL_NODE_PARAMS.validate(nodeParams); } return callPPPPPI(phGraphNode, hGraph, dependencies, numDependencies, nodeParams, __functionAddress); } /** * Creates an external semaphore signal node and adds it to a graph. * *

Creates a new external semaphore signal node and adds it to {@code hGraph} with {@code numDependencies} dependencies specified via {@code dependencies} * and arguments specified in {@code nodeParams}. It is possible for {@code numDependencies} to be 0, in which case the node will be placed at the root of * the graph. {@code dependencies} may not have any duplicate entries. A handle to the new node will be returned in {@code phGraphNode}.

* *

Performs a signal operation on a set of externally allocated semaphore objects when the node is launched. The operation(s) will occur after all of the * node's dependencies have completed.

* * @param phGraphNode returns newly created node * @param hGraph graph to which to add the node * @param dependencies dependencies of the node * @param nodeParams parameters for the node */ @NativeType("CUresult") public static int cuGraphAddExternalSemaphoresSignalNode(@NativeType("CUgraphNode *") PointerBuffer phGraphNode, @NativeType("CUgraph") long hGraph, @Nullable @NativeType("CUgraphNode const *") PointerBuffer dependencies, @NativeType("CUDA_EXT_SEM_SIGNAL_NODE_PARAMS const *") CUDA_EXT_SEM_SIGNAL_NODE_PARAMS nodeParams) { if (CHECKS) { check(phGraphNode, 1); } return ncuGraphAddExternalSemaphoresSignalNode(memAddress(phGraphNode), hGraph, memAddressSafe(dependencies), remainingSafe(dependencies), nodeParams.address()); } // --- [ cuGraphExternalSemaphoresSignalNodeGetParams ] --- /** Unsafe version of: {@link #cuGraphExternalSemaphoresSignalNodeGetParams GraphExternalSemaphoresSignalNodeGetParams} */ public static int ncuGraphExternalSemaphoresSignalNodeGetParams(long hNode, long params_out) { long __functionAddress = Functions.GraphExternalSemaphoresSignalNodeGetParams; if (CHECKS) { check(__functionAddress); check(hNode); } return callPPI(hNode, params_out, __functionAddress); } /** * Returns an external semaphore signal node's parameters. * *

Returns the parameters of an external semaphore signal node {@code hNode} in {@code params_out}. The {@code extSemArray} and {@code paramsArray} * returned in {@code params_out}, are owned by the node. This memory remains valid until the node is destroyed or its parameters are modified, and should * not be modified directly. Use {@link #cuGraphExternalSemaphoresSignalNodeSetParams GraphExternalSemaphoresSignalNodeSetParams} to update the parameters of this node.

* * @param hNode node to get the parameters for * @param params_out pointer to return the parameters */ @NativeType("CUresult") public static int cuGraphExternalSemaphoresSignalNodeGetParams(@NativeType("CUgraphNode") long hNode, @NativeType("CUDA_EXT_SEM_SIGNAL_NODE_PARAMS *") CUDA_EXT_SEM_SIGNAL_NODE_PARAMS params_out) { return ncuGraphExternalSemaphoresSignalNodeGetParams(hNode, params_out.address()); } // --- [ cuGraphExternalSemaphoresSignalNodeSetParams ] --- /** Unsafe version of: {@link #cuGraphExternalSemaphoresSignalNodeSetParams GraphExternalSemaphoresSignalNodeSetParams} */ public static int ncuGraphExternalSemaphoresSignalNodeSetParams(long hNode, long nodeParams) { long __functionAddress = Functions.GraphExternalSemaphoresSignalNodeSetParams; if (CHECKS) { check(__functionAddress); check(hNode); CUDA_EXT_SEM_SIGNAL_NODE_PARAMS.validate(nodeParams); } return callPPI(hNode, nodeParams, __functionAddress); } /** * Sets an external semaphore signal node's parameters. * *

Sets the parameters of an external semaphore signal node {@code hNode} to {@code nodeParams}.

* * @param hNode node to set the parameters for * @param nodeParams parameters to copy */ @NativeType("CUresult") public static int cuGraphExternalSemaphoresSignalNodeSetParams(@NativeType("CUgraphNode") long hNode, @NativeType("CUDA_EXT_SEM_SIGNAL_NODE_PARAMS const *") CUDA_EXT_SEM_SIGNAL_NODE_PARAMS nodeParams) { return ncuGraphExternalSemaphoresSignalNodeSetParams(hNode, nodeParams.address()); } // --- [ cuGraphAddExternalSemaphoresWaitNode ] --- /** * Unsafe version of: {@link #cuGraphAddExternalSemaphoresWaitNode GraphAddExternalSemaphoresWaitNode} * * @param numDependencies number of dependencies */ public static int ncuGraphAddExternalSemaphoresWaitNode(long phGraphNode, long hGraph, long dependencies, long numDependencies, long nodeParams) { long __functionAddress = Functions.GraphAddExternalSemaphoresWaitNode; if (CHECKS) { check(__functionAddress); check(hGraph); CUDA_EXT_SEM_WAIT_NODE_PARAMS.validate(nodeParams); } return callPPPPPI(phGraphNode, hGraph, dependencies, numDependencies, nodeParams, __functionAddress); } /** * Creates an external semaphore wait node and adds it to a graph. * *

Creates a new external semaphore wait node and adds it to {@code hGraph} with {@code numDependencies} dependencies specified via {@code dependencies} * and arguments specified in {@code nodeParams}. It is possible for {@code numDependencies} to be 0, in which case the node will be placed at the root of * the graph. {@code dependencies} may not have any duplicate entries. A handle to the new node will be returned in {@code phGraphNode}.

* *

Performs a wait operation on a set of externally allocated semaphore objects when the node is launched. The node's dependent nodes will not be launched * until the wait operation has completed.

* * @param phGraphNode returns newly created node * @param hGraph graph to which to add the node * @param dependencies dependencies of the node * @param nodeParams parameters for the node */ @NativeType("CUresult") public static int cuGraphAddExternalSemaphoresWaitNode(@NativeType("CUgraphNode *") PointerBuffer phGraphNode, @NativeType("CUgraph") long hGraph, @Nullable @NativeType("CUgraphNode const *") PointerBuffer dependencies, @NativeType("CUDA_EXT_SEM_WAIT_NODE_PARAMS const *") CUDA_EXT_SEM_WAIT_NODE_PARAMS nodeParams) { if (CHECKS) { check(phGraphNode, 1); } return ncuGraphAddExternalSemaphoresWaitNode(memAddress(phGraphNode), hGraph, memAddressSafe(dependencies), remainingSafe(dependencies), nodeParams.address()); } // --- [ cuGraphExternalSemaphoresWaitNodeGetParams ] --- /** Unsafe version of: {@link #cuGraphExternalSemaphoresWaitNodeGetParams GraphExternalSemaphoresWaitNodeGetParams} */ public static int ncuGraphExternalSemaphoresWaitNodeGetParams(long hNode, long params_out) { long __functionAddress = Functions.GraphExternalSemaphoresWaitNodeGetParams; if (CHECKS) { check(__functionAddress); check(hNode); } return callPPI(hNode, params_out, __functionAddress); } /** * Returns an external semaphore wait node's parameters. * *

Returns the parameters of an external semaphore wait node {@code hNode} in {@code params_out}. The {@code extSemArray} and {@code paramsArray} returned * in {@code params_out} are owned by the node. This memory remains valid until the node is destroyed or its parameters are modified, and should not be * modified directly. Use {@link #cuGraphExternalSemaphoresWaitNodeSetParams GraphExternalSemaphoresWaitNodeSetParams} to update the parameters of this node.

* * @param hNode node to get the parameters for * @param params_out pointer to return the parameters */ @NativeType("CUresult") public static int cuGraphExternalSemaphoresWaitNodeGetParams(@NativeType("CUgraphNode") long hNode, @NativeType("CUDA_EXT_SEM_WAIT_NODE_PARAMS *") CUDA_EXT_SEM_WAIT_NODE_PARAMS params_out) { return ncuGraphExternalSemaphoresWaitNodeGetParams(hNode, params_out.address()); } // --- [ cuGraphExternalSemaphoresWaitNodeSetParams ] --- /** Unsafe version of: {@link #cuGraphExternalSemaphoresWaitNodeSetParams GraphExternalSemaphoresWaitNodeSetParams} */ public static int ncuGraphExternalSemaphoresWaitNodeSetParams(long hNode, long nodeParams) { long __functionAddress = Functions.GraphExternalSemaphoresWaitNodeSetParams; if (CHECKS) { check(__functionAddress); check(hNode); CUDA_EXT_SEM_WAIT_NODE_PARAMS.validate(nodeParams); } return callPPI(hNode, nodeParams, __functionAddress); } /** * Sets an external semaphore wait node's parameters. * *

Sets the parameters of an external semaphore wait node {@code hNode} to {@code nodeParams}.

* * @param hNode node to set the parameters for * @param nodeParams parameters to copy */ @NativeType("CUresult") public static int cuGraphExternalSemaphoresWaitNodeSetParams(@NativeType("CUgraphNode") long hNode, @NativeType("CUDA_EXT_SEM_WAIT_NODE_PARAMS const *") CUDA_EXT_SEM_WAIT_NODE_PARAMS nodeParams) { return ncuGraphExternalSemaphoresWaitNodeSetParams(hNode, nodeParams.address()); } // --- [ cuGraphAddMemAllocNode ] --- /** * Unsafe version of: {@link #cuGraphAddMemAllocNode GraphAddMemAllocNode} * * @param numDependencies number of dependencies */ public static int ncuGraphAddMemAllocNode(long phGraphNode, long hGraph, long dependencies, long numDependencies, long nodeParams) { long __functionAddress = Functions.GraphAddMemAllocNode; if (CHECKS) { check(__functionAddress); check(hGraph); } return callPPPPPI(phGraphNode, hGraph, dependencies, numDependencies, nodeParams, __functionAddress); } /** * Creates an allocation node and adds it to a graph. * *

Creates a new allocation node and adds it to {@code hGraph} with {@code numDependencies} dependencies specified via {@code dependencies} and arguments * specified in {@code nodeParams}. It is possible for {@code numDependencies} to be 0, in which case the node will be placed at the root of the graph. * {@code dependencies} may not have any duplicate entries. A handle to the new node will be returned in {@code phGraphNode}.

* *

When {@link #cuGraphAddMemAllocNode GraphAddMemAllocNode} creates an allocation node, it returns the address of the allocation in {@code nodeParams.dptr}. The allocation's address * remains fixed across instantiations and launches.

* *

If the allocation is freed in the same graph, by creating a free node using {@link #cuGraphAddMemFreeNode GraphAddMemFreeNode}, the allocation can be accessed by nodes ordered * after the allocation node but before the free node. These allocations cannot be freed outside the owning graph, and they can only be freed once in the * owning graph.

* *

If the allocation is not freed in the same graph, then it can be accessed not only by nodes in the graph which are ordered after the allocation node, * but also by stream operations ordered after the graph's execution but before the allocation is freed.

* *

Allocations which are not freed in the same graph can be freed by:

* *
  • passing the allocation to {@link #cuMemFreeAsync MemFreeAsync} or {@link #cuMemFree MemFree};
  • launching a graph with a free node for that allocation; or
  • specifying {@link #CUDA_GRAPH_INSTANTIATE_FLAG_AUTO_FREE_ON_LAUNCH} during instantiation, which makes each launch behave as though it called {@link #cuMemFreeAsync MemFreeAsync} for every unfreed allocation.
* *

It is not possible to free an allocation in both the owning graph and another graph. If the allocation is freed in the same graph, a free node cannot * be added to another graph. If the allocation is freed in another graph, a free node can no longer be added to the owning graph.

* *

The following restrictions apply to graphs which contain allocation and/or memory free nodes:

* *
  • Nodes and edges of the graph cannot be deleted.
  • The graph cannot be used in a child node.
  • Only one instantiation of the graph may exist at any point in time.
  • The graph cannot be cloned.
* * @param phGraphNode returns newly created node * @param hGraph graph to which to add the node * @param dependencies dependencies of the node * @param nodeParams parameters for the node */ @NativeType("CUresult") public static int cuGraphAddMemAllocNode(@NativeType("CUgraphNode *") PointerBuffer phGraphNode, @NativeType("CUgraph") long hGraph, @Nullable @NativeType("CUgraphNode const *") PointerBuffer dependencies, @NativeType("CUDA_MEM_ALLOC_NODE_PARAMS *") CUDA_MEM_ALLOC_NODE_PARAMS nodeParams) { if (CHECKS) { check(phGraphNode, 1); } return ncuGraphAddMemAllocNode(memAddress(phGraphNode), hGraph, memAddressSafe(dependencies), remainingSafe(dependencies), nodeParams.address()); } // --- [ cuGraphMemAllocNodeGetParams ] --- /** Unsafe version of: {@link #cuGraphMemAllocNodeGetParams GraphMemAllocNodeGetParams} */ public static int ncuGraphMemAllocNodeGetParams(long hNode, long params_out) { long __functionAddress = Functions.GraphMemAllocNodeGetParams; if (CHECKS) { check(__functionAddress); check(hNode); } return callPPI(hNode, params_out, __functionAddress); } /** * Returns a memory alloc node's parameters. * *

Returns the parameters of a memory alloc node {@code hNode} in {@code params_out}. The {@code poolProps} and {@code accessDescs} returned in {@code params_out} are owned by the node. This memory remains valid until the node is destroyed. The returned parameters must not be modified.

* * @param hNode node to get the parameters for * @param params_out pointer to return the parameters */ @NativeType("CUresult") public static int cuGraphMemAllocNodeGetParams(@NativeType("CUgraphNode") long hNode, @NativeType("CUDA_MEM_ALLOC_NODE_PARAMS *") CUDA_MEM_ALLOC_NODE_PARAMS params_out) { return ncuGraphMemAllocNodeGetParams(hNode, params_out.address()); } // --- [ cuGraphAddMemFreeNode ] --- /** * Unsafe version of: {@link #cuGraphAddMemFreeNode GraphAddMemFreeNode} * * @param numDependencies number of dependencies */ public static int ncuGraphAddMemFreeNode(long phGraphNode, long hGraph, long dependencies, long numDependencies, long dptr) { long __functionAddress = Functions.GraphAddMemFreeNode; if (CHECKS) { check(__functionAddress); check(hGraph); check(dptr); } return callPPPPPI(phGraphNode, hGraph, dependencies, numDependencies, dptr, __functionAddress); } /** * Creates a memory free node and adds it to a graph. * *

Creates a new memory free node and adds it to {@code hGraph} with {@code numDependencies} dependencies specified via {@code dependencies} and the address to free specified in {@code dptr}. It is possible for {@code numDependencies} to be 0, in which case the node will be placed at the root of the graph. {@code dependencies} may not have any duplicate entries. A handle to the new node will be returned in {@code phGraphNode}.

* *

{@link #cuGraphAddMemFreeNode GraphAddMemFreeNode} will return {@link #CUDA_ERROR_INVALID_VALUE} if the user attempts to free:

* *
  • an allocation twice in the same graph.
  • an address that was not returned by an allocation node.
  • an invalid address.
* *

The following restrictions apply to graphs which contain allocation and/or memory free nodes:

* *
  • Nodes and edges of the graph cannot be deleted.
  • The graph cannot be used in a child node.
  • Only one instantiation of the graph may exist at any point in time.
  • The graph cannot be cloned.
* * @param phGraphNode returns newly created node * @param hGraph graph to which to add the node * @param dependencies dependencies of the node * @param dptr address of memory to free */ @NativeType("CUresult") public static int cuGraphAddMemFreeNode(@NativeType("CUgraphNode *") PointerBuffer phGraphNode, @NativeType("CUgraph") long hGraph, @Nullable @NativeType("CUgraphNode const *") PointerBuffer dependencies, @NativeType("CUdeviceptr") long dptr) { if (CHECKS) { check(phGraphNode, 1); } return ncuGraphAddMemFreeNode(memAddress(phGraphNode), hGraph, memAddressSafe(dependencies), remainingSafe(dependencies), dptr); } // --- [ cuGraphMemFreeNodeGetParams ] --- /** Unsafe version of: {@link #cuGraphMemFreeNodeGetParams GraphMemFreeNodeGetParams} */ public static int ncuGraphMemFreeNodeGetParams(long hNode, long dptr_out) { long __functionAddress = Functions.GraphMemFreeNodeGetParams; if (CHECKS) { check(__functionAddress); check(hNode); } return callPPI(hNode, dptr_out, __functionAddress); } /** * Returns a memory free node's parameters. * *
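For orientation (not from the original documentation), a minimal sketch of pairing an allocation node with a free node inside the same graph. It assumes a valid {@code hGraph} targeting device 0 and static imports of org.lwjgl.cuda.CU.* and org.lwjgl.system.MemoryStack.*; the CUDA_MEM_ALLOC_NODE_PARAMS/CUmemPoolProps accessor names and the CU_MEM_ALLOCATION_TYPE_PINNED / CU_MEM_LOCATION_TYPE_DEVICE constants are assumptions to verify against the bindings.

// Hedged sketch: allocation node at the graph root, freed later in the same graph.
try (MemoryStack stack = stackPush()) {
    CUDA_MEM_ALLOC_NODE_PARAMS allocParams = CUDA_MEM_ALLOC_NODE_PARAMS.calloc(stack);
    allocParams.bytesize(1 << 20);                                  // 1 MiB, assumed accessor name
    allocParams.poolProps().allocType(CU_MEM_ALLOCATION_TYPE_PINNED)
               .location().type(CU_MEM_LOCATION_TYPE_DEVICE).id(0); // assumed nested accessors

    PointerBuffer phAllocNode = stack.mallocPointer(1);
    int err = cuGraphAddMemAllocNode(phAllocNode, hGraph, null, allocParams);
    if (err != CUDA_SUCCESS) throw new IllegalStateException("cuGraphAddMemAllocNode: " + err);

    long allocNode = phAllocNode.get(0);
    long dptr      = allocParams.dptr(); // the address stays fixed across instantiations and launches

    // ... add nodes that use dptr, ordered after allocNode ...

    // Free the allocation in the same graph, ordered after its consumers (here: after allocNode only).
    PointerBuffer phFreeNode   = stack.mallocPointer(1);
    PointerBuffer dependencies = stack.mallocPointer(1).put(0, allocNode);
    err = cuGraphAddMemFreeNode(phFreeNode, hGraph, dependencies, dptr);
    if (err != CUDA_SUCCESS) throw new IllegalStateException("cuGraphAddMemFreeNode: " + err);
}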

Returns the address of a memory free node {@code hNode} in {@code dptr_out}.

* * @param hNode node to get the parameters for * @param dptr_out pointer to return the device address */ @NativeType("CUresult") public static int cuGraphMemFreeNodeGetParams(@NativeType("CUgraphNode") long hNode, @NativeType("CUdeviceptr *") PointerBuffer dptr_out) { if (CHECKS) { check(dptr_out, 1); } return ncuGraphMemFreeNodeGetParams(hNode, memAddress(dptr_out)); } // --- [ cuDeviceGraphMemTrim ] --- /** * Free unused memory that was cached on the specified device for use with graphs back to the OS. * *

Blocks which are not in use by a graph that is either currently executing or scheduled to execute are freed back to the operating system.

* * @param device the device for which cached memory should be freed */ @NativeType("CUresult") public static int cuDeviceGraphMemTrim(@NativeType("CUdevice") int device) { long __functionAddress = Functions.DeviceGraphMemTrim; if (CHECKS) { check(__functionAddress); } return callI(device, __functionAddress); } // --- [ cuDeviceGetGraphMemAttribute ] --- /** Unsafe version of: {@link #cuDeviceGetGraphMemAttribute DeviceGetGraphMemAttribute} */ public static int ncuDeviceGetGraphMemAttribute(int device, int attr, long value) { long __functionAddress = Functions.DeviceGetGraphMemAttribute; if (CHECKS) { check(__functionAddress); } return callPI(device, attr, value, __functionAddress); } /** * Query asynchronous allocation attributes related to graphs. * *

Valid attributes are:

* *
  • {@link #CU_GRAPH_MEM_ATTR_USED_MEM_CURRENT GRAPH_MEM_ATTR_USED_MEM_CURRENT}: Amount of memory, in bytes, currently associated with graphs.
  • {@link #CU_GRAPH_MEM_ATTR_USED_MEM_HIGH GRAPH_MEM_ATTR_USED_MEM_HIGH}: High watermark of memory, in bytes, associated with graphs since the last time it was reset. High watermark can only be reset to zero.
  • {@link #CU_GRAPH_MEM_ATTR_RESERVED_MEM_CURRENT GRAPH_MEM_ATTR_RESERVED_MEM_CURRENT}: Amount of memory, in bytes, currently allocated for use by the CUDA graphs asynchronous allocator.
  • {@link #CU_GRAPH_MEM_ATTR_RESERVED_MEM_HIGH GRAPH_MEM_ATTR_RESERVED_MEM_HIGH}: High watermark of memory, in bytes, currently allocated for use by the CUDA graphs asynchronous allocator.
* * @param device specifies the scope of the query * @param attr attribute to get * @param value retrieved value */ @NativeType("CUresult") public static int cuDeviceGetGraphMemAttribute(@NativeType("CUdevice") int device, @NativeType("CUgraphMem_attribute") int attr, @NativeType("void *") ByteBuffer value) { return ncuDeviceGetGraphMemAttribute(device, attr, memAddress(value)); } /** * Query asynchronous allocation attributes related to graphs. * *

Valid attributes are:

* *
  • {@link #CU_GRAPH_MEM_ATTR_USED_MEM_CURRENT GRAPH_MEM_ATTR_USED_MEM_CURRENT}: Amount of memory, in bytes, currently associated with graphs.
  • {@link #CU_GRAPH_MEM_ATTR_USED_MEM_HIGH GRAPH_MEM_ATTR_USED_MEM_HIGH}: High watermark of memory, in bytes, associated with graphs since the last time it was reset. High watermark can only be reset to zero.
  • {@link #CU_GRAPH_MEM_ATTR_RESERVED_MEM_CURRENT GRAPH_MEM_ATTR_RESERVED_MEM_CURRENT}: Amount of memory, in bytes, currently allocated for use by the CUDA graphs asynchronous allocator.
  • {@link #CU_GRAPH_MEM_ATTR_RESERVED_MEM_HIGH GRAPH_MEM_ATTR_RESERVED_MEM_HIGH}: High watermark of memory, in bytes, currently allocated for use by the CUDA graphs asynchronous allocator.
* * @param device specifies the scope of the query * @param attr attribute to get * @param value retrieved value */ @NativeType("CUresult") public static int cuDeviceGetGraphMemAttribute(@NativeType("CUdevice") int device, @NativeType("CUgraphMem_attribute") int attr, @NativeType("void *") LongBuffer value) { return ncuDeviceGetGraphMemAttribute(device, attr, memAddress(value)); } // --- [ cuDeviceSetGraphMemAttribute ] --- /** Unsafe version of: {@link #cuDeviceSetGraphMemAttribute DeviceSetGraphMemAttribute} */ public static int ncuDeviceSetGraphMemAttribute(int device, int attr, long value) { long __functionAddress = Functions.DeviceSetGraphMemAttribute; if (CHECKS) { check(__functionAddress); } return callPI(device, attr, value, __functionAddress); } /** * Set asynchronous allocation attributes related to graphs. * *

Valid attributes are:

* *
  • {@link #CU_GRAPH_MEM_ATTR_USED_MEM_HIGH GRAPH_MEM_ATTR_USED_MEM_HIGH}: High watermark of memory, in bytes, associated with graphs since the last time it was reset. High watermark can only be reset to zero.
  • {@link #CU_GRAPH_MEM_ATTR_RESERVED_MEM_HIGH GRAPH_MEM_ATTR_RESERVED_MEM_HIGH}: High watermark of memory, in bytes, currently allocated for use by the CUDA graphs asynchronous allocator.
* * @param device specifies the scope of the query * @param attr attribute to get * @param value pointer to value to set */ @NativeType("CUresult") public static int cuDeviceSetGraphMemAttribute(@NativeType("CUdevice") int device, @NativeType("CUgraphMem_attribute") int attr, @NativeType("void *") ByteBuffer value) { return ncuDeviceSetGraphMemAttribute(device, attr, memAddress(value)); } /** * Set asynchronous allocation attributes related to graphs. * *

Valid attributes are:

* *
  • {@link #CU_GRAPH_MEM_ATTR_USED_MEM_HIGH GRAPH_MEM_ATTR_USED_MEM_HIGH}: High watermark of memory, in bytes, associated with graphs since the last time it was reset. High watermark can only be reset to zero.
  • {@link #CU_GRAPH_MEM_ATTR_RESERVED_MEM_HIGH GRAPH_MEM_ATTR_RESERVED_MEM_HIGH}: High watermark of memory, in bytes, currently allocated for use by the CUDA graphs asynchronous allocator.
* * @param device specifies the scope of the query * @param attr attribute to get * @param value pointer to value to set */ @NativeType("CUresult") public static int cuDeviceSetGraphMemAttribute(@NativeType("CUdevice") int device, @NativeType("CUgraphMem_attribute") int attr, @NativeType("void *") LongBuffer value) { return ncuDeviceSetGraphMemAttribute(device, attr, memAddress(value)); } // --- [ cuGraphClone ] --- /** Unsafe version of: {@link #cuGraphClone GraphClone} */ public static int ncuGraphClone(long phGraphClone, long originalGraph) { long __functionAddress = Functions.GraphClone; if (CHECKS) { check(__functionAddress); check(originalGraph); } return callPPI(phGraphClone, originalGraph, __functionAddress); } /** * Clones a graph. * *
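A small usage sketch (not part of the generated documentation), assuming an existing {@code device} handle and static imports of org.lwjgl.cuda.CU.* and org.lwjgl.system.MemoryStack.*: query the current and peak graph memory figures, reset the high watermarks, and trim the cache.

// Hedged sketch: inspect and reset the graph memory high watermarks, then trim cached memory.
try (MemoryStack stack = stackPush()) {
    LongBuffer value = stack.mallocLong(1);

    cuDeviceGetGraphMemAttribute(device, CU_GRAPH_MEM_ATTR_USED_MEM_CURRENT, value);
    System.out.println("Used by graphs now:      " + value.get(0) + " bytes");

    cuDeviceGetGraphMemAttribute(device, CU_GRAPH_MEM_ATTR_RESERVED_MEM_HIGH, value);
    System.out.println("Allocator reserved peak: " + value.get(0) + " bytes");

    // The high watermarks can only be reset to zero.
    value.put(0, 0L);
    cuDeviceSetGraphMemAttribute(device, CU_GRAPH_MEM_ATTR_USED_MEM_HIGH, value);
    cuDeviceSetGraphMemAttribute(device, CU_GRAPH_MEM_ATTR_RESERVED_MEM_HIGH, value);

    // Return unused cached graph memory to the OS.
    cuDeviceGraphMemTrim(device);
}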

This function creates a copy of {@code originalGraph} and returns it in {@code phGraphClone}. All parameters are copied into the cloned graph. The * original graph may be modified after this call without affecting the clone.

* *

Child graph nodes in the original graph are recursively copied into the clone.

* * @param phGraphClone returns newly created cloned graph * @param originalGraph graph to clone */ @NativeType("CUresult") public static int cuGraphClone(@NativeType("CUgraph *") PointerBuffer phGraphClone, @NativeType("CUgraph") long originalGraph) { if (CHECKS) { check(phGraphClone, 1); } return ncuGraphClone(memAddress(phGraphClone), originalGraph); } // --- [ cuGraphNodeFindInClone ] --- /** Unsafe version of: {@link #cuGraphNodeFindInClone GraphNodeFindInClone} */ public static int ncuGraphNodeFindInClone(long phNode, long hOriginalNode, long hClonedGraph) { long __functionAddress = Functions.GraphNodeFindInClone; if (CHECKS) { check(__functionAddress); check(hOriginalNode); check(hClonedGraph); } return callPPPI(phNode, hOriginalNode, hClonedGraph, __functionAddress); } /** * Finds a cloned version of a node. * *

This function returns the node in {@code hClonedGraph} corresponding to {@code hOriginalNode} in the original graph.

* *

{@code hClonedGraph} must have been cloned from {@code hOriginalGraph} via {@link #cuGraphClone GraphClone}. {@code hOriginalNode} must have been in {@code hOriginalGraph} at the time of the call to {@link #cuGraphClone GraphClone}, and the corresponding cloned node in {@code hClonedGraph} must not have been removed. The cloned node is then returned via {@code phNode}.

* * @param phNode returns handle to the cloned node * @param hOriginalNode handle to the original node * @param hClonedGraph cloned graph to query */ @NativeType("CUresult") public static int cuGraphNodeFindInClone(@NativeType("CUgraphNode *") PointerBuffer phNode, @NativeType("CUgraphNode") long hOriginalNode, @NativeType("CUgraph") long hClonedGraph) { if (CHECKS) { check(phNode, 1); } return ncuGraphNodeFindInClone(memAddress(phNode), hOriginalNode, hClonedGraph); } // --- [ cuGraphNodeGetType ] --- /** Unsafe version of: {@link #cuGraphNodeGetType GraphNodeGetType} */ public static int ncuGraphNodeGetType(long hNode, long type) { long __functionAddress = Functions.GraphNodeGetType; if (CHECKS) { check(__functionAddress); check(hNode); } return callPPI(hNode, type, __functionAddress); } /** * Returns a node's type. * *
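A brief sketch (illustrative only), assuming a valid {@code hGraph}, a node handle {@code hOriginalNode} from it, and static imports of org.lwjgl.cuda.CU.* and org.lwjgl.system.MemoryStack.*: clone the graph, then map the original node to its counterpart in the clone.

// Hedged sketch: clone a graph and locate the clone's counterpart of a known node.
try (MemoryStack stack = stackPush()) {
    PointerBuffer phClone = stack.mallocPointer(1);
    int err = cuGraphClone(phClone, hGraph);
    if (err != CUDA_SUCCESS) throw new IllegalStateException("cuGraphClone: " + err);
    long hClonedGraph = phClone.get(0);

    PointerBuffer phNode = stack.mallocPointer(1);
    err = cuGraphNodeFindInClone(phNode, hOriginalNode, hClonedGraph);
    if (err != CUDA_SUCCESS) throw new IllegalStateException("cuGraphNodeFindInClone: " + err);
    long hClonedNode = phNode.get(0); // plays the same role as hOriginalNode, but inside hClonedGraph
}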

Returns the node type of {@code hNode} in {@code type}.

* * @param hNode node to query * @param type pointer to return the node type */ @NativeType("CUresult") public static int cuGraphNodeGetType(@NativeType("CUgraphNode") long hNode, @NativeType("CUgraphNodeType *") IntBuffer type) { if (CHECKS) { check(type, 1); } return ncuGraphNodeGetType(hNode, memAddress(type)); } // --- [ cuGraphGetNodes ] --- /** * Unsafe version of: {@link #cuGraphGetNodes GraphGetNodes} * * @param numNodes see description */ public static int ncuGraphGetNodes(long hGraph, long nodes, long numNodes) { long __functionAddress = Functions.GraphGetNodes; if (CHECKS) { check(__functionAddress); check(hGraph); } return callPPPI(hGraph, nodes, numNodes, __functionAddress); } /** * Returns a graph's nodes. * *

Returns a list of {@code hGraph's} nodes. {@code nodes} may be {@code NULL}, in which case this function will return the number of nodes in {@code numNodes}. Otherwise, {@code numNodes} entries will be filled in. If {@code numNodes} is higher than the actual number of nodes, the remaining entries in {@code nodes} will be set to {@code NULL}, and the number of nodes actually obtained will be returned in {@code numNodes}.

* * @param hGraph graph to query * @param nodes pointer to return the nodes * @param numNodes see description */ @NativeType("CUresult") public static int cuGraphGetNodes(@NativeType("CUgraph") long hGraph, @Nullable @NativeType("CUgraphNode *") PointerBuffer nodes, @NativeType("size_t *") PointerBuffer numNodes) { if (CHECKS) { check(numNodes, 1); checkSafe(nodes, numNodes.get(numNodes.position())); } return ncuGraphGetNodes(hGraph, memAddressSafe(nodes), memAddress(numNodes)); } // --- [ cuGraphGetRootNodes ] --- /** * Unsafe version of: {@link #cuGraphGetRootNodes GraphGetRootNodes} * * @param numRootNodes see description */ public static int ncuGraphGetRootNodes(long hGraph, long rootNodes, long numRootNodes) { long __functionAddress = Functions.GraphGetRootNodes; if (CHECKS) { check(__functionAddress); check(hGraph); } return callPPPI(hGraph, rootNodes, numRootNodes, __functionAddress); } /** * Returns a graph's root nodes. * *
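The count-then-fill pattern above is typically driven as in the following sketch (not from the original documentation), which also queries each node's type; it assumes a valid {@code hGraph} and static imports of org.lwjgl.cuda.CU.* and org.lwjgl.system.MemoryStack.*.

// Hedged sketch: the usual two-call pattern, then query each node's type.
try (MemoryStack stack = stackPush()) {
    PointerBuffer numNodes = stack.mallocPointer(1);
    cuGraphGetNodes(hGraph, null, numNodes);              // 1st call: count only

    PointerBuffer nodes = stack.mallocPointer((int)numNodes.get(0));
    cuGraphGetNodes(hGraph, nodes, numNodes);             // 2nd call: fill the array

    IntBuffer type = stack.mallocInt(1);
    for (int i = 0; i < numNodes.get(0); i++) {
        cuGraphNodeGetType(nodes.get(i), type);
        System.out.println("node " + i + " type = " + type.get(0)); // e.g. CU_GRAPH_NODE_TYPE_KERNEL
    }
}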

Returns a list of {@code hGraph's} root nodes. {@code rootNodes} may be {@code NULL}, in which case this function will return the number of root nodes in {@code numRootNodes}. Otherwise, {@code numRootNodes} entries will be filled in. If {@code numRootNodes} is higher than the actual number of root nodes, the remaining entries in {@code rootNodes} will be set to {@code NULL}, and the number of nodes actually obtained will be returned in {@code numRootNodes}.

* * @param hGraph graph to query * @param rootNodes pointer to return the root nodes * @param numRootNodes see description */ @NativeType("CUresult") public static int cuGraphGetRootNodes(@NativeType("CUgraph") long hGraph, @Nullable @NativeType("CUgraphNode *") PointerBuffer rootNodes, @NativeType("size_t *") PointerBuffer numRootNodes) { if (CHECKS) { check(numRootNodes, 1); checkSafe(rootNodes, numRootNodes.get(numRootNodes.position())); } return ncuGraphGetRootNodes(hGraph, memAddressSafe(rootNodes), memAddress(numRootNodes)); } // --- [ cuGraphGetEdges ] --- /** * Unsafe version of: {@link #cuGraphGetEdges GraphGetEdges} * * @param numEdges see description */ public static int ncuGraphGetEdges(long hGraph, long from, long to, long numEdges) { long __functionAddress = Functions.GraphGetEdges; if (CHECKS) { check(__functionAddress); check(hGraph); } return callPPPPI(hGraph, from, to, numEdges, __functionAddress); } /** * Returns a graph's dependency edges. * *

Returns a list of {@code hGraph's} dependency edges. Edges are returned via corresponding indices in {@code from} and {@code to}; that is, the node in {@code to[i]} has a dependency on the node in {@code from[i]}. {@code from} and {@code to} may both be {@code NULL}, in which case this function only returns the number of edges in {@code numEdges}. Otherwise, {@code numEdges} entries will be filled in. If {@code numEdges} is higher than the actual number of edges, the remaining entries in {@code from} and {@code to} will be set to {@code NULL}, and the number of edges actually returned will be written to {@code numEdges}.

* * @param hGraph graph to get the edges from * @param from location to return edge endpoints * @param to location to return edge endpoints * @param numEdges see description */ @NativeType("CUresult") public static int cuGraphGetEdges(@NativeType("CUgraph") long hGraph, @Nullable @NativeType("CUgraphNode *") PointerBuffer from, @Nullable @NativeType("CUgraphNode *") PointerBuffer to, @NativeType("size_t *") PointerBuffer numEdges) { if (CHECKS) { check(numEdges, 1); checkSafe(from, numEdges.get(numEdges.position())); checkSafe(to, numEdges.get(numEdges.position())); } return ncuGraphGetEdges(hGraph, memAddressSafe(from), memAddressSafe(to), memAddress(numEdges)); } // --- [ cuGraphNodeGetDependencies ] --- /** * Unsafe version of: {@link #cuGraphNodeGetDependencies GraphNodeGetDependencies} * * @param numDependencies see description */ public static int ncuGraphNodeGetDependencies(long hNode, long dependencies, long numDependencies) { long __functionAddress = Functions.GraphNodeGetDependencies; if (CHECKS) { check(__functionAddress); check(hNode); } return callPPPI(hNode, dependencies, numDependencies, __functionAddress); } /** * Returns a node's dependencies. * *

Returns a list of {@code node's} dependencies. {@code dependencies} may be {@code NULL}, in which case this function will return the number of dependencies in * {@code numDependencies}. Otherwise, {@code numDependencies} entries will be filled in. If {@code numDependencies} is higher than the actual number of * dependencies, the remaining entries in {@code dependencies} will be set to {@code NULL}, and the number of nodes actually obtained will be returned in * {@code numDependencies}.

* * @param hNode node to query * @param dependencies pointer to return the dependencies * @param numDependencies see description */ @NativeType("CUresult") public static int cuGraphNodeGetDependencies(@NativeType("CUgraphNode") long hNode, @Nullable @NativeType("CUgraphNode *") PointerBuffer dependencies, @NativeType("size_t *") PointerBuffer numDependencies) { if (CHECKS) { check(numDependencies, 1); checkSafe(dependencies, numDependencies.get(numDependencies.position())); } return ncuGraphNodeGetDependencies(hNode, memAddressSafe(dependencies), memAddress(numDependencies)); } // --- [ cuGraphNodeGetDependentNodes ] --- /** * Unsafe version of: {@link #cuGraphNodeGetDependentNodes GraphNodeGetDependentNodes} * * @param numDependentNodes see description */ public static int ncuGraphNodeGetDependentNodes(long hNode, long dependentNodes, long numDependentNodes) { long __functionAddress = Functions.GraphNodeGetDependentNodes; if (CHECKS) { check(__functionAddress); check(hNode); } return callPPPI(hNode, dependentNodes, numDependentNodes, __functionAddress); } /** * Returns a node's dependent nodes. * *

Returns a list of {@code node's} dependent nodes. {@code dependentNodes} may be {@code NULL}, in which case this function will return the number of dependent * nodes in {@code numDependentNodes}. Otherwise, {@code numDependentNodes} entries will be filled in. If {@code numDependentNodes} is higher than the * actual number of dependent nodes, the remaining entries in {@code dependentNodes} will be set to {@code NULL}, and the number of nodes actually obtained will * be returned in {@code numDependentNodes}.

* * @param hNode node to query * @param dependentNodes pointer to return the dependent nodes * @param numDependentNodes see description */ @NativeType("CUresult") public static int cuGraphNodeGetDependentNodes(@NativeType("CUgraphNode") long hNode, @Nullable @NativeType("CUgraphNode *") PointerBuffer dependentNodes, @NativeType("size_t *") PointerBuffer numDependentNodes) { if (CHECKS) { check(numDependentNodes, 1); checkSafe(dependentNodes, numDependentNodes.get(numDependentNodes.position())); } return ncuGraphNodeGetDependentNodes(hNode, memAddressSafe(dependentNodes), memAddress(numDependentNodes)); } // --- [ cuGraphAddDependencies ] --- /** * Unsafe version of: {@link #cuGraphAddDependencies GraphAddDependencies} * * @param numDependencies number of dependencies to be added */ public static int ncuGraphAddDependencies(long hGraph, long from, long to, long numDependencies) { long __functionAddress = Functions.GraphAddDependencies; if (CHECKS) { check(__functionAddress); check(hGraph); } return callPPPPI(hGraph, from, to, numDependencies, __functionAddress); } /** * Adds dependency edges to a graph. * *

The number of dependencies to be added is defined by {@code numDependencies}. Elements in {@code from} and {@code to} at corresponding indices define a dependency. Each node in {@code from} and {@code to} must belong to {@code hGraph}.

* *

If {@code numDependencies} is 0, elements in {@code from} and {@code to} will be ignored. Specifying an existing dependency will return an error.

* * @param hGraph graph to which dependencies are added * @param from array of nodes that provide the dependencies * @param to array of dependent nodes */ @NativeType("CUresult") public static int cuGraphAddDependencies(@NativeType("CUgraph") long hGraph, @Nullable @NativeType("CUgraphNode const *") PointerBuffer from, @Nullable @NativeType("CUgraphNode const *") PointerBuffer to) { if (CHECKS) { checkSafe(to, remainingSafe(from)); } return ncuGraphAddDependencies(hGraph, memAddressSafe(from), memAddressSafe(to), remainingSafe(from)); } // --- [ cuGraphRemoveDependencies ] --- /** * Unsafe version of: {@link #cuGraphRemoveDependencies GraphRemoveDependencies} * * @param numDependencies number of dependencies to be removed */ public static int ncuGraphRemoveDependencies(long hGraph, long from, long to, long numDependencies) { long __functionAddress = Functions.GraphRemoveDependencies; if (CHECKS) { check(__functionAddress); check(hGraph); } return callPPPPI(hGraph, from, to, numDependencies, __functionAddress); } /** * Removes dependency edges from a graph. * *
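A minimal sketch (illustrative only) of adding two edges at once, assuming existing node handles {@code nodeA}, {@code nodeB}, {@code nodeC} in {@code hGraph} and static imports of org.lwjgl.cuda.CU.* and org.lwjgl.system.MemoryStack.*.

// Hedged sketch: make nodeB and nodeC depend on nodeA (two edges: A->B, A->C).
try (MemoryStack stack = stackPush()) {
    PointerBuffer from = stack.mallocPointer(2).put(0, nodeA).put(1, nodeA);
    PointerBuffer to   = stack.mallocPointer(2).put(0, nodeB).put(1, nodeC);

    int err = cuGraphAddDependencies(hGraph, from, to);   // numDependencies = from.remaining() = 2
    if (err != CUDA_SUCCESS) throw new IllegalStateException("cuGraphAddDependencies: " + err);
}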

The number of {@code dependencies} to be removed is defined by {@code numDependencies}. Elements in {@code from} and {@code to} at corresponding * indices define a dependency. Each node in {@code from} and {@code to} must belong to {@code hGraph}.

* *

If {@code numDependencies} is 0, elements in {@code from} and {@code to} will be ignored. Specifying a non-existing dependency will return an error.

* *

Dependencies cannot be removed from graphs which contain allocation or free nodes. Any attempt to do so will return an error.

* * @param hGraph graph from which to remove dependencies * @param from array of nodes that provide the dependencies * @param to array of dependent nodes */ @NativeType("CUresult") public static int cuGraphRemoveDependencies(@NativeType("CUgraph") long hGraph, @Nullable @NativeType("CUgraphNode const *") PointerBuffer from, @Nullable @NativeType("CUgraphNode const *") PointerBuffer to) { if (CHECKS) { checkSafe(to, remainingSafe(from)); } return ncuGraphRemoveDependencies(hGraph, memAddressSafe(from), memAddressSafe(to), remainingSafe(from)); } // --- [ cuGraphDestroyNode ] --- /** * Remove a node from the graph. * *

Removes {@code hNode} from its graph. This operation also severs any dependencies of other nodes on {@code hNode} and vice versa.

* *

Nodes which belong to a graph which contains allocation or free nodes cannot be destroyed. Any attempt to do so will return an error.

* * @param hNode node to remove */ @NativeType("CUresult") public static int cuGraphDestroyNode(@NativeType("CUgraphNode") long hNode) { long __functionAddress = Functions.GraphDestroyNode; if (CHECKS) { check(__functionAddress); check(hNode); } return callPI(hNode, __functionAddress); } // --- [ cuGraphInstantiate ] --- /** * Unsafe version of: {@link #cuGraphInstantiate GraphInstantiate} * * @param bufferSize size of the log buffer in bytes */ public static int ncuGraphInstantiate(long phGraphExec, long hGraph, long phErrorNode, long logBuffer, long bufferSize) { long __functionAddress = Functions.GraphInstantiate; if (CHECKS) { check(__functionAddress); check(hGraph); } return callPPPPPI(phGraphExec, hGraph, phErrorNode, logBuffer, bufferSize, __functionAddress); } /** * Creates an executable graph from a graph. * *

Instantiates {@code hGraph} as an executable graph. The graph is validated for any structural constraints or intra-node constraints which were not * previously validated. If instantiation is successful, a handle to the instantiated graph is returned in {@code phGraphExec}.

* *

If there are any errors, diagnostic information may be returned in {@code errorNode} and {@code logBuffer}. This is the primary way to inspect * instantiation errors. The output will be null terminated unless the diagnostics overflow the buffer. In this case, they will be truncated, and the last * byte can be inspected to determine if truncation occurred.

* * @param phGraphExec returns instantiated graph * @param hGraph graph to instantiate * @param phErrorNode in case of an instantiation error, this may be modified to indicate a node contributing to the error * @param logBuffer a character buffer to store diagnostic messages */ @NativeType("CUresult") public static int cuGraphInstantiate(@NativeType("CUgraphExec *") PointerBuffer phGraphExec, @NativeType("CUgraph") long hGraph, @NativeType("CUgraphNode *") PointerBuffer phErrorNode, @NativeType("char *") ByteBuffer logBuffer) { if (CHECKS) { check(phGraphExec, 1); check(phErrorNode, 1); } return ncuGraphInstantiate(memAddress(phGraphExec), hGraph, memAddress(phErrorNode), memAddress(logBuffer), logBuffer.remaining()); } // --- [ cuGraphInstantiateWithFlags ] --- /** Unsafe version of: {@link #cuGraphInstantiateWithFlags GraphInstantiateWithFlags} */ public static int ncuGraphInstantiateWithFlags(long phGraphExec, long hGraph, long flags) { long __functionAddress = Functions.GraphInstantiateWithFlags; if (CHECKS) { check(__functionAddress); check(hGraph); } return callPPJI(phGraphExec, hGraph, flags, __functionAddress); } /** * Creates an executable graph from a graph. * *
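A hedged sketch of the instantiation call above, assuming a valid {@code hGraph} and static imports of org.lwjgl.cuda.CU.*, org.lwjgl.system.MemoryStack.* and org.lwjgl.system.MemoryUtil.*: on failure, the log buffer and error node are decoded for diagnostics.

// Hedged sketch: instantiate and surface the diagnostic log on failure.
try (MemoryStack stack = stackPush()) {
    PointerBuffer phGraphExec = stack.mallocPointer(1);
    PointerBuffer phErrorNode = stack.mallocPointer(1);
    ByteBuffer    logBuffer   = stack.calloc(1024);

    int err = cuGraphInstantiate(phGraphExec, hGraph, phErrorNode, logBuffer);
    if (err != CUDA_SUCCESS) {
        String log = memUTF8(memAddress(logBuffer));       // NUL-terminated unless it overflowed
        throw new IllegalStateException("cuGraphInstantiate failed (" + err + "): " + log
            + ", error node = 0x" + Long.toHexString(phErrorNode.get(0)));
    }
    long hGraphExec = phGraphExec.get(0);
}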

Instantiates {@code hGraph} as an executable graph. The graph is validated for any structural constraints or intra-node constraints which were not * previously validated. If instantiation is successful, a handle to the instantiated graph is returned in {@code phGraphExec}.

* *

The {@code flags} parameter controls the behavior of instantiation and subsequent graph launches. Valid flags are:

* *
  • {@link #CUDA_GRAPH_INSTANTIATE_FLAG_AUTO_FREE_ON_LAUNCH}, which configures a graph containing memory allocation nodes to automatically free any unfreed memory allocations before the graph is relaunched.
* *

If {@code hGraph} contains any allocation or free nodes, there can be at most one executable graph in existence for that graph at a time.

* *

An attempt to instantiate a second executable graph before destroying the first with {@link #cuGraphExecDestroy GraphExecDestroy} will result in an error.

* * @param phGraphExec returns instantiated graph * @param hGraph graph to instantiate * @param flags flags to control instantiation. See {@code CUgraphInstantiate_flags}. */ @NativeType("CUresult") public static int cuGraphInstantiateWithFlags(@NativeType("CUgraphExec *") PointerBuffer phGraphExec, @NativeType("CUgraph") long hGraph, @NativeType("unsigned long long") long flags) { if (CHECKS) { check(phGraphExec, 1); } return ncuGraphInstantiateWithFlags(memAddress(phGraphExec), hGraph, flags); } // --- [ cuGraphExecKernelNodeSetParams ] --- /** Unsafe version of: {@link #cuGraphExecKernelNodeSetParams GraphExecKernelNodeSetParams} */ public static int ncuGraphExecKernelNodeSetParams(long hGraphExec, long hNode, long nodeParams) { long __functionAddress = Functions.GraphExecKernelNodeSetParams; if (CHECKS) { check(__functionAddress); check(hGraphExec); check(hNode); CUDA_KERNEL_NODE_PARAMS.validate(nodeParams); } return callPPPI(hGraphExec, hNode, nodeParams, __functionAddress); } /** * Sets the parameters for a kernel node in the given {@code graphExec}. * *
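A minimal sketch (not from the original documentation) of instantiating with the auto-free flag, which pairs naturally with graphs that contain unfreed allocation nodes; it assumes a valid {@code hGraph} and static imports of org.lwjgl.cuda.CU.* and org.lwjgl.system.MemoryStack.*.

// Hedged sketch: let each launch auto-free any allocations left unfreed by the previous launch.
try (MemoryStack stack = stackPush()) {
    PointerBuffer phGraphExec = stack.mallocPointer(1);
    int err = cuGraphInstantiateWithFlags(phGraphExec, hGraph, CUDA_GRAPH_INSTANTIATE_FLAG_AUTO_FREE_ON_LAUNCH);
    if (err != CUDA_SUCCESS) throw new IllegalStateException("cuGraphInstantiateWithFlags: " + err);
    long hGraphExec = phGraphExec.get(0);
}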

Sets the parameters of a kernel node in an executable graph {@code hGraphExec}. The node is identified by the corresponding node {@code hNode} in the * non-executable graph, from which the executable graph was instantiated.

* *

{@code hNode} must not have been removed from the original graph. The {@code func} field of {@code nodeParams} cannot be modified and must match the * original value. All other values can be modified.

* *

The modifications take effect at the next launch of {@code hGraphExec}. Already enqueued or running launches of {@code hGraphExec} are not affected by * this call. {@code hNode} is also not modified by this call.

* * @param hGraphExec the executable graph in which to set the specified node * @param hNode kernel node from the graph from which graphExec was instantiated * @param nodeParams updated parameters to set */ @NativeType("CUresult") public static int cuGraphExecKernelNodeSetParams(@NativeType("CUgraphExec") long hGraphExec, @NativeType("CUgraphNode") long hNode, @NativeType("CUDA_KERNEL_NODE_PARAMS const *") CUDA_KERNEL_NODE_PARAMS nodeParams) { return ncuGraphExecKernelNodeSetParams(hGraphExec, hNode, nodeParams.address()); } // --- [ cuGraphExecMemcpyNodeSetParams ] --- /** Unsafe version of: {@link #cuGraphExecMemcpyNodeSetParams GraphExecMemcpyNodeSetParams} */ public static int ncuGraphExecMemcpyNodeSetParams(long hGraphExec, long hNode, long copyParams, long ctx) { long __functionAddress = Functions.GraphExecMemcpyNodeSetParams; if (CHECKS) { check(__functionAddress); check(hGraphExec); check(hNode); check(ctx); } return callPPPPI(hGraphExec, hNode, copyParams, ctx, __functionAddress); } /** * Sets the parameters for a memcpy node in the given {@code graphExec}. * *

Updates the work represented by {@code hNode} in {@code hGraphExec} as though {@code hNode} had contained {@code copyParams} at instantiation. * {@code hNode} must remain in the graph which was used to instantiate {@code hGraphExec}. Changed edges to and from {@code hNode} are ignored.

* *

The source and destination memory in {@code copyParams} must be allocated from the same contexts as the original source and destination memory. Both * the instantiation-time memory operands and the memory operands in {@code copyParams} must be 1-dimensional. Zero-length operations are not supported.

* *

The modifications only affect future launches of {@code hGraphExec}. Already enqueued or running launches of {@code hGraphExec} are not affected by this call. {@code hNode} is also not modified by this call.

* *

Returns {@link #CUDA_ERROR_INVALID_VALUE} if the memory operands' mappings changed or either the original or new memory operands are multidimensional.

* * @param hGraphExec the executable graph in which to set the specified node * @param hNode memcpy node from the graph which was used to instantiate graphExec * @param copyParams the updated parameters to set * @param ctx context on which to run the node */ @NativeType("CUresult") public static int cuGraphExecMemcpyNodeSetParams(@NativeType("CUgraphExec") long hGraphExec, @NativeType("CUgraphNode") long hNode, @NativeType("CUDA_MEMCPY3D const *") CUDA_MEMCPY3D copyParams, @NativeType("CUcontext") long ctx) { return ncuGraphExecMemcpyNodeSetParams(hGraphExec, hNode, copyParams.address(), ctx); } // --- [ cuGraphExecMemsetNodeSetParams ] --- /** Unsafe version of: {@link #cuGraphExecMemsetNodeSetParams GraphExecMemsetNodeSetParams} */ public static int ncuGraphExecMemsetNodeSetParams(long hGraphExec, long hNode, long memsetParams, long ctx) { long __functionAddress = Functions.GraphExecMemsetNodeSetParams; if (CHECKS) { check(__functionAddress); check(hGraphExec); check(hNode); CUDA_MEMSET_NODE_PARAMS.validate(memsetParams); check(ctx); } return callPPPPI(hGraphExec, hNode, memsetParams, ctx, __functionAddress); } /** * Sets the parameters for a {@code memset} node in the given {@code graphExec}. * *

Updates the work represented by {@code hNode} in {@code hGraphExec} as though {@code hNode} had contained {@code memsetParams} at instantiation. * {@code hNode} must remain in the graph which was used to instantiate {@code hGraphExec}. Changed edges to and from {@code hNode} are ignored.

* *

The destination memory in {@code memsetParams} must be allocated from the same contexts as the original destination memory. Both the instantiation-time * memory operand and the memory operand in {@code memsetParams} must be 1-dimensional. Zero-length operations are not supported.

* *

The modifications only affect future launches of {@code hGraphExec}. Already enqueued or running launches of {@code hGraphExec} are not affected by this call. {@code hNode} is also not modified by this call.

* *

Returns {@link #CUDA_ERROR_INVALID_VALUE} if the memory operand's mappings changed or either the original or new memory operand is multidimensional.

* * @param hGraphExec the executable graph in which to set the specified node * @param hNode memset node from the graph which was used to instantiate graphExec * @param memsetParams the updated parameters to set * @param ctx context on which to run the node */ @NativeType("CUresult") public static int cuGraphExecMemsetNodeSetParams(@NativeType("CUgraphExec") long hGraphExec, @NativeType("CUgraphNode") long hNode, @NativeType("CUDA_MEMSET_NODE_PARAMS const *") CUDA_MEMSET_NODE_PARAMS memsetParams, @NativeType("CUcontext") long ctx) { return ncuGraphExecMemsetNodeSetParams(hGraphExec, hNode, memsetParams.address(), ctx); } // --- [ cuGraphExecHostNodeSetParams ] --- /** Unsafe version of: {@link #cuGraphExecHostNodeSetParams GraphExecHostNodeSetParams} */ public static int ncuGraphExecHostNodeSetParams(long hGraphExec, long hNode, long nodeParams) { long __functionAddress = Functions.GraphExecHostNodeSetParams; if (CHECKS) { check(__functionAddress); check(hGraphExec); check(hNode); CUDA_HOST_NODE_PARAMS.validate(nodeParams); } return callPPPI(hGraphExec, hNode, nodeParams, __functionAddress); } /** * Sets the parameters for a host node in the given {@code graphExec}. * *

Updates the work represented by {@code hNode} in {@code hGraphExec} as though {@code hNode} had contained {@code nodeParams} at instantiation. * {@code hNode} must remain in the graph which was used to instantiate {@code hGraphExec}. Changed edges to and from {@code hNode} are ignored.

* *

The modifications only affect future launches of {@code hGraphExec}. Already enqueued or running launches of {@code hGraphExec} are not affected by this call. {@code hNode} is also not modified by this call.

* * @param hGraphExec the executable graph in which to set the specified node * @param hNode host node from the graph which was used to instantiate graphExec * @param nodeParams the updated parameters to set */ @NativeType("CUresult") public static int cuGraphExecHostNodeSetParams(@NativeType("CUgraphExec") long hGraphExec, @NativeType("CUgraphNode") long hNode, @NativeType("CUDA_HOST_NODE_PARAMS const *") CUDA_HOST_NODE_PARAMS nodeParams) { return ncuGraphExecHostNodeSetParams(hGraphExec, hNode, nodeParams.address()); } // --- [ cuGraphExecChildGraphNodeSetParams ] --- /** * Updates node parameters in the child graph node in the given {@code graphExec}. * *

Updates the work represented by {@code hNode} in {@code hGraphExec} as though the nodes contained in {@code hNode's} graph had the parameters contained * in {@code childGraph's} nodes at instantiation. {@code hNode} must remain in the graph which was used to instantiate {@code hGraphExec}. Changed edges * to and from {@code hNode} are ignored.

* *

The modifications only affect future launches of {@code hGraphExec}. Already enqueued or running launches of {@code hGraphExec} are not affected by * this call. {@code hNode} is also not modified by this call.

* *

The topology of {@code childGraph}, as well as the node insertion order, must match that of the graph contained in {@code hNode}. See * {@link #cuGraphExecUpdate GraphExecUpdate} for a list of restrictions on what can be updated in an instantiated graph. The update is recursive, so child graph nodes * contained within the top level child graph will also be updated.

* * @param hGraphExec the executable graph in which to set the specified node * @param hNode host node from the graph which was used to instantiate {@code graphExec} * @param childGraph the graph supplying the updated parameters */ @NativeType("CUresult") public static int cuGraphExecChildGraphNodeSetParams(@NativeType("CUgraphExec") long hGraphExec, @NativeType("CUgraphNode") long hNode, @NativeType("CUgraph") long childGraph) { long __functionAddress = Functions.GraphExecChildGraphNodeSetParams; if (CHECKS) { check(__functionAddress); check(hGraphExec); check(hNode); check(childGraph); } return callPPPI(hGraphExec, hNode, childGraph, __functionAddress); } // --- [ cuGraphExecEventRecordNodeSetEvent ] --- /** * Sets the event for an event record node in the given {@code graphExec}. * *

Sets the event of an event record node in an executable graph {@code hGraphExec}. The node is identified by the corresponding node {@code hNode} in the * non-executable graph, from which the executable graph was instantiated.

* *

The modifications only affect future launches of {@code hGraphExec}. Already enqueued or running launches of {@code hGraphExec} are not affected by * this call. {@code hNode} is also not modified by this call.

* * @param hGraphExec the executable graph in which to set the specified node * @param hNode event record node from the graph from which graphExec was instantiated * @param event updated event to use */ @NativeType("CUresult") public static int cuGraphExecEventRecordNodeSetEvent(@NativeType("CUgraphExec") long hGraphExec, @NativeType("CUgraphNode") long hNode, @NativeType("CUevent") long event) { long __functionAddress = Functions.GraphExecEventRecordNodeSetEvent; if (CHECKS) { check(__functionAddress); check(hGraphExec); check(hNode); check(event); } return callPPPI(hGraphExec, hNode, event, __functionAddress); } // --- [ cuGraphExecEventWaitNodeSetEvent ] --- /** * Sets the event for an event wait node in the given {@code graphExec}. * *

Sets the event of an event wait node in an executable graph {@code hGraphExec}. The node is identified by the corresponding node {@code hNode} in the * non-executable graph, from which the executable graph was instantiated.

* *

The modifications only affect future launches of {@code hGraphExec}. Already enqueued or running launches of {@code hGraphExec} are not affected by * this call. {@code hNode} is also not modified by this call.

* * @param hGraphExec the executable graph in which to set the specified node * @param hNode event wait node from the graph from which graphExec was instantiated * @param event updated event to use */ @NativeType("CUresult") public static int cuGraphExecEventWaitNodeSetEvent(@NativeType("CUgraphExec") long hGraphExec, @NativeType("CUgraphNode") long hNode, @NativeType("CUevent") long event) { long __functionAddress = Functions.GraphExecEventWaitNodeSetEvent; if (CHECKS) { check(__functionAddress); check(hGraphExec); check(hNode); check(event); } return callPPPI(hGraphExec, hNode, event, __functionAddress); } // --- [ cuGraphExecExternalSemaphoresSignalNodeSetParams ] --- /** Unsafe version of: {@link #cuGraphExecExternalSemaphoresSignalNodeSetParams GraphExecExternalSemaphoresSignalNodeSetParams} */ public static int ncuGraphExecExternalSemaphoresSignalNodeSetParams(long hGraphExec, long hNode, long nodeParams) { long __functionAddress = Functions.GraphExecExternalSemaphoresSignalNodeSetParams; if (CHECKS) { check(__functionAddress); check(hGraphExec); check(hNode); CUDA_EXT_SEM_SIGNAL_NODE_PARAMS.validate(nodeParams); } return callPPPI(hGraphExec, hNode, nodeParams, __functionAddress); } /** * Sets the parameters for an external semaphore signal node in the given {@code graphExec}. * *

Sets the parameters of an external semaphore signal node in an executable graph {@code hGraphExec}. The node is identified by the corresponding node * {@code hNode} in the non-executable graph, from which the executable graph was instantiated.

* *

{@code hNode} must not have been removed from the original graph.

* *

The modifications only affect future launches of {@code hGraphExec}. Already enqueued or running launches of {@code hGraphExec} are not affected by * this call. {@code hNode} is also not modified by this call.

* *

Changing {@code nodeParams->numExtSems} is not supported.

* * @param hGraphExec the executable graph in which to set the specified node * @param hNode semaphore signal node from the graph from which graphExec was instantiated * @param nodeParams updated Parameters to set */ @NativeType("CUresult") public static int cuGraphExecExternalSemaphoresSignalNodeSetParams(@NativeType("CUgraphExec") long hGraphExec, @NativeType("CUgraphNode") long hNode, @NativeType("CUDA_EXT_SEM_SIGNAL_NODE_PARAMS const *") CUDA_EXT_SEM_SIGNAL_NODE_PARAMS nodeParams) { return ncuGraphExecExternalSemaphoresSignalNodeSetParams(hGraphExec, hNode, nodeParams.address()); } // --- [ cuGraphExecExternalSemaphoresWaitNodeSetParams ] --- /** Unsafe version of: {@link #cuGraphExecExternalSemaphoresWaitNodeSetParams GraphExecExternalSemaphoresWaitNodeSetParams} */ public static int ncuGraphExecExternalSemaphoresWaitNodeSetParams(long hGraphExec, long hNode, long nodeParams) { long __functionAddress = Functions.GraphExecExternalSemaphoresWaitNodeSetParams; if (CHECKS) { check(__functionAddress); check(hGraphExec); check(hNode); CUDA_EXT_SEM_WAIT_NODE_PARAMS.validate(nodeParams); } return callPPPI(hGraphExec, hNode, nodeParams, __functionAddress); } /** * Sets the parameters for an external semaphore wait node in the given graphExec. * *

Sets the parameters of an external semaphore wait node in an executable graph {@code hGraphExec}. The node is identified by the corresponding node * {@code hNode} in the non-executable graph, from which the executable graph was instantiated.

* *

{@code hNode} must not have been removed from the original graph.

* *

The modifications only affect future launches of {@code hGraphExec}. Already enqueued or running launches of {@code hGraphExec} are not affected by * this call. {@code hNode} is also not modified by this call.

* *

Changing {@code nodeParams->numExtSems} is not supported.

* * @param hGraphExec the executable graph in which to set the specified node * @param hNode semaphore wait node from the graph from which graphExec was instantiated * @param nodeParams updated Parameters to set */ @NativeType("CUresult") public static int cuGraphExecExternalSemaphoresWaitNodeSetParams(@NativeType("CUgraphExec") long hGraphExec, @NativeType("CUgraphNode") long hNode, @NativeType("CUDA_EXT_SEM_WAIT_NODE_PARAMS const *") CUDA_EXT_SEM_WAIT_NODE_PARAMS nodeParams) { return ncuGraphExecExternalSemaphoresWaitNodeSetParams(hGraphExec, hNode, nodeParams.address()); } // --- [ cuGraphUpload ] --- /** * Uploads an executable graph in a stream. * *

Uploads {@code hGraphExec} to the device in {@code hStream} without executing it. Uploads of the same {@code hGraphExec} will be serialized. Each upload is ordered behind both any previous work in {@code hStream} and any previous launches of {@code hGraphExec}. Uses memory cached by {@code hStream} to back the allocations owned by {@code hGraphExec}.

* * @param hGraphExec executable graph to upload * @param hStream stream in which to upload the graph */ @NativeType("CUresult") public static int cuGraphUpload(@NativeType("CUgraphExec") long hGraphExec, @NativeType("CUstream") long hStream) { long __functionAddress = Functions.GraphUpload; if (CHECKS) { check(__functionAddress); check(hGraphExec); } return callPPI(hGraphExec, hStream, __functionAddress); } // --- [ cuGraphLaunch ] --- /** * Launches an executable graph in a stream. * *

Executes {@code hGraphExec} in {@code hStream}. Only one instance of {@code hGraphExec} may be executing at a time. Each launch is ordered behind both * any previous work in {@code hStream} and any previous launches of {@code hGraphExec}. To execute a graph concurrently, it must be instantiated multiple * times into multiple executable graphs.

* *

If any allocations created by {@code hGraphExec} remain unfreed (from a previous launch) and {@code hGraphExec} was not instantiated with * {@link #CUDA_GRAPH_INSTANTIATE_FLAG_AUTO_FREE_ON_LAUNCH}, the launch will fail with {@link #CUDA_ERROR_INVALID_VALUE}.

* * @param hGraphExec executable graph to launch * @param hStream stream in which to launch the graph */ @NativeType("CUresult") public static int cuGraphLaunch(@NativeType("CUgraphExec") long hGraphExec, @NativeType("CUstream") long hStream) { long __functionAddress = Functions.GraphLaunch; if (CHECKS) { check(__functionAddress); check(hGraphExec); } return callPPI(hGraphExec, hStream, __functionAddress); } // --- [ cuGraphExecDestroy ] --- /** * Destroys an executable graph. * *
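For context (illustrative only), a typical upload/launch lifecycle, assuming an instantiated {@code hGraphExec}, a valid {@code hStream}, and a static import of org.lwjgl.cuda.CU.*; {@code cuStreamSynchronize} is assumed to be available in these bindings.

// Hedged sketch: optional warm-up upload, then one launch per iteration on the same stream.
int err = cuGraphUpload(hGraphExec, hStream);              // pre-stage the work, no execution yet
if (err != CUDA_SUCCESS) throw new IllegalStateException("cuGraphUpload: " + err);

for (int i = 0; i < 10; i++) {
    err = cuGraphLaunch(hGraphExec, hStream);              // launches are serialized per hGraphExec
    if (err != CUDA_SUCCESS) throw new IllegalStateException("cuGraphLaunch: " + err);
}

cuStreamSynchronize(hStream);                              // wait for the last launch to finish
cuGraphExecDestroy(hGraphExec);                            // safe even if still in flight: freed on completion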

Destroys the executable graph specified by {@code hGraphExec}, as well as all of its executable nodes. If the executable graph is in-flight, it will * not be terminated, but rather freed asynchronously on completion.

* * @param hGraphExec executable graph to destroy */ @NativeType("CUresult") public static int cuGraphExecDestroy(@NativeType("CUgraphExec") long hGraphExec) { long __functionAddress = Functions.GraphExecDestroy; if (CHECKS) { check(__functionAddress); check(hGraphExec); } return callPI(hGraphExec, __functionAddress); } // --- [ cuGraphDestroy ] --- /** * Destroys a graph. * *

Destroys the graph specified by {@code hGraph}, as well as all of its nodes.

* * @param hGraph graph to destroy */ @NativeType("CUresult") public static int cuGraphDestroy(@NativeType("CUgraph") long hGraph) { long __functionAddress = Functions.GraphDestroy; if (CHECKS) { check(__functionAddress); check(hGraph); } return callPI(hGraph, __functionAddress); } // --- [ cuGraphExecUpdate ] --- /** Unsafe version of: {@link #cuGraphExecUpdate GraphExecUpdate} */ public static int ncuGraphExecUpdate(long hGraphExec, long hGraph, long hErrorNode_out, long updateResult_out) { long __functionAddress = Functions.GraphExecUpdate; if (CHECKS) { check(__functionAddress); check(hGraphExec); check(hGraph); } return callPPPPI(hGraphExec, hGraph, hErrorNode_out, updateResult_out, __functionAddress); } /** * Check whether an executable graph can be updated with a graph and perform the update if possible. * *

Updates the node parameters in the instantiated graph specified by {@code hGraphExec} with the node parameters in a topologically identical graph * specified by {@code hGraph}.

* *

Limitations:

* *
  • Kernel nodes:
    • The owning context of the function cannot change.
    • A node whose function originally did not use CUDA dynamic parallelism cannot be updated to a function which uses CDP.
  • Memset and memcpy nodes:
    • The CUDA device(s) to which the operand(s) was allocated/mapped cannot change.
    • The source/destination memory must be allocated from the same contexts as the original source/destination memory.
    • Only 1D memsets can be changed.
  • Additional memcpy node restrictions:
    • Changing either the source or destination memory type (i.e. {@code CU_MEMORYTYPE_DEVICE}, {@code CU_MEMORYTYPE_ARRAY}, etc.) is not supported.
  • External semaphore wait nodes and record nodes:
    • Changing the number of semaphores is not supported.
* *

Note: The API may add further restrictions in future releases. The return code should always be checked.

* *

{@code cuGraphExecUpdate} sets {@code updateResult_out} to {@link #CU_GRAPH_EXEC_UPDATE_ERROR_TOPOLOGY_CHANGED GRAPH_EXEC_UPDATE_ERROR_TOPOLOGY_CHANGED} under the following conditions:

* *
  • The count of nodes directly in {@code hGraphExec} and {@code hGraph} differ, in which case {@code hErrorNode_out} is {@code NULL}.
  • A node is deleted in {@code hGraph} but not its pair from {@code hGraphExec}, in which case {@code hErrorNode_out} is {@code NULL}.
  • A node is deleted in {@code hGraphExec} but not its pair from {@code hGraph}, in which case {@code hErrorNode_out} is the pairless node from {@code hGraph}.
  • The dependent nodes of a pair differ, in which case {@code hErrorNode_out} is the node from {@code hGraph}.
* *

{@code cuGraphExecUpdate} sets {@code updateResult_out} to:

* *
  • {@link #CU_GRAPH_EXEC_UPDATE_ERROR GRAPH_EXEC_UPDATE_ERROR} if passed an invalid value.
  • {@link #CU_GRAPH_EXEC_UPDATE_ERROR_TOPOLOGY_CHANGED GRAPH_EXEC_UPDATE_ERROR_TOPOLOGY_CHANGED} if the graph topology changed.
  • {@link #CU_GRAPH_EXEC_UPDATE_ERROR_NODE_TYPE_CHANGED GRAPH_EXEC_UPDATE_ERROR_NODE_TYPE_CHANGED} if the type of a node changed, in which case {@code hErrorNode_out} is set to the node from {@code hGraph}.
  • {@link #CU_GRAPH_EXEC_UPDATE_ERROR_UNSUPPORTED_FUNCTION_CHANGE GRAPH_EXEC_UPDATE_ERROR_UNSUPPORTED_FUNCTION_CHANGE} if the function changed in an unsupported way (see note above), in which case {@code hErrorNode_out} is set to the node from {@code hGraph}.
  • {@link #CU_GRAPH_EXEC_UPDATE_ERROR_PARAMETERS_CHANGED GRAPH_EXEC_UPDATE_ERROR_PARAMETERS_CHANGED} if any parameters to a node changed in a way that is not supported, in which case {@code hErrorNode_out} is set to the node from {@code hGraph}.
  • {@link #CU_GRAPH_EXEC_UPDATE_ERROR_NOT_SUPPORTED GRAPH_EXEC_UPDATE_ERROR_NOT_SUPPORTED} if something about a node is unsupported, like the node's type or configuration, in which case {@code hErrorNode_out} is set to the node from {@code hGraph}.
* *

If {@code updateResult_out} isn't set in one of the situations described above, the update check passes and cuGraphExecUpdate updates * {@code hGraphExec} to match the contents of {@code hGraph}. If an error happens during the update, {@code updateResult_out} will be set to * {@link #CU_GRAPH_EXEC_UPDATE_ERROR GRAPH_EXEC_UPDATE_ERROR}; otherwise, {@code updateResult_out} is set to {@link #CU_GRAPH_EXEC_UPDATE_SUCCESS GRAPH_EXEC_UPDATE_SUCCESS}.

* *

{@code cuGraphExecUpdate} returns {@link #CUDA_SUCCESS} when the update was performed successfully. It returns {@link #CUDA_ERROR_GRAPH_EXEC_UPDATE_FAILURE} if the graph update was not performed because it included changes which violated constraints specific to instantiated graph update.

* * @param hGraphExec the instantiated graph to be updated * @param hGraph the graph containing the updated parameters * @param hErrorNode_out the node which caused the permissibility check to forbid the update, if any * @param updateResult_out whether the graph update was permitted. If was forbidden, the reason why. */ @NativeType("CUresult") public static int cuGraphExecUpdate(@NativeType("CUgraphExec") long hGraphExec, @NativeType("CUgraph") long hGraph, @NativeType("CUgraphNode *") PointerBuffer hErrorNode_out, @NativeType("CUgraphExecUpdateResult *") IntBuffer updateResult_out) { if (CHECKS) { check(hErrorNode_out, 1); check(updateResult_out, 1); } return ncuGraphExecUpdate(hGraphExec, hGraph, memAddress(hErrorNode_out), memAddress(updateResult_out)); } // --- [ cuGraphKernelNodeCopyAttributes ] --- /** * Copies attributes from source node to destination node. * *
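A hedged sketch of the usual update-or-reinstantiate flow, assuming an instantiated {@code hGraphExec} (declared as a mutable local), a topologically compatible {@code hModifiedGraph}, and static imports of org.lwjgl.cuda.CU.* and org.lwjgl.system.MemoryStack.*.

// Hedged sketch: try to update the instantiated graph in place, re-instantiate if that is not allowed.
try (MemoryStack stack = stackPush()) {
    PointerBuffer hErrorNode   = stack.mallocPointer(1);
    IntBuffer     updateResult = stack.mallocInt(1);

    int err = cuGraphExecUpdate(hGraphExec, hModifiedGraph, hErrorNode, updateResult);
    if (err == CUDA_SUCCESS && updateResult.get(0) == CU_GRAPH_EXEC_UPDATE_SUCCESS) {
        // hGraphExec now matches hModifiedGraph; cheap path, no re-instantiation needed.
    } else {
        // Topology or unsupported parameter change: fall back to a fresh instantiation.
        cuGraphExecDestroy(hGraphExec);
        PointerBuffer phGraphExec = stack.mallocPointer(1);
        cuGraphInstantiateWithFlags(phGraphExec, hModifiedGraph, 0);
        hGraphExec = phGraphExec.get(0);
    }
}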

Copies attributes from source node {@code src} to destination node {@code dst}. Both nodes must have the same context.

* * @param dst destination node * @param src source node. For list of attributes see {@code CUkernelNodeAttrID}. */ @NativeType("CUresult") public static int cuGraphKernelNodeCopyAttributes(@NativeType("CUgraphNode") long dst, @NativeType("CUgraphNode") long src) { long __functionAddress = Functions.GraphKernelNodeCopyAttributes; if (CHECKS) { check(__functionAddress); check(dst); check(src); } return callPPI(dst, src, __functionAddress); } // --- [ cuGraphKernelNodeGetAttribute ] --- /** Unsafe version of: {@link #cuGraphKernelNodeGetAttribute GraphKernelNodeGetAttribute} */ public static int ncuGraphKernelNodeGetAttribute(long hNode, int attr, long value_out) { long __functionAddress = Functions.GraphKernelNodeGetAttribute; if (CHECKS) { check(__functionAddress); check(hNode); } return callPPI(hNode, attr, value_out, __functionAddress); } /** * Queries node attribute. * *

Queries attribute {@code attr} from node {@code hNode} and stores it in the corresponding member of {@code value_out}.

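For illustration, a sketch that reads the cooperative-launch attribute of a kernel node; the {@code CU_KERNEL_NODE_ATTRIBUTE_COOPERATIVE} constant and the {@code cooperative()} accessor of {@code CUkernelNodeAttrValue} are assumed here, and {@code hNode} is a valid kernel node handle:

    // Sketch: query whether a kernel node was added with cooperative launch enabled.
    try (MemoryStack stack = MemoryStack.stackPush()) {
        CUkernelNodeAttrValue value = CUkernelNodeAttrValue.calloc(stack);
        cuGraphKernelNodeGetAttribute(hNode, CU_KERNEL_NODE_ATTRIBUTE_COOPERATIVE, value);
        boolean cooperative = value.cooperative() != 0; // assumed union member accessor
    }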
*/ @NativeType("CUresult") public static int cuGraphKernelNodeGetAttribute(@NativeType("CUgraphNode") long hNode, @NativeType("CUkernelNodeAttrID") int attr, @NativeType("CUkernelNodeAttrValue *") CUkernelNodeAttrValue value_out) { return ncuGraphKernelNodeGetAttribute(hNode, attr, value_out.address()); } // --- [ cuGraphKernelNodeSetAttribute ] --- /** Unsafe version of: {@link #cuGraphKernelNodeSetAttribute GraphKernelNodeSetAttribute} */ public static int ncuGraphKernelNodeSetAttribute(long hNode, int attr, long value) { long __functionAddress = Functions.GraphKernelNodeSetAttribute; if (CHECKS) { check(__functionAddress); check(hNode); } return callPPI(hNode, attr, value, __functionAddress); } /** * Sets node attribute. * *

Sets attribute {@code attr} on node {@code hNode} from the corresponding attribute of {@code value}.

*/ @NativeType("CUresult") public static int cuGraphKernelNodeSetAttribute(@NativeType("CUgraphNode") long hNode, @NativeType("CUkernelNodeAttrID") int attr, @NativeType("CUkernelNodeAttrValue const *") CUkernelNodeAttrValue value) { return ncuGraphKernelNodeSetAttribute(hNode, attr, value.address()); } // --- [ cuGraphDebugDotPrint ] --- /** Unsafe version of: {@link #cuGraphDebugDotPrint GraphDebugDotPrint} */ public static int ncuGraphDebugDotPrint(long hGraph, long path, int flags) { long __functionAddress = Functions.GraphDebugDotPrint; if (CHECKS) { check(__functionAddress); check(hGraph); } return callPPI(hGraph, path, flags, __functionAddress); } /** * Write a DOT file describing graph structure. * *

Using the provided {@code hGraph}, write to {@code path} a DOT formatted description of the graph. By default this includes the graph topology, node types, node id, kernel names and memcpy direction. {@code flags} can be specified to write more detailed information about each node type such as parameter values, kernel attributes, node and function handles.

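A minimal sketch, assuming {@code hGraph} is a valid graph handle (flags 0 requests the default level of detail):

    // Sketch: dump the graph topology to a DOT file for inspection with Graphviz.
    int err = cuGraphDebugDotPrint(hGraph, "graph.dot", 0);
    if (err != CUDA_SUCCESS) {
        throw new IllegalStateException("cuGraphDebugDotPrint failed: " + err);
    }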
* * @param hGraph the graph to create a DOT file from * @param path the path to write the DOT file to * @param flags flags from {@code CUgraphDebugDot_flags} for specifying which additional node information to write */ @NativeType("CUresult") public static int cuGraphDebugDotPrint(@NativeType("CUgraph") long hGraph, @NativeType("char const *") ByteBuffer path, @NativeType("unsigned int") int flags) { if (CHECKS) { checkNT1(path); } return ncuGraphDebugDotPrint(hGraph, memAddress(path), flags); } /** * Write a DOT file describing graph structure. * *

Using the provided {@code hGraph}, write to {@code path} a DOT formatted description of the graph. By default this includes the graph topology, node types, node id, kernel names and memcpy direction. {@code flags} can be specified to write more detailed information about each node type such as parameter values, kernel attributes, node and function handles.

* * @param hGraph the graph to create a DOT file from * @param path the path to write the DOT file to * @param flags flags from {@code CUgraphDebugDot_flags} for specifying which additional node information to write */ @NativeType("CUresult") public static int cuGraphDebugDotPrint(@NativeType("CUgraph") long hGraph, @NativeType("char const *") CharSequence path, @NativeType("unsigned int") int flags) { MemoryStack stack = stackGet(); int stackPointer = stack.getPointer(); try { stack.nUTF8(path, true); long pathEncoded = stack.getPointerAddress(); return ncuGraphDebugDotPrint(hGraph, pathEncoded, flags); } finally { stack.setPointer(stackPointer); } } // --- [ cuUserObjectCreate ] --- /** Unsafe version of: {@link #cuUserObjectCreate UserObjectCreate} */ public static int ncuUserObjectCreate(long object_out, long ptr, long destroy, int initialRefcount, int flags) { long __functionAddress = Functions.UserObjectCreate; if (CHECKS) { check(__functionAddress); check(ptr); } return callPPPI(object_out, ptr, destroy, initialRefcount, flags, __functionAddress); } /** * Create a user object. * *

Create a user object with the specified destructor callback and initial reference count. The initial references are owned by the caller.

* *

Destructor callbacks cannot make CUDA API calls and should avoid blocking behavior, as they are executed by a shared internal thread. Another thread may be signaled to perform such actions, if it does not block forward progress of tasks scheduled through CUDA.

* *

See CUDA User Objects in the CUDA C++ Programming Guide for more information on user objects.

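A sketch of tying a manually managed native resource to a graph's lifetime; {@code graph} and {@code resourcePtr} are assumed, and the usual LWJGL {@code CUhostFn} callback class is assumed to wrap {@code CUhostFnI}:

    // Sketch: create a user object whose destructor runs once the owning graph drops
    // its final reference, then move the caller's reference into the graph.
    try (MemoryStack stack = MemoryStack.stackPush()) {
        PointerBuffer pObject = stack.mallocPointer(1);
        CUhostFn destroy = CUhostFn.create(userData -> {
            // Free the native resource identified by userData here (no CUDA calls allowed).
        });
        cuUserObjectCreate(pObject, resourcePtr, destroy, 1, CU_USER_OBJECT_NO_DESTRUCTOR_SYNC);
        cuGraphRetainUserObject(graph, pObject.get(0), 1, CU_GRAPH_USER_OBJECT_MOVE);
    }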
* * @param object_out location to return the user object handle * @param ptr the pointer to pass to the destroy function * @param destroy callback to free the user object when it is no longer in use * @param initialRefcount the initial refcount to create the object with, typically 1. The initial references are owned by the calling thread. * @param flags currently it is required to pass {@link #CU_USER_OBJECT_NO_DESTRUCTOR_SYNC USER_OBJECT_NO_DESTRUCTOR_SYNC}, which is the only defined flag. This indicates that the destroy callback * cannot be waited on by any CUDA API. Users requiring synchronization of the callback should signal its completion manually. */ @NativeType("CUresult") public static int cuUserObjectCreate(@NativeType("CUuserObject *") PointerBuffer object_out, @NativeType("void *") long ptr, @NativeType("void (*) (void *)") CUhostFnI destroy, @NativeType("unsigned int") int initialRefcount, @NativeType("unsigned int") int flags) { if (CHECKS) { check(object_out, 1); } return ncuUserObjectCreate(memAddress(object_out), ptr, destroy.address(), initialRefcount, flags); } // --- [ cuUserObjectRetain ] --- /** * Retain a reference to a user object. * *

Retains new references to a user object. The new references are owned by the caller.

* *

See CUDA User Objects in the CUDA C++ Programming Guide for more information on user objects.

* * @param object the object to retain * @param count the number of references to retain, typically 1. Must be nonzero and not larger than INT_MAX. */ @NativeType("CUresult") public static int cuUserObjectRetain(@NativeType("CUuserObject") long object, @NativeType("unsigned int") int count) { long __functionAddress = Functions.UserObjectRetain; if (CHECKS) { check(__functionAddress); check(object); } return callPI(object, count, __functionAddress); } // --- [ cuUserObjectRelease ] --- /** * Release a reference to a user object. * *

Releases user object references owned by the caller. The object's destructor is invoked if the reference count reaches zero.

* *

It is undefined behavior to release references not owned by the caller, or to use a user object handle after all references are released.

* *

See CUDA User Objects in the CUDA C++ Programming Guide for more information on user objects.

* * @param object the object to release * @param count the number of references to release, typically 1. Must be nonzero and not larger than INT_MAX. */ @NativeType("CUresult") public static int cuUserObjectRelease(@NativeType("CUuserObject") long object, @NativeType("unsigned int") int count) { long __functionAddress = Functions.UserObjectRelease; if (CHECKS) { check(__functionAddress); check(object); } return callPI(object, count, __functionAddress); } // --- [ cuGraphRetainUserObject ] --- /** * Retain a reference to a user object from a graph. * *

Creates or moves user object references that will be owned by a CUDA graph.

* *

See CUDA User Objects in the CUDA C++ Programming Guide for more information on user objects.

* * @param graph the graph to associate the reference with * @param object the user object to retain a reference for * @param count the number of references to add to the graph, typically 1. Must be nonzero and not larger than INT_MAX. * @param flags the optional flag {@link #CU_GRAPH_USER_OBJECT_MOVE GRAPH_USER_OBJECT_MOVE} transfers references from the calling thread, rather than create new references. Pass 0 to create new * references. */ @NativeType("CUresult") public static int cuGraphRetainUserObject(@NativeType("CUgraph") long graph, @NativeType("CUuserObject") long object, @NativeType("unsigned int") int count, @NativeType("unsigned int") int flags) { long __functionAddress = Functions.GraphRetainUserObject; if (CHECKS) { check(__functionAddress); check(graph); check(object); } return callPPI(graph, object, count, flags, __functionAddress); } // --- [ cuGraphReleaseUserObject ] --- /** * Release a user object reference from a graph. * *

Releases user object references owned by a graph.

* *

See CUDA User Objects in the CUDA C++ Programming Guide for more information on user objects.

* * @param graph the graph that will release the reference * @param object the user object to release a reference for * @param count the number of references to release, typically 1. Must be nonzero and not larger than INT_MAX. */ @NativeType("CUresult") public static int cuGraphReleaseUserObject(@NativeType("CUgraph") long graph, @NativeType("CUuserObject") long object, @NativeType("unsigned int") int count) { long __functionAddress = Functions.GraphReleaseUserObject; if (CHECKS) { check(__functionAddress); check(graph); check(object); } return callPPI(graph, object, count, __functionAddress); } // --- [ cuOccupancyMaxActiveBlocksPerMultiprocessor ] --- /** Unsafe version of: {@link #cuOccupancyMaxActiveBlocksPerMultiprocessor OccupancyMaxActiveBlocksPerMultiprocessor} */ public static int ncuOccupancyMaxActiveBlocksPerMultiprocessor(long numBlocks, long func, int blockSize, long dynamicSMemSize) { long __functionAddress = Functions.OccupancyMaxActiveBlocksPerMultiprocessor; if (CHECKS) { check(__functionAddress); check(func); } return callPPPI(numBlocks, func, blockSize, dynamicSMemSize, __functionAddress); } /** * Returns occupancy of a function. * *

Returns in {@code *numBlocks} the maximum number of active blocks per streaming multiprocessor.

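A minimal sketch, assuming {@code function} is a loaded {@code CUfunction} handle:

    // Sketch: query how many blocks of 256 threads (with no dynamic shared memory)
    // can be resident on one streaming multiprocessor for this kernel.
    try (MemoryStack stack = MemoryStack.stackPush()) {
        IntBuffer numBlocks = stack.mallocInt(1);
        cuOccupancyMaxActiveBlocksPerMultiprocessor(numBlocks, function, 256, 0);
        int blocksPerSM = numBlocks.get(0);
    }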
* * @param numBlocks returned occupancy * @param func kernel for which occupancy is calculated * @param blockSize block size the kernel is intended to be launched with * @param dynamicSMemSize per-block dynamic shared memory usage intended, in bytes */ @NativeType("CUresult") public static int cuOccupancyMaxActiveBlocksPerMultiprocessor(@NativeType("int *") IntBuffer numBlocks, @NativeType("CUfunction") long func, int blockSize, @NativeType("size_t") long dynamicSMemSize) { if (CHECKS) { check(numBlocks, 1); } return ncuOccupancyMaxActiveBlocksPerMultiprocessor(memAddress(numBlocks), func, blockSize, dynamicSMemSize); } // --- [ cuOccupancyMaxActiveBlocksPerMultiprocessorWithFlags ] --- /** Unsafe version of: {@link #cuOccupancyMaxActiveBlocksPerMultiprocessorWithFlags OccupancyMaxActiveBlocksPerMultiprocessorWithFlags} */ public static int ncuOccupancyMaxActiveBlocksPerMultiprocessorWithFlags(long numBlocks, long func, int blockSize, long dynamicSMemSize, int flags) { long __functionAddress = Functions.OccupancyMaxActiveBlocksPerMultiprocessorWithFlags; if (CHECKS) { check(__functionAddress); check(func); } return callPPPI(numBlocks, func, blockSize, dynamicSMemSize, flags, __functionAddress); } /** * Returns occupancy of a function. * *

Returns in {@code *numBlocks} the maximum number of active blocks per streaming multiprocessor.

* *

The {@code Flags} parameter controls how special cases are handled. The valid flags are:

  • {@link #CU_OCCUPANCY_DEFAULT OCCUPANCY_DEFAULT}, which maintains the default behavior as {@link #cuOccupancyMaxActiveBlocksPerMultiprocessor OccupancyMaxActiveBlocksPerMultiprocessor};
  • {@link #CU_OCCUPANCY_DISABLE_CACHING_OVERRIDE OCCUPANCY_DISABLE_CACHING_OVERRIDE}, which suppresses the default behavior on platforms where global caching affects occupancy. On such platforms, if caching is enabled, but per-block SM resource usage would result in zero occupancy, the occupancy calculator will calculate the occupancy as if caching is disabled. Setting {@link #CU_OCCUPANCY_DISABLE_CACHING_OVERRIDE OCCUPANCY_DISABLE_CACHING_OVERRIDE} makes the occupancy calculator return 0 in such cases. More information about this feature can be found in the "Unified L1/Texture Cache" section of the Maxwell tuning guide.
* * @param numBlocks returned occupancy * @param func kernel for which occupancy is calculated * @param blockSize block size the kernel is intended to be launched with * @param dynamicSMemSize per-block dynamic shared memory usage intended, in bytes * @param flags requested behavior for the occupancy calculator */ @NativeType("CUresult") public static int cuOccupancyMaxActiveBlocksPerMultiprocessorWithFlags(@NativeType("int *") IntBuffer numBlocks, @NativeType("CUfunction") long func, int blockSize, @NativeType("size_t") long dynamicSMemSize, @NativeType("unsigned int") int flags) { if (CHECKS) { check(numBlocks, 1); } return ncuOccupancyMaxActiveBlocksPerMultiprocessorWithFlags(memAddress(numBlocks), func, blockSize, dynamicSMemSize, flags); } // --- [ cuOccupancyMaxPotentialBlockSize ] --- /** Unsafe version of: {@link #cuOccupancyMaxPotentialBlockSize OccupancyMaxPotentialBlockSize} */ public static int ncuOccupancyMaxPotentialBlockSize(long minGridSize, long blockSize, long func, long blockSizeToDynamicSMemSize, long dynamicSMemSize, int blockSizeLimit) { long __functionAddress = Functions.OccupancyMaxPotentialBlockSize; if (CHECKS) { check(__functionAddress); check(func); } return callPPPPPI(minGridSize, blockSize, func, blockSizeToDynamicSMemSize, dynamicSMemSize, blockSizeLimit, __functionAddress); } /** * Suggest a launch configuration with reasonable occupancy. * *

Returns in {@code *blockSize} a reasonable block size that can achieve the maximum occupancy (or, the maximum number of active warps with the fewest blocks per multiprocessor), and in {@code *minGridSize} the minimum grid size to achieve the maximum occupancy.

* *

If {@code blockSizeLimit} is 0, the configurator will use the maximum block size permitted by the device / function instead.

* *

If per-block dynamic shared memory allocation is not needed, the user should leave both {@code blockSizeToDynamicSMemSize} and {@code dynamicSMemSize} as 0.

* *

If per-block dynamic shared memory allocation is needed, then if the dynamic shared memory size is constant regardless of block size, the size should be passed through {@code dynamicSMemSize}, and {@code blockSizeToDynamicSMemSize} should be {@code NULL}.

* *

Otherwise, if the per-block dynamic shared memory size varies with different block sizes, the user needs to provide a unary function through {@code blockSizeToDynamicSMemSize} that computes the dynamic shared memory needed by {@code func} for any given block size. {@code dynamicSMemSize} is ignored. An example signature is:

* *

     * // Take block size, returns dynamic shared memory needed
     * size_t blockToSmem(int blockSize);
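In LWJGL, a minimal call might look like the following sketch, assuming {@code function} is a valid {@code CUfunction}, no dynamic shared memory is used, and {@code N} is the total number of work items:

    // Sketch: ask the occupancy calculator for a block size, then derive a grid size
    // large enough to cover N work items.
    try (MemoryStack stack = MemoryStack.stackPush()) {
        IntBuffer minGridSize = stack.mallocInt(1);
        IntBuffer blockSize   = stack.mallocInt(1);
        cuOccupancyMaxPotentialBlockSize(minGridSize, blockSize, function, null, 0, 0);
        int block = blockSize.get(0);
        int grid  = (N + block - 1) / block;
    }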
* * @param minGridSize returned minimum grid size needed to achieve the maximum occupancy * @param blockSize returned maximum block size that can achieve the maximum occupancy * @param func kernel for which launch configuration is calculated * @param blockSizeToDynamicSMemSize a function that calculates how much per-block dynamic shared memory {@code func} uses based on the block size * @param dynamicSMemSize dynamic shared memory usage intended, in bytes * @param blockSizeLimit the maximum block size {@code func} is designed to handle */ @NativeType("CUresult") public static int cuOccupancyMaxPotentialBlockSize(@NativeType("int *") IntBuffer minGridSize, @NativeType("int *") IntBuffer blockSize, @NativeType("CUfunction") long func, @Nullable @NativeType("size_t (*) (int)") CUoccupancyB2DSizeI blockSizeToDynamicSMemSize, @NativeType("size_t") long dynamicSMemSize, int blockSizeLimit) { if (CHECKS) { check(minGridSize, 1); check(blockSize, 1); } return ncuOccupancyMaxPotentialBlockSize(memAddress(minGridSize), memAddress(blockSize), func, memAddressSafe(blockSizeToDynamicSMemSize), dynamicSMemSize, blockSizeLimit); } // --- [ cuOccupancyMaxPotentialBlockSizeWithFlags ] --- /** Unsafe version of: {@link #cuOccupancyMaxPotentialBlockSizeWithFlags OccupancyMaxPotentialBlockSizeWithFlags} */ public static int ncuOccupancyMaxPotentialBlockSizeWithFlags(long minGridSize, long blockSize, long func, long blockSizeToDynamicSMemSize, long dynamicSMemSize, int blockSizeLimit, int flags) { long __functionAddress = Functions.OccupancyMaxPotentialBlockSizeWithFlags; if (CHECKS) { check(__functionAddress); check(func); } return callPPPPPI(minGridSize, blockSize, func, blockSizeToDynamicSMemSize, dynamicSMemSize, blockSizeLimit, flags, __functionAddress); } /** * Suggest a launch configuration with reasonable occupancy. * *

An extended version of {@link #cuOccupancyMaxPotentialBlockSize OccupancyMaxPotentialBlockSize}. In addition to arguments passed to {@link #cuOccupancyMaxPotentialBlockSize OccupancyMaxPotentialBlockSize}, {@link #cuOccupancyMaxPotentialBlockSizeWithFlags OccupancyMaxPotentialBlockSizeWithFlags} also takes a {@code Flags} parameter.

* *

The {@code Flags} parameter controls how special cases are handled. The valid flags are:

  • {@link #CU_OCCUPANCY_DEFAULT OCCUPANCY_DEFAULT}, which maintains the default behavior as {@link #cuOccupancyMaxPotentialBlockSize OccupancyMaxPotentialBlockSize};
  • {@link #CU_OCCUPANCY_DISABLE_CACHING_OVERRIDE OCCUPANCY_DISABLE_CACHING_OVERRIDE}, which suppresses the default behavior on platforms where global caching affects occupancy. On such platforms, the launch configurations that produce maximal occupancy might not support global caching. Setting {@link #CU_OCCUPANCY_DISABLE_CACHING_OVERRIDE OCCUPANCY_DISABLE_CACHING_OVERRIDE} guarantees that the produced launch configuration is global caching compatible at a potential cost of occupancy. More information about this feature can be found in the "Unified L1/Texture Cache" section of the Maxwell tuning guide.
* * @param minGridSize returned minimum grid size needed to achieve the maximum occupancy * @param blockSize returned maximum block size that can achieve the maximum occupancy * @param func kernel for which launch configuration is calculated * @param blockSizeToDynamicSMemSize a function that calculates how much per-block dynamic shared memory {@code func} uses based on the block size * @param dynamicSMemSize dynamic shared memory usage intended, in bytes * @param blockSizeLimit the maximum block size {@code func} is designed to handle * @param flags options */ @NativeType("CUresult") public static int cuOccupancyMaxPotentialBlockSizeWithFlags(@NativeType("int *") IntBuffer minGridSize, @NativeType("int *") IntBuffer blockSize, @NativeType("CUfunction") long func, @Nullable @NativeType("size_t (*) (int)") CUoccupancyB2DSizeI blockSizeToDynamicSMemSize, @NativeType("size_t") long dynamicSMemSize, int blockSizeLimit, @NativeType("unsigned int") int flags) { if (CHECKS) { check(minGridSize, 1); check(blockSize, 1); } return ncuOccupancyMaxPotentialBlockSizeWithFlags(memAddress(minGridSize), memAddress(blockSize), func, memAddressSafe(blockSizeToDynamicSMemSize), dynamicSMemSize, blockSizeLimit, flags); } // --- [ cuOccupancyAvailableDynamicSMemPerBlock ] --- /** Unsafe version of: {@link #cuOccupancyAvailableDynamicSMemPerBlock OccupancyAvailableDynamicSMemPerBlock} */ public static int ncuOccupancyAvailableDynamicSMemPerBlock(long dynamicSmemSize, long func, int numBlocks, int blockSize) { long __functionAddress = Functions.OccupancyAvailableDynamicSMemPerBlock; if (CHECKS) { check(__functionAddress); check(func); } return callPPI(dynamicSmemSize, func, numBlocks, blockSize, __functionAddress); } /** * Returns dynamic shared memory available per block when launching {@code numBlocks} blocks on SM. * *

Returns in {@code *dynamicSmemSize} the maximum size of dynamic shared memory to allow {@code numBlocks} blocks per SM.

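A minimal sketch, assuming {@code function} is a valid {@code CUfunction}:

    // Sketch: how much dynamic shared memory can each block use if we want
    // 2 blocks of 256 threads resident per SM?
    try (MemoryStack stack = MemoryStack.stackPush()) {
        PointerBuffer smem = stack.mallocPointer(1);
        cuOccupancyAvailableDynamicSMemPerBlock(smem, function, 2, 256);
        long bytesPerBlock = smem.get(0);
    }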
* * @param dynamicSmemSize returned maximum dynamic shared memory * @param func kernel function for which occupancy is calculated * @param numBlocks number of blocks to fit on SM * @param blockSize size of the blocks */ @NativeType("CUresult") public static int cuOccupancyAvailableDynamicSMemPerBlock(@NativeType("size_t *") PointerBuffer dynamicSmemSize, @NativeType("CUfunction") long func, int numBlocks, int blockSize) { if (CHECKS) { check(dynamicSmemSize, 1); } return ncuOccupancyAvailableDynamicSMemPerBlock(memAddress(dynamicSmemSize), func, numBlocks, blockSize); } // --- [ cuTexRefSetArray ] --- /** * Binds an array as a texture reference. (Deprecated) * *

Binds the CUDA array {@code hArray} to the texture reference {@code hTexRef}. Any previous address or CUDA array state associated with the texture reference is superseded by this function. {@code Flags} must be set to {@link #CU_TRSA_OVERRIDE_FORMAT TRSA_OVERRIDE_FORMAT}. Any CUDA array previously bound to {@code hTexRef} is unbound.

* * @param hTexRef texture reference to bind * @param hArray array to bind * @param Flags options (must be {@link #CU_TRSA_OVERRIDE_FORMAT TRSA_OVERRIDE_FORMAT}) */ @NativeType("CUresult") public static int cuTexRefSetArray(@NativeType("CUtexref") long hTexRef, @NativeType("CUarray") long hArray, @NativeType("unsigned int") int Flags) { long __functionAddress = Functions.TexRefSetArray; if (CHECKS) { check(hTexRef); check(hArray); } return callPPI(hTexRef, hArray, Flags, __functionAddress); } // --- [ cuTexRefSetMipmappedArray ] --- /** * Binds a mipmapped array to a texture reference. (Deprecated) * *

Binds the CUDA mipmapped array {@code hMipmappedArray} to the texture reference {@code hTexRef}. Any previous address or CUDA array state associated with the texture reference is superseded by this function. {@code Flags} must be set to {@link #CU_TRSA_OVERRIDE_FORMAT TRSA_OVERRIDE_FORMAT}. Any CUDA array previously bound to {@code hTexRef} is unbound.

* * @param hTexRef texture reference to bind * @param hMipmappedArray mipmapped array to bind * @param Flags options (must be {@link #CU_TRSA_OVERRIDE_FORMAT TRSA_OVERRIDE_FORMAT}) */ @NativeType("CUresult") public static int cuTexRefSetMipmappedArray(@NativeType("CUtexref") long hTexRef, @NativeType("CUmipmappedArray") long hMipmappedArray, @NativeType("unsigned int") int Flags) { long __functionAddress = Functions.TexRefSetMipmappedArray; if (CHECKS) { check(hTexRef); check(hMipmappedArray); } return callPPI(hTexRef, hMipmappedArray, Flags, __functionAddress); } // --- [ cuTexRefSetAddress ] --- /** Unsafe version of: {@link #cuTexRefSetAddress TexRefSetAddress} */ public static int ncuTexRefSetAddress(long ByteOffset, long hTexRef, long dptr, long bytes) { long __functionAddress = Functions.TexRefSetAddress; if (CHECKS) { check(hTexRef); check(dptr); } return callPPPPI(ByteOffset, hTexRef, dptr, bytes, __functionAddress); } /** * Binds an address as a texture reference. (Deprecated) * *

Binds a linear address range to the texture reference {@code hTexRef}. Any previous address or CUDA array state associated with the texture reference is superseded by this function. Any memory previously bound to {@code hTexRef} is unbound.

* *

Since the hardware enforces an alignment requirement on texture base addresses, {@link #cuTexRefSetAddress TexRefSetAddress} passes back a byte offset in {@code *ByteOffset} that must be applied to texture fetches in order to read from the desired memory. This offset must be divided by the texel size and passed to kernels that read from the texture so they can be applied to the {@code tex1Dfetch()} function.

* *

If the device memory pointer was returned from {@link #cuMemAlloc MemAlloc}, the offset is guaranteed to be 0 and {@code NULL} may be passed as the {@code ByteOffset} parameter.

* *

The total number of elements (or texels) in the linear address range cannot exceed {@link #CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE1D_LINEAR_WIDTH DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE1D_LINEAR_WIDTH}. The number of elements is computed as ({@code bytes} / {@code bytesPerElement}), where {@code bytesPerElement} is determined from the data format and number of components set using {@link #cuTexRefSetFormat TexRefSetFormat}.

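A minimal sketch, assuming {@code hTexRef} is a texture reference and {@code dptr}/{@code sizeBytes} describe a linear allocation from {@link #cuMemAlloc MemAlloc}:

    // Sketch: bind a linear device allocation to a (deprecated) texture reference.
    try (MemoryStack stack = MemoryStack.stackPush()) {
        PointerBuffer byteOffset = stack.mallocPointer(1);
        cuTexRefSetAddress(byteOffset, hTexRef, dptr, sizeBytes);
        // For cuMemAlloc memory the returned offset is guaranteed to be 0.
    }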
* * @param ByteOffset returned byte offset * @param hTexRef texture reference to bind * @param dptr device pointer to bind * @param bytes size of memory to bind in bytes */ @NativeType("CUresult") public static int cuTexRefSetAddress(@NativeType("size_t *") PointerBuffer ByteOffset, @NativeType("CUtexref") long hTexRef, @NativeType("CUdeviceptr") long dptr, @NativeType("size_t") long bytes) { if (CHECKS) { check(ByteOffset, 1); } return ncuTexRefSetAddress(memAddress(ByteOffset), hTexRef, dptr, bytes); } // --- [ cuTexRefSetAddress2D ] --- /** Unsafe version of: {@link #cuTexRefSetAddress2D TexRefSetAddress2D} */ public static int ncuTexRefSetAddress2D(long hTexRef, long desc, long dptr, long Pitch) { long __functionAddress = Functions.TexRefSetAddress2D; if (CHECKS) { check(hTexRef); check(dptr); } return callPPPPI(hTexRef, desc, dptr, Pitch, __functionAddress); } /** * Binds an address as a 2D texture reference. (Deprecated) * *

Binds a linear address range to the texture reference {@code hTexRef}. Any previous address or CUDA array state associated with the texture reference is superseded by this function. Any memory previously bound to {@code hTexRef} is unbound.

* *

Using a {@code tex2D()} function inside a kernel requires a call to either {@link #cuTexRefSetArray TexRefSetArray} to bind the corresponding texture reference to an array, or {@link #cuTexRefSetAddress2D TexRefSetAddress2D} to bind the texture reference to linear memory.

* *

Function calls to {@link #cuTexRefSetFormat TexRefSetFormat} cannot follow calls to {@link #cuTexRefSetAddress2D TexRefSetAddress2D} for the same texture reference.

* *

It is required that {@code dptr} be aligned to the appropriate hardware-specific texture alignment. You can query this value using the device attribute {@link #CU_DEVICE_ATTRIBUTE_TEXTURE_ALIGNMENT DEVICE_ATTRIBUTE_TEXTURE_ALIGNMENT}. If an unaligned {@code dptr} is supplied, {@link #CUDA_ERROR_INVALID_VALUE} is returned.

* *

{@code Pitch} has to be aligned to the hardware-specific texture pitch alignment. This value can be queried using the device attribute {@link #CU_DEVICE_ATTRIBUTE_TEXTURE_PITCH_ALIGNMENT DEVICE_ATTRIBUTE_TEXTURE_PITCH_ALIGNMENT}. If an unaligned {@code Pitch} is supplied, {@link #CUDA_ERROR_INVALID_VALUE} is returned.

* *

{@code Width} and {@code Height}, which are specified in elements (or texels), cannot exceed {@link #CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE2D_LINEAR_WIDTH DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE2D_LINEAR_WIDTH} and {@link #CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE2D_LINEAR_HEIGHT DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE2D_LINEAR_HEIGHT} respectively. {@code Pitch}, which is specified in bytes, cannot exceed {@link #CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE2D_LINEAR_PITCH DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE2D_LINEAR_PITCH}.

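The pitch alignment mentioned above can be queried and applied as in the following sketch ({@code device} and {@code rowBytes} are assumed to be a valid {@code CUdevice} and the unpadded row size):

    // Sketch: round a row size up to the device's texture pitch alignment.
    try (MemoryStack stack = MemoryStack.stackPush()) {
        IntBuffer pitchAlignment = stack.mallocInt(1);
        cuDeviceGetAttribute(pitchAlignment, CU_DEVICE_ATTRIBUTE_TEXTURE_PITCH_ALIGNMENT, device);
        int  align        = pitchAlignment.get(0);
        long alignedPitch = (rowBytes + align - 1) / align * align;
    }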
* * @param hTexRef texture reference to bind * @param desc descriptor of CUDA array * @param dptr device pointer to bind * @param Pitch line pitch in bytes */ @NativeType("CUresult") public static int cuTexRefSetAddress2D(@NativeType("CUtexref") long hTexRef, @NativeType("CUDA_ARRAY_DESCRIPTOR const *") CUDA_ARRAY_DESCRIPTOR desc, @NativeType("CUdeviceptr") long dptr, @NativeType("size_t") long Pitch) { return ncuTexRefSetAddress2D(hTexRef, desc.address(), dptr, Pitch); } // --- [ cuTexRefSetFormat ] --- /** * Sets the format for a texture reference. (Deprecated) * *

Specifies the format of the data to be read by the texture reference {@code hTexRef}. {@code fmt} and {@code NumPackedComponents} are exactly analogous to the {@code Format} and {@code NumChannels} members of the {@link CUDA_ARRAY_DESCRIPTOR} structure: they specify the format of each component and the number of components per array element.

* * @param hTexRef texture reference * @param fmt format to set * @param NumPackedComponents number of components per array element */ @NativeType("CUresult") public static int cuTexRefSetFormat(@NativeType("CUtexref") long hTexRef, @NativeType("CUarray_format") int fmt, int NumPackedComponents) { long __functionAddress = Functions.TexRefSetFormat; if (CHECKS) { check(hTexRef); } return callPI(hTexRef, fmt, NumPackedComponents, __functionAddress); } // --- [ cuTexRefSetAddressMode ] --- /** * Sets the addressing mode for a texture reference. (Deprecated) * *

Specifies the addressing mode {@code am} for the given dimension {@code dim} of the texture reference {@code hTexRef}. If {@code dim} is zero, the addressing mode is applied to the first parameter of the functions used to fetch from the texture; if {@code dim} is 1, the second, and so on.

* *

Note that this call has no effect if {@code hTexRef} is bound to linear memory. Also, if the {@link #CU_TRSF_NORMALIZED_COORDINATES TRSF_NORMALIZED_COORDINATES} flag is not set, the only supported address mode is {@link #CU_TR_ADDRESS_MODE_CLAMP TR_ADDRESS_MODE_CLAMP}.

* * @param hTexRef texture reference * @param dim dimension * @param am addressing mode to set */ @NativeType("CUresult") public static int cuTexRefSetAddressMode(@NativeType("CUtexref") long hTexRef, int dim, @NativeType("CUaddress_mode") int am) { long __functionAddress = Functions.TexRefSetAddressMode; if (CHECKS) { check(hTexRef); } return callPI(hTexRef, dim, am, __functionAddress); } // --- [ cuTexRefSetFilterMode ] --- /** * Sets the filtering mode for a texture reference. (Deprecated) * *

Specifies the filtering mode {@code fm} to be used when reading memory through the texture reference {@code hTexRef}.

* *

Note that this call has no effect if {@code hTexRef} is bound to linear memory.

* * @param hTexRef texture reference * @param fm filtering mode to set */ @NativeType("CUresult") public static int cuTexRefSetFilterMode(@NativeType("CUtexref") long hTexRef, @NativeType("CUfilter_mode") int fm) { long __functionAddress = Functions.TexRefSetFilterMode; if (CHECKS) { check(hTexRef); } return callPI(hTexRef, fm, __functionAddress); } // --- [ cuTexRefSetMipmapFilterMode ] --- /** * Sets the mipmap filtering mode for a texture reference (Deprecated) * *

Specifies the mipmap filtering mode {@code fm} to be used when reading memory through the texture reference {@code hTexRef}.

* *

Note that this call has no effect if {@code hTexRef} is not bound to a mipmapped array.

* * @param hTexRef texture reference * @param fm filtering mode to set */ @NativeType("CUresult") public static int cuTexRefSetMipmapFilterMode(@NativeType("CUtexref") long hTexRef, @NativeType("CUfilter_mode") int fm) { long __functionAddress = Functions.TexRefSetMipmapFilterMode; if (CHECKS) { check(hTexRef); } return callPI(hTexRef, fm, __functionAddress); } // --- [ cuTexRefSetMipmapLevelBias ] --- /** * Sets the mipmap level bias for a texture reference. (Deprecated) * *

Specifies the mipmap level bias {@code bias} to be added to the specified mipmap level when reading memory through the texture reference {@code hTexRef}.

* *

Note that this call has no effect if {@code hTexRef} is not bound to a mipmapped array.

* * @param hTexRef texture reference * @param bias mipmap level bias */ @NativeType("CUresult") public static int cuTexRefSetMipmapLevelBias(@NativeType("CUtexref") long hTexRef, float bias) { long __functionAddress = Functions.TexRefSetMipmapLevelBias; if (CHECKS) { check(hTexRef); } return callPI(hTexRef, bias, __functionAddress); } // --- [ cuTexRefSetMipmapLevelClamp ] --- /** * Sets the mipmap min/max mipmap level clamps for a texture reference. (Deprecated) * *

Specifies the min/max mipmap level clamps, {@code minMipmapLevelClamp} and {@code maxMipmapLevelClamp} respectively, to be used when reading memory through the texture reference {@code hTexRef}.

* *

Note that this call has no effect if {@code hTexRef} is not bound to a mipmapped array.

* * @param hTexRef texture reference * @param minMipmapLevelClamp mipmap min level clamp * @param maxMipmapLevelClamp mipmap max level clamp */ @NativeType("CUresult") public static int cuTexRefSetMipmapLevelClamp(@NativeType("CUtexref") long hTexRef, float minMipmapLevelClamp, float maxMipmapLevelClamp) { long __functionAddress = Functions.TexRefSetMipmapLevelClamp; if (CHECKS) { check(hTexRef); } return callPI(hTexRef, minMipmapLevelClamp, maxMipmapLevelClamp, __functionAddress); } // --- [ cuTexRefSetMaxAnisotropy ] --- /** * Sets the maximum anisotropy for a texture reference. (Deprecated) * *

Specifies the maximum anisotropy {@code maxAniso} to be used when reading memory through the texture reference {@code hTexRef}.

* *

Note that this call has no effect if {@code hTexRef} is bound to linear memory.

* * @param hTexRef texture reference * @param maxAniso maximum anisotropy */ @NativeType("CUresult") public static int cuTexRefSetMaxAnisotropy(@NativeType("CUtexref") long hTexRef, @NativeType("unsigned int") int maxAniso) { long __functionAddress = Functions.TexRefSetMaxAnisotropy; if (CHECKS) { check(hTexRef); } return callPI(hTexRef, maxAniso, __functionAddress); } // --- [ cuTexRefSetBorderColor ] --- /** Unsafe version of: {@link #cuTexRefSetBorderColor TexRefSetBorderColor} */ public static int ncuTexRefSetBorderColor(long hTexRef, long pBorderColor) { long __functionAddress = Functions.TexRefSetBorderColor; if (CHECKS) { check(hTexRef); } return callPPI(hTexRef, pBorderColor, __functionAddress); } /** * Sets the border color for a texture reference. (Deprecated) * *

Specifies the value of the RGBA color via {@code pBorderColor} for the texture reference {@code hTexRef}. The color value supports only the float type and holds color components in the following sequence: {@code pBorderColor[0]} holds the 'R' component, {@code pBorderColor[1]} holds the 'G' component, {@code pBorderColor[2]} holds the 'B' component and {@code pBorderColor[3]} holds the 'A' component.

* *

Note that the color values can be set only when the address mode is set to {@link #CU_TR_ADDRESS_MODE_BORDER TR_ADDRESS_MODE_BORDER} using {@link #cuTexRefSetAddressMode TexRefSetAddressMode}. Applications using integer border color values have to "reinterpret_cast" their values to float.

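A minimal sketch, assuming {@code hTexRef} is a valid texture reference:

    // Sketch: select the border address mode on dimension 0, then set an opaque
    // black RGBA border color.
    try (MemoryStack stack = MemoryStack.stackPush()) {
        FloatBuffer border = stack.floats(0.0f, 0.0f, 0.0f, 1.0f);
        cuTexRefSetAddressMode(hTexRef, 0, CU_TR_ADDRESS_MODE_BORDER);
        cuTexRefSetBorderColor(hTexRef, border);
    }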
* * @param hTexRef texture reference * @param pBorderColor RGBA color */ @NativeType("CUresult") public static int cuTexRefSetBorderColor(@NativeType("CUtexref") long hTexRef, @NativeType("float *") FloatBuffer pBorderColor) { if (CHECKS) { check(pBorderColor, 4); } return ncuTexRefSetBorderColor(hTexRef, memAddress(pBorderColor)); } // --- [ cuTexRefSetFlags ] --- /** * Sets the flags for a texture reference. (Deprecated) * *

Specifies optional flags via {@code Flags} to specify the behavior of data returned through the texture reference {@code hTexRef}. The valid flags are:

  • {@link #CU_TRSF_READ_AS_INTEGER TRSF_READ_AS_INTEGER}, which suppresses the default behavior of having the texture promote integer data to floating point data in the range [0, 1]. Note that textures with 32-bit integer format would not be promoted, regardless of whether or not this flag is specified;
  • {@link #CU_TRSF_NORMALIZED_COORDINATES TRSF_NORMALIZED_COORDINATES}, which suppresses the default behavior of having the texture coordinates range from [0, Dim) where Dim is the width or height of the CUDA array. Instead, the texture coordinates [0, 1.0) reference the entire breadth of the array dimension;
  • {@link #CU_TRSF_DISABLE_TRILINEAR_OPTIMIZATION TRSF_DISABLE_TRILINEAR_OPTIMIZATION}, which disables any trilinear filtering optimizations. Trilinear optimizations improve texture filtering performance by allowing bilinear filtering on textures in scenarios where it can closely approximate the expected results.
* * @param hTexRef texture reference * @param Flags optional flags to set */ @NativeType("CUresult") public static int cuTexRefSetFlags(@NativeType("CUtexref") long hTexRef, @NativeType("unsigned int") int Flags) { long __functionAddress = Functions.TexRefSetFlags; if (CHECKS) { check(hTexRef); } return callPI(hTexRef, Flags, __functionAddress); } // --- [ cuTexRefGetAddress ] --- /** Unsafe version of: {@link #cuTexRefGetAddress TexRefGetAddress} */ public static int ncuTexRefGetAddress(long pdptr, long hTexRef) { long __functionAddress = Functions.TexRefGetAddress; if (CHECKS) { check(hTexRef); } return callPPI(pdptr, hTexRef, __functionAddress); } /** * Gets the address associated with a texture reference. (Deprecated) * *

Returns in {@code *pdptr} the base address bound to the texture reference {@code hTexRef}, or returns {@link #CUDA_ERROR_INVALID_VALUE} if the texture reference is not bound to any device memory range.

* * @param pdptr returned device address * @param hTexRef texture reference */ @NativeType("CUresult") public static int cuTexRefGetAddress(@NativeType("CUdeviceptr *") PointerBuffer pdptr, @NativeType("CUtexref") long hTexRef) { if (CHECKS) { check(pdptr, 1); } return ncuTexRefGetAddress(memAddress(pdptr), hTexRef); } // --- [ cuTexRefGetArray ] --- /** Unsafe version of: {@link #cuTexRefGetArray TexRefGetArray} */ public static int ncuTexRefGetArray(long phArray, long hTexRef) { long __functionAddress = Functions.TexRefGetArray; if (CHECKS) { check(hTexRef); } return callPPI(phArray, hTexRef, __functionAddress); } /** * Gets the array bound to a texture reference. (Deprecated) * *

Returns in {@code *phArray} the CUDA array bound to the texture reference {@code hTexRef}, or returns {@link #CUDA_ERROR_INVALID_VALUE} if the texture reference is not bound to any CUDA array.

* * @param phArray returned array * @param hTexRef texture reference */ @NativeType("CUresult") public static int cuTexRefGetArray(@NativeType("CUarray *") PointerBuffer phArray, @NativeType("CUtexref") long hTexRef) { if (CHECKS) { check(phArray, 1); } return ncuTexRefGetArray(memAddress(phArray), hTexRef); } // --- [ cuTexRefGetMipmappedArray ] --- /** Unsafe version of: {@link #cuTexRefGetMipmappedArray TexRefGetMipmappedArray} */ public static int ncuTexRefGetMipmappedArray(long phMipmappedArray, long hTexRef) { long __functionAddress = Functions.TexRefGetMipmappedArray; if (CHECKS) { check(hTexRef); } return callPPI(phMipmappedArray, hTexRef, __functionAddress); } /** * Gets the mipmapped array bound to a texture reference. (Deprecated) * *

Returns in {@code *phMipmappedArray} the CUDA mipmapped array bound to the texture reference {@code hTexRef}, or returns {@link #CUDA_ERROR_INVALID_VALUE} if the texture reference is not bound to any CUDA mipmapped array.

* * @param phMipmappedArray returned mipmapped array * @param hTexRef texture reference */ @NativeType("CUresult") public static int cuTexRefGetMipmappedArray(@NativeType("CUmipmappedArray *") PointerBuffer phMipmappedArray, @NativeType("CUtexref") long hTexRef) { if (CHECKS) { check(phMipmappedArray, 1); } return ncuTexRefGetMipmappedArray(memAddress(phMipmappedArray), hTexRef); } // --- [ cuTexRefGetAddressMode ] --- /** Unsafe version of: {@link #cuTexRefGetAddressMode TexRefGetAddressMode} */ public static int ncuTexRefGetAddressMode(long pam, long hTexRef, int dim) { long __functionAddress = Functions.TexRefGetAddressMode; if (CHECKS) { check(hTexRef); } return callPPI(pam, hTexRef, dim, __functionAddress); } /** * Gets the addressing mode used by a texture reference. (Deprecated) * *

Returns in {@code *pam} the addressing mode corresponding to the dimension {@code dim} of the texture reference {@code hTexRef}. Currently, the only valid values for {@code dim} are 0 and 1.

* * @param pam returned addressing mode * @param hTexRef texture reference * @param dim dimension */ @NativeType("CUresult") public static int cuTexRefGetAddressMode(@NativeType("CUaddress_mode *") IntBuffer pam, @NativeType("CUtexref") long hTexRef, int dim) { if (CHECKS) { check(pam, 1); } return ncuTexRefGetAddressMode(memAddress(pam), hTexRef, dim); } // --- [ cuTexRefGetFilterMode ] --- /** Unsafe version of: {@link #cuTexRefGetFilterMode TexRefGetFilterMode} */ public static int ncuTexRefGetFilterMode(long pfm, long hTexRef) { long __functionAddress = Functions.TexRefGetFilterMode; if (CHECKS) { check(hTexRef); } return callPPI(pfm, hTexRef, __functionAddress); } /** * Gets the filter-mode used by a texture reference. (Deprecated) * *

Returns in {@code *pfm} the filtering mode of the texture reference {@code hTexRef}.

* * @param pfm returned filtering mode * @param hTexRef texture reference */ @NativeType("CUresult") public static int cuTexRefGetFilterMode(@NativeType("CUfilter_mode *") IntBuffer pfm, @NativeType("CUtexref") long hTexRef) { if (CHECKS) { check(pfm, 1); } return ncuTexRefGetFilterMode(memAddress(pfm), hTexRef); } // --- [ cuTexRefGetFormat ] --- /** Unsafe version of: {@link #cuTexRefGetFormat TexRefGetFormat} */ public static int ncuTexRefGetFormat(long pFormat, long pNumChannels, long hTexRef) { long __functionAddress = Functions.TexRefGetFormat; if (CHECKS) { check(hTexRef); } return callPPPI(pFormat, pNumChannels, hTexRef, __functionAddress); } /** * Gets the format used by a texture reference. (Deprecated) * *

Returns in {@code *pFormat} and {@code *pNumChannels} the format and number of components of the CUDA array bound to the texture reference {@code hTexRef}. If {@code pFormat} or {@code pNumChannels} is {@code NULL}, it will be ignored.

* * @param pFormat returned format * @param pNumChannels returned number of components * @param hTexRef texture reference */ @NativeType("CUresult") public static int cuTexRefGetFormat(@NativeType("CUarray_format *") IntBuffer pFormat, @Nullable @NativeType("int *") IntBuffer pNumChannels, @NativeType("CUtexref") long hTexRef) { if (CHECKS) { check(pFormat, 1); checkSafe(pNumChannels, 1); } return ncuTexRefGetFormat(memAddress(pFormat), memAddressSafe(pNumChannels), hTexRef); } // --- [ cuTexRefGetMipmapFilterMode ] --- /** Unsafe version of: {@link #cuTexRefGetMipmapFilterMode TexRefGetMipmapFilterMode} */ public static int ncuTexRefGetMipmapFilterMode(long pfm, long hTexRef) { long __functionAddress = Functions.TexRefGetMipmapFilterMode; if (CHECKS) { check(hTexRef); } return callPPI(pfm, hTexRef, __functionAddress); } /** * Gets the mipmap filtering mode for a texture reference. (Deprecated) * *

Returns the mipmap filtering mode in {@code pfm} that's used when reading memory through the texture reference {@code hTexRef}.

* * @param pfm returned mipmap filtering mode * @param hTexRef texture reference */ @NativeType("CUresult") public static int cuTexRefGetMipmapFilterMode(@NativeType("CUfilter_mode *") IntBuffer pfm, @NativeType("CUtexref") long hTexRef) { if (CHECKS) { check(pfm, 1); } return ncuTexRefGetMipmapFilterMode(memAddress(pfm), hTexRef); } // --- [ cuTexRefGetMipmapLevelBias ] --- /** Unsafe version of: {@link #cuTexRefGetMipmapLevelBias TexRefGetMipmapLevelBias} */ public static int ncuTexRefGetMipmapLevelBias(long pbias, long hTexRef) { long __functionAddress = Functions.TexRefGetMipmapLevelBias; if (CHECKS) { check(hTexRef); } return callPPI(pbias, hTexRef, __functionAddress); } /** * Gets the mipmap level bias for a texture reference. (Deprecated) * *

Returns the mipmap level bias in {@code pBias} that's added to the specified mipmap level when reading memory through the texture reference {@code hTexRef}.

* * @param pbias returned mipmap level bias * @param hTexRef texture reference */ @NativeType("CUresult") public static int cuTexRefGetMipmapLevelBias(@NativeType("float *") FloatBuffer pbias, @NativeType("CUtexref") long hTexRef) { if (CHECKS) { check(pbias, 1); } return ncuTexRefGetMipmapLevelBias(memAddress(pbias), hTexRef); } // --- [ cuTexRefGetMipmapLevelClamp ] --- /** Unsafe version of: {@link #cuTexRefGetMipmapLevelClamp TexRefGetMipmapLevelClamp} */ public static int ncuTexRefGetMipmapLevelClamp(long pminMipmapLevelClamp, long pmaxMipmapLevelClamp, long hTexRef) { long __functionAddress = Functions.TexRefGetMipmapLevelClamp; if (CHECKS) { check(hTexRef); } return callPPPI(pminMipmapLevelClamp, pmaxMipmapLevelClamp, hTexRef, __functionAddress); } /** * Gets the min/max mipmap level clamps for a texture reference. (Deprecated) * *

Returns the min/max mipmap level clamps in {@code pminMipmapLevelClamp} and {@code pmaxMipmapLevelClamp} that are used when reading memory through the texture reference {@code hTexRef}.

* * @param pminMipmapLevelClamp returned mipmap min level clamp * @param pmaxMipmapLevelClamp returned mipmap max level clamp * @param hTexRef texture reference */ @NativeType("CUresult") public static int cuTexRefGetMipmapLevelClamp(@NativeType("float *") FloatBuffer pminMipmapLevelClamp, @NativeType("float *") FloatBuffer pmaxMipmapLevelClamp, @NativeType("CUtexref") long hTexRef) { if (CHECKS) { check(pminMipmapLevelClamp, 1); check(pmaxMipmapLevelClamp, 1); } return ncuTexRefGetMipmapLevelClamp(memAddress(pminMipmapLevelClamp), memAddress(pmaxMipmapLevelClamp), hTexRef); } // --- [ cuTexRefGetMaxAnisotropy ] --- /** Unsafe version of: {@link #cuTexRefGetMaxAnisotropy TexRefGetMaxAnisotropy} */ public static int ncuTexRefGetMaxAnisotropy(long pmaxAniso, long hTexRef) { long __functionAddress = Functions.TexRefGetMaxAnisotropy; if (CHECKS) { check(hTexRef); } return callPPI(pmaxAniso, hTexRef, __functionAddress); } /** * Gets the maximum anisotropy for a texture reference. (Deprecated) * *

Returns the maximum anisotropy in {@code pmaxAniso} that's used when reading memory through the texture reference {@code hTexRef}.

* * @param pmaxAniso returned maximum anisotropy * @param hTexRef texture reference */ @NativeType("CUresult") public static int cuTexRefGetMaxAnisotropy(@NativeType("int *") IntBuffer pmaxAniso, @NativeType("CUtexref") long hTexRef) { if (CHECKS) { check(pmaxAniso, 1); } return ncuTexRefGetMaxAnisotropy(memAddress(pmaxAniso), hTexRef); } // --- [ cuTexRefGetBorderColor ] --- /** Unsafe version of: {@link #cuTexRefGetBorderColor TexRefGetBorderColor} */ public static int ncuTexRefGetBorderColor(long pBorderColor, long hTexRef) { long __functionAddress = Functions.TexRefGetBorderColor; if (CHECKS) { check(hTexRef); } return callPPI(pBorderColor, hTexRef, __functionAddress); } /** * Gets the border color used by a texture reference. (Deprecated) * *

Returns in {@code pBorderColor} the values of the RGBA color used by the texture reference {@code hTexRef}. The color value is of type float and holds color components in the following sequence: {@code pBorderColor[0]} holds the 'R' component, {@code pBorderColor[1]} holds the 'G' component, {@code pBorderColor[2]} holds the 'B' component and {@code pBorderColor[3]} holds the 'A' component.

* * @param pBorderColor returned Type and Value of RGBA color * @param hTexRef texture reference */ @NativeType("CUresult") public static int cuTexRefGetBorderColor(@NativeType("float *") FloatBuffer pBorderColor, @NativeType("CUtexref") long hTexRef) { if (CHECKS) { check(pBorderColor, 4); } return ncuTexRefGetBorderColor(memAddress(pBorderColor), hTexRef); } // --- [ cuTexRefGetFlags ] --- /** Unsafe version of: {@link #cuTexRefGetFlags TexRefGetFlags} */ public static int ncuTexRefGetFlags(long pFlags, long hTexRef) { long __functionAddress = Functions.TexRefGetFlags; if (CHECKS) { check(hTexRef); } return callPPI(pFlags, hTexRef, __functionAddress); } /** * Gets the flags used by a texture reference. (Deprecated) * *

Returns in {@code *pFlags} the flags of the texture reference {@code hTexRef}.

* * @param pFlags returned flags * @param hTexRef texture reference */ @NativeType("CUresult") public static int cuTexRefGetFlags(@NativeType("unsigned int *") IntBuffer pFlags, @NativeType("CUtexref") long hTexRef) { if (CHECKS) { check(pFlags, 1); } return ncuTexRefGetFlags(memAddress(pFlags), hTexRef); } // --- [ cuTexRefCreate ] --- /** Unsafe version of: {@link #cuTexRefCreate TexRefCreate} */ public static int ncuTexRefCreate(long pTexRef) { long __functionAddress = Functions.TexRefCreate; return callPI(pTexRef, __functionAddress); } /** * Creates a texture reference. (Deprecated) * *

Creates a texture reference and returns its handle in {@code *pTexRef}. Once created, the application must call {@link #cuTexRefSetArray TexRefSetArray} or {@link #cuTexRefSetAddress TexRefSetAddress} to associate the reference with allocated memory. Other texture reference functions are used to specify the format and interpretation (addressing, filtering, etc.) to be used when the memory is read through this texture reference.

* * @param pTexRef returned texture reference */ @NativeType("CUresult") public static int cuTexRefCreate(@NativeType("CUtexref *") PointerBuffer pTexRef) { if (CHECKS) { check(pTexRef, 1); } return ncuTexRefCreate(memAddress(pTexRef)); } // --- [ cuTexRefDestroy ] --- /** * Destroys a texture reference. (Deprecated) * *

Destroys the texture reference specified by {@code hTexRef}.

* * @param hTexRef texture reference to destroy */ @NativeType("CUresult") public static int cuTexRefDestroy(@NativeType("CUtexref") long hTexRef) { long __functionAddress = Functions.TexRefDestroy; if (CHECKS) { check(hTexRef); } return callPI(hTexRef, __functionAddress); } // --- [ cuSurfRefSetArray ] --- /** * Sets the CUDA array for a surface reference.Deprecated: * *

Sets the CUDA array {@code hArray} to be read and written by the surface reference {@code hSurfRef}. Any previous CUDA array state associated with the surface reference is superseded by this function. {@code Flags} must be set to 0. The {@link #CUDA_ARRAY3D_SURFACE_LDST} flag must have been set for the CUDA array. Any CUDA array previously bound to {@code hSurfRef} is unbound.

* * @param hSurfRef surface reference handle * @param hArray CUDA array handle * @param Flags set to 0 */ @NativeType("CUresult") public static int cuSurfRefSetArray(@NativeType("CUsurfref") long hSurfRef, @NativeType("CUarray") long hArray, @NativeType("unsigned int") int Flags) { long __functionAddress = Functions.SurfRefSetArray; if (CHECKS) { check(hSurfRef); check(hArray); } return callPPI(hSurfRef, hArray, Flags, __functionAddress); } // --- [ cuSurfRefGetArray ] --- /** Unsafe version of: {@link #cuSurfRefGetArray SurfRefGetArray} */ public static int ncuSurfRefGetArray(long phArray, long hSurfRef) { long __functionAddress = Functions.SurfRefGetArray; if (CHECKS) { check(hSurfRef); } return callPPI(phArray, hSurfRef, __functionAddress); } /** * Passes back the CUDA array bound to a surface reference. (Deprecated) * *

Returns in {@code *phArray} the CUDA array bound to the surface reference {@code hSurfRef}, or returns {@link #CUDA_ERROR_INVALID_VALUE} if the surface reference is not bound to any CUDA array.

* * @param phArray surface reference handle * @param hSurfRef surface reference handle */ @NativeType("CUresult") public static int cuSurfRefGetArray(@NativeType("CUarray *") PointerBuffer phArray, @NativeType("CUsurfref") long hSurfRef) { if (CHECKS) { check(phArray, 1); } return ncuSurfRefGetArray(memAddress(phArray), hSurfRef); } // --- [ cuTexObjectCreate ] --- /** Unsafe version of: {@link #cuTexObjectCreate TexObjectCreate} */ public static int ncuTexObjectCreate(long pTexObject, long pResDesc, long pTexDesc, long pResViewDesc) { long __functionAddress = Functions.TexObjectCreate; if (CHECKS) { check(__functionAddress); } return callPPPPI(pTexObject, pResDesc, pTexDesc, pResViewDesc, __functionAddress); } /** * Creates a texture object. * *

Creates a texture object and returns it in {@code pTexObject}. {@code pResDesc} describes the data to texture from. {@code pTexDesc} describes how the data should be sampled. {@code pResViewDesc} is an optional argument that specifies an alternate format for the data described by {@code pResDesc}, and also describes the subresource region to restrict access to when texturing. {@code pResViewDesc} can only be specified if the type of resource is a CUDA array or a CUDA mipmapped array.

* *

Texture objects are only supported on devices of compute capability 3.0 or higher. Additionally, a texture object is an opaque value, and, as such, should only be accessed through CUDA API calls.

  • If {@code CUDA_RESOURCE_DESC::resType} is set to {@link #CU_RESOURCE_TYPE_ARRAY RESOURCE_TYPE_ARRAY}, {@code CUDA_RESOURCE_DESC::res::array::hArray} must be set to a valid CUDA array handle.
  • If {@code CUDA_RESOURCE_DESC::resType} is set to {@link #CU_RESOURCE_TYPE_MIPMAPPED_ARRAY RESOURCE_TYPE_MIPMAPPED_ARRAY}, {@code CUDA_RESOURCE_DESC::res::mipmap::hMipmappedArray} must be set to a valid CUDA mipmapped array handle.
  • If {@code CUDA_RESOURCE_DESC::resType} is set to {@link #CU_RESOURCE_TYPE_LINEAR RESOURCE_TYPE_LINEAR}, {@code CUDA_RESOURCE_DESC::res::linear::devPtr} must be set to a valid device pointer that is aligned to {@link #CU_DEVICE_ATTRIBUTE_TEXTURE_ALIGNMENT DEVICE_ATTRIBUTE_TEXTURE_ALIGNMENT}. {@code CUDA_RESOURCE_DESC::res::linear::format} and {@code CUDA_RESOURCE_DESC::res::linear::numChannels} describe the format of each component and the number of components per array element. {@code CUDA_RESOURCE_DESC::res::linear::sizeInBytes} specifies the size of the array in bytes. The total number of elements in the linear address range cannot exceed {@link #CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE1D_LINEAR_WIDTH DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE1D_LINEAR_WIDTH}. The number of elements is computed as {@code (sizeInBytes / (sizeof(format) * numChannels))}.
  • If {@code CUDA_RESOURCE_DESC::resType} is set to {@link #CU_RESOURCE_TYPE_PITCH2D RESOURCE_TYPE_PITCH2D}, {@code CUDA_RESOURCE_DESC::res::pitch2D::devPtr} must be set to a valid device pointer that is aligned to {@link #CU_DEVICE_ATTRIBUTE_TEXTURE_ALIGNMENT DEVICE_ATTRIBUTE_TEXTURE_ALIGNMENT}. {@code CUDA_RESOURCE_DESC::res::pitch2D::format} and {@code CUDA_RESOURCE_DESC::res::pitch2D::numChannels} describe the format of each component and the number of components per array element. {@code CUDA_RESOURCE_DESC::res::pitch2D::width} and {@code CUDA_RESOURCE_DESC::res::pitch2D::height} specify the width and height of the array in elements, and cannot exceed {@link #CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE2D_LINEAR_WIDTH DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE2D_LINEAR_WIDTH} and {@link #CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE2D_LINEAR_HEIGHT DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE2D_LINEAR_HEIGHT} respectively. {@code CUDA_RESOURCE_DESC::res::pitch2D::pitchInBytes} specifies the pitch between two rows in bytes and has to be aligned to {@link #CU_DEVICE_ATTRIBUTE_TEXTURE_PITCH_ALIGNMENT DEVICE_ATTRIBUTE_TEXTURE_PITCH_ALIGNMENT}. Pitch cannot exceed {@link #CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE2D_LINEAR_PITCH DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE2D_LINEAR_PITCH}.
  • {@code flags} must be set to zero.

  • {@code CUDA_TEXTURE_DESC::addressMode} specifies the addressing mode for each dimension of the texture data. This is ignored if {@code CUDA_RESOURCE_DESC::resType} is {@link #CU_RESOURCE_TYPE_LINEAR RESOURCE_TYPE_LINEAR}. Also, if the flag {@link #CU_TRSF_NORMALIZED_COORDINATES TRSF_NORMALIZED_COORDINATES} is not set, the only supported address mode is {@link #CU_TR_ADDRESS_MODE_CLAMP TR_ADDRESS_MODE_CLAMP}.
  • {@code CUDA_TEXTURE_DESC::filterMode} specifies the filtering mode to be used when fetching from the texture. This is ignored if {@code CUDA_RESOURCE_DESC::resType} is {@link #CU_RESOURCE_TYPE_LINEAR RESOURCE_TYPE_LINEAR}.
  • {@code CUDA_TEXTURE_DESC::flags} can be any combination of the following:
    • {@link #CU_TRSF_READ_AS_INTEGER TRSF_READ_AS_INTEGER}, which suppresses the default behavior of having the texture promote integer data to floating point data in the range [0, 1]. Note that textures with 32-bit integer format would not be promoted, regardless of whether or not this flag is specified.
    • {@link #CU_TRSF_NORMALIZED_COORDINATES TRSF_NORMALIZED_COORDINATES}, which suppresses the default behavior of having the texture coordinates range from [0, Dim) where Dim is the width or height of the CUDA array. Instead, the texture coordinates [0, 1.0) reference the entire breadth of the array dimension. Note that for CUDA mipmapped arrays, this flag has to be set.
    • {@link #CU_TRSF_DISABLE_TRILINEAR_OPTIMIZATION TRSF_DISABLE_TRILINEAR_OPTIMIZATION}, which disables any trilinear filtering optimizations. Trilinear optimizations improve texture filtering performance by allowing bilinear filtering on textures in scenarios where it can closely approximate the expected results.
  • {@code CUDA_TEXTURE_DESC::maxAnisotropy} specifies the maximum anisotropy ratio to be used when doing anisotropic filtering. This value will be clamped to the range [1, 16].
  • {@code CUDA_TEXTURE_DESC::mipmapFilterMode} specifies the filter mode when the calculated mipmap level lies between two defined mipmap levels.
  • {@code CUDA_TEXTURE_DESC::mipmapLevelBias} specifies the offset to be applied to the calculated mipmap level.
  • {@code CUDA_TEXTURE_DESC::minMipmapLevelClamp} specifies the lower end of the mipmap level range to clamp access to.
  • {@code CUDA_TEXTURE_DESC::maxMipmapLevelClamp} specifies the upper end of the mipmap level range to clamp access to.

    *
  • {@code CUDA_RESOURCE_VIEW_DESC::format} specifies how the data contained in the CUDA array or CUDA mipmapped array should be interpreted. Note that this can incur a change in size of the texture data. If the resource view format is a block compressed format, then the underlying CUDA array or CUDA mipmapped array has to have a base format of {@link #CU_AD_FORMAT_UNSIGNED_INT32 AD_FORMAT_UNSIGNED_INT32} with 2 or 4 channels, depending on the block compressed format. For example, BC1 and BC4 require the underlying CUDA array to have a format of {@link #CU_AD_FORMAT_UNSIGNED_INT32 AD_FORMAT_UNSIGNED_INT32} with 2 channels. The other BC formats require the underlying resource to have the same base format but with 4 channels.
  • {@code CUDA_RESOURCE_VIEW_DESC::width} specifies the new width of the texture data. If the resource view format is a block compressed format, this value has to be 4 times the original width of the resource. For non block compressed formats, this value has to be equal to that of the original resource.
  • {@code CUDA_RESOURCE_VIEW_DESC::height} specifies the new height of the texture data. If the resource view format is a block compressed format, this value has to be 4 times the original height of the resource. For non block compressed formats, this value has to be equal to that of the original resource.
  • {@code CUDA_RESOURCE_VIEW_DESC::depth} specifies the new depth of the texture data. This value has to be equal to that of the original resource.
  • {@code CUDA_RESOURCE_VIEW_DESC::firstMipmapLevel} specifies the most detailed mipmap level. This will be the new mipmap level zero. For non-mipmapped resources, this value has to be zero. {@code CUDA_TEXTURE_DESC::minMipmapLevelClamp} and {@code CUDA_TEXTURE_DESC::maxMipmapLevelClamp} will be relative to this value. For example, if the {@code firstMipmapLevel} is set to 2 and a {@code minMipmapLevelClamp} of 1.2 is specified, then the actual minimum mipmap level clamp will be 3.2.
  • {@code CUDA_RESOURCE_VIEW_DESC::lastMipmapLevel} specifies the least detailed mipmap level. For non-mipmapped resources, this value has to be zero.
  • {@code CUDA_RESOURCE_VIEW_DESC::firstLayer} specifies the first layer index for layered textures. This will be the new layer zero. For non-layered resources, this value has to be zero.
  • {@code CUDA_RESOURCE_VIEW_DESC::lastLayer} specifies the last layer index for layered textures. For non-layered resources, this value has to be zero.
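The following is a minimal, hedged sketch of the {@link #CU_RESOURCE_TYPE_LINEAR RESOURCE_TYPE_LINEAR} case described above. It assumes the usual LWJGL member-named struct accessors ({@code res().linear().devPtr(...)} and friends) and the static imports noted in the first comment; {@code createLinearTexObject}, {@code devPtr} and {@code sizeInBytes} are hypothetical names, and the device memory is assumed to have been allocated and filled elsewhere. Because resource views only apply to (mipmapped) array resources, the sketch passes {@code NULL} through the unsafe {@code ncuTexObjectCreate} variant instead of a resource view descriptor.

    // Static imports assumed: org.lwjgl.cuda.CU.*, org.lwjgl.system.MemoryStack.*, org.lwjgl.system.MemoryUtil.*
    // Plus: import java.nio.LongBuffer; import org.lwjgl.cuda.*; import org.lwjgl.system.MemoryStack;
    static long createLinearTexObject(long devPtr, long sizeInBytes) { // hypothetical helper
        try (MemoryStack stack = stackPush()) {
            // Describe the backing storage: a 1D linear range of single-channel floats.
            CUDA_RESOURCE_DESC resDesc = CUDA_RESOURCE_DESC.calloc(stack)
                .resType(CU_RESOURCE_TYPE_LINEAR);
            resDesc.res().linear()
                .devPtr(devPtr)            // must honor DEVICE_ATTRIBUTE_TEXTURE_ALIGNMENT
                .format(CU_AD_FORMAT_FLOAT)
                .numChannels(1)
                .sizeInBytes(sizeInBytes);
            // flags is already 0 from calloc(), as required.

            // For a LINEAR resource the address/filter modes are ignored, so a zeroed descriptor is enough here.
            CUDA_TEXTURE_DESC texDesc = CUDA_TEXTURE_DESC.calloc(stack);

            LongBuffer pTexObject = stack.mallocLong(1);
            // No resource view for a LINEAR resource, so pass NULL through the unsafe variant.
            int err = ncuTexObjectCreate(memAddress(pTexObject), resDesc.address(), texDesc.address(), NULL);
            if (err != CUDA_SUCCESS) {
                throw new IllegalStateException("cuTexObjectCreate failed: " + err);
            }
            return pTexObject.get(0);
        }
    }

The returned handle should eventually be released with {@link #cuTexObjectDestroy TexObjectDestroy}.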
* * @param pTexObject texture object to create * @param pResDesc resource descriptor * @param pTexDesc texture descriptor * @param pResViewDesc resource view descriptor */ @NativeType("CUresult") public static int cuTexObjectCreate(@NativeType("CUtexObject *") LongBuffer pTexObject, @NativeType("CUDA_RESOURCE_DESC const *") CUDA_RESOURCE_DESC pResDesc, @NativeType("CUDA_TEXTURE_DESC const *") CUDA_TEXTURE_DESC pTexDesc, @NativeType("CUDA_RESOURCE_VIEW_DESC const *") CUDA_RESOURCE_VIEW_DESC pResViewDesc) { if (CHECKS) { check(pTexObject, 1); } return ncuTexObjectCreate(memAddress(pTexObject), pResDesc.address(), pTexDesc.address(), pResViewDesc.address()); } // --- [ cuTexObjectDestroy ] --- /** * Destroys a texture object. * *

Destroys the texture object specified by {@code texObject}.

* * @param texObject texture object to destroy */ @NativeType("CUresult") public static int cuTexObjectDestroy(@NativeType("CUtexObject") long texObject) { long __functionAddress = Functions.TexObjectDestroy; if (CHECKS) { check(__functionAddress); } return callJI(texObject, __functionAddress); } // --- [ cuTexObjectGetResourceDesc ] --- /** Unsafe version of: {@link #cuTexObjectGetResourceDesc TexObjectGetResourceDesc} */ public static int ncuTexObjectGetResourceDesc(long pResDesc, long texObject) { long __functionAddress = Functions.TexObjectGetResourceDesc; if (CHECKS) { check(__functionAddress); } return callPJI(pResDesc, texObject, __functionAddress); } /** * Returns a texture object's resource descriptor. * *

Returns the resource descriptor for the texture object specified by {@code texObject}.

* * @param pResDesc resource descriptor * @param texObject texture object */ @NativeType("CUresult") public static int cuTexObjectGetResourceDesc(@NativeType("CUDA_RESOURCE_DESC *") CUDA_RESOURCE_DESC pResDesc, @NativeType("CUtexObject") long texObject) { return ncuTexObjectGetResourceDesc(pResDesc.address(), texObject); } // --- [ cuTexObjectGetTextureDesc ] --- /** Unsafe version of: {@link #cuTexObjectGetTextureDesc TexObjectGetTextureDesc} */ public static int ncuTexObjectGetTextureDesc(long pTexDesc, long texObject) { long __functionAddress = Functions.TexObjectGetTextureDesc; if (CHECKS) { check(__functionAddress); } return callPJI(pTexDesc, texObject, __functionAddress); } /** * Returns a texture object's texture descriptor. * *

Returns the texture descriptor for the texture object specified by {@code texObject}.

* * @param pTexDesc texture descriptor * @param texObject texture object */ @NativeType("CUresult") public static int cuTexObjectGetTextureDesc(@NativeType("CUDA_TEXTURE_DESC *") CUDA_TEXTURE_DESC pTexDesc, @NativeType("CUtexObject") long texObject) { return ncuTexObjectGetTextureDesc(pTexDesc.address(), texObject); } // --- [ cuTexObjectGetResourceViewDesc ] --- /** Unsafe version of: {@link #cuTexObjectGetResourceViewDesc TexObjectGetResourceViewDesc} */ public static int ncuTexObjectGetResourceViewDesc(long pResViewDesc, long texObject) { long __functionAddress = Functions.TexObjectGetResourceViewDesc; if (CHECKS) { check(__functionAddress); } return callPJI(pResViewDesc, texObject, __functionAddress); } /** * Returns a texture object's resource view descriptor. * *

Returns the resource view descriptor for the texture object specified by {@code texObject}. If no resource view was set for {@code texObject}, {@link #CUDA_ERROR_INVALID_VALUE} is returned.

* * @param pResViewDesc resource view descriptor * @param texObject texture object */ @NativeType("CUresult") public static int cuTexObjectGetResourceViewDesc(@NativeType("CUDA_RESOURCE_VIEW_DESC *") CUDA_RESOURCE_VIEW_DESC pResViewDesc, @NativeType("CUtexObject") long texObject) { return ncuTexObjectGetResourceViewDesc(pResViewDesc.address(), texObject); } // --- [ cuSurfObjectCreate ] --- /** Unsafe version of: {@link #cuSurfObjectCreate SurfObjectCreate} */ public static int ncuSurfObjectCreate(long pSurfObject, long pResDesc) { long __functionAddress = Functions.SurfObjectCreate; if (CHECKS) { check(__functionAddress); } return callPPI(pSurfObject, pResDesc, __functionAddress); } /** * Creates a surface object. * *

Creates a surface object and returns it in {@code pSurfObject}. {@code pResDesc} describes the data to perform surface load/stores on. {@code CUDA_RESOURCE_DESC::resType} must be {@link #CU_RESOURCE_TYPE_ARRAY RESOURCE_TYPE_ARRAY} and {@code CUDA_RESOURCE_DESC::res::array::hArray} must be set to a valid CUDA array handle. {@code CUDA_RESOURCE_DESC::flags} must be set to zero.

* *

Surface objects are only supported on devices of compute capability 3.0 or higher. Additionally, a surface object is an opaque value, and, as such, should only be accessed through CUDA API calls.
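A small sketch of the call sequence described above, assuming {@code hArray} is an existing CUDA array handle created elsewhere (for example with {@code cuArrayCreate}), that the LWJGL member-named struct accessors are available, and the static imports noted in the comment; {@code createArraySurfObject} is a hypothetical helper name.

    // Static imports assumed: org.lwjgl.cuda.CU.*, org.lwjgl.system.MemoryStack.*
    // Plus: import java.nio.LongBuffer; import org.lwjgl.cuda.*; import org.lwjgl.system.MemoryStack;
    static long createArraySurfObject(long hArray) { // hypothetical helper
        try (MemoryStack stack = stackPush()) {
            CUDA_RESOURCE_DESC resDesc = CUDA_RESOURCE_DESC.calloc(stack)
                .resType(CU_RESOURCE_TYPE_ARRAY);
            resDesc.res().array().hArray(hArray);
            // flags stays 0 from calloc(), as required.

            LongBuffer pSurfObject = stack.mallocLong(1);
            int err = cuSurfObjectCreate(pSurfObject, resDesc);
            if (err != CUDA_SUCCESS) {
                throw new IllegalStateException("cuSurfObjectCreate failed: " + err);
            }
            // Pass the returned value to kernels; release it later with cuSurfObjectDestroy().
            return pSurfObject.get(0);
        }
    }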

* * @param pSurfObject surface object to create * @param pResDesc resource descriptor */ @NativeType("CUresult") public static int cuSurfObjectCreate(@NativeType("CUsurfObject *") LongBuffer pSurfObject, @NativeType("CUDA_RESOURCE_DESC const *") CUDA_RESOURCE_DESC pResDesc) { if (CHECKS) { check(pSurfObject, 1); } return ncuSurfObjectCreate(memAddress(pSurfObject), pResDesc.address()); } // --- [ cuSurfObjectDestroy ] --- /** * Destroys a surface object. * *

Destroys the surface object specified by {@code surfObject}.

* * @param surfObject surface object to destroy */ @NativeType("CUresult") public static int cuSurfObjectDestroy(@NativeType("CUsurfObject") long surfObject) { long __functionAddress = Functions.SurfObjectDestroy; if (CHECKS) { check(__functionAddress); } return callJI(surfObject, __functionAddress); } // --- [ cuSurfObjectGetResourceDesc ] --- /** Unsafe version of: {@link #cuSurfObjectGetResourceDesc SurfObjectGetResourceDesc} */ public static int ncuSurfObjectGetResourceDesc(long pResDesc, long surfObject) { long __functionAddress = Functions.SurfObjectGetResourceDesc; if (CHECKS) { check(__functionAddress); } return callPJI(pResDesc, surfObject, __functionAddress); } /** * Returns a surface object's resource descriptor. * *

Returns the resource descriptor for the surface object specified by {@code surfObject}.

* * @param pResDesc resource descriptor * @param surfObject surface object */ @NativeType("CUresult") public static int cuSurfObjectGetResourceDesc(@NativeType("CUDA_RESOURCE_DESC *") CUDA_RESOURCE_DESC pResDesc, @NativeType("CUsurfObject") long surfObject) { return ncuSurfObjectGetResourceDesc(pResDesc.address(), surfObject); } // --- [ cuDeviceCanAccessPeer ] --- /** Unsafe version of: {@link #cuDeviceCanAccessPeer DeviceCanAccessPeer} */ public static int ncuDeviceCanAccessPeer(long canAccessPeer, int dev, int peerDev) { long __functionAddress = Functions.DeviceCanAccessPeer; if (CHECKS) { check(__functionAddress); } return callPI(canAccessPeer, dev, peerDev, __functionAddress); } /** * Queries if a device may directly access a peer device's memory. * *

Returns in {@code *canAccessPeer} a value of 1 if contexts on {@code dev} are capable of directly accessing memory from contexts on {@code peerDev} and 0 otherwise. If direct access of {@code peerDev} from {@code dev} is possible, then access may be enabled on two specific contexts by calling {@link #cuCtxEnablePeerAccess CtxEnablePeerAccess}.
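A short sketch of the typical check-then-enable pattern, assuming {@code dev} and {@code peerDev} are device ordinals, the current context lives on {@code dev}, and {@code peerContext} is a context previously created on {@code peerDev}; the method name and parameters are hypothetical.

    // Static imports assumed: org.lwjgl.cuda.CU.*, org.lwjgl.system.MemoryStack.*
    // Plus: import java.nio.IntBuffer; import org.lwjgl.system.MemoryStack;
    static void enablePeerAccessIfPossible(int dev, int peerDev, long peerContext) { // hypothetical helper
        try (MemoryStack stack = stackPush()) {
            IntBuffer canAccessPeer = stack.mallocInt(1);
            if (cuDeviceCanAccessPeer(canAccessPeer, dev, peerDev) == CUDA_SUCCESS && canAccessPeer.get(0) == 1) {
                // Access is one-way: current context -> peerContext. Flags must be 0.
                int err = cuCtxEnablePeerAccess(peerContext, 0);
                if (err != CUDA_SUCCESS && err != CUDA_ERROR_PEER_ACCESS_ALREADY_ENABLED) {
                    throw new IllegalStateException("cuCtxEnablePeerAccess failed: " + err);
                }
            }
        }
    }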

* * @param canAccessPeer returned access capability * @param dev device from which allocations on {@code peerDev} are to be directly accessed * @param peerDev device on which the allocations to be directly accessed by {@code dev} reside */ @NativeType("CUresult") public static int cuDeviceCanAccessPeer(@NativeType("int *") IntBuffer canAccessPeer, @NativeType("CUdevice") int dev, @NativeType("CUdevice") int peerDev) { if (CHECKS) { check(canAccessPeer, 1); } return ncuDeviceCanAccessPeer(memAddress(canAccessPeer), dev, peerDev); } // --- [ cuCtxEnablePeerAccess ] --- /** * Enables direct access to memory allocations in a peer context. * *

If both the current context and {@code peerContext} are on devices which support unified addressing (as may be queried using {@link #CU_DEVICE_ATTRIBUTE_UNIFIED_ADDRESSING DEVICE_ATTRIBUTE_UNIFIED_ADDRESSING}) and same major compute capability, then on success all allocations from {@code peerContext} will immediately be accessible by the current context. See ref for additional details.

* *

Note that access granted by this call is unidirectional and that in order to access memory from the current context in {@code peerContext}, a separate symmetric call to {@link #cuCtxEnablePeerAccess CtxEnablePeerAccess} is required.

* *

Note that there are both device-wide and system-wide limitations per system configuration, as noted in the CUDA Programming Guide under the section "Peer-to-Peer Memory Access".

* *

Returns {@link #CUDA_ERROR_PEER_ACCESS_UNSUPPORTED} if {@link #cuDeviceCanAccessPeer DeviceCanAccessPeer} indicates that the {@code CUdevice} of the current context cannot directly access memory from the {@code CUdevice} of {@code peerContext}.

* *

Returns {@link #CUDA_ERROR_PEER_ACCESS_ALREADY_ENABLED} if direct access of {@code peerContext} from the current context has already been enabled.

* *

Returns {@link #CUDA_ERROR_TOO_MANY_PEERS} if direct peer access is not possible because hardware resources required for peer access have been exhausted.

* *

Returns {@link #CUDA_ERROR_INVALID_CONTEXT} if there is no current context, {@code peerContext} is not a valid context, or if the current context is {@code peerContext}.

* *

Returns {@link #CUDA_ERROR_INVALID_VALUE} if {@code Flags} is not 0.

* * @param peerContext peer context to enable direct access to from the current context * @param Flags reserved for future use and must be set to 0 */ @NativeType("CUresult") public static int cuCtxEnablePeerAccess(@NativeType("CUcontext") long peerContext, @NativeType("unsigned int") int Flags) { long __functionAddress = Functions.CtxEnablePeerAccess; if (CHECKS) { check(__functionAddress); check(peerContext); } return callPI(peerContext, Flags, __functionAddress); } // --- [ cuCtxDisablePeerAccess ] --- /** * Disables direct access to memory allocations in a peer context and unregisters any registered allocations. * *

Returns {@link #CUDA_ERROR_PEER_ACCESS_NOT_ENABLED} if direct peer access has not yet been enabled from {@code peerContext} to the current context.

* *

Returns {@link #CUDA_ERROR_INVALID_CONTEXT} if there is no current context, or if {@code peerContext} is not a valid context.

* * @param peerContext peer context to disable direct access to */ @NativeType("CUresult") public static int cuCtxDisablePeerAccess(@NativeType("CUcontext") long peerContext) { long __functionAddress = Functions.CtxDisablePeerAccess; if (CHECKS) { check(__functionAddress); check(peerContext); } return callPI(peerContext, __functionAddress); } // --- [ cuDeviceGetP2PAttribute ] --- /** Unsafe version of: {@link #cuDeviceGetP2PAttribute DeviceGetP2PAttribute} */ public static int ncuDeviceGetP2PAttribute(long value, int attrib, int srcDevice, int dstDevice) { long __functionAddress = Functions.DeviceGetP2PAttribute; if (CHECKS) { check(__functionAddress); } return callPI(value, attrib, srcDevice, dstDevice, __functionAddress); } /** * Queries attributes of the link between two devices. * *

Returns in {@code *value} the value of the requested attribute {@code attrib} of the link between {@code srcDevice} and {@code dstDevice}. The supported attributes are:

* *
  • {@link #CU_DEVICE_P2P_ATTRIBUTE_PERFORMANCE_RANK DEVICE_P2P_ATTRIBUTE_PERFORMANCE_RANK}: A relative value indicating the performance of the link between two devices.
  • {@link #CU_DEVICE_P2P_ATTRIBUTE_ACCESS_SUPPORTED DEVICE_P2P_ATTRIBUTE_ACCESS_SUPPORTED}: 1 if P2P access is enabled.
  • {@link #CU_DEVICE_P2P_ATTRIBUTE_NATIVE_ATOMIC_SUPPORTED DEVICE_P2P_ATTRIBUTE_NATIVE_ATOMIC_SUPPORTED}: 1 if atomic operations over the link are supported.
  • {@link #CU_DEVICE_P2P_ATTRIBUTE_CUDA_ARRAY_ACCESS_SUPPORTED DEVICE_P2P_ATTRIBUTE_CUDA_ARRAY_ACCESS_SUPPORTED}: 1 if cudaArray can be accessed over the link.
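A minimal sketch of querying one of the attributes listed above, assuming {@code srcDevice} and {@code dstDevice} are valid, distinct device ordinals; the helper name is hypothetical.

    // Static imports assumed: org.lwjgl.cuda.CU.*, org.lwjgl.system.MemoryStack.*
    // Plus: import java.nio.IntBuffer; import org.lwjgl.system.MemoryStack;
    static boolean isP2PAccessSupported(int srcDevice, int dstDevice) { // hypothetical helper
        try (MemoryStack stack = stackPush()) {
            IntBuffer value = stack.mallocInt(1);
            int err = cuDeviceGetP2PAttribute(value, CU_DEVICE_P2P_ATTRIBUTE_ACCESS_SUPPORTED, srcDevice, dstDevice);
            // 1 means srcDevice can access dstDevice over this link.
            return err == CUDA_SUCCESS && value.get(0) == 1;
        }
    }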
* *

Returns {@link #CUDA_ERROR_INVALID_DEVICE} if {@code srcDevice} or {@code dstDevice} are not valid or if they represent the same device.

* *

Returns {@link #CUDA_ERROR_INVALID_VALUE} if {@code attrib} is not valid or if {@code value} is a null pointer.

* * @param value returned value of the requested attribute * @param attrib the requested attribute of the link between {@code srcDevice} and {@code dstDevice} * @param srcDevice the source device of the target link * @param dstDevice the destination device of the target link */ @NativeType("CUresult") public static int cuDeviceGetP2PAttribute(@NativeType("int *") IntBuffer value, @NativeType("CUdevice_P2PAttribute") int attrib, @NativeType("CUdevice") int srcDevice, @NativeType("CUdevice") int dstDevice) { if (CHECKS) { check(value, 1); } return ncuDeviceGetP2PAttribute(memAddress(value), attrib, srcDevice, dstDevice); } // --- [ cuGraphicsUnregisterResource ] --- /** * Unregisters a graphics resource for access by CUDA. * *

Unregisters the graphics resource {@code resource} so it is not accessible by CUDA unless registered again.

* *

If {@code resource} is invalid then {@link #CUDA_ERROR_INVALID_HANDLE} is returned.

* * @param resource resource to unregister */ @NativeType("CUresult") public static int cuGraphicsUnregisterResource(@NativeType("CUgraphicsResource") long resource) { long __functionAddress = Functions.GraphicsUnregisterResource; if (CHECKS) { check(resource); } return callPI(resource, __functionAddress); } // --- [ cuGraphicsSubResourceGetMappedArray ] --- /** Unsafe version of: {@link #cuGraphicsSubResourceGetMappedArray GraphicsSubResourceGetMappedArray} */ public static int ncuGraphicsSubResourceGetMappedArray(long pArray, long resource, int arrayIndex, int mipLevel) { long __functionAddress = Functions.GraphicsSubResourceGetMappedArray; if (CHECKS) { check(resource); } return callPPI(pArray, resource, arrayIndex, mipLevel, __functionAddress); } /** * Get an array through which to access a subresource of a mapped graphics resource. * *

Returns in {@code *pArray} an array through which the subresource of the mapped graphics resource {@code resource} which corresponds to array index {@code arrayIndex} and mipmap level {@code mipLevel} may be accessed. The value set in {@code *pArray} may change every time that {@code resource} is mapped.

* *

If {@code resource} is not a texture then it cannot be accessed via an array and {@link #CUDA_ERROR_NOT_MAPPED_AS_ARRAY} is returned. If {@code arrayIndex} is not a valid array index for {@code resource} then {@link #CUDA_ERROR_INVALID_VALUE} is returned. If {@code mipLevel} is not a valid mipmap level for {@code resource} then {@link #CUDA_ERROR_INVALID_VALUE} is returned. If {@code resource} is not mapped then {@link #CUDA_ERROR_NOT_MAPPED} is returned.
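A short sketch of the lookup described above, assuming {@code resource} is a registered texture resource that has already been mapped with {@link #cuGraphicsMapResources GraphicsMapResources}; the helper name is hypothetical.

    // Static imports assumed: org.lwjgl.cuda.CU.*, org.lwjgl.system.MemoryStack.*, org.lwjgl.system.MemoryUtil.*
    // Plus: import org.lwjgl.PointerBuffer; import org.lwjgl.system.MemoryStack;
    static long getMappedArray(long resource) { // hypothetical helper
        try (MemoryStack stack = stackPush()) {
            PointerBuffer pArray = stack.mallocPointer(1);
            int err = cuGraphicsSubResourceGetMappedArray(pArray, resource, 0 /* arrayIndex */, 0 /* mipLevel */);
            // The returned CUarray handle is only valid while the resource stays mapped.
            return err == CUDA_SUCCESS ? pArray.get(0) : NULL;
        }
    }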

* * @param pArray returned array through which a subresource of {@code resource} may be accessed * @param resource mapped resource to access * @param arrayIndex array index for array textures or cubemap face index as defined by {@code CUarray_cubemap_face} for cubemap textures for the subresource to access * @param mipLevel mipmap level for the subresource to access */ @NativeType("CUresult") public static int cuGraphicsSubResourceGetMappedArray(@NativeType("CUarray *") PointerBuffer pArray, @NativeType("CUgraphicsResource") long resource, @NativeType("unsigned int") int arrayIndex, @NativeType("unsigned int") int mipLevel) { if (CHECKS) { check(pArray, 1); } return ncuGraphicsSubResourceGetMappedArray(memAddress(pArray), resource, arrayIndex, mipLevel); } // --- [ cuGraphicsResourceGetMappedMipmappedArray ] --- /** Unsafe version of: {@link #cuGraphicsResourceGetMappedMipmappedArray GraphicsResourceGetMappedMipmappedArray} */ public static int ncuGraphicsResourceGetMappedMipmappedArray(long pMipmappedArray, long resource) { long __functionAddress = Functions.GraphicsResourceGetMappedMipmappedArray; if (CHECKS) { check(__functionAddress); check(resource); } return callPPI(pMipmappedArray, resource, __functionAddress); } /** * Get a mipmapped array through which to access a mapped graphics resource. * *

Returns in {@code *pMipmappedArray} a mipmapped array through which the mapped graphics resource {@code resource} may be accessed. The value set in {@code *pMipmappedArray} may change every time that {@code resource} is mapped.

* *

If {@code resource} is not a texture then it cannot be accessed via a mipmapped array and {@link #CUDA_ERROR_NOT_MAPPED_AS_ARRAY} is returned. If {@code resource} is not mapped then {@link #CUDA_ERROR_NOT_MAPPED} is returned.

* * @param pMipmappedArray returned mipmapped array through which {@code resource} may be accessed * @param resource mapped resource to access */ @NativeType("CUresult") public static int cuGraphicsResourceGetMappedMipmappedArray(@NativeType("CUmipmappedArray *") PointerBuffer pMipmappedArray, @NativeType("CUgraphicsResource") long resource) { if (CHECKS) { check(pMipmappedArray, 1); } return ncuGraphicsResourceGetMappedMipmappedArray(memAddress(pMipmappedArray), resource); } // --- [ cuGraphicsResourceGetMappedPointer ] --- /** Unsafe version of: {@link #cuGraphicsResourceGetMappedPointer GraphicsResourceGetMappedPointer} */ public static int ncuGraphicsResourceGetMappedPointer(long pDevPtr, long pSize, long resource) { long __functionAddress = Functions.GraphicsResourceGetMappedPointer; if (CHECKS) { check(resource); } return callPPPI(pDevPtr, pSize, resource, __functionAddress); } /** * Get a device pointer through which to access a mapped graphics resource. * *

Returns in {@code *pDevPtr} a pointer through which the mapped graphics resource {@code resource} may be accessed. Returns in {@code pSize} the size of the memory in bytes which may be accessed from that pointer. The value set in {@code pPointer} may change every time that {@code resource} is mapped.

* *

If {@code resource} is not a buffer then it cannot be accessed via a pointer and {@link #CUDA_ERROR_NOT_MAPPED_AS_POINTER} is returned. If {@code resource} is not mapped then {@link #CUDA_ERROR_NOT_MAPPED} is returned.

* * @param pDevPtr returned pointer through which {@code resource} may be accessed * @param pSize returned size of the buffer accessible starting at {@code *pPointer} * @param resource mapped resource to access */ @NativeType("CUresult") public static int cuGraphicsResourceGetMappedPointer(@NativeType("CUdeviceptr *") PointerBuffer pDevPtr, @NativeType("size_t *") PointerBuffer pSize, @NativeType("CUgraphicsResource") long resource) { if (CHECKS) { check(pDevPtr, 1); check(pSize, 1); } return ncuGraphicsResourceGetMappedPointer(memAddress(pDevPtr), memAddress(pSize), resource); } // --- [ cuGraphicsResourceSetMapFlags ] --- /** * Set usage flags for mapping a graphics resource. * *

Set {@code flags} for mapping the graphics resource {@code resource}.

* *

Changes to {@code flags} will take effect the next time {@code resource} is mapped. The {@code flags} argument may be any of the following:

* *
  • {@link #CU_GRAPHICS_MAP_RESOURCE_FLAGS_NONE GRAPHICS_MAP_RESOURCE_FLAGS_NONE}: Specifies no hints about how this resource will be used. It is therefore assumed that this resource will be read from and written to by CUDA kernels. This is the default value.
  • {@link #CU_GRAPHICS_MAP_RESOURCE_FLAGS_READ_ONLY GRAPHICS_MAP_RESOURCE_FLAGS_READ_ONLY}: Specifies that CUDA kernels which access this resource will not write to this resource.
  • {@link #CU_GRAPHICS_MAP_RESOURCE_FLAGS_WRITE_DISCARD GRAPHICS_MAP_RESOURCE_FLAGS_WRITE_DISCARD}: Specifies that CUDA kernels which access this resource will not read from this resource and will write over the entire contents of the resource, so none of the data previously stored in the resource will be preserved.
* *

If {@code resource} is presently mapped for access by CUDA then {@link #CUDA_ERROR_ALREADY_MAPPED} is returned. If {@code flags} is not one of the above values then {@link #CUDA_ERROR_INVALID_VALUE} is returned.
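A minimal sketch of applying one of the hints listed above before the next map, assuming {@code resource} is a registered, currently unmapped {@code CUgraphicsResource}; the helper name is hypothetical.

    // Static import assumed: org.lwjgl.cuda.CU.*
    static void markReadOnly(long resource) { // hypothetical helper
        // Hint that kernels will only read from this resource; takes effect the next time it is mapped.
        int err = cuGraphicsResourceSetMapFlags(resource, CU_GRAPHICS_MAP_RESOURCE_FLAGS_READ_ONLY);
        if (err != CUDA_SUCCESS) {
            throw new IllegalStateException("cuGraphicsResourceSetMapFlags failed: " + err);
        }
    }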

* * @param resource registered resource to set flags for * @param flags parameters for resource mapping */ @NativeType("CUresult") public static int cuGraphicsResourceSetMapFlags(@NativeType("CUgraphicsResource") long resource, @NativeType("unsigned int") int flags) { long __functionAddress = Functions.GraphicsResourceSetMapFlags; if (CHECKS) { check(resource); } return callPI(resource, flags, __functionAddress); } // --- [ cuGraphicsMapResources ] --- /** * Unsafe version of: {@link #cuGraphicsMapResources GraphicsMapResources} * * @param count number of resources to map */ public static int ncuGraphicsMapResources(int count, long resources, long hStream) { long __functionAddress = Functions.GraphicsMapResources; return callPPI(count, resources, hStream, __functionAddress); } /** * Map graphics resources for access by CUDA. * *

Maps the {@code count} graphics resources in {@code resources} for access by CUDA.

* *

The resources in {@code resources} may be accessed by CUDA until they are unmapped. The graphics API from which {@code resources} were registered should not access any resources while they are mapped by CUDA. If an application does so, the results are undefined.

* *

This function provides the synchronization guarantee that any graphics calls issued before {@link #cuGraphicsMapResources GraphicsMapResources} will complete before any subsequent CUDA work issued in {@code stream} begins.

* *

If {@code resources} includes any duplicate entries then {@link #CUDA_ERROR_INVALID_HANDLE} is returned. If any of {@code resources} are presently mapped for access by CUDA then {@link #CUDA_ERROR_ALREADY_MAPPED} is returned.
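A hedged sketch of a typical map/use/unmap sequence, assuming {@code resource} is a registered buffer resource (for example an OpenGL buffer registered via the interop API) and {@code hStream} is an existing stream handle, or 0 for the default stream; the helper name is hypothetical and error handling is reduced to simple checks.

    // Static imports assumed: org.lwjgl.cuda.CU.*, org.lwjgl.system.MemoryStack.*
    // Plus: import org.lwjgl.PointerBuffer; import org.lwjgl.system.MemoryStack;
    static void withMappedBuffer(long resource, long hStream) { // hypothetical helper
        try (MemoryStack stack = stackPush()) {
            PointerBuffer resources = stack.mallocPointer(1);
            resources.put(0, resource);

            int err = cuGraphicsMapResources(resources, hStream);
            if (err != CUDA_SUCCESS) {
                throw new IllegalStateException("cuGraphicsMapResources failed: " + err);
            }
            PointerBuffer pDevPtr = stack.mallocPointer(1);
            PointerBuffer pSize   = stack.mallocPointer(1);
            if (cuGraphicsResourceGetMappedPointer(pDevPtr, pSize, resource) == CUDA_SUCCESS) {
                long devPtr = pDevPtr.get(0);
                long size   = pSize.get(0);
                // ... launch kernels in hStream that read/write [devPtr, devPtr + size) ...
            }
            cuGraphicsUnmapResources(resources, hStream);
        }
    }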

* * @param resources resources to map for CUDA usage * @param hStream stream with which to synchronize */ @NativeType("CUresult") public static int cuGraphicsMapResources(@NativeType("CUgraphicsResource *") PointerBuffer resources, @NativeType("CUstream") long hStream) { return ncuGraphicsMapResources(resources.remaining(), memAddress(resources), hStream); } // --- [ cuGraphicsUnmapResources ] --- /** * Unsafe version of: {@link #cuGraphicsUnmapResources GraphicsUnmapResources} * * @param count number of resources to unmap */ public static int ncuGraphicsUnmapResources(int count, long resources, long hStream) { long __functionAddress = Functions.GraphicsUnmapResources; return callPPI(count, resources, hStream, __functionAddress); } /** * Unmap graphics resources. * *

Unmaps the {@code count} graphics resources in {@code resources}.

* *

Once unmapped, the resources in {@code resources} may not be accessed by CUDA until they are mapped again.

* *

This function provides the synchronization guarantee that any CUDA work issued in {@code stream} before {@link #cuGraphicsUnmapResources GraphicsUnmapResources} will complete before any subsequently issued graphics work begins.

* *

If {@code resources} includes any duplicate entries then {@link #CUDA_ERROR_INVALID_HANDLE} is returned. If any of {@code resources} are not presently mapped for access by CUDA then {@link #CUDA_ERROR_NOT_MAPPED} is returned.

* * @param resources resources to unmap * @param hStream stream with which to synchronize */ @NativeType("CUresult") public static int cuGraphicsUnmapResources(@NativeType("CUgraphicsResource *") PointerBuffer resources, @NativeType("CUstream") long hStream) { return ncuGraphicsUnmapResources(resources.remaining(), memAddress(resources), hStream); } // --- [ cuGetProcAddress ] --- /** Unsafe version of: {@link #cuGetProcAddress GetProcAddress} */ public static int ncuGetProcAddress(long symbol, long pfn, int cudaVersion, long flags) { long __functionAddress = Functions.GetProcAddress; if (CHECKS) { check(__functionAddress); } return callPPJI(symbol, pfn, cudaVersion, flags, __functionAddress); } /** * Returns the requested driver API function pointer. * *

Returns in {@code **pfn} the address of the CUDA driver function for the requested CUDA version and flags.

* *

The CUDA version is specified as (1000 * major + 10 * minor), so CUDA 11.2 should be specified as 11020. For a requested driver symbol, if the specified CUDA version is greater than or equal to the CUDA version in which the driver symbol was introduced, this API will return the function pointer to the corresponding versioned function.

* *

The pointer returned by the API should be cast to a function pointer matching the requested driver function's definition in the API header file. The function pointer typedef can be picked up from the corresponding typedefs header file. For example, cudaTypedefs.h consists of function pointer typedefs for driver APIs defined in cuda.h.

* *

The API will return {@link #CUDA_ERROR_NOT_FOUND} if the requested driver function is not supported on the platform, no ABI compatible driver function exists for the specified {@code cudaVersion} or if the driver symbol is invalid.

* *

The requested flags can be:

* *
  • {@link #CU_GET_PROC_ADDRESS_DEFAULT GET_PROC_ADDRESS_DEFAULT}: This is the default mode. This is equivalent to {@link #CU_GET_PROC_ADDRESS_PER_THREAD_DEFAULT_STREAM GET_PROC_ADDRESS_PER_THREAD_DEFAULT_STREAM} if the code is compiled with the --default-stream per-thread compilation flag or the macro {@code CUDA_API_PER_THREAD_DEFAULT_STREAM} is defined; {@link #CU_GET_PROC_ADDRESS_LEGACY_STREAM GET_PROC_ADDRESS_LEGACY_STREAM} otherwise.
  • {@link #CU_GET_PROC_ADDRESS_LEGACY_STREAM GET_PROC_ADDRESS_LEGACY_STREAM}: This will enable the search for all driver symbols that match the requested driver symbol name except the corresponding per-thread versions.
  • {@link #CU_GET_PROC_ADDRESS_PER_THREAD_DEFAULT_STREAM GET_PROC_ADDRESS_PER_THREAD_DEFAULT_STREAM}: This will enable the search for all driver symbols that match the requested driver symbol name including the per-thread versions. If a per-thread version is not found, the API will return the legacy version of the driver function.
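A minimal sketch of the lookup described above, using the {@code CharSequence} overload of this binding; the choice of {@code cuMemAlloc} and the 11020 version value mirror the example in the parameter documentation, and the helper name is hypothetical.

    // Static imports assumed: org.lwjgl.cuda.CU.*, org.lwjgl.system.MemoryStack.*, org.lwjgl.system.MemoryUtil.*
    // Plus: import org.lwjgl.PointerBuffer; import org.lwjgl.system.MemoryStack;
    static long getCuMemAllocPfn() { // hypothetical helper
        try (MemoryStack stack = stackPush()) {
            PointerBuffer pfn = stack.mallocPointer(1);
            // Ask for the cuMemAlloc entry point that is ABI compatible with CUDA 11.2 (11020).
            int err = cuGetProcAddress("cuMemAlloc", pfn, 11020, CU_GET_PROC_ADDRESS_DEFAULT);
            // The returned address can be invoked through org.lwjgl.system.JNI.
            return err == CUDA_SUCCESS ? pfn.get(0) : NULL;
        }
    }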
* * @param symbol the base name of the driver API function to look for. As an example, for the driver API {@code cuMemAlloc_v2()}, {@code symbol} would be * {@code cuMemAlloc} and {@code cudaVersion} would be the ABI compatible CUDA version for the {@code _v2} variant. * @param pfn location to return the function pointer to the requested driver function * @param cudaVersion the CUDA version to look for the requested driver symbol * @param flags flags to specify search options */ @NativeType("CUresult") public static int cuGetProcAddress(@NativeType("char const *") ByteBuffer symbol, @NativeType("void **") PointerBuffer pfn, int cudaVersion, @NativeType("cuuint64_t") long flags) { if (CHECKS) { checkNT1(symbol); check(pfn, 1); } return ncuGetProcAddress(memAddress(symbol), memAddress(pfn), cudaVersion, flags); } /** * Returns the requested driver API function pointer. * *

Returns in {@code **pfn} the address of the CUDA driver function for the requested CUDA version and flags.

* *

The CUDA version is specified as (1000 * major + 10 * minor), so CUDA 11.2 should be specified as 11020. For a requested driver symbol, if the specified CUDA version is greater than or equal to the CUDA version in which the driver symbol was introduced, this API will return the function pointer to the corresponding versioned function.

* *

The pointer returned by the API should be cast to a function pointer matching the requested driver function's definition in the API header file. The function pointer typedef can be picked up from the corresponding typedefs header file. For example, cudaTypedefs.h consists of function pointer typedefs for driver APIs defined in cuda.h.

* *

The API will return {@link #CUDA_ERROR_NOT_FOUND} if the requested driver function is not supported on the platform, no ABI compatible driver function exists for the specified {@code cudaVersion} or if the driver symbol is invalid.

* *

The requested flags can be:

* *
  • {@link #CU_GET_PROC_ADDRESS_DEFAULT GET_PROC_ADDRESS_DEFAULT}: This is the default mode. This is equivalent to {@link #CU_GET_PROC_ADDRESS_PER_THREAD_DEFAULT_STREAM GET_PROC_ADDRESS_PER_THREAD_DEFAULT_STREAM} if the code is compiled with the --default-stream per-thread compilation flag or the macro {@code CUDA_API_PER_THREAD_DEFAULT_STREAM} is defined; {@link #CU_GET_PROC_ADDRESS_LEGACY_STREAM GET_PROC_ADDRESS_LEGACY_STREAM} otherwise.
  • {@link #CU_GET_PROC_ADDRESS_LEGACY_STREAM GET_PROC_ADDRESS_LEGACY_STREAM}: This will enable the search for all driver symbols that match the requested driver symbol name except the corresponding per-thread versions.
  • {@link #CU_GET_PROC_ADDRESS_PER_THREAD_DEFAULT_STREAM GET_PROC_ADDRESS_PER_THREAD_DEFAULT_STREAM}: This will enable the search for all driver symbols that match the requested driver symbol name including the per-thread versions. If a per-thread version is not found, the API will return the legacy version of the driver function.
* * @param symbol the base name of the driver API function to look for. As an example, for the driver API {@code cuMemAlloc_v2()}, {@code symbol} would be * {@code cuMemAlloc} and {@code cudaVersion} would be the ABI compatible CUDA version for the {@code _v2} variant. * @param pfn location to return the function pointer to the requested driver function * @param cudaVersion the CUDA version to look for the requested driver symbol * @param flags flags to specify search options */ @NativeType("CUresult") public static int cuGetProcAddress(@NativeType("char const *") CharSequence symbol, @NativeType("void **") PointerBuffer pfn, int cudaVersion, @NativeType("cuuint64_t") long flags) { if (CHECKS) { check(pfn, 1); } MemoryStack stack = stackGet(); int stackPointer = stack.getPointer(); try { stack.nASCII(symbol, true); long symbolEncoded = stack.getPointerAddress(); return ncuGetProcAddress(symbolEncoded, memAddress(pfn), cudaVersion, flags); } finally { stack.setPointer(stackPointer); } } // --- [ cuGetExportTable ] --- public static int ncuGetExportTable(long ppExportTable, long pExportTableId) { long __functionAddress = Functions.GetExportTable; return callPPI(ppExportTable, pExportTableId, __functionAddress); } @NativeType("CUresult") public static int cuGetExportTable(@NativeType("void const **") PointerBuffer ppExportTable, @NativeType("CUuuid const *") CUuuid pExportTableId) { return ncuGetExportTable(memAddress(ppExportTable), pExportTableId.address()); } // --- [ cuIpcOpenEventHandle ] --- private static final FFICIF IpcOpenEventHandleCIF = apiCreateCIF( apiStdcall(), ffi_type_uint32, ffi_type_pointer, apiCreateStruct(apiCreateArray(ffi_type_schar, 64)) ); /** Unsafe version of: {@link #cuIpcOpenEventHandle IpcOpenEventHandle} */ public static int ncuIpcOpenEventHandle(long phEvent, long handle) { long __functionAddress = Functions.IpcOpenEventHandle$Address; if (CHECKS) { check(__functionAddress); } MemoryStack stack = stackGet(); int stackPointer = stack.getPointer(); try { long __result = stack.nmalloc(4); long values = stack.nmalloc(8, POINTER_SIZE + 8); memPutAddress(values, phEvent); memPutLong(values + POINTER_SIZE, handle); long arguments = stack.nmalloc(POINTER_SIZE,POINTER_SIZE * 2); memPutAddress(arguments, values); memPutAddress(arguments + POINTER_SIZE, values + POINTER_SIZE); nffi_call(IpcOpenEventHandleCIF.address(), __functionAddress, __result, arguments); return memGetInt(__result); } finally { stack.setPointer(stackPointer); } } /** * Opens an interprocess event handle for use in the current process. * *

Opens an interprocess event handle exported from another process with {@link #cuIpcGetEventHandle IpcGetEventHandle}. This function returns a {@code CUevent} that behaves like a locally created event with the {@link #CU_EVENT_DISABLE_TIMING EVENT_DISABLE_TIMING} flag specified. This event must be freed with {@link #cuEventDestroy EventDestroy}.

* *

Performing operations on the imported event after the exported event has been freed with {@link #cuEventDestroy EventDestroy} will result in undefined behavior.

* *

IPC functionality is restricted to devices with support for unified addressing on Linux and Windows operating systems. IPC functionality on Windows is restricted to GPUs in TCC mode.
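A hedged sketch of importing an event handle, assuming {@code handleBytes} is a 64-byte {@code ByteBuffer} carrying the {@code CUipcEventHandle} received from the exporting process over some application-defined transport (for example a pipe or socket); the helper name is hypothetical.

    // Static imports assumed: org.lwjgl.cuda.CU.*, org.lwjgl.system.MemoryStack.*, org.lwjgl.system.MemoryUtil.*
    // Plus: import java.nio.ByteBuffer; import org.lwjgl.PointerBuffer; import org.lwjgl.cuda.*; import org.lwjgl.system.MemoryStack;
    static long importIpcEvent(ByteBuffer handleBytes) { // hypothetical helper
        try (MemoryStack stack = stackPush()) {
            // Copy the received bytes into a CUipcEventHandle struct.
            CUIPCEventHandle handle = CUIPCEventHandle.calloc(stack);
            memCopy(memAddress(handleBytes), handle.address(), CUIPCEventHandle.SIZEOF);

            PointerBuffer phEvent = stack.mallocPointer(1);
            int err = cuIpcOpenEventHandle(phEvent, handle);
            if (err != CUDA_SUCCESS) {
                throw new IllegalStateException("cuIpcOpenEventHandle failed: " + err);
            }
            // Behaves like a local event created with EVENT_DISABLE_TIMING; free it later with cuEventDestroy().
            return phEvent.get(0);
        }
    }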

* * @param phEvent returns the imported event * @param handle interprocess handle to open */ @NativeType("CUresult") public static int cuIpcOpenEventHandle(@NativeType("CUevent *") PointerBuffer phEvent, @NativeType("CUipcEventHandle") CUIPCEventHandle handle) { if (CHECKS) { check(phEvent, 1); } return ncuIpcOpenEventHandle(memAddress(phEvent), handle.address()); } // --- [ cuIpcOpenMemHandle ] --- private static final FFICIF IpcOpenMemHandleCIF = apiCreateCIF( apiStdcall(), ffi_type_uint32, ffi_type_pointer, apiCreateStruct(apiCreateArray(ffi_type_schar, 64)), ffi_type_uint32 ); /** Unsafe version of: {@link #cuIpcOpenMemHandle IpcOpenMemHandle} */ public static int ncuIpcOpenMemHandle(long pdptr, long handle, int Flags) { long __functionAddress = Functions.IpcOpenMemHandle$Address; if (CHECKS) { check(__functionAddress); } MemoryStack stack = stackGet(); int stackPointer = stack.getPointer(); try { long __result = stack.nmalloc(4); long values = stack.nmalloc(8, POINTER_SIZE + 8 + 4); memPutAddress(values, pdptr); memPutLong(values + POINTER_SIZE, handle); memPutInt(values + POINTER_SIZE + 8, Flags); long arguments = stack.nmalloc(POINTER_SIZE,POINTER_SIZE * 3); memPutAddress(arguments, values); memPutAddress(arguments + POINTER_SIZE, values + POINTER_SIZE); memPutAddress(arguments + POINTER_SIZE * 2, values + POINTER_SIZE + 8); nffi_call(IpcOpenMemHandleCIF.address(), __functionAddress, __result, arguments); return memGetInt(__result); } finally { stack.setPointer(stackPointer); } } /** * Opens an interprocess memory handle exported from another process and returns a device pointer usable in the local process. * *

Maps memory exported from another process with {@link #cuIpcGetMemHandle IpcGetMemHandle} into the current device address space. For contexts on different devices {@code cuIpcOpenMemHandle} can attempt to enable peer access between the devices as if the user called {@link #cuCtxEnablePeerAccess CtxEnablePeerAccess}. This behavior is controlled by the {@link #CU_IPC_MEM_LAZY_ENABLE_PEER_ACCESS IPC_MEM_LAZY_ENABLE_PEER_ACCESS} flag. {@link #cuDeviceCanAccessPeer DeviceCanAccessPeer} can determine if a mapping is possible.

* *

Contexts that may open {@link CUIPCMemHandle}s are restricted in the following way. {@code CUipcMemHandles} from each {@code CUdevice} in a given process may only be opened by one {@code CUcontext} per {@code CUdevice} per other process.

* *

If the memory handle has already been opened by the current context, the reference count on the handle is incremented by 1 and the existing device pointer is returned.

* *

Memory returned from {@code cuIpcOpenMemHandle} must be freed with {@link #cuIpcCloseMemHandle IpcCloseMemHandle}.

* *

Calling {@link #cuMemFree MemFree} on an exported memory region before calling {@link #cuIpcCloseMemHandle IpcCloseMemHandle} in the importing context will result in undefined behavior.

* *

IPC functionality is restricted to devices with support for unified addressing on Linux and Windows operating systems. IPC functionality on Windows is restricted to GPUs in TCC mode.

* *
Note
* *

No guarantees are made about the address returned in {@code *pdptr}. In particular, multiple processes may not receive the same address for the same {@code handle}.
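A hedged sketch of the import path described above, assuming {@code handleBytes} is a 64-byte {@code ByteBuffer} carrying the {@code CUipcMemHandle} received from the exporting process; the helper name is hypothetical.

    // Static imports assumed: org.lwjgl.cuda.CU.*, org.lwjgl.system.MemoryStack.*, org.lwjgl.system.MemoryUtil.*
    // Plus: import java.nio.ByteBuffer; import org.lwjgl.PointerBuffer; import org.lwjgl.cuda.*; import org.lwjgl.system.MemoryStack;
    static long importIpcMemory(ByteBuffer handleBytes) { // hypothetical helper
        try (MemoryStack stack = stackPush()) {
            // Copy the received bytes into a CUipcMemHandle struct.
            CUIPCMemHandle handle = CUIPCMemHandle.calloc(stack);
            memCopy(memAddress(handleBytes), handle.address(), CUIPCMemHandle.SIZEOF);

            PointerBuffer pdptr = stack.mallocPointer(1);
            int err = cuIpcOpenMemHandle(pdptr, handle, CU_IPC_MEM_LAZY_ENABLE_PEER_ACCESS);
            if (err != CUDA_SUCCESS) {
                throw new IllegalStateException("cuIpcOpenMemHandle failed: " + err);
            }
            // Use the returned device pointer in kernels/copies, then release it with cuIpcCloseMemHandle()
            // before the exporting process frees the allocation.
            return pdptr.get(0);
        }
    }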

* * @param pdptr returned device pointer * @param handle {@code CUipcMemHandle} to open * @param Flags flags for this operation. Must be specified as {@link #CU_IPC_MEM_LAZY_ENABLE_PEER_ACCESS IPC_MEM_LAZY_ENABLE_PEER_ACCESS} */ @NativeType("CUresult") public static int cuIpcOpenMemHandle(@NativeType("CUdeviceptr *") PointerBuffer pdptr, @NativeType("CUipcMemHandle") CUIPCMemHandle handle, @NativeType("unsigned int") int Flags) { if (CHECKS) { check(pdptr, 1); } return ncuIpcOpenMemHandle(memAddress(pdptr), handle.address(), Flags); } }



