/*
* JCudaUtils - Utilities for JCuda
* http://www.jcuda.org
*
* Copyright (c) 2010 Marco Hutter - http://www.jcuda.org
*
* Permission is hereby granted, free of charge, to any person
* obtaining a copy of this software and associated documentation
* files (the "Software"), to deal in the Software without
* restriction, including without limitation the rights to use,
* copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following
* conditions:
*
* The above copyright notice and this permission notice shall be
* included in all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
* OTHER DEALINGS IN THE SOFTWARE.
*/
package jcuda.utils;
import static jcuda.driver.JCudaDriver.cuDeviceGetCount;
import static jcuda.driver.JCudaDriver.cuLaunchKernel;
import static jcuda.driver.JCudaDriver.cuModuleGetFunction;
import static jcuda.driver.JCudaDriver.cuModuleLoadDataEx;
import java.io.ByteArrayOutputStream;
import java.io.File;
import java.io.FileInputStream;
import java.io.FileNotFoundException;
import java.io.FileOutputStream;
import java.io.IOException;
import java.io.InputStream;
import jcuda.CudaException;
import jcuda.Pointer;
import jcuda.driver.CUcontext;
import jcuda.driver.CUfunction;
import jcuda.driver.CUmodule;
import jcuda.driver.CUresult;
import jcuda.driver.CUstream;
import jcuda.driver.JCudaDriver;
import jcuda.runtime.JCuda;
import jcuda.runtime.dim3;
import org.nd4j.linalg.jcublas.context.ContextHolder;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
/**
* This is a utility class that simplifies the setup and launching
* of CUDA kernels using the JCuda Driver API.
*
* Instances of this class may be created using one of the following
* methods:
* <ul>
* <li>
* {@link KernelLauncher#compile(String, String, String...)} will
* compile a kernel from a String containing the CUDA source code
* </li>
* <li>
* {@link KernelLauncher#create(String, String, String...)} will
* create a kernel for a function that is contained in a CUDA
* source file
* </li>
* <li>
* {@link KernelLauncher#load(String, String)} will load a kernel from
* a PTX or CUBIN (CUDA binary) file
* </li>
* <li>
* {@link KernelLauncher#load(InputStream, String)} will load a kernel
* from PTX or CUBIN data which is provided via an InputStream
* (useful for packaging PTX or CUBIN files into JAR archives)
* </li>
* </ul>
*
* These instances may then be used to call a kernel function with
* the {@link KernelLauncher#call(Object...)} method. The actual
* kernel function arguments which are passed to this method
* will be set up automatically, and aligned appropriately for
* their respective size.
*
* The setup of the execution may be performed similarly to the
* invocation of a kernel with the Runtime API in C. Such a call
* has the form
* <pre>
* kernel&lt;&lt;&lt;gridDim, blockDim, sharedMemorySize, stream&gt;&gt;&gt;(...);
* </pre>
* where
* <ul>
* <li>gridDim is a dim3 which specifies the number of blocks per grid</li>
* <li>blockDim is a dim3 which specifies the number of threads per block</li>
* <li>sharedMemorySize is the size of the shared memory for the kernel</li>
* <li>stream is a stream for asynchronous kernel execution</li>
* </ul>
* Similarly, the KernelLauncher allows specifying these parameters
* in the {@link KernelLauncher#setup(dim3, dim3, int, CUstream)}
* method:
* <pre>
* kernelLauncher.setup(gridDim, blockDim, sharedMemorySize, stream).call(...);
* </pre>
* When default values for some of the parameters should be used,
* one of the overloaded versions of the setup method may be called:
* <pre>
* kernelLauncher.setup(gridDim, blockDim).call(...);
* </pre>
* The parameters may also be set individually:
* <pre>
* kernelLauncher.setGridSize(gridSizeX, gridSizeY);
* kernelLauncher.setBlockSize(blockSizeX, blockSizeY, blockSizeZ);
* kernelLauncher.call(...);
* </pre>
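*
* As a minimal end-to-end sketch (the kernel source, the problem size
* 'n' and the device pointers dA, dB and dC are illustrative and are
* not provided by this class):
* <pre>
* String sourceCode =
*     "extern \"C\" __global__ void add(int n, float *a, float *b, float *c)\n" +
*     "{\n" +
*     "    int i = blockIdx.x * blockDim.x + threadIdx.x;\n" +
*     "    if (i &lt; n) c[i] = a[i] + b[i];\n" +
*     "}\n";
* KernelLauncher launcher = KernelLauncher.compile(sourceCode, "add");
* // dA, dB and dC are assumed to be CUdeviceptr instances which have
* // already been allocated and filled with 'n' float values each
* launcher.setup(new dim3((n + 255) / 256, 1, 1), new dim3(256, 1, 1));
* launcher.call(n, dA, dB, dC);
* </pre>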
*/
public class KernelLauncher {
/**
* The logger used in this class
*/
private static final Logger logger = LoggerFactory.getLogger(KernelLauncher.class.getName());
/**
* The path prefix, containing the path to the NVCC compiler.
* Not required if the path to the NVCC is present in an
* environment variable.
*/
private static String compilerPath = "";
/**
* The number of the device which should be used by the
* KernelLauncher
*/
private int deviceNumber = 0;
/**
* Set the path to the NVCC compiler. For example:
* setCompilerPath("C:/CUDA/bin");
*
* By default, this path is empty, assuming that the compiler
* is in a path that is visible via an environment variable.
*
* @param path The path to the NVCC compiler.
*/
public static void setCompilerPath(String path)
{
if (path == null)
{
compilerPath = "";
return;
}
compilerPath = path;
if (!compilerPath.endsWith(File.separator))
{
compilerPath += File.separator;
}
}
/**
* Set the number (index) of the device which should be used
* by the KernelLauncher
*
* @param number The number of the device to use
* @throws CudaException If number < 0 or number >= deviceCount
*/
public void setDeviceNumber(int number)
{
int count[] = new int[1];
cuDeviceGetCount(count);
if (number < 0 || number >= count[0])
{
throw new CudaException(
"Invalid device number: " + number + ". "+
"There are only " + count[0] + " devices available");
}
deviceNumber = number;
}
/**
* Create a new KernelLauncher for the function with the given
* name, that is defined in the given source code.
*
* The source code is stored in a temporary .CU CUDA source file,
* and a PTX file is compiled from this source file using the
* NVCC (NVIDIA CUDA C Compiler) in a separate process.
* The optional nvccArguments are passed to the NVCC.
*
* The NVCC has to be in a visible directory. E.g. for Windows, the
* NVCC.EXE has to be in a directory that is contained in the PATH
* environment variable. Alternatively, the path to the NVCC may
* be specified by calling {@link KernelLauncher#setCompilerPath(String)}
* with the respective path.
*
* Note: In order to make the function accessible
* by the name it has in the source code, the function has to
* be declared as an extern "C" function:
* <pre>
* extern "C"
* __global__ void functionName(...)
* {
*     ...
* }
* </pre>
*
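* For example (the source string and the architecture flag are
* illustrative; the appropriate flag depends on the target device):
* <pre>
* KernelLauncher launcher =
*     KernelLauncher.compile(sourceCode, "functionName", "-arch=sm_35");
* </pre>
*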
* @see KernelLauncher#create(String, String, String...)
* @see KernelLauncher#create(String, String, boolean, String...)
* @see KernelLauncher#load(String, String)
*
* @param sourceCode The source code containing the function
* @param functionName The name of the function.
* @param nvccArguments Optional arguments for the NVCC
* @return The KernelLauncher for the specified function
* @throws CudaException If the creation of the CU- or PTX file
* fails, or the PTX may not be loaded, or the specified
* function can not be obtained.
*/
public static KernelLauncher compile(
String sourceCode, String functionName, String ... nvccArguments)
{
File cuFile = null;
try
{
cuFile = File.createTempFile("temp_JCuda_", ".cu");
}
catch (IOException e)
{
throw new CudaException("Could not create temporary .cu file", e);
}
String cuFileName = cuFile.getPath();
FileOutputStream fos = null;
try
{
fos = new FileOutputStream(cuFile);
fos.write(sourceCode.getBytes());
}
catch (IOException e)
{
throw new CudaException("Could not write temporary .cu file", e);
}
finally
{
if (fos != null)
{
try
{
fos.close();
}
catch (IOException e)
{
throw new CudaException(
"Could not close temporary .cu file", e);
}
}
}
return create(cuFileName, functionName, nvccArguments);
}
/**
* Create a new KernelLauncher for the function with the given
* name, that is contained in the .CU CUDA source file with the
* given name.
*
* Note: In order to make the function accessible
* by the name it has in the source code, the function has to
* be declared as an extern "C" function:
* <pre>
* extern "C"
* __global__ void functionName(...)
* {
*     ...
* }
* </pre>
*
* The extension of the given file name is replaced with "ptx".
* If the PTX file with the resulting name does not exist,
* or is older than the .CU file, it is compiled from
* the specified source file using the NVCC (NVIDIA CUDA C
* Compiler) in a separate process. The optional nvccArguments
* are passed to the NVCC.
*
* The NVCC has to be in a visible directory. E.g. for Windows, the
* NVCC.EXE has to be in a directory that is contained in the PATH
* environment variable. Alternatively, the path to the NVCC may
* be specified by calling {@link KernelLauncher#setCompilerPath(String)}
* with the respective path.
*
* @see KernelLauncher#compile(String, String, String...)
* @see KernelLauncher#create(String, String, boolean, String...)
* @see KernelLauncher#load(String, String)
* @see KernelLauncher#load(InputStream, String)
*
* @param cuFileName The name of the source file.
* @param functionName The name of the function.
* @param nvccArguments Optional arguments for the NVCC
* @return The KernelLauncher for the specified function
* @throws CudaException If the creation of the PTX file fails,
* or the PTX may not be loaded, or the specified function can
* not be obtained.
*/
public static KernelLauncher create(
String cuFileName, String functionName, String ... nvccArguments)
{
return create(cuFileName, functionName, false, nvccArguments);
}
/**
* Create a new KernelLauncher for the function with the given
* name, that is contained in the .CU CUDA source file with the
* given name.
*
* Note: In order to make the function accessible
* by the name it has in the source code, the function has to
* be declared as an extern "C" function:
* <pre>
* extern "C"
* __global__ void functionName(...)
* {
*     ...
* }
* </pre>
*
* The extension of the given file name is replaced with "ptx".
* If the PTX file with the resulting name does not exist,
* or is older than the .CU file, it is compiled from
* the specified source file using the NVCC (NVIDIA CUDA C
* Compiler) in a separate process. The optional nvccArguments
* are passed to the NVCC.
*
* If the forceRebuild flag is 'true', then the
* PTX file will be recompiled from the given source file,
* even if it already exists and is newer than the source
* file, and the existing PTX file will be overwritten.
*
* The NVCC has to be in a visible directory. E.g. for Windows, the
* NVCC.EXE has to be in a directory that is contained in the PATH
* environment variable. Alternatively, the path to the NVCC may
* be specified by calling {@link KernelLauncher#setCompilerPath(String)}
* with the respective path.
*
* @see KernelLauncher#compile(String, String, String...)
* @see KernelLauncher#create(String, String, String...)
* @see KernelLauncher#load(String, String)
* @see KernelLauncher#load(InputStream, String)
*
* @param cuFileName The name of the source file.
* @param functionName The name of the function.
* @param forceRebuild Whether the PTX file should be recompiled
* and overwritten if it already exists.
* @param nvccArguments Optional arguments for the NVCC
* @return The KernelLauncher for the specified function
* @throws CudaException If the creation of the PTX file fails,
* or the PTX may not be loaded, or the specified function can
* not be obtained.
*/
public static KernelLauncher create(
String cuFileName, String functionName,
boolean forceRebuild, String ... nvccArguments)
{
// Prepare the PTX file for the CU source file
String ptxFileName = null;
try
{
ptxFileName =
preparePtxFile(cuFileName, forceRebuild, nvccArguments);
}
catch (IOException e)
{
throw new CudaException(
"Could not prepare PTX for source file '"+cuFileName+"'", e);
}
KernelLauncher kernelLauncher = new KernelLauncher();
byte ptxData[] = loadData(ptxFileName);
kernelLauncher.initModule(ptxData);
kernelLauncher.initFunction(functionName);
return kernelLauncher;
}
/**
* Create a new KernelLauncher which may be used to execute the
* specified function which is loaded from the PTX- or CUBIN
* (CUDA binary) file with the given name.
*
* @see KernelLauncher#compile(String, String, String...)
* @see KernelLauncher#create(String, String, boolean, String...)
* @see KernelLauncher#load(String, String)
* @see KernelLauncher#load(InputStream, String)
*
* @param moduleFileName The name of the PTX- or CUBIN file
* @param functionName The name of the function
* @return The KernelLauncher for the specified function
* @throws CudaException If the PTX- or CUBIN may not be loaded,
* or the specified function can not be obtained.
*/
public static KernelLauncher load(
String moduleFileName, String functionName) {
KernelLauncher kernelLauncher = new KernelLauncher();
byte moduleData[] = loadData(moduleFileName);
kernelLauncher.initModule(moduleData);
kernelLauncher.initFunction(functionName);
return kernelLauncher;
}
/**
* Create a new KernelLauncher which may be used to execute the
* specified function which is loaded from the PTX- or CUBIN
* data that is read from the given input stream.
*
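* For example, to load PTX data that has been packaged as a resource
* into a JAR file (the resource path and the MyClass owner class are
* illustrative):
* <pre>
* InputStream stream =
*     MyClass.class.getResourceAsStream("/kernels/myKernel.ptx");
* KernelLauncher launcher = KernelLauncher.load(stream, "functionName");
* </pre>
*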
* @see KernelLauncher#compile(String, String, String...)
* @see KernelLauncher#create(String, String, boolean, String...)
* @see KernelLauncher#load(String, String)
* @see KernelLauncher#load(InputStream, String)
*
* @param moduleInputStream The stream for the PTX- or CUBIN data
* @param functionName The name of the function
* @return The KernelLauncher for the specified function
* @throws CudaException If the PTX- or CUBIN may not be loaded,
* or the specified function can not be obtained.
*/
public static KernelLauncher load(
InputStream moduleInputStream, String functionName)
{
KernelLauncher kernelLauncher = new KernelLauncher();
byte moduleData[] = loadData(moduleInputStream);
kernelLauncher.initModule(moduleData);
kernelLauncher.initFunction(functionName);
return kernelLauncher;
}
/**
* Loads the data from the file with the given name and returns
* it as a 0-terminated byte array.
*
* @param fileName The name of the file
* @return The data from the file
*/
private static byte[] loadData(String fileName)
{
InputStream inputStream = null;
try
{
inputStream= new FileInputStream(new File(fileName));
return loadData(inputStream);
}
catch (FileNotFoundException e)
{
throw new CudaException(
"Could not open '"+fileName+"'", e);
}
finally
{
if (inputStream != null)
{
try
{
inputStream.close();
}
catch (IOException e)
{
throw new CudaException(
"Could not close '"+fileName+"'", e);
}
}
}
}
/**
* Reads the data from the given inputStream and returns it as
* a 0-terminated byte array.
*
* @param inputStream The inputStream to read
* @return The data from the inputStream
*/
private static byte[] loadData(InputStream inputStream)
{
ByteArrayOutputStream baos = null;
try
{
baos = new ByteArrayOutputStream();
byte buffer[] = new byte[8192];
while (true)
{
int read = inputStream.read(buffer);
if (read == -1)
{
break;
}
baos.write(buffer, 0, read);
}
baos.write('\0');
baos.flush();
return baos.toByteArray();
}
catch (IOException e)
{
throw new CudaException(
"Could not load data", e);
}
finally
{
if (baos != null)
{
try
{
baos.close();
}
catch (IOException e)
{
throw new CudaException(
"Could not close output", e);
}
}
}
}
/**
* The context which was used to create this instance
*/
private CUcontext context;
/**
* The module which contains the function
*/
private CUmodule module;
/**
* The function which is executed with this KernelLauncher
*/
private CUfunction function;
/**
* The current block size (number of threads per block)
* which will be used for the function call.
*/
private dim3 blockSize = new dim3(1,1,1);
/**
* The current grid size (number of blocks per grid)
* which will be used for the function call.
*/
private dim3 gridSize = new dim3(1,1,1);
/**
* The currently specified size of the shared memory
* for the function call.
*/
private int sharedMemSize = 0;
/**
* The stream that should be associated with the function call.
*/
private CUstream stream;
/**
* Private constructor. Instantiation only via the static
* methods.
*/
private KernelLauncher() {
initialize();
}
/**
* Initializes this KernelLauncher by obtaining the CUDA context
* for the device which is specified by the current
* deviceNumber from the ContextHolder.
*/
private void initialize() {
context = ContextHolder.getInstance().getContext(deviceNumber);
}
/**
* Sync the context for the current thread.
*/
public static void setContext() {
int status = JCudaDriver.cuCtxSetCurrent(ContextHolder.getInstance().getContext());
if(status != CUresult.CUDA_SUCCESS) {
throw new IllegalStateException("Unable to set context");
}
checkResult(JCudaDriver.cuCtxSynchronize());
checkResult(JCuda.cudaDeviceSynchronize());
}
/**
* Create a new KernelLauncher which uses the same module as
* this KernelLauncher, but may be used to execute a different
* function. All parameters (grid size, block size, shared
* memory size and stream) of the returned KernelLauncher
* will be independent of 'this' one and initially contain
* the default values.
*
* @param functionName The name of the function
* @return The KernelLauncher for the specified function
* @throws CudaException If the specified function can not
* be obtained from the module of this KernelLauncher.
*/
public KernelLauncher forFunction(String functionName)
{
KernelLauncher kernelLauncher = new KernelLauncher();
kernelLauncher.module = this.module;
kernelLauncher.initFunction(functionName);
return kernelLauncher;
}
/**
* Initialize the module for this KernelLauncher from
* the given PTX- or CUBIN data.
*
* @param moduleData The data from the PTX- or CUBIN file
*/
private void initModule(byte moduleData[])
{
module = new CUmodule();
checkResult(cuModuleLoadDataEx(module, Pointer.to(moduleData),
0, new int[0], Pointer.to(new int[0])));
}
/**
* Initialize this KernelLauncher for calling the function with
* the given name, which is contained in the module of this
* KernelLauncher
*
* @param functionName The name of the function
*/
private void initFunction(String functionName)
{
// Obtain the function from the module
function = new CUfunction();
String functionErrorString =
"Could not get function '"+functionName+"' from module. "+"\n"+
"Name in module might be mangled. Try adding the line "+"\n"+
"extern \"C\""+"\n"+
"before the function you want to call, or open the " +
"PTX/CUBIN "+"\n"+"file with a text editor to find out " +
"the mangled function name";
try
{
int result = cuModuleGetFunction(function, module, functionName);
if (result != CUresult.CUDA_SUCCESS)
{
throw new CudaException(functionErrorString);
}
}
catch (CudaException e)
{
throw new CudaException(functionErrorString, e);
}
}
/**
* Returns the module that was created from the PTX- or CUBIN file, and
* which contains the function that should be executed. This
* module may also be used to access symbols and texture
* references. However, clients should not modify or unload
* the module.
*
* @return The CUmodule
*/
public CUmodule getModule()
{
return module;
}
/**
* Set the grid size (number of blocks per grid) for the function
* call.
*
* This corresponds to the first parameter in the runtime call:
*
* <pre>
* kernel&lt;&lt;&lt;gridSize, blockSize, sharedMemSize, stream&gt;&gt;&gt;(...);
* </pre>
*
* The default grid size is (1,1,1)
*
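* A common pattern is to derive the grid size from the problem size
* and the block size; a sketch (the problem size 'n' and the block
* size of 256 are only examples):
* <pre>
* int blockSizeX = 256;
* int gridSizeX = (n + blockSizeX - 1) / blockSizeX; // ceil(n / blockSizeX)
* kernelLauncher.setBlockSize(blockSizeX, 1, 1);
* kernelLauncher.setGridSize(gridSizeX, 1);
* </pre>
*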
* @see KernelLauncher#call(Object...)
* @see KernelLauncher#setup(dim3, dim3)
* @see KernelLauncher#setup(dim3, dim3, int)
* @see KernelLauncher#setup(dim3, dim3, int, CUstream)
*
* @param x The number of blocks per grid in x-direction
* @param y The number of blocks per grid in y-direction
* @return This instance
*/
public KernelLauncher setGridSize(int x, int y)
{
gridSize.x = x;
gridSize.y = y;
return this;
}
/**
* Set the grid size (number of blocks per grid) for the function
* call.
*
* This corresponds to the first parameter in the runtime call:
*
* <pre>
* kernel&lt;&lt;&lt;gridSize, blockSize, sharedMemSize, stream&gt;&gt;&gt;(...);
* </pre>
*
* The default grid size is (1,1,1)
*
* @see KernelLauncher#call(Object...)
* @see KernelLauncher#setup(dim3, dim3)
* @see KernelLauncher#setup(dim3, dim3, int)
* @see KernelLauncher#setup(dim3, dim3, int, CUstream)
*
* @param x The number of blocks per grid in x-direction
* @param y The number of blocks per grid in y-direction
* @param z The number of blocks per grid in z-direction
* @return This instance
*/
public KernelLauncher setGridSize(int x, int y, int z)
{
gridSize.x = x;
gridSize.y = y;
gridSize.z = z;
return this;
}
/**
* Set the block size (number of threads per block) for the function
* call.
*
* This corresponds to the second parameter in the runtime call:
*
* <pre>
* kernel&lt;&lt;&lt;gridSize, blockSize, sharedMemSize, stream&gt;&gt;&gt;(...);
* </pre>
*
* The default block size is (1,1,1)
*
* @see KernelLauncher#call(Object...)
* @see KernelLauncher#setup(dim3, dim3)
* @see KernelLauncher#setup(dim3, dim3, int)
* @see KernelLauncher#setup(dim3, dim3, int, CUstream)
*
* @param x The number of threads per block in x-direction
* @param y The number of threads per block in y-direction
* @param z The number of threads per block in z-direction
* @return This instance
*/
public KernelLauncher setBlockSize(int x, int y, int z)
{
blockSize.x = x;
blockSize.y = y;
blockSize.z = z;
return this;
}
/**
* Set the size of the shared memory for the function
* call.
*
* This corresponds to the third parameter in the runtime call:
*
* <pre>
* kernel&lt;&lt;&lt;gridSize, blockSize, sharedMemSize, stream&gt;&gt;&gt;(...);
* </pre>
*
* The default shared memory size is 0.
*
* @see KernelLauncher#call(Object...)
* @see KernelLauncher#setup(dim3, dim3)
* @see KernelLauncher#setup(dim3, dim3, int)
* @see KernelLauncher#setup(dim3, dim3, int, CUstream)
*
* @param sharedMemSize The size of the shared memory, in bytes
* @return This instance
*/
public KernelLauncher setSharedMemSize(int sharedMemSize)
{
this.sharedMemSize = sharedMemSize;
return this;
}
/**
* Set the stream for the function call.
*
* This corresponds to the fourth parameter in the runtime call:
*
* <pre>
* kernel&lt;&lt;&lt;gridSize, blockSize, sharedMemSize, stream&gt;&gt;&gt;(...);
* </pre>
*
* The default stream is null (0).
*
* @see KernelLauncher#call(Object...)
* @see KernelLauncher#setup(dim3, dim3)
* @see KernelLauncher#setup(dim3, dim3, int)
* @see KernelLauncher#setup(dim3, dim3, int, CUstream)
*
* @param stream The stream for the function call
* @return This instance
*/
public KernelLauncher setStream(CUstream stream) {
this.stream = stream;
return this;
}
/**
* Set the given grid size and block size for this KernelLauncher.
*
* @see KernelLauncher#call(Object...)
* @see KernelLauncher#setup(dim3, dim3, int)
* @see KernelLauncher#setup(dim3, dim3, int, CUstream)
*
* @param gridSize The grid size (number of blocks per grid)
* @param blockSize The block size (number of threads per block)
* @return This instance
*/
public KernelLauncher setup(dim3 gridSize, dim3 blockSize)
{
return setup(gridSize, blockSize, sharedMemSize, stream);
}
/**
* Set the given grid size and block size and shared memory size
* for this KernelLauncher.
*
* @see KernelLauncher#call(Object...)
* @see KernelLauncher#setup(dim3, dim3)
* @see KernelLauncher#setup(dim3, dim3, int, CUstream)
*
* @param gridSize The grid size (number of blocks per grid)
* @param blockSize The block size (number of threads per block)
* @param sharedMemSize The size of the shared memory
* @return This instance
*/
public KernelLauncher setup(dim3 gridSize, dim3 blockSize,
int sharedMemSize)
{
return setup(gridSize, blockSize, sharedMemSize, stream);
}
/**
* Returns the context which was used to create this instance
*
* @return The context of this KernelLauncher
*/
public CUcontext context() {
return context;
}
/**
* Set the given grid size and block size, shared memory size
* and stream for this KernelLauncher.
*
* @see KernelLauncher#call(Object...)
* @see KernelLauncher#setup(dim3, dim3)
* @see KernelLauncher#setup(dim3, dim3, int)
*
* @param gridSize The grid size (number of blocks per grid)
* @param blockSize The block size (number of threads per block)
* @param sharedMemSize The size of the shared memory
* @param stream The stream for the kernel invocation
* @return This instance
*/
public KernelLauncher setup(dim3 gridSize, dim3 blockSize,
int sharedMemSize, CUstream stream) {
setGridSize(gridSize.x, gridSize.y, gridSize.z);
setBlockSize(blockSize.x, blockSize.y, blockSize.z);
setSharedMemSize(sharedMemSize);
setStream(stream);
return this;
}
/**
* Call the function of this KernelLauncher with the current
* grid size, block size, shared memory size and stream, and
* with the given arguments.
*
* The given arguments must all be of the type
* Pointer, of a boxed primitive numeric type (Byte, Short, Integer,
* Long, Float or Double), of one of the primitive array types
* double[], float[] or int[], or a curandGenerator.
* Otherwise, a CudaException will be thrown.
*
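* For example, assuming that deviceData is a CUdeviceptr which has
* already been allocated on the device (the names and values are
* illustrative):
* <pre>
* kernelLauncher.call(n, 2.0f, deviceData);
* </pre>
* Here the int 'n' and the float '2.0f' are passed by value, and the
* device pointer is passed as a pointer argument of the kernel.
*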
* @param args The arguments for the function call
* @throws CudaException if an argument with an invalid type
* was given, or one of the internal functions for setting
* up and executing the kernel failed.
*/
public synchronized void call(Object ... args) {
Pointer kernelParameters[] = new Pointer[args.length];
for (int i = 0; i < args.length; i++) {
Object arg = args[i];
if (arg instanceof Pointer)
{
Pointer argPointer = (Pointer)arg;
Pointer pointer = Pointer.to(argPointer);
kernelParameters[i] = pointer;
//logger.info("argument " + i + " type is Pointer");
}
else if (arg instanceof Byte)
{
Byte value = (Byte)arg;
Pointer pointer = Pointer.to(new byte[]{value});
kernelParameters[i] = pointer;
//logger.info("argument " + i + " type is Byte");
}
else if (arg instanceof Short)
{
Short value = (Short)arg;
Pointer pointer = Pointer.to(new short[]{value});
kernelParameters[i] = pointer;
// logger.info("argument " + i + " type is Short");
}
else if (arg instanceof Integer)
{
Integer value = (Integer)arg;
Pointer pointer = Pointer.to(new int[]{value});
kernelParameters[i] = pointer;
//logger.info("argument " + i + " type is Integer");
}
else if (arg instanceof Long)
{
Long value = (Long)arg;
Pointer pointer = Pointer.to(new long[]{value});
kernelParameters[i] = pointer;
//logger.info("argument " + i + " type is Long");
}
else if (arg instanceof Float)
{
Float value = (Float)arg;
Pointer pointer = Pointer.to(new float[]{value});
kernelParameters[i] = pointer;
//logger.info("argument " + i + " type is Float");
}
else if (arg instanceof Double)
{
Double value = (Double)arg;
Pointer pointer = Pointer.to(new double[]{value});
kernelParameters[i] = pointer;
//logger.info("argument " + i + " type is Double");
}
else if (arg instanceof double[])
{
double[] value = (double[])arg;
Pointer pointer = Pointer.to(value);
kernelParameters[i] = pointer;
//logger.info("argument " + i + " type is double[]");
}
else if (arg instanceof float[])
{
float[] value = (float[])arg;
Pointer pointer = Pointer.to(value);
kernelParameters[i] = pointer;
//logger.info("argument " + i + " type is float[]");
}
else if (arg instanceof int[])
{
int[] value = (int[])arg;
Pointer pointer = Pointer.to(value);
kernelParameters[i] = pointer;
//logger.info("argument " + i + " type is int[]");
}
else if(arg instanceof jcuda.jcurand.curandGenerator) {
jcuda.jcurand.curandGenerator rng = (jcuda.jcurand.curandGenerator) arg;
kernelParameters[i] = Pointer.to(rng);
}
else
{
throw new CudaException(
"Type " + arg.getClass() + " may not be passed to a function");
}
}
checkResult(cuLaunchKernel(function,
gridSize.x, gridSize.y, gridSize.z,
blockSize.x, blockSize.y, blockSize.z,
sharedMemSize, stream,
Pointer.to(kernelParameters), null
));
syncContext();
}
/**
* Syncs the current context for the thread.
*/
public void syncContext() {
checkResult(JCudaDriver.cuCtxSetCurrent(ContextHolder.getInstance().getContext()));
checkResult(JCudaDriver.cuCtxAttach(ContextHolder.getInstance().getContext(),0));
checkResult(JCudaDriver.cuCtxSynchronize());
}
/**
* If the given result is not CUresult.CUDA_SUCCESS, then this method
* throws a CudaException with the error message for the given result.
*
* @param cuResult The result
* @throws CudaException if the result is not CUresult.CUDA_SUCCESS
*/
public static void checkResult(int cuResult)
{
if (cuResult != CUresult.CUDA_SUCCESS)
{
throw new CudaException(CUresult.stringFor(cuResult));
}
}
/**
* The extension of the given file name is replaced with "ptx".
* If the file with the resulting name does not exist or is older
* than the source file, it is compiled from the given file
* using NVCC. If the forceRebuild flag is 'true', then the PTX
* file is rebuilt even if it already exists and is newer than the
* source file. The name of the PTX file is returned.
*
* @param cuFileName The name of the .CU file
* @param forceRebuild Whether the PTX file should be re-created
* even if it exists already.
* @param nvccArguments Optional arguments for the NVCC
* @return The name of the PTX file
* @throws IOException If an I/O error occurs
* @throws CudaException If the creation of the PTX file fails
*/
private static String preparePtxFile(
String cuFileName, boolean forceRebuild, String ... nvccArguments)
throws IOException
{
logger.info("Preparing PTX for \n"+cuFileName);
File cuFile = new File(cuFileName);
if (!cuFile.exists())
{
throw new CudaException("Input file not found: "+cuFileName);
}
// Replace the file extension with "ptx"
String ptxFileName = null;
int lastIndex = cuFileName.lastIndexOf('.');
if (lastIndex == -1)
{
ptxFileName = cuFileName + ".ptx";
}
else
{
ptxFileName = cuFileName.substring(0, lastIndex)+".ptx";
}
// Return if the file already exists and should not be rebuilt
File ptxFile = new File(ptxFileName);
if (ptxFile.exists() && !forceRebuild)
{
long cuLastModified = cuFile.lastModified();
long ptxLastModified = ptxFile.lastModified();
if (cuLastModified < ptxLastModified)
{
return ptxFileName;
}
}
// Build the command line
String modelString = "-m"+System.getProperty("sun.arch.data.model");
String defaultArguments = "";
String optionalArguments = createArgumentsString(nvccArguments);
String command =
compilerPath + "nvcc " + modelString + " " + defaultArguments +
" " + optionalArguments + " -ptx "+
cuFile.getPath()+" -o "+ptxFileName;
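// As an illustration (not a literal log), on a 64-bit JVM with no
// additional arguments the resulting command has roughly the form
// nvcc -m64   -ptx /path/to/source.cu -o /path/to/source.ptx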
// Execute the command line and wait for the output
logger.info("Executing\n" + command);
Process process = Runtime.getRuntime().exec(command);
String errorMessage =
new String(toByteArray(process.getErrorStream()));
String outputMessage =
new String(toByteArray(process.getInputStream()));
int exitValue = 0;
try
{
exitValue = process.waitFor();
}
catch (InterruptedException e)
{
Thread.currentThread().interrupt();
throw new CudaException(
"Interrupted while waiting for nvcc output", e);
}
logger.info("nvcc process exitValue "+exitValue);
if (exitValue != 0)
{
logger.error("errorMessage:\n"+errorMessage);
logger.error("outputMessage:\n"+outputMessage);
throw new CudaException(
"Could not create .ptx file: "+errorMessage);
}
return ptxFileName;
}
/**
* Creates a single string from the given argument strings
*
* @param nvccArguments The argument strings
* @return A single string containing the arguments
*/
private static String createArgumentsString(String ... nvccArguments)
{
if (nvccArguments == null || nvccArguments.length == 0)
{
return "";
}
StringBuilder sb = new StringBuilder();
for (String s : nvccArguments)
{
sb.append(s);
sb.append(" ");
}
return sb.toString();
}
/**
* Fully reads the given InputStream and returns it as a byte array.
*
* @param inputStream The input stream to read
* @return The byte array containing the data from the input stream
* @throws IOException If an I/O error occurs
*/
private static byte[] toByteArray(
InputStream inputStream) throws IOException
{
ByteArrayOutputStream baos = new ByteArrayOutputStream();
byte buffer[] = new byte[8192];
while (true)
{
int read = inputStream.read(buffer);
if (read == -1)
{
break;
}
baos.write(buffer, 0, read);
}
return baos.toByteArray();
}
}