/*
 * JCudaUtils - Utilities for JCuda 
 * http://www.jcuda.org
 *
 * Copyright (c) 2010 Marco Hutter - http://www.jcuda.org
 * 
 * Permission is hereby granted, free of charge, to any person
 * obtaining a copy of this software and associated documentation
 * files (the "Software"), to deal in the Software without
 * restriction, including without limitation the rights to use,
 * copy, modify, merge, publish, distribute, sublicense, and/or sell
 * copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following
 * conditions:
 * 
 * The above copyright notice and this permission notice shall be
 * included in all copies or substantial portions of the Software.
 * 
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
 * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
 * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
 * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
 * OTHER DEALINGS IN THE SOFTWARE.
 */

package jcuda.utils;

import static jcuda.driver.JCudaDriver.cuDeviceGetCount;
import static jcuda.driver.JCudaDriver.cuLaunchKernel;
import static jcuda.driver.JCudaDriver.cuModuleGetFunction;
import static jcuda.driver.JCudaDriver.cuModuleLoadDataEx;

import java.io.ByteArrayOutputStream;
import java.io.File;
import java.io.FileInputStream;
import java.io.FileNotFoundException;
import java.io.FileOutputStream;
import java.io.IOException;
import java.io.InputStream;

import jcuda.CudaException;
import jcuda.Pointer;
import jcuda.driver.CUcontext;
import jcuda.driver.CUfunction;
import jcuda.driver.CUmodule;
import jcuda.driver.CUresult;
import jcuda.driver.CUstream;
import jcuda.driver.JCudaDriver;
import jcuda.runtime.JCuda;
import jcuda.runtime.dim3;

import org.nd4j.linalg.jcublas.SimpleJCublas;
import org.nd4j.linalg.jcublas.context.ContextHolder;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

/**
 * This is a utility class that simplifies the setup and launching
 * of CUDA kernels using the JCuda Driver API.
 *
 * Instances of this class may be created using one of the following
 * methods:
 * <ul>
 *   <li>
 *     {@link KernelLauncher#compile(String, String, String...)} will
 *     compile a kernel from a String containing the CUDA source code
 *   </li>
 *   <li>
 *     {@link KernelLauncher#create(String, String, String...)} will
 *     create a kernel for a function that is contained in a CUDA
 *     source file
 *   </li>
 *   <li>
 *     {@link KernelLauncher#load(String, String)} will load a kernel
 *     from a PTX or CUBIN (CUDA binary) file
 *   </li>
 *   <li>
 *     {@link KernelLauncher#load(InputStream, String)} will load a kernel
 *     from PTX or CUBIN data which is provided via an InputStream
 *     (useful for packaging PTX or CUBIN files into JAR archives)
 *   </li>
 * </ul>
 *
 * These instances may then be used to call a kernel function with
 * the {@link KernelLauncher#call(Object...)} method. The actual
 * kernel function arguments which are passed to this method
 * will be set up automatically, and aligned appropriately for
 * their respective size.
 *
 * The setup of the execution may be performed similarly to the
 * invocation of a kernel using the Runtime API in C. Such a call
 * has the form
 * <pre>
 *     kernel<<<gridDim, blockDim, sharedMemorySize, stream>>>(...);
 * </pre>
 * where
 * <ul>
 *   <li>gridDim is a dim3 which specifies the number of blocks per grid</li>
 *   <li>blockDim is a dim3 which specifies the number of threads per block</li>
 *   <li>sharedMemorySize is the size of the shared memory for the kernel</li>
 *   <li>stream is a stream for asynchronous kernel execution</li>
 * </ul>
 *
 * Similarly, the KernelLauncher allows specifying these parameters
 * in the {@link KernelLauncher#setup(dim3, dim3, int, CUstream)}
 * method:
 * <pre>
 *     kernelLauncher.setup(gridDim, blockDim, sharedMemorySize, stream).call(...);
 * </pre>
 *
 * When default values for some of the parameters should be used,
 * one of the overloaded versions of the setup method may be called:
 * <pre>
 *     kernelLauncher.setup(gridDim, blockDim).call(...);
 * </pre>
 *
 * The parameters may also be set individually:
 * <pre>
 *     kernelLauncher.setGridSize(gridSize);
 *     kernelLauncher.setBlockSize(blockSize);
 *     kernelLauncher.call(...);
 * </pre>
 *
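 * As a complete usage example, a kernel that scales a vector could be
 * compiled and launched as sketched below. The kernel source code, the
 * function name "scale" and all sizes are only illustrative and are not
 * part of this class:
 * <pre>
 *     String sourceCode =
 *         "extern \"C\"\n" +
 *         "__global__ void scale(float *data, int n, float factor)\n" +
 *         "{\n" +
 *         "    int i = blockIdx.x * blockDim.x + threadIdx.x;\n" +
 *         "    if (i < n) data[i] *= factor;\n" +
 *         "}\n";
 *     KernelLauncher launcher =
 *         KernelLauncher.compile(sourceCode, "scale");
 *
 *     int n = 1024;
 *     CUdeviceptr deviceData = new CUdeviceptr();
 *     cuMemAlloc(deviceData, n * Sizeof.FLOAT);
 *
 *     dim3 blockDim = new dim3(256, 1, 1);
 *     dim3 gridDim = new dim3((n + 255) / 256, 1, 1);
 *     launcher.setup(gridDim, blockDim).call(deviceData, n, 2.0f);
 * </pre>
 *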
 */
public class KernelLauncher
{
    /**
     * The logger used in this class
     */
    private static final Logger logger =
        LoggerFactory.getLogger(KernelLauncher.class.getName());

    /**
     * The path prefix, containing the path to the NVCC compiler.
     * Not required if the path to the NVCC is present in an
     * environment variable.
     */
    private static String compilerPath = "";

    /**
     * The number of the device which should be used by the
     * KernelLauncher
     */
    private int deviceNumber = 0;

    /**
     * Set the path to the NVCC compiler. For example:
     * <pre>
     *     setCompilerPath("C:/CUDA/bin");
     * </pre>
     * By default, this path is empty, assuming that the compiler
     * is in a path that is visible via an environment variable.
     *
     * @param path The path to the NVCC compiler.
     */
    public static void setCompilerPath(String path)
    {
        if (path == null)
        {
            // A null path resets the prefix. Return here instead of
            // falling through, which would assign null and cause a
            // NullPointerException in the endsWith check below.
            compilerPath = "";
            return;
        }
        compilerPath = path;
        if (!compilerPath.endsWith(File.separator))
        {
            compilerPath += File.separator;
        }
    }

    /**
     * Set the number (index) of the device which should be used
     * by the KernelLauncher
     *
     * @param number The number of the device to use
     * @throws CudaException If number < 0 or number >= deviceCount
     */
    public void setDeviceNumber(int number)
    {
        int count[] = new int[1];
        cuDeviceGetCount(count);
        if (number < 0 || number >= count[0])
        {
            throw new CudaException(
                "Invalid device number: " + number + ". " +
                "There are only " + count[0] + " devices available");
        }
        deviceNumber = number;
    }

    /**
     * Create a new KernelLauncher for the function with the given
     * name, that is defined in the given source code.
     *
     * The source code is stored in a temporary .CU CUDA source file,
     * and a PTX file is compiled from this source file using the
     * NVCC (NVIDIA CUDA C Compiler) in a separate process.
     * The optional nvccArguments are passed to the NVCC.
     *
     * The NVCC has to be in a visible directory. E.g. for Windows, the
     * NVCC.EXE has to be in a directory that is contained in the PATH
     * environment variable. Alternatively, the path to the NVCC may
     * be specified by calling {@link KernelLauncher#setCompilerPath(String)}
     * with the respective path.
     *
     * Note: In order to make the function accessible by the name it
     * has in the source code, the function has to be declared as an
     * extern "C" function:
     * <pre>
     *     extern "C"
     *     __global__ void functionName(...)
     *     {
     *         ...
     *     }
     * </pre>
     *
* * * @see KernelLauncher#create(String, String, String...) * @see KernelLauncher#create(String, String, boolean, String...) * @see KernelLauncher#load(String, String) * * @param sourceCode The source code containing the function * @param functionName The name of the function. * @param nvccArguments Optional arguments for the NVCC * @return The KernelLauncher for the specified function * @throws CudaException If the creation of the CU- or PTX file * fails, or the PTX may not be loaded, or the specified * function can not be obtained. */ public static KernelLauncher compile( String sourceCode, String functionName, String ... nvccArguments) { File cuFile = null; try { cuFile = File.createTempFile("temp_JCuda_", ".cu"); } catch (IOException e) { throw new CudaException("Could not create temporary .cu file", e); } String cuFileName = cuFile.getPath(); FileOutputStream fos = null; try { fos = new FileOutputStream(cuFile); fos.write(sourceCode.getBytes()); } catch (IOException e) { throw new CudaException("Could not write temporary .cu file", e); } finally { if (fos != null) { try { fos.close(); } catch (IOException e) { throw new CudaException( "Could not close temporary .cu file", e); } } } return create(cuFileName, functionName, nvccArguments); } /** * Create a new KernelLauncher for the function with the given * name, that is contained in the .CU CUDA source file with the * given name.
     *
     * Note: In order to make the function accessible by the name it
     * has in the source code, the function has to be declared as an
     * extern "C" function:
     * <pre>
     *     extern "C"
     *     __global__ void functionName(...)
     *     {
     *         ...
     *     }
     * </pre>
     *
     * The extension of the given file name is replaced with "ptx".
     * If the PTX file with the resulting name does not exist,
     * or is older than the .CU file, it is compiled from
     * the specified source file using the NVCC (NVIDIA CUDA C
     * Compiler) in a separate process. The optional nvccArguments
     * are passed to the NVCC.
     *
     * The NVCC has to be in a visible directory. E.g. for Windows, the
     * NVCC.EXE has to be in a directory that is contained in the PATH
     * environment variable. Alternatively, the path to the NVCC may
     * be specified by calling {@link KernelLauncher#setCompilerPath(String)}
     * with the respective path.
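     *
     * For example, a launcher for a function "scale" defined in a file
     * "kernels/scale.cu" could be created as follows (the file name,
     * function name and architecture flag are only illustrative):
     * <pre>
     *     KernelLauncher launcher = KernelLauncher.create(
     *         "kernels/scale.cu", "scale", "-arch=sm_30");
     * </pre>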
* * @see KernelLauncher#compile(String, String, String...) * @see KernelLauncher#create(String, String, boolean, String...) * @see KernelLauncher#load(String, String) * @see KernelLauncher#load(InputStream, String) * * @param cuFileName The name of the source file. * @param functionName The name of the function. * @param nvccArguments Optional arguments for the NVCC * @return The KernelLauncher for the specified function * @throws CudaException If the creation of the PTX file fails, * or the PTX may not be loaded, or the specified function can * not be obtained. */ public static KernelLauncher create( String cuFileName, String functionName, String ... nvccArguments) { return create(cuFileName, functionName, false, nvccArguments); } /** * Create a new KernelLauncher for the function with the given * name, that is contained in the .CU CUDA source file with the * given name.
     *
     * Note: In order to make the function accessible by the name it
     * has in the source code, the function has to be declared as an
     * extern "C" function:
     * <pre>
     *     extern "C"
     *     __global__ void functionName(...)
     *     {
     *         ...
     *     }
     * </pre>
     *
     * The extension of the given file name is replaced with "ptx".
     * If the PTX file with the resulting name does not exist,
     * or is older than the .CU file, it is compiled from
     * the specified source file using the NVCC (NVIDIA CUDA C
     * Compiler) in a separate process. The optional nvccArguments
     * are passed to the NVCC.
     *
     * If the forceRebuild flag is 'true', then the PTX file will be
     * recompiled from the given source file, even if it already
     * existed or was newer than the source file, and the already
     * existing PTX file will be overwritten.
     *
     * The NVCC has to be in a visible directory. E.g. for Windows, the
     * NVCC.EXE has to be in a directory that is contained in the PATH
     * environment variable. Alternatively, the path to the NVCC may
     * be specified by calling {@link KernelLauncher#setCompilerPath(String)}
     * with the respective path.
* * @see KernelLauncher#compile(String, String, String...) * @see KernelLauncher#create(String, String, String...) * @see KernelLauncher#load(String, String) * @see KernelLauncher#load(InputStream, String) * * @param cuFileName The name of the source file. * @param functionName The name of the function. * @param forceRebuild Whether the PTX file should be recompiled * and overwritten if it already exists. * @param nvccArguments Optional arguments for the NVCC * @return The KernelLauncher for the specified function * @throws CudaException If the creation of the PTX file fails, * or the PTX may not be loaded, or the specified function can * not be obtained. */ public static KernelLauncher create( String cuFileName, String functionName, boolean forceRebuild, String ... nvccArguments) { // Prepare the PTX file for the CU source file String ptxFileName = null; try { ptxFileName = preparePtxFile(cuFileName, forceRebuild, nvccArguments); } catch (IOException e) { throw new CudaException( "Could not prepare PTX for source file '"+cuFileName+"'", e); } KernelLauncher kernelLauncher = new KernelLauncher(); byte ptxData[] = loadData(ptxFileName); kernelLauncher.initModule(ptxData); kernelLauncher.initFunction(functionName); return kernelLauncher; } /** * Create a new KernelLauncher which may be used to execute the * specified function which is loaded from the PTX- or CUBIN * (CUDA binary) file with the given name. * * @see KernelLauncher#compile(String, String, String...) * @see KernelLauncher#create(String, String, boolean, String...) * @see KernelLauncher#load(String, String) * @see KernelLauncher#load(InputStream, String) * * @param moduleFileName The name of the PTX- or CUBIN file * @param functionName The name of the function * @return The KernelLauncher for the specified function * @throws CudaException If the PTX- or CUBIN may not be loaded, * or the specified function can not be obtained. */ public static KernelLauncher load( String moduleFileName, String functionName) { KernelLauncher kernelLauncher = new KernelLauncher(); byte moduleData[] = loadData(moduleFileName); kernelLauncher.initModule(moduleData); kernelLauncher.initFunction(functionName); return kernelLauncher; } /** * Create a new KernelLauncher which may be used to execute the * specified function which is loaded from the PTX- or CUBIN * data that is read from the given input stream. * * @see KernelLauncher#compile(String, String, String...) * @see KernelLauncher#create(String, String, boolean, String...) * @see KernelLauncher#load(String, String) * @see KernelLauncher#load(InputStream, String) * * @param moduleInputStream The stream for the PTX- or CUBIN data * @param functionName The name of the function * @return The KernelLauncher for the specified function * @throws CudaException If the PTX- or CUBIN may not be loaded, * or the specified function can not be obtained. 
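     *
     * For example, a PTX file that is packaged into a JAR archive could
     * be loaded as follows (the resource path and function name are
     * only illustrative):
     * <pre>
     *     InputStream ptxStream =
     *         KernelLauncher.class.getResourceAsStream("/kernels/scale.ptx");
     *     KernelLauncher launcher =
     *         KernelLauncher.load(ptxStream, "scale");
     * </pre>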
*/ public static KernelLauncher load( InputStream moduleInputStream, String functionName) { KernelLauncher kernelLauncher = new KernelLauncher(); byte moduleData[] = loadData(moduleInputStream); kernelLauncher.initModule(moduleData); kernelLauncher.initFunction(functionName); return kernelLauncher; } /** * Load the data from the file with the given name and returns * it as a 0-terminated byte array * * @param fileName The name of the file * @return The data from the file */ private static byte[] loadData(String fileName) { InputStream inputStream = null; try { inputStream= new FileInputStream(new File(fileName)); return loadData(inputStream); } catch (FileNotFoundException e) { throw new CudaException( "Could not open '"+fileName+"'", e); } finally { if (inputStream != null) { try { inputStream.close(); } catch (IOException e) { throw new CudaException( "Could not close '"+fileName+"'", e); } } } } /** * Reads the data from the given inputStream and returns it as * a 0-terminated byte array. * * @param inputStream The inputStream to read * @return The data from the inputStream */ private static byte[] loadData(InputStream inputStream) { ByteArrayOutputStream baos = null; try { baos = new ByteArrayOutputStream(); byte buffer[] = new byte[8192]; while (true) { int read = inputStream.read(buffer); if (read == -1) { break; } baos.write(buffer, 0, read); } baos.write('\0'); baos.flush(); return baos.toByteArray(); } catch (IOException e) { throw new CudaException( "Could not load data", e); } finally { if (baos != null) { try { baos.close(); } catch (IOException e) { throw new CudaException( "Could not close output", e); } } } } /** * The context which was used to create this instance */ private CUcontext context; /** * The module which contains the function */ private CUmodule module; /** * The function which is executed with this KernelLauncher */ private CUfunction function; /** * The current block size (number of threads per block) * which will be used for the function call. */ private dim3 blockSize = new dim3(1,1,1); /** * The current grid size (number of blocks per grid) * which will be used for the function call. */ private dim3 gridSize = new dim3(1,1,1); /** * The currently specified size of the shared memory * for the function call. */ private int sharedMemSize = 0; /** * The stream that should be associated with the function call. */ private CUstream stream; /** * Private constructor. Instantiation only via the static * methods. */ private KernelLauncher() { initialize(); } /** * Initializes this KernelLauncher. This method will try to * initialize the JCuda driver API. Then it will try to * attach to the current CUDA context. If no active CUDA * context exists, then it will try to create one, for * the device which is specified by the current * deviceNumber. * * @throws CudaException If it is neither possible to * attach to an existing context, nor to create a new * context. */ private void initialize() { context = ContextHolder.getInstance().getContext(deviceNumber); } /** * Sync the context for the current thread. */ public static void setContext() { JCudaDriver.cuCtxSetCurrent(ContextHolder.getInstance().getContext()); JCuda.cudaSetDevice(ContextHolder.getInstance().getDeviceForThread()); JCudaDriver.cuCtxSynchronize(); JCuda.cudaDeviceSynchronize(); } /** * Create a new KernelLauncher which uses the same module as * this KernelLauncher, but may be used to execute a different * function. 
All parameters (grid size, block size, shared * memory size and stream) of the returned KernelLauncher * will be independent of 'this' one and initially contain * the default values. * * @param functionName The name of the function * @return The KernelLauncher for the specified function * @throws CudaException If the specified function can not * be obtained from the module of this KernelLauncher. */ public KernelLauncher forFunction(String functionName) { KernelLauncher kernelLauncher = new KernelLauncher(); kernelLauncher.module = this.module; kernelLauncher.initFunction(functionName); return kernelLauncher; } /** * Initialize the module for this KernelLauncher by loading * the PTX- or CUBIN file with the given name. * * @param moduleData The data from the PTX- or CUBIN file */ private void initModule(byte moduleData[]) { module = new CUmodule(); cuModuleLoadDataEx(module, Pointer.to(moduleData), 0, new int[0], Pointer.to(new int[0])); } /** * Initialize this KernelLauncher for calling the function with * the given name, which is contained in the module of this * KernelLauncher * * @param functionName The name of the function */ private void initFunction(String functionName) { // Obtain the function from the module function = new CUfunction(); String functionErrorString = "Could not get function '"+functionName+"' from module. "+"\n"+ "Name in module might be mangled. Try adding the line "+"\n"+ "extern \"C\""+"\n"+ "before the function you want to call, or open the " + "PTX/CUBIN "+"\n"+"file with a text editor to find out " + "the mangled function name"; try { int result = cuModuleGetFunction(function, module, functionName); if (result != CUresult.CUDA_SUCCESS) { throw new CudaException(functionErrorString); } } catch (CudaException e) { throw new CudaException(functionErrorString, e); } } /** * Returns the module that was created from the PTX- or CUBIN file, and * which contains the function that should be executed. This * module may also be used to access symbols and texture * references. However, clients should not modify or unload * the module. * * @return The CUmodule */ public CUmodule getModule() { return module; } /** * Set the grid size (number of blocks per grid) for the function * call.
     *
     * This corresponds to the first parameter in the runtime call:
     * <pre>
     *     kernel<<<gridSize, blockSize, sharedMemSize, stream>>>(...);
     * </pre>
     *
* The default grid size is (1,1,1) * * @see KernelLauncher#call(Object...) * @see KernelLauncher#setup(dim3, dim3) * @see KernelLauncher#setup(dim3, dim3, int) * @see KernelLauncher#setup(dim3, dim3, int, CUstream) * * @param x The number of blocks per grid in x-direction * @param y The number of blocks per grid in y-direction * @return This instance */ public KernelLauncher setGridSize(int x, int y) { gridSize.x = x; gridSize.y = y; return this; } /** * Set the grid size (number of blocks per grid) for the function * call.
     *
     * This corresponds to the first parameter in the runtime call:
     * <pre>
     *     kernel<<<gridSize, blockSize, sharedMemSize, stream>>>(...);
     * </pre>
     *
* The default grid size is (1,1,1) * * @see KernelLauncher#call(Object...) * @see KernelLauncher#setup(dim3, dim3) * @see KernelLauncher#setup(dim3, dim3, int) * @see KernelLauncher#setup(dim3, dim3, int, CUstream) * * @param x The number of blocks per grid in x-direction * @param y The number of blocks per grid in y-direction * @param z The number of blocks per grid in z-direction * @return This instance */ public KernelLauncher setGridSize(int x, int y, int z) { gridSize.x = x; gridSize.y = y; gridSize.z = z; return this; } /** * Set the block size (number of threads per block) for the function * call.
     *
     * This corresponds to the second parameter in the runtime call:
     * <pre>
     *     kernel<<<gridSize, blockSize, sharedMemSize, stream>>>(...);
     * </pre>
     *
* The default block size is (1,1,1) * * @see KernelLauncher#call(Object...) * @see KernelLauncher#setup(dim3, dim3) * @see KernelLauncher#setup(dim3, dim3, int) * @see KernelLauncher#setup(dim3, dim3, int, CUstream) * * @param x The number of threads per block in x-direction * @param y The number of threads per block in y-direction * @param z The number of threads per block in z-direction * @return This instance */ public KernelLauncher setBlockSize(int x, int y, int z) { blockSize.x = x; blockSize.y = y; blockSize.z = z; return this; } /** * Set the size of the shared memory for the function * call.
     *
     * This corresponds to the third parameter in the runtime call:
     * <pre>
     *     kernel<<<gridSize, blockSize, sharedMemSize, stream>>>(...);
     * </pre>
     *
* The default shared memory size is 0. * * @see KernelLauncher#call(Object...) * @see KernelLauncher#setup(dim3, dim3) * @see KernelLauncher#setup(dim3, dim3, int) * @see KernelLauncher#setup(dim3, dim3, int, CUstream) * * @param sharedMemSize The size of the shared memory, in bytes * @return This instance */ public KernelLauncher setSharedMemSize(int sharedMemSize) { this.sharedMemSize = sharedMemSize; return this; } /** * Set the stream for the function call.
     *
     * This corresponds to the fourth parameter in the runtime call:
     * <pre>
     *     kernel<<<gridSize, blockSize, sharedMemSize, stream>>>(...);
     * </pre>
     *
* The default stream is null (0). * * @see KernelLauncher#call(Object...) * @see KernelLauncher#setup(dim3, dim3) * @see KernelLauncher#setup(dim3, dim3, int) * @see KernelLauncher#setup(dim3, dim3, int, CUstream) * * @param stream The stream for the function call * @return This instance */ public KernelLauncher setStream(CUstream stream) { this.stream = stream; return this; } /** * Set the given grid size and block size for this KernelLauncher. * * @see KernelLauncher#call(Object...) * @see KernelLauncher#setup(dim3, dim3, int) * @see KernelLauncher#setup(dim3, dim3, int, CUstream) * * @param gridSize The grid size (number of blocks per grid) * @param blockSize The block size (number of threads per block) * @return This instance */ public KernelLauncher setup(dim3 gridSize, dim3 blockSize) { return setup(gridSize, blockSize, sharedMemSize, stream); } /** * Set the given grid size and block size and shared memory size * for this KernelLauncher. * * @see KernelLauncher#call(Object...) * @see KernelLauncher#setup(dim3, dim3) * @see KernelLauncher#setup(dim3, dim3, int, CUstream) * * @param gridSize The grid size (number of blocks per grid) * @param blockSize The block size (number of threads per block) * @param sharedMemSize The size of the shared memory * @return This instance */ public KernelLauncher setup(dim3 gridSize, dim3 blockSize, int sharedMemSize) { return setup(gridSize, blockSize, sharedMemSize, stream); } public CUcontext context() { return context; } /** * Set the given grid size and block size, shared memory size * and stream for this KernelLauncher. * * @see KernelLauncher#call(Object...) * @see KernelLauncher#setup(dim3, dim3) * @see KernelLauncher#setup(dim3, dim3, int) * * @param gridSize The grid size (number of blocks per grid) * @param blockSize The block size (number of threads per block) * @param sharedMemSize The size of the shared memory * @param stream The stream for the kernel invocation * @return This instance */ public KernelLauncher setup(dim3 gridSize, dim3 blockSize, int sharedMemSize, CUstream stream) { setGridSize(gridSize.x, gridSize.y); setBlockSize(blockSize.x, blockSize.y, blockSize.z); setSharedMemSize(sharedMemSize); setStream(stream); return this; } /** * Call the function of this KernelLauncher with the current * grid size, block size, shared memory size and stream, and * with the given arguments.
*
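     * For example, assuming a kernel with the (purely illustrative)
     * signature
     * <pre>
     *     extern "C" __global__ void scale(float *data, int n, float factor)
     * </pre>
     * the call could look like this, where deviceData is a Pointer to
     * device memory and the remaining arguments are primitives:
     * <pre>
     *     launcher.call(deviceData, n, 2.0f);
     * </pre>
     *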
* The given arguments must all be either of the type * Pointer, or of a primitive type except boolean. * Otherwise, a CudaException will be thrown. * * @param args The arguments for the function call * @throws CudaException if an argument with an invalid type * was given, or one of the internal functions for setting * up and executing the kernel failed. */ public void call(Object ... args) { Pointer kernelParameters[] = new Pointer[args.length]; for (int i = 0; i < args.length; i++) { Object arg = args[i]; if (arg instanceof Pointer) { Pointer argPointer = (Pointer)arg; Pointer pointer = Pointer.to(argPointer); kernelParameters[i] = pointer; //logger.info("argument " + i + " type is Pointer"); } else if (arg instanceof Byte) { Byte value = (Byte)arg; Pointer pointer = Pointer.to(new byte[]{value}); kernelParameters[i] = pointer; //logger.info("argument " + i + " type is Byte"); } else if (arg instanceof Short) { Short value = (Short)arg; Pointer pointer = Pointer.to(new short[]{value}); kernelParameters[i] = pointer; // logger.info("argument " + i + " type is Short"); } else if (arg instanceof Integer) { Integer value = (Integer)arg; Pointer pointer = Pointer.to(new int[]{value}); kernelParameters[i] = pointer; //logger.info("argument " + i + " type is Integer"); } else if (arg instanceof Long) { Long value = (Long)arg; Pointer pointer = Pointer.to(new long[]{value}); kernelParameters[i] = pointer; //logger.info("argument " + i + " type is Long"); } else if (arg instanceof Float) { Float value = (Float)arg; Pointer pointer = Pointer.to(new float[]{value}); kernelParameters[i] = pointer; //logger.info("argument " + i + " type is Float"); } else if (arg instanceof Double) { Double value = (Double)arg; Pointer pointer = Pointer.to(new double[]{value}); kernelParameters[i] = pointer; //logger.info("argument " + i + " type is Double"); } else if (arg instanceof double[]) { double[] value = (double[])arg; Pointer pointer = Pointer.to(value); kernelParameters[i] = pointer; //logger.info("argument " + i + " type is double[]"); } else if (arg instanceof float[]) { float[] value = (float[])arg; Pointer pointer = Pointer.to(value); kernelParameters[i] = pointer; //logger.info("argument " + i + " type is float[]"); } else if (arg instanceof int[]) { int[] value = (int[])arg; Pointer pointer = Pointer.to(value); kernelParameters[i] = pointer; //logger.info("argument " + i + " type is int[]"); } else if(arg instanceof jcuda.jcurand.curandGenerator) { jcuda.jcurand.curandGenerator rng = (jcuda.jcurand.curandGenerator) arg; kernelParameters[i] = Pointer.to(rng); } else { throw new CudaException( "Type " + arg.getClass() + " may not be passed to a function"); } } cuLaunchKernel(function, gridSize.x, gridSize.y, gridSize.z, blockSize.x, blockSize.y, blockSize.z, sharedMemSize, stream, Pointer.to(kernelParameters), null ); ContextHolder.syncStream(); SimpleJCublas.sync(); } /** * Sync the current context for the thread * Syncs the current context for the thread. */ public void syncContext() { JCudaDriver.cuCtxSetCurrent(ContextHolder.getInstance().getContext()); JCudaDriver.cuCtxAttach(ContextHolder.getInstance().getContext(), 0); JCudaDriver.cuCtxSynchronize(); } /** * The extension of the given file name is replaced with "ptx". * If the file with the resulting name does not exist or is older * than the source file, it is compiled from the given file * using NVCC. If the forceRebuild flag is 'true', then the PTX * file is rebuilt even if it already exists or is newer than the * source file. 
The name of the PTX file is returned. * * @param cuFileName The name of the .CU file * @param forceRebuild Whether the PTX file should be re-created * even if it exists already. * @param nvccArguments Optional arguments for the NVCC * @return The name of the PTX file * @throws IOException If an I/O error occurs * @throws CudaException If the creation of the PTX file fails */ private static String preparePtxFile( String cuFileName, boolean forceRebuild, String ... nvccArguments) throws IOException { logger.info("Preparing PTX for \n"+cuFileName); File cuFile = new File(cuFileName); if (!cuFile.exists()) throw new CudaException("Input file not found: " + cuFileName); // Replace the file extension with "ptx" String ptxFileName = null; int lastIndex = cuFileName.lastIndexOf('.'); if (lastIndex == -1) ptxFileName = cuFileName + ".ptx"; else ptxFileName = cuFileName.substring(0, lastIndex)+".ptx"; // Return if the file already exists and should not be rebuilt File ptxFile = new File(ptxFileName); if (ptxFile.exists() && !forceRebuild) { long cuLastModified = cuFile.lastModified(); long ptxLastModified = ptxFile.lastModified(); if (cuLastModified < ptxLastModified) return ptxFileName; } // Build the command line String modelString = "-m"+System.getProperty("sun.arch.data.model"); String defaultArguments = ""; String optionalArguments = createArgumentsString(nvccArguments); String command = compilerPath + "nvcc " + modelString + " " + defaultArguments + " " + optionalArguments + " -ptx "+ cuFile.getPath()+" -o "+ptxFileName; // Execute the command line and wait for the output logger.info("Executing\n" + command); Process process = Runtime.getRuntime().exec(command); String errorMessage = new String(toByteArray(process.getErrorStream())); String outputMessage = new String(toByteArray(process.getInputStream())); int exitValue = 0; try { exitValue = process.waitFor(); } catch (InterruptedException e) { Thread.currentThread().interrupt(); throw new CudaException( "Interrupted while waiting for nvcc output", e); } logger.info("nvcc process exitValue "+exitValue); if (exitValue != 0) { logger.error("errorMessage:\n"+errorMessage); logger.error("outputMessage:\n"+outputMessage); throw new CudaException( "Could not create .ptx file: "+errorMessage); } return ptxFileName; } /** * Creates a single string from the given argument strings * * @param nvccArguments The argument strings * @return A single string containing the arguments */ private static String createArgumentsString(String ... nvccArguments) { if (nvccArguments == null || nvccArguments.length == 0) { return ""; } StringBuilder sb = new StringBuilder(); for (String s : nvccArguments) { sb.append(s); sb.append(" "); } return sb.toString(); } /** * Fully reads the given InputStream and returns it as a byte array. 
* * @param inputStream The input stream to read * @return The byte array containing the data from the input stream * @throws IOException If an I/O error occurs */ private static byte[] toByteArray( InputStream inputStream) throws IOException { ByteArrayOutputStream baos = new ByteArrayOutputStream(); byte buffer[] = new byte[8192]; while (true) { int read = inputStream.read(buffer); if (read == -1) { break; } baos.write(buffer, 0, read); } return baos.toByteArray(); } @Override public String toString() { return "KernelLauncher{" + "deviceNumber=" + deviceNumber + ", context=" + context + ", module=" + module + ", function=" + function + ", blockSize=" + blockSize + ", gridSize=" + gridSize + ", sharedMemSize=" + sharedMemSize + ", stream=" + stream + '}'; } }



