All Downloads are FREE. Search and download functionalities are using the official Maven repository.

net.finmath.montecarlo.cuda.alternative.RandomVariableCudaWithFinalizer Maven / Gradle / Ivy

/*
 * (c) Copyright Christian P. Fries, Germany. All rights reserved. Contact: [email protected].
 *
 * Created on 09.02.2006
 */
package net.finmath.montecarlo.cuda.alternative;

import static jcuda.driver.JCudaDriver.cuCtxCreate;
import static jcuda.driver.JCudaDriver.cuCtxSynchronize;
import static jcuda.driver.JCudaDriver.cuDeviceGet;
import static jcuda.driver.JCudaDriver.cuInit;
import static jcuda.driver.JCudaDriver.cuLaunchKernel;
import static jcuda.driver.JCudaDriver.cuModuleGetFunction;
import static jcuda.driver.JCudaDriver.cuModuleLoad;

import java.io.ByteArrayOutputStream;
import java.io.File;
import java.io.IOException;
import java.io.InputStream;
import java.lang.ref.ReferenceQueue;
import java.lang.ref.WeakReference;
import java.util.Map;
import java.util.concurrent.ConcurrentHashMap;
import java.util.function.DoubleBinaryOperator;
import java.util.function.DoubleUnaryOperator;
import java.util.function.IntToDoubleFunction;
import java.util.logging.Logger;
import java.util.stream.DoubleStream;

import jcuda.LogLevel;
import jcuda.Pointer;
import jcuda.Sizeof;
import jcuda.driver.CUcontext;
import jcuda.driver.CUdevice;
import jcuda.driver.CUdeviceptr;
import jcuda.driver.CUfunction;
import jcuda.driver.CUmodule;
import jcuda.driver.JCudaDriver;
import net.finmath.functions.DoubleTernaryOperator;
import net.finmath.stochastic.RandomVariable;

/**
 * The class RandomVariableFromDoubleArray represents a random variable being the evaluation of a stochastic process
 * at a certain time within a Monte-Carlo simulation.
 * It is thus essentially a vector of floating point numbers - the realizations - together with a double - the time.
 * The index of the vector represents path.
 * The class may also be used for non-stochastic quantities which may potentially be stochastic
 * (e.g. volatility). If only non-stochastic random variables are involved in an operation the class uses
 * optimized code.
 *
 * Accesses performed exclusively through the interface
 * RandomVariable is thread safe (and does not mutate the class).
 *
 * This implementation uses floats for the realizations (consuming less memory compared to using doubles). However,
 * the calculation of the average is performed using double precision.
 *
 * @author Christian Fries
 * @version 1.8
 */
public class RandomVariableCudaWithFinalizer implements RandomVariable {

	private static final long serialVersionUID = 7620120320663270600L;

	private final double      time;	                // Time (filtration)

	// Data model for the stochastic case (otherwise null)
	private final CUdeviceptr	realizations;           // Realizations
	private final long			size;

	// Data model for the non-stochastic case (if realizations==null)
	private final double      valueIfNonStochastic;

	private static final ReferenceQueue referenceQueue = new ReferenceQueue();
	private static final Map, CUdeviceptr> referenceMap = new ConcurrentHashMap, CUdeviceptr>();

	private static final Logger logger = Logger.getLogger("net.finmath");

	public static final CUdevice device;
	public static final CUcontext context;

	private static final CUfunction capByScalar;
	private static final CUfunction floorByScalar;
	private static final CUfunction addScalar;
	private static final CUfunction subScalar;
	private static final CUfunction multScalar;
	private static final CUfunction divScalar;
	private static final CUfunction cuPow;
	private static final CUfunction cuSqrt;
	private static final CUfunction cuExp;
	private static final CUfunction cuLog;
	private static final CUfunction invert;
	private static final CUfunction cuAbs;
	private static final CUfunction cap;
	private static final CUfunction cuFloor;
	private static final CUfunction add;
	private static final CUfunction sub;
	private static final CUfunction mult;
	private static final CUfunction cuDiv;
	private static final CUfunction accrue;
	private static final CUfunction discount;
	private static final CUfunction reducePartial;

	private static final int reduceGridSize = 1024;

	// Initalize cuda
	static {
		// Enable exceptions and omit all subsequent error checks
		JCudaDriver.setExceptionsEnabled(true);
		JCudaDriver.setLogLevel(LogLevel.LOG_DEBUG);

		// Create the PTX file by calling the NVCC
		String ptxFileName = null;
		try {
			ptxFileName = preparePtxFile("RandomVariableCudaKernel.cu");
		} catch (final IOException e) {
			// TODO Auto-generated catch block
			e.printStackTrace();
		}

		// Initialize the driver and create a context for the first device.
		cuInit(0);
		device = new CUdevice();
		cuDeviceGet(device, 0);
		context = new CUcontext();
		cuCtxCreate(context, 0, device);

		// Load the ptx file.
		final CUmodule module = new CUmodule();
		cuModuleLoad(module, ptxFileName);

		// Obtain a function pointers
		capByScalar = new CUfunction();
		cuModuleGetFunction(capByScalar, module, "capByScalar");
		floorByScalar = new CUfunction();
		cuModuleGetFunction(floorByScalar, module, "floorByScalar");
		addScalar = new CUfunction();
		cuModuleGetFunction(addScalar, module, "addScalar");
		subScalar = new CUfunction();
		cuModuleGetFunction(subScalar, module, "subScalar");
		multScalar = new CUfunction();
		cuModuleGetFunction(multScalar, module, "multScalar");
		divScalar = new CUfunction();
		cuModuleGetFunction(divScalar, module, "divScalar");
		cuPow = new CUfunction();
		cuModuleGetFunction(cuPow, module, "cuPow");
		cuSqrt = new CUfunction();
		cuModuleGetFunction(cuSqrt, module, "cuSqrt");
		cuExp = new CUfunction();
		cuModuleGetFunction(cuExp, module, "cuExp");
		cuLog = new CUfunction();
		cuModuleGetFunction(cuLog, module, "cuLog");
		invert = new CUfunction();
		cuModuleGetFunction(invert, module, "invert");
		cuAbs = new CUfunction();
		cuModuleGetFunction(cuAbs, module, "cuAbs");
		cap = new CUfunction();
		cuModuleGetFunction(cap, module, "cap");
		cuFloor = new CUfunction();
		cuModuleGetFunction(cuFloor, module, "cuFloor");
		add = new CUfunction();
		cuModuleGetFunction(add, module, "add");
		sub = new CUfunction();
		cuModuleGetFunction(sub, module, "sub");
		mult = new CUfunction();
		cuModuleGetFunction(mult, module, "mult");
		cuDiv = new CUfunction();
		cuModuleGetFunction(cuDiv, module, "cuDiv");
		accrue = new CUfunction();
		cuModuleGetFunction(accrue, module, "accrue");
		discount = new CUfunction();
		cuModuleGetFunction(accrue, module, "discount");
		reducePartial = new CUfunction();
		cuModuleGetFunction(reducePartial, module, "reducePartial");
	}

	public RandomVariableCudaWithFinalizer(final double time, final CUdeviceptr realizations, final long size) {
		this.time = time;
		this.realizations = realizations;
		this.size = size;
		this.valueIfNonStochastic = Double.NaN;

		// Manage CUdeviceptr
		final WeakReference reference = new WeakReference(this, referenceQueue);
		referenceMap.put(reference, realizations);
	}

	/**
	 * Create a non stochastic random variable, i.e. a constant.
	 *
	 * @param value the value, a constant.
	 */
	public RandomVariableCudaWithFinalizer(final double value) {
		this(-Double.MAX_VALUE, value);
	}

	/**
	 * Create a non stochastic random variable, i.e. a constant.
	 *
	 * @param time the filtration time, set to 0.0 if not used.
	 * @param value the value, a constant.
	 */
	public RandomVariableCudaWithFinalizer(final double time, final double value) {
		super();
		this.time = time;
		this.realizations = null;
		this.size = 1;
		this.valueIfNonStochastic = value;
	}

	/**
	 * Create a stochastic random variable.
	 *
	 * @param time the filtration time, set to 0.0 if not used.
	 * @param realisations the vector of realizations.
	 */
	public RandomVariableCudaWithFinalizer(final double time, final float[] realisations) {
		super();
		this.time = time;
		this.size = realisations.length;
		this.realizations = createCUdeviceptr(realisations);
		this.valueIfNonStochastic = Double.NaN;
	}

	/**
	 * Create a stochastic random variable.
	 *
	 * @param time the filtration time, set to 0.0 if not used.
	 * @param realisations the vector of realizations.
	 */
	public RandomVariableCudaWithFinalizer(final double time, final double[] realisations) {
		this(time, getFloatArray(realisations));
	}

	private CUdeviceptr createCUdeviceptr(final long size) {
		final CUdeviceptr cuDevicePtr = getCUdeviceptr(size);
		return cuDevicePtr;
	}

	public static CUdeviceptr getCUdeviceptr(final long size) {
		CUdeviceptr cuDevicePtr = new CUdeviceptr();
		final int succ = JCudaDriver.cuMemAlloc(cuDevicePtr, size * Sizeof.FLOAT);
		if(succ != 0) {
			cuDevicePtr = null;
			logger.finest("Failed creating device vector "+ cuDevicePtr + " with size=" + size);
		}
		else {
			logger.finest("Creating device vector "+ cuDevicePtr + " with size=" + size);
		}

		return cuDevicePtr;
	}

	/**
	 * Create a vector on device and copy host vector to it.
	 *
	 * @param values Host vector.
	 * @return Pointer to device vector.
	 */
	private CUdeviceptr createCUdeviceptr(final float[] values) {
		final CUdeviceptr cuDevicePtr = createCUdeviceptr(values.length);
		JCudaDriver.cuMemcpyHtoD(cuDevicePtr, Pointer.to(values),
				(long)values.length * Sizeof.FLOAT);
		return cuDevicePtr;
	}

	@Override
	protected void finalize() throws Throwable {
		System.out.println("Finalizing " + realizations);
		if(realizations != null) {
			JCudaDriver.cuMemFree(realizations);
		}
		super.finalize();
	}


	private static float[] getFloatArray(final double[] arrayOfDouble) {
		final float[] arrayOfFloat = new float[arrayOfDouble.length];
		for(int i=0; i quantileEnd) {
			return getQuantileExpectation(quantileEnd, quantileStart);
		}

		throw new UnsupportedOperationException();
		/*
		float[] realizationsSorted = realizations.clone();
		java.util.Arrays.sort(realizationsSorted);

		int indexOfQuantileValueStart	= Math.min(Math.max((int)Math.round((size()+1) * quantileStart - 1), 0), size()-1);
		int indexOfQuantileValueEnd		= Math.min(Math.max((int)Math.round((size()+1) * quantileEnd - 1), 0), size()-1);

		double quantileExpectation = 0.0;
		for (int i=indexOfQuantileValueStart; i<=indexOfQuantileValueEnd;i++) {
			quantileExpectation += realizationsSorted[i];
		}
		quantileExpectation /= indexOfQuantileValueEnd-indexOfQuantileValueStart+1;

		return quantileExpectation;
		 */
	}

	@Override
	public double[] getHistogram(final double[] intervalPoints)
	{
		throw new UnsupportedOperationException();
		/*
		double[] histogramValues = new double[intervalPoints.length+1];

		if(isDeterministic()) {
			java.util.Arrays.fill(histogramValues, 0.0);
			for (int intervalIndex=0; intervalIndex intervalPoints[intervalIndex]) {
					histogramValues[intervalIndex] = 1.0;
					break;
				}
			}
			histogramValues[intervalPoints.length] = 1.0;
		}
		else {
			float[] realizationsSorted = realizations.clone();
			java.util.Arrays.sort(realizationsSorted);

			int sampleIndex=0;
			for (int intervalIndex=0; intervalIndex 0) {
				for(int i=0; i 1) {
			reduced = reduced.reduceBySize(reduceGridSize);
		}
		return reduced.getRealizations()[0];
	}

	private RandomVariableCudaWithFinalizer reduceBySize(final int bySize) {
		final int blockSizeX = bySize;
		final int gridSizeX = (int)Math.ceil((double)size()/2 / blockSizeX);
		final CUdeviceptr reduceVector = getCUdeviceptr(gridSizeX);

		callCudaFunction(reducePartial, new Pointer[] {
				Pointer.to(new int[] { size() }),
				Pointer.to(realizations),
				Pointer.to(reduceVector)},
				gridSizeX, blockSizeX, blockSizeX);

		return new RandomVariableCudaWithFinalizer(0.0, reduceVector, gridSizeX);
	}

	private CUdeviceptr callCudaFunction(final CUfunction function, final Pointer[] arguments) {
		// Allocate device output memory
		final CUdeviceptr result = getCUdeviceptr(size());
		arguments[arguments.length-1] = Pointer.to(result);

		final int blockSizeX = 256;
		final int gridSizeX = (int)Math.ceil((double)size() / blockSizeX);
		callCudaFunction(function, arguments, gridSizeX, blockSizeX, 0);
		return result;
	}

	private CUdeviceptr callCudaFunction(final CUfunction function, final Pointer[] arguments, final int gridSizeX, final int blockSizeX, final int sharedMemorySize) {
		// Allocate device output memory
		final CUdeviceptr result = getCUdeviceptr(size());
		arguments[arguments.length-1] = Pointer.to(result);

		// Set up the kernel parameters: A pointer to an array
		// of pointers which point to the actual values.
		final Pointer kernelParameters = Pointer.to(arguments);

		// Call the kernel function.
		cuLaunchKernel(function,
				gridSizeX,  1, 1,      // Grid dimension
				blockSizeX, 1, 1,      // Block dimension
				sharedMemorySize, null,               // Shared memory size and stream
				kernelParameters, null // Kernel- and extra parameters
				);
		cuCtxSynchronize();
		return result;
	}

	/**
	 * The extension of the given file name is replaced with "ptx".
	 * If the file with the resulting name does not exist, it is
	 * compiled from the given file using NVCC. The name of the
	 * PTX file is returned.
	 *
	 * @param cuFileName The name of the .CU file
	 * @return The name of the PTX file
	 * @throws IOException If an I/O error occurs
	 */
	private static String preparePtxFile(final String cuFileName) throws IOException
	{
		int endIndex = cuFileName.lastIndexOf('.');
		if (endIndex == -1)
		{
			endIndex = cuFileName.length()-1;
		}
		final String ptxFileName = cuFileName.substring(0, endIndex+1)+"ptx";
		final File ptxFile = new File(ptxFileName);
		if (ptxFile.exists()) {
			return ptxFileName;
		}

		final File cuFile = new File(cuFileName);
		if (!cuFile.exists()) {
			throw new IOException("Input file not found: "+cuFileName);
		}
		final String modelString = "-m"+System.getProperty("sun.arch.data.model");
		final String command =
				"nvcc " + modelString + " -ptx "+
						cuFile.getPath()+" -o "+ptxFileName;

		System.out.println("Executing\n"+command);
		final Process process = Runtime.getRuntime().exec(command);

		final String errorMessage =
				new String(toByteArray(process.getErrorStream()));
		final String outputMessage =
				new String(toByteArray(process.getInputStream()));
		int exitValue = 0;
		try
		{
			exitValue = process.waitFor();
		}
		catch (final InterruptedException e)
		{
			Thread.currentThread().interrupt();
			throw new IOException(
					"Interrupted while waiting for nvcc output", e);
		}

		if (exitValue != 0)
		{
			System.out.println("nvcc process exitValue "+exitValue);
			System.out.println("errorMessage:\n"+errorMessage);
			System.out.println("outputMessage:\n"+outputMessage);
			throw new IOException(
					"Could not create .ptx file: "+errorMessage);
		}

		System.out.println("Finished creating PTX file");
		return ptxFileName;
	}

	/**
	 * Fully reads the given InputStream and returns it as a byte array
	 *
	 * @param inputStream The input stream to read
	 * @return The byte array containing the data from the input stream
	 * @throws IOException If an I/O error occurs
	 */
	private static byte[] toByteArray(final InputStream inputStream)
			throws IOException
	{
		final ByteArrayOutputStream baos = new ByteArrayOutputStream();
		final byte buffer[] = new byte[8192];
		while (true)
		{
			final int read = inputStream.read(buffer);
			if (read == -1)
			{
				break;
			}
			baos.write(buffer, 0, read);
		}
		return baos.toByteArray();
	}
}




© 2015 - 2024 Weber Informatics LLC | Privacy Policy