All downloads are free. The search and download functionality uses the official Maven repository.

jcuda.jcufft.JCufft Maven / Gradle / Ivy

There is a newer version: 0.4-rc3.7
Show newest version
/*
 *
 *  * Copyright 2015 Skymind,Inc.
 *  *
 *  *    Licensed under the Apache License, Version 2.0 (the "License");
 *  *    you may not use this file except in compliance with the License.
 *  *    You may obtain a copy of the License at
 *  *
 *  *        http://www.apache.org/licenses/LICENSE-2.0
 *  *
 *  *    Unless required by applicable law or agreed to in writing, software
 *  *    distributed under the License is distributed on an "AS IS" BASIS,
 *  *    WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 *  *    See the License for the specific language governing permissions and
 *  *    limitations under the License.
 *
 *
 */

package jcuda.jcufft;

import jcuda.*;
import jcuda.runtime.JCuda;
import jcuda.runtime.cudaError;
import jcuda.runtime.cudaMemcpyKind;
import jcuda.runtime.cudaStream_t;

/**
 * Java bindings for CUFFT, the NVIDIA CUDA FFT library.
*
* Most comments are taken from the CUFFT library documentation
*
*/ public class JCufft { /** * CUFFT transform direction */ public static final int CUFFT_FORWARD = -1; /** * CUFFT transform direction */ public static final int CUFFT_INVERSE = 1; /** * The flag that indicates whether the native library has been * loaded */ private static boolean initialized = false; /** * Whether a CudaException should be thrown if a method is about * to return a result code that is not cufftResult.CUFFT_SUCCESS */ private static boolean exceptionsEnabled = false; /* Private constructor to prevent instantiation */ private JCufft() { } // Initialize the native library. static { initialize(); } /** * Initializes the native library. Note that this method * does not have to be called explicitly by the user of * the library: The library will automatically be * loaded when this class is loaded. */ public static void initialize() { if (!initialized) { LibUtils.loadLibrary("JCufft"); initialized = true; } } /** * Set the specified log level for the JCufft library.
*
* Currently supported log levels: *
* LOG_QUIET: Never print anything
* LOG_ERROR: Print error messages
* LOG_TRACE: Print a trace of all native function calls
* * @param logLevel The log level to use. */ public static void setLogLevel(LogLevel logLevel) { setLogLevel(logLevel.ordinal()); } private static native void setLogLevel(int logLevel); /** * Enables or disables exceptions. By default, the methods of this class * only return the cufftResult error code from the underlying CUDA function. * If exceptions are enabled, a CudaException with a detailed error * message will be thrown if a method is about to return a result code * that is not cufftResult.CUFFT_SUCCESS * * @param enabled Whether exceptions are enabled */ public static void setExceptionsEnabled(boolean enabled) { exceptionsEnabled = enabled; } /** * If the given result is different to cufftResult.CUFFT_SUCCESS and * exceptions have been enabled, this method will throw a * CudaException with an error message that corresponds to the * given result code. Otherwise, the given result is simply * returned. * * @param result The result to check * @return The result that was given as the parameter * @throws CudaException If exceptions have been enabled and * the given result code is not cufftResult.CUFFT_SUCCESS */ private static int checkResult(int result) { if (exceptionsEnabled && result != cufftResult.CUFFT_SUCCESS) { throw new CudaException(cufftResult.stringFor(result)); } return result; } /** * Writes the CUFFT version into the given argument. * * @param version The version * @return The cufftResult code. */ public static int cufftGetVersion(int version[]) { return cufftGetVersionNative(version); } private static native int cufftGetVersionNative(int version[]); /** *
     * Creates a 1D FFT plan configuration for a specified signal size and data
     * type.
     *
     * cufftResult cufftPlan1d( cufftHandle *plan, int nx, cufftType type, int batch );
     *
     * The batch input parameter tells CUFFT how many 1D transforms to configure.
     *
     * Input
     * ----
     * plan  Pointer to a cufftHandle object
     * nx    The transform size (e.g., 256 for a 256-point FFT)
     * type  The transform data type (e.g., CUFFT_C2C for complex to complex)
     * batch Number of transforms of size nx
     *
     * Output
     * ----
     * plan     Contains a CUFFT 1D plan handle value
     *
     * Return Values
     * ----
     * CUFFT_SETUP_FAILED CUFFT library failed to initialize.
     * CUFFT_INVALID_SIZE The nx parameter is not a supported size.
     * CUFFT_INVALID_TYPE The type parameter is not supported.
     * CUFFT_ALLOC_FAILED Allocation of GPU resources for the plan failed.
     * CUFFT_SUCCESS      CUFFT successfully created the FFT plan.
     *
     * JCUFFT_INTERNAL_ERROR If an internal JCufft error occurred
     * 
     * NOTE: Batch sizes other than 1 for cufftPlan1d() have been 
     * deprecated as of CUDA 6.0RC. Use cufftPlanMany() for 
     * multiple batch execution.
     */
    public static int cufftPlan1d(cufftHandle plan, int nx, int type, int batch)
    {
        // Store the plan configuration in the Java-side handle
        // before the native plan is created
        plan.setDimension(1);
        plan.setType(type);
        plan.setSize(nx, 0, 0);
        plan.setBatchSize(batch);

        // Create the native plan, and let checkResult decide whether
        // the status is returned or reported as an exception
        final int status = cufftPlan1dNative(plan, nx, type, batch);
        return checkResult(status);
    }

    /**
     * Native counterpart of cufftPlan1d
     */
    private static native int cufftPlan1dNative(
        cufftHandle plan, int nx, int type, int batch);


    /**
     * 
     * Creates a 2D FFT plan configuration according to specified signal sizes
     * and data type.
     *
     * cufftResult cufftPlan2d( cufftHandle *plan, int nx, int ny, cufftType type );
     *
     * This function is the same as cufftPlan1d() except that
     * it takes a second size parameter, ny, and does not support batching.
     *
     * Input
     * ----
     * plan Pointer to a cufftHandle object
     * nx   The transform size in the X dimension (number of rows)
     * ny   The transform size in the Y dimension (number of columns)
     * type The transform data type (e.g., CUFFT_C2R for complex to real)
     *
     * Output
     * ----
     * plan Contains a CUFFT 2D plan handle value
     *
     * Return Values
     * ----
     * CUFFT_SETUP_FAILED CUFFT library failed to initialize.
     * CUFFT_INVALID_SIZE The nx or ny parameter is not a supported size.
     * CUFFT_INVALID_TYPE The type parameter is not supported.
     * CUFFT_ALLOC_FAILED Allocation of GPU resources for the plan failed.
     * CUFFT_SUCCESS      CUFFT successfully created the FFT plan.
     *
     * JCUFFT_INTERNAL_ERROR If an internal JCufft error occurred
     * 
     */
    public static int cufftPlan2d(cufftHandle plan, int nx, int ny, int type)
    {
        // Store the plan configuration in the Java-side handle
        // before the native plan is created. The unused third
        // size is set to 0.
        plan.setDimension(2);
        plan.setType(type);
        plan.setSize(nx, ny, 0);

        // Create the native plan, and let checkResult decide whether
        // the status is returned or reported as an exception
        final int status = cufftPlan2dNative(plan, nx, ny, type);
        return checkResult(status);
    }

    /**
     * Native counterpart of cufftPlan2d
     */
    private static native int cufftPlan2dNative(
        cufftHandle plan, int nx, int ny, int type);

    /**
     * 
     * Creates a 3D FFT plan configuration according to specified signal sizes
     * and data type.
     *
     * cufftResult cufftPlan3d( cufftHandle *plan, int nx, int ny, int nz, cufftType type );
     *
     * This function is the same as cufftPlan2d() except that
     * it takes a third size parameter nz.
     *
     * Input
     * ----
     * plan Pointer to a cufftHandle object
     * nx The transform size in the X dimension
     * ny The transform size in the Y dimension
     * nz The transform size in the Z dimension
     * type The transform data type (e.g., CUFFT_R2C for real to complex)
     *
     * Output
     * ----
     * plan Contains a CUFFT 3D plan handle value
     *
     * Return Values
     * ----
     * CUFFT_SETUP_FAILED CUFFT library failed to initialize.
     * CUFFT_INVALID_SIZE Parameter nx, ny, or nz is not a supported size.
     * CUFFT_INVALID_TYPE The type parameter is not supported.
     * CUFFT_ALLOC_FAILED Allocation of GPU resources for the plan failed.
     * CUFFT_SUCCESS      CUFFT successfully created the FFT plan.
     *
     * JCUFFT_INTERNAL_ERROR If an internal JCufft error occurred
     * 
     */
    public static int cufftPlan3d(cufftHandle plan, int nx, int ny, int nz, int type)
    {
        // Store the plan configuration in the Java-side handle
        // before the native plan is created
        plan.setDimension(3);
        plan.setType(type);
        plan.setSize(nx, ny, nz);

        // Create the native plan, and let checkResult decide whether
        // the status is returned or reported as an exception
        final int status = cufftPlan3dNative(plan, nx, ny, nz, type);
        return checkResult(status);
    }

    /**
     * Native counterpart of cufftPlan3d
     */
    private static native int cufftPlan3dNative(
        cufftHandle plan, int nx, int ny, int nz, int type);

    
    /**
     * 
     * Creates a FFT plan configuration of dimension rank, with sizes
     * specified in the array n.
     * 
     * cufftResult cufftPlanMany(cufftHandle *plan, int rank, int *n,
     *     int *inembed, int istride, int idist,
     *     int *onembed, int ostride, int odist,
     *     cufftType type, int batch );
     * 
     * The batch input parameter tells CUFFT how many transforms to 
     * configure in parallel. With this function, batched plans of 
     * any dimension may be created. Input parameters inembed, istride, 
     * and idist and output parameters onembed, ostride, and odist 
     * will allow setup of noncontiguous input data in a future version. 
     * Note that for CUFFT 3.0, these parameters are ignored and the 
     * layout of batched data must be side-by-side and not interleaved.
     *  
     * Input
     * ----
     * plan Pointer to a cufftHandle object
     * rank Dimensionality of the transform (1, 2, or 3)
     * n An array of size rank, describing the size of each dimension
     * inembed Unused: pass NULL
     * istride Unused: pass 1
     * idist Unused: pass 0
     * onembed Unused: pass NULL
     * ostride Unused: pass 1
     * odist Unused: pass 0
     * type Transform data type (e.g., CUFFT_C2C, as per other CUFFT calls)
     * batch Batch size for this transform
     *
     * Output
     * ----
     * plan Contains a CUFFT plan handle
     *
     * Return Values
     * ----
     * CUFFT_SETUP_FAILED CUFFT library failed to initialize.
     * CUFFT_INVALID_SIZE Parameter is not a supported size.
     * CUFFT_INVALID_TYPE The type parameter is not supported     
     * 
*/
    public static int cufftPlanMany(
        cufftHandle plan, int rank, int n[],
        int inembed[], int istride, int idist,
        int onembed[], int ostride, int odist,
        int type, int batch)
    {
        final int status = cufftPlanManyNative(
            plan, rank, n,
            inembed, istride, idist,
            onembed, ostride, odist,
            type, batch);
        return checkResult(status);
    }

    /**
     * Native counterpart of cufftPlanMany
     */
    private static native int cufftPlanManyNative(
        cufftHandle plan, int rank, int n[],
        int inembed[], int istride, int idist,
        int onembed[], int ostride, int odist,
        int type, int batch);


    /**
     * Forwards the arguments to the native cufftMakePlan1d function and
     * passes the returned code through the result check of this class.
     *
     * @param plan The plan handle
     * @param nx The transform size
     * @param type The transform data type
     * @param batch The batch size (marked in the original declaration
     * as: deprecated - use cufftPlanMany)
     * @param workSize The work size array that is passed to the native function
     * @return The cufftResult code
     */
    public static int cufftMakePlan1d(
        cufftHandle plan, int nx, int type, int batch, long workSize[])
    {
        final int status = cufftMakePlan1dNative(plan, nx, type, batch, workSize);
        return checkResult(status);
    }

    /**
     * Native counterpart of cufftMakePlan1d
     */
    private static native int cufftMakePlan1dNative(
        cufftHandle plan, int nx, int type, int batch, long workSize[]);


    /**
     * Forwards the arguments to the native cufftMakePlan2d function and
     * passes the returned code through the result check of this class.
     *
     * @param plan The plan handle
     * @param nx The transform size in the X dimension
     * @param ny The transform size in the Y dimension
     * @param type The transform data type
     * @param workSize The work size array that is passed to the native function
     * @return The cufftResult code
     */
    public static int cufftMakePlan2d(
        cufftHandle plan, int nx, int ny, int type, long workSize[])
    {
        final int status = cufftMakePlan2dNative(plan, nx, ny, type, workSize);
        return checkResult(status);
    }

    /**
     * Native counterpart of cufftMakePlan2d
     */
    private static native int cufftMakePlan2dNative(
        cufftHandle plan, int nx, int ny, int type, long workSize[]);


    /**
     * Forwards the arguments to the native cufftMakePlan3d function and
     * passes the returned code through the result check of this class.
     *
     * @param plan The plan handle
     * @param nx The transform size in the X dimension
     * @param ny The transform size in the Y dimension
     * @param nz The transform size in the Z dimension
     * @param type The transform data type
     * @param workSize The work size array that is passed to the native function
     * @return The cufftResult code
     */
    public static int cufftMakePlan3d(
        cufftHandle plan, int nx, int ny, int nz, int type, long workSize[])
    {
        final int status = cufftMakePlan3dNative(plan, nx, ny, nz, type, workSize);
        return checkResult(status);
    }

    /**
     * Native counterpart of cufftMakePlan3d
     */
    private static native int cufftMakePlan3dNative(
        cufftHandle plan, int nx, int ny, int nz, int type, long workSize[]);


    /**
     * Forwards the arguments to the native cufftMakePlanMany function and
     * passes the returned code through the result check of this class.
     * The parameters correspond to those of cufftPlanMany, with an
     * additional work size array.
     *
     * @return The cufftResult code
     */
    public static int cufftMakePlanMany(
        cufftHandle plan, int rank, int n[],
        int inembed[], int istride, int idist,
        int onembed[], int ostride, int odist,
        int type, int batch, long workSize[])
    {
        final int status = cufftMakePlanManyNative(
            plan, rank, n,
            inembed, istride, idist,
            onembed, ostride, odist,
            type, batch, workSize);
        return checkResult(status);
    }

    /**
     * Native counterpart of cufftMakePlanMany
     */
    private static native int cufftMakePlanManyNative(
        cufftHandle plan, int rank, int n[],
        int inembed[], int istride, int idist,
        int onembed[], int ostride, int odist,
        int type, int batch, long workSize[]);


    /**
     * Forwards the arguments to the native cufftEstimate1d function and
     * passes the returned code through the result check of this class.
     *
     * @param nx The transform size
     * @param type The transform data type
     * @param batch The batch size (marked in the original declaration
     * as: deprecated - use cufftPlanMany)
     * @param workSize The work size array that is passed to the native function
     * @return The cufftResult code
     */
    public static int cufftEstimate1d(
        int nx, int type, int batch, long workSize[])
    {
        final int status = cufftEstimate1dNative(nx, type, batch, workSize);
        return checkResult(status);
    }

    /**
     * Native counterpart of cufftEstimate1d
     */
    private static native int cufftEstimate1dNative(
        int nx, int type, int batch, long workSize[]);


    /**
     * Forwards the arguments to the native cufftEstimate2d function and
     * passes the returned code through the result check of this class.
     *
     * @param nx The transform size in the X dimension
     * @param ny The transform size in the Y dimension
     * @param type The transform data type
     * @param workSize The work size array that is passed to the native function
     * @return The cufftResult code
     */
    public static int cufftEstimate2d(
        int nx, int ny, int type, long workSize[])
    {
        final int status = cufftEstimate2dNative(nx, ny, type, workSize);
        return checkResult(status);
    }

    /**
     * Native counterpart of cufftEstimate2d
     */
    private static native int cufftEstimate2dNative(
        int nx, int ny, int type, long workSize[]);


    /**
     * Forwards the arguments to the native cufftEstimate3d function and
     * passes the returned code through the result check of this class.
     *
     * @param nx The transform size in the X dimension
     * @param ny The transform size in the Y dimension
     * @param nz The transform size in the Z dimension
     * @param type The transform data type
     * @param workSize The work size array that is passed to the native function
     * @return The cufftResult code
     */
    public static int cufftEstimate3d(
        int nx, int ny, int nz, int type, long workSize[])
    {
        final int status = cufftEstimate3dNative(nx, ny, nz, type, workSize);
        return checkResult(status);
    }

    /**
     * Native counterpart of cufftEstimate3d
     */
    private static native int cufftEstimate3dNative(
        int nx, int ny, int nz, int type, long workSize[]);


    /**
     * Forwards the arguments to the native cufftEstimateMany function and
     * passes the returned code through the result check of this class.
     * The parameters correspond to those of cufftPlanMany (without the
     * plan handle), with an additional work size array.
     *
     * @return The cufftResult code
     */
    public static int cufftEstimateMany(
        int rank, int n[],
        int inembed[], int istride, int idist,
        int onembed[], int ostride, int odist,
        int type, int batch, long workSize[])
    {
        final int status = cufftEstimateManyNative(
            rank, n,
            inembed, istride, idist,
            onembed, ostride, odist,
            type, batch, workSize);
        return checkResult(status);
    }

    /**
     * Native counterpart of cufftEstimateMany
     */
    private static native int cufftEstimateManyNative(
        int rank, int n[],
        int inembed[], int istride, int idist,
        int onembed[], int ostride, int odist,
        int type, int batch, long workSize[]);


    /**
     * Forwards the given handle to the native cufftCreate function and
     * passes the returned code through the result check of this class.
     *
     * @param cufftHandle The handle that is passed to the native function
     * @return The cufftResult code
     */
    public static int cufftCreate(cufftHandle cufftHandle)
    {
        final int status = cufftCreateNative(cufftHandle);
        return checkResult(status);
    }

    /**
     * Native counterpart of cufftCreate
     */
    private static native int cufftCreateNative(cufftHandle cufftHandle);


    /**
     * Forwards the arguments to the native cufftGetSize1d function and
     * passes the returned code through the result check of this class.
     *
     * @param handle The plan handle
     * @param nx The transform size
     * @param type The transform data type
     * @param batch The batch size
     * @param workSize The work size array that is passed to the native function
     * @return The cufftResult code
     */
    public static int cufftGetSize1d(
        cufftHandle handle, int nx, int type, int batch, long workSize[])
    {
        final int status = cufftGetSize1dNative(handle, nx, type, batch, workSize);
        return checkResult(status);
    }

    /**
     * Native counterpart of cufftGetSize1d
     */
    private static native int cufftGetSize1dNative(
        cufftHandle handle, int nx, int type, int batch, long workSize[]);


    /**
     * Forwards the arguments to the native cufftGetSize2d function and
     * passes the returned code through the result check of this class.
     *
     * @param handle The plan handle
     * @param nx The transform size in the X dimension
     * @param ny The transform size in the Y dimension
     * @param type The transform data type
     * @param workSize The work size array that is passed to the native function
     * @return The cufftResult code
     */
    public static int cufftGetSize2d(
        cufftHandle handle, int nx, int ny, int type, long workSize[])
    {
        final int status = cufftGetSize2dNative(handle, nx, ny, type, workSize);
        return checkResult(status);
    }

    /**
     * Native counterpart of cufftGetSize2d
     */
    private static native int cufftGetSize2dNative(
        cufftHandle handle, int nx, int ny, int type, long workSize[]);


    /**
     * Forwards the arguments to the native cufftGetSize3d function and
     * passes the returned code through the result check of this class.
     *
     * @param handle The plan handle
     * @param nx The transform size in the X dimension
     * @param ny The transform size in the Y dimension
     * @param nz The transform size in the Z dimension
     * @param type The transform data type
     * @param workSize The work size array that is passed to the native function
     * @return The cufftResult code
     */
    public static int cufftGetSize3d(
        cufftHandle handle, int nx, int ny, int nz, int type, long workSize[])
    {
        final int status = cufftGetSize3dNative(handle, nx, ny, nz, type, workSize);
        return checkResult(status);
    }

    /**
     * Native counterpart of cufftGetSize3d
     */
    private static native int cufftGetSize3dNative(
        cufftHandle handle, int nx, int ny, int nz, int type, long workSize[]);


    /**
     * Forwards the arguments to the native cufftGetSizeMany function and
     * passes the returned code through the result check of this class.
     * The parameters correspond to those of cufftPlanMany, with an
     * additional work area array.
     *
     * @return The cufftResult code
     */
    public static int cufftGetSizeMany(
        cufftHandle handle, int rank, int n[],
        int inembed[], int istride, int idist,
        int onembed[], int ostride, int odist,
        int type, int batch, long workArea[])
    {
        final int status = cufftGetSizeManyNative(
            handle, rank, n,
            inembed, istride, idist,
            onembed, ostride, odist,
            type, batch, workArea);
        return checkResult(status);
    }

    /**
     * Native counterpart of cufftGetSizeMany
     */
    private static native int cufftGetSizeManyNative(
        cufftHandle handle, int rank, int n[],
        int inembed[], int istride, int idist,
        int onembed[], int ostride, int odist,
        int type, int batch, long workArea[]);


    /**
     * Forwards the arguments to the native cufftGetSize function and
     * passes the returned code through the result check of this class.
     *
     * @param handle The plan handle
     * @param workSize The work size array that is passed to the native function
     * @return The cufftResult code
     */
    public static int cufftGetSize(cufftHandle handle, long workSize[])
    {
        final int status = cufftGetSizeNative(handle, workSize);
        return checkResult(status);
    }

    /**
     * Native counterpart of cufftGetSize
     */
    private static native int cufftGetSizeNative(
        cufftHandle handle, long workSize[]);


    /**
     * Forwards the arguments to the native cufftSetWorkArea function and
     * passes the returned code through the result check of this class.
     *
     * @param plan The plan handle
     * @param workArea The pointer that is passed as the work area
     * @return The cufftResult code
     */
    public static int cufftSetWorkArea(cufftHandle plan, Pointer workArea)
    {
        final int status = cufftSetWorkAreaNative(plan, workArea);
        return checkResult(status);
    }

    /**
     * Native counterpart of cufftSetWorkArea
     */
    private static native int cufftSetWorkAreaNative(
        cufftHandle plan, Pointer workArea);


    /**
     * Forwards the arguments to the native cufftSetAutoAllocation function
     * and passes the returned code through the result check of this class.
     *
     * @param plan The plan handle
     * @param autoAllocate The auto allocation flag that is passed
     * to the native function
     * @return The cufftResult code
     */
    public static int cufftSetAutoAllocation(cufftHandle plan, int autoAllocate)
    {
        final int status = cufftSetAutoAllocationNative(plan, autoAllocate);
        return checkResult(status);
    }

    /**
     * Native counterpart of cufftSetAutoAllocation
     */
    private static native int cufftSetAutoAllocationNative(
        cufftHandle plan, int autoAllocate);


    /**
     *
     * Frees all GPU resources associated with a CUFFT plan and destroys the
     * internal plan data structure.
     *
     * cufftResult cufftDestroy( cufftHandle plan );
     *
     * This function should be called once a plan
     * is no longer needed to avoid wasting GPU memory.
     *
     * Input
     * ----
     * plan The cufftHandle object of the plan to be destroyed.
     *
     * Return Values
     * ----
     * CUFFT_SETUP_FAILED    CUFFT library failed to initialize.
     * CUFFT_SHUTDOWN_FAILED CUFFT library failed to shut down.
     * CUFFT_INVALID_PLAN    The plan parameter is not a valid handle.
     * CUFFT_SUCCESS         CUFFT successfully destroyed the FFT plan.
     *
     * JCUFFT_INTERNAL_ERROR If an internal JCufft error occurred
     * 
     */
    public static int cufftDestroy(cufftHandle plan)
    {
        // Destroy the native plan, and let checkResult decide whether
        // the status is returned or reported as an exception
        final int status = cufftDestroyNative(plan);
        return checkResult(status);
    }

    /**
     * Native counterpart of cufftDestroy
     */
    private static native int cufftDestroyNative(cufftHandle plan);

    /**
     * 
     * Associates a CUDA stream with a CUFFT plan.
     * 
     * cufftResult cufftSetStream( cufftHandle plan, cudaStream_t stream );
     *
     * All kernel launches made during plan execution are now done through 
     * the associated stream, enabling overlap with activity in other 
     * streams (for example, data copying). The association remains until 
     * the plan is destroyed or the stream is changed with another call 
     * to cufftSetStream(). 
     *
     * Input
     * plan The cufftHandle object to associate with the stream
     * stream A valid CUDA stream created with cudaStreamCreate() (or 0
     * for the default stream)
     * 
     * Return Values
     * CUFFT_INVALID_PLAN The plan parameter is not a valid handle.
     * CUFFT_SUCCESS The stream was successfully associated with the plan.
     * 
*/
    public static int cufftSetStream(cufftHandle plan, cudaStream_t stream)
    {
        // Associate the stream with the native plan, and let checkResult
        // decide whether the status is returned or reported as an exception
        final int status = cufftSetStreamNative(plan, stream);
        return checkResult(status);
    }

    /**
     * Native counterpart of cufftSetStream
     */
    private static native int cufftSetStreamNative(
        cufftHandle plan, cudaStream_t stream);


    /**
     *
     * Configures the layout of CUFFT output in FFTW-compatible modes.
     * 
     * When FFTW compatibility is desired, it can be configured for padding
     * only, for asymmetric complex inputs only, or to be fully compatible. 
     *
     * Input
     * plan The cufftHandle object of the plan whose compatibility mode is set
     * mode The cufftCompatibility option to be used:
     *     CUFFT_COMPATIBILITY_NATIVE:
     *     Disable any FFTW compatibility mode.
     *     CUFFT_COMPATIBILITY_FFTW_PADDING:
     *     Support FFTW data padding. (Default)
     *     CUFFT_COMPATIBILITY_FFTW_ASYMMETRIC:
     *     Waive the C2R symmetry requirement.
     *     Should be used with asymmetric input.
     *     CUFFT_COMPATIBILITY_FFTW_ALL:
     *     Enable full FFTW compatibility.
     *     
     * Return Values
     * CUFFT_SETUP_FAILED CUFFT library failed to initialize.
     * CUFFT_INVALID_PLAN The plan parameter is not a valid handle.
     * CUFFT_SUCCESS CUFFT successfully executed the FFT plan.
     * 
*/ public static int cufftSetCompatibilityMode(cufftHandle plan, int mode) { return checkResult(cufftSetCompatibilityModeNative(plan, mode)); } private static native int cufftSetCompatibilityModeNative(cufftHandle plan, int mode); //=== Single precision =================================================== /** *
     * Executes a CUFFT complex-to-complex transform plan.
     *
     * cufftResult cufftExecC2C( cufftHandle plan, cufftComplex *idata, cufftComplex *odata, int direction );
     *
     * CUFFT uses as input data the GPU memory pointed to by the idata parameter. This
     * function stores the Fourier coefficients in the odata array. If idata and
     * odata are the same, this method does an in-place transform.
     *
     * Input
     * ----
     * plan      The cufftHandle object for the plan to update
     * idata     Pointer to the input data (in GPU memory) to transform
     * odata     Pointer to the output data (in GPU memory)
     * direction The transform direction: CUFFT_FORWARD or CUFFT_INVERSE
     *
     * Output
     * ----
     * odata Contains the complex Fourier coefficients
     *
     * Return Values
     * ----
     * CUFFT_SETUP_FAILED  CUFFT library failed to initialize.
     * CUFFT_INVALID_PLAN  The plan parameter is not a valid handle.
     * CUFFT_INVALID_VALUE The idata, odata, and/or direction parameter is not valid.
     * CUFFT_EXEC_FAILED   CUFFT failed to execute the transform on GPU.
     * CUFFT_SUCCESS       CUFFT successfully executed the FFT plan
     *
     * JCUFFT_INTERNAL_ERROR If an internal JCufft error occurred
     * 
     */
    public static int cufftExecC2C(cufftHandle plan, Pointer cIdata, Pointer cOdata, int direction)
    {
        // Execute the native transform, and let checkResult decide whether
        // the status is returned or reported as an exception
        final int status = cufftExecC2CNative(plan, cIdata, cOdata, direction);
        return checkResult(status);
    }

    /**
     * Native counterpart of cufftExecC2C
     */
    private static native int cufftExecC2CNative(
        cufftHandle plan, Pointer cIdata, Pointer cOdata, int direction);


    /**
     * Convenience method for {@link JCufft#cufftExecC2C(cufftHandle, Pointer, Pointer, int)}.
     * Accepts arrays for input and output data and automatically performs the host-device 
     * and device-host copies.
     * <p>
     * Temporary device buffers are allocated for the input and - unless
     * both arguments refer to the same array, which requests an in-place
     * transform - for the output. These buffers are released on every
     * path, including all failure paths.
     *
     * @param plan The plan to execute
     * @param cIdata The host array containing the input data
     * @param cOdata The host array that receives the output data. This
     * may be the same array as the input array.
     * @param direction The transform direction, which is passed on to
     * the pointer-based method
     * @return The result code of the transform, or
     * cufftResult.JCUFFT_INTERNAL_ERROR if one of the CUDA runtime
     * calls (allocation, copy or free) failed
     * @throws CudaException If exceptions have been enabled and either
     * a CUDA runtime call failed or the transform did not return
     * cufftResult.CUFFT_SUCCESS
     *
     * @see JCufft#cufftExecC2C(cufftHandle, Pointer, Pointer, int)
     */
    public static int cufftExecC2C(cufftHandle plan, float cIdata[], float cOdata[], int direction)
    {
        boolean inPlace = (cIdata == cOdata);

        Pointer hostCIdata = Pointer.to(cIdata);
        Pointer hostCOdata = inPlace ? hostCIdata : Pointer.to(cOdata);

        // Compute the buffer sizes as long values, so that the
        // multiplication can not overflow the int range
        long inputBytes = (long) cIdata.length * Sizeof.FLOAT;
        long outputBytes = (long) cOdata.length * Sizeof.FLOAT;

        // For in-place transforms, the output device data is
        // the same as the input device data
        Pointer deviceCIdata = new Pointer();
        Pointer deviceCOdata = inPlace ? deviceCIdata : new Pointer();
        boolean inputAllocated = false;
        boolean outputAllocated = false;

        int cudaResult = cudaError.cudaSuccess;
        int result = cufftResult.CUFFT_SUCCESS;
        try
        {
            // Allocate space for the input data on the device
            cudaResult = JCuda.cudaMalloc(deviceCIdata, inputBytes);
            inputAllocated = (cudaResult == cudaError.cudaSuccess);

            // Allocate the output device data if the
            // transform is not in-place
            if (inputAllocated && !inPlace)
            {
                cudaResult = JCuda.cudaMalloc(deviceCOdata, outputBytes);
                outputAllocated = (cudaResult == cudaError.cudaSuccess);
            }

            // Copy the host input data to the device
            if (cudaResult == cudaError.cudaSuccess)
            {
                cudaResult = JCuda.cudaMemcpy(deviceCIdata, hostCIdata, inputBytes, cudaMemcpyKind.cudaMemcpyHostToDevice);
            }

            if (cudaResult == cudaError.cudaSuccess)
            {
                // Execute the transform. If exceptions are enabled and
                // the transform fails, the exception carrying the actual
                // error passes through the 'finally' block to the caller.
                result = JCufft.cufftExecC2C(plan, deviceCIdata, deviceCOdata, direction);

                // Copy the device output data to the host
                if (result == cufftResult.CUFFT_SUCCESS)
                {
                    cudaResult = JCuda.cudaMemcpy(hostCOdata, deviceCOdata, outputBytes, cudaMemcpyKind.cudaMemcpyDeviceToHost);
                }
            }
        }
        finally
        {
            // Free the device data. Both buffers are released
            // even if releasing the first one fails.
            int freeResult = cudaError.cudaSuccess;
            if (inputAllocated)
            {
                freeResult = JCuda.cudaFree(deviceCIdata);
            }
            if (outputAllocated)
            {
                int outputFreeResult = JCuda.cudaFree(deviceCOdata);
                if (freeResult == cudaError.cudaSuccess)
                {
                    freeResult = outputFreeResult;
                }
            }

            // A failure to free the memory is only reported when
            // there is no earlier error that should be reported
            if (result == cufftResult.CUFFT_SUCCESS && cudaResult == cudaError.cudaSuccess)
            {
                cudaResult = freeResult;
            }
        }

        if (cudaResult != cudaError.cudaSuccess)
        {
            if (exceptionsEnabled)
            {
                throw new CudaException("JCuda error: "+cudaError.stringFor(cudaResult));
            }
            return cufftResult.JCUFFT_INTERNAL_ERROR;
        }
        return result;
    }



    /**
     * 
     * Executes a CUFFT real-to-complex (implicitly forward) transform plan.
     *
     * cufftResult cufftExecR2C( cufftHandle plan, cufftReal *idata, cufftComplex *odata );
     *
     * CUFFT uses as input data the GPU memory pointed to by the idata
     * parameter. This function stores the non-redundant Fourier coefficients
     * in the odata array. If idata and odata are the same, this method does
     * an in-place transform (See CUFFT documentation for details on real
     * data FFTs.)
     *
     * Input
     * ----
     * plan      The cufftHandle object for the plan to update
     * idata     Pointer to the input data (in GPU memory) to transform
     * odata     Pointer to the output data (in GPU memory)
     * (There is no direction parameter: this transform is implicitly forward)
     *
     * Output
     * ----
     * odata Contains the complex Fourier coefficients
     *
     * Return Values
     * ----
     * CUFFT_SETUP_FAILED CUFFT library failed to initialize.
     * CUFFT_INVALID_PLAN The plan parameter is not a valid handle.
     * CUFFT_INVALID_VALUE The idata and/or odata parameter is not valid.
     * CUFFT_EXEC_FAILED CUFFT failed to execute the transform on GPU.
     * CUFFT_SUCCESS CUFFT successfully executed the FFT plan.
     *
     * JCUFFT_INTERNAL_ERROR If an internal JCufft error occurred
     * 
     */
    public static int cufftExecR2C(cufftHandle plan, Pointer rIdata, Pointer cOdata)
    {
        // Execute the native transform, and let checkResult decide whether
        // the status is returned or reported as an exception
        final int status = cufftExecR2CNative(plan, rIdata, cOdata);
        return checkResult(status);
    }

    /**
     * Native counterpart of cufftExecR2C
     */
    private static native int cufftExecR2CNative(
        cufftHandle plan, Pointer rIdata, Pointer cOdata);



    /**
     * Convenience method for {@link JCufft#cufftExecR2C(cufftHandle, Pointer, Pointer)}.
     * Accepts arrays for input and output data and automatically performs the host-device
     * and device-host copies.
     * <p>
     * Temporary device buffers are allocated for the input and - unless
     * both arguments refer to the same array, which requests an in-place
     * transform - for the output. These buffers are released on every
     * path, including all failure paths.
     *
     * @param plan The plan to execute
     * @param rIdata The host array containing the real input data
     * @param cOdata The host array that receives the output data. This
     * may be the same array as the input array.
     * @return The result code of the transform, or
     * cufftResult.JCUFFT_INTERNAL_ERROR if one of the CUDA runtime
     * calls (allocation, copy or free) failed
     * @throws CudaException If exceptions have been enabled and either
     * a CUDA runtime call failed or the transform did not return
     * cufftResult.CUFFT_SUCCESS
     *
     * @see JCufft#cufftExecR2C(cufftHandle, Pointer, Pointer)
     */
    public static int cufftExecR2C(cufftHandle plan, float rIdata[], float cOdata[])
    {
        boolean inPlace = (rIdata == cOdata);

        Pointer hostRIdata = Pointer.to(rIdata);
        Pointer hostCOdata = inPlace ? hostRIdata : Pointer.to(cOdata);

        // Compute the buffer sizes as long values, so that the
        // multiplication can not overflow the int range
        long inputBytes = (long) rIdata.length * Sizeof.FLOAT;
        long outputBytes = (long) cOdata.length * Sizeof.FLOAT;

        // For in-place transforms, the output device data is
        // the same as the input device data
        Pointer deviceRIdata = new Pointer();
        Pointer deviceCOdata = inPlace ? deviceRIdata : new Pointer();
        boolean inputAllocated = false;
        boolean outputAllocated = false;

        int cudaResult = cudaError.cudaSuccess;
        int result = cufftResult.CUFFT_SUCCESS;
        try
        {
            // Allocate space for the input data on the device
            cudaResult = JCuda.cudaMalloc(deviceRIdata, inputBytes);
            inputAllocated = (cudaResult == cudaError.cudaSuccess);

            // Allocate the output device data if the transform is not
            // in-place. If this fails, the already allocated input
            // buffer is released in the 'finally' block.
            if (inputAllocated && !inPlace)
            {
                cudaResult = JCuda.cudaMalloc(deviceCOdata, outputBytes);
                outputAllocated = (cudaResult == cudaError.cudaSuccess);
            }

            // Copy the host input data to the device
            if (cudaResult == cudaError.cudaSuccess)
            {
                cudaResult = JCuda.cudaMemcpy(deviceRIdata, hostRIdata, inputBytes, cudaMemcpyKind.cudaMemcpyHostToDevice);
            }

            if (cudaResult == cudaError.cudaSuccess)
            {
                // Execute the transform. If exceptions are enabled and
                // the transform fails, the exception carrying the actual
                // error passes through the 'finally' block to the caller.
                result = JCufft.cufftExecR2C(plan, deviceRIdata, deviceCOdata);

                // Copy the device output data to the host
                if (result == cufftResult.CUFFT_SUCCESS)
                {
                    cudaResult = JCuda.cudaMemcpy(hostCOdata, deviceCOdata, outputBytes, cudaMemcpyKind.cudaMemcpyDeviceToHost);
                }
            }
        }
        finally
        {
            // Free the device data. Both buffers are released
            // even if releasing the first one fails.
            int freeResult = cudaError.cudaSuccess;
            if (inputAllocated)
            {
                freeResult = JCuda.cudaFree(deviceRIdata);
            }
            if (outputAllocated)
            {
                int outputFreeResult = JCuda.cudaFree(deviceCOdata);
                if (freeResult == cudaError.cudaSuccess)
                {
                    freeResult = outputFreeResult;
                }
            }

            // A failure to free the memory is only reported when
            // there is no earlier error that should be reported
            if (result == cufftResult.CUFFT_SUCCESS && cudaResult == cudaError.cudaSuccess)
            {
                cudaResult = freeResult;
            }
        }

        if (cudaResult != cudaError.cudaSuccess)
        {
            if (exceptionsEnabled)
            {
                throw new CudaException("JCuda error: "+cudaError.stringFor(cudaResult));
            }
            return cufftResult.JCUFFT_INTERNAL_ERROR;
        }
        return result;
    }








    /**
     * Runs a CUFFT complex-to-real transform plan. Such a transform is
     * implicitly an inverse transform.
     * <pre>
     * cufftResult cufftExecC2R( cufftHandle plan, cufftComplex *idata, cufftReal *odata );
     * </pre>
     * The GPU memory referenced by idata is used as the input. It contains
     * only the non-redundant complex Fourier coefficients. The real output
     * values are written to the odata array. When idata and odata refer to
     * the same memory, the transform is performed in-place. (The CUFFT
     * documentation gives details on FFTs of real data.)
     *
     * @param plan The cufftHandle object for the plan to update
     * @param cIdata Pointer to the complex input data (in GPU memory) to transform
     * @param rOdata Pointer to the real output data (in GPU memory). After the
     * call, it contains the real-valued output data
     * @return One of the following result codes:
     * <pre>
     * CUFFT_SETUP_FAILED  CUFFT library failed to initialize.
     * CUFFT_INVALID_PLAN  The plan parameter is not a valid handle.
     * CUFFT_INVALID_VALUE The idata and/or odata parameter is not valid.
     * CUFFT_EXEC_FAILED   CUFFT failed to execute the transform on GPU.
     * CUFFT_SUCCESS       CUFFT successfully executed the FFT plan.
     *
     * JCUFFT_INTERNAL_ERROR If an internal JCufft error occurred
     * </pre>
     */
    public static int cufftExecC2R(cufftHandle plan, Pointer cIdata, Pointer rOdata)
    {
        // Run the native transform first, then pass its status through the
        // common result check
        int status = cufftExecC2RNative(plan, cIdata, rOdata);
        return checkResult(status);
    }
    private static native int cufftExecC2RNative(cufftHandle plan, Pointer cIdata, Pointer rOdata);



    /**
     * Convenience method for {@link JCufft#cufftExecC2R(cufftHandle, Pointer, Pointer)}.
     * Accepts arrays for input and output data and automatically performs the host-device
     * and device-host copies.
     * <p>
     * The device buffers that are allocated for the transform are released
     * again on every exit path of this method. If {@code cIdata} and
     * {@code rOdata} are the same array, a single device buffer is used for
     * input and output.
     *
     * @param plan The cufftHandle object for the plan to execute
     * @param cIdata The host array containing the complex input data
     * @param rOdata The host array that receives the real output data
     * @return The result code of the transform, or
     * cufftResult.JCUFFT_INTERNAL_ERROR if allocating, copying or freeing
     * the device memory failed
     * @throws CudaException If exceptions are enabled and one of the steps
     * did not succeed
     *
     * @see JCufft#cufftExecC2R(cufftHandle, Pointer, Pointer)
     */
    public static int cufftExecC2R(cufftHandle plan, float cIdata[], float rOdata[])
    {
        int cudaResult = 0;

        boolean inPlace = (cIdata == rOdata);

        // Compute the buffer sizes as long values, so that the byte
        // count of large arrays does not overflow the int range
        long inputBytes = (long)cIdata.length * Sizeof.FLOAT;
        long outputBytes = (long)rOdata.length * Sizeof.FLOAT;

        // Allocate space for the input data on the device
        Pointer hostCIdata = Pointer.to(cIdata);
        Pointer deviceCIdata = new Pointer();
        cudaResult = JCuda.cudaMalloc(deviceCIdata, inputBytes);
        if (cudaResult != cudaError.cudaSuccess)
        {
            if (exceptionsEnabled)
            {
                throw new CudaException("JCuda error: "+cudaError.stringFor(cudaResult));
            }
            return cufftResult.JCUFFT_INTERNAL_ERROR;
        }

        // Set the output device data to be equal to the input
        // device data for in-place transforms, or allocate
        // the output device data if the transform is not
        // in-place
        Pointer hostROdata = null;
        Pointer deviceROdata = null;
        if (inPlace)
        {
            hostROdata = hostCIdata;
            deviceROdata = deviceCIdata;
        }
        else
        {
            hostROdata = Pointer.to(rOdata);
            deviceROdata = new Pointer();
            cudaResult = JCuda.cudaMalloc(deviceROdata, outputBytes);
            if (cudaResult != cudaError.cudaSuccess)
            {
                JCuda.cudaFree(deviceCIdata);
                if (exceptionsEnabled)
                {
                    throw new CudaException("JCuda error: "+cudaError.stringFor(cudaResult));
                }
                return cufftResult.JCUFFT_INTERNAL_ERROR;
            }
        }

        // Copy the host input data to the device
        cudaResult = JCuda.cudaMemcpy(deviceCIdata, hostCIdata, inputBytes, cudaMemcpyKind.cudaMemcpyHostToDevice);
        if (cudaResult != cudaError.cudaSuccess)
        {
            JCuda.cudaFree(deviceCIdata);
            if (!inPlace)
            {
                JCuda.cudaFree(deviceROdata);
            }
            if (exceptionsEnabled)
            {
                throw new CudaException("JCuda error: "+cudaError.stringFor(cudaResult));
            }
            return cufftResult.JCUFFT_INTERNAL_ERROR;
        }

        // Execute the transform
        int result = cufftResult.CUFFT_SUCCESS;
        try
        {
            result = JCufft.cufftExecC2R(plan, deviceCIdata, deviceROdata);
        }
        catch (CudaException e)
        {
            JCuda.cudaFree(deviceCIdata);
            if (!inPlace)
            {
                JCuda.cudaFree(deviceROdata);
            }
            if (exceptionsEnabled)
            {
                // Pass on the original exception, because it
                // describes the actual cause of the failure
                throw e;
            }
            return cufftResult.JCUFFT_INTERNAL_ERROR;
        }
        if (result != cufftResult.CUFFT_SUCCESS)
        {
            // The transform failed without an exception: Release the
            // device buffers before reporting the CUFFT result code
            JCuda.cudaFree(deviceCIdata);
            if (!inPlace)
            {
                JCuda.cudaFree(deviceROdata);
            }
            if (exceptionsEnabled)
            {
                throw new CudaException(cufftResult.stringFor(result));
            }
            return result;
        }

        // Copy the device output data to the host
        cudaResult = JCuda.cudaMemcpy(hostROdata, deviceROdata, outputBytes, cudaMemcpyKind.cudaMemcpyDeviceToHost);
        if (cudaResult != cudaError.cudaSuccess)
        {
            JCuda.cudaFree(deviceCIdata);
            if (!inPlace)
            {
                JCuda.cudaFree(deviceROdata);
            }
            if (exceptionsEnabled)
            {
                throw new CudaException("JCuda error: "+cudaError.stringFor(cudaResult));
            }
            return cufftResult.JCUFFT_INTERNAL_ERROR;
        }

        // Free the device data. Both buffers are released before a
        // failure is reported, so that an error while freeing the
        // input buffer does not leak the output buffer
        int freeInputResult = JCuda.cudaFree(deviceCIdata);
        int freeOutputResult = cudaError.cudaSuccess;
        if (!inPlace)
        {
            freeOutputResult = JCuda.cudaFree(deviceROdata);
        }
        cudaResult = (freeInputResult != cudaError.cudaSuccess) ? freeInputResult : freeOutputResult;
        if (cudaResult != cudaError.cudaSuccess)
        {
            if (exceptionsEnabled)
            {
                throw new CudaException("JCuda error: "+cudaError.stringFor(cudaResult));
            }
            return cufftResult.JCUFFT_INTERNAL_ERROR;
        }
        return result;
    }





    //=== Double precision ===================================================

    /**
     * Runs a CUFFT complex-to-complex transform plan for double precision
     * values.
     * <pre>
     * cufftResult cufftExecZ2Z( cufftHandle plan, cufftDoubleComplex *idata, cufftDoubleComplex *odata, int direction );
     * </pre>
     * The GPU memory referenced by idata is used as the input, and the
     * Fourier coefficients are written to the odata array. When idata and
     * odata refer to the same memory, the transform is performed in-place.
     *
     * @param plan The cufftHandle object for the plan to update
     * @param cIdata Pointer to the input data (in GPU memory) to transform
     * @param cOdata Pointer to the output data (in GPU memory). After the
     * call, it contains the complex Fourier coefficients
     * @param direction The transform direction: CUFFT_FORWARD or CUFFT_INVERSE
     * @return One of the following result codes:
     * <pre>
     * CUFFT_SETUP_FAILED  CUFFT library failed to initialize.
     * CUFFT_INVALID_PLAN  The plan parameter is not a valid handle.
     * CUFFT_INVALID_VALUE The idata, odata, and/or direction parameter is not valid.
     * CUFFT_EXEC_FAILED   CUFFT failed to execute the transform on GPU.
     * CUFFT_SUCCESS       CUFFT successfully executed the FFT plan
     *
     * JCUFFT_INTERNAL_ERROR If an internal JCufft error occurred
     * </pre>
     */
    public static int cufftExecZ2Z(cufftHandle plan, Pointer cIdata, Pointer cOdata, int direction)
    {
        // Run the native transform first, then pass its status through the
        // common result check
        int status = cufftExecZ2ZNative(plan, cIdata, cOdata, direction);
        return checkResult(status);
    }
    private static native int cufftExecZ2ZNative(cufftHandle plan, Pointer cIdata, Pointer cOdata, int direction);


    /**
     * Convenience method for {@link JCufft#cufftExecZ2Z(cufftHandle, Pointer, Pointer, int)}.
     * Accepts arrays for input and output data and automatically performs the host-device
     * and device-host copies.
     * <p>
     * The device buffers that are allocated for the transform are released
     * again on every exit path of this method. If {@code cIdata} and
     * {@code cOdata} are the same array, a single device buffer is used for
     * input and output.
     *
     * @param plan The cufftHandle object for the plan to execute
     * @param cIdata The host array containing the input data
     * @param cOdata The host array that receives the output data
     * @param direction The transform direction: CUFFT_FORWARD or CUFFT_INVERSE
     * @return The result code of the transform, or
     * cufftResult.JCUFFT_INTERNAL_ERROR if allocating, copying or freeing
     * the device memory failed
     * @throws CudaException If exceptions are enabled and one of the steps
     * did not succeed
     *
     * @see JCufft#cufftExecZ2Z(cufftHandle, Pointer, Pointer, int)
     */
    public static int cufftExecZ2Z(cufftHandle plan, double cIdata[], double cOdata[], int direction)
    {
        int cudaResult = 0;

        boolean inPlace = (cIdata == cOdata);

        // Compute the buffer sizes as long values, so that the byte
        // count of large arrays does not overflow the int range
        long inputBytes = (long)cIdata.length * Sizeof.DOUBLE;
        long outputBytes = (long)cOdata.length * Sizeof.DOUBLE;

        // Allocate space for the input data on the device
        Pointer hostCIdata = Pointer.to(cIdata);
        Pointer deviceCIdata = new Pointer();
        cudaResult = JCuda.cudaMalloc(deviceCIdata, inputBytes);
        if (cudaResult != cudaError.cudaSuccess)
        {
            if (exceptionsEnabled)
            {
                throw new CudaException("JCuda error: "+cudaError.stringFor(cudaResult));
            }
            return cufftResult.JCUFFT_INTERNAL_ERROR;
        }

        // Set the output device data to be equal to the input
        // device data for in-place transforms, or allocate
        // the output device data if the transform is not
        // in-place
        Pointer hostCOdata = null;
        Pointer deviceCOdata = null;
        if (inPlace)
        {
            hostCOdata = hostCIdata;
            deviceCOdata = deviceCIdata;
        }
        else
        {
            hostCOdata = Pointer.to(cOdata);
            deviceCOdata = new Pointer();
            cudaResult = JCuda.cudaMalloc(deviceCOdata, outputBytes);
            if (cudaResult != cudaError.cudaSuccess)
            {
                JCuda.cudaFree(deviceCIdata);
                if (exceptionsEnabled)
                {
                    throw new CudaException("JCuda error: "+cudaError.stringFor(cudaResult));
                }
                return cufftResult.JCUFFT_INTERNAL_ERROR;
            }
        }

        // Copy the host input data to the device
        cudaResult = JCuda.cudaMemcpy(deviceCIdata, hostCIdata, inputBytes, cudaMemcpyKind.cudaMemcpyHostToDevice);
        if (cudaResult != cudaError.cudaSuccess)
        {
            JCuda.cudaFree(deviceCIdata);
            if (!inPlace)
            {
                JCuda.cudaFree(deviceCOdata);
            }
            if (exceptionsEnabled)
            {
                throw new CudaException("JCuda error: "+cudaError.stringFor(cudaResult));
            }
            return cufftResult.JCUFFT_INTERNAL_ERROR;
        }

        // Execute the transform
        int result = cufftResult.CUFFT_SUCCESS;
        try
        {
            result = JCufft.cufftExecZ2Z(plan, deviceCIdata, deviceCOdata, direction);
        }
        catch (CudaException e)
        {
            JCuda.cudaFree(deviceCIdata);
            if (!inPlace)
            {
                JCuda.cudaFree(deviceCOdata);
            }
            if (exceptionsEnabled)
            {
                // Pass on the original exception, because it
                // describes the actual cause of the failure
                throw e;
            }
            return cufftResult.JCUFFT_INTERNAL_ERROR;
        }
        if (result != cufftResult.CUFFT_SUCCESS)
        {
            // The transform failed without an exception: Release the
            // device buffers before reporting the CUFFT result code
            JCuda.cudaFree(deviceCIdata);
            if (!inPlace)
            {
                JCuda.cudaFree(deviceCOdata);
            }
            if (exceptionsEnabled)
            {
                throw new CudaException(cufftResult.stringFor(result));
            }
            return result;
        }

        // Copy the device output data to the host
        cudaResult = JCuda.cudaMemcpy(hostCOdata, deviceCOdata, outputBytes, cudaMemcpyKind.cudaMemcpyDeviceToHost);
        if (cudaResult != cudaError.cudaSuccess)
        {
            JCuda.cudaFree(deviceCIdata);
            if (!inPlace)
            {
                JCuda.cudaFree(deviceCOdata);
            }
            if (exceptionsEnabled)
            {
                throw new CudaException("JCuda error: "+cudaError.stringFor(cudaResult));
            }
            return cufftResult.JCUFFT_INTERNAL_ERROR;
        }

        // Free the device data. Both buffers are released before a
        // failure is reported, so that an error while freeing the
        // input buffer does not leak the output buffer
        int freeInputResult = JCuda.cudaFree(deviceCIdata);
        int freeOutputResult = cudaError.cudaSuccess;
        if (!inPlace)
        {
            freeOutputResult = JCuda.cudaFree(deviceCOdata);
        }
        cudaResult = (freeInputResult != cudaError.cudaSuccess) ? freeInputResult : freeOutputResult;
        if (cudaResult != cudaError.cudaSuccess)
        {
            if (exceptionsEnabled)
            {
                throw new CudaException("JCuda error: "+cudaError.stringFor(cudaResult));
            }
            return cufftResult.JCUFFT_INTERNAL_ERROR;
        }
        return result;
    }



    /**
     * Runs a CUFFT real-to-complex transform plan for double precision
     * values. Such a transform is implicitly a forward transform, so
     * this method does not take a direction argument.
     * <pre>
     * cufftResult cufftExecD2Z( cufftHandle plan, cufftDoubleReal *idata, cufftDoubleComplex *odata );
     * </pre>
     * The GPU memory referenced by idata is used as the input, and the
     * non-redundant Fourier coefficients are written to the odata array.
     * When idata and odata refer to the same memory, the transform is
     * performed in-place. (The CUFFT documentation gives details on FFTs
     * of real data.)
     *
     * @param plan The cufftHandle object for the plan to update
     * @param rIdata Pointer to the input data (in GPU memory) to transform
     * @param cOdata Pointer to the output data (in GPU memory). After the
     * call, it contains the complex Fourier coefficients
     * @return One of the following result codes:
     * <pre>
     * CUFFT_SETUP_FAILED  CUFFT library failed to initialize.
     * CUFFT_INVALID_PLAN  The plan parameter is not a valid handle.
     * CUFFT_INVALID_VALUE The idata and/or odata parameter is not valid.
     * CUFFT_EXEC_FAILED   CUFFT failed to execute the transform on GPU.
     * CUFFT_SUCCESS       CUFFT successfully executed the FFT plan.
     *
     * JCUFFT_INTERNAL_ERROR If an internal JCufft error occurred
     * </pre>
     */
    public static int cufftExecD2Z(cufftHandle plan, Pointer rIdata, Pointer cOdata)
    {
        // Run the native transform first, then pass its status through the
        // common result check
        int status = cufftExecD2ZNative(plan, rIdata, cOdata);
        return checkResult(status);
    }
    private static native int cufftExecD2ZNative(cufftHandle plan, Pointer rIdata, Pointer cOdata);



    /**
     * Array-based variant of {@link JCufft#cufftExecD2Z(cufftHandle, Pointer, Pointer)}.
     * The input and output data are given as host arrays. This method
     * forwards to {@link JCufft#cufftExecR2C(cufftHandle, double[], double[])},
     * which performs the host-device and device-host copies.
     *
     * @param plan The cufftHandle object for the plan to execute
     * @param rIdata The host array containing the input data
     * @param cOdata The host array that receives the output data
     * @return The result code that was returned by the array-based
     * implementation
     *
     * @see JCufft#cufftExecD2Z(cufftHandle, Pointer, Pointer)
     */
    public static int cufftExecD2Z(cufftHandle plan, double rIdata[], double cOdata[])
    {
        final int status = JCufft.cufftExecR2C(plan, rIdata, cOdata);
        return status;
    }

    /**
     * Executes a double precision real-to-complex transform on host arrays:
     * The input is copied to the device, the transform is executed via
     * {@link JCufft#cufftExecD2Z(cufftHandle, Pointer, Pointer)}, and the
     * output is copied back to the host.
     * <p>
     * The device buffers that are allocated for the transform are released
     * again on every exit path of this method. If {@code rIdata} and
     * {@code cOdata} are the same array, a single device buffer is used for
     * input and output.
     *
     * @param plan The cufftHandle object for the plan to execute
     * @param rIdata The host array containing the input data
     * @param cOdata The host array that receives the output data
     * @return The result code of the transform, or
     * cufftResult.JCUFFT_INTERNAL_ERROR if allocating, copying or freeing
     * the device memory failed
     * @throws CudaException If exceptions are enabled and one of the steps
     * did not succeed
     *
     * @see JCufft#cufftExecD2Z(cufftHandle, Pointer, Pointer)
     * @see JCufft#cufftExecD2Z(cufftHandle, double[], double[])
     * @deprecated This method will be removed in a future release.
     * It should have been called 'cufftExecD2Z'.
     */
    @Deprecated
    public static int cufftExecR2C(cufftHandle plan, double rIdata[], double cOdata[])
    {
        int cudaResult = 0;

        boolean inPlace = (rIdata == cOdata);

        // Compute the buffer sizes as long values, so that the byte
        // count of large arrays does not overflow the int range
        long inputBytes = (long)rIdata.length * Sizeof.DOUBLE;
        long outputBytes = (long)cOdata.length * Sizeof.DOUBLE;

        // Allocate space for the input data on the device
        Pointer hostRIdata = Pointer.to(rIdata);
        Pointer deviceRIdata = new Pointer();
        cudaResult = JCuda.cudaMalloc(deviceRIdata, inputBytes);
        if (cudaResult != cudaError.cudaSuccess)
        {
            if (exceptionsEnabled)
            {
                throw new CudaException("JCuda error: "+cudaError.stringFor(cudaResult));
            }
            return cufftResult.JCUFFT_INTERNAL_ERROR;
        }

        // Set the output device data to be equal to the input
        // device data for in-place transforms, or allocate
        // the output device data if the transform is not
        // in-place
        Pointer hostCOdata = null;
        Pointer deviceCOdata = null;
        if (inPlace)
        {
            hostCOdata = hostRIdata;
            deviceCOdata = deviceRIdata;
        }
        else
        {
            hostCOdata = Pointer.to(cOdata);
            deviceCOdata = new Pointer();
            cudaResult = JCuda.cudaMalloc(deviceCOdata, outputBytes);
            if (cudaResult != cudaError.cudaSuccess)
            {
                // The output allocation failed, so only the input
                // buffer exists and has to be released
                JCuda.cudaFree(deviceRIdata);
                if (exceptionsEnabled)
                {
                    throw new CudaException("JCuda error: "+cudaError.stringFor(cudaResult));
                }
                return cufftResult.JCUFFT_INTERNAL_ERROR;
            }
        }

        // Copy the host input data to the device
        cudaResult = JCuda.cudaMemcpy(deviceRIdata, hostRIdata, inputBytes, cudaMemcpyKind.cudaMemcpyHostToDevice);
        if (cudaResult != cudaError.cudaSuccess)
        {
            JCuda.cudaFree(deviceRIdata);
            if (!inPlace)
            {
                JCuda.cudaFree(deviceCOdata);
            }
            if (exceptionsEnabled)
            {
                throw new CudaException("JCuda error: "+cudaError.stringFor(cudaResult));
            }
            return cufftResult.JCUFFT_INTERNAL_ERROR;
        }

        // Execute the transform
        int result = cufftResult.CUFFT_SUCCESS;
        try
        {
            result = JCufft.cufftExecD2Z(plan, deviceRIdata, deviceCOdata);
        }
        catch (CudaException e)
        {
            JCuda.cudaFree(deviceRIdata);
            if (!inPlace)
            {
                JCuda.cudaFree(deviceCOdata);
            }
            if (exceptionsEnabled)
            {
                // Pass on the original exception, because it
                // describes the actual cause of the failure
                throw e;
            }
            return cufftResult.JCUFFT_INTERNAL_ERROR;
        }
        if (result != cufftResult.CUFFT_SUCCESS)
        {
            // The transform failed without an exception: Release the
            // device buffers before reporting the CUFFT result code
            JCuda.cudaFree(deviceRIdata);
            if (!inPlace)
            {
                JCuda.cudaFree(deviceCOdata);
            }
            if (exceptionsEnabled)
            {
                throw new CudaException(cufftResult.stringFor(result));
            }
            return result;
        }

        // Copy the device output data to the host
        cudaResult = JCuda.cudaMemcpy(hostCOdata, deviceCOdata, outputBytes, cudaMemcpyKind.cudaMemcpyDeviceToHost);
        if (cudaResult != cudaError.cudaSuccess)
        {
            JCuda.cudaFree(deviceRIdata);
            if (!inPlace)
            {
                JCuda.cudaFree(deviceCOdata);
            }
            if (exceptionsEnabled)
            {
                throw new CudaException("JCuda error: "+cudaError.stringFor(cudaResult));
            }
            return cufftResult.JCUFFT_INTERNAL_ERROR;
        }

        // Free the device data. Both buffers are released before a
        // failure is reported, so that an error while freeing the
        // input buffer does not leak the output buffer
        int freeInputResult = JCuda.cudaFree(deviceRIdata);
        int freeOutputResult = cudaError.cudaSuccess;
        if (!inPlace)
        {
            freeOutputResult = JCuda.cudaFree(deviceCOdata);
        }
        cudaResult = (freeInputResult != cudaError.cudaSuccess) ? freeInputResult : freeOutputResult;
        if (cudaResult != cudaError.cudaSuccess)
        {
            if (exceptionsEnabled)
            {
                throw new CudaException("JCuda error: "+cudaError.stringFor(cudaResult));
            }
            return cufftResult.JCUFFT_INTERNAL_ERROR;
        }
        return result;
    }








    /**
     * Runs a CUFFT complex-to-real transform plan for double precision
     * values. Such a transform is implicitly an inverse transform.
     * <pre>
     * cufftResult cufftExecZ2D( cufftHandle plan, cufftDoubleComplex *idata, cufftDoubleReal *odata );
     * </pre>
     * The GPU memory referenced by idata is used as the input. It contains
     * only the non-redundant complex Fourier coefficients. The real output
     * values are written to the odata array. When idata and odata refer to
     * the same memory, the transform is performed in-place. (The CUFFT
     * documentation gives details on FFTs of real data.)
     *
     * @param plan The cufftHandle object for the plan to update
     * @param cIdata Pointer to the complex input data (in GPU memory) to transform
     * @param rOdata Pointer to the real output data (in GPU memory). After the
     * call, it contains the real-valued output data
     * @return One of the following result codes:
     * <pre>
     * CUFFT_SETUP_FAILED  CUFFT library failed to initialize.
     * CUFFT_INVALID_PLAN  The plan parameter is not a valid handle.
     * CUFFT_INVALID_VALUE The idata and/or odata parameter is not valid.
     * CUFFT_EXEC_FAILED   CUFFT failed to execute the transform on GPU.
     * CUFFT_SUCCESS       CUFFT successfully executed the FFT plan.
     *
     * JCUFFT_INTERNAL_ERROR If an internal JCufft error occurred
     * </pre>
     */
    public static int cufftExecZ2D(cufftHandle plan, Pointer cIdata, Pointer rOdata)
    {
        // Run the native transform first, then pass its status through the
        // common result check
        int status = cufftExecZ2DNative(plan, cIdata, rOdata);
        return checkResult(status);
    }
    private static native int cufftExecZ2DNative(cufftHandle plan, Pointer cIdata, Pointer rOdata);



    /**
     * Convenience method for {@link JCufft#cufftExecZ2D(cufftHandle, Pointer, Pointer)}.
     * Accepts arrays for input and output data and automatically performs the host-device
     * and device-host copies.
     * <p>
     * The device buffers that are allocated for the transform are released
     * again on every exit path of this method. If {@code cIdata} and
     * {@code rOdata} are the same array, a single device buffer is used for
     * input and output.
     *
     * @param plan The cufftHandle object for the plan to execute
     * @param cIdata The host array containing the complex input data
     * @param rOdata The host array that receives the real output data
     * @return The result code of the transform, or
     * cufftResult.JCUFFT_INTERNAL_ERROR if allocating, copying or freeing
     * the device memory failed
     * @throws CudaException If exceptions are enabled and one of the steps
     * did not succeed
     *
     * @see JCufft#cufftExecZ2D(cufftHandle, Pointer, Pointer)
     */
    public static int cufftExecZ2D(cufftHandle plan, double cIdata[], double rOdata[])
    {
        int cudaResult = 0;

        boolean inPlace = (cIdata == rOdata);

        // Compute the buffer sizes as long values, so that the byte
        // count of large arrays does not overflow the int range
        long inputBytes = (long)cIdata.length * Sizeof.DOUBLE;
        long outputBytes = (long)rOdata.length * Sizeof.DOUBLE;

        // Allocate space for the input data on the device
        Pointer hostCIdata = Pointer.to(cIdata);
        Pointer deviceCIdata = new Pointer();
        cudaResult = JCuda.cudaMalloc(deviceCIdata, inputBytes);
        if (cudaResult != cudaError.cudaSuccess)
        {
            if (exceptionsEnabled)
            {
                throw new CudaException("JCuda error: "+cudaError.stringFor(cudaResult));
            }
            return cufftResult.JCUFFT_INTERNAL_ERROR;
        }

        // Set the output device data to be equal to the input
        // device data for in-place transforms, or allocate
        // the output device data if the transform is not
        // in-place
        Pointer hostROdata = null;
        Pointer deviceROdata = null;
        if (inPlace)
        {
            hostROdata = hostCIdata;
            deviceROdata = deviceCIdata;
        }
        else
        {
            hostROdata = Pointer.to(rOdata);
            deviceROdata = new Pointer();
            cudaResult = JCuda.cudaMalloc(deviceROdata, outputBytes);
            if (cudaResult != cudaError.cudaSuccess)
            {
                JCuda.cudaFree(deviceCIdata);
                if (exceptionsEnabled)
                {
                    throw new CudaException("JCuda error: "+cudaError.stringFor(cudaResult));
                }
                return cufftResult.JCUFFT_INTERNAL_ERROR;
            }
        }

        // Copy the host input data to the device
        cudaResult = JCuda.cudaMemcpy(deviceCIdata, hostCIdata, inputBytes, cudaMemcpyKind.cudaMemcpyHostToDevice);
        if (cudaResult != cudaError.cudaSuccess)
        {
            JCuda.cudaFree(deviceCIdata);
            if (!inPlace)
            {
                JCuda.cudaFree(deviceROdata);
            }
            if (exceptionsEnabled)
            {
                throw new CudaException("JCuda error: "+cudaError.stringFor(cudaResult));
            }
            return cufftResult.JCUFFT_INTERNAL_ERROR;
        }

        // Execute the transform
        int result = cufftResult.CUFFT_SUCCESS;
        try
        {
            result = JCufft.cufftExecZ2D(plan, deviceCIdata, deviceROdata);
        }
        catch (CudaException e)
        {
            JCuda.cudaFree(deviceCIdata);
            if (!inPlace)
            {
                JCuda.cudaFree(deviceROdata);
            }
            if (exceptionsEnabled)
            {
                // Pass on the original exception, because it
                // describes the actual cause of the failure
                throw e;
            }
            return cufftResult.JCUFFT_INTERNAL_ERROR;
        }
        if (result != cufftResult.CUFFT_SUCCESS)
        {
            // The transform failed without an exception: Release the
            // device buffers before reporting the CUFFT result code
            JCuda.cudaFree(deviceCIdata);
            if (!inPlace)
            {
                JCuda.cudaFree(deviceROdata);
            }
            if (exceptionsEnabled)
            {
                throw new CudaException(cufftResult.stringFor(result));
            }
            return result;
        }

        // Copy the device output data to the host
        cudaResult = JCuda.cudaMemcpy(hostROdata, deviceROdata, outputBytes, cudaMemcpyKind.cudaMemcpyDeviceToHost);
        if (cudaResult != cudaError.cudaSuccess)
        {
            JCuda.cudaFree(deviceCIdata);
            if (!inPlace)
            {
                JCuda.cudaFree(deviceROdata);
            }
            if (exceptionsEnabled)
            {
                throw new CudaException("JCuda error: "+cudaError.stringFor(cudaResult));
            }
            return cufftResult.JCUFFT_INTERNAL_ERROR;
        }

        // Free the device data. Both buffers are released before a
        // failure is reported, so that an error while freeing the
        // input buffer does not leak the output buffer
        int freeInputResult = JCuda.cudaFree(deviceCIdata);
        int freeOutputResult = cudaError.cudaSuccess;
        if (!inPlace)
        {
            freeOutputResult = JCuda.cudaFree(deviceROdata);
        }
        cudaResult = (freeInputResult != cudaError.cudaSuccess) ? freeInputResult : freeOutputResult;
        if (cudaResult != cudaError.cudaSuccess)
        {
            if (exceptionsEnabled)
            {
                throw new CudaException("JCuda error: "+cudaError.stringFor(cudaResult));
            }
            return cufftResult.JCUFFT_INTERNAL_ERROR;
        }
        return result;
    }

}








© 2015 - 2024 Weber Informatics LLC | Privacy Policy