com.nativelibs4java.opencl.util.LinearAlgebraKernels.c Maven / Gradle / Ivy

Go to download

Show more of this group Show more artifacts with this name
Show all versions of javacl Show documentation

JavaCL is an Object-Oriented API that makes the C OpenCL API available to Java in a very natural way. It hides away the complexity of cross-platform C bindings, has a clean OO design (with generics, Java enums, NIO buffers, fully typed exceptions...), provides high-level features (OpenGL-interop, array reductions) and comes with samples and demos. For more info, please visit http://code.google.com/p/nativelibs4java/wiki/OpenCL.

There is a newer version: 1.0.0-RC4

Show newest version

#pragma OPENCL EXTENSION cl_khr_fp64: enable

/*
 * Double-precision matrix product: c = a * b.
 *
 * Each work-item produces a single output element, taking its row from
 * global id 0 and its column from global id 1:
 *     c[row, col] = sum over k of a[row, k] * b[k, col]
 *
 * a        : left operand, indexed with a row stride of aColumns
 * aColumns : number of columns of a (also the length of the dot product)
 * b        : right operand, indexed with a row stride of bColumns
 * bColumns : number of columns of b (also the row stride of c)
 * c        : output, indexed with a row stride of bColumns
 *
 * No bounds checks are performed; presumably the host launches exactly
 * one work-item per output element -- confirm against the caller.
 */
__kernel void mulMatDouble(
    __global const double* a, /*size_t aRows,*/ int aColumns,
    __global const double* b, /*size_t bRows,*/ int bColumns,
    __global double* c
) {
    const int row = get_global_id(0);
    const int col = get_global_id(1);

    // Start of the current row of a.
    __global const double* aRow = a + row * aColumns;

    double sum = 0.0;
    int k = 0;
    while (k < aColumns) {
        sum += aRow[k] * b[k * bColumns + col];
        ++k;
    }

    c[row * bColumns + col] = sum;
}

/*
 * Double-precision matrix-vector product: c = a * b.
 *
 * Each work-item (global id 0) computes one output element as the dot
 * product of one row of a with the vector b:
 *     c[row] = sum over k of a[row, k] * b[k]
 *
 * a        : matrix, indexed with a row stride of aColumns
 * aColumns : number of columns of a; also the number of elements read from b
 * b        : input vector
 * bSize    : not read by this kernel
 * c        : output vector, one element per work-item
 *
 * No bounds checks are performed on any of the buffers.
 */
__kernel void mulVecDouble(
    __global const double* a, /*size_t aRows,*/ int aColumns,
    __global const double* b, int bSize,
    __global double* c
) {
    const size_t row = get_global_id(0);
    const size_t rowStart = row * aColumns;

    double sum = 0.0;
    size_t k = 0;
    while (k < aColumns) {
        sum += a[rowStart + k] * b[k];
        ++k;
    }

    c[row] = sum;
}

/*
 * Double-precision matrix transpose: out = transpose(a).
 *
 * Each work-item copies a single element. Global id 0 selects the
 * destination row and global id 1 the destination column.
 *
 * a        : source matrix, indexed with a row stride of aColumns
 * aRows    : number of rows of a; used as the row stride of out
 * aColumns : number of columns of a
 * out      : destination matrix
 *
 * No bounds checks are performed.
 */
__kernel void transposeDouble(
    __global const double* a, int aRows, int aColumns,
    __global double* out
) {
    const int dstRow = get_global_id(0);
    const int dstCol = get_global_id(1);

    // Element (dstCol, dstRow) of the source becomes (dstRow, dstCol) of out.
    const double value = a[dstCol * aColumns + dstRow];

    out[dstRow * aRows + dstCol] = value;
}

/*
 * Single-precision matrix product: c = a * b.
 *
 * Each work-item produces a single output element, taking its row from
 * global id 0 and its column from global id 1:
 *     c[row, col] = sum over k of a[row, k] * b[k, col]
 *
 * a        : left operand, indexed with a row stride of aColumns
 * aColumns : number of columns of a (also the length of the dot product)
 * b        : right operand, indexed with a row stride of bColumns
 * bColumns : number of columns of b (also the row stride of c)
 * c        : output, indexed with a row stride of bColumns
 *
 * No bounds checks are performed; presumably the host launches exactly
 * one work-item per output element -- confirm against the caller.
 */
__kernel void mulMatFloat(
    __global const float* a, /*size_t aRows,*/ int aColumns,
    __global const float* b, /*size_t bRows,*/ int bColumns,
    __global float* c
) {
    const int row = get_global_id(0);
    const int col = get_global_id(1);

    // Start of the current row of a.
    __global const float* aRow = a + row * aColumns;

    float sum = 0.0f;
    int k = 0;
    while (k < aColumns) {
        sum += aRow[k] * b[k * bColumns + col];
        ++k;
    }

    c[row * bColumns + col] = sum;
}

/*
 * Single-precision matrix-vector product: c = a * b.
 *
 * Each work-item (global id 0) computes one output element as the dot
 * product of one row of a with the vector b:
 *     c[i] = sum over k of a[i, k] * b[k]
 *
 * a        : matrix, indexed with a row stride of aColumns
 * aColumns : number of columns of a; also the number of elements read from b.
 *            A value <= 0 is treated as an empty row, so c[i] is set to 0.
 * b        : input vector
 * bSize    : not read by this kernel
 * c        : output vector, one element per work-item
 *
 * No bounds checks are performed on any of the buffers.
 */
__kernel void mulVecFloat(
   __global const float* a, /*size_t aRows,*/ int aColumns,
   __global const float* b, int bSize,
   __global float* c
) {
    // Keep the work-item id in size_t (the type get_global_id returns) so the
    // row offset below is not computed in int, where i * aColumns could
    // overflow for large matrices. This matches mulVecDouble.
    size_t i = get_global_id(0);

    // Convert the signed column count once; comparing a size_t counter with a
    // negative int would otherwise turn it into a huge unsigned loop bound.
    size_t columns = aColumns > 0 ? (size_t)aColumns : 0;

    float total = 0;
    size_t iOff = i * columns;
    for (size_t k = 0; k < columns; k++) {
        total += a[iOff + k] * b[k];
    }
    c[i] = total;
}

/*
 * Single-precision matrix transpose: out = transpose(a).
 *
 * Each work-item copies a single element. Global id 0 selects the
 * destination row and global id 1 the destination column.
 *
 * a        : source matrix, indexed with a row stride of aColumns
 * aRows    : number of rows of a; used as the row stride of out
 * aColumns : number of columns of a
 * out      : destination matrix
 *
 * No bounds checks are performed.
 */
__kernel void transposeFloat(
    __global const float* a, int aRows, int aColumns,
    __global float* out
) {
    const int dstRow = get_global_id(0);
    const int dstCol = get_global_id(1);

    // Element (dstCol, dstRow) of the source becomes (dstRow, dstCol) of out.
    const float value = a[dstCol * aColumns + dstRow];

    out[dstRow * aRows + dstCol] = value;
}