All Downloads are FREE. Search and download functionalities are using the official Maven repository.

kernels.wip.matmul_template.cl Maven / Gradle / Ivy

The newest version!

    // Taken from: https://github.com/premsasidharan/GpgpuOpenCL/blob/5ab5e2b1396df9804600664eb3bbed7c97bc22ad/Examples/OpenCL_1_2/MatrixMult/main.cpp#L9-L33

    kernel void multiply(
        global float* pC,
        global const float* pA,
        global const float* pB,
        int M, int N, int P
    ){
        local float shA[16][16];
        local float shB[16][16];

        int m = get_global_id(0);
        int p = get_global_id(1);

        int pc = ( get_group_id(1) << 4 ) + get_local_id(0);

        float result = 0.0;

        for ( int n = get_local_id(1); n < N; n += 16 )
        {
            shA[ get_local_id(0) ][ get_local_id(1) ] = pA[ ( N * m ) + n ];
            shB[ get_local_id(0) ][ get_local_id(1) ] = pB[ ( P * n ) + pc ];

            barrier(CLK_LOCAL_MEM_FENCE);

            for ( int i = 0; i < 16; i++ )
            {
                result += ( shA[ get_local_id(0) ][i] * shB[ get_local_id(1) ][i] );
            }

            barrier(CLK_LOCAL_MEM_FENCE);
        }

        pC[ ( P * m ) + p ] = result;
    }




© 2015 - 2025 Weber Informatics LLC | Privacy Policy