All downloads are free. Search and download functionality uses the official Maven repository.

com.aparapi.examples.matrix.CorrMatrixKernel Maven / Gradle / Ivy

/**
 * Copyright (c) 2016 - 2017 Syncleus, Inc.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
/**
 * This material was prepared as an account of work sponsored by an agency of the United States Government.  
 * Neither the United States Government nor the United States Department of Energy, nor Battelle, nor any of 
 * their employees, nor any jurisdiction or organization that has cooperated in the development of these materials, 
 * makes any warranty, express or implied, or assumes any legal liability or responsibility for the accuracy, 
 * completeness, or usefulness of any information, apparatus, product, software, or process disclosed, or represents
 * that its use would not infringe privately owned rights.
 */
package com.aparapi.examples.matrix;

import com.aparapi.Kernel;

/**
 * This kernel attempts to re-implement the Lucene OpenBitSet functionality on a GPU
 * 
 * Based on code from: 
* {@link http://grepcode.com/file/repo1.maven.org/maven2/org.apache.lucene/lucene-core/3.1.0/org/apache/lucene/util/BitUtil.java} * * @author ryan.lamothe at gmail.com * @author sedillard at gmail.com */ public class CorrMatrixKernel extends Kernel { final long[] matrixA; final int matrixA_NumTerms; final long[] matrixB; final int matrixB_NumTerms; int numLongs; int[] resultMatrix; /** * Default constructor */ public CorrMatrixKernel(final long[] matrixA, final int matrixA_NumTerms, final long[] matrixB, final int matrixB_NumTerms, final int numLongs, final int[] resultMatrix) { this.matrixA = matrixA; this.matrixA_NumTerms = matrixA_NumTerms; this.matrixB = matrixB; this.matrixB_NumTerms = matrixB_NumTerms; this.numLongs = numLongs; this.resultMatrix = resultMatrix; } @Override public void run() { final int i = this.getGlobalId(0); if (i < matrixA_NumTerms) { final int j = this.getGlobalId(1); if (j < matrixB_NumTerms) { // For testing purposes, you can use the naive implementation to compare performance resultMatrix[(i * matrixB_NumTerms) + j] = pop_intersect(matrixA, i * numLongs, matrixB, j * numLongs, numLongs); // this.resultMatrix[i * matrixB_NumTerms + j] = this.naive_pop_intersect(matrixA, i * numLongs, matrixB, j * numLongs, numLongs); } } } /** * A naive implementation of the pop_array code below */ private int naive_pop_intersect(final long matrixA[], final int aStart, final long matrixB[], final int bStart, final int numWords) { int sum = 0; for (int i = 0; i < numWords; i++) { sum += pop(matrixA[aStart + i] & matrixB[bStart + i]); } return sum; } /** * Returns the popcount or cardinality of the two sets after an intersection. * Neither array is modified. 
* * Modified for the purposes of this kernel from its original version */ private int pop_intersect(final long matrixA[], final int aStart, final long matrixB[], final int bStart, final int numWords) { /* * http://grepcode.com/file/repo1.maven.org/maven2/org.apache.lucene/lucene-core/3.1.0/org/apache/lucene/util/BitUtil.java */ // generated from pop_array via sed 's/A\[\([^]]*\)\]/\(A[\1] \& B[\1]\)/g' final int n = numWords; int tot = 0, tot8 = 0; long ones = 0, twos = 0, fours = 0; int i; for (i = 0; i <= (n - 8); i += 8) { long twosA = 0; long twosB = 0; long foursA = 0; long foursB = 0; long eights = 0; final int ai = aStart + i; final int bi = bStart + i; // CSA(twosA, ones, ones, (A[i] & B[i]), (A[i+1] & B[i+1])) { final long b = matrixA[ai] & matrixB[bi], c = matrixA[ai + 1] & matrixB[bi + 1]; final long u = ones ^ b; twosA = (ones & b) | (u & c); ones = u ^ c; } // CSA(twosB, ones, ones, (A[i+2] & B[i+2]), (A[i+3] & B[i+3])) { final long b = matrixA[ai + 2] & matrixB[bi + 2], c = matrixA[ai + 3] & matrixB[bi + 3]; final long u = ones ^ b; twosB = (ones & b) | (u & c); ones = u ^ c; } // CSA(foursA, twos, twos, twosA, twosB) { final long u = twos ^ twosA; foursA = (twos & twosA) | (u & twosB); twos = u ^ twosB; } // CSA(twosA, ones, ones, (A[i+4] & B[i+4]), (A[i+5] & B[i+5])) { final long b = matrixA[ai + 4] & matrixB[bi + 4], c = matrixA[ai + 5] & matrixB[bi + 5]; final long u = ones ^ b; twosA = (ones & b) | (u & c); ones = u ^ c; } // CSA(twosB, ones, ones, (A[i+6] & B[i+6]), (A[i+7] & B[i+7])) { final long b = matrixA[ai + 6] & matrixB[bi + 6], c = matrixA[ai + 7] & matrixB[bi + 7]; final long u = ones ^ b; twosB = (ones & b) | (u & c); ones = u ^ c; } // CSA(foursB, twos, twos, twosA, twosB) { final long u = twos ^ twosA; foursB = (twos & twosA) | (u & twosB); twos = u ^ twosB; } // CSA(eights, fours, fours, foursA, foursB) { final long u = fours ^ foursA; eights = (fours & foursA) | (u & foursB); fours = u ^ foursB; } tot8 += pop(eights); } if (i <= (n 
- 4)) { final int ai = aStart + i; final int bi = bStart + i; long twosA = 0; long twosB = 0; long foursA = 0; long eights = 0; { final long b = matrixA[ai] & matrixB[bi], c = matrixA[ai + 1] & matrixB[bi + 1]; final long u = ones ^ b; twosA = (ones & b) | (u & c); ones = u ^ c; } { final long b = matrixA[ai + 2] & matrixB[bi + 2], c = matrixA[ai + 3] & matrixB[bi + 3]; final long u = ones ^ b; twosB = (ones & b) | (u & c); ones = u ^ c; } { final long u = twos ^ twosA; foursA = (twos & twosA) | (u & twosB); twos = u ^ twosB; } eights = fours & foursA; fours = fours ^ foursA; tot8 += pop(eights); i += 4; } if (i <= (n - 2)) { final int ai = aStart + i; final int bi = bStart + i; final long b = matrixA[ai] & matrixB[bi], c = matrixA[ai + 1] & matrixB[bi + 1]; final long u = ones ^ b; final long twosA = (ones & b) | (u & c); ones = u ^ c; final long foursA = twos & twosA; twos = twos ^ twosA; final long eights = fours & foursA; fours = fours ^ foursA; tot8 += pop(eights); i += 2; } if (i < n) { final int ai = aStart + i; final int bi = bStart + i; tot += pop(matrixA[ai] & matrixB[bi]); } tot += (pop(fours) << 2) + (pop(twos) << 1) + pop(ones) + (tot8 << 3); return tot; } /** * Returns the number of bits set in the long */ private int pop(long x) { /* * http://grepcode.com/file/repo1.maven.org/maven2/org.apache.lucene/lucene-core/3.1.0/org/apache/lucene/util/BitUtil.java */ /* * Hacker's Delight 32 bit pop function: * http://www.hackersdelight.org/HDcode/newCode/pop_arrayHS.c.txt * * int pop(unsigned x) { * x = x - ((x >> 1) & 0x55555555); * x = (x & 0x33333333) + ((x >> 2) & 0x33333333); * x = (x + (x >> 4)) & 0x0F0F0F0F; * x = x + (x >> 8); * x = x + (x >> 16); * return x & 0x0000003F; * } * * */ // 64 bit java version of the C function from above x = x - ((x >>> 1) & 0x5555555555555555L); x = (x & 0x3333333333333333L) + ((x >>> 2) & 0x3333333333333333L); x = (x + (x >>> 4)) & 0x0F0F0F0F0F0F0F0FL; x = x + (x >>> 8); x = x + (x >>> 16); x = x + (x >>> 32); return 
(int) x & 0x7F; } }




© 2015 - 2025 Weber Informatics LLC | Privacy Policy