All Downloads are FREE. Search and download functionalities are using the official Maven repository.

gust.linalg.cuda.CuMatrixKernels.scala Maven / Gradle / Ivy

The newest version!
package gust.linalg.cuda

import breeze.generic.UFunc
import gust.util.cuda._
import jcuda.Pointer
import scala.reflect.ClassTag
import java.util.concurrent.ConcurrentHashMap
import breeze.linalg.{BroadcastedRows, BroadcastedColumns}

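/**
 * Mixin for the [[CuMatrix]] companion object (see the self-type) that provides `KernelBroker`:
 * a per-element-type factory which looks up element-wise map and reduction kernels in a PTX
 * resource and wraps them as Breeze `UFunc` implementations over `CuMatrix`.
 *
 * @author dlwh
 **/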
trait CuMatrixKernels { this: CuMatrix.type =>
  class KernelBroker[T: ClassTag](typeName: String) {

    /**
     * The module from which every kernel of this broker is looked up. It is built once, from the
     * classpath resource `matrix_kernels_$typeName.ptx`, resolved relative to this class.
     *
     * Throws `IllegalArgumentException` naming the resource when it cannot be found, rather than
     * passing a null stream on to `CuModule`.
     */
    private val module: CuModule = {
      val resourceName = s"matrix_kernels_$typeName.ptx"
      val stream = getClass.getResourceAsStream(resourceName)
      // getResourceAsStream reports a missing resource by returning null, not by throwing.
      require(stream != null, s"Kernel resource '$resourceName' not found on the classpath (typeName = '$typeName')")
      CuModule(stream)
    }

    // Kernel caches. Each map is keyed by the `funName` given to the factory methods below and is
    // consulted before asking `module` for a kernel, so a kernel already seen is reused.

    // Unary map kernels (`map_${funName}_$typeName`); used by implFor and inPlaceImplFor.
    private val implCache = new ConcurrentHashMap[String, CuKernel6[Int, Int, Pointer, Int, Pointer, Int]]
    // Matrix-matrix map kernels (`map2_${funName}_$typeName`); used by impl2For and inPlaceImpl2For.
    private val impl2Cache = new ConcurrentHashMap[String, CuKernel8[Int, Int, Pointer, Int, Pointer, Int, Pointer, Int]]
    // `map2_transpose_${funName}_$typeName` kernels; chosen when the two operands differ in `isTranspose`.
    private val impl2TransCache = new ConcurrentHashMap[String, CuKernel8[Int, Int, Pointer, Int, Pointer, Int, Pointer, Int]]
    // Matrix-scalar kernels (`map2_v_s_${funName}_$typeName`); the trailing `T` argument is the scalar operand.
    private val impl2VSCache = new ConcurrentHashMap[String, CuKernel7[Int, Int, Pointer, Int, Pointer, Int, T]]
    // Scalar-matrix kernels (`map2_s_v_${funName}_$typeName`); the `T` argument is the scalar operand.
    private val impl2SVCache = new ConcurrentHashMap[String, CuKernel7[Int, Int, Pointer, Int, T, Pointer, Int]]
    // Whole-matrix reduction kernels (`reduce_${funName}_$typeName`); used by reducerFor.
    private val reduceCache = new ConcurrentHashMap[String, CuKernel5[Int, Int, Pointer, Pointer, Int]]
    // `reduce_col_${funName}_$typeName` kernels; used by colReducerFor.
    private val colReduceCache = new ConcurrentHashMap[String, CuKernel5[Int, Int, Pointer, Pointer, Int]]
    // `reduce_row_${funName}_$typeName` kernels; used by rowReducerFor.
    private val rowReduceCache = new ConcurrentHashMap[String, CuKernel5[Int, Int, Pointer, Pointer, Int]]

    /**
     * Creates a unary `UFunc` implementation backed by the kernel `map_${funName}_$typeName`.
     *
     * The kernel is taken from `implCache` when present; otherwise it is looked up in `module`
     * and stored in the cache. Every application creates a result matrix with the same shape and
     * transposition as its argument and runs the kernel with the argument as input.
     *
     * @param funName function name as it appears inside the kernel symbol
     * @param context implicit CUDA context; defaults to `CuContext.ensureContext`
     * @tparam K the `UFunc` the implementation is meant for
     * @return an implementation mapping a `CuMatrix[T]` to a newly created `CuMatrix[T]`
     */
    def implFor[K <: UFunc](funName: String)(implicit context: CuContext = CuContext.ensureContext): UFunc.UImpl[K, CuMatrix[T], CuMatrix[T]] = {
      val mapKernel = Option(implCache.get(funName)).getOrElse {
        val loaded = module.getKernel6[Int, Int, Pointer, Int, Pointer, Int](s"map_${funName}_$typeName")
        implCache.put(funName, loaded)
        loaded
      }

      new UFunc.UImpl[K, CuMatrix[T], CuMatrix[T]] {
        def apply(v: CuMatrix[T]): CuMatrix[T] = {
          // The result mirrors the argument's transposition so both share one storage layout.
          val out =
            if (v.isTranspose) CuMatrix.create[T](v.cols, v.rows).t
            else CuMatrix.create[T](v.rows, v.cols)
          val minorSize = if (v.isTranspose) v.cols else v.rows
          mapKernel((512, 20), (32, 1, 1))(minorSize, v.majorSize, out.offsetPointer, out.majorStride, v.offsetPointer, v.majorStride)
          out
        }
      }
    }

    /**
     * Creates a `UFunc` implementation that reduces a whole matrix to a single `T` using the
     * kernel `reduce_${funName}_$typeName`.
     *
     * The kernel is run twice per application: first over the argument, writing into a 20 x 512
     * scratch matrix, and then over that scratch matrix addressed as a single run of `512 * 20`
     * elements, writing back into the same buffer. Element (0, 0) of the scratch matrix is returned.
     *
     * @param funName function name as it appears inside the kernel symbol
     * @param context implicit CUDA context; defaults to `CuContext.ensureContext`
     * @tparam K the `UFunc` the implementation is meant for
     * @return an implementation mapping a `CuMatrix[T]` to a scalar `T`
     */
    def reducerFor[K <: UFunc](funName: String)(implicit context: CuContext = CuContext.ensureContext): UFunc.UImpl[K, CuMatrix[T], T] = {
      val reduceKernel = reduceCache.get(funName) match {
        case null =>
          val loaded = module.getKernel5[Int, Int, Pointer, Pointer, Int](s"reduce_${funName}_$typeName")
          reduceCache.put(funName, loaded)
          loaded
        case cached => cached
      }

      val elemBytes = org.bridj.BridJ.sizeOf(implicitly[ClassTag[T]].runtimeClass)

      new UFunc.UImpl[K, CuMatrix[T], T] {
        def apply(v: CuMatrix[T]): T = {
          val scratchRows = 20
          val scratchCols = 512
          val scratch = CuMatrix.create[T](scratchRows, scratchCols)
          val minorSize = if (v.isTranspose) v.cols else v.rows

          // First pass: argument -> scratch.
          reduceKernel((scratchCols, scratchRows), (32, 1), 32 * 1 * elemBytes.toInt)(minorSize, v.majorSize, scratch.offsetPointer, v.offsetPointer, v.majorStride)
          // Second pass: scratch -> scratch, in place.
          // NOTE(review): unlike the first pass, no third launch argument is passed here - confirm this is intended.
          reduceKernel(1, (32, 1))(scratchCols * scratchRows, 1, scratch.offsetPointer, scratch.offsetPointer, 1)

          scratch(0 to 0, 0 to 0).toDense.apply(0, 0)
        }
      }
    }

    /**
     * Creates a `UFunc` implementation that reduces each column of a matrix, producing a
     * `1 x cols` matrix.
     *
     * The kernels receive only the storage dimensions (minor size, major size, stride) and no
     * transposition flag. For a non-transposed operand the `reduce_col_${funName}_$typeName`
     * kernel is used, exactly as before. For a transposed operand a logical column runs along the
     * storage's major axis, so the `reduce_row_${funName}_$typeName` kernel is used instead;
     * previously the column kernel was run on the raw storage, which reduced along the wrong
     * logical axis and emitted `v.rows` values into a buffer sized for `v.cols`.
     *
     * NOTE(review): this relies on `reduce_col` yielding one value per major index and
     * `reduce_row` one value per minor index, which is what the output shapes used for
     * non-transposed operands in this class imply - confirm against the kernel source.
     *
     * @param funName function name as it appears inside the kernel symbol
     * @param context implicit CUDA context; defaults to `CuContext.ensureContext`
     * @tparam K the `UFunc` the implementation is meant for
     * @return an implementation mapping a column-broadcast `CuMatrix[T]` to a `1 x cols` matrix
     */
    def colReducerFor[K <: UFunc](funName: String)(implicit context: CuContext = CuContext.ensureContext): UFunc.UImpl[K, BroadcastedColumns[CuMatrix[T], CuMatrix[T]], CuMatrix[T]] = {
      val colKern = Option(colReduceCache.get(funName)).getOrElse {
        val loaded = module.getKernel5[Int, Int, Pointer, Pointer, Int](s"reduce_col_${funName}_$typeName")
        colReduceCache.put(funName, loaded)
        loaded
      }

      // Only needed for transposed operands, so it is looked up on first such use.
      lazy val rowKern = Option(rowReduceCache.get(funName)).getOrElse {
        val loaded = module.getKernel5[Int, Int, Pointer, Pointer, Int](s"reduce_row_${funName}_$typeName")
        rowReduceCache.put(funName, loaded)
        loaded
      }

      val byteSize = org.bridj.BridJ.sizeOf(implicitly[ClassTag[T]].runtimeClass)


      new UFunc.UImpl[K, BroadcastedColumns[CuMatrix[T], CuMatrix[T]], CuMatrix[T]] {
        def apply(vx: BroadcastedColumns[CuMatrix[T], CuMatrix[T]]): CuMatrix[T] = {
          val v = vx.underlying

          val tmp = CuMatrix.create[T](1, v.cols)
          val minorSize = if (v.isTranspose) v.cols else v.rows
          if (v.isTranspose) {
            // Logical columns lie along the major axis: one output per minor index (v.cols of them).
            // Launch configuration matches the one rowReducerFor uses for this kernel.
            rowKern((512, 1), (32, 1), 32 * 1 * byteSize.toInt)(minorSize, v.majorSize, tmp.offsetPointer, v.offsetPointer, v.majorStride)
          } else {
            colKern((512, 20), (32, 1), 32 * 1 * byteSize.toInt)(minorSize, v.majorSize, tmp.offsetPointer, v.offsetPointer, v.majorStride)
          }
          tmp
        }
      }
    }

    /**
     * Creates a `UFunc` implementation that reduces each row of a matrix, producing a
     * `rows x 1` matrix.
     *
     * The kernels receive only the storage dimensions (minor size, major size, stride) and no
     * transposition flag. For a non-transposed operand the `reduce_row_${funName}_$typeName`
     * kernel is used, exactly as before. For a transposed operand a logical row runs along the
     * storage's minor axis, so the `reduce_col_${funName}_$typeName` kernel is used instead;
     * previously the row kernel was run on the raw storage, which reduced along the wrong logical
     * axis and emitted `v.cols` values into a buffer sized for `v.rows`.
     *
     * NOTE(review): this relies on `reduce_row` yielding one value per minor index and
     * `reduce_col` one value per major index, which is what the output shapes used for
     * non-transposed operands in this class imply - confirm against the kernel source.
     *
     * @param funName function name as it appears inside the kernel symbol
     * @param context implicit CUDA context; defaults to `CuContext.ensureContext`
     * @tparam K the `UFunc` the implementation is meant for
     * @return an implementation mapping a row-broadcast `CuMatrix[T]` to a `rows x 1` matrix
     */
    def rowReducerFor[K <: UFunc](funName: String)(implicit context: CuContext = CuContext.ensureContext): UFunc.UImpl[K, BroadcastedRows[CuMatrix[T], CuMatrix[T]], CuMatrix[T]] = {
      val rowKern = Option(rowReduceCache.get(funName)).getOrElse {
        val loaded = module.getKernel5[Int, Int, Pointer, Pointer, Int](s"reduce_row_${funName}_$typeName")
        rowReduceCache.put(funName, loaded)
        loaded
      }

      // Only needed for transposed operands, so it is looked up on first such use.
      lazy val colKern = Option(colReduceCache.get(funName)).getOrElse {
        val loaded = module.getKernel5[Int, Int, Pointer, Pointer, Int](s"reduce_col_${funName}_$typeName")
        colReduceCache.put(funName, loaded)
        loaded
      }

      val byteSize = org.bridj.BridJ.sizeOf(implicitly[ClassTag[T]].runtimeClass)


      new UFunc.UImpl[K, BroadcastedRows[CuMatrix[T], CuMatrix[T]], CuMatrix[T]] {
        def apply(vx: BroadcastedRows[CuMatrix[T], CuMatrix[T]]): CuMatrix[T] = {
          val v = vx.underlying

          val tmp = CuMatrix.create[T](v.rows, 1)
          val minorSize = if (v.isTranspose) v.cols else v.rows
          if (v.isTranspose) {
            // Logical rows lie along the minor axis: one output per major index (v.rows of them).
            // Launch configuration matches the one colReducerFor uses for this kernel.
            colKern((512, 20), (32, 1), 32 * 1 * byteSize.toInt)(minorSize, v.majorSize, tmp.offsetPointer, v.offsetPointer, v.majorStride)
          } else {
            rowKern((512, 1), (32, 1), 32 * 1 * byteSize.toInt)(minorSize, v.majorSize, tmp.offsetPointer, v.offsetPointer, v.majorStride)
          }
          tmp
        }
      }
    }

    /**
     * Creates an in-place unary `UFunc` implementation backed by the kernel
     * `map_${funName}_$typeName`, sharing `implCache` with `implFor`.
     *
     * On application the argument is passed to the kernel both as destination and as source, so
     * its contents are overwritten with the mapped values.
     *
     * @param funName function name as it appears inside the kernel symbol
     * @param context implicit CUDA context; defaults to `CuContext.ensureContext`
     * @tparam K the `UFunc` the implementation is meant for
     * @return an implementation that updates a `CuMatrix[T]` in place
     */
    def inPlaceImplFor[K <: UFunc](funName: String)(implicit context: CuContext = CuContext.ensureContext): UFunc.InPlaceImpl[K, CuMatrix[T]] = {
      val mapKernel = implCache.get(funName) match {
        case null =>
          val loaded = module.getKernel6[Int, Int, Pointer, Int, Pointer, Int](s"map_${funName}_$typeName")
          implCache.put(funName, loaded)
          loaded
        case cached => cached
      }

      new UFunc.InPlaceImpl[K, CuMatrix[T]] {
        def apply(v: CuMatrix[T]): Unit = {
          val minorSize = if (v.isTranspose) v.cols else v.rows
          // Destination and source are the same buffer.
          mapKernel((512, 20), (32, 1))(minorSize, v.majorSize, v.offsetPointer, v.majorStride, v.offsetPointer, v.majorStride)
        }
      }
    }

    /**
     * Creates a binary matrix-matrix `UFunc` implementation.
     *
     * Two kernels are obtained (from their caches, or from `module` on a miss):
     * `map2_${funName}_$typeName`, used when both operands agree in `isTranspose`, and
     * `map2_transpose_${funName}_$typeName`, used when they differ. The result matrix is created
     * with the shape and transposition of the first operand.
     *
     * The returned implementation throws `IllegalArgumentException` ("Dimension mismatch!") when
     * the operands differ in `rows` or `cols`.
     *
     * @param funName function name as it appears inside the kernel symbols
     * @param context implicit CUDA context; defaults to `CuContext.ensureContext`
     * @tparam K the `UFunc` the implementation is meant for
     * @return an implementation combining two `CuMatrix[T]` into a newly created `CuMatrix[T]`
     */
    def impl2For[K <: UFunc](funName: String)(implicit context: CuContext = CuContext.ensureContext): UFunc.UImpl2[K, CuMatrix[T], CuMatrix[T], CuMatrix[T]] = {
      val sameLayoutKernel = Option(impl2Cache.get(funName)).getOrElse {
        val loaded = module.getKernel8[Int, Int, Pointer, Int, Pointer, Int, Pointer, Int](s"map2_${funName}_$typeName")
        impl2Cache.put(funName, loaded)
        loaded
      }

      val mixedLayoutKernel = Option(impl2TransCache.get(funName)).getOrElse {
        val loaded = module.getKernel8[Int, Int, Pointer, Int, Pointer, Int, Pointer, Int](s"map2_transpose_${funName}_$typeName")
        impl2TransCache.put(funName, loaded)
        loaded
      }

      new UFunc.UImpl2[K, CuMatrix[T], CuMatrix[T], CuMatrix[T]] {
        def apply(v: CuMatrix[T], v2: CuMatrix[T]): CuMatrix[T] = {
          require(v.rows == v2.rows && v.cols == v2.cols, "Dimension mismatch!")
          // The result follows the first operand's transposition.
          val out =
            if (v.isTranspose) CuMatrix.create[T](v.cols, v.rows).t
            else CuMatrix.create[T](v.rows, v.cols)
          val minorSize = if (v.isTranspose) v.cols else v.rows

          if (v.isTranspose == v2.isTranspose) {
            sameLayoutKernel((512, 20), (32, 1))(minorSize, v.majorSize, out.offsetPointer, out.majorStride, v.offsetPointer, v.majorStride, v2.offsetPointer, v2.majorStride)
          } else {
            mixedLayoutKernel((512, 30), (32, 8))(minorSize, v.majorSize, out.offsetPointer, out.majorStride, v.offsetPointer, v.majorStride, v2.offsetPointer, v2.majorStride)
          }

          out
        }
      }
    }

    /**
     * Creates an in-place binary matrix-matrix `UFunc` implementation, sharing its kernel caches
     * with `impl2For`.
     *
     * On application the first operand is passed to the kernel both as destination and as first
     * source, so it is overwritten with the combined values. The
     * `map2_transpose_${funName}_$typeName` kernel is used when the operands differ in
     * `isTranspose`, and `map2_${funName}_$typeName` otherwise.
     *
     * The returned implementation throws `IllegalArgumentException` ("Dimension mismatch!") when
     * the operands differ in `rows` or `cols`.
     *
     * @param funName function name as it appears inside the kernel symbols
     * @param context implicit CUDA context; defaults to `CuContext.ensureContext`
     * @tparam K the `UFunc` the implementation is meant for
     * @return an implementation that updates its first `CuMatrix[T]` operand in place
     */
    def inPlaceImpl2For[K <: UFunc](funName: String)(implicit context: CuContext = CuContext.ensureContext): UFunc.InPlaceImpl2[K, CuMatrix[T], CuMatrix[T]] = {
      var kern = impl2Cache.get(funName)
      if (kern == null) {
        kern = module.getKernel8[Int, Int, Pointer, Int, Pointer, Int, Pointer, Int](s"map2_${funName}_$typeName")
        impl2Cache.put(funName, kern)
      }

      var transKern = impl2TransCache.get(funName)
      if (transKern == null) {
        transKern = module.getKernel8[Int, Int, Pointer, Int, Pointer, Int, Pointer, Int](s"map2_transpose_${funName}_$typeName")
        impl2TransCache.put(funName, transKern)
      }


      new UFunc.InPlaceImpl2[K, CuMatrix[T], CuMatrix[T]] {
        // Explicit `: Unit =` replaces the deprecated procedure syntax (`def f(...) { ... }`).
        def apply(v: CuMatrix[T], v2: CuMatrix[T]): Unit = {
          require(v.rows == v2.rows && v.cols == v2.cols, "Dimension mismatch!")
          val minorSize = if (v.isTranspose) v.cols else v.rows
          if (v.isTranspose != v2.isTranspose) {
            transKern((512, 30), (32, 8))(minorSize, v.majorSize, v.offsetPointer, v.majorStride, v.offsetPointer, v.majorStride, v2.offsetPointer, v2.majorStride)
          } else {
            kern((512, 20), (32, 1, 1))(minorSize, v.majorSize, v.offsetPointer, v.majorStride, v.offsetPointer, v.majorStride, v2.offsetPointer, v2.majorStride)
          }
        }
      }
    }

    /**
     * Creates a binary matrix-scalar `UFunc` implementation backed by the kernel
     * `map2_v_s_${funName}_$typeName`.
     *
     * Every application creates a result matrix with the shape and transposition of the matrix
     * operand and passes the scalar to the kernel as its last argument.
     *
     * @param funName function name as it appears inside the kernel symbol
     * @param context implicit CUDA context; defaults to `CuContext.ensureContext`
     * @tparam K the `UFunc` the implementation is meant for
     * @return an implementation combining a `CuMatrix[T]` and a `T` into a newly created `CuMatrix[T]`
     */
    def impl2For_v_s[K <: UFunc](funName: String)(implicit context: CuContext = CuContext.ensureContext): UFunc.UImpl2[K, CuMatrix[T], T, CuMatrix[T]] = {
      val matrixScalarKernel = Option(impl2VSCache.get(funName)).getOrElse {
        val loaded = module.getKernel7[Int, Int, Pointer, Int, Pointer, Int, T](s"map2_v_s_${funName}_$typeName")
        impl2VSCache.put(funName, loaded)
        loaded
      }

      new UFunc.UImpl2[K, CuMatrix[T], T, CuMatrix[T]] {
        def apply(v: CuMatrix[T], v2: T): CuMatrix[T] = {
          // The result mirrors the matrix operand's transposition.
          val out =
            if (v.isTranspose) CuMatrix.create[T](v.cols, v.rows).t
            else CuMatrix.create[T](v.rows, v.cols)
          val minorSize = if (v.isTranspose) v.cols else v.rows
          matrixScalarKernel((512, 20), (32, 1, 1))(minorSize, v.majorSize, out.offsetPointer, out.majorStride, v.offsetPointer, v.majorStride, v2)
          out
        }
      }
    }

    /**
     * Creates an in-place binary matrix-scalar `UFunc` implementation backed by the kernel
     * `map2_v_s_${funName}_$typeName`, sharing `impl2VSCache` with `impl2For_v_s`.
     *
     * On application the matrix operand is passed to the kernel both as destination and as
     * source, so it is overwritten with the combined values.
     *
     * @param funName function name as it appears inside the kernel symbol
     * @param context implicit CUDA context; defaults to `CuContext.ensureContext`
     * @tparam K the `UFunc` the implementation is meant for
     * @return an implementation that updates a `CuMatrix[T]` in place using a scalar `T`
     */
    def inPlaceImpl2For_v_s[K <: UFunc](funName: String)(implicit context: CuContext = CuContext.ensureContext): UFunc.InPlaceImpl2[K, CuMatrix[T], T] = {
      val matrixScalarKernel = impl2VSCache.get(funName) match {
        case null =>
          val loaded = module.getKernel7[Int, Int, Pointer, Int, Pointer, Int, T](s"map2_v_s_${funName}_$typeName")
          impl2VSCache.put(funName, loaded)
          loaded
        case cached => cached
      }

      new UFunc.InPlaceImpl2[K, CuMatrix[T], T] {
        def apply(v: CuMatrix[T], v2: T): Unit = {
          val minorSize = if (v.isTranspose) v.cols else v.rows
          // Destination and source are the same buffer.
          matrixScalarKernel((512, 20), (32, 1, 1))(minorSize, v.majorSize, v.offsetPointer, v.majorStride, v.offsetPointer, v.majorStride, v2)
        }
      }
    }

    /**
     * Creates a binary scalar-matrix `UFunc` implementation backed by the kernel
     * `map2_s_v_${funName}_$typeName`.
     *
     * Every application creates a result matrix with the shape and transposition of the matrix
     * operand; the scalar is passed to the kernel ahead of the matrix pointer and stride.
     *
     * @param funName function name as it appears inside the kernel symbol
     * @param context implicit CUDA context; defaults to `CuContext.ensureContext`
     * @tparam K the `UFunc` the implementation is meant for
     * @return an implementation combining a `T` and a `CuMatrix[T]` into a newly created `CuMatrix[T]`
     */
    def impl2For_s_v[K <: UFunc](funName: String)(implicit context: CuContext = CuContext.ensureContext): UFunc.UImpl2[K, T, CuMatrix[T], CuMatrix[T]] = {
      val scalarMatrixKernel = Option(impl2SVCache.get(funName)).getOrElse {
        val loaded = module.getKernel7[Int, Int, Pointer, Int, T, Pointer, Int](s"map2_s_v_${funName}_$typeName")
        impl2SVCache.put(funName, loaded)
        loaded
      }

      new UFunc.UImpl2[K, T, CuMatrix[T], CuMatrix[T]] {
        def apply(v2: T, v: CuMatrix[T]): CuMatrix[T] = {
          // The result mirrors the matrix operand's transposition.
          val out =
            if (v.isTranspose) CuMatrix.create[T](v.cols, v.rows).t
            else CuMatrix.create[T](v.rows, v.cols)
          val minorSize = if (v.isTranspose) v.cols else v.rows
          scalarMatrixKernel((512, 20), (32, 1, 1))(minorSize, v.majorSize, out.offsetPointer, out.majorStride, v2, v.offsetPointer, v.majorStride)
          out
        }
      }
    }
  }

}




© 2015 - 2025 Weber Informatics LLC | Privacy Policy