All Downloads are FREE. Search and download functionalities are using the official Maven repository.

gust.linalg.cuda.CuVector.scala Maven / Gradle / Ivy

The newest version!
package gust.linalg.cuda

import breeze.linalg._
import org.bridj.Pointer
import jcuda.jcublas.{cublasOperation, cublasHandle, JCublas2}
import gust.util.cuda._
import breeze.generic.UFunc
import spire.algebra.VectorSpace
import breeze.math.{Semiring, MutableInnerProductVectorSpace}
import breeze.linalg.support.{CanSlice, CanSlice2, CanCopy, CanCreateZerosLike}
import breeze.linalg.operators._
import scala.reflect.ClassTag
import jcuda.runtime.{cudaStream_t, cudaMemcpyKind, JCuda}
import jcuda.jcurand.{curandRngType, curandGenerator}
import gust.util.cuda
import breeze.numerics._
import breeze.generic.UFunc.{UImpl, UImpl2, InPlaceImpl2}
import jcuda.driver.CUstream
import breeze.stats.distributions.{RandBasis, Rand}

/**
 * A gpu side dense vector: a strided view of `length` elements over the buffer `data`.
 *
 * @param data   pointer to the buffer that holds the elements
 * @param offset index (in elements) into `data` of this vector's first element
 * @param stride distance (in elements) between two consecutive elements of this vector
 * @param length number of elements in this vector
 * @tparam V element type
 *
 * @author dlwh
 **/
class CuVector[V](val data: Pointer[V],
                  val offset: Int,
                  val stride: Int,
                  val length: Int) extends NumericOps[CuVector[V]] {
  override def repr: CuVector[V] = this

  /** Creates a contiguous vector (offset 0, stride 1) over a buffer obtained from `cuda.allocate`. */
  def this(length: Int)(implicit ct: ClassTag[V]) = this(cuda.allocate[V](length), 0, 1, length)

  /** Size of one element in bytes, as reported by the pointer's IO descriptor. */
  def elemSize = data.getIO.getTargetSize

  /** Pointer to this vector's first element, i.e. `data` advanced by `offset` elements. */
  def offsetPointer = data.toCuPointer.withByteOffset(elemSize * offset)

  /** Number of elements; alias for `length`. */
  def size = length

  /**
   * Copies the elements of this vector into a newly allocated breeze `DenseVector`.
   * The copy is performed with `cublasGetVector`, reading with this vector's stride and
   * writing contiguously.
   */
  def toDense = {
    val arrayData = Pointer.allocateArray(data.getIO, size)

    JCublas2.cublasGetVector(length, elemSize.toInt, offsetPointer, stride, arrayData.toCuPointer, 1)

    new DenseVector(arrayData.toArray)
  }

  /** Views this vector as a `length` x 1 [[CuMatrix]] that shares the same buffer. */
  def toMatrix = {
    new CuMatrix[V](length, 1, data, offset, stride, false)
  }

  /**
   * Asynchronously copies the contents of `b` into this vector (device to device).
   *
   * @param b      source vector; must have the same length as this vector
   * @param stream stream the copy is enqueued on
   * @return the status code returned by the underlying `cudaMemcpy*Async` call
   */
  def writeFrom(b: CuVector[V])(implicit stream: CUstream = new CUstream()) = {
    require(b.length == this.length, "Vectors must have the same length")

    val aPtr = offsetPointer
    val bPtr = b.offsetPointer

    if(b.stride == 1 && this.stride == 1) {
      // both sides are contiguous: a single flat copy is enough
      JCuda.cudaMemcpyAsync(aPtr, bPtr, size * elemSize, cudaMemcpyKind.cudaMemcpyDeviceToDevice, new cudaStream_t(stream))
    } else {
      // strided copy expressed as a 2D copy: `length` rows of one element each,
      // where the pitch of each side is its stride in bytes
      JCuda.cudaMemcpy2DAsync(aPtr,
        stride * elemSize,
        bPtr,
        b.stride * elemSize,
        elemSize,
        b.length,
        cudaMemcpyKind.cudaMemcpyDeviceToDevice,
        new cudaStream_t(stream)
      )
    }
  }

  /**
   * Copies the contents of the host-side vector `b` into this vector.
   *
   * @param b source vector; must have the same length as this vector
   * @return the status code returned by `cudaFreeHost` for the staging pointer
   */
  def writeFromDense(b: DenseVector[V]): Int = {
    require(b.length == this.length, "Vectors must have the same length")

    // NOTE(review): assumes cuPointerToArray exposes the whole backing array of `b`
    // starting at index 0, so the source must be advanced by b's own offset — confirm.
    val bPtr = cuda.cuPointerToArray(b.data)

    // The source offset is b.offset (the original used this.offset, which reads the wrong
    // elements whenever the two vectors are views with different offsets).
    JCuda.cudaMemcpy2D(offsetPointer,
      stride * elemSize,
      bPtr.withByteOffset(b.offset * elemSize),
      b.stride * elemSize,
      elemSize,
      length,
      cudaMemcpyKind.cudaMemcpyHostToDevice
    )

    JCuda.cudaFreeHost(bPtr)
  }

  /**
   * Slices this vector using whatever `CanSlice` instance is available for the slice type.
   *
   * @param slice1 description of the slice (for example a `Range`)
   * @return the result produced by `canSlice`
   */
  def apply[Slice1, Result](slice1: Slice1)(implicit canSlice: CanSlice[CuVector[V], Slice1, Result]) = {
    canSlice(this, slice1)
  }

  /** Releases the underlying buffer. Any other view over the same `data` pointer is affected too. */
  def release() = {
    data.release()
  }

}

object CuVector extends CuVectorFuns with CuVectorLowPrio {

  /**
   * Creates a vector of `length` elements whose buffer is filled with zero bytes.
   */
  def zeros[V](length: Int)(implicit ct: ClassTag[V]): CuVector[V] = {
    val vec = new CuVector[V](length)

    JCuda.cudaMemset(vec.data.toCuPointer, 0, vec.size * vec.elemSize)

    vec
  }

  /**
   * Creates a vector of `length` elements, each set to the semiring's `one`.
   */
  def ones[V](length: Int)(implicit ct: ClassTag[V], semiring: Semiring[V], canSet: OpSet.InPlaceImpl2[CuVector[V], V]): CuVector[V] = {
    val vec = new CuVector[V](length)

    vec := semiring.one

    vec
  }

  /**
   * Creates a vector of the same length as `mat` and assigns the contents of `mat` to it.
   */
  def fromDense[V<:AnyVal](mat: DenseVector[V])(implicit ct: ClassTag[V], blas: cublasHandle) = {
    val g = new CuVector[V](mat.length)
    g := mat
    g
  }

  /**
   * Creates a new vector of `length` elements.
   *
   * NOTE(review): the original comment said "Doesn't zero the matrix", but the implementation
   * has always cleared the buffer with `cudaMemset`. The zeroing is kept so callers that rely
   * on it keep working; drop it only after confirming nobody does.
   */
  def create[V](length: Int)(implicit ct: ClassTag[V]): CuVector[V] = zeros[V](length)

  /**
   * Creates a `Float` vector of `length` elements filled by `curandGenerateUniform`,
   * using a pseudo-random generator seeded with an integer drawn from `rand`.
   */
  def rand(length: Int)(implicit rand: RandBasis = Rand): CuVector[Float] = {
    import jcuda.jcurand.JCurand._
    val mat = new CuVector[Float](length)
    val generator = new curandGenerator()
    curandCreateGenerator(generator, curandRngType.CURAND_RNG_PSEUDO_DEFAULT)
    try {
      curandSetPseudoRandomGeneratorSeed(generator, rand.randInt.draw())
      curandGenerateUniform(generator, mat.data.toCuPointer, length)
    } finally {
      // destroy the generator even if seeding or generation throws
      curandDestroyGenerator(generator)
    }

    mat
  }

  /** Builds a zero-filled vector with the same length as the argument. */
  implicit def canCreateZerosLike[V:ClassTag]:CanCreateZerosLike[CuVector[V], CuVector[V]] = new CanCreateZerosLike[CuVector[V], CuVector[V]] {
    override def apply(from: CuVector[V]): CuVector[V] = {
      zeros[V](from.length)
    }
  }

  /** Copies a vector into a freshly created vector of the same length. */
  implicit def canCopy[V:ClassTag](implicit handle: cublasHandle):CanCopy[CuVector[V]] = new CanCopy[CuVector[V]] {
    override def apply(from: CuVector[V]): CuVector[V] = {
      val a = create[V](from.length)
      a := from
      a
    }
  }

  /** Inner product of two `Float` vectors, computed with `cublasSdot`. */
  implicit def canDotFloat(implicit handle: cublasHandle):OpMulInner.Impl2[CuVector[Float], CuVector[Float], Float] = {
    new OpMulInner.Impl2[CuVector[Float], CuVector[Float], Float] {
      override def apply(v: CuVector[Float], v2: CuVector[Float]): Float = {
        require(v.length == v2.length, "Length mismatch!")
        // one-element host buffer that receives the result
        val ptr = cuda.allocateHost[Float](1)
        try {
          JCublas2.cublasSdot(handle, v.length, v.offsetPointer, v.stride, v2.offsetPointer, v2.stride, ptr.toCuPointer)
          ptr.getFloat
        } finally {
          ptr.release()
        }
      }
    }

  }

  /** In-place `v += v2 * v3` for `Float` vectors, computed with `cublasSaxpy`. */
  implicit def canAxpyFloat(implicit handle: cublasHandle):scaleAdd.InPlaceImpl3[CuVector[Float], Float, CuVector[Float]] = {
    new scaleAdd.InPlaceImpl3[CuVector[Float], Float, CuVector[Float]] {
      override def apply(v: CuVector[Float], v2: Float, v3: CuVector[Float]): Unit = {
        require(v.length == v3.length, "Length mismatch!")
        // one-element host buffer holding the scalar factor
        val ptr = cuda.allocateHost[Float](1)
        try {
          ptr.setFloat(v2)
          JCublas2.cublasSaxpy(handle, v.length, ptr.toCuPointer, v3.offsetPointer, v3.stride, v.offsetPointer, v.stride)
        } finally {
          // Previously this buffer was never released (canDotFloat releases its buffer).
          // NOTE(review): relies on cuBLAS reading a host-side scalar before the call
          // returns, which holds for the default host pointer mode — confirm the handle
          // is not switched to device pointer mode.
          ptr.release()
        }
      }
    }

  }


  /**
   * p-norm of a `Float` vector. Only `p == 2.0` is implemented (as `sqrt(v dot v)`);
   * any other value throws `NotImplementedError`.
   */
  implicit def normImplFloat(implicit handle: cublasHandle): norm.Impl2[CuVector[Float], Double, Double] = new norm.Impl2[CuVector[Float], Double, Double] {
    override def apply(v: CuVector[Float], v2: Double): Double = {
      if(v2 == 2.0) {
        math.sqrt(v dot v)
      } else {
        throw new NotImplementedError(s"norm(v, $v2) is not implemented for CuVector[Float]; only the 2-norm is supported")
      }
    }
  }



  /** Vector-space instance for `CuVector[Float]` with `Float` scalars, assembled from the implicits in scope. */
  implicit def vspaceFloat(implicit handle: cublasHandle): MutableInnerProductVectorSpace[CuVector[Float], Float] = {
    MutableInnerProductVectorSpace.make[CuVector[Float], Float]
  }

  // slicing
  /** Range slicing for any element type; reuses the single untyped instance below. */
  implicit def canSlice[V]: CanSlice[CuVector[V], Range, CuVector[V]] = __canSlice.asInstanceOf[CanSlice[CuVector[V], Range, CuVector[V]]]

  // The slicing logic never inspects elements, so one instance typed at Any serves every V.
  private val __canSlice: CanSlice[CuVector[Any], Range, CuVector[Any]]  = {
    new CanSlice[CuVector[Any], Range, CuVector[Any]] {
      def apply(v: CuVector[Any], re: Range): CuVector[Any] = {

        val r = re.getRangeWithoutNegativeIndexes( v.length )

        require(r.isEmpty || r.last < v.length)
        require(r.isEmpty || r.start >= 0)
        // the slice is a view: same buffer, shifted offset and scaled stride
        new CuVector(v.data, offset = v.offset + v.stride * r.start, stride = v.stride * r.step, length = r.length)
      }
    }
  }

}

/**
 * Implicit operator instances for [[CuVector]], mixed into the `CuVector` companion object.
 *
 * Almost every instance is obtained from a `KernelBroker` by naming the operation type and
 * the kernel to use for it; the remaining ones (assignment and softmax) are written by hand.
 */
trait CuVectorFuns extends CuVectorKernels { this: CuVector.type =>
  /** Kernel broker for `Float` elements. */
  implicit val kernelsFloat: KernelBroker[Float] = new KernelBroker[Float]("float")

  // ---- unary functions, producing a new vector ----
  implicit def acosImpl[T](implicit broker: KernelBroker[T]) =  broker.implFor[acos.type]("acos")
  implicit def asinImpl[T](implicit broker: KernelBroker[T]) =  broker.implFor[asin.type]("asin")
  implicit def atanImpl[T](implicit broker: KernelBroker[T]) =  broker.implFor[atan.type]("atan")

  implicit def acoshImpl[T](implicit broker: KernelBroker[T]) =  broker.implFor[acosh.type]("acosh")
  implicit def asinhImpl[T](implicit broker: KernelBroker[T]) =  broker.implFor[asinh.type]("asinh")
  implicit def atanhImpl[T](implicit broker: KernelBroker[T]) =  broker.implFor[atanh.type]("atanh")

  implicit def cosImpl[T](implicit broker: KernelBroker[T]) =  broker.implFor[cos.type]("cos")
  implicit def sinImpl[T](implicit broker: KernelBroker[T]) =  broker.implFor[sin.type]("sin")
  implicit def tanImpl[T](implicit broker: KernelBroker[T]) =  broker.implFor[tan.type]("tan")

  implicit def coshImpl[T](implicit broker: KernelBroker[T]) =  broker.implFor[cosh.type]("cosh")
  implicit def sinhImpl[T](implicit broker: KernelBroker[T]) =  broker.implFor[sinh.type]("sinh")
  implicit def tanhImpl[T](implicit broker: KernelBroker[T]) =  broker.implFor[tanh.type]("tanh")

  implicit def cbrtImpl[T](implicit broker: KernelBroker[T]) =  broker.implFor[cbrt.type]("cbrt")
  implicit def ceilImpl[T](implicit broker: KernelBroker[T]) =  broker.implFor[ceil.type]("ceil")
  //  implicit def cospiImpl[T](implicit broker: CuMapKernels[T]) =  broker.implFor[cospi.type]("cospi")
  implicit def erfcImpl[T](implicit broker: KernelBroker[T]) =  broker.implFor[erfc.type]("erfc")
  implicit def erfcinvImpl[T](implicit broker: KernelBroker[T]) =  broker.implFor[erfcinv.type]("erfcinv")
  implicit def erfImpl[T](implicit broker: KernelBroker[T]) =  broker.implFor[erf.type]("erf")
  implicit def erfinvImpl[T](implicit broker: KernelBroker[T]) =  broker.implFor[erfinv.type]("erfinv")
  implicit def expImpl[T](implicit broker: KernelBroker[T]) =  broker.implFor[exp.type]("exp")
  implicit def expm1Impl[T](implicit broker: KernelBroker[T]) =  broker.implFor[expm1.type]("expm1")
  implicit def fabsImpl[T](implicit broker: KernelBroker[T]) =  broker.implFor[abs.type]("fabs")
  implicit def floorImpl[T](implicit broker: KernelBroker[T]) =  broker.implFor[floor.type]("floor")
  // NOTE(review): these bind breeze's Bessel.i0 / Bessel.i1 to kernels named "j0" / "j1".
  // Confirm the kernels compute the same functions as the breeze operations they stand in for.
  implicit def j0Impl[T](implicit broker: KernelBroker[T]) =  broker.implFor[Bessel.i0.type]("j0")
  implicit def j1Impl[T](implicit broker: KernelBroker[T]) =  broker.implFor[Bessel.i1.type]("j1")
  implicit def lgammaImpl[T](implicit broker: KernelBroker[T]) =  broker.implFor[lgamma.type]("lgamma")
  implicit def log10Impl[T](implicit broker: KernelBroker[T]) =  broker.implFor[log10.type]("log10")
  implicit def log1pImpl[T](implicit broker: KernelBroker[T]) =  broker.implFor[log1p.type]("log1p")
  //  implicit def log2Impl[T](implicit broker: CuMapKernels[T]) =  broker.implFor[log2.type]("log2")
  //  implicit def logbImpl[T](implicit broker: CuMapKernels[T]) =  broker.implFor[logb.type]("logb")
  implicit def logImpl[T](implicit broker: KernelBroker[T]) =  broker.implFor[log.type]("log")
  implicit def sqrtImpl[T](implicit broker: KernelBroker[T]) =  broker.implFor[sqrt.type]("sqrt")
  implicit def rintImpl[T](implicit broker: KernelBroker[T]) =  broker.implFor[rint.type]("rint")
  //  implicit def truncImpl[T](implicit broker: CuMapKernels[T]) =  broker.implFor[trunc.type]("trunc")

  // ---- unary functions, in place ----
  implicit def acosIntoImpl[T](implicit broker: KernelBroker[T]) =  broker.inPlaceImplFor[acos.type]("acos")
  implicit def asinIntoImpl[T](implicit broker: KernelBroker[T]) =  broker.inPlaceImplFor[asin.type]("asin")
  implicit def atanIntoImpl[T](implicit broker: KernelBroker[T]) =  broker.inPlaceImplFor[atan.type]("atan")

  implicit def acoshIntoImpl[T](implicit broker: KernelBroker[T]) =  broker.inPlaceImplFor[acosh.type]("acosh")
  implicit def asinhIntoImpl[T](implicit broker: KernelBroker[T]) =  broker.inPlaceImplFor[asinh.type]("asinh")
  implicit def atanhIntoImpl[T](implicit broker: KernelBroker[T]) =  broker.inPlaceImplFor[atanh.type]("atanh")

  implicit def cosIntoImpl[T](implicit broker: KernelBroker[T]) =  broker.inPlaceImplFor[cos.type]("cos")
  implicit def sinIntoImpl[T](implicit broker: KernelBroker[T]) =  broker.inPlaceImplFor[sin.type]("sin")
  implicit def tanIntoImpl[T](implicit broker: KernelBroker[T]) =  broker.inPlaceImplFor[tan.type]("tan")

  implicit def coshIntoImpl[T](implicit broker: KernelBroker[T]) =  broker.inPlaceImplFor[cosh.type]("cosh")
  implicit def sinhIntoImpl[T](implicit broker: KernelBroker[T]) =  broker.inPlaceImplFor[sinh.type]("sinh")
  implicit def tanhIntoImpl[T](implicit broker: KernelBroker[T]) =  broker.inPlaceImplFor[tanh.type]("tanh")

  implicit def cbrtIntoImpl[T](implicit broker: KernelBroker[T]) =  broker.inPlaceImplFor[cbrt.type]("cbrt")
  implicit def ceilIntoImpl[T](implicit broker: KernelBroker[T]) =  broker.inPlaceImplFor[ceil.type]("ceil")
  //  implicit def cospiIntoImpl[T](implicit broker: CuMapKernels[T]) =  broker.inPlaceImplFor[cospi.type]("cospi")
  implicit def erfcIntoImpl[T](implicit broker: KernelBroker[T]) =  broker.inPlaceImplFor[erfc.type]("erfc")
  implicit def erfcinvIntoImpl[T](implicit broker: KernelBroker[T]) =  broker.inPlaceImplFor[erfcinv.type]("erfcinv")
  implicit def erfIntoImpl[T](implicit broker: KernelBroker[T]) =  broker.inPlaceImplFor[erf.type]("erf")
  implicit def erfinvIntoImpl[T](implicit broker: KernelBroker[T]) =  broker.inPlaceImplFor[erfinv.type]("erfinv")
  implicit def expIntoImpl[T](implicit broker: KernelBroker[T]) =  broker.inPlaceImplFor[exp.type]("exp")
  implicit def expm1IntoImpl[T](implicit broker: KernelBroker[T]) =  broker.inPlaceImplFor[expm1.type]("expm1")
  implicit def fabsIntoImpl[T](implicit broker: KernelBroker[T]) =  broker.inPlaceImplFor[abs.type]("fabs")
  implicit def floorIntoImpl[T](implicit broker: KernelBroker[T]) =  broker.inPlaceImplFor[floor.type]("floor")
  // NOTE(review): same Bessel.i0 / "j0" and Bessel.i1 / "j1" pairing as the out-of-place variants.
  implicit def j0IntoImpl[T](implicit broker: KernelBroker[T]) =  broker.inPlaceImplFor[Bessel.i0.type]("j0")
  implicit def j1IntoImpl[T](implicit broker: KernelBroker[T]) =  broker.inPlaceImplFor[Bessel.i1.type]("j1")
  implicit def lgammaIntoImpl[T](implicit broker: KernelBroker[T]) =  broker.inPlaceImplFor[lgamma.type]("lgamma")
  implicit def log10IntoImpl[T](implicit broker: KernelBroker[T]) =  broker.inPlaceImplFor[log10.type]("log10")
  implicit def log1pIntoImpl[T](implicit broker: KernelBroker[T]) =  broker.inPlaceImplFor[log1p.type]("log1p")
  //  implicit def log2IntoImpl[T](implicit broker: CuMapKernels[T]) =  broker.inPlaceImplFor[log2.type]("log2")
  //  implicit def logbIntoImpl[T](implicit broker: CuMapKernels[T]) =  broker.inPlaceImplFor[logb.type]("logb")
  implicit def logIntoImpl[T](implicit broker: KernelBroker[T]) =  broker.inPlaceImplFor[log.type]("log")
  implicit def sqrtIntoImpl[T](implicit broker: KernelBroker[T]) =  broker.inPlaceImplFor[sqrt.type]("sqrt")
  implicit def rintIntoImpl[T](implicit broker: KernelBroker[T]) =  broker.inPlaceImplFor[rint.type]("rint")

  implicit def negateImpl[T](implicit broker: KernelBroker[T]): UImpl[OpNeg.type, CuVector[T], CuVector[T]] =  broker.implFor[OpNeg.type]("negate")

  // ---- binary operations, vector (op) vector, producing a new vector ----
  implicit def addImpl[T](implicit broker: KernelBroker[T]): UImpl2[OpAdd.type, CuVector[T], CuVector[T], CuVector[T]] =  broker.impl2For[OpAdd.type]("add")
  implicit def subImpl[T](implicit broker: KernelBroker[T]): UImpl2[OpSub.type, CuVector[T], CuVector[T], CuVector[T]] =  broker.impl2For[OpSub.type]("sub")
  implicit def mulImpl[T](implicit broker: KernelBroker[T]): UImpl2[OpMulScalar.type, CuVector[T], CuVector[T], CuVector[T]] =  broker.impl2For[OpMulScalar.type]("mul")
  implicit def divImpl[T](implicit broker: KernelBroker[T]) =  broker.impl2For[OpDiv.type]("div")
  implicit def modImpl[T](implicit broker: KernelBroker[T]) =  broker.impl2For[OpMod.type]("mod")
  implicit def maxImpl[T](implicit broker: KernelBroker[T]) =  broker.impl2For[max.type]("max")
  implicit def minImpl[T](implicit broker: KernelBroker[T]) =  broker.impl2For[min.type]("min")
  implicit def powImpl[T](implicit broker: KernelBroker[T]) =  broker.impl2For[OpPow.type]("pow")

  // ---- binary operations, vector (op)= vector, in place ----
  implicit def addIntoImpl[T](implicit broker: KernelBroker[T]): InPlaceImpl2[OpAdd.type, CuVector[T], CuVector[T]] =  broker.inPlaceImpl2For[OpAdd.type]("add")
  implicit def subIntoImpl[T](implicit broker: KernelBroker[T]): InPlaceImpl2[OpSub.type, CuVector[T], CuVector[T]] =  broker.inPlaceImpl2For[OpSub.type]("sub")
  implicit def mulIntoImpl[T](implicit broker: KernelBroker[T]): InPlaceImpl2[OpMulScalar.type, CuVector[T], CuVector[T]] =  broker.inPlaceImpl2For[OpMulScalar.type]("mul")
  implicit def divIntoImpl[T](implicit broker: KernelBroker[T]) =  broker.inPlaceImpl2For[OpDiv.type]("div")
  implicit def modIntoImpl[T](implicit broker: KernelBroker[T]) =  broker.inPlaceImpl2For[OpMod.type]("mod")
  implicit def maxIntoImpl[T](implicit broker: KernelBroker[T]) =  broker.inPlaceImpl2For[max.type]("max")
  implicit def minIntoImpl[T](implicit broker: KernelBroker[T]) =  broker.inPlaceImpl2For[min.type]("min")
  implicit def powIntoImpl[T](implicit broker: KernelBroker[T]) =  broker.inPlaceImpl2For[OpPow.type]("pow")

  // ---- binary operations, vector (op)= scalar, in place ----
  implicit def addIntoImpl_S[T](implicit broker: KernelBroker[T]): InPlaceImpl2[OpAdd.type, CuVector[T], T] =  broker.inPlaceImpl2For_v_s[OpAdd.type]("add")
  implicit def subIntoImpl_S[T](implicit broker: KernelBroker[T]): InPlaceImpl2[OpSub.type, CuVector[T], T] =  broker.inPlaceImpl2For_v_s[OpSub.type]("sub")
  implicit def mulIntoImpl_S[T](implicit broker: KernelBroker[T]): InPlaceImpl2[OpMulScalar.type, CuVector[T], T] =  broker.inPlaceImpl2For_v_s[OpMulScalar.type]("mul")
  implicit def divIntoImpl_S[T](implicit broker: KernelBroker[T]): InPlaceImpl2[OpDiv.type, CuVector[T], T] =  broker.inPlaceImpl2For_v_s[OpDiv.type]("div")
  implicit def modIntoImpl_S[T](implicit broker: KernelBroker[T]): InPlaceImpl2[OpMod.type, CuVector[T], T] =  broker.inPlaceImpl2For_v_s[OpMod.type]("mod")
  implicit def maxIntoImpl_S[T](implicit broker: KernelBroker[T]) =  broker.inPlaceImpl2For_v_s[max.type]("max")
  implicit def minIntoImpl_S[T](implicit broker: KernelBroker[T]): InPlaceImpl2[min.type, CuVector[T], T] =  broker.inPlaceImpl2For_v_s[min.type]("min")
  implicit def powIntoImpl_S[T](implicit broker: KernelBroker[T]) =  broker.inPlaceImpl2For_v_s[OpPow.type]("pow")
  implicit def setIntoImpl_S[T](implicit broker: KernelBroker[T]): InPlaceImpl2[OpSet.type, CuVector[T], T] =  broker.inPlaceImpl2For_v_s[OpSet.type]("set")

  // ---- binary operations, vector (op) scalar, producing a new vector ----
  implicit def addImplVS[T](implicit broker: KernelBroker[T]): UImpl2[OpAdd.type, CuVector[T], T, CuVector[T]] =  broker.impl2For_v_s[OpAdd.type]("add")
  implicit def subImplVS[T](implicit broker: KernelBroker[T]): UImpl2[OpSub.type, CuVector[T], T, CuVector[T]] =  broker.impl2For_v_s[OpSub.type]("sub")
  implicit def mulImplVS[T](implicit broker: KernelBroker[T]): UImpl2[OpMulScalar.type, CuVector[T], T, CuVector[T]] =  broker.impl2For_v_s[OpMulScalar.type]("mul")
  implicit def mulMatrixImplVS[T](implicit broker: KernelBroker[T]): UImpl2[OpMulMatrix.type, CuVector[T], T, CuVector[T]] =  broker.impl2For_v_s[OpMulMatrix.type]("mul")
  implicit def divImplVS[T](implicit broker: KernelBroker[T]): UImpl2[OpDiv.type, CuVector[T], T, CuVector[T]] =  broker.impl2For_v_s[OpDiv.type]("div")
  implicit def modImplVS[T](implicit broker: KernelBroker[T]): UImpl2[OpMod.type, CuVector[T], T, CuVector[T]] =  broker.impl2For_v_s[OpMod.type]("mod")
  implicit def powImplVS[T](implicit broker: KernelBroker[T]) =  broker.impl2For_v_s[OpPow.type]("pow")

  // ---- binary operations, scalar (op) vector, producing a new vector ----
  implicit def addImplSV[T](implicit broker: KernelBroker[T]) =  broker.impl2For_s_v[OpAdd.type]("add")
  implicit def subImplSV[T](implicit broker: KernelBroker[T]) =  broker.impl2For_s_v[OpSub.type]("sub")
  implicit def mulImplSV[T](implicit broker: KernelBroker[T]) =  broker.impl2For_s_v[OpMulScalar.type]("mul")
  implicit def mulMatrixImplSV[T](implicit broker: KernelBroker[T]) =  broker.impl2For_s_v[OpMulMatrix.type]("mul")
  implicit def divImplSV[T](implicit broker: KernelBroker[T]) =  broker.impl2For_s_v[OpDiv.type]("div")
  implicit def modImplSV[T](implicit broker: KernelBroker[T]) =  broker.impl2For_s_v[OpMod.type]("mod")
  implicit def powImplSV[T](implicit broker: KernelBroker[T]) =  broker.impl2For_s_v[OpPow.type]("pow")

  // ---- reductions over the whole vector ----
  implicit def sumImpl[T](implicit broker: KernelBroker[T]) =  broker.reducerFor[sum.type]("add")
  implicit def maxReduceImpl[T](implicit broker: KernelBroker[T]) =  broker.reducerFor[max.type]("max")
  implicit def minReduceImpl[T](implicit broker: KernelBroker[T]) =  broker.reducerFor[min.type]("min")

  /*
  implicit def sumColImpl[T](implicit broker: KernelBroker[T]) =  broker.colReducerFor[sum.type]("add")
  implicit def maxColImpl[T](implicit broker: KernelBroker[T]) =  broker.colReducerFor[max.type]("max")
  implicit def minColImpl[T](implicit broker: KernelBroker[T]) =  broker.colReducerFor[min.type]("min")

  implicit def sumRowImpl[T](implicit broker: KernelBroker[T]) =  broker.rowReducerFor[sum.type]("add")
  implicit def maxRowImpl[T](implicit broker: KernelBroker[T]) =  broker.rowReducerFor[max.type]("max")
  implicit def minRowImpl[T](implicit broker: KernelBroker[T]) =  broker.rowReducerFor[min.type]("min")
  */

  /** Assignment `a := b` between two vectors, implemented with `CuVector.writeFrom`. */
  class SetCuMCuMVOp[V] extends OpSet.InPlaceImpl2[CuVector[V], CuVector[V]] {
    def apply(a: CuVector[V], b: CuVector[V]): Unit = {
      a.writeFrom(b)
    }
  }

  implicit def setCuMCuMOp[V]:OpSet.InPlaceImpl2[CuVector[V], CuVector[V]] = new SetCuMCuMVOp[V]()

  /**
   * Assignment `v := dense` from a host-side `DenseVector`, implemented with
   * `CuVector.writeFromDense`. The `stream` parameter is not used by the implementation.
   */
  implicit def setMDM[V](implicit stream: CUstream = new CUstream()): OpSet.InPlaceImpl2[CuVector[V], DenseVector[V]] = new OpSet.InPlaceImpl2[CuVector[V], DenseVector[V]] {
    def apply(v: CuVector[V], v2: DenseVector[V]): Unit = {
      v.writeFromDense(v2)
    }
  }

  /**
   * softmax (log-sum-exp) of a `Float` vector, computed as `log(sum(exp(v - max(v)))) + max(v)`.
   * Subtracting the maximum first keeps the exponentials from overflowing.
   */
  implicit object softmaxImplFloat extends softmax.Impl[CuVector[Float], Float] {
    override def apply(v: CuVector[Float]): Float = {
      val m: Float = max(v)
      val temp = v - m
      try {
        exp.inPlace(temp)
        log(sum(temp)) + m
      } finally {
        // free the temporary buffer even if one of the kernel calls throws
        temp.data.release()
      }
    }
  }

}

/**
 * Lower-priority implicits for the `CuVector` companion object.
 */
trait CuVectorLowPrio { this: CuVector.type =>
  /**
   * Vector-space instance for `CuVector[Float]` whose scalar type is `Double`.
   *
   * lbfgs wants a MIPS[T, Double], so this implicit allows us to fake it: every operator
   * below narrows the `Double` scalar to `Float` and forwards to the matching `Float`
   * operation, and the inner product widens its `Float` result to `Double`.
   */
  implicit def vspaceFloatPretendsToBeDouble(implicit handle: cublasHandle): MutableInnerProductVectorSpace[CuVector[Float], Double] = {

    /** `v + scalar`, producing a new vector. */
    implicit object addVSDouble extends OpAdd.Impl2[CuVector[Float], Double, CuVector[Float]] {
      override def apply(v: CuVector[Float], v2: Double): CuVector[Float] = {
        val scalar: Float = v2.toFloat
        v :+ scalar
      }
    }

    /** `v - scalar`, producing a new vector. */
    implicit object subVSDouble extends OpSub.Impl2[CuVector[Float], Double, CuVector[Float]] {
      override def apply(v: CuVector[Float], v2: Double): CuVector[Float] = {
        val scalar: Float = v2.toFloat
        v :- scalar
      }
    }

    /** `v * scalar`, producing a new vector. */
    implicit object mulVSDouble extends OpMulScalar.Impl2[CuVector[Float], Double, CuVector[Float]] {
      override def apply(v: CuVector[Float], v2: Double): CuVector[Float] = {
        val scalar: Float = v2.toFloat
        v :* scalar
      }
    }

    /** `v += scalar`, in place. */
    implicit object addIntoVSDouble extends OpAdd.InPlaceImpl2[CuVector[Float], Double] {
      override def apply(v: CuVector[Float], v2: Double): Unit = {
        val scalar: Float = v2.toFloat
        v :+= scalar
      }
    }

    /** `v -= scalar`, in place. */
    implicit object subIntoVSDouble extends OpSub.InPlaceImpl2[CuVector[Float], Double] {
      override def apply(v: CuVector[Float], v2: Double): Unit = {
        val scalar: Float = v2.toFloat
        v :-= scalar
      }
    }

    /** `v / scalar`, producing a new vector. */
    implicit object divVSDouble extends OpDiv.Impl2[CuVector[Float], Double, CuVector[Float]] {
      override def apply(v: CuVector[Float], v2: Double): CuVector[Float] = {
        val scalar: Float = v2.toFloat
        v :/ scalar
      }
    }

    /** Inner product: the `Float` dot product, widened to `Double`. */
    implicit object mulInner extends OpMulInner.Impl2[CuVector[Float], CuVector[Float], Double] {
      override def apply(v: CuVector[Float], v2: CuVector[Float]): Double = {
        val floatDot = CuVector.canDotFloat(handle)
        floatDot(v, v2).toDouble
      }
    }

    /** `v *= scalar`, in place. */
    implicit object mulIntoVSDouble extends OpMulScalar.InPlaceImpl2[CuVector[Float], Double] {
      override def apply(v: CuVector[Float], v2: Double): Unit = {
        val scalar: Float = v2.toFloat
        v :*= scalar
      }
    }

    /** `v /= scalar`, in place. */
    implicit object divIntoVSDouble extends OpDiv.InPlaceImpl2[CuVector[Float], Double] {
      override def apply(v: CuVector[Float], v2: Double): Unit = {
        val scalar: Float = v2.toFloat
        v :/= scalar
      }
    }

    /** `v += scalar * v3`, in place, forwarded to the `Float` scaleAdd. */
    implicit object scaleAddVS extends scaleAdd.InPlaceImpl3[CuVector[Float], Double, CuVector[Float]] {
      override def apply(v: CuVector[Float], v2: Double, v3: CuVector[Float]): Unit = {
        val scalar: Float = v2.toFloat
        scaleAdd.inPlace(v, scalar, v3)
      }
    }

    // assemble the vector space from the implicit operators defined above
    MutableInnerProductVectorSpace.make[CuVector[Float], Double]
  }
}




© 2015 - 2025 Weber Informatics LLC | Privacy Policy