// All Downloads are FREE. Search and download functionality uses the official Maven repository.
//
// breeze.linalg.VectorBuilder.scala Maven / Gradle / Ivy
//
// The newest version!
package breeze.linalg
/*
 Copyright 2012 David Hall

 Licensed under the Apache License, Version 2.0 (the "License");
 you may not use this file except in compliance with the License.
 You may obtain a copy of the License at

 http://www.apache.org/licenses/LICENSE-2.0

 Unless required by applicable law or agreed to in writing, software
 distributed under the License is distributed on an "AS IS" BASIS,
 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 See the License for the specific language governing permissions and
 limitations under the License.
*/
import operators._
import scala.{specialized=>spec}
import support._
import breeze.util.{Sorting, ArrayUtil}
import breeze.generic.{CanMapValues, URFunc}
import breeze.math.{Field, MutableVectorSpace, Semiring, Ring}
import breeze.storage.DefaultArrayValue


/**
 * A VectorBuilder is basically an unsorted sparse vector. Two parallel
 * arrays are maintained, one of indices, and another of values.
 * The indices are not sorted. Moreover, indices are not unique in
 * the index array: the logical value at an index is the sum of every
 * entry stored for that index. Furthermore, apply(i) and update(i, v) are
 * linear in the number of active values in the array.
 *
 * + and - are linear operations: they just append to the end.
 * Component wise multiply, divide, and dot product are also linear,
 * but require creating a HashVector copy. (TODO: maybe a SparseVector?)
 *
 * In general, these should never be used, except for building, or for doing feature
 * vector type things.
 *
 * @author dlwh
 */
@SerialVersionUID(1)
class VectorBuilder[@spec(Double,Int, Float) E](private var _index: Array[Int],
                                                private var _data: Array[E],
                                                private var used: Int,
                                                var length: Int)
                                               (implicit ring: Semiring[E],
                                                dfv: DefaultArrayValue[E]) extends NumericOps[VectorBuilder[E]] with Serializable {

  /**
   * Creates an empty builder of the given logical length.
   *
   * NOTE: `initialNonZero` is accepted but currently unused; the backing arrays
   * always start with zero capacity and grow on demand in `add`.
   */
  def this(length: Int, initialNonZero: Int = 0)(implicit ring: Semiring[E],
                                                 man: ClassManifest[E],
                                                 dfv: DefaultArrayValue[E]) = this(new Array[Int](0), new Array[E](0), 0, length)


  /** Logical dimension of the vector (same as `length`). */
  def size = length


  /** The raw value array. Only the first `activeSize` slots are meaningful. */
  def data  = _data
  /** The raw index array. Only the first `activeSize` slots are meaningful. */
  def index = _index
  /** Number of (index, value) entries currently stored, duplicates included. */
  def activeSize = used

  def repr = this

  /**
   * True if at least one stored entry has logical index `i`.
   * Only the first `activeSize` slots are inspected, so unused capacity
   * (which is zero-filled after the arrays grow) never produces a false hit for index 0.
   */
  def contains(i: Int) = {
    var off = 0
    var found = false
    while(off < used && !found) {
      found = _index(off) == i
      off += 1
    }
    found
  }

  /**
   * Returns the logical value at index `i`: the sum of every stored entry
   * whose index equals `i`, or `ring.zero` if there is none.
   *
   * @throws IndexOutOfBoundsException if `i` is not in [0, size)
   */
  def apply(i: Int) = {
    if(i < 0 || i >= size) throw new IndexOutOfBoundsException(i + " not in [0,"+size+")")

    var off = 0
    var acc = ring.zero
    while(off < used) {
      if(_index(off) == i) acc = ring.+(acc, _data(off))
      off += 1
    }

    acc
  }

  /**
   * Sets the logical value at index `i` to `v`.
   *
   * The first stored entry for `i` is overwritten with `v` and any further
   * duplicates are zeroed, so that the entries still sum to `v`. If no entry
   * exists for `i`, a new one is appended.
   *
   * @throws IndexOutOfBoundsException if `i` is not in [0, size)
   */
  def update(i: Int, v: E) {
    if(i < 0 || i >= size) throw new IndexOutOfBoundsException(i + " not in [0,"+size+")")
    var marked = false
    var off = 0
    while(off < used) {
      if(_index(off) == i) {
        if(!marked)
          _data(off) = v
        else _data(off) = ring.zero
        marked = true
      }

      off += 1
    }

    // Nothing was stored for i yet: without this the assignment would be silently lost.
    if(!marked) add(i, v)
  }

  /**
   * Appends the entry (i, v). Because duplicate indices are summed, this
   * adds `v` to the logical value at `i`. Backing arrays double when full.
   *
   * @throws IndexOutOfBoundsException if `i` is not in [0, size)
   */
  def add(i: Int, v: E) {
    if(i < 0 || i >= size) throw new IndexOutOfBoundsException(i + " not in [0,"+size+")")

    if(_data.length <= used) {
      _data = ArrayUtil.copyOf(_data, math.max(_data.length * 2, 1))
      _index = ArrayUtil.copyOf(_index, math.max(_index.length * 2, 1))
    }
    _data(used) = v
    _index(used) = i
    used += 1
  }

  // The iterators below go through a freshly built HashVector, so duplicates are already summed.
  def activeIterator = toHashVector.activeIterator

  def activeValuesIterator = toHashVector.activeValuesIterator

  def activeKeysIterator = toHashVector.activeKeysIterator

  // TODO: allow this to vary
  /** This is always assumed to be equal to 0, for now. */
  def default = ring.zero

  /** True if `rawIndex` is a valid offset into the used part of the backing arrays, i.e. in [0, activeSize). */
  def isActive(rawIndex: Int) = rawIndex >= 0 && rawIndex < used

  override def toString = {
    (index.iterator zip data.iterator).take(used).mkString("USVector(",", ", ")")
  }

  /** Returns a builder with copies of the backing arrays, the same active size and the same length. */
  def copy: VectorBuilder[E] = {
    new VectorBuilder[E](ArrayUtil.copyOf(index, index.length), ArrayUtil.copyOf(data, index.length), activeSize, size)
  }

  /** Returns a new, empty builder with the same logical length. */
  def zerosLike: VectorBuilder[E] = {
    new VectorBuilder[E](new Array[Int](0), ArrayUtil.newArrayLike(data, 0), 0, size)
  }

  /**
   * Ensures the backing arrays can hold at least `nnz` entries.
   * Arrays are only ever grown, never shrunk, so stored entries are preserved.
   */
  def reserve(nnz: Int) {
    if(nnz > _data.length) {
      _data = ArrayUtil.copyOf(_data, nnz)
    }
    if(nnz > _index.length) {
      _index = ArrayUtil.copyOf(_index, nnz)
    }
  }

  /** Builds a HashVector of the same length, summing the values of duplicate indices. */
  def toHashVector = {
    // Recover a manifest from the runtime component type of the value array.
    implicit val man = ClassManifest.fromClass(_data.getClass.getComponentType.asInstanceOf[Class[E]])
    val hv = HashVector.zeros[E](length)
    var i = 0
    while(i < used) {
      hv(index(i)) = ring.+(hv(index(i)),data(i))
      i += 1
    }
    hv
  }

  /**
   * Builds a SparseVector of the same length: entries are visited in index
   * order and the values of duplicate indices are summed.
   *
   * @throws RuntimeException if the largest stored index is >= length or the
   *                          smallest stored index is negative
   */
  def toSparseVector = {
    val index = this.index
    val values = this.data

    // Output arrays are sized by capacity; only the first `out` slots end up in use.
    val outIndex = new Array[Int](index.length)
    val outValues = ArrayUtil.newArrayLike(values, values.length)

    // ord is a permutation of 0 until used that orders the entries by index.
    val ord = sortedIndices(index)
    if(ord.length > 0) {
      outIndex(0) = index(ord(0))
      outValues(0) = values(ord(0))
      if(index(ord.last) >= length)
        throw new RuntimeException("Index " + index(ord.last) + " exceeds dimension " + length)
      else if (outIndex(0) < 0)
        throw new RuntimeException("Index " + outIndex(0) + " is less than 0!")
    }
    var i   = 1
    var out = 0
    while(i < ord.length) {
      if(outIndex(out) == index(ord(i))) {
        // Same index as the previous entry: merge.
        outValues(out) = ring.+(outValues(out), values(ord(i)))
      } else {
        out += 1
        outIndex(out) = index(ord(i))
        outValues(out) = values(ord(i))
      }
      i += 1
    }

    // Turn the offset of the last written slot into a count.
    if(ord.length > 0)
      out += 1

    require(ord.length == 0 || length > outIndex.last, "Index out of bounds in constructing sparse vector.")
    new SparseVector(outIndex, outValues, out, length)
  }

  /** Returns the offsets 0 until used, reordered by `Sorting.indexSort` using `indices` as keys. */
  private def sortedIndices(indices: Array[Int]) = {
    val arr = VectorBuilder.range(used)
    Sorting.indexSort(arr, 0, used, indices)
    arr
  }


  /** Rebuilds the builder from its SparseVector form, merging duplicate indices into one entry each. */
  def compact() {
    val ah = toSparseVector
    clear()
    reserve(ah.activeSize)
    var i = 0
    while(i < ah.iterableSize) {
      if(ah.isActive(i)) {
        add(ah.index(i), ah.data(i))
      }
      i += 1
    }
  }

  /** Removes all entries and releases the backing arrays. The logical length is unchanged. */
  def clear() {
    used = 0
    _index = new Array[Int](0)
    _data = ArrayUtil.newArrayLike(data, 0)
  }


  /** Two builders are equal when they have the same length and equal HashVector forms. */
  override def equals(p1: Any): Boolean = (this eq p1.asInstanceOf[AnyRef]) || (p1 match {
    case vb: VectorBuilder[_] =>
      this.length == vb.length && vb.toHashVector == this.toHashVector
    case _ => false
  })

  /**
   * Sets the underlying arrays to use this data. The arrays are stored as-is, not copied.
   * @param index logical indices of the entries; they need not be sorted or unique
   * @param data values corresponding to the index
   * @param activeSize number of active elements. The first activeSize will be used.
   */
  def use(index: Array[Int], data: Array[E], activeSize: Int) {
    require(activeSize >= 0, "activeSize must be non-negative")
    require(data.length >= activeSize, "activeSize must be no greater than array length...")
    // The index array is read at the same offsets as data, so it needs the same guarantee.
    require(index.length >= activeSize, "activeSize must be no greater than array length...")
    _data = data
    _index = index
    used = activeSize
  }

  /**
   * same as data(i). Gives the value at the underlying offset.
   * @param i index into the data array
   * @return the value stored at offset i
   */
  def valueAt(i: Int): E = data(i)

  /**
   * Gives the logical index from the physical index.
   * @param i offset into the index array
   * @return the logical index stored at offset i
   */
  def indexAt(i: Int): Int = index(i)

  /**
   * Only gives true if isActive would return true for all i. (May be false anyway)
   * @return always true for this class
   */
  def allVisitableIndicesActive: Boolean = true
}

/** Factory methods and generic type-class instances for [[VectorBuilder]]. */
object VectorBuilder extends VectorBuilderOps_Double {

  /** Creates an empty builder of logical length `size`; `initialNonzero` is forwarded to the constructor. */
  def zeros[@spec(Double, Float, Int) V: ClassManifest:Semiring:DefaultArrayValue](size: Int, initialNonzero: Int = 16) = new VectorBuilder(size, initialNonzero)
  /** Creates a dense builder with entry i = values(i). The `values` array is used directly, not copied. */
  def apply[@spec(Double, Float, Int) V:Semiring:DefaultArrayValue](values: Array[V]) = new VectorBuilder(Array.range(0,values.length), values, values.length, values.length)

  /** Creates a dense builder from the given values, in order. */
  def apply[V:ClassManifest:Semiring:DefaultArrayValue](values: V*):VectorBuilder[V] = apply(values.toArray)
  /** Creates a dense builder of `size` entries, evaluating `v` once per entry. */
  def fill[@spec(Double, Int, Float) V:ClassManifest:Semiring:DefaultArrayValue](size: Int)(v: =>V):VectorBuilder[V] = apply(Array.fill(size)(v))
  /** Creates a dense builder of `size` entries where entry i is `f(i)`. */
  def tabulate[@spec(Double, Int, Float) V:ClassManifest:Semiring:DefaultArrayValue](size: Int)(f: Int=>V):VectorBuilder[V]= apply(Array.tabulate(size)(f))

  /**
   * Creates a builder of logical length `length` holding the given (index, value) pairs.
   * Each pair is appended, so repeated indices accumulate.
   */
  def apply[V:ClassManifest:Semiring:DefaultArrayValue](length: Int)(values: (Int, V)*) = {
    val r = zeros[V](length)
    for( (i, v) <- values) {
      // Must append: the builder starts empty, and update only rewrites entries that already exist.
      r.add(i, v)
    }
    r
  }


  // implicits
  /** CanCopy instance that delegates to `VectorBuilder.copy`. */
  class CanCopyBuilder[@spec(Int, Float, Double) V:ClassManifest:Semiring:DefaultArrayValue] extends CanCopy[VectorBuilder[V]] {
    def apply(v1: VectorBuilder[V]) = {
      v1.copy
    }
  }

  /** CanCreateZerosLike instance that delegates to `VectorBuilder.zerosLike`. */
  class CanZerosBuilder[@spec(Int, Float, Double) V:ClassManifest:Semiring:DefaultArrayValue] extends CanCreateZerosLike[VectorBuilder[V], VectorBuilder[V]] {
    def apply(v1: VectorBuilder[V]) = {
      v1.zerosLike
    }
  }

  implicit def canCopyBuilder[@spec(Int, Float, Double) V: ClassManifest: Semiring:DefaultArrayValue] = new CanCopyBuilder[V]
  implicit def canZerosBuilder[@spec(Int, Float, Double) V: ClassManifest: Semiring:DefaultArrayValue] = new CanZerosBuilder[V]

  /**
   * Derives negation from scalar multiplication: -a is computed as a * (-1).
   *
   * NOTE(review): the second type parameter is literally named `Double`; it is unused
   * and shadows scala.Double inside this signature. Kept as-is so the signature does not change.
   */
  implicit def negFromScale[@spec(Int, Float, Double)  V, Double](implicit scale: BinaryOp[VectorBuilder[V], V, OpMulScalar, VectorBuilder[V]], field: Ring[V]) = {
    new UnaryOp[VectorBuilder[V], OpNeg, VectorBuilder[V]] {
      override def apply(a : VectorBuilder[V]) = {
        scale(a, field.negate(field.one))
      }
    }
  }


  // private stuff


  // Sigh, Array.range is slow.
  /** Returns the array 0, 1, ..., length - 1. */
  private def range(length: Int) = {
    val result = new Array[Int](length)
    var i = 0
    while(i < length) {
      result(i) = i
      i += 1
    }
    result
  }

}

/**
 * Operator instances for VectorBuilder[Double], mixed into the VectorBuilder companion.
 *
 * Since a builder's logical value at an index is the sum of its entries for that
 * index, addition and subtraction between builders are implemented by appending entries.
 */
trait VectorBuilderOps_Double { this: VectorBuilder.type =>
  /** In-place a *= b: scales every stored entry. */
  implicit val canScaleInto_Double: BinaryUpdateOp[VectorBuilder[Double], Double, OpMulScalar] =  {
    new  BinaryUpdateOp[VectorBuilder[Double], Double, OpMulScalar]  {
      def apply(a: VectorBuilder[Double], b: Double) {
        var i = 0
        while(i < a.activeSize) {
          a.data(i) *= b
          i += 1
        }
      }
    }

  }

  /** In-place a /= b: divides every stored entry. */
  implicit val canDivInto_Double: BinaryUpdateOp[VectorBuilder[Double], Double, OpDiv] =  {
    new  BinaryUpdateOp[VectorBuilder[Double], Double, OpDiv]  {
      def apply(a: VectorBuilder[Double], b: Double) {
        var i = 0
        while(i < a.activeSize) {
          a.data(i) /= b
          i += 1
        }
      }
    }

  }

  /** In-place a += b: appends b's entries to a. */
  implicit val canAddInto_VV_Double: BinaryUpdateOp[VectorBuilder[Double], VectorBuilder[Double], OpAdd] =  {
    new  BinaryUpdateOp[VectorBuilder[Double], VectorBuilder[Double], OpAdd]  {
      def apply(a: VectorBuilder[Double], b: VectorBuilder[Double]) {
        require(a.length == b.length, "Dimension mismatch!")
        a.reserve(a.activeSize + b.activeSize)
        var i = 0
        while(i < b.activeSize) {
          a.add(b.index(i), b.data(i))
          i += 1
        }
      }
    }

  }

  /** In-place a -= b: appends b's entries, negated, to a. */
  implicit val canSubInto_VV_Double: BinaryUpdateOp[VectorBuilder[Double], VectorBuilder[Double], OpSub] =  {
    new  BinaryUpdateOp[VectorBuilder[Double], VectorBuilder[Double], OpSub]  {
      def apply(a: VectorBuilder[Double], b: VectorBuilder[Double]) {
        require(a.length == b.length, "Dimension mismatch!")
        a.reserve(a.activeSize + b.activeSize)
        var i = 0
        while(i < b.activeSize) {
          a.add(b.index(i), -b.data(i))
          i += 1
        }
      }
    }

  }


  /** In-place a := b: clears a, then appends b's entries. a's length is not modified. */
  implicit val canSet_Double: BinaryUpdateOp[VectorBuilder[Double], VectorBuilder[Double], OpSet] =  {
    new  BinaryUpdateOp[VectorBuilder[Double], VectorBuilder[Double], OpSet]  {
      def apply(a: VectorBuilder[Double], b: VectorBuilder[Double]) {
        a.clear()
        a.reserve(b.activeSize)
        var i = 0
        while(i < b.activeSize) {
          a.add(b.index(i), b.data(i))
          i += 1
        }
      }
    }

  }


  // Non-mutating operators, each derived from the corresponding in-place operator plus a copy.
  implicit val mulVS_Double: BinaryOp[VectorBuilder[Double], Double, OpMulScalar, VectorBuilder[Double]] = {
    BinaryOp.fromCopyAndUpdate[VectorBuilder[Double], Double, OpMulScalar]
  }

  implicit val divVS_Double: BinaryOp[VectorBuilder[Double], Double, OpDiv, VectorBuilder[Double]] = {
    BinaryOp.fromCopyAndUpdate[VectorBuilder[Double], Double, OpDiv]
  }

  implicit val addVV_Double: BinaryOp[VectorBuilder[Double], VectorBuilder[Double], OpAdd, VectorBuilder[Double]] = {
    BinaryOp.fromCopyAndUpdate[VectorBuilder[Double], VectorBuilder[Double], OpAdd]
  }

  implicit val subVV_Double: BinaryOp[VectorBuilder[Double], VectorBuilder[Double], OpSub, VectorBuilder[Double]] = {
    BinaryOp.fromCopyAndUpdate[VectorBuilder[Double], VectorBuilder[Double], OpSub]
  }

  /** Unary negation: returns a new builder holding the negation of every entry of a. */
  implicit val neg_Double: UnaryOp[VectorBuilder[Double], OpNeg, VectorBuilder[Double]] = {
    new UnaryOp[VectorBuilder[Double], OpNeg, VectorBuilder[Double]] {
      def apply(a: VectorBuilder[Double]): VectorBuilder[Double] = {
        val c = a.zerosLike
        // Capacity is needed for the stored entries only, not for the full logical dimension.
        c.reserve(a.activeSize)
        var i = 0
        while(i < a.activeSize) {
          c.add(a.index(i), -a.data(i))
          i += 1
        }
        c
      }
    }
  }

  /** axpy on builders: a += s * b, implemented by appending the scaled entries of b to a. */
  implicit val canAxpy_VB_VB_Double: CanAxpy[Double, VectorBuilder[Double], VectorBuilder[Double]] = {
    new  CanAxpy[Double, VectorBuilder[Double], VectorBuilder[Double]]  {
      def apply(s: Double, b: VectorBuilder[Double], a: VectorBuilder[Double]) {
        require(a.length == b.length, "Dimension mismatch!")
        a.reserve(b.activeSize + a.activeSize)
        var i = 0
        val bd = b.data
        while(i < b.activeSize) {
          a.add(b.index(i), s * bd(i))
          i += 1
        }
      }
    }
  }

  /** MutableVectorSpace instance for VectorBuilder[Double], assembled from the operators in this trait. */
  implicit val mvector_space_Double: MutableVectorSpace[VectorBuilder[Double], Double] = {
    new MutableVectorSpace[VectorBuilder[Double], Double] {
      def field: Field[Double] = Field.fieldD

      def isNumericOps(v: VectorBuilder[Double]): NumericOps[VectorBuilder[Double]] = v

      def zeros: CanCreateZerosLike[VectorBuilder[Double], VectorBuilder[Double]] = VectorBuilder.canZerosBuilder[Double]

      def copy: CanCopy[VectorBuilder[Double]] = VectorBuilder.canCopyBuilder[Double]

      def mulIntoVS: BinaryUpdateOp[VectorBuilder[Double], Double, OpMulScalar] = canScaleInto_Double

      def divIntoVS: BinaryUpdateOp[VectorBuilder[Double], Double, OpDiv] = canDivInto_Double

      def addIntoVV: BinaryUpdateOp[VectorBuilder[Double], VectorBuilder[Double], OpAdd] = canAddInto_VV_Double
      def subIntoVV: BinaryUpdateOp[VectorBuilder[Double], VectorBuilder[Double], OpSub] = canSubInto_VV_Double


      def mulVS: BinaryOp[VectorBuilder[Double], Double, OpMulScalar, VectorBuilder[Double]] = {
        BinaryOp.fromCopyAndUpdate[VectorBuilder[Double], Double, OpMulScalar]
      }

      def divVS: BinaryOp[VectorBuilder[Double], Double, OpDiv, VectorBuilder[Double]] = {
        BinaryOp.fromCopyAndUpdate[VectorBuilder[Double], Double, OpDiv]
      }

      def addVV: BinaryOp[VectorBuilder[Double], VectorBuilder[Double], OpAdd, VectorBuilder[Double]] = {
        BinaryOp.fromCopyAndUpdate[VectorBuilder[Double], VectorBuilder[Double], OpAdd]
      }

      def subVV: BinaryOp[VectorBuilder[Double], VectorBuilder[Double], OpSub, VectorBuilder[Double]] = {
        BinaryOp.fromCopyAndUpdate[VectorBuilder[Double], VectorBuilder[Double], OpSub]
      }


      implicit def neg: UnaryOp[VectorBuilder[Double], OpNeg, VectorBuilder[Double]] = neg_Double

      implicit def setIntoVV: BinaryUpdateOp[VectorBuilder[Double], VectorBuilder[Double], OpSet] = {
        canSet_Double
      }

      /** True when the 2-norm of the difference of the HashVector forms is below `tolerance`. */
      def close(a: VectorBuilder[Double], b: VectorBuilder[Double], tolerance: Double): Boolean = {
        (a.toHashVector - b.toHashVector).norm(2) < tolerance
      }

      implicit def axpyVV: CanAxpy[Double, VectorBuilder[Double], VectorBuilder[Double]] = canAxpy_VB_VB_Double
    }
  }

  // operations involving vectors:
  /** In-place a += b for a Vector a and a builder b: each entry of b is added to a at its index. */
  implicit def canAddInto_V_VB_Double[V<:Vector[Double]]: BinaryUpdateOp[V, VectorBuilder[Double], OpAdd] =  {
    new  BinaryUpdateOp[V, VectorBuilder[Double], OpAdd]  {
      def apply(a: V, b: VectorBuilder[Double]) {
        require(a.length == b.length, "Dimension mismatch!")
        var i = 0
        val bd = b.data
        while(i < b.activeSize) {
          a(b.index(i)) += bd(i)
          i += 1
        }
      }
    }

  }

  /** In-place a -= b for a Vector a and a builder b: each entry of b is subtracted from a at its index. */
  implicit def canSubInto_V_VB_Double[V<:Vector[Double]]: BinaryUpdateOp[V, VectorBuilder[Double], OpSub] =  {
    new  BinaryUpdateOp[V, VectorBuilder[Double], OpSub]  {
      def apply(a: V, b: VectorBuilder[Double]) {
        require(a.length == b.length, "Dimension mismatch!")
        var i = 0
        val bd = b.data
        while(i < b.activeSize) {
          a(b.index(i)) -= bd(i)
          i += 1
        }
      }
    }

  }

  /** In-place a += b for a builder a and a Vector b: b's active entries are appended to a. */
  implicit def canAddInto_VB_V_Double[V <: Vector[Double]]: BinaryUpdateOp[VectorBuilder[Double], V, OpAdd] =  {
    new  BinaryUpdateOp[VectorBuilder[Double], V, OpAdd]  {
      def apply(a: VectorBuilder[Double], b: V) {
        // Validate and reserve up front so both branches behave the same way.
        require(a.length == b.length, "Dimension mismatch!")
        a.reserve(a.activeSize + b.activeSize)
        b match {
          case b: StorageVector[Double] =>
            // Walk the raw storage directly instead of going through an iterator.
            var i = 0
            val bd = b.data
            while(i < b.iterableSize) {
              if(b.isActive(i))
                a.add(b.indexAt(i), bd(i))
              i += 1
            }

          case _ =>
            for( (i,v) <- b.activeIterator) {
              a.add(i, v)
            }
        }

      }
    }

  }

  /** In-place a -= b for a builder a and a Vector b: b's active entries are appended to a, negated. */
  implicit def canSubInto_VB_V_Double[V <: Vector[Double]]: BinaryUpdateOp[VectorBuilder[Double], V, OpSub] =  {
    new  BinaryUpdateOp[VectorBuilder[Double], V, OpSub]  {
      def apply(a: VectorBuilder[Double], b: V) {
        // Validate and reserve up front so both branches behave the same way.
        require(a.length == b.length, "Dimension mismatch!")
        a.reserve(a.activeSize + b.activeSize)
        b match {
          case b: StorageVector[Double] =>
            // Walk the raw storage directly instead of going through an iterator.
            var i = 0
            val bd = b.data
            while(i < b.iterableSize) {
              if(b.isActive(i))
                a.add(b.indexAt(i), -bd(i))
              i += 1
            }

          case _ =>
            for( (i,v) <- b.activeIterator) {
              a.add(i, -v)
            }
        }

      }
    }

  }

  /** Dot product of a Vector a with a builder b: sums a(index) * value over b's stored entries. */
  implicit def canDot_V_VB_Double[V<:Vector[Double]]: BinaryOp[V, VectorBuilder[Double], OpMulInner, Double] =  {
    new  BinaryOp[V, VectorBuilder[Double], OpMulInner, Double]  {
      def apply(a: V, b: VectorBuilder[Double]) =  {
        require(a.length == b.length, "Dimension mismatch!")
        var result : Double = 0
        var i = 0
        val bd = b.data
        while(i < b.activeSize) {
          result += a(b.index(i)) * bd(i)
          i += 1
        }
        result
      }
    }
  }

  /** axpy into a Vector: a += s * b, where b is a builder. */
  implicit def canAxpy_V_VB_Double[V<:Vector[Double]]: CanAxpy[Double, VectorBuilder[Double], V] = {
    new  CanAxpy[Double, VectorBuilder[Double], V]  {
      def apply(s: Double, b: VectorBuilder[Double], a: V) {
        require(a.length == b.length, "Dimension mismatch!")
        var i = 0
        val bd = b.data
        while(i < b.activeSize) {
          a(b.index(i)) += s * bd(i)
          i += 1
        }
      }
    }
  }

  /** Dot product of a builder a with a Vector b; delegates to canDot_V_VB_Double with the operands swapped. */
  implicit def canDot_VB_V_Double[V<:Vector[Double]]: BinaryOp[VectorBuilder[Double], V, OpMulInner, Double] =  {
    new  BinaryOp[VectorBuilder[Double], V, OpMulInner, Double]  {
      def apply(a: VectorBuilder[Double], b: V) =  {
        canDot_V_VB_Double(b,a)
      }
    }
  }


}




// © 2015 - 2025 Weber Informatics LLC | Privacy Policy