All Downloads are FREE. Search and download functionalities are using the official Maven repository.

scalaz.BKTree.scala Maven / Gradle / Ivy

The newest version!
package org.specs2.internal.scalaz

import collection.immutable.IntMap
import annotation.tailrec

import BKTree._

/**
 * Burkhard-Keller trees provide an implementation of sets which, apart
 * from the ordinary operations, also has an approximate member search,
 * allowing you to search for elements that are within a distance `n` of
 * the element you are searching for. The distance is determined using
 * a metric on the type of elements. Therefore all elements must
 * implement the [[scalaz.MetricSpace]] type class, rather than the more usual
 * [[scalaz.Ordering]].
 *
 * Each node stores one element together with its children, keyed by their
 * distance from that element. Inserting an element whose distance to an
 * existing element is `0` still adds it (under the `0` key), so `size`
 * counts insertions rather than distinct elements.
 *
 * The worst case complexity of many of these operations is quite bad,
 * but the expected behavior varies greatly with the metric. For
 * example, the discrete metric (`distance x y | y == x = 0 |
 * otherwise = 1`) makes BK-trees behave abysmally. Metrics that spread
 * elements over many distinct distances are expected to perform better.
 *
 * This implementation is a port of Haskell's [[http://hackage.haskell.org/packages/archive/bktrees/0.2.1/doc/html/src/Data-Set-BKTree.html Data.Set.BKTree]]
 */
sealed trait BKTree[A] {
  /** Returns true if this tree holds no elements. */
  def isEmpty: Boolean =
    this match {
      case BKTreeEmpty()       => true
      case BKTreeNode(_, _, _) => false
    }

  /**
   * Applies `f` to every element, keeping the shape of the tree and the
   * recorded sizes.
   *
   * NOTE: the child keys are the distances that were computed for the
   * original elements; they are not recomputed for the mapped elements.
   * Metric-based lookups on the result are therefore only meaningful when
   * `f` preserves distances.
   */
  def map[B](f: A => B): BKTree[B] =
    this match {
      case BKTreeEmpty()       => BKTreeEmpty()
      case BKTreeNode(a, s, c) => BKTreeNode(f(a), s, c.transform((_: Int, z: BKTree[A]) => z map f))
    }

  /** The number of elements that have been inserted into this tree. */
  def size: Int =
    this match {
      case BKTreeEmpty()       => 0
      case BKTreeNode(_, s, _) => s
    }

  /**
   * Returns a tree that additionally contains `a`.
   *
   * The new element is pushed down into the child keyed by its distance
   * from this node's element; a fresh single-element child is created when
   * no child exists for that distance yet.
   */
  def +(a: A)(implicit A: MetricSpace[A]): BKTree[A] =
    this match {
      case BKTreeEmpty()       => BKTreeNode(a, 1, IntMap.empty)
      case BKTreeNode(v, s, c) =>
        val d = A.distance(v, a)
        val child: BKTree[A] = c.get(d) match {
          case None    => BKTreeNode(a, 1, IntMap.empty)
          case Some(w) => w + a
        }
        BKTreeNode(v, s + 1, c.updated(d, child))
    }

  /** Returns a tree containing the elements of this tree followed by every element of `t`, inserted one by one. */
  def ++(t: BKTree[A])(implicit m: MetricSpace[A]): BKTree[A] =
    t.values.foldLeft(this: BKTree[A])(_ + _)

  /** All elements of this tree: the node's own element first, then the elements of each child subtree. */
  def values: List[A] =
    this match {
      case BKTreeEmpty()       => Nil
      case BKTreeNode(v, _, c) => v :: c.valuesIterator.toList.flatMap(_.values)
    }


  /**
   * Returns true if this set contains `a`, i.e. an element at distance `0` from `a`.
   *
   * Only the single child keyed by the distance between `a` and the current
   * element is followed, so the search walks one path down the tree.
   */
  @tailrec
  final def contains(a: A)(implicit A: MetricSpace[A]): Boolean =
    this match {
      case BKTreeEmpty()       => false
      case BKTreeNode(v, _, c) =>
        val d = A.distance(v, a)
        d == 0 || (c get d match {
          case None    => false
          case Some(w) => w contains a
        })
    }

  /** An alias for `contains` */
  final def -?-(a: A)(implicit A: MetricSpace[A]): Boolean = contains(a)

  /** Returns true if this set contains an element which has a distance from `a` that is less than or equal to `n` */
  def containsApproximate(a: A, n: Int)(implicit A: MetricSpace[A]): Boolean =
    this match {
      case BKTreeEmpty()       => false
      case BKTreeNode(v, _, c) =>
        val d = A.distance(v, a)
        d <= n || childrenWithin(c, d, n).valuesIterator.exists(_.containsApproximate(a, n))
    }

  /** An alias for `containsApproximate` */
  def =?=(a: A, n: Int)(implicit A: MetricSpace[A]): Boolean = containsApproximate(a, n)

  /** Returns the elements which have a distance from `a` that is less than or equal to `n`. */
  def valuesApproximate(a: A, n: Int)(implicit A: MetricSpace[A]): List[A] =
    this match {
      case BKTreeEmpty()       => Nil
      case BKTreeNode(v, _, c) =>
        val d = A.distance(v, a)
        val k = childrenWithin(c, d, n).valuesIterator.toList.flatMap(_.valuesApproximate(a, n))
        if (d <= n) v :: k else k
    }

  /** An alias for `valuesApproximate` */
  def |=|(a: A, n: Int)(implicit A: MetricSpace[A]): List[A] = valuesApproximate(a, n)

  /**
   * Restricts `c` to the children whose key lies in `[d - n, d + n]`.
   *
   * A child keyed `k` holds elements at distance `k` from this node's
   * element, so by the triangle inequality only those children can contain
   * an element within `n` of a query that is at distance `d` from this node.
   *
   * The comparison is done in `Long` so that a very large radius (for
   * example `Int.MaxValue`) cannot overflow and wrongly discard every child.
   */
  private def childrenWithin(c: IntMap[BKTree[A]], d: Int, n: Int): IntMap[BKTree[A]] =
    c.filter { case (k, _) => math.abs(k.toLong - d.toLong) <= n.toLong }
}

/**
 * A non-empty tree.
 *
 * @param value    the element stored at this node
 * @param sz       the number of elements in this subtree, this node included
 * @param children the subtrees, keyed by the distance of their elements from `value`
 */
private final case class BKTreeNode[A](value: A, sz: Int, children: IntMap[BKTree[A]]) extends BKTree[A]

/** The tree that contains no elements. */
private final case class BKTreeEmpty[A]() extends BKTree[A]

object BKTree extends BKTreeFunctions with BKTreeInstances {
  /** Builds a tree by inserting the given elements, left to right, into an empty tree. */
  def apply[A: MetricSpace](as: A*): BKTree[A] =
    as.foldLeft(emptyBKTree[A])(_ + _)
}

trait BKTreeFunctions {
  /** The tree with no elements. */
  def emptyBKTree[A]: BKTree[A] =
    BKTreeEmpty[A]()
}

trait BKTreeInstances {
  /** `Functor` (via `BKTree#map`) and `Length` (via `BKTree#size`) for `BKTree`. */
  implicit def bKTreeInstance: Functor[BKTree] with Length[BKTree] = new Functor[BKTree] with Length[BKTree] {
    def map[A, B](fa: BKTree[A])(f: (A) => B): BKTree[B] = fa map f
    def length[A](fa: BKTree[A]): Int = fa.size
  }

  /** Monoid whose `append` inserts every element of the second tree into the first and whose `zero` is the empty tree. */
  implicit def bKTreeMonoid[A: MetricSpace]: Monoid[BKTree[A]] = new Monoid[BKTree[A]] {
    def append(f1: BKTree[A], f2: => BKTree[A]): BKTree[A] = f1 ++ f2
    def zero: BKTree[A] = BKTree[A]()
  }

  /**
   * Equality of trees, defined as equality of their `values` lists.
   *
   * Because `values` lists elements in tree-traversal order, two trees are
   * equal only when they yield the same elements in the same order.
   */
  // The result type is spelled out so the implicit does not rely on inference.
  implicit def bkTreeEqual[A](implicit A: Equal[A]): Equal[BKTree[A]] = {
    import std.list._
    Equal.equalBy((ba: BKTree[A]) => ba.values)
  }
}




© 2015 - 2025 Weber Informatics LLC | Privacy Policy