All Downloads are FREE. Search and download functionalities are using the official Maven repository.

overflowdb.traversal.Traversal.scala Maven / Gradle / Ivy

There is a newer version: 1.193
Show newest version
package overflowdb.traversal

import org.slf4j.LoggerFactory
import overflowdb.traversal.help.Table.AvailableWidthProvider
import overflowdb.traversal.help.{Doc, DocSearchPackages, TraversalHelp}

import scala.collection.{
  Iterable,
  IterableFactory,
  IterableFactoryDefaults,
  IterableOnce,
  IterableOps,
  Iterator,
  mutable
}
import scala.collection.immutable.ArraySeq
import scala.jdk.CollectionConverters._
import scala.reflect.ClassTag

/** Convenience steps for executing a traversal and for grouping, deduplicating and sorting its elements.
  *
  * The wrapped `iter` is a plain `Iterator`: steps that look at elements consume them, so the same instance cannot be
  * evaluated twice.
  */
class TraversalSugarExt[A](val iter: Iterator[A]) extends AnyVal {
  type Traversal[A] = Iterator[A]

  /** Execute the traversal and convert the result to a list - shorthand for `toList` */
  @Doc(info = "Execute the traversal and convert the result to a list - shorthand for `toList`")
  def l: List[A] = iter.toList

  /** group elements and count how often they appear */
  @Doc(info = "group elements and count how often they appear")
  def groupCount[B >: A]: Map[B, Int] =
    groupCount(identity[A])

  /** group elements by a given transformation function and count how often the results appear
    *
    * @param by
    *   computes the grouping key for an element
    * @return
    *   immutable map from each key to the number of elements that produced it
    */
  @Doc(info = "group elements by a given transformation function and count how often the results appear")
  def groupCount[B](by: A => B): Map[B, Int] = {
    // mutable accumulator is local to this method; an immutable copy is returned
    val counts = mutable.Map.empty[B, Int].withDefaultValue(0)
    iter.foreach { a =>
      val b = by(a)
      val newValue = counts(b) + 1
      counts.update(b, newValue)
    }
    counts.to(Map)
  }

  /** Materialise the traversal into a list and group the elements by the key computed by `f`. */
  def groupBy[K](f: A => K): Map[K, List[A]] = l.groupBy(f)

  /** Materialise the traversal into a list, group by `key` and transform each grouped element with `f`. */
  def groupMap[K, B](key: A => K)(f: A => B): Map[K, List[B]] = l.groupMap(key)(f)

  /** Materialise the traversal into a list, group by `key`, transform with `f` and combine each group's values with
    * `reduce`.
    */
  def groupMapReduce[K, B](key: A => K)(f: A => B)(reduce: (B, B) => B): Map[K, B] = l.groupMapReduce(key)(f)(reduce)

  /** Execute the traversal and return a mutable.Set (better performance than `immutableSet` and has stable iterator
    * order)
    */
  def toSetMutable[B >: A]: mutable.LinkedHashSet[B] = mutable.LinkedHashSet.from(iter)

  /** Execute the traversal and convert the result to an immutable Set. */
  def toSetImmutable[B >: A]: Set[B] = iter.toSet

  /** Execute the traversal without returning anything */
  @Doc(info = "Execute the traversal without returning anything")
  def iterate(): Unit =
    while (iter.hasNext) iter.next()

  /** Count the elements of this traversal and return the count as a single-element traversal.
    *
    * Note: the count is computed - and this traversal consumed - when the step is called, not when the returned
    * traversal is read.
    */
  def countTrav: Traversal[Int] =
    Iterator.single(iter.size)

  /** First element of the traversal. Calls `next()` unconditionally, i.e. it fails on an empty traversal - see
    * [[headOption]] for the safe variant.
    */
  def head: A = iter.next()

  /** First element of the traversal, or `None` if it is empty. */
  def headOption: Option[A] = iter.nextOption()

  /** Last element of the traversal; consumes all elements. Calls `next()` unconditionally, i.e. it fails on an empty
    * traversal - see [[lastOption]] for the safe variant.
    */
  def last: A = {
    // NOTE(review): the result is discarded on purpose in the original code - presumably to prime iterators that only
    // compute their next element inside `hasNext`. Confirm before removing.
    iter.hasNext
    var res = iter.next()
    while (iter.hasNext) res = iter.next()
    res
  }

  /** Last element of the traversal, or `None` if it is empty; consumes all elements. */
  def lastOption: Option[A] =
    if (iter.hasNext) Some(last) else None

  /** casts all elements to given type note: this can lead to casting errors
    *
    * @see
    *   {{{collectAll}}} as a safe alternative
    */
  @Doc(info = "casts all elements to given type")
  def cast[B]: Traversal[B] =
    iter.asInstanceOf[Traversal[B]]

  /** collects all elements of the given class (beware of type-erasure)
    *
    * The check goes through `ClassTag.unapply` rather than `runtimeClass.isInstance`: for primitive types such as
    * `Int` the runtime class is the primitive class, for which `isInstance` is always false, so boxed values would
    * never be collected.
    */
  @Doc(info = "collects all elements of the provided class (beware of type-erasure)")
  def collectAll[B](implicit ev: ClassTag[B]): Traversal[B] =
    iter.flatMap(element => ev.unapply(element))

  /** Deduplicate elements of this traversal - a.k.a. distinct, unique, ... */
  @Doc(info = "deduplicate elements of this traversal - a.k.a. distinct, unique, ...")
  def dedup: Traversal[A] =
    iter.distinct

  /** deduplicate elements of this traversal by a given function */
  @Doc(info = "deduplicate elements of this traversal by a given function")
  def dedupBy(fun: A => Any): Traversal[A] =
    iter.distinctBy(fun)

  /** sort elements by their natural order; materialises the whole traversal */
  @Doc(info = "sort elements by their natural order")
  def sorted[B >: A](implicit ord: Ordering[B]): Seq[B] = {
    (iter.to(ArraySeq.untagged): ArraySeq[B]).sorted
  }

  /** sort elements by the value of the given transformation function; materialises the whole traversal */
  @Doc(info = "sort elements by the value of the given transformation function")
  def sortBy[B](f: A => B)(implicit ord: Ordering[B]): Seq[A] =
    iter.to(ArraySeq.untagged).sortBy(f)

  /** Render help/documentation based on the current elementType `A`. Relies on all step extensions being annotated
    * with `@Doc`. Note that this works independently of tab completion and implicit conversions in scope - it will
    * simply list all documented steps in the classpath.
    *
    * @return
    *   the help text (non-verbose variant) as a string
    */
  @Doc(info = "print help/documentation based on the current elementType `A`.")
  def help[B >: A](implicit
      elementType: ClassTag[B],
      searchPackages: DocSearchPackages,
      availableWidthProvider: AvailableWidthProvider
  ): String =
    new TraversalHelp(searchPackages).forElementSpecificSteps(elementType.runtimeClass, verbose = false)

  /** Same as [[help]], but requests the verbose variant of the help text. */
  @Doc(info = "print verbose help/documentation based on the current elementType `A`.")
  def helpVerbose[B >: A](implicit
      elementType: ClassTag[B],
      searchPackages: DocSearchPackages,
      availableWidthProvider: AvailableWidthProvider
  ): String =
    new TraversalHelp(searchPackages).forElementSpecificSteps(elementType.runtimeClass, verbose = true)
}
/** Value-based filter steps: keep or drop elements by comparing them against a given value or set of values. */
class TraversalFilterExt[A](val iterator: Iterator[A]) extends AnyVal {
  type Traversal[A] = Iterator[A]

  /** keeps only the elements that are equal (`==`) to the given value */
  @Doc(info = "filters out everything that is _not_ the given value")
  def is[B >: A](value: B): Traversal[A] =
    iterator.filter { element => element == value }

  /** keeps only the elements that are contained in the provided set */
  @Doc(info = "filters out all elements that are _not_ in the provided set")
  def within[B >: A](values: Set[B]): Traversal[A] =
    iterator.filter { element => values.contains(element) }

  /** keeps only the elements that are _not_ contained in the provided set */
  @Doc(info = "filters out all elements that _are_ in the provided set")
  def without[B >: A](values: Set[B]): Traversal[A] =
    iterator.filter { element => !values.contains(element) }

}

/** Logical / branching steps: side effects, where-style filters, and/or, union, choose and coalesce.
  *
  * Sub-traversals passed to these steps are started from a fresh single-element traversal (`Iterator.single(a)`) for
  * each element `a`. Where the wrapped iterator is a `PathAwareTraversal`, the steps that match on it delegate to its
  * path-aware implementation.
  */
class TraversalLogicExt[A](val iterator: Iterator[A]) extends AnyVal {
  type Traversal[A] = Iterator[A]

  /** perform side effect without changing the contents of the traversal; the result of `fun` is ignored */
  @Doc(info = "perform side effect without changing the contents of the traversal")
  def sideEffect(fun: A => _): Traversal[A] =
    iterator match {
      case pathAwareTraversal: PathAwareTraversal[A] => pathAwareTraversal._sideEffect(fun)
      case _ =>
        iterator.map { a => fun(a); a }
    }

  /** perform side effect without changing the contents of the traversal will only apply the partialFunction if it is
    * defined for the given input - analogous to `collect`
    */
  @Doc(info = "perform side effect without changing the contents of the traversal")
  def sideEffectPF(pf: PartialFunction[A, _]): Traversal[A] =
    sideEffect(pf.lift)

  /** only preserves elements if the provided traversal has at least one result */
  @Doc(info = "only preserves elements if the provided traversal has at least one result")
  def where(trav: Traversal[A] => Traversal[_]): Traversal[A] =
    iterator.filter { (a: A) =>
      trav(Iterator.single(a)).hasNext
    }

  /** only preserves elements if the provided traversal does _not_ have any results */
  @Doc(info = "only preserves elements if the provided traversal does _not_ have any results")
  def whereNot(trav: Traversal[A] => Traversal[_]): Traversal[A] =
    iterator.filter { (a: A) =>
      !trav(Iterator.single(a)).hasNext
    }

  /** only preserves elements if the provided traversal does _not_ have any results - alias for whereNot */
  @Doc(info = "only preserves elements if the provided traversal does _not_ have any results - alias for whereNot")
  def not(trav: Traversal[A] => Traversal[_]): Traversal[A] =
    whereNot(trav)

  /** only preserves elements for which _at least one of_ the given traversals has at least one result Works for
    * arbitrary amount of 'OR' traversals.
    *
    * @example
    *   {{{.or(_.label("someLabel"), _.has("someProperty"))}}}
    */
  @Doc(info = "only preserves elements for which _at least one of_ the given traversals has at least one result")
  def or(traversals: (Traversal[A] => Traversal[_])*): Traversal[A] = {
    iterator.filter { (a: A) =>
      traversals.exists { trav =>
        trav(Iterator.single(a)).hasNext
      }
    }
  }

  /** only preserves elements for which _all of_ the given traversals have at least one result Works for arbitrary
    * amount of 'AND' traversals.
    *
    * @example
    *   {{{.and(_.label("someLabel"), _.has("someProperty"))}}}
    */
  @Doc(info = "only preserves elements for which _all of_ the given traversals have at least one result")
  def and(traversals: (Traversal[A] => Traversal[_])*): Traversal[A] = {
    iterator.filter { (a: A) =>
      traversals.forall { trav =>
        trav(Iterator.single(a)).hasNext
      }
    }
  }

  /** union step from the current point
    *
    * For each element, the results of the given traversals are emitted one traversal after the other, in the order
    * the traversals were passed. Sub-traversals are evaluated lazily: each one is only started (and only advanced) as
    * far as the consumer of the resulting traversal demands.
    *
    * @param traversals
    *   to be executed from here, results are being aggregated/summed/unioned
    * @example
    *   {{{.union(_.out, _.in)}}}
    */
  @Doc(info = "union/sum/aggregate/join given traversals from the current point")
  def union[B](traversals: (Traversal[A] => Traversal[B])*): Traversal[B] = iterator match {
    case pathAwareTraversal: PathAwareTraversal[A] => pathAwareTraversal._union(traversals: _*)
    case _ =>
      iterator.flatMap { (a: A) =>
        // go via `.iterator`: `flatMap` directly on the varargs Seq would run every sub-traversal to completion and
        // buffer all results before the first one is emitted (and never return for an infinite sub-traversal)
        traversals.iterator.flatMap(_.apply(Iterator.single(a)))
      }
  }

  /** Branch step: based on the current element, match on something given a traversal, and provide resulting traversals
    * based on the matched element. Allows to implement conditional semantics: if, if/else, if/elseif, if/elseif/else,
    * ...
    *
    * Only the first result of `on` is used for matching. If `on` yields no result, `null` is matched instead. If
    * `options` is not defined for the matched value, the current element produces no results.
    *
    * @param on
    *   Traversal to get to what you want to match on
    * @tparam BranchOn
    *   required to be >: Null because the implementation is using `null` as the default value when `on` has no result
    * @param options
    *   PartialFunction from the matched element to the resulting traversal
    * @tparam NewEnd
    *   The element type of the resulting traversal
    * @example
    *   {{{
    * .choose(_.property(Name)) {
    *   case "L1" => _.out
    *   case "R1" => _.repeat(_.out)(_.maxDepth(3))
    *   case _ => _.in
    * }
    *   }}}
    * @see
    *   LogicalStepsTests
    */
  @Doc(info = "allows to implement conditional semantics: if, if/else, if/elseif, if/elseif/else, ...")
  def choose[BranchOn >: Null, NewEnd](
      on: Traversal[A] => Traversal[BranchOn]
  )(options: PartialFunction[BranchOn, Traversal[A] => Traversal[NewEnd]]): Traversal[NewEnd] = iterator match {
    case pathAwareTraversal: PathAwareTraversal[A] => pathAwareTraversal._choose[BranchOn, NewEnd](on)(options)
    case _ =>
      iterator.flatMap { (a: A) =>
        // `null` stands in for "no value to branch on", so that a `case _` / `case null` option can still match
        val branchOnValue: BranchOn = on(Iterator.single(a)).nextOption().getOrElse(null)
        options
          // fallback when no option is defined for the value: a traversal step that yields nothing
          .applyOrElse(branchOnValue, (failState: BranchOn) => ((unused: Traversal[A]) => Iterator.empty[NewEnd]))
          .apply(Iterator.single(a))
      }
  }

  /** Evaluates the provided traversals in order, for each element, and continues with the first one that emits at
    * least one element. If none of them does, the element produces no results. Traversals after the first non-empty
    * one are not started.
    */
  @Doc(info =
    "evaluates the provided traversals in order and returns the first traversal that emits at least one element"
  )
  def coalesce[NewEnd](options: (Traversal[A] => Traversal[NewEnd])*): Traversal[NewEnd] = iterator match {
    case pathAwareTraversal: PathAwareTraversal[A] => pathAwareTraversal._coalesce(options: _*)
    case _ =>
      iterator.flatMap { (a: A) =>
        options.iterator
          .map(_.apply(Iterator.single(a)))
          .collectFirst {
            case option if option.nonEmpty => option
          }
          .getOrElse(Iterator.empty)
      }
  }
}

/** Steps for switching path tracking on/off and for querying the tracked path.
  *
  * A traversal counts as "path tracking" when the wrapped iterator is a `PathAwareTraversal`, which carries each
  * element together with the `Vector` of elements visited before it.
  */
class TraversalTrackingExt[A](val iterator: Iterator[A]) extends AnyVal {
  type Traversal[A] = Iterator[A]

  /** Wrap this traversal into a `PathAwareTraversal`, starting each element with an empty path.
    *
    * Throws a `RuntimeException` if this traversal is already a `PathAwareTraversal`.
    */
  @Doc(info = "enable path tracking - prerequisite for path/simplePath steps")
  def enablePathTracking: PathAwareTraversal[A] =
    iterator match {
      case _: PathAwareTraversal[_] => throw new RuntimeException("path tracking is already enabled")
      case _ => new PathAwareTraversal[A](iterator.map { a => (a, Vector.empty) })
    }

  /** Drop the tracked paths and continue with the plain elements. A traversal that is not path tracking is returned
    * unchanged.
    */
  @Doc(info = "discard path tracking - drops the tracked paths and continues as a regular traversal")
  def discardPathTracking: Traversal[A] =
    iterator match {
      case pathAwareTraversal: PathAwareTraversal[A] => pathAwareTraversal.wrapped.map { _._1 }
      case _                                         => iterator
    }

  /** true if this traversal is a `PathAwareTraversal` */
  def isPathTracking: Boolean = iterator.isInstanceOf[PathAwareTraversal[_]]

  /** retrieve entire path that has been traversed thus far prerequisite: enablePathTracking has been called previously
    *
    * Each resulting path consists of the tracked path followed by the current element. Throws an `AssertionError` if
    * path tracking is not enabled.
    *
    * @example
    *   {{{
    *  myTraversal.enablePathTracking.out.out.path.toList
    *   }}}
    *   TODO would be nice to preserve the types of the elements, at least if they have a common supertype
    */
  @Doc(info = "retrieve entire path that has been traversed thus far")
  def path: Traversal[Vector[Any]] = iterator match {
    case tracked: PathAwareTraversal[A] =>
      tracked.wrapped.map { case (a, p) =>
        p.appended(a)
      }
    case _ =>
      throw new AssertionError(
        "path tracking not enabled, please make sure you have a `PathAwareTraversal`, e.g. via `Traversal.enablePathTracking`"
      )
  }

  /** Only preserves elements whose path - including the element itself - does not contain any duplicates.
    * Prerequisite: enablePathTracking has been called previously; throws an `AssertionError` otherwise.
    */
  def simplePath: Traversal[A] = iterator match {
    case tracked: PathAwareTraversal[A] =>
      new PathAwareTraversal(tracked.wrapped.filter { case (a, p) =>
        // a set drops duplicates, so its size matches `path + current element` only if every entry is unique
        mutable.Set.from(p).addOne(a).size == 1 + p.size
      })
    case _ =>
      throw new AssertionError(
        "path tracking not enabled, please make sure you have a `PathAwareTraversal`, e.g. via `Traversal.enablePathTracking`"
      )
  }
}

class TraversalRepeatExt[A](val trav: Iterator[A]) extends AnyVal {
  type Traversal[A] = Iterator[A]

  /** Repeat the given traversal
    *
    * Unless configured otherwise it keeps repeating until there are no more results, emits nothing along the way, and
    * uses depth first search.
    *
    * The @param behaviourBuilder configures the end conditions (until|whilst|maxDepth), whether elements passed along
    * the way are emitted, and which search algorithm is used (depth-first or breadth-first).
    *
    * Search algorithm: Depth First Search (DFS) goes deep before wide, Breadth First Search (BFS) goes wide before
    * deep. For example, given the graph
    * {{{L3 <- L2 <- L1 <- Center -> R1 -> R2 -> R3 -> R4}}} DFS will iterate the nodes in the order:
    * {{{Center, L1, L2, L3, R1, R2, R3, R4}}} BFS will iterate the nodes in the order:
    * {{{Center, L1, R1, L2, R2, L3, R3, R4}}}
    *
    * @example
    *   {{{
    * .repeat(_.out)                                            // repeat until there's no more elements, emit nothing, use DFS
    * .repeat(_.out)(_.maxDepth(3))                             // perform exactly three repeat iterations
    * .repeat(_.out)(_.until(_.property(Name).endsWith("2")))   // repeat until the 'Name' property ends with '2'
    * .repeat(_.out)(_.emit)                                    // emit everything along the way
    * .repeat(_.out)(_.emit.breadthFirstSearch)                 // emit everything, use BFS
    * .repeat(_.out)(_.emit(_.property(Name).startsWith("L")))  // emit if the 'Name' property starts with 'L'
    *   }}}
    * @note
    *   this works for domain-specific steps as well as generic graph steps - for details please take a look at the
    *   examples in RepeatTraversalTests: both '''.followedBy''' and '''.out''' work.
    * @see
    *   RepeatTraversalTests for more detail and examples for all of the above.
    */
  // @Doc(info = "repeat the given traversal")
  def repeat[B >: A](
      repeatTraversal: Traversal[A] => Traversal[B]
  )(implicit
      behaviourBuilder: RepeatBehaviour.Builder[B] => RepeatBehaviour.Builder[B] = RepeatBehaviour.noop[B] _
  ): Traversal[B] = {
    // Widen the step function to operate on `B`; this relies on `B` being a supertype of `A` and is not checked at
    // runtime.
    val widenedStep = repeatTraversal.asInstanceOf[Traversal[B] => Traversal[B]]
    val configuredBehaviour = behaviourBuilder(new RepeatBehaviour.Builder[B]).build

    trav match {
      case pathAware: PathAwareTraversal[A] =>
        // path tracking is on: run the path-aware step per element and prepend the path travelled so far
        val repeatStep = PathAwareRepeatStep(widenedStep, configuredBehaviour)
        new PathAwareTraversal(pathAware.wrapped.flatMap { case (start, pathSoFar) =>
          repeatStep.apply(start).wrapped.map { case (reached, pathWithinRepeat) =>
            (reached, pathSoFar ++ pathWithinRepeat)
          }
        })
      case untracked =>
        untracked.flatMap(RepeatStep(widenedStep, configuredBehaviour))
    }
  }
}




© 2015 - 2025 Weber Informatics LLC | Privacy Policy