All downloads are free. Search and download functionality uses the official Maven repository.

fif.Data.scala Maven / Gradle / Ivy

The newest version!
package fif

import scala.language.higherKinds
import scala.reflect.ClassTag

import simulacrum._

/**
 * Trait that abstractly represents operations that can be performed on a dataset.
 * The implementation of Data is suitable for both large-scale, distributed data
 * or in-memory structures.
 *
 * Every operation takes the dataset `d: D[A]` as its first parameter list, so an
 * instance of this trait describes behavior for a container type `D` rather than
 * being the container itself.
 *
 * @tparam D the dataset (container) type constructor the operations act upon
 */
@typeclass trait Data[D[_]] extends Serializable {

  /** Transform a dataset by applying f to each element. */
  def map[A, B: ClassTag](d: D[A])(f: A => B): D[B]

  /**
   * Transform a dataset by applying f to groups of elements, where each group is
   * handed to f as an Iterable and f yields the Iterable of results for that group.
   *
   * NOTE(review): the name looks like a misspelling of `mapPartition`; it is part of
   * the public interface, so it is left as-is here. How elements are split into
   * groups is not specified by this signature -- confirm against each implementation.
   */
  def mapParition[A, B: ClassTag](d: D[A])(f: Iterable[A] => Iterable[B]): D[B]

  /** Apply a side-effecting function to each element. */
  def foreach[A](d: D[A])(f: A => Any): Unit

  /**
   * Apply a side-effecting function to groups of elements, each presented as an Iterable.
   *
   * NOTE(review): presumably uses the same grouping as `mapParition` -- confirm.
   */
  def foreachPartition[A](d: D[A])(f: Iterable[A] => Any): Unit

  /**
   * Produce a dataset of the same element type, selected using the predicate f.
   *
   * NOTE(review): presumably keeps the elements for which f is true, as the
   * standard collections do -- confirm against implementations.
   */
  def filter[A](d: D[A])(f: A => Boolean): D[A]

  /**
   * Starting from a defined zero value, perform an operation seqOp on each element
   * of a dataset. Combine results of seqOp using combOp for a final value.
   */
  def aggregate[A, B: ClassTag](d: D[A])(zero: B)(seqOp: (B, A) => B, combOp: (B, B) => B): B

  /** Sort the dataset using a function f that evaluates each element to an orderable type */
  def sortBy[A, B: ClassTag](d: D[A])(f: (A) => B)(implicit ord: math.Ordering[B]): D[A]

  /** Construct a traversable for the first k elements of a dataset. Will load into main mem. */
  def take[A](d: D[A])(k: Int): Traversable[A]

  /**
   * Obtain an element of the dataset wrapped in an Option.
   *
   * NOTE(review): presumably the first element, with None for an empty dataset -- confirm.
   */
  def headOption[A](d: D[A]): Option[A]

  /** Load all elements of the dataset into a Seq in main memory. */
  def toSeq[A](d: D[A]): Seq[A]

  /**
   * Apply f, which yields a TraversableOnce of B for each element, and produce a
   * single dataset of B from those results.
   */
  def flatMap[A, B: ClassTag](d: D[A])(f: A => TraversableOnce[B]): D[B]

  /**
   * Produce a dataset of B from a dataset whose elements can each be viewed as a
   * TraversableOnce of B; the implicit asTraversable supplies that view.
   */
  def flatten[A, B: ClassTag](d: D[A])(implicit asTraversable: A => TraversableOnce[B]): D[B]

  /**
   * Group elements using the key computed by f. Each element of the result pairs a
   * key of type B with an Iterable of elements of type A.
   */
  def groupBy[A, B: ClassTag](d: D[A])(f: A => B): D[(B, Iterable[A])]

  /**
   * Combine the elements of the dataset into a single value using the binary operation op.
   *
   * NOTE(review): behavior on an empty dataset, and any requirements on op (e.g.
   * associativity), are not stated by this signature -- confirm against implementations.
   */
  def reduce[A](d: D[A])(op: (A, A) => A): A

  /** The size of the dataset as a Long; presumably its element count -- confirm. */
  def size[A](d: D[A]): Long

  /** Whether the dataset is empty; presumably true exactly when it has no elements -- confirm. */
  def isEmpty[A](d: D[A]): Boolean

  /**
   * Pair up the elements of d with the elements of that, yielding a dataset of tuples.
   *
   * NOTE(review): handling of datasets with differing sizes is not stated here -- confirm.
   */
  def zip[A, B: ClassTag](d: D[A])(that: D[B]): D[(A, B)]

  /**
   * Pair each element with a Long index.
   *
   * NOTE(review): presumably zero-based and following dataset order -- confirm.
   */
  def zipWithIndex[A](d: D[A]): D[(A, Long)]

}





© 2015 - 2024 Weber Informatics LLC | Privacy Policy