All downloads are free. Search and download functionality uses the official Maven repository.

org.emmalanguage.api.FlinkMutableBag.scala Maven / Gradle / Ivy

/*
 * Copyright © 2014 TU Berlin (emma@dima.tu-berlin.de)
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.emmalanguage
package api

import FlinkMutableBag.State
import Meta.Projections._
import flink._
import flink.FlinkOps.cache

import org.apache.flink.api.scala.DataSet
import org.apache.flink.api.scala.ExecutionEnvironment
import org.apache.flink.util.Collector

/**
 * A [[MutableBag]] whose state is held as a cached `DataBag` of [[FlinkMutableBag.State]]
 * entries. Every update replaces the `ss` reference with a freshly computed bag.
 *
 * @tparam K type of the state keys
 * @tparam V type of the state values
 */
class FlinkMutableBag[K: Meta, V: Meta] private
(
  /* Distributed state modeled naively as a mutable reference to an immutable bag. */
  var ss: DataBag[State[K, V]]
)(
  implicit flink: ExecutionEnvironment
) extends MutableBag[K, V] with Serializable {

  import FlinkDataSet.typeInfoForType

  /**
   * Merges the message groups `ms` into the state and returns the entries that changed.
   *
   * State and messages are full-outer-joined on their first field. For every key that has
   * a message group, `f` is called with the key, the previous value (if any) and the grouped
   * messages. A defined result becomes the new entry, flagged as changed; otherwise the
   * previous entry, if any, is kept and flagged as unchanged. Keys without a message group
   * keep their entry, flagged as unchanged.
   *
   * @param ms message groups to apply, one group per key
   * @param f  update function deciding the new value for a key
   * @tparam M type of the grouped messages
   * @return the `(key, value)` pairs of all entries flagged as changed by this call
   */
  def update[M: Meta](ms: DataBag[Group[K, M]])(f: UpdateFunction[M]): DataBag[(K, V)] = {
    val toBag = implicitly[DataSet[State[K, V]] => DataBag[State[K, V]]]
    val current = ss.as[DataSet]
    val messages = ms.as[DataSet]

    // NOTE: the join function must stay a local lambda that only closes over `f`.
    val merged: DataSet[State[K, V]] = (current fullOuterJoin messages).where(0).equalTo(0)(
      (old: State[K, V], msgs: Group[K, M], out: Collector[State[K, V]]) => {
        // Either side may be null, hence the Option wrapping.
        val prev = Option(old)
        // Fallback: carry the previous entry (if any) forward, flagged as unchanged.
        def unchanged = prev.map(_.copy(changed = false))
        val next = Option(msgs).fold(unchanged) { g =>
          f(g.key, prev.map(_.v), g.values).map(State(g.key, _, true)) orElse unchanged
        }
        next.foreach(out.collect)
      })

    ss = cache(toBag(merged))
    ss.withFilter(_.changed).map(s => s.k -> s.v)
  }

  /** Returns the current state as a bag of `(key, value)` pairs. */
  def bag(): DataBag[(K, V)] =
    ss.map(s => s.k -> s.v)

  /** Creates a new mutable bag that starts from the current state of this one. */
  def copy(): MutableBag[K, V] =
    new FlinkMutableBag[K, V](ss)
}

/** Companion of the [[FlinkMutableBag]] class: state entry type and public factory. */
object FlinkMutableBag {

  /**
   * A single state entry.
   *
   * @param k       the entry key
   * @param v       the entry value
   * @param changed flag marking the entry as changed; defaults to `true`
   */
  case class State[K, V](k: K, v: V, changed: Boolean = true)

  /**
   * Builds a mutable bag from an initial bag of `(key, value)` pairs.
   *
   * Every pair is wrapped in a [[State]] (with the default `changed` flag) and the
   * resulting bag is cached before it is handed to the new instance.
   *
   * @param init  the initial key-value pairs
   * @param flink the Flink execution environment
   */
  def apply[K: Meta, V: Meta](
    init: DataBag[(K, V)]
  )(
    implicit flink: ExecutionEnvironment
  ): MutableBag[K, V] = {
    val initial = for {
      (key, value) <- init
    } yield State(key, value)
    new FlinkMutableBag[K, V](cache(initial))
  }

  /** Endless supplier of names `stateful000`, `stateful001`, ... */
  private[api] val tempNames = Iterator.from(0).map(i => f"stateful$i%03d")
}




© 2015 - 2024 Weber Informatics LLC | Privacy Policy