All Downloads are FREE. Search and download functionalities are using the official Maven repository.

quasar.physical.mongodb.workflow.package.scala Maven / Gradle / Ivy

The newest version!
/*
 * Copyright 2014–2017 SlamData Inc.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package quasar.physical.mongodb

import slamdata.Predef._
import quasar.{NonTerminal, RenderTree, RenderedTree}, RenderTree.ops._
import quasar.fp._
import quasar.fp.ski._
import quasar.jscore, jscore.JsCore
import quasar.physical.mongodb.accumulator._
import quasar.physical.mongodb.expression._, transform.wrapArrayInLet
import quasar.physical.mongodb.optimize.pipeline._
import quasar.physical.mongodb.workflowtask._

import matryoshka._
import matryoshka.data.Fix
import matryoshka.implicits._
import monocle.syntax.all._
import scalaz._, Scalaz._

/** A Workflow is a graph of atomic operations, with WorkflowOps for the
  * vertices. We crush them down into a WorkflowTask. This `crush` gives us a
  * location to optimize our workflow decisions. EG, A sequence of simple ops
  * may be combined into a single pipeline request, but if one of those
  * operations contains JS, we have to execute that outside of a pipeline,
  * possibly reordering the other operations to avoid having two pipelines with
  * a JS operation in the middle.
  *
  * We also implement the optimizations at
  * http://docs.mongodb.org/manual/core/aggregation-pipeline-optimization/ so
  * that we can build others potentially on top of them (including reordering
  * non-pipelines around pipelines, etc.).
  */
package object workflow {
  /** The type for workflows targeting MongoDB 3.2 specifically. */
  type Workflow3_2F[A] = WorkflowOpCoreF[A]

  /** The type for workflows supporting the most advanced capabilities. */
  type WorkflowF[A] = Workflow3_2F[A]
  /** A complete workflow: the fixed point of `WorkflowF`. */
  type Workflow = Fix[WorkflowF]

  /** A function from the fixed point of an op functor `F` to itself. */
  type FixOp[F[_]] = Fix[F] => Fix[F]

  /** Wrapper ("newtype") for an op that occurs in a pipeline, pairing the op
    * (with its source erased to `Unit`) with a BSON document. It is fixed to
    * `WorkflowF`, so it can represent an op from any workflow, and is mostly
    * useful once a workflow has been constructed.
    */
  final case class PipelineOp(op: WorkflowF[Unit], bson: Bson.Doc) {
    /** Applies `f` to the wrapped op when that op can be projected into `F`
      * and `f` yields a replacement; otherwise returns this op unchanged.
      */
    def rewrite[F[_]](f: F[Unit] => Option[PipelineF[F, Unit]])
      (implicit I: F :<: WorkflowF): PipelineOp =
      I.prj(op).flatMap(f) match {
        case Some(replacement) => PipelineOp(replacement)
        case None              => this
      }
  }
  object PipelineOp {
    /** Builds a `PipelineOp` from a pipeline view over any `F` that injects
      * into `WorkflowF`, taking the BSON from the view.
      */
    def apply[F[_]](f: PipelineF[F, Unit])(implicit I: F :<: WorkflowF): PipelineOp = {
      val injected: WorkflowF[Unit] = I.inj(f.wf)
      new PipelineOp(injected, f.bson)
    }
  }
  /** Extractor exposing the core workflow op carried by a `PipelineOp`, when
    * the wrapped op projects into `WorkflowOpCoreF`.
    */
  object PipelineOpCore {
    def unapply(p: PipelineOp): Option[WorkflowOpCoreF[Unit]] = {
      val core = Inject[WorkflowOpCoreF, WorkflowF]
      core.prj(p.op)
    }
  }

  /** Quasar result sigil: the field name built from `sigil.Quasar`. */
  val QuasarSigilName  = BsonField.Name(sigil.Quasar)
  /** Root-level variable referring to the `QuasarSigilName` field. */
  val QuasarSigilVar   = DocVar.ROOT(QuasarSigilName)

  /** MapReduce result expression key: the field name built from `sigil.Value`. */
  val ExprName   = BsonField.Name(sigil.Value)
  /** Root-level variable referring to the `ExprName` field. */
  val ExprVar    = DocVar.ROOT(ExprName)

  /** MapReduce result identity key: the field name built from `sigil.Id`. */
  val IdName   = BsonField.Name(sigil.Id)
  /** Root-level variable referring to the `IdName` field. */
  val IdVar    = DocVar.ROOT(IdName)

  // NB: it's only safe to emit "core" expr ops here, but we always use the
  // largest type here, so they're immediately injected into ExprOp.
  import fixExprOp._

  /** Turns a crystallized workflow into a single `WorkflowTask`: the workflow
    * is folded with `C.crush`, the resulting base/task pair is passed through
    * `finish`, and the finished task is rewritten with `normalize`.
    */
  def task[F[_]: Functor](fop: Crystallized[F])(implicit C: Crush[F]): WorkflowTask = {
    val crushed: (DocVar, WorkflowTask) = fop.op.para(C.crush[Fix])
    val finished = finish(crushed._1, crushed._2)._2
    finished.transAna[WorkflowTask](normalize)
  }

  // NB: no need for a typeclass if implementing this way, but will be needed as
  //     soon as we need to coalesce anything _into_ a type that isn't 2.6.
  //     Furthermore, if this implementation is made implicit, then lots of
  //     functions that require it are able to resolve it from other evidence.
  //     Since that seems likely to be a short-lived phenomenon, instead for now
  //     implicits are defined below for just the specific types being used.
  /** Builds a `Coalesce` instance for any `F` that contains the core workflow
    * ops. `coalesceƒ` looks at one op together with its immediate source and,
    * when the pair can be merged or reordered, returns the replacement op; it
    * returns `None` when no rewrite applies.
    */
  def coalesceAll[F[_]: Functor](implicit I: WorkflowOpCoreF :<: F):
      Coalesce[F] = new Coalesce[F] {
    def coalesceƒ:
        F[Fix[F]] => Option[F[Fix[F]]] = {
      case I($MatchF(src, selector)) => src.project match {
        // $match over $sort: swap them so that the $match is applied first.
        case I($SortF(src0, value)) =>
          I.inj($SortF(I.inj($MatchF(src0, selector)).embed, value)).some
        // $match over $match: combine both selectors into a single $match.
        case I($MatchF(src0, sel0)) =>
          I.inj($MatchF(src0, sel0 ⊹ selector)).some
        case _ => None
      }
      case I(p @ $ProjectF(src, shape, id)) => src.project match {
        // $project over $project: inline the inner shape into the outer one
        // (only when `inlineProject` succeeds).
        case I($ProjectF(src0, shape0, id0)) =>
          inlineProject(p, List(shape0)).map(sh => I.inj($ProjectF(src0, sh, id0 |+| id)))
        // Would like to inline a $project into a preceding $simpleMap, but
        // This is not safe, because sometimes a $project is inserted after
        // $simpleMap specifically to pull fields out of `value`, and those
        // $project ops need to be preserved.
        // case $SimpleMapF(src0, js, flatten, scope) =>
        //   shape.toJs.fold(
        //     κ(op),
        //     jsShape => chain(src0,
        //       $simpleMap(
        //         JsMacro(base =>
        //           jscore.Let(
        //             ListMap("__tmp" -> js(base)),
        //             jsShape(jscore.Ident("__tmp")))),
        //         flatten, scope)))
        // $project over $group: fold the projection into the group, but only
        // when the projection does not exclude the id.
        case I($GroupF(src, grouped, by)) if id != ExcludeId =>
          inlineProjectGroup(shape, grouped).map(gr => I.inj($GroupF(src, gr, by)))
        // $project over $unwind over $group: same idea, rewriting both the
        // unwound field and the grouped values.
        case I($UnwindF(Embed(I($GroupF(src, grouped, by))), unwound))
            if id != ExcludeId =>
          inlineProjectUnwindGroup(shape, unwound, grouped).map { case (unwound, grouped) =>
            I.inj($UnwindF(I.inj($GroupF(src, grouped, by)).embed, unwound))
          }
        case _ => None
      }
      // $sort over $sort: a single $sort with the outer keys first.
      case I($SortF(Embed(I($SortF(src, sort1))), sort2)) =>
        I.inj($SortF(src, sort2 ⊹ sort1)).some
      case I($LimitF(src, count)) => src.project match {
        // $limit over $limit: keep the smaller of the two counts.
        case I($LimitF(src0, count0)) =>
          I.inj($LimitF(src0, scala.math.min(count0, count))).some
        // $limit over $skip: swap them, raising the limit by the skipped
        // count so the $skip still has `count` documents left to pass on.
        case I($SkipF(src0, count0)) =>
          I.inj($SkipF(I.inj($LimitF(src0, count0 + count)).embed, count0)).some
        case _ => None
      }
      case I($SkipF(src, count)) => src.project match {
        // $skip over $skip: a single $skip of the summed counts.
        case I($SkipF(src0, count0)) => I.inj($SkipF(src0, count0 + count)).some
        case _                       => None
      }
      // Grouping by any non-null literal is rewritten to group by a null
      // literal.
      case I($GroupF(src, grouped, \/-($literal(bson)))) if bson != Bson.Null =>
        I.inj($GroupF(src, grouped, \/-($literal(Bson.Null)))).some
      // Any other $group: try to inline preceding projections into it.
      case I(op0 @ $GroupF(_, _, _)) =>
        inlineGroupProjects(op0).map { case (src, gr, by) => I.inj($GroupF(src, gr, by)) }
      // $geoNear is never coalesced at present: both branches yield None.
      case I($GeoNearF(src, _, _, _, _, _, _, _, _, _)) => src.project match {
        // FIXME: merge the params
        case I($GeoNearF(_, _, _, _, _, _, _, _, _, _)) => None
        case _                                          => None
      }
      // $map / $flatMap over $map / $flatMap: compose the functions, provided
      // the two scopes can be merged (`mergeMaps` yields None otherwise, in
      // which case nothing is coalesced).
      case I($MapF(src, fn, scope)) => src.project match {
        case I($MapF(src0, fn0, scope0)) =>
          Reshape.mergeMaps(scope0, scope).map(sc =>
            I.inj($MapF(src0, $MapF.compose(fn, fn0), sc)))
        case I($FlatMapF(src0, fn0, scope0)) =>
          Reshape.mergeMaps(scope0, scope).map(sc =>
            I.inj($FlatMapF(src0, $FlatMapF.mapCompose(fn, fn0), sc)))
        case _                   => None
      }
      case I($FlatMapF(src, fn, scope)) => src.project match {
        case I($MapF(src0, fn0, scope0))     =>
          Reshape.mergeMaps(scope0, scope).map(sc =>
            I.inj($FlatMapF(src0, $MapF.compose(fn, fn0), sc)))
        case I($FlatMapF(src0, fn0, scope0)) =>
          Reshape.mergeMaps(scope0, scope).map(sc =>
            I.inj($FlatMapF(src0, $FlatMapF.kleisliCompose(fn, fn0), sc)))
        case _                   => None
      }
      // $simpleMap over $simpleMap: sequence them with `>>>`.
      case I(sm @ $SimpleMapF(src, _, _)) => src.project match {
        case I(sm0 @ $SimpleMapF(_, _, _)) => I.inj(sm0 >>> sm).some
        case _                             => None
      }
      // $foldLeft whose head is itself a $foldLeft: flatten into one, with
      // the inner tail placed before the outer tail.
      case I($FoldLeftF(head, tail)) => head.project match {
        case I($FoldLeftF(head0, tail0)) =>
          I.inj($FoldLeftF(head0, tail0 ⊹ tail)).some
        case _                       => None
      }
      // $out directly over a $read: replaced by the $read itself.
      case I($OutF(src, _)) => src.project match {
        case I($ReadF(_)) => src.project.some
        case _            => None
      }
      case _ => None
    }
  }

  /** Converts a pipeline op into a `PipelineOp` after rewriting the
    * references it holds so that they sit underneath `base`. The op's source
    * is discarded.
    *
    * Fails at runtime with "unexpected WorkflowOp" for any op that is not one
    * of the cases handled below.
    */
  def toPipelineOp[A](op: PipelineF[WorkflowF, A], base: DocVar): PipelineOp = {
    // Build the reference rewriter once and share it between all cases.
    val refs = rewriteRefs3_2(prefixBase(base))

    def rewrite(wf: WorkflowOpCoreF[Unit]) =
      wf match {
        case matchOp @ $MatchF(_, _)             => refs.apply(matchOp).shapePreserving
        case projectOp @ $ProjectF(_, _, _)      => refs.apply(projectOp).pipeline
        case redactOp @ $RedactF(_, _)           => refs.apply(redactOp).pipeline
        case skipOp @ $SkipF(_, _)               => refs.apply(skipOp).shapePreserving
        case limitOp @ $LimitF(_, _)             => refs.apply(limitOp).shapePreserving
        case unwindOp @ $UnwindF(_, _)           => refs.apply(unwindOp).pipeline
        case groupOp @ $GroupF(_, _, _)          => refs.apply(groupOp).pipeline
        case sortOp @ $SortF(_, _)               => refs.apply(sortOp).shapePreserving
        case geoOp @ $GeoNearF(_, _, _, _, _, _, _, _, _, _) => refs.apply(geoOp).pipeline
        case outOp @ $OutF(_, _)                 => refs.apply(outOp).shapePreserving
        case lookupOp @ $LookupF(_, _, _, _, _)  => refs.apply(lookupOp).pipeline
        case sampleOp @ $SampleF(_, _)           => refs.apply(sampleOp).shapePreserving
        case _ => scala.sys.error("unexpected WorkflowOp")
      }

    val sourceless = op.wf.void
    PipelineOp(rewrite(sourceless))
  }

  // helper for rewriteRefs: defined for every reference, it places the
  // reference underneath `base` using `\\`.
  def prefixBase(base: DocVar): PartialFunction[DocVar, DocVar] = {
    case ref => base \\ ref
  }

  /** Base class for reference rewriters over the ops of `F`.
    *
    * @param applyVar0 the (possibly partial) mapping from old to new variables
    */
  abstract class RewriteRefs[F[_]](val applyVar0: PartialFunction[DocVar, DocVar]) {
    /** Total version of `applyVar0`: variables it does not cover are returned
      * unchanged.
      */
    val applyVar = (v: DocVar) => applyVar0.applyOrElse(v, (unchanged: DocVar) => unchanged)

    /** Rewrites a field by treating it as a variable; falls back to the
      * original name when the rewritten variable has no field to dereference.
      */
    def applyFieldName(name: BsonField): BsonField = {
      // TODO: Delete field if it's transformed away to nothing???
      val rewritten = applyVar(DocField(name))
      rewritten.deref.getOrElse(name)
    }

    /** Rewrites every field mentioned by a selector. */
    def applySelector(s: Selector): Selector =
      s.mapUpFields { case field => applyFieldName(field) }

    /** Rewrites the field of each pair, leaving the paired values alone. */
    def applyNel[A](m: NonEmptyList[(BsonField, A)]): NonEmptyList[(BsonField, A)] =
      m.map { case (field, a) => (applyFieldName(field), a) }

    /** Rewrites the references held by `op`, preserving its precise type. */
    def apply[A <: F[_]](op: A): A
  }

  // NB: it's useful to be able to return the precise type here, so this is
  // explicitly implemented for each version's trait.
  // TODO: Make this a trait, and implement it for actual types, rather than all
  //       in here (already done for ExprOp and Reshape). (#438)
  /** Reference rewriter for the core (3.2) ops: returns a copy of the given
    * op in which the fields, variables, selectors and expressions it holds
    * have been mapped through `f`. The op's source is passed through
    * untouched, and ops not listed in the match are returned as-is.
    */
  private [workflow] def rewriteRefs3_2(f: PartialFunction[DocVar, DocVar])
        (implicit exprOps: ExprOpOps.Uni[ExprOp]) = new RewriteRefs[WorkflowOpCoreF](f) {
    def apply[A <: WorkflowOpCoreF[_]](op: A) = {
      (op match {
        case $ProjectF(src, shape, xId) =>
          $ProjectF(src, shape.rewriteRefs(applyVar0), xId)
        case $GroupF(src, grouped, by)  =>
          // Both the accumulated values and the grouping key (either side of
          // the disjunction) may hold references.
          $GroupF(src,
            grouped.rewriteRefs(applyVar0),
            by.bimap(_.rewriteRefs(applyVar0), _.cata(exprOps.rewriteRefs(applyVar0))))
        case $MatchF(src, s)            => $MatchF(src, applySelector(s))
        case $RedactF(src, e)           => $RedactF(src, e.cata(exprOps.rewriteRefs(applyVar0)))
        case $UnwindF(src, f)           => $UnwindF(src, applyVar(f))
        case $SortF(src, l)             => $SortF(src, applyNel(l))
        case g: $GeoNearF[_]            =>
          // Only the distance field and the optional query are rewritten; the
          // remaining parameters are copied unchanged.
          g.copy(
            distanceField = applyFieldName(g.distanceField),
            query = g.query.map(applySelector))
        case $LookupF(src, from, lf, ff, as) =>
          // NB: rewrite only the source reference; the foreignField is not part
          // of the workflow at this point
          $LookupF(src, from, applyFieldName(lf), ff, applyFieldName(as))
        case _ => op
        // The match is typed as a general op; the cast restores the precise
        // op type `A` that callers rely on.
      }).asInstanceOf[A]
    }
  }

  /** Attempts to determine the top-level field names produced by the
    * workflow ending at `op`, by injecting its top op into `WorkflowF` and
    * delegating to `simpleShape32`.
    */
  def simpleShape[F[_]](op: Fix[F])(implicit I: F :<: WorkflowF): Option[List[BsonField.Name]] = {
    val top: WorkflowF[Fix[F]] = I.inj(op.unFix)
    simpleShape32[F](top)
  }

  /** Attempts to determine the top-level field names produced by `wf`.
    *
    * The result is `None` whenever the names cannot be read off the op (and,
    * where relevant, its source). This includes `\$lookup`, whose resulting
    * shape is not derived here: it used to abort with `NotImplementedError`
    * (`???`) and is now reported as unknown, like every other unhandled op.
    */
  @SuppressWarnings(Array("org.wartremover.warts.Recursion"))
  def simpleShape32[F[_]](wf: WorkflowF[Fix[F]])(implicit I: F :<: WorkflowF): Option[List[BsonField.Name]] = {
    wf match {
        // A literal document: its keys are the shape.
        case $PureF(Bson.Doc(value))          =>
          value.keys.toList.map(BsonField.Name(_)).some
        // A projection yields its own keys, plus the id when it is included.
        case $ProjectF(_, Reshape(value), id) =>
          (if (id == IncludeId) IdName :: value.keys.toList
          else value.keys.toList).some
        case sm @ $SimpleMapF(_, _, _)        =>
          // Only known when the simplified JS is an object literal, possibly
          // nested inside `let` bindings.
          @SuppressWarnings(Array("org.wartremover.warts.Recursion"))
          def loop(expr: JsCore): Option[List[jscore.Name]] =
            expr.simplify match {
              case jscore.Obj(value)      => value.keys.toList.some
              case jscore.Let(_, _, body) => loop(body)
              case _ => None
            }
          loop(sm.simpleExpr.expr).map(_.map(n => BsonField.Name(n.value)))
        // A group yields the id followed by its accumulated keys.
        case $GroupF(_, Grouped(value), _)    => (IdName :: value.keys.toList).some
        // These ops are treated as keeping the field names of their source.
        case $UnwindF(src, _)                 => simpleShape(src)
        case IsShapePreserving(sp)            => simpleShape(sp.src)
        // Shape not derived here: report "unknown" rather than throwing.
        case $LookupF(_, _, _, _, _)          => None
        // \$sample is handled as shape-preserving elsewhere in this file, so
        // it keeps the shape of its source (normally already covered by the
        // IsShapePreserving case above).
        case $SampleF(src, _)                 => simpleShape(src)
        case _                                => None
    }
  }

  /** Newtype for source ops (that is, ops that are themselves sources). */
  // TODO: prevent construction of invalid instances
  final case class SourceF[F[_], A](wf: F[A]) {
    /** The wrapped op with its contents replaced by `Unit`. */
    def op(implicit ev: Functor[F]): F[Unit] = ev.void(wf)

    /** Re-wraps the result of applying `f` to the underlying op. */
    def fmap[G[_], B](f: F[A] => G[B]): SourceF[G, B] = {
      val mapped: G[B] = f(wf)
      SourceF[G, B](mapped)
    }
  }
  /** Extractor that matches ops classified as sources by `Classify[F]`. */
  object IsSource {
    def unapply[F[_], A](op: F[A])(implicit F: Classify[F]): Option[SourceF[F, A]] =
      F.source(op)
  }

  /** Newtype for ops which have a single source op. */
  // TODO: prevent construction of invalid instances
  abstract class SingleSourceF[F[_], A] { self =>
    /** The underlying op. */
    def wf: F[A]
    /** The single source of the op. */
    def src: A
    /** The same op with its source replaced by `newSrc`. */
    def reparent[B](newSrc: B): SingleSourceF[F, B]

    /** Reparenting that handles coalescing (but is more restrictive as a
      * result).
      */
    // TODO: this doesn't seem to actually handle coalescing, so what was the
    // comment referring to?
    def reparentW[T](newSrc: T)(implicit T: Corecursive.Aux[T, F], F: Functor[F])
        : T =
      reparent(newSrc).wf.embed

    /** Maps the source through `f` and the op itself through the natural
      * transformation `g`.
      */
    @SuppressWarnings(Array("org.wartremover.warts.Recursion"))
    def fmap[G[_], B](f: A => B, g: F ~> G): SingleSourceF[G, B] =
      new SingleSourceF[G, B] {
        // `src` is declared before `wf` because `wf` is computed from it.
        val src = f(self.src)
        val wf = g(self.reparent(src).wf)
        // Reparent on the original op first, then translate the result with
        // `g` (`ι` is presumably the identity from `quasar.fp.ski`, leaving
        // the new source as given — confirm).
        def reparent[C](newSrc: C) = self.reparent(newSrc).fmap(ι, g)
      }

    // NB: needed because making A covariant breaks pattern-matching ("GADT skolem" errors)
    def widen[B >: A]: SingleSourceF[F, B] = reparent(src)
  }
  /** Extractor that matches ops classified as single-source by `Classify[F]`. */
  object IsSingleSource {
    def unapply[F[_], A](op: F[A])(implicit F: Classify[F]): Option[SingleSourceF[F, A]] =
      F.singleSource(op)
  }

  /** Newtype for ops which can appear in aggregation pipeline. */
  // TODO: prevent construction of invalid instances
  abstract class PipelineF[F[_], A] extends SingleSourceF[F, A] { self =>
    // NB: narrows the result type
    def reparent[B](newSrc: B): PipelineF[F, B]

    /** Key of the single-entry BSON document built by `bson`. */
    def op: String
    /** Value stored under `op` in the document built by `bson`. */
    def rhs: Bson
    /** The op as a one-entry BSON document mapping `op` to `rhs`. */
    def bson: Bson.Doc = Bson.Doc(ListMap(op -> rhs))

    // NB: narrows the result type
    @SuppressWarnings(Array("org.wartremover.warts.Recursion"))
    override def fmap[G[_], B](f: A => B, g: F ~> G): PipelineF[G, B] =
      new PipelineF[G, B] {
        // `src` is declared before `wf` because `wf` is computed from it.
        val src = f(self.src)
        val wf = g(self.reparent(src).wf)
        def reparent[C](newSrc: C) = self.reparent(newSrc).fmap(ι, g)

        // The BSON rendering is taken from the original op unchanged.
        def op = self.op
        def rhs = self.rhs
      }

    // NB: needed because making A covariant breaks pattern-matching ("GADT skolem" errors)
    override def widen[B >: A]: PipelineF[F, B] = reparent(src)
  }

  /** Extractor that matches ops classified as pipeline ops by `Classify[F]`. */
  object IsPipeline {
    def unapply[F[_], A](op: F[A])(implicit F: Classify[F]): Option[PipelineF[F, A]] =
      F.pipeline(op)
  }

  /** Newtype for ops which preserve the shape of the input. */
  // TODO: prevent construction of invalid instances
  abstract class ShapePreservingF[F[_], A] extends PipelineF[F, A] { self =>
    // NB: narrows the result type
    def reparent[B](newSrc: B): ShapePreservingF[F, B]

    // NB: narrows the result type
    @SuppressWarnings(Array("org.wartremover.warts.Recursion"))
    override def fmap[G[_], B](f: A => B, g: F ~> G): ShapePreservingF[G, B] =
      new ShapePreservingF[G, B] {
        // `src` is declared before `wf` because `wf` is computed from it.
        val src = f(self.src)
        val wf = g(self.reparent(src).wf)
        def reparent[C](newSrc: C) = self.reparent(newSrc).fmap(ι, g)

        // The BSON rendering is taken from the original op unchanged.
        def op = self.op
        def rhs = self.rhs
      }

    // NB: needed because making A covariant breaks pattern-matching ("GADT skolem" errors)
    override def widen[B >: A]: ShapePreservingF[F, B] = reparent(src)
  }
  /** Extractor that matches ops classified as shape-preserving by
    * `Classify[F]`.
    */
  object IsShapePreserving {
    def unapply[F[_], A](op: F[A])(implicit F: Classify[F]): Option[ShapePreservingF[F, A]] =
      F.shapePreserving(op)
  }

  /**
   * Threads `src` through `op1` and then through each of `ops`, in order, so
   * that a sequence of operations can be written flat:
   * {{{
   * chain(
   *   \$read(Path.fileAbs("foo")),
   *   \$match(Selector.Where(Js.Bool(true))),
   *   \$limit(7))
   * }}}
   * instead of nesting the applications by hand:
   * {{{
   * val read = \$read(Path.fileAbs("foo"))
   * val matched = \$match(Selector.Where(Js.Bool(true)))(read)
   * \$limit(7)(matched)
   * }}}
   */
  def chain[A](src: A, op1: A => A, ops: (A => A)*): A =
    (op1 +: ops).foldLeft(src)((acc, step) => step(acc))

  implicit def workflowFCrush(implicit I: WorkflowOpCoreF :<: WorkflowF):
      Crush[WorkflowF] =
    new Crush[WorkflowF] {
      /** Crushes one workflow op into a base/task pair.
        *
        * Every source position of `op` carries both the original sub-workflow
        * and the `(DocVar, WorkflowTask)` pair its source was already crushed
        * to. The result pairs the base under which the op's output is found
        * with the task that computes it.
        */
      @SuppressWarnings(Array("org.wartremover.warts.Recursion"))
      def crush[T[_[_]]: BirecursiveT](
        op: WorkflowF[(T[WorkflowF], (DocVar, WorkflowTask))]) = op match {
        case I($PureF(value)) => (DocVar.ROOT(), PureTask(value))
        case I($ReadF(coll))  => (DocVar.ROOT(), ReadTask(coll))
        case I(op @ $MatchF((src, rez), selector)) =>
          // TODO: If we ever allow explicit request of cursors (instead of
          //       collections), we could generate a FindQuery here.
          // Fallback used when the $match cannot be placed in a pipeline: a
          // map-reduce with no-op map/reduce functions that carries the
          // (rewritten) selector as its `selection`.
          lazy val nonPipeline = {
            val (base, crushed) = (finish(_, _)).tupled(rez)
            (ExprVar,
              MapReduceTask(
                crushed,
                MapReduce(
                  $MapF.mapFn(base match {
                    case DocVar(DocVar.ROOT, None) => $MapF.mapNOP
                    case _                         => $MapF.mapProject(base)
                  }),
                  $ReduceF.reduceNOP,
                  // TODO: Get rid of this asInstanceOf!
                  selection = Some(rewriteRefs3_2(prefixBase(base)).apply(Functor[WorkflowOpCoreF].void(op).asInstanceOf[$MatchF[T[WorkflowOpCoreF]]]).selector)),
                None))
          }
          pipeline($MatchF[T[WorkflowF]](src, selector).shapePreserving.fmap(ι, I)) match {
            case Some((base, up, mine)) => (base, PipelineTask(up, mine))
            case None                   => nonPipeline
          }
        // Every other pipeline op is always placed in a pipeline.
        case IsPipeline(p) =>
          alwaysPipePipe(p.reparent(p.src._1)) match {
            case (base, up, pipe) => (base, PipelineTask(up, pipe))
          }

        // A $map over a map-reduce that has no-op map and reduce functions and
        // no finalizer: reuse that map-reduce, installing `fn` as its map
        // function (when the scopes can be merged).
        case I(op @ $MapF(
            (_, (base, src1 @ MapReduceTask(src0, mr @ MapReduce(m, r, sel, sort, limit, None, scope0, _, _), oa))),
            fn, scope))
            if m == $MapF.mapNOP && r == $ReduceF.reduceNOP =>
          Reshape.mergeMaps(scope0, scope).fold(
            op.newMR(base, src1, sel, sort, limit))(
            s => base -> MapReduceTask(
              src0,
              mr applyLens MapReduce._map set fn
                applyLens MapReduce._scope set s,
              oa))

        // A "simple" map op that doesn't do any flattening is "inlined" into
        // the finalizer of a previous map-reduce.
        // TODO: handle more than one MapExpr.
        case I(op @ $SimpleMapF(
            (_, (base, src1 @ MapReduceTask(src0, mr @ MapReduce(_, _, _, _, _, None, scope0, _, _), oa))),
            NonEmptyList(MapExpr(expr), INil()),
            scope)) =>
          Reshape.mergeMaps(scope0, scope).fold(
            op.newMR(base, src1, None, None, None))(
              s => base -> MapReduceTask(
                src0,
                mr applyLens MapReduce._finalizer set Some($MapF.finalizerFn(expr))
                  applyLens MapReduce._scope set s,
                oa))

        // Any other $simpleMap is crushed via its raw form.
        case I(op @ $SimpleMapF(_, _, _)) => crush(I.inj(op.raw))

        // A $reduce over a map-reduce with no finalizer: reuse that map-reduce
        // and install `fn` as its reducer, but ONLY when its current reducer
        // is the no-op one. A bare lowercase name in the pattern would bind
        // any reducer instead of comparing it, silently discarding a previous
        // reduce step, so the reducer is bound and checked in a guard (as the
        // $map case above does).
        case I(op @ $ReduceF((_, (base, src1 @ MapReduceTask(src0, mr @ MapReduce(_, r, _, _, _, None, scope0, _, _), oa))), fn, scope))
            if r == $ReduceF.reduceNOP =>
          Reshape.mergeMaps(scope0, scope).fold(
            op.newMR(base, src1, None, None, None))(
            s => base -> MapReduceTask(
              src0,
              mr applyLens MapReduce._reduce set fn
                applyLens MapReduce._scope set s,
              oa))

        // Remaining map-reduce ops: a source pipeline consisting solely of
        // $match / $sort / $limit (in that order, each optional) is absorbed
        // into the new map-reduce's selection, sort and limit; any other
        // source is finished and used as the input task.
        case I(op: MapReduceF[_]) =>
          op.singleSource.src match {
            case (_, (base, PipelineTask(src0, List(PipelineOpCore($MatchF(_, sel)))))) =>
              op.newMR(base, src0, Some(sel), None, None)
            case (_, (base, PipelineTask(src0, List(PipelineOpCore($SortF(_, sort)))))) =>
              op.newMR(base, src0, None, Some(sort), None)
            case (_, (base, PipelineTask(src0, List(PipelineOpCore($LimitF(_, count)))))) =>
              op.newMR(base, src0, None, None, Some(count))
            case (_, (base, PipelineTask(src0, List(PipelineOpCore($MatchF(_, sel)), PipelineOpCore($SortF(_, sort)))))) =>
              op.newMR(base, src0, Some(sel), Some(sort), None)
            case (_, (base, PipelineTask(src0, List(PipelineOpCore($MatchF(_, sel)), PipelineOpCore($LimitF(_, count)))))) =>
              op.newMR(base, src0, Some(sel), None, Some(count))
            case (_, (base, PipelineTask(src0, List(PipelineOpCore($SortF(_, sort)), PipelineOpCore($LimitF(_, count)))))) =>
              op.newMR(base, src0, None, Some(sort), Some(count))
            case (_, (base, PipelineTask(src0, List(PipelineOpCore($MatchF(_, sel)), PipelineOpCore($SortF(_, sort)), PipelineOpCore($LimitF(_, count)))))) =>
              op.newMR(base, src0, Some(sel), Some(sort), Some(count))
            case (_, (base, srcTask)) =>
              val (nb, task) = finish(base, srcTask)
              op.newMR(nb, task, None, None, None)
          }

        // $foldLeft: the head is finished as-is; every tail element must have
        // been crushed to a map-reduce, which is given a reduce output action.
        case I($FoldLeftF(head, tail)) =>
          (ExprVar,
            FoldLeftTask(
              (finish(_, _)).tupled(head._2)._2,
              tail.map(_._2._2 match {
                case MapReduceTask(src, mr, _) =>
                  // FIXME: $FoldLeftF currently always reduces, but in future we’ll
                  //        want to have more control.
                  MapReduceTask(src, mr, Some(MapReduce.Action.Reduce(Some(true))))
                // NB: `finalize` should ensure that the final op is always a
                //     $ReduceF.
                case src =>
                  // TODO: Find a better way to print this
                  @SuppressWarnings(Array("org.wartremover.warts.ToString"))
                  val msg = "not a mapReduce: " + src.unFix.toString
                  scala.sys.error(msg)
              })))
      }

      /** Tries to place `op` in an aggregation pipeline, returning the base,
        * the task feeding the pipeline, and the pipeline ops accumulated so
        * far (with `op` last). Only a `\$match` can be refused (`None`);
        * every other op is delegated to `alwaysPipePipe`.
        */
      @SuppressWarnings(Array("org.wartremover.warts.Recursion"))
      def pipeline[T[_[_]]: BirecursiveT](
        op: PipelineF[WorkflowF, T[WorkflowF]]):
          Option[(DocVar, WorkflowTask, List[PipelineOp])] =
        op.wf match {
          case I($MatchF(src, selector)) =>
            // A selector is rejected as soon as it contains a `Where` clause
            // anywhere; compound selectors need both sides to be acceptable.
            @SuppressWarnings(Array("org.wartremover.warts.Recursion"))
            def pipelinable(sel: Selector): Boolean = sel match {
              case Selector.Where(_) => false
              case comp: Selector.CompoundSelector =>
                pipelinable(comp.left) && pipelinable(comp.right)
              case _ => true
            }

            if (pipelinable(selector)) {
              // Lazy: the source is only crushed when it cannot itself be
              // extended as a pipeline.
              lazy val (base, crushed) = src.para(Crush[WorkflowF].crush[T])
              src.project match {
                // NB: the `base` bound in the first function below shadows
                // the lazy `base` above; the fallback argument uses the lazy
                // one.
                case IsPipeline(p) => pipeline(p).cata(
                  { case (base, up, prev) => Some((base, up, prev :+ toPipelineOp(op, base))) },
                  Some((base, crushed, List(toPipelineOp(op, base)))))
                case _ => Some((base, crushed, List(toPipelineOp(op, base))))
              }
            }
            else None
          // TODO: Not all $GroupFs can be pipelined. Need to determine when we may
          //       need the group command or a map/reduce.
          case _ => Some(alwaysPipePipe(op))
        }

      /** Places `op` in an aggregation pipeline unconditionally. When the
        * source can itself be pipelined, `op` is appended to that pipeline;
        * otherwise a new single-op pipeline is started on top of the crushed
        * and finished source.
        */
      def alwaysPipePipe[T[_[_]]: BirecursiveT](
        op: PipelineF[WorkflowF, T[WorkflowF]]):
          (DocVar, WorkflowTask, List[PipelineOp]) = {
        // Lazy: only needed when the source cannot be pipelined.
        lazy val (base, crushed) = (finish(_, _)).tupled(op.src.para(crush[T]))
        // TODO: this is duplicated in `WorkflowBuilder.rewrite`
        // After a $group or $project the resulting base is reset to the root;
        // any other op keeps the base it was given.
        def repairBase(base: DocVar) = I.prj(op.wf) match {
          case Some($GroupF(_, _, _))   => DocVar.ROOT()
          case Some($ProjectF(_, _, _)) => DocVar.ROOT()
          case _                       => base
        }
          (op.src.project match {
            case IsPipeline(p) => pipeline(p)
            case _             => None
          }).cata(
            {
              // Source was pipelined: finish what feeds it, then append `op`
              // with its references rewritten against the finished base.
              case (base, up, prev) =>
                val (nb, task) = finish(base, up)
                (repairBase(nb),
                  task,
                  prev :+ toPipelineOp(op, nb))
            },
            (repairBase(base),
              crushed,
              List(toPipelineOp(op, base))))
      }
    }


  /** Applies `wrapArrayInLet` to the expression(s) held by an accumulator,
    * re-embedding each result.
    */
  private def wrapArrayLit[EX[_]: Functor]
    (accum: AccumOp[Fix[EX]])
    (implicit ev: ExprOpCoreF :<: EX)
      : AccumOp[Fix[EX]] = {
    val wrap: Fix[EX] => Fix[EX] =
      expr => wrapArrayInLet[Fix, EX](expr.unFix).embed

    accum.map(wrap)
  }

  /** Applies `wrapArrayLit` to every accumulator of a `Grouped`, keeping the
    * keys and their order.
    */
  private def wrapArrayLitExprInLet[EX[_]: Functor]
    (grouped: Grouped[EX])
    (implicit ev: ExprOpCoreF :<: EX, ev2: ExprOpOps.Uni[ExprOp])
      : Grouped[EX] = {
    val wrapped = grouped.value.toList.map {
      case (name, accum) => name -> wrapArrayLit[EX](accum)
    }
    Grouped(ListMap(wrapped: _*))
  }

  // NB: no need for a typeclass if implementing this way, but it will be needed
  // as soon as we need to match on anything here that isn't in core.
  implicit def crystallizeWorkflowF[F[_]: Functor: Classify: Coalesce: Refs](
    implicit I: WorkflowOpCoreF :<: F, ev1: F :<: WorkflowF, ev2: ExprOpOps.Uni[ExprOp]):
      Crystallize[F] =
    new Crystallize[F] {
      // probable conversions
      // to $MapF:          $ProjectF
      // to $FlatMapF:      $MatchF, $LimitF (using scope), $SkipF (using scope), $UnwindF, $GeoNearF
      // to $MapF/$ReduceF:  $GroupF
      // ???:              $RedactF
      // none:             $SortF
      // NB: We don’t convert a $ProjectF after a map/reduce op because it could
      //     affect the final shape unnecessarily.
      /** Prepares a workflow for crushing: the ops are simplified, reordered
        * and stripped of unused fields; a `\$project` pinning the known shape
        * is appended when one can be derived; the result is rewritten with
        * `coalesce` and `crystallizeƒ`; and a final map-reduce result, if
        * any, is wrapped by `wrapFinalMapReduceValue`.
        */
      def crystallize(op: Fix[F]) = {

        val finished =
          deleteUnusedFields(reorderOps(simplifyGroup[F](op)))

        // When the shape of `wf` is known, append to `finished` a $project
        // that includes exactly those fields; otherwise keep `finished`.
        def fixShape(wf: Fix[F]) =
          simpleShape(wf).fold(finished) { n =>
            $project[F](Reshape(n.strengthR($include().right).toListMap)).apply(finished)
          }

        // Walks down through shape-preserving ops; the shape is only fixed
        // when that walk reaches a $simpleMap. In every other case
        // `finished` is returned untouched.
        @SuppressWarnings(Array("org.wartremover.warts.Recursion"))
        def promoteKnownShape(wf: Fix[F]): Fix[F] = wf.project match {
          case I($SimpleMapF(_, _, _)) => fixShape(wf)
          case IsShapePreserving(sp)   => promoteKnownShape(sp.src)
          case _                       => finished
        }

        Crystallized(
          wrapFinalMapReduceValue(
            promoteKnownShape(finished)
              .transHylo(Coalesce[F].coalesce, crystallizeƒ)))
      }

      /** Single-op rewrite applied across the workflow by `crystallize`. */
      val crystallizeƒ: F[Fix[F]] => F[Fix[F]] = {
        // A map-reduce op sitting on a $unwind whose own source (below any
        // further $unwinds) is not a pipeline op: the map-reduce is
        // reparented onto the unwind's `flatmapop` form instead.
        case I(mr: MapReduceF[Fix[F]]) => mr.singleSource.src.project match {
          case I(uw @ $UnwindF(_, _)) if IsPipeline.unapply(unwindSrc(uw)).isEmpty =>
            mr.singleSource.fmap(ι, I).reparentW(I.inj(uw.flatmapop).embed).project
          case _                        => I.inj(mr)
        }
        // $foldLeft: the head gets a $project that nests the whole document
        // under `ExprName` (keeping the id); every tail element that does not
        // already end in a $reduce gets a `reduceFoldLeft` $reduce appended.
        case I($FoldLeftF(head, tail)) =>
          I.inj($FoldLeftF[Fix[F]](
            chain(head,
              $project[F](
                Reshape(ListMap(ExprName -> \/-($$ROOT))),
                IncludeId)),
            tail.map(x => x.project match {
              case I($ReduceF(_, _, _)) => x
              case _ => chain(x, $reduce[F]($ReduceF.reduceFoldLeft, ListMap()))
            })))
        case I(g @ $GroupF(src, grouped, by)) =>
          // We can't use arrays directly inside accumulators because of
          // https://jira.mongodb.org/browse/SERVER-23839
          // so let's wrap arrays in a let
          I($GroupF(src, wrapArrayLitExprInLet(grouped), by))

        // Everything else is left as it is.
        case op => op
      }

      /** Skips over a run of consecutive `\$unwind` ops and returns the first
        * op underneath that is not a `\$unwind`.
        */
      @SuppressWarnings(Array("org.wartremover.warts.Recursion"))
      def unwindSrc(uw: $UnwindF[Fix[F]]): F[Fix[F]] = {
        val below = uw.src.project
        below match {
          case I(inner @ $UnwindF(_, _)) => unwindSrc(inner)
          case _                         => below
        }
      }

      /** Wraps the result of a map-reduce in the Sigil, if it is the last
        * stage in the workflow, so it may be identified and unwrapped
        * when reading and querying.
        *
        * TODO: This doesn't appear to be necessary when returning "inline"
        *       map-reduce results, but we don't have enough information to
        *       know if this is the case here. It may be worth refactoring
        *       to be a transformation on WorkflowTask.
        */
      val wrapFinalMapReduceValue: Fix[F] => Fix[F] = {
        // Only the top-most op is inspected: when it is a map-reduce op, a
        // $simpleMap is appended that builds an object holding the value
        // under the `sigil.Quasar` key.
        case mr @ Embed(I(_: MapReduceF[Fix[F]])) =>
          $simpleMap[F](
            NonEmptyList(MapExpr(jscore.JsFn(
              finalValue,
              jscore.obj(sigil.Quasar -> jscore.Ident(finalValue))))),
            ListMap()).apply(mr)

        // Any other workflow is returned unchanged.
        case other => other
      }

      // JS parameter name used by the function built in
      // `wrapFinalMapReduceValue`.
      val finalValue = jscore.Name("__finalVal")
    }

  /** `RenderTree` instance for workflows: a source op is rendered on its own,
    * a run of single-source ops is rendered as one flat "Chain" node, and a
    * `\$foldLeft` is rendered as a node holding its rendered children.
    */
  implicit def workflowRenderTree[T[_[_]]: RecursiveT, F[_]: Traverse: Classify](implicit ev0: WorkflowOpCoreF :<: F, ev1: RenderTree[F[Unit]]): RenderTree[T[F]] =
    new RenderTree[T[F]] {
      val wfType = "Workflow" :: Nil

      // Flattens a run of single-source ops into a list, deepest source
      // first; each op is rendered with its source erased.
      @SuppressWarnings(Array("org.wartremover.warts.Recursion"))
      def chain(op: T[F]): List[RenderedTree] = op.project match {
        case IsSingleSource(ss) =>
          chain(ss.src) :+ Traverse[F].void(ss.wf).render
        case _ => List(render(op))
      }

      // NOTE(review): there is no fallback case here; an op that is neither
      // a source, a single-source op, nor a $foldLeft would presumably fail
      // with a MatchError — confirm that no such op exists.
      @SuppressWarnings(Array("org.wartremover.warts.Recursion"))
      def render(v: T[F]) = v.project match {
        case IsSource(s)       => s.op.render
        case IsSingleSource(_) =>
          NonTerminal("Chain" :: wfType, None, chain(v))
        case ev0($FoldLeftF(_, _)) =>
          NonTerminal("$FoldLeftF" :: wfType, None, v.children.map(render(_)))
      }
    }
}




© 2015 - 2025 Weber Informatics LLC | Privacy Policy