All Downloads are FREE. Search and download functionalities are using the official Maven repository.

quasar.physical.mongodb.mapreduce.scala Maven / Gradle / Ivy

The newest version!
/*
 * Copyright 2014–2017 SlamData Inc.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package quasar.physical.mongodb

import slamdata.Predef._
import quasar.common.SortDir
import quasar.javascript._

import com.mongodb.client.model.MapReduceAction
import monocle.macros.GenLens
import scalaz._, Scalaz._

/** Configuration parameters for MapReduce operations
  *
  * @param map The mapping function
  * @param reduce The reducing function
  * @param selection Query selector to apply to input documents
  * @param inputSort Criteria to use to sort the input documents.
  * @param limit Limit the number of input documents to `map`
  * @param finalizer JavaScript function applied to the output after the
  *                  `reduce` function.
  * @param scope Global variables made available to the `map`, `reduce` and
  *              `finalizer` functions.
  * @param jsMode Whether to avoid converting intermediate values to
  *               BSON, leaving them as JavaScript objects instead.
  *               Setting this to `true` has implications on the size of
  *               the input, see the MongoDB `mapReduce` documentation
  *               for details.
  * @param verbose Whether to include additional information, such
  *                as timing, in the results.
  */
@SuppressWarnings(Array("org.wartremover.warts.DefaultArguments"))
final case class MapReduce(
  map:       Js.Expr, // "function () { if (...) emit(...) }"
  reduce:    Js.Expr, // "function (key, values) { ...; return ... }"
  selection: Option[Selector] = None,
  inputSort: Option[NonEmptyList[(BsonField, SortDir)]] = None,
  limit:     Option[Long] = None,
  finalizer: Option[Js.Expr] = None, // "function (key, reducedValue) { ...; return ... }"
  scope:     MapReduce.Scope = ListMap(),
  jsMode:    Option[Boolean] = None,
  verbose:   Option[Boolean] = None) {

  import MapReduce._

  /** Renders these options as a BSON document whose `out` field is
    * `{ "inline": 1 }`.
    */
  def inlineBson: Bson.Doc = {
    val inlineOut: Bson = Bson.Doc(ListMap("inline" -> Bson.Int64(1)))
    toBson(inlineOut)
  }

  /** Renders these options as a BSON document whose `out` field is the
    * BSON form of the given output collection.
    *
    * @param dst the collection (and optional action) the results go to
    */
  def toCollBson(dst: OutputCollection): Bson.Doc = {
    val destination: Bson = dst.bson
    toBson(destination)
  }

  /** Assembles the options document.
    *
    * `out` always comes first, followed by each optional setting that is
    * present, in the order: `query`, `sort`, `limit`, `finalize`, `scope`,
    * `jsMode`, `verbose`. `scope` is only emitted when it has at least one
    * entry. The `map` and `reduce` functions are not part of this document.
    *
    * @param out the value to store under the `out` key
    */
  private def toBson(out: Bson): Bson.Doc = {
    // One sort key rendered as (field path, direction).
    def sortEntry(entry: (BsonField, SortDir)): (String, Bson) = entry match {
      case (field, dir) => (field.asText, sortDirToBson(dir))
    }

    def sortBson(keys: NonEmptyList[(BsonField, SortDir)]): Bson.Doc =
      Bson.Doc(ListMap(keys.list.toList.map(sortEntry): _*))

    // Each entry is `None` when the corresponding setting was not supplied.
    val optionalFields: List[Option[(String, Bson)]] = List(
      selection.map(sel => "query" -> sel.bson),
      inputSort.map(keys => "sort" -> sortBson(keys)),
      limit.map(n => "limit" -> Bson.Int64(n)),
      finalizer.map(fn => "finalize" -> Bson.JavaScript(fn)),
      scope.nonEmpty.option("scope" -> Bson.Doc(scope)),
      jsMode.map(flag => "jsMode" -> Bson.Bool(flag)),
      verbose.map(flag => "verbose" -> Bson.Bool(flag)))

    val fields: List[(String, Bson)] = ("out" -> out) :: optionalFields.unite

    Bson.Doc(ListMap(fields: _*))
  }
}

object MapReduce {
  /** Named BSON values, as supplied through the `scope` parameter. */
  type Scope = ListMap[String, Bson]

  /** How map-reduce results are combined with an existing output
    * collection.
    */
  sealed abstract class Action {
    /** The `nonAtomic` setting carried by this action, if any. */
    def nonAtomic: Option[Boolean]
  }

  object Action {
    /** The result of the map reduce replaces whatever documents already
      * exist in the destination collection. Carries no `nonAtomic` setting.
      */
    final case object Replace extends Action {
      def nonAtomic = None
    }

    /** The result of the map reduce is merged with the existing contents
      * of the output collection.
      */
    final case class Merge(nonAtomic: Option[Boolean]) extends Action

    /** Judging by the name, the result of the map reduce is combined with
      * the existing contents of the output collection by way of the reducer
      * function; the MongoDB docs are incomplete on this point.
      */
    final case class Reduce(nonAtomic: Option[Boolean]) extends Action

    /** The field name under which the given `Action` appears in a
      * map-reduce BSON document: the lower-cased name of the matching
      * driver `MapReduceAction` constant.
      */
    def bsonFieldName(act: Action): String = {
      val driverAction: MapReduceAction = act match {
        case Replace   => MapReduceAction.REPLACE
        case Merge(_)  => MapReduceAction.MERGE
        case Reduce(_) => MapReduceAction.REDUCE
      }

      driverAction.name.toLowerCase
    }
  }

  /** Action to apply to the output collection.
    *
    * @param action what to do when the output collection already exists
    * @param database the database holding the output collection; when
    *                 absent, the source database is used
    * @param shardOutputCollection whether to shard the output collection
    *                              (the output database must support
    *                              sharding)
    */
  final case class ActionedOutput(
    action: Action,
    database: Option[DatabaseName],
    shardOutputCollection: Option[Boolean]
  ) {
    /** Renders this output specification for the given collection.
      *
      * The first field is keyed by the action's field name and holds the
      * collection; `db`, `sharded` and `nonAtomic` follow, each only when
      * the corresponding value is defined.
      */
    def bson(coll: CollectionName): Bson.Doc = {
      val target: (String, Bson) = Action.bsonFieldName(action) -> coll.bson

      val extras: List[Option[(String, Bson)]] = List(
        database.map(db => "db" -> db.bson),
        shardOutputCollection.map(sharded => "sharded" -> Bson.Bool(sharded)),
        action.nonAtomic.map(flag => "nonAtomic" -> Bson.Bool(flag)))

      val fields: List[(String, Bson)] = target :: extras.unite

      Bson.Doc(ListMap(fields: _*))
    }
  }

  /** Output collection for non-inline map-reduce jobs.
    *
    * @param collection the collection receiving the results
    * @param withAction optional action and related settings for that
    *                   collection
    */
  final case class OutputCollection(
    collection: CollectionName,
    withAction: Option[ActionedOutput]
  ) {
    /** The BSON for the `out` value: the bare collection when no action is
      * given, otherwise the action document built for this collection.
      */
    def bson: Bson =
      withAction.map(_.bson(collection)).getOrElse(collection.bson)
  }

  // Lenses onto individual `MapReduce` fields.
  val _map       = GenLens[MapReduce](_.map)
  val _reduce    = GenLens[MapReduce](_.reduce)
  val _finalizer = GenLens[MapReduce](_.finalizer)
  val _scope     = GenLens[MapReduce](_.scope)
}




© 2015 - 2025 Weber Informatics LLC | Privacy Policy