// All downloads are free. Search and download functionality uses the official Maven repository.
//
// quasar.physical.mongodb.fs.QueryFileInterpreter.scala (Maven / Gradle / Ivy)
//
// The newest version!
/*
 * Copyright 2014–2017 SlamData Inc.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package quasar.physical.mongodb.fs

import slamdata.Predef._
import quasar._, RenderTree.ops._
import quasar.common.{PhaseResult, PhaseResults, PhaseResultT}
import quasar.contrib.pathy._
import quasar.contrib.scalaz.eitherT._
import quasar.contrib.scalaz.kleisli._
import quasar.fp._
import quasar.fp.ski._
import quasar.frontend.logicalplan.{constant, LogicalPlan}
import quasar.fs._
import quasar.javascript._
import quasar.physical.mongodb._, WorkflowExecutor.WorkflowCursor

import argonaut.JsonObject, JsonObject.{single => jSingle}
import argonaut.JsonIdentity._
import java.time.Instant
import matryoshka._
import matryoshka.data._
import matryoshka.implicits._
import scalaz._, Scalaz._
import scalaz.concurrent.Task

final class QueryFileInterpreter(execMongo: WorkflowExecutor[MongoDbIO, BsonCursor]) {

  import QueryFile._
  import quasar.physical.mongodb.workflow._
  import FileSystemError._
  import queryfileTypes._
  import QueryContext._

  // JavaScript flavour of the workflow executor. In this class its results are
  // only ever run to obtain a JavaScript program (`JavaScriptPrg`) that is
  // logged (`writeJsLog`) or returned to the caller (`explain`).
  private val execJs = WorkflowExecutor.javaScript

  /** Executes the workflow `wf`, using the collection derived from `out` as
    * the destination (`dst`) handed to both executors.
    *
    * Yields a path error when `out` cannot be converted to a `Collection`;
    * otherwise the plan goes through `handlePlan`, which logs the JavaScript
    * form of the workflow before running it.
    */
  def execPlan(wf: Crystallized[WorkflowF], out: AFile): MQPhErr[Unit] = {
    val destination =
      EitherT(Collection.fromFile(out)
        .leftMap(pathErr(_))
        .point[MongoLogWF[C, ?]])

    destination
      .flatMap(dst =>
        handlePlan(wf, execJs.execute(_, dst), execWorkflow(_, dst, _)).map(_ => ()))
      .run.run
  }


  /** Evaluates the workflow `wf` and registers the resulting cursor, returning
    * the `ResultHandle` under which it was recorded.
    *
    * `dbName` is forwarded unchanged to both the JavaScript and the MongoDB
    * evaluators.
    */
  def evalPlan(wf: Crystallized[WorkflowF], dbName: Option[DatabaseName]): MQPhErr[ResultHandle] = {
    val evaluated =
      handlePlan(wf, execJs.evaluate(_, dbName), evalWorkflow(_, dbName, _))

    evaluated
      .flatMap(rcursor => liftMQ(recordCursor(rcursor)))
      .run.run
  }

  /** Renders the JavaScript program for `wf` as a string.
    *
    * Only the JavaScript executor is consulted here. The program is generated
    * with the fixed prefix `tmp.gen_` (no salt) and a starting counter of 0.
    * The same program is also emitted as a phase result via `logProgram`.
    * A workflow error from generation is converted with `wfErrToFsErr`.
    */
  def explain(wf: Crystallized[WorkflowF], dbName: Option[DatabaseName]): MQPhErr[String] = {
    val generated =
      execJs.evaluate(wf, dbName).leftMap(wfErrToFsErr(wf))
    val (stmts, r) =
      generated.run.run(CollectionName("tmp.gen_")).eval(0).run
    val out = Js.Stmts(stmts.toList).pprint(0)

    EitherT.fromDisjunction[MongoLogWF[C, ?]](r.as(out))
      .flatMap(exp => logProgram(stmts).liftM[FileSystemErrT].map(_ => exp))
      .run.run
  }

  /** Fetches the next chunk of data for handle `h`, or an
    * `unknownResultHandle` error when no cursor is recorded under `h`.
    */
  def more(h: ResultHandle): MQErr[Vector[Data]] = {
    val nextChunk = moreResults(h)
    nextChunk.toRight(unknownResultHandle(h)).run
  }

  /** Forgets whatever is recorded under `h` and, if it was a workflow cursor,
    * closes it.
    *
    * The entry is cleared by assigning `none` through `resultsL(h)`; the value
    * produced by that state action drives the rest. Nothing further happens
    * when it is empty or holds a plain list of results.
    */
  def close(h: ResultHandle): MQ[Unit] = {
    def closeWorkflowCursor(wc: WorkflowCursor[C]): MQ[Unit] =
      DataCursor[MongoDbIO, WorkflowCursor[C]]
        .close(wc)
        .liftM[QRT]

    val removed =
      OptionT[MQ, ResultCursor[C]](MongoQuery(resultsL(h) <:= none))

    removed
      .flatMapF(_.fold(κ(().point[MQ]), closeWorkflowCursor(_)))
      .run.void
  }

  /** Lists the contents of `dir` by running `listContents` and lifting the
    * result into `MQ`.
    */
  def listContents0(dir: ADir): MQErr[Set[PathSegment]] = {
    val listing = listContents(dir)
    listing.run.liftM[QRT]
  }

  /** Reports whether `file` exists as a MongoDB collection.
    *
    * A path that cannot be converted to a `Collection` is reported as
    * non-existent without querying MongoDB.
    */
  def fileExists(file: AFile): MQ[Boolean] =
    Collection.fromFile(file) match {
      case \/-(coll) => MongoDbIO.collectionExists(coll).liftM[QRT]
      case -\/(_)    => false.point[MQ]
    }

  /** The current instant, read via `Instant.now` inside a delayed `Task`. */
  def queryTime: MQ[Instant] = {
    val now = Task.delay(Instant.now)
    MongoDbIO.liftTask(now).liftM[QRT]
  }

  ////

  // FileSystemError-or-result on top of a PhaseResults writer over MongoDbIO.
  // NOTE(review): not referenced anywhere else in this class — candidate for removal.
  private type PlanR[A]       = EitherT[WriterT[MongoDbIO, PhaseResults, ?], FileSystemError, A]

  // Cursor type used throughout this interpreter.
  type C = BsonCursor
  // DataCursor instance for `C`, exposed implicitly.
  // NOTE(review): presumably what lets `DataCursor[MongoDbIO, WorkflowCursor[C]]`
  // be summoned in `close` and `moreResults` — confirm.
  implicit val DC: DataCursor[MongoDbIO, C] = bsoncursor.bsonCursorDataCursor

  // Shape of the computations handed to `writeJsLog`. It may fail with a
  // workflow execution error, reads a `CollectionName` (the temp prefix),
  // threads a `Long` state (started at 0 by callers) and logs to `JavaScriptLog`.
  private type JsR[A] =
    WorkflowExecErrT[ReaderT[StateT[JavaScriptLog, Long, ?], CollectionName, ?], A]

  // Reader instance for `MQ`. The environment is the optional default database
  // paired with the `TaskRef` holding the evaluation state.
  private val queryR =
    MonadReader[MQ, (Option[DefaultDb], TaskRef[EvalState[C]])]

  /** Builds an `MQ` from an action on the shared evaluation-state reference
    * (the second component of the reader environment).
    */
  // FIXME: Not sure how to distinguish these.
  @SuppressWarnings(Array("org.wartremover.warts.Overloading"))
  private def MongoQuery[A](f: TaskRef[EvalState[C]] => Task[A]): MQ[A] =
    queryR.ask.flatMapK(env => MongoDbIO.liftTask(f(env._2)))

  /** Builds an `MQ` that runs the state transition `s` against the shared
    * evaluation state via `modifyS`.
    */
  private def MongoQuery[A](s: State[EvalState[C], A]): MQ[A] =
    MongoQuery(ref => ref.modifyS(s.run))

  // Lens onto the first (`Long`) component of the evaluation state. It is
  // used by `freshHandle` as the handle sequence counter.
  private val seqL: EvalState[C] @> Long =
    Lens.firstLens

  // Lens onto the second component of the evaluation state: the map of
  // recorded results.
  private val resultMapL: EvalState[C] @> ResultMap[C] =
    Lens.secondLens

  /** Lens onto the optional entry stored for handle `h` in the result map.
    * It is built by composing the map-value lens for `h` with `resultMapL`.
    */
  private def resultsL(h: ResultHandle): EvalState[C] @> Option[ResultCursor[C]] =
    Lens.mapVLens(h) <=< resultMapL

  /** Allocates a new `ResultHandle` from the sequence counter, incrementing
    * the counter by one as part of the same state action.
    */
  private def freshHandle: MQ[ResultHandle] = {
    val bump = seqL <%= (n => n + 1)
    MongoQuery(bump).map(n => ResultHandle(n))
  }

  /** Stores `c` under a freshly allocated handle and returns that handle. */
  private def recordCursor(c: ResultCursor[C]): MQ[ResultHandle] =
    for {
      h <- freshHandle
      _ <- MongoQuery(resultsL(h) := some(c))
    } yield h

  /** Reads the entry currently stored under `h`, if any, without modifying
    * the state.
    */
  private def lookupCursor(h: ResultHandle): OptionT[MQ, ResultCursor[C]] = {
    val current = MongoQuery(resultsL(h).st)
    OptionT[MQ, ResultCursor[C]](current)
  }

  /** The default database name, if one is present in the reader environment
    * (its first component), unwrapped via `run`.
    */
  private def defaultDbName: MQ[Option[DatabaseName]] =
    queryR.asks { case (defaultDb, _) => defaultDb.map(_.run) }

  /** Generates a salted collection-name prefix of the form
    * `tmp.gen_<salt>_`, with the salt obtained from `NameGenerator.salt`.
    */
  private def genPrefix: MQ[CollectionName] = {
    val salted = MongoDbIO.liftTask(NameGenerator.salt)
    val prefixed = salted.map(salt => CollectionName(s"tmp.gen_${salt}_"))
    prefixed.liftM[QRT]
  }

  // Natural transformation lifting an `MQ` action into `MongoLogWFR`. It
  // lifts first through `PhaseResultT`, then through `FileSystemErrT`.
  private val liftMQ: MQ ~> MongoLogWFR[C, ?] =
    liftMT[MongoLogWF[C, ?], FileSystemErrT] compose liftMT[MQ, PhaseResultT]

  /** Common driver for running a workflow.
    *
    * Steps, in order:
    *  1. generate a salted temp-collection prefix (`genPrefix`);
    *  2. log the JavaScript produced by `log(wf)` (`writeJsLog`);
    *  3. run `handle(wf, prefix)` and convert its errors (`toMongoLogWFR`).
    *
    * @param wf     the workflow to run
    * @param log    produces the JavaScript-logging computation for `wf`
    * @param handle runs the workflow given the generated prefix
    */
  def handlePlan[A](
    wf: Crystallized[WorkflowF],
    log: Crystallized[WorkflowF] => JsR[_],
    handle: (Crystallized[WorkflowF], CollectionName) => WorkflowExecErrT[MQ, A]
  ): MongoLogWFR[C, A] =
    liftMQ(genPrefix) flatMap { prefix =>
      writeJsLog(wf, log(wf), prefix) flatMap { _ =>
        toMongoLogWFR(wf, handle(wf, prefix))
      }
    }

  /** Converts a workflow-execution computation in `MQ` into `MongoLogWFR`.
    *
    * Workflow errors are mapped with `wfErrToFsErr`. Errors captured by
    * `attemptMongo` become an `execFailed` error whose message is
    * `MongoDB Error: <cause message>` and whose cause is the captured error.
    * The result is then lifted through `PhaseResultT`.
    */
  private def toMongoLogWFR[A](wf: Crystallized[WorkflowF], wfErrMq: WorkflowExecErrT[MQ, A]): MongoLogWFR[C, A] =
    EitherT[MongoLogWF[C, ?], FileSystemError, A](
      wfErrMq.leftMap(wfErrToFsErr(wf))
        // Capture MongoDB failures of the underlying action, turn them into
        // a left `FileSystemError`, then `merge` the two nested disjunctions.
        .run.mapK(_.attemptMongo.leftMap(err =>
          execFailed(
            wf,
            // NOTE(review): `getMessage` may be null for some exceptions,
            // which would render as "null" here — confirm that is acceptable.
            s"MongoDB Error: ${err.cause.getMessage}",
            JsonObject.empty,
            some(err)).left[A]
          ).merge)
        .liftM[PhaseResultT])

  /** Executes `wf` against MongoDB with `dst` as the destination collection.
    *
    * The executor is run with `tmpPrefix` as its reader input and a starting
    * counter of 0, then lifted into `MQ`.
    */
  def execWorkflow(
    wf: Crystallized[WorkflowF],
    dst: Collection,
    tmpPrefix: CollectionName
  ): WorkflowExecErrT[MQ, Unit] = {
    val outcome = execMongo.execute(wf, dst).run.run(tmpPrefix).eval(0)
    EitherT[MQ, WorkflowExecutionError, Unit](outcome.liftM[QRT])
  }

  /** Evaluates `wf` against MongoDB, yielding a result cursor.
    *
    * `defDb` is forwarded to the executor. The executor is run with
    * `tmpPrefix` as its reader input and a starting counter of 0, then lifted
    * into `MQ`.
    */
  def evalWorkflow(
    wf: Crystallized[WorkflowF],
    defDb: Option[DatabaseName],
    tmpPrefix: CollectionName
  ): WorkflowExecErrT[MQ, ResultCursor[C]] = {
    val outcome = execMongo.evaluate(wf, defDb).run.run(tmpPrefix).eval(0)
    EitherT[MQ, WorkflowExecutionError, ResultCursor[C]](outcome.liftM[QRT])
  }

  /** Runs the JavaScript-generating computation `jsr` and logs the program it
    * produced.
    *
    * `jsr` is run with `tmpPrefix` as its reader input and a starting counter
    * of 0. The generated statements are logged via `logProgram`, whether or
    * not generation itself reported an error. A generation error is converted
    * with `wfErrToFsErr` and surfaces as the failure of the returned
    * computation. A successful value is discarded (`void`).
    */
  private def writeJsLog(wf: Crystallized[WorkflowF], jsr: JsR[_], tmpPrefix: CollectionName): MongoLogWFR[C, Unit] = {
    // `stmts` is the accumulated JavaScript; `r` is the error-or-result of generation.
    val (stmts, r) = jsr.run.run(tmpPrefix).eval(0).run
    EitherT(logProgram(stmts) as r.leftMap(wfErrToFsErr(wf))).void
  }

  /** Emits the pretty-printed JavaScript program `prog` as a single
    * "MongoDB" detail phase result.
    */
  private def logProgram(prog: JavaScriptPrg): MongoLogWF[C, Unit] = {
    val rendered = Js.Stmts(prog.toList).pprint(0)
    val phase = PhaseResult.detail("MongoDB", rendered)
    MonadTell[MongoLogWF[C, ?], PhaseResults].tell(Vector(phase))
  }

  /** Produces the next chunk of data for handle `h`, or nothing when no
    * entry is recorded under `h`.
    *
    * The recorded entry is either an in-memory list of `Bson` values or a
    * workflow cursor:
    *  - a non-empty list is returned in full, and the stored entry is
    *    replaced by an empty list so the following call yields an empty chunk;
    *  - an empty list yields an empty chunk and leaves the state untouched;
    *  - a cursor is advanced with `nextChunk`.
    */
  private def moreResults(h: ResultHandle): OptionT[MQ, Vector[Data]] = {
    // Strip the Quasar sigil from each Bson value, then convert it to `Data`.
    val toData: Bson => Data =
      (BsonCodec.toData _) <<< sigil.Sigil[Bson].elideQuasarSigil

    // Handles the in-memory (list) case described above.
    def pureNextChunk(bsons: List[Bson]) =
      if (bsons.isEmpty)
        Vector.empty[Data].point[MQ]
      else
        // Mark the buffered results as consumed before handing them out.
        MongoQuery(resultsL(h) := some(List().left))
          .as(bsons.map(toData).toVector)

    lookupCursor(h) flatMapF (_.fold(pureNextChunk, wc =>
      DataCursor[MongoDbIO, WorkflowCursor[C]]
        .nextChunk(wc)
        .liftM[QRT]))
  }

  import WorkflowExecutionError.{InvalidTask, InsertFailed, NoDatabase}

  /** Builds an execution-failed `FileSystemError` carrying a message, a JSON
    * detail object and an optional physical cause.
    *
    * NOTE: `wf` is currently unused. The error is built around a placeholder
    * logical plan (the constant `Data.NA`) — see the FIXME below.
    */
  private def execFailed(wf: Crystallized[WorkflowF], s: String, detail: JsonObject, cause: Option[PhysicalError])
      : FileSystemError =
    // FIXME: executionFailed expects a LogicalPlan, but none is available at
    // this stage any more, so an empty (constant NA) LogicalPlan is supplied
    // for now. This can be fixed properly once executionFailed has been
    // refactored.
    executionFailed(constant[Fix[LogicalPlan]](quasar.Data.NA).embed, s, detail, cause)

  /** Builds an execution-failed `FileSystemError` from a message only.
    *
    * NOTE: `wf` is currently unused. The error is built around a placeholder
    * logical plan (the constant `Data.NA`) — see the FIXME below.
    */
  private def execFailed_(wf: Crystallized[WorkflowF], s: String): FileSystemError =
    // FIXME: executionFailed_ expects a LogicalPlan, but none is available at
    // this stage any more, so an empty (constant NA) LogicalPlan is supplied
    // for now. This can be fixed properly once executionFailed has been
    // refactored.
    executionFailed_(constant[Fix[LogicalPlan]](quasar.Data.NA).embed, s)

  /** Translates a `WorkflowExecutionError` into the `FileSystemError`
    * reported to callers:
    *  - `InvalidTask`  → execution failure with the rendered task as detail;
    *  - `InsertFailed` → execution failure with the shown Bson as detail;
    *  - `NoDatabase`   → execution failure with a message only.
    */
  private def wfErrToFsErr(wf: Crystallized[WorkflowF]): WorkflowExecutionError => FileSystemError =
    (wfErr: WorkflowExecutionError) => wfErr match {
      case InvalidTask(task, reason) =>
        val detail = jSingle("workflowTask", task.render.asJson)
        execFailed(wf, s"Invalid MongoDB workflow task: $reason", detail, none)

      case InsertFailed(bson, reason) =>
        val detail = jSingle("data", bson.shows.asJson)
        execFailed(wf, s"Unable to insert data into MongoDB: $reason", detail, none)

      case NoDatabase =>
        execFailed_(wf,
          "Executing this plan on MongoDB requires temporary collections, but a database in which to store them could not be determined.")
    }
}




// © 2015 - 2025 Weber Informatics LLC | Privacy Policy