All Downloads are FREE. Search and download functionalities are using the official Maven repository.

quasar.physical.mongodb.MongoDb.scala Maven / Gradle / Ivy

The newest version!
/*
 * Copyright 2014–2017 SlamData Inc.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package quasar
package physical.mongodb

import slamdata.Predef._
import quasar.common._
import quasar.connector._
import quasar.contrib.scalaz.MonadReader_
import quasar.contrib.pathy._
import quasar.effect.{Kvs, MonoSeq}
import quasar.fp._
import quasar.fp.numeric._
import quasar.fp.ski.κ
import quasar.fs._
import quasar.fs.mount._
import quasar.physical.mongodb.fs.bsoncursor._
import quasar.physical.mongodb.mongoiterable._
import quasar.physical.mongodb.workflow._
import quasar.qscript._
import quasar.qscript.analysis._
import quasar.qscript.rewrites.{Optimize, Unicoalesce, Unirewrite}

import java.time.Instant
import scala.Predef.implicitly

import matryoshka._
import matryoshka.data._
import org.bson.{BsonDocument, BsonValue}
import scalaz._, Scalaz._
import scalaz.concurrent.Task

object MongoDb
    extends BackendModule
    with ManagedReadFile[BsonCursor]
    with ManagedWriteFile[Collection]
    with DefaultAnalyzeModule {

  // Aliases `fs.MongoQScriptCP`.
  // NOTE(review): presumably the set of QScript components this connector can plan — confirm.
  type QS[T[_[_]]] = fs.MongoQScriptCP[T]

  // Injection of `QSM` into `QScriptTotal`; forwards to the package-level
  // `physical.mongodb.qScriptToQScriptTotal` so that it is available as an
  // implicit member of this object.
  implicit def qScriptToQScriptTotal[T[_[_]]]: Injectable.Aux[QSM[T, ?], QScriptTotal[T, ?]] =
    physical.mongodb.qScriptToQScriptTotal[T]

  // Representation produced by planning: both `plan` and `doPlan` yield a `Repr`.
  type Repr = Crystallized[WorkflowF]

  // Aliases `fs.MongoM`; `toM` lifts effects into this type.
  type M[A] = fs.MongoM[A]

  import Cost._
  import Cardinality._

  // Type class instances for `QSM`, `QS` and `M`, each summoned from implicit scope.
  // NOTE(review): presumably these satisfy abstract members declared by
  // `BackendModule` / `DefaultAnalyzeModule` — confirm against those traits.
  def CardinalityQSM: Cardinality[QSM[Fix, ?]] = Cardinality[QSM[Fix, ?]]
  def CostQSM: Cost[QSM[Fix, ?]] = Cost[QSM[Fix, ?]]
  def TraverseQSM[T[_[_]]] = Traverse[QSM[T, ?]]
  def FunctorQSM[T[_[_]]] = Functor[QSM[T, ?]]
  def DelayRenderTreeQSM[T[_[_]]: BirecursiveT: EqualT: ShowT: RenderTreeT] = implicitly[Delay[RenderTree, QSM[T, ?]]]
  def ExtractPathQSM[T[_[_]]: RecursiveT] = ExtractPath[QSM[T, ?], APath]
  def QSCoreInject[T[_[_]]] = implicitly[QScriptCore[T, ?] :<: QSM[T, ?]]
  def MonadM = Monad[M]
  def UnirewriteT[T[_[_]]: BirecursiveT: EqualT: ShowT: RenderTreeT] = implicitly[Unirewrite[T, QS[T]]]
  def UnicoalesceCap[T[_[_]]: BirecursiveT: EqualT: ShowT: RenderTreeT] = Unicoalesce.Capture[T, QS[T]]

  // Aliases `fs.MongoConfig`; produced by `parseConfig` and consumed by `compile`.
  type Config = fs.MongoConfig

  // Managed
  // Instances over `M` for the managed read/write support: a `MonoSeq`, and
  // key-value stores keyed by read/write handle holding a `BsonCursor` and a
  // `Collection` respectively.
  // NOTE(review): presumably required by `ManagedReadFile` / `ManagedWriteFile` — confirm.
  def MonoSeqM = MonoSeq[M]
  def ReadKvsM = Kvs[M, ReadFile.ReadHandle, BsonCursor]
  def WriteKvsM = Kvs[M, WriteFile.WriteHandle, Collection]

  // `Optimize#subsetBeforeMap` is deliberately not applied here: it has to be
  // applied only after `mapBeforeSort` has been applied.
  //
  // The returned function rewrites a single `QSM` node by lifting the
  // `QScriptCore`-level `filterBeforeUnion` rewrite (wrapped in `repeatedly`)
  // into `QSM` through `liftFF`.
  def optimize[T[_[_]]: BirecursiveT: EqualT: ShowT]
      : QSM[T, T[QSM[T, ?]]] => QSM[T, T[QSM[T, ?]]] = {
    val rewrites = new Optimize[T]

    val filterRewrite: QScriptCore[T, T[QSM[T, ?]]] => QScriptCore[T, T[QSM[T, ?]]] =
      repeatedly(rewrites.filterBeforeUnion[QSM[T, ?]])

    liftFF[QScriptCore[T, ?], QSM[T, ?], T[QSM[T, ?]]](filterRewrite)
  }

  // Turns a connection URI into this connector's `Config`; delegates to `fs.parseConfig`.
  def parseConfig(uri: ConnectionUri): BackendDef.DefErrT[Task, Config] =
    fs.parseConfig(uri)

  // Delegates to `fs.compile`. On success the result pairs a transformation
  // from `M` to `Task` with a `Task[Unit]`.
  // NOTE(review): the `Task[Unit]` is presumably a shutdown/cleanup action — confirm.
  def compile(cfg: Config): BackendDef.DefErrT[Task, (M ~> Task, Task[Unit])] =
    fs.compile(cfg)

  // File system type under which this connector is identified.
  val Type = FileSystemType("mongodb")

  // Verifies that every collection referenced by `qs` exists.
  //
  // Failure to derive the collections from `qs` is reported as a path error;
  // each derived collection is then checked with `MongoDbIO.collectionExists`,
  // with `pathNotFound` for the collection's file as the error alternative.
  // The result is lifted into `Backend` via `toBackendP`.
  private def checkPathsExist[T[_[_]]: BirecursiveT](qs: T[MongoDb.QSM[T, ?]]): Backend[Unit] = {
    import fs.QueryContext._, fs.queryfileTypes.QRT

    // Collections mentioned by the query, or a path error.
    val referenced = EitherT.fromDisjunction[MongoDbIO](
      fs.QueryContext.collections(qs).leftMap(FileSystemError.pathErr(_)))

    // Check each collection in turn, discarding the (unit) results.
    val verified = referenced.flatMap(colls =>
      colls.traverse_(c => EitherT(MongoDbIO.collectionExists(c)
        .map(_ either (()) or FileSystemError.pathErr(PathError.pathNotFound(c.asFile)))))
        .map(_ => ()))

    val logged: MongoLogWFR[BsonCursor, Unit] =
      EitherT[MongoLogWF[BsonCursor, ?], FileSystemError, Unit](
        verified.run.liftM[QRT].liftM[PhaseResultT])

    toBackendP(logged.run.run)
  }

  // Plans `qs` into a `Repr` in an arbitrary effect `N` by delegating, with all
  // arguments passed through unchanged, to `MongoDbPlanner.planExecTime`.
  //
  //   qs         - the QScript to plan
  //   ctx        - query context for `qs` (`plan` builds it with `fs.QueryContext.queryContext`)
  //   queryModel - query model to plan against (`plan` derives it from the configured server version)
  //   anyDoc     - optional lookup of a document for a given collection
  //                NOTE(review): presumably a sample document used during planning — confirm
  //   execTime   - instant handed to the planner (`plan` supplies `QueryFileModule.queryTime`)
  def doPlan[
      T[_[_]]: BirecursiveT: EqualT: ShowT: RenderTreeT,
      N[_]: Monad: MonadFsErr: PhaseResultTell]
      (qs: T[QSM[T, ?]],
        ctx: fs.QueryContext,
        queryModel: MongoQueryModel,
        anyDoc: Collection => OptionT[N, BsonDocument],
        execTime: Instant)
      : N[Repr] =
    MongoDbPlanner.planExecTime[T, N](qs, ctx, queryModel, anyDoc, execTime)

  // TODO[scalaz]: Shadow the scalaz.Monad.monadMTMAB SI-2712 workaround
  import EitherT.eitherTMonad

  // Plans `qs` in `Backend`.
  //
  // Steps, in order: read the configured server version, build the query
  // context, check that the referenced collections exist, obtain the query
  // time, and finally hand everything to `doPlan`.
  @SuppressWarnings(Array("org.wartremover.warts.Null"))
  def plan[T[_[_]]: BirecursiveT: EqualT: ShowT: RenderTreeT](
      qs: T[QSM[T, ?]]): Backend[Repr] = {
    // Looks up the first document of a collection, lifted into `Backend`.
    val anyDoc = (c: Collection) => MongoDbIO.first(c).mapT(x => toM(x).liftB)

    for {
      serverVersion <- config[Backend].map(_.serverVersion)
      queryCtx      <- toBackendP(fs.QueryContext.queryContext[T, Backend](qs))
      _             <- checkPathsExist(qs)
      startedAt     <- QueryFileModule.queryTime.liftM[PhaseResultT].liftM[FileSystemErrT]
      planned       <- doPlan[T, Backend](qs, queryCtx, MongoQueryModel(serverVersion), anyDoc, startedAt)
    } yield planned
  }

  // Phase-result logging layered over `ConfiguredT[M, ?]`.
  private type PhaseRes[A] = PhaseResultT[ConfiguredT[M, ?], A]

  // Lifts an `fs.Eff` value into `Configured`: first into `Free` with
  // `Free.liftF`, then into `ConfiguredT` with `liftM`.
  private val effToConfigured: fs.Eff ~> Configured =
    λ[fs.Eff ~> Configured](eff => Free.liftF(eff).liftM[ConfiguredT])

  // `effToConfigured` followed by a lift into `PhaseResultT`.
  // This is a `val` that reads `effToConfigured` when it is initialised, so it
  // must stay declared after it.
  private val effToPhaseRes: fs.Eff ~> PhaseRes =
    λ[Configured ~> PhaseRes](_.liftM[PhaseResultT]) compose effToConfigured

  // Injects a value of some effect `C` into `fs.Eff` using the implicit injection.
  private def toEff[C[_], A](c: C[A])(implicit inj: C :<: fs.Eff): fs.Eff[A] = inj(c)

  // Injects `c` into `fs.Eff` and lifts it into `M` with `Free.liftF`.
  def toM[C[_], A](c: C[A])(implicit inj: C :<: fs.Eff): M[A] = Free.liftF(toEff(c))

  // Lifts an effect that yields `FileSystemError \/ A` into `Backend`: the value
  // is wrapped in `EitherT` and its underlying effect is mapped through
  // `toEff` and `effToPhaseRes`.
  def toBackend[C[_], A](c: C[FileSystemError \/ A])(implicit inj: C :<: fs.Eff): Backend[A] =
    EitherT(c).mapT(x => effToPhaseRes(toEff(x)))

  // Like `toBackend`, for effects that also yield `PhaseResults`: the pair is
  // wrapped in `WriterT` (after `toEff` and `effToConfigured`) and then `EitherT`.
  def toBackendP[C[_], A](c: C[(PhaseResults, FileSystemError \/ A)])(implicit inj: C :<: fs.Eff): Backend[A] =
    EitherT(WriterT(effToConfigured(toEff(c))))

  // Lifts an effect into `Configured` via `toEff` and `effToConfigured`.
  def toConfigured[C[_], A](c: C[A])(implicit inj: C :<: fs.Eff): Configured[A] =
    effToConfigured(toEff(c))

  // `DataCursor` instance for `BsonCursor` in `MongoDbIO`; `ManagedReadFileModule`
  // uses it for `nextChunk` and `close`.
  val DC = DataCursor[MongoDbIO, BsonCursor]

  // Query operations. Each one builds an `fs.QueryFileInterpreter` from the
  // current configuration, invokes the matching interpreter method and lifts
  // the result into `Backend` or `Configured`.
  object QueryFileModule extends QueryFileModule {
    import QueryFile._

    // An interpreter constructed from the configured workflow executor (`cfg.wfExec`).
    private def mkInterp[F[_] : Functor](implicit C: MonadReader_[F, Config]): F[fs.QueryFileInterpreter] =
      Functor[F].map(config[F])(cfg => new fs.QueryFileInterpreter(cfg.wfExec))

    // The configured default database, if any, reduced to its underlying value.
    private def defaultDbName =
      config[Backend].map(_.defaultDb.map(_.run))

    // Executes `repr`, targeting the file `out`.
    def executePlan(repr: Repr, out: AFile): Backend[Unit] =
      for {
        interp <- mkInterp[Backend]
        done   <- toBackendP(interp.execPlan(repr, out))
      } yield done

    // Evaluates `repr` against the default database and returns a result handle.
    def evaluatePlan(repr: Repr): Backend[ResultHandle] =
      for {
        dbName  <- defaultDbName
        interp  <- mkInterp[Backend]
        results <- toBackendP(interp.evalPlan(repr, dbName))
      } yield results

    // Asks the interpreter to explain `repr` with respect to the default database.
    def explain(repr: Repr): Backend[String] =
      for {
        dbName      <- defaultDbName
        interp      <- mkInterp[Backend]
        explanation <- toBackendP(interp.explain(repr, dbName))
      } yield explanation

    // Fetches further data for the result handle `h`.
    def more(h: ResultHandle): Backend[Vector[Data]] =
      for {
        interp <- mkInterp[Backend]
        data   <- toBackend(interp.more(h))
      } yield data

    // Closes the result handle `h`.
    def close(h: ResultHandle): Configured[Unit] =
      for {
        interp <- mkInterp[Configured]
        closed <- toConfigured(interp.close(h))
      } yield closed

    // Lists the path segments found in `dir`.
    def listContents(dir: ADir): Backend[Set[PathSegment]] =
      for {
        interp   <- mkInterp[Backend]
        segments <- toBackend(interp.listContents0(dir))
      } yield segments

    // Reports whether `file` exists.
    def fileExists(file: AFile): Configured[Boolean] =
      for {
        interp <- mkInterp[Configured]
        exists <- toConfigured(interp.fileExists(file))
      } yield exists

    // The interpreter's query time.
    def queryTime: Configured[Instant] =
      for {
        interp <- mkInterp[Configured]
        now    <- toConfigured(interp.queryTime)
      } yield now

  }

  // Cursor-based reads over MongoDB collections.
  object ManagedReadFileModule extends ManagedReadFileModule {

    // Opens a batch cursor over `coll`, skipping `offset` documents and, when a
    // limit is given, returning at most `limit` documents.
    // NOTE(review): both values are narrowed with `.toInt`; if the underlying
    // value is wider than `Int`, large offsets/limits would not survive the
    // conversion — confirm whether callers can pass such values.
    private def cursor(coll: Collection, offset: Natural, limit: Option[Positive]): MongoDbIO[BsonCursor] =
      MongoDbIO.find(coll) flatMap { found =>
        val skipped = found.skip(offset.value.toInt)
        val bounded = limit.fold(skipped)(l => skipped.limit(l.value.toInt))
        MongoDbIO.async(bounded.widen[BsonValue].batchCursor)
      }

    // Resolves `f` to a collection and opens a cursor on it; a file that does
    // not map to a collection is raised as a path error.
    def readCursor(f: AFile, offset: Natural, limit: Option[Positive])
        : Backend[BsonCursor] =
      Collection.fromFile(f) match {
        case -\/(err) =>
          MonadFsErr[Backend].raiseError[BsonCursor](FileSystemError.pathErr(err))
        case \/-(coll) =>
          toM(cursor(coll, offset, limit)).liftB
      }

    // Reads the next chunk from `c`, returning it paired with the same cursor.
    def nextChunk(c: BsonCursor): Backend[(BsonCursor, Vector[Data])] = {
      val chunk = DC.nextChunk(c).map(data => (c, data))
      toM(chunk).liftB
    }

    // Closes the cursor through the shared `DataCursor` instance.
    def closeCursor(c: BsonCursor): Configured[Unit] = {
      val closing = DC.close(c)
      toConfigured(closing)
    }
  }

  // Chunked writes into MongoDB collections. The "cursor" of a write is the
  // target `Collection` itself.
  object ManagedWriteFileModule extends ManagedWriteFileModule {
    // Encodes `d` as BSON for version `v`. Encoding failures, and values that
    // do not encode to a document, are both reported as `writeFailed` for `d`.
    private def dataToDocument(v: BsonVersion, d: Data): FileSystemError \/ Bson.Doc = {
      val encoded =
        BsonCodec.fromData(v, d).leftMap(err => FileSystemError.writeFailed(d, err.shows))

      encoded flatMap {
        case doc @ Bson.Doc(_) => doc.right
        case otherwise         => FileSystemError.writeFailed(d, "MongoDB is only able to store documents").left
      }
    }

    // Resolves `file` to a collection and ensures it exists; a file that does
    // not map to a collection is raised as a path error.
    def writeCursor(file: AFile): Backend[Collection] =
      Collection.fromFile(file) match {
        case -\/(err) =>
          MonadFsErr[Backend].raiseError[Collection](FileSystemError.pathErr(err))
        case \/-(coll) =>
          toM(MongoDbIO.ensureCollection(coll).map(_ => coll)).liftB
      }

    // Writes `chunk` to `c` using the BSON version derived from the configured
    // server version, returning the errors encountered.
    def writeChunk(c: Collection, chunk: Vector[Data])
        : Configured[Vector[FileSystemError]] =
      config[Configured] flatMap { cfg =>
        val bsonVersion = MongoQueryModel.toBsonVersion(MongoQueryModel(cfg.serverVersion))
        doWriteChunk(bsonVersion, c, chunk)
      }

    // Converts every datum, inserts those that converted, and returns the
    // conversion errors followed by a `partialWrite` error when fewer
    // documents were inserted than were submitted.
    private def doWriteChunk(v: BsonVersion, c: Collection, chunk: Vector[Data])
        : Configured[Vector[FileSystemError]] = {
      val converted = chunk.map(dataToDocument(v, _))
      val errs = converted collect { case -\/(e) => e }
      val docs = converted collect { case \/-(doc) => doc }

      val submitted = docs.size
      val shortfall = MongoDbIO.insertAny(c, docs.map(_.repr))
        .filter(_ < submitted)
        .map(written => FileSystemError.partialWrite(submitted - written))

      toConfigured(shortfall.run.map(partial => errs ++ partial.toList))
    }

    // Nothing to release for a write: the cursor is just the collection.
    def closeCursor(c: Collection): Configured[Unit] =
      ().point[Configured]
  }

  /** File management operations (move, copy, delete, temp file), implemented
    * on top of the helpers imported from `fs.managefile`.
    */
  object ManageFileModule extends ManageFileModule {
    import fs.managefile._, ManageFile._

    /** Moves a directory or a file, depending on the kind of `scenario`, by
      * delegating to `moveDir` or `moveFile` with the given `semantics`.
      *
      * TODO: There are still some questions regarding Path
      *   1) We should assume all paths will be canonicalized and can do so
      *      with a ManageFile ~> ManageFile that canonicalizes everything.
      *
      *   2) Currently, parsing a directory like "/../foo/bar/" as an absolute
      *      dir succeeds, this should probably be changed to fail.
      */
    def move(scenario: PathPair, semantics: MoveSemantics): Backend[Unit] = {
      val mm: MongoManage[FileSystemError \/ Unit] =
        scenario.fold(moveDir(_, _, semantics), moveFile(_, _, semantics))
          .run.liftM[ManageInT]
      toBackend(mm)
    }

    /** Copying is not supported by this connector: always fails with an
      * `unsupportedOperation` error.
      */
    def copy(pair: PathPair): Backend[Unit] =
      toBackend(FileSystemError.unsupportedOperation("MongoDb connector does not currently support copying").left[Unit].point[MongoDbIO])

    /** Deletes `path`, delegating to `deleteDir` when it refines to a directory
      * and to `deleteFile` when it refines to a file.
      */
    def delete(path: APath): Backend[Unit] = {
      val mm: MongoManage[FileSystemError \/ Unit] =
        pathy.Path.refineType(path).fold(deleteDir, deleteFile)
          .run.liftM[ManageInT]
      toBackend(mm)
    }

    /** Produces a fresh file path close to `near`.
      *
      * Fails with a path error when no database name can be derived from
      * `near`. Otherwise a name obtained from `freshName` is appended to
      * `near` itself when it is a directory, or to its parent directory when
      * it is a file.
      */
    def tempFile(near: APath): Backend[AFile] = {
      // Only the success/failure of the lookup matters; the name is discarded.
      val checkPath =
        EitherT.fromDisjunction[MongoManage](Collection.dbNameFromPath(near))
          .bimap(FileSystemError.pathErr(_), κ(()))

      // `</>` is pathy's path-append operator: directory </> relative file.
      val mkTemp =
        freshName.liftM[FileSystemErrT] map { n =>
          pathy.Path.refineType(near).fold(
            _ </> pathy.Path.file(n),
            f => pathy.Path.fileParent(f) </> pathy.Path.file(n))
        }

      val mm: MongoManage[FileSystemError \/ AFile] = checkPath.flatMap(κ(mkTemp)).run
      toBackend(mm)
    }
  }
}




© 2015 - 2025 Weber Informatics LLC | Privacy Policy