All Downloads are FREE. Search and download functionalities are using the official Maven repository.

quasar.optimizer.scala Maven / Gradle / Ivy

There is a newer version: 28.1.6
Show newest version
/*
 * Copyright 2014–2016 SlamData Inc.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package quasar

import quasar.Predef._
import quasar.fp.binder._
import quasar.namegen._

import matryoshka._, Recursive.ops._, FunctorT.ops._, TraverseT.ownOps._
import scalaz._, Scalaz._
import shapeless.{Data => _, :: => _, _}

object Optimizer {
  import LogicalPlan._
  import quasar.std.StdLib._
  import set._
  import structural._
  import Planner._

  /** Algebra that tallies references to `target` within a plan.
    *
    * A `FreeF` naming `target` counts as one use. At a `LetF` that binds the
    * same symbol, only the tally carried by its `form` is returned; the tally
    * of its body is discarded. Every other node combines the tallies of its
    * children with `fold`.
    */
  private def countUsageƒ(target: Symbol): Algebra[LogicalPlan, Int] = {
    case LetF(bound, formCount, _) if bound == target => formCount
    case FreeF(name) if name == target                => 1
    case other                                        => other.fold
  }

  /** Step function (passed to `transPara` by `simplifyƒ`) that substitutes
    * `repl` for free occurrences of `target`.
    *
    * Every child arrives as a pair. For a `LetF` that re-binds `target`, the
    * second component of `form` and the first component of the body are kept;
    * for all other nodes the second component of each child is used.
    * NOTE(review): presumably the first component is the untouched subtree and
    * the second the already-substituted one — confirm against `transPara`.
    *
    * The type parameter `A` is not used by the body.
    */
  private def inlineƒ[T[_[_]], A](target: Symbol, repl: LogicalPlan[T[LogicalPlan]]):
      LogicalPlan[(T[LogicalPlan], T[LogicalPlan])] => LogicalPlan[T[LogicalPlan]] =
  {
    case LetF(name, (_, substitutedForm), (untouchedBody, _)) if name == target =>
      LetF(name, substitutedForm, untouchedBody)
    case FreeF(name) if name == target =>
      repl
    case other =>
      other.map { case (_, substituted) => substituted }
  }

  /** One simplification step for a single plan node; `None` means the node
    * was left alone.
    *
    *  - `InvokeF`: delegates to the function's own `simplify`.
    *  - `LetF` whose bound form is a constant or a free variable: the form is
    *    always substituted into the body.
    *  - Any other `LetF`: the body is scanned with `countUsageƒ`; with no use
    *    the binding is dropped, with exactly one use the form is substituted,
    *    and with more uses the node is left alone.
    */
  def simplifyƒ[T[_[_]]: Recursive: Corecursive]:
      LogicalPlan[T[LogicalPlan]] => Option[LogicalPlan[T[LogicalPlan]]] = {
    case inv @ InvokeF(func, _) => func.simplify(inv)
    case LetF(ident, form, in) =>
      val bound = form.project

      // The body with `bound` substituted for `ident`.
      def substituted: Option[LogicalPlan[T[LogicalPlan]]] =
        in.transPara(inlineƒ(ident, bound)).project.some

      bound match {
        case ConstantF(_) | FreeF(_) => substituted
        case _ =>
          in.cata(countUsageƒ(ident)) match {
            case 0 => in.project.some
            case 1 => substituted
            case _ => None
          }
      }
    case _ => None
  }

  /** Simplifies an entire plan by running `simplifyƒ`, wrapped in
    * `repeatedly`, over every node with `transCata`.
    */
  def simplify(t: Fix[LogicalPlan]): Fix[LogicalPlan] = t.transCata(repeatedly(simplifyƒ))

  /** Algebra collecting the symbols of all `FreeF` nodes in a plan; every
    * other node contributes the combined sets of its children.
    */
  val namesƒ: Algebra[LogicalPlan, Set[Symbol]] = plan =>
    plan match {
      case FreeF(sym) => Set(sym)
      case _          => plan.fold
    }

  /** Produces a symbol derived from `prefix` that does not clash with any
    * name referenced as a `FreeF` in `plans` (as collected by `namesƒ`).
    *
    * @param prefix the preferred name; returned unchanged when it is unused
    * @param plans  the plans whose free names must be avoided
    * @return `prefix` followed by as many `_` characters as are needed to
    *         make it distinct from the collected names
    */
  def uniqueName[F[_]: Functor: Foldable](
    prefix: String, plans: F[Fix[LogicalPlan]]):
      Symbol = {
    val existingNames = plans.map(_.cata(namesƒ)).fold

    // The candidate being tested is `pre`, not the original `prefix`: testing
    // `prefix` on every iteration would never terminate once it clashes, and
    // returning it would hand back the clashing name.
    @scala.annotation.tailrec
    def loop(pre: String): Symbol = {
      val candidate = Symbol(pre)
      if (existingNames.contains(candidate)) loop(pre + "_")
      else candidate
    }

    loop(prefix)
  }

  /** Computes, where it can, the list of field-name expressions that make up
    * the object produced by a node; `None` when the shape is not known.
    *
    * Each child is a pair of a plan and the shape already computed for it.
    */
  val shapeƒ: GAlgebra[(Fix[LogicalPlan], ?), LogicalPlan, Option[List[Fix[LogicalPlan]]]] = {
    case LetF(_, _, (_, bodyShape)) => bodyShape
    case ConstantF(Data.Obj(entries)) =>
      entries.keys.map(key => Constant(Data.Str(key))).toList.some
    case InvokeFUnapply(DeleteField, Sized((_, srcShape), (removed, _))) =>
      srcShape.map(_.filterNot(_ == removed))
    case InvokeFUnapply(MakeObject, Sized((key, _), _)) => List(key).some
    case InvokeFUnapply(ObjectConcat, inputs) => inputs.traverse(_._2).map(_.flatten)
    // NB: the remaining InvokeF cases simply pass through or combine shapes
    //     from their inputs. It would be great if this information could be
    //     handled generically by the type system.
    case InvokeFUnapply(OrderBy, Sized((_, srcShape), _, _)) => srcShape
    case InvokeFUnapply(Take, Sized((_, srcShape), _)) => srcShape
    case InvokeFUnapply(Drop, Sized((_, srcShape), _)) => srcShape
    case InvokeFUnapply(Filter, Sized((_, srcShape), _)) => srcShape
    case InvokeFUnapply(InnerJoin | LeftOuterJoin | RightOuterJoin | FullOuterJoin, _) =>
      List(Constant(Data.Str("left")), Constant(Data.Str("right"))).some
    case InvokeFUnapply(GroupBy, Sized((_, srcShape), _)) => srcShape
    case InvokeFUnapply(Distinct, Sized((_, srcShape), _)) => srcShape
    case InvokeFUnapply(DistinctBy, Sized((_, srcShape), _)) => srcShape
    case InvokeFUnapply(identity.Squash, Sized((_, srcShape))) => srcShape
    case _ => None
  }

  /** Chooses between the two halves of an annotated child: when the plan in
    * the first position is a bare `FreeF` it is returned as is, otherwise `f`
    * is applied to the second position.
    */
  def preserveFree0[A](x: (Fix[LogicalPlan], A))(f: A => Fix[LogicalPlan]):
      Fix[LogicalPlan] = x match {
    case (free @ Fix(FreeF(_)), _) => free
    case (_, annotation)           => f(annotation)
  }

  // TODO: implement `preferDeletions` for other backends that may have more
  //       efficient deletes. Even better, a single function that takes a
  //       function parameter deciding which way each case should be converted.
  /** Rewrites `DeleteField` into an explicit object of projections whenever
    * the shape of the source is known, and returns the rewritten plan paired
    * with the shape computed for the node by `shapeƒ`.
    *
    * When the source shape is unknown the `DeleteField` invocation is rebuilt
    * unchanged; all other nodes are rebuilt from their children.
    */
  private val preferProjectionsƒ:
      GAlgebra[
        (Fix[LogicalPlan], ?),
        LogicalPlan,
        (Fix[LogicalPlan], Option[List[Fix[LogicalPlan]]])] = { node =>

    type Annotated = (Fix[LogicalPlan], Option[List[Fix[LogicalPlan]]])

    def preserveFree(x: (Fix[LogicalPlan], Annotated)): Fix[LogicalPlan] =
      preserveFree0(x)(_._1)

    val rewritten: Fix[LogicalPlan] = node match {
      case InvokeFUnapply(DeleteField, Sized(src, field)) =>
        src._2._2 match {
          case None =>
            Invoke(DeleteField, Func.Input2(preserveFree(src), preserveFree(field)))
          case Some(fields) =>
            // Bind the source once, then project every surviving field from it.
            val srcName = uniqueName("src", fields)
            val kept    = fields.filterNot(_ == field._2._1)
            Let(srcName, preserveFree(src),
              Fix(MakeObjectN(kept.map(f =>
                f -> Invoke(ObjectProject, Func.Input2(Free(srcName), f))): _*)))
        }
      case other => Fix(other.map(preserveFree))
    }

    (rewritten, shapeƒ(node.map(_._2)))
  }

  /** Applies `preferProjectionsƒ` across the plan with `boundPara`, discards
    * the computed shape, and simplifies the resulting plan.
    */
  def preferProjections(t: Fix[LogicalPlan]): Fix[LogicalPlan] = {
    val (projected, _) = boundPara(t)(preferProjectionsƒ)
    projected.transCata(repeatedly(simplifyƒ))
  }

  /** Removes a type check that sits directly in the body of a `Let` and
    * inspects the variable that `Let` binds: the `Let` is rebuilt with the
    * type check's continuation as its body. Other nodes are rebuilt as is.
    */
  val elideTypeCheckƒ: Algebra[LogicalPlan, Fix[LogicalPlan]] = {
    case LetF(name, form, Fix(TypecheckF(Fix(FreeF(checked)), _, body, _)))
        if name == checked =>
      Let(name, form, body)
    case other => Fix(other)
  }

  /** For backends whose collections may only contain objects. Looks at a type
    * check applied directly to a variable bound to a `Read`:
    *
    *  - a check for an object with no known fields and `Top` for the rest is
    *    dropped, leaving its continuation as the body;
    *  - any other object check is kept untouched;
    *  - a check for a non-object type fails with `UnsupportedPlan`.
    *
    * Every other node is rebuilt unchanged.
    */
  def assumeReadObjƒ:
      AlgebraM[PlannerError \/ ?, LogicalPlan, Fix[LogicalPlan]] = {
    case let @ LetF(name, read @ Fix(ReadF(_)),
      Fix(TypecheckF(Fix(FreeF(checked)), expected, body, _)))
        if name == checked =>
      expected match {
        case Type.Obj(known, Some(Type.Top)) if known == ListMap() =>
          \/-(Let(name, read, body))
        case Type.Obj(_, _) =>
          \/-(Fix(let))
        case _ =>
          val reason =
            "collections can only contain objects, but a(n) " +
              expected +
              " is expected"
          -\/(UnsupportedPlan(let, Some(reason)))
      }
    case other => \/-(Fix(other))
  }

  /** A value that may depend on the left and/or right source of a join;
    * `run` supplies both sources and yields the value.
    */
  sealed trait Component[A] {
    def run(l: Fix[LogicalPlan], r: Fix[LogicalPlan]): A
  }

  /** A condition that refers to left and right sources using equality, so may
    * be rewritten into the join condition.
    */
  final case class EquiCond[A](run0: (Fix[LogicalPlan], Fix[LogicalPlan]) => A) extends Component[A] {
    def run(l: Fix[LogicalPlan], r: Fix[LogicalPlan]): A = run0(l, r)
  }

  /** A condition which refers only to the left source. */
  final case class LeftCond[A](run0: Fix[LogicalPlan] => A) extends Component[A] {
    def run(l: Fix[LogicalPlan], r: Fix[LogicalPlan]): A = run0(l)
  }

  /** A condition which refers only to the right source. */
  final case class RightCond[A](run0: Fix[LogicalPlan] => A) extends Component[A] {
    def run(l: Fix[LogicalPlan], r: Fix[LogicalPlan]): A = run0(r)
  }

  /** A condition which refers to both sources but doesn't have the right
    * shape to become the join condition.
    */
  final case class OtherCond[A](run0: (Fix[LogicalPlan], Fix[LogicalPlan]) => A) extends Component[A] {
    def run(l: Fix[LogicalPlan], r: Fix[LogicalPlan]): A = run0(l, r)
  }

  /** An expression that doesn't refer to any source. */
  final case class NeitherCond[A](run0: A) extends Component[A] {
    def run(l: Fix[LogicalPlan], r: Fix[LogicalPlan]): A = run0
  }

  // TODO: add a scalaz property test for this instance
  /** `Applicative` instance for [[Component]].
    *
    * `point` yields a `NeitherCond`. `ap` keeps track of which sources the
    * combined value depends on:
    *
    *  - `NeitherCond` combined with `NeitherCond`, `LeftCond` or `RightCond`
    *    (in either position) keeps the other operand's classification;
    *  - two `LeftCond`s stay `LeftCond`, two `RightCond`s stay `RightCond`;
    *  - every remaining combination (mixed sides, or anything involving
    *    `EquiCond` or `OtherCond`) becomes an `OtherCond`.
    *
    * The type is declared explicitly so that the implicit does not rely on
    * type inference at its use sites.
    */
  implicit val ComponentApplicative: Applicative[Component] = new Applicative[Component] {
    def point[A](a: => A): Component[A] = NeitherCond(a)

    def ap[A, B](fa: => Component[A])(f: => Component[A => B]): Component[B] =
      (fa, f) match {
             // A             // A => B
        case (NeitherCond(a), NeitherCond(g)) => NeitherCond(g(a))

             // A             // LP => A => B
        case (NeitherCond(a), LeftCond(g))    => LeftCond(g(_)(a))
             // A             // LP => A => B
        case (NeitherCond(a), RightCond(g))   => RightCond(g(_)(a))

             // LP => A       // A => B
        case (LeftCond(a),    NeitherCond(g)) => LeftCond(g <<< a) // lp => g(a(lp))
             // LP => A       // LP => A => B
        case (LeftCond(a),    LeftCond(g))    => LeftCond(lp => g(lp)(a(lp)))

             // LP => A       // A => B
        case (RightCond(a),   NeitherCond(g)) => RightCond(g <<< a)
             // LP => A       // LP => A => B
        case (RightCond(a),   RightCond(g))   => RightCond(lp => g(lp)(a(lp)))

        // Anything else needs both sources to be evaluated.
        case (ca, cg)                         => OtherCond((l, r) => cg.run(l, r)(ca.run(l, r)))
      }
  }

  /** Rewrite joins and subsequent filtering so that:
    * 1) Filtering that is equivalent to an equi-join is rewritten into the join condition.
    * 2) Filtering that refers to only one side of the join is hoisted prior to the join.
    * The input plan must have been simplified already so that the structure
    * is in a canonical form for inspection.
    *
    * Each child of `node` arrives as a pair of plans; fresh binding names are
    * drawn from the `NameGen` state.
    * NOTE(review): the exact meaning of the two halves of each pair is set by
    * `boundParaS`, which is not visible here — confirm before relying on it.
    */
  val rewriteCrossJoinsƒ: LogicalPlan[(Fix[LogicalPlan], Fix[LogicalPlan])] => State[NameGen, Fix[LogicalPlan]] = { node =>
    import quasar.fp._

    // Keeps a bare `Free` from the first half of the pair, otherwise takes the
    // second half (see `preserveFree0`).
    def preserveFree(x: (Fix[LogicalPlan], Fix[LogicalPlan])) = preserveFree0(x)(ι)

    // Splits arbitrarily nested `And` invocations into their individual operands.
    def flattenAnd: Fix[LogicalPlan] => List[Fix[LogicalPlan]] = {
      case Fix(InvokeFUnapply(relations.And, ts)) => ts.unsized.flatMap(flattenAnd)
      case t                                      => List(t)
    }

    // Classifies the condition `c` according to which of the two given source
    // expressions it refers to, producing a `Component` that can rebuild the
    // condition against replacement sources.
    def toComp(left: Fix[LogicalPlan], right: Fix[LogicalPlan])(c: Fix[LogicalPlan]):
        Component[Fix[LogicalPlan]] = {
      c.para[Component[Fix[LogicalPlan]]] {
        // A sub-expression equal to one of the sources stands for that source.
        case t if t.map(_._1) ≟ left.unFix  => LeftCond(ι)
        case t if t.map(_._1) ≟ right.unFix => RightCond(ι)

        // Equality between a left-only and a right-only expression, in either
        // order, is an equi-join condition.
        case InvokeFUnapply(relations.Eq, Sized((_, LeftCond(lc)), (_, RightCond(rc)))) =>
          EquiCond((l, r) => Fix(relations.Eq(lc(l), rc(r))))
        case InvokeFUnapply(relations.Eq, Sized((_, RightCond(rc)), (_, LeftCond(lc)))) =>
          EquiCond((l, r) => Fix(relations.Eq(rc(r), lc(l))))

        // Any other invocation combines the classifications of its arguments
        // through the `Component` applicative and rebuilds the invocation.
        case InvokeFUnapply(func @ UnaryFunc(_, _, _, _, _, _, _, _), Sized(t1)) =>
          Func.Input1(t1).map(_._2).sequence[Component, Fix[LogicalPlan]].map(ts => Fix(InvokeF(func, ts)))

        case InvokeFUnapply(func @ BinaryFunc(_, _, _, _, _, _, _, _), Sized(t1, t2)) =>
          Func.Input2(t1, t2).map(_._2).sequence[Component, Fix[LogicalPlan]].map(ts => Fix(InvokeF(func, ts)))

        case InvokeFUnapply(func @ TernaryFunc(_, _, _, _, _, _, _, _), Sized(t1, t2, t3)) =>
          Func.Input3(t1, t2, t3).map(_._2).sequence[Component, Fix[LogicalPlan]].map(ts => Fix(InvokeF(func, ts)))

        // Everything else is rebuilt from the original children and treated as
        // independent of both sources.
        case t => NeitherCond(Fix(t.map(_._1)))
      }
    }

    // Conjoins the conditions with `And`, starting from the constant `true`
    // (which is therefore the result for an empty list).
    def assembleCond(conds: List[Fix[LogicalPlan]]): Fix[LogicalPlan] =
      conds.foldLeft(Constant(Data.True))((acc, c) => Fix(relations.And(acc, c)))

    // Builds the replacement plan: each source is bound and filtered by the
    // conditions that mention only that side, the filtered sources are
    // inner-joined on the equi-conditions, and the remaining conditions are
    // applied as a filter over the join result.
    def newJoin(lSrc: Fix[LogicalPlan], rSrc: Fix[LogicalPlan], comps: List[Component[Fix[LogicalPlan]]]):
        State[NameGen, Fix[LogicalPlan]] = {
      // Partition the components by classification.
      val equis    = comps.collect { case c @ EquiCond(_) => c }
      val lefts    = comps.collect { case c @ LeftCond(_) => c }
      val rights   = comps.collect { case c @ RightCond(_) => c }
      val others   = comps.collect { case c @ OtherCond(_) => c }
      val neithers = comps.collect { case c @ NeitherCond(_) => c }

      for {
        lName  <- freshName("leftSrc")
        lFName <- freshName("left")
        rName  <- freshName("rightSrc")
        rFName <- freshName("right")
        jName  <- freshName("joined")
      } yield {
        // NB: simplifying eagerly to make matching easier up the tree
        simplify(
          Let(lName, lSrc,
            Let(lFName, Fix(Filter(Free(lName), assembleCond(lefts.map(_.run0(Free(lName)))))),
              Let(rName, rSrc,
                Let(rFName, Fix(Filter(Free(rName), assembleCond(rights.map(_.run0(Free(rName)))))),
                  Let(jName,
                    Fix(InnerJoin(Free(lFName), Free(rFName),
                      assembleCond(equis.map(_.run(Free(lFName), Free(rFName)))))),
                    Fix(Filter(Free(jName), assembleCond(
                      others.map(_.run0(JoinDir.Left.projectFrom(Free(jName)), JoinDir.Right.projectFrom(Free(jName)))) ++
                      neithers.map(_.run0))))))))))
      }
    }


    node match {
      // A filter applied directly to an inner join: the join condition is
      // classified against the join inputs, the filter condition against the
      // left/right projections of the filter's source.
      case InvokeFUnapply(Filter, Sized((src, Fix(InvokeFUnapply(InnerJoin, Sized(joinL, joinR, joinCond)))), (cond, _))) =>
        val comps = flattenAnd(joinCond).map(toComp(joinL, joinR)) ++
                    flattenAnd(cond).map(toComp(JoinDir.Left.projectFrom(src), JoinDir.Right.projectFrom(src)))
        newJoin(joinL, joinR, comps)
      // An inner join on its own: only its condition is redistributed.
      case InvokeFUnapply(InnerJoin, Sized((srcL, _), (srcR, _), (_, joinCond))) =>
        newJoin(srcL, srcR, flattenAnd(joinCond).map(toComp(srcL, srcR)))
      // Anything else is rebuilt from its children without consuming names.
      case _ => State.state(Fix(node.map(preserveFree)))
    }
  }

  /** Apply universal, type-oblivious transformations intended to
    * improve the performance of a query regardless of the backend. The
    * input is expected to come straight from the SQL^2 compiler or
    * another source of un-optimized queries.
    *
    * The passes below are composed left to right, so they run in the order
    * in which they are listed.
    */
  val optimize: Fix[LogicalPlan] => Fix[LogicalPlan] = {
    type Pass = Fix[LogicalPlan] => Fix[LogicalPlan]

    // Eliminates extraneous constants, trivial bindings, etc.
    val simplifyPass: Pass = _.transCata(repeatedly(simplifyƒ))

    // The big one: redistributes join and filter conditions.
    val rewriteJoinsPass: Pass = boundParaS(_)(rewriteCrossJoinsƒ).evalZero

    NonEmptyList[Pass](
      simplifyPass,

      // NB: must precede normalizeLets to eliminate possibility of shadowing:
      normalizeTempNames,

      // NB: must precede rewriteCrossJoins to normalize Filter/Join shapes:
      normalizeLets,

      rewriteJoinsPass,

      // Eliminate trivial bindings introduced in rewriteCrossJoins:
      simplifyPass,

      // Final pass to normalize the resulting plans for better matching in tests:
      normalizeLets,

      // This time, fix the names last so they will read naturally:
      normalizeTempNames

    ).foldLeft1(_ >>> _)
  }
}




© 2015 - 2025 Weber Informatics LLC | Privacy Policy