com.dimajix.spark.sql.execution.EagerCacheExec.scala Maven / Gradle / Ivy

Go to download

Show more of this group Show more artifacts with this name
Show all versions of flowman-spark-extensions Show documentation

Flowman Spark extensions

There is a newer version: 1.2.0-synapse3.3-spark3.3-hadoop3.3

Show newest version

/*
 * Copyright (C) 2021 The Flowman Authors
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *   http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package com.dimajix.spark.sql.execution

import org.apache.spark.rdd.RDD
import org.apache.spark.sql.catalyst.InternalRow
import org.apache.spark.sql.catalyst.expressions.Attribute
import org.apache.spark.sql.catalyst.expressions.SortOrder
import org.apache.spark.sql.catalyst.plans.physical.Partitioning
import org.apache.spark.sql.execution.SparkPlan
import org.apache.spark.sql.vectorized.ColumnarBatch


/**
 * Physical plan node that forwards the result of `child` unchanged, while additionally running
 * every plan in `caches` once during the prepare phase.
 *
 * All output-related properties (attributes, partitioning, ordering, columnar support) are taken
 * directly from `child`. The plans in `caches` are registered as additional children of this node,
 * but their rows never appear in this node's output.
 *
 * NOTE(review): judging by the name, the plans in `caches` are presumably cache scans which should
 * be materialized eagerly - confirm against the code constructing this node.
 *
 * @param child  the plan whose result is returned by this node
 * @param caches plans which are executed and counted in `doPrepare`, before this node is executed
 */
case class EagerCacheExec(child: SparkPlan, caches:Seq[SparkPlan]) extends SparkPlan {
    /** The wrapped plan comes first, followed by all cache plans. */
    override def children: Seq[SparkPlan] = child +: caches

    /** Same attributes as produced by `child`. */
    override def output: Seq[Attribute] = child.output

    /** Same partitioning as reported by `child`. */
    override def outputPartitioning: Partitioning = child.outputPartitioning

    /** Same ordering as reported by `child`. */
    override def outputOrdering: Seq[SortOrder] = child.outputOrdering

    /** Columnar execution is supported whenever `child` supports it. */
    override def supportsColumnar: Boolean = child.supportsColumnar

    /** Vector types are reported exactly as `child` reports them. */
    override def vectorTypes: Option[Seq[String]] = child.vectorTypes

    /** Builds the canonical form from the canonicalized child and the canonicalized cache plans. */
    override protected def doCanonicalize(): SparkPlan = {
        val canonicalChild = child.canonicalized
        val canonicalCaches = caches.map(_.canonicalized)
        copy(child = canonicalChild, caches = canonicalCaches)
    }

    /** Row-based execution is delegated to `child`. */
    override protected def doExecute(): RDD[InternalRow] = child.execute()

    /** Columnar execution is delegated to `child`. */
    override protected def doExecuteColumnar(): RDD[ColumnarBatch] = child.executeColumnar()

    /**
     * Runs each plan in `caches` and counts its rows. The resulting counts are discarded, the plans
     * are only executed for their effect.
     */
    override protected def doPrepare(): Unit = {
        for (cachePlan <- caches) {
            cachePlan.execute().count()
        }
    }

    /**
     * Creates a copy of this node from a new list of children, using the same layout as `children`:
     * the first element becomes `child`, all remaining elements become `caches`.
     */
    override protected def withNewChildrenInternal(newChildren: IndexedSeq[SparkPlan]): SparkPlan = {
        val replacedChild = newChildren.head
        val replacedCaches = newChildren.tail
        copy(child = replacedChild, caches = replacedCaches)
    }
}