All Downloads are FREE. Search and download functionalities are using the official Maven repository.

com.dimajix.spark.sql.catalyst.PlanUtils.scala Maven / Gradle / Ivy

/*
 * Copyright (C) 2020 The Flowman Authors
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *   http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package com.dimajix.spark.sql.catalyst

import org.apache.spark.sql.SparkSession
import org.apache.spark.sql.catalyst.analysis.UnresolvedRelation
import org.apache.spark.sql.catalyst.expressions.Alias
import org.apache.spark.sql.catalyst.expressions.Attribute
import org.apache.spark.sql.catalyst.expressions.AttributeSet
import org.apache.spark.sql.catalyst.expressions.ExprId
import org.apache.spark.sql.catalyst.expressions.Literal
import org.apache.spark.sql.catalyst.expressions.NamedExpression
import org.apache.spark.sql.catalyst.expressions.Unevaluable
import org.apache.spark.sql.catalyst.plans.logical.LogicalPlan
import org.apache.spark.sql.catalyst.plans.logical.OneRowRelation
import org.apache.spark.sql.catalyst.plans.logical.Project
import org.apache.spark.sql.catalyst.plans.logical.With
import org.apache.spark.sql.catalyst.rules.Rule
import org.apache.spark.sql.catalyst.rules.RuleExecutor
import org.apache.spark.sql.types.DataType
import org.apache.spark.sql.types.Metadata
import org.apache.spark.sql.types.StructType


case class NamedAttribute(name:String, dataType: DataType)(
    val exprId: ExprId = NamedExpression.newExprId,
    val qualifier: Seq[String] = Seq.empty,
    val explicitMetadata: Metadata = Metadata.empty
)
extends Attribute with Unevaluable {
    @transient
    override lazy val references: AttributeSet = AttributeSet.empty

    override def newInstance(): NamedAttribute = this

    override def nullable: Boolean = true

    override def metadata: Metadata = explicitMetadata

    override def withNullability(newNullability: Boolean): Attribute = this
    override def withQualifier(newQualifier: Seq[String]): Attribute = NamedAttribute(name, dataType)(exprId, newQualifier, explicitMetadata)
    override def withName(newName: String): Attribute = NamedAttribute(newName, dataType)(exprId, qualifier, explicitMetadata)
    override def withMetadata(newMetadata: Metadata): Attribute = NamedAttribute(name, dataType)(exprId, qualifier, newMetadata)
    /*override*/ def withExprId(newExprId: ExprId): Attribute = NamedAttribute(name, dataType)(newExprId, qualifier, metadata)
    /*override*/ def withDataType(newType: DataType): Attribute = NamedAttribute(name, newType)(exprId, qualifier, metadata)
}


object PlanUtils {
    def analyze(spark:SparkSession, logicalPlan: LogicalPlan) : LogicalPlan = {
        spark.sessionState.analyzer.execute(logicalPlan)
    }

    def singleRowPlan(schema:StructType) : LogicalPlan = {
        val expressions = schema.map { field =>
            val literal =
                if (field.nullable) {
                    Literal(null, field.dataType)
                }
                else {
                    Literal.default(field.dataType)
                }
            Alias(literal, field.name)(explicitMetadata = Option(field.metadata))
        }
        Project(expressions, OneRowRelation())
    }

    def namedAttributePlan(schema:StructType) : LogicalPlan = {
        val expressions = schema.map { field =>
            NamedAttribute(field.name, field.dataType)(explicitMetadata = field.metadata)
        }
        Project(expressions, OneRowRelation())
    }

    /**
     * Replace all dependencies in a given LogicalPlan by dummy dependencies with the specified schema
     */
    def replaceDependencies(plan:LogicalPlan, deps:Map[String, StructType]) : LogicalPlan = {
        object ReplaceRelation extends Rule[LogicalPlan] {
            val replacements = deps.map { case (name, schema) =>
                name -> singleRowPlan(schema)
            }

            override def apply(plan: LogicalPlan): LogicalPlan = plan transformDown {
                case relation: UnresolvedRelation =>
                    replacements.getOrElse(relation.tableName, relation)
                case With(child, ctes) =>
                    With(child, ctes.map { case(k,v) => k -> v.copy(child = apply(v.child)) })
            }
        }

        object Replacer extends RuleExecutor[LogicalPlan] {
            override protected def batches: Seq[Batch] = Seq(
                Batch("Replace all relations", Once, ReplaceRelation)
            )
        }

        Replacer.execute(plan)
    }
}




© 2015 - 2025 Weber Informatics LLC | Privacy Policy