org.apache.spark.sql.internal.DataFrameWriterImpl.scala

/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements.  See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License.  You may obtain a copy of the License at
 *
 *    http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package org.apache.spark.sql.internal

import scala.jdk.CollectionConverters._

import org.apache.spark.annotation.Stable
import org.apache.spark.connect.proto
import org.apache.spark.sql.{DataFrameWriter, Dataset, SaveMode}

/**
 * Interface used to write a [[Dataset]] to external storage systems (e.g. file systems,
 * key-value stores). Use `Dataset.write` to access this.
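 *
 * A minimal usage sketch (the format, path, and mode below are illustrative):
 * {{{
 *   ds.write
 *     .format("parquet")
 *     .mode("overwrite")
 *     .save("/path/to/output")
 * }}}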
 *
 * @since 3.4.0
 */
@Stable
final class DataFrameWriterImpl[T] private[sql] (ds: Dataset[T]) extends DataFrameWriter[T] {

  /** @inheritdoc */
  override def mode(saveMode: SaveMode): this.type = super.mode(saveMode)

  /** @inheritdoc */
  override def mode(saveMode: String): this.type = super.mode(saveMode)

  /** @inheritdoc */
  override def format(source: String): this.type = super.format(source)

  /** @inheritdoc */
  override def option(key: String, value: String): this.type = super.option(key, value)

  /** @inheritdoc */
  override def option(key: String, value: Boolean): this.type = super.option(key, value)

  /** @inheritdoc */
  override def option(key: String, value: Long): this.type = super.option(key, value)

  /** @inheritdoc */
  override def option(key: String, value: Double): this.type = super.option(key, value)

  /** @inheritdoc */
  override def options(options: scala.collection.Map[String, String]): this.type =
    super.options(options)

  /** @inheritdoc */
  override def options(options: java.util.Map[String, String]): this.type =
    super.options(options)

  /** @inheritdoc */
  @scala.annotation.varargs
  override def partitionBy(colNames: String*): this.type = super.partitionBy(colNames: _*)

  /** @inheritdoc */
  @scala.annotation.varargs
  override def bucketBy(numBuckets: Int, colName: String, colNames: String*): this.type =
    super.bucketBy(numBuckets, colName, colNames: _*)

  /** @inheritdoc */
  @scala.annotation.varargs
  override def sortBy(colName: String, colNames: String*): this.type =
    super.sortBy(colName, colNames: _*)

  /** @inheritdoc */
  @scala.annotation.varargs
  override def clusterBy(colName: String, colNames: String*): this.type =
    super.clusterBy(colName, colNames: _*)

  /** @inheritdoc */
  def save(path: String): Unit = {
    saveInternal(Some(path))
  }

  /** @inheritdoc */
  def save(): Unit = saveInternal(None)

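  // Both save() overloads funnel into this helper; the optional path becomes the
  // write operation's destination when present.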
  private def saveInternal(path: Option[String]): Unit = {
    executeWriteOperation(builder => path.foreach(builder.setPath))
  }

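  // Builds a Spark Connect WriteOperation proto, lets the caller set the destination
  // (path or table), and submits it to the server as a Command.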
  private def executeWriteOperation(f: proto.WriteOperation.Builder => Unit): Unit = {
    val builder = proto.WriteOperation.newBuilder()

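    // The dataset's logical plan is the input relation for the write.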
    builder.setInput(ds.plan.getRoot)

    // Set path or table
    f(builder)

    // A write targets either a path or a table, never both.
    require(
      !(builder.hasPath && builder.hasTable),
      "A write operation cannot target both a path and a table.")

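    // Translate the public SaveMode into its protobuf counterpart.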
    builder.setMode(mode match {
      case SaveMode.Append => proto.WriteOperation.SaveMode.SAVE_MODE_APPEND
      case SaveMode.Overwrite => proto.WriteOperation.SaveMode.SAVE_MODE_OVERWRITE
      case SaveMode.Ignore => proto.WriteOperation.SaveMode.SAVE_MODE_IGNORE
      case SaveMode.ErrorIfExists => proto.WriteOperation.SaveMode.SAVE_MODE_ERROR_IF_EXISTS
    })

    if (source.nonEmpty) {
      builder.setSource(source)
    }
    sortColumnNames.foreach(names => builder.addAllSortColumnNames(names.asJava))
    partitioningColumns.foreach(cols => builder.addAllPartitioningColumns(cols.asJava))
    clusteringColumns.foreach(cols => builder.addAllClusteringColumns(cols.asJava))

    numBuckets.foreach { n =>
      val bucketBuilder = proto.WriteOperation.BucketBy.newBuilder()
      bucketBuilder.setNumBuckets(n)
      bucketColumnNames.foreach(names => bucketBuilder.addAllBucketColumnNames(names.asJava))
      builder.setBucketBy(bucketBuilder)
    }

    extraOptions.foreach { case (k, v) =>
      builder.putOptions(k, v)
    }

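    // Writes are eager: wrap the operation in a Command and execute it immediately.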
    ds.sparkSession.execute(proto.Command.newBuilder().setWriteOperation(builder).build())
  }

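  // `insertInto` resolves columns by position against an existing table, while
  // `saveAsTable` resolves them by name and honors the configured SaveMode; on the
  // wire both are SaveTable messages that differ only in their save method.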
  /** @inheritdoc */
  def insertInto(tableName: String): Unit = {
    executeWriteOperation { builder =>
      builder.setTable(
        proto.WriteOperation.SaveTable
          .newBuilder()
          .setTableName(tableName)
          .setSaveMethod(
            proto.WriteOperation.SaveTable.TableSaveMethod.TABLE_SAVE_METHOD_INSERT_INTO))
    }
  }

  /** @inheritdoc */
  def saveAsTable(tableName: String): Unit = {
    executeWriteOperation { builder =>
      builder.setTable(
        proto.WriteOperation.SaveTable
          .newBuilder()
          .setTableName(tableName)
          .setSaveMethod(
            proto.WriteOperation.SaveTable.TableSaveMethod.TABLE_SAVE_METHOD_SAVE_AS_TABLE))
    }
  }
}