All Downloads are FREE. Search and download functionalities are using the official Maven repository.

org.elasticsearch.spark.sql.streaming.EsStreamQueryWriter.scala Maven / Gradle / Ivy

package org.elasticsearch.spark.sql.streaming

import org.apache.spark.TaskContext
import org.apache.spark.sql.Row
import org.apache.spark.sql.catalyst.InternalRow
import org.apache.spark.sql.catalyst.encoders.ExpressionEncoder
import org.apache.spark.sql.catalyst.encoders.RowEncoder
import org.apache.spark.sql.types.StructType
import org.elasticsearch.hadoop.EsHadoopIllegalArgumentException
import org.elasticsearch.hadoop.serialization.BytesConverter
import org.elasticsearch.hadoop.serialization.JdkBytesConverter
import org.elasticsearch.hadoop.serialization.builder.ValueWriter
import org.elasticsearch.hadoop.serialization.field.FieldExtractor
import org.elasticsearch.spark.rdd.EsRDDWriter
import org.elasticsearch.spark.sql.DataFrameFieldExtractor
import org.elasticsearch.spark.sql.DataFrameValueWriter

/**
 * Takes in iterator of
 */
private [sql] class EsStreamQueryWriter(serializedSettings: String,
                                        schema: StructType,
                                        commitProtocol: EsCommitProtocol)
  extends EsRDDWriter[InternalRow](serializedSettings) {

  override protected def valueWriter: Class[_ <: ValueWriter[_]] = classOf[DataFrameValueWriter]
  override protected def bytesConverter: Class[_ <: BytesConverter] = classOf[JdkBytesConverter]
  override protected def fieldExtractor: Class[_ <: FieldExtractor] = classOf[DataFrameFieldExtractor]

  private val encoder: ExpressionEncoder[Row] = RowEncoder(schema).resolveAndBind()

  override def write(taskContext: TaskContext, data: Iterator[InternalRow]): Unit = {
    // Keep clients from using this method, doesn't return task commit information.
    throw new EsHadoopIllegalArgumentException("Use run(taskContext, data) instead to retrieve the commit information")
  }

  def run(taskContext: TaskContext, data: Iterator[InternalRow]): TaskCommit = {
    val taskInfo = TaskState(taskContext.partitionId(), settings.getResourceWrite)
    commitProtocol.initTask(taskInfo)
    try {
      super.write(taskContext, data)
    } catch {
      case t: Throwable =>
        commitProtocol.abortTask(taskInfo)
        throw t
    }
    commitProtocol.commitTask(taskInfo)
  }

  override protected def processData(data: Iterator[InternalRow]): Any = {
    val row = encoder.fromRow(data.next())
    commitProtocol.recordSeen()
    (row, schema)
  }
}




© 2015 - 2025 Weber Informatics LLC | Privacy Policy