All Downloads are FREE. Search and download functionalities are using the official Maven repository.

zhao.algorithmMagic.io.OutputSparkDF.scala Maven / Gradle / Ivy

There is a newer version: 1.42
Show newest version
package zhao.algorithmMagic.io

import org.apache.spark.sql.types.{DoubleType, StringType, StructField, StructType}
import org.apache.spark.sql.{Row, SparkSession}
import zhao.algorithmMagic.exception.OperatorOperationException
import zhao.algorithmMagic.operands.matrix.{ColorMatrix, ColumnDoubleMatrix, ColumnIntegerMatrix}
import zhao.algorithmMagic.operands.table.DataFrame

import scala.collection.mutable
import scala.jdk.CollectionConverters.{asScalaBufferConverter, iterableAsScalaIterableConverter}

/**
 * 将 AS 的 DaraFrame 数据对象 转换成为 Spark DataFrame 对象的设备输出对象。
 *
 * @param sparkSession 输出目标对应的 spark会话对象。
 * @param table        表的名称
 * @author zhao
 */
class OutputSparkDF(sparkSession: SparkSession, table: String) extends OutputComponent {

  /**
   * 启动数据输出组件.
   * 

* Start data output component. * * @return 如果启动成功返回true */ override def open(): Boolean = this.isOpen /** * @return 如果组件已经启动了,在这里返回true. *

* If the component has already started, return true here */ override def isOpen: Boolean = this.sparkSession != null /** * 将一份二进制数据输出。 *

* Output a binary data. * * @param data 需要被输出的二进制数据包。 *

* The binary data package that needs to be output. */ override def writeByteArray(data: Array[Byte]): Unit = throw new OperatorOperationException("暂不支持字节数组对象的输出。") /** * 输出一个 整形 矩阵对象 * * @param matrix 需要被输出的矩阵 */ override def writeMat(matrix: ColumnIntegerMatrix): Unit = throw new OperatorOperationException("暂不支持矩阵数值对象的输出。") /** * 输出一个 double类型的 矩阵对象 * * @param matrix 需要被输出的矩阵 */ override def writeMat(matrix: ColumnDoubleMatrix): Unit = throw new OperatorOperationException("暂不支持矩阵数值对象的输出。") /** * 将图像矩阵所包含的图像直接输出到目标。 *

* Directly output the images contained in the image matrix to the target. * * @param colorMatrix 需要被输出的图像矩阵对象。 *

* The image matrix object that needs to be output. */ override def writeImage(colorMatrix: ColorMatrix): Unit = throw new OperatorOperationException("暂不支持矩阵数值对象的输出。") /** * 将一个 DataFrame 中的数据按照数据输出组件进行输出. *

* Output the data in a DataFrame according to the data output component. * * @param dataFrame 需要被输出的数据对象 */ override def writeDataFrame(dataFrame: DataFrame): Unit = { val fields1 = dataFrame.getFields val fields2 = mutable.ListBuffer[StructField]() fields1.forEach(cell => fields2.append(StructField( cell.toString, if (cell.isNumber) DoubleType else StringType, nullable = true ))) val value = sparkSession.sparkContext.makeRDD( dataFrame.toList.asScala ).map(line => Row.fromSeq(line.asScala.map(_.toString).toSeq)) sparkSession.createDataFrame(value, StructType(fields2)).createTempView(this.table) } override def close(): Unit = {} } object OutputSparkDF { def builder() = new OutputSparkDFBuilder() }





© 2015 - 2024 Weber Informatics LLC | Privacy Policy