All Downloads are FREE. Search and download functionalities are using the official Maven repository.

com.qiniu.pipeline.sdk.plugin.ScalaParser.scala Maven / Gradle / Ivy

The newest version!
package com.qiniu.pipeline.sdk.plugin

import java.io.{Serializable => JSerializable}
import org.apache.spark.sql.Row
import org.apache.spark.sql.types.StructType
import scala.collection.JavaConversions._


/**
  * Base class for plugins written in Scala: a Scala plugin must extend `ScalaParser`.
  *
  * All constructor arguments are forwarded unchanged to the `Parser` superclass.
  *
  * @param order         field used internally by the system
  * @param pluginFields  all output fields declared for this plugin in the transform spec
  * @param schema        schema of the user's logged/input data
  * @param configuration field used internally by the system
  */
abstract class ScalaParser(order: Integer,
                           pluginFields: Seq[String],
                           schema: StructType,
                           configuration: Map[String, JSerializable])
    extends Parser(order, pluginFields, schema, configuration) {

  /**
    * Convenience constructor for plugin authors.
    *
    * Delegates to the primary constructor with `order = 1` and an empty configuration map.
    *
    * @param pluginFields all output fields declared for this plugin in the transform spec
    * @param schema       schema of the user's logged/input data
    */
  def this(pluginFields: Seq[String], schema: StructType) =
    // Use the JSerializable alias (java.io.Serializable) so the value type matches the primary
    // constructor's declaration exactly; the bare name `Serializable` would resolve to
    // scala.Serializable in this file because java.io.Serializable is imported under an alias.
    this(1, pluginFields, schema, Map.empty[String, JSerializable])

  /**
    * Returns the schema of the user's logged/input data.
    *
    * @return the `StructType` passed to this parser's constructor
    */
  final override def getSchema(): StructType = schema

  /**
    * Transforms one row of input data into zero or more output rows.
    *
    * @param originData a single row of the user's logged/input data
    * @return a `Seq[Row]`; a plugin may turn one input row into several output rows, and every
    *         row in the sequence must be made up of the values for all fields in `pluginFields`
    */
  def parse(originData: Row): Seq[Row]
}




© 2015 - 2025 Weber Informatics LLC | Privacy Policy