// com.qiniu.pipeline.sdk.plugin.ScalaParser.scala (Maven / Gradle / Ivy)
// The newest version!
package com.qiniu.pipeline.sdk.plugin
import java.io.{Serializable => JSerializable}
import org.apache.spark.sql.Row
import org.apache.spark.sql.types.StructType
import scala.collection.JavaConversions._
/**
  * Base class that every plugin written in Scala must extend.
  *
  * @param order         field used internally by the system
  * @param pluginFields  all output fields of the plugin as declared in the transform spec
  * @param schema        schema of the user's tracking/input data
  * @param configuration field used internally by the system
  */
abstract class ScalaParser(order: Integer,
                           pluginFields: Seq[String],
                           schema: StructType,
                           configuration: Map[String, JSerializable])
  extends Parser(order, pluginFields, schema, configuration) {

  /**
    * Auxiliary constructor intended for plugin authors.
    *
    * Delegates to the primary constructor with `order = 1` and an empty configuration map.
    *
    * @param pluginFields all output fields of the plugin as declared in the transform spec
    * @param schema       schema of the user's tracking/input data
    */
  def this(pluginFields: Seq[String], schema: StructType) =
    // Use the JSerializable alias (java.io.Serializable) so the value type is exactly the one
    // the primary constructor declares; an unqualified `Serializable` would resolve to
    // scala.Serializable and only type-check through Map's covariance.
    this(1, pluginFields, schema, Map.empty[String, JSerializable])

  /**
    * Returns the schema of the user's tracking/input data.
    *
    * @return the [[org.apache.spark.sql.types.StructType]] passed to the constructor
    */
  final override def getSchema(): StructType = schema

  /**
    * Transforms one row of input into zero or more output rows.
    *
    * @param originData a single row of the user's tracking/input data
    * @return a `Seq[Row]`, i.e. a plugin may turn one input row into several rows; every row in
    *         the sequence must be made up of the values for all fields listed in `pluginFields`
    */
  def parse(originData: Row): Seq[Row]
}
// © 2015 - 2025 Weber Informatics LLC | Privacy Policy