// Downloading a project consumes significant server resources, and we have to cover our server costs. Thank you in advance for your understanding. Project price: only $1.
// You can buy this project and download/modify it as often as you want.
package com.acxiom.pipeline.steps
import com.acxiom.pipeline.PipelineContext
import com.acxiom.pipeline.annotations.{StepFunction, StepObject}
import com.mongodb.spark.MongoSpark
import com.mongodb.spark.config.WriteConfig
import org.apache.spark.sql.DataFrame
import org.apache.spark.sql.types.{StringType, StructField, StructType}
object InputOutputSteps {
"Load File as Data Frame",
"This step will load a file from the provided URL",
/**
  * Loads a file from the provided URL into a DataFrame without an explicit schema.
  *
  * Thin wrapper that delegates to `loadFileWithSchema`, passing `None` for the schema.
  *
  * NOTE(review): the two string literals immediately above this definition look like the
  * name/description arguments of a stripped `@StepFunction(...)` annotation; restore the
  * full annotation from the upstream source (its other arguments are not visible here).
  *
  * @param url             location of the file to load
  * @param format          file format, forwarded unchanged
  * @param separator       optional column separator, forwarded unchanged
  * @param pipelineContext pipeline context, forwarded unchanged
  * @return the DataFrame produced by `loadFileWithSchema`
  */
def loadFile(url: String, format: String, separator: Option[String], pipelineContext: PipelineContext): DataFrame = {
  loadFileWithSchema(url, format, separator, None, pipelineContext)
}
"Load File as Data Frame with schema",
"This step will load a file from the provided URL using the provided schema",
/**
  * Loads a file from the provided URL into a DataFrame, optionally using the provided schema
  * (per the step description string above this definition).
  *
  * NOTE(review): this definition is damaged as it stands. The reader expression is truncated,
  * both `schema` branches are empty, and no closing braces are present. Restore the body from
  * the upstream source rather than guessing; the calls that were removed depend on the
  * `PipelineContext` API, which is not visible here.
  *
  * @param url             location of the file to load
  * @param format          file format
  * @param separator       optional column separator; the visible code only uses its first character
  * @param schema          optional schema; defaults to `None`
  * @param pipelineContext pipeline context (not referenced in the surviving lines)
  * @return a DataFrame (the expression producing it is missing from the surviving lines)
  */
def loadFileWithSchema(url: String, format: String, separator: Option[String], schema: Option[StructType] = None,
pipelineContext: PipelineContext): DataFrame = {
// NOTE(review): truncated. The call that should receive ("sep", <first char of separator>) is
// missing; presumably a reader `.option(...)` call. Confirm against upstream.
// `.head` throws on an empty separator string.
val dfr = if (separator.isDefined) {"sep", separator.get.toCharArray.head)
// NOTE(review): the body of this else-branch (reader without a separator) and its closing brace are missing.
} else {
// NOTE(review): both branches below are empty; presumably they load `url` via `dfr`
// without / with the schema. Confirm against upstream.
if (schema.isEmpty) {
} else {
"Write Data Frame to a json file",
"This step will write a DataFrame from the provided URL",
/**
  * Writes a DataFrame to a JSON file (per the step name string above this definition).
  *
  * NOTE(review): the method body and its closing brace are missing from this file; restore
  * them from the upstream source.
  * NOTE(review): the description string above says "from the provided URL"; presumably
  * "to the provided URL" is meant.
  *
  * @param dataFrame the DataFrame to write
  * @param url       target location
  * @param mode      defaults to "error"; presumably a Spark save mode. TODO confirm
  */
def writeJSONFile(dataFrame: DataFrame, url: String, mode: String = "error"): Unit = {
"Read header from a file",
"This step will load the first line of a file and parse it into column names",
/**
  * Reads the first line of a file so it can be parsed into column names (per the step
  * description string above this definition).
  *
  * NOTE(review): the remainder of the body is missing: turning `head` into a `List[String]`,
  * closing `input`, and the closing brace. `format` and `separator` are not used in the
  * surviving lines. Restore from the upstream source.
  * NOTE(review): no import for `FileInputStream` or `Source` is visible in this file's
  * import block (presumably `java.io.FileInputStream` and `scala.io.Source`).
  *
  * @param url       path of the file to read; opened directly with a `FileInputStream`
  * @param format    file format (unused in the surviving lines)
  * @param separator optional column separator (unused in the surviving lines)
  * @return the column names (the expression producing them is missing)
  */
def readHeader(url: String, format: String, separator: Option[String]): List[String] = {
// Opened here; no matching close() survives in the visible lines.
val input = new FileInputStream(url)
// Takes only the first line; next() throws if the file has no lines.
val head = Source.fromInputStream(input).getLines().next()
"Create a DataFrame schema",
"This step will create a DataFrame schema from a list of column names",
/**
  * Creates a DataFrame schema from a list of column names.
  *
  * Every column becomes a nullable `StringType` field, in the order given.
  *
  * NOTE(review): the two string literals immediately above this definition look like the
  * name/description arguments of a stripped `@StepFunction(...)` annotation; restore the
  * full annotation from the upstream source.
  *
  * @param columnNames names of the columns, in schema order
  * @return a `StructType` with one nullable string field per name
  */
def createSchema(columnNames: List[String]): StructType = {
  // The original line was truncated to `StructType(, StringType, nullable = true)))`.
  // The surviving arguments, the three closing parentheses and the StructField import
  // pin the missing part to a map of columnNames into StructField.
  StructType(columnNames.map(name => StructField(name, StringType, nullable = true)))
}
"Writes a DataFrame to a Mongo database",
"This step will write the contents of a DataFrame to the Mongo database and collection specified",
/**
  * Writes the contents of a DataFrame to the Mongo database and collection specified.
  *
  * NOTE(review): the original line was truncated to `= , WriteConfig(...))`, with the callee
  * and first argument missing. `MongoSpark.save(dataFrame, ...)` is reconstructed from the
  * file's `MongoSpark` / `WriteConfig` imports and the unmatched closing parenthesis;
  * confirm against the upstream source.
  * NOTE(review): the two string literals immediately above this definition look like the
  * name/description arguments of a stripped `@StepFunction(...)` annotation; restore it too.
  *
  * @param dataFrame      the DataFrame to persist
  * @param uri            Mongo connection URI, passed as the "uri" write option
  * @param collectionName target collection, passed as the "collection" write option
  */
def writeDataFrameToMongo(dataFrame: DataFrame, uri: String, collectionName: String): Unit =
  MongoSpark.save(dataFrame, WriteConfig(Map("collection" -> collectionName, "uri" -> uri)))