
// com.tribbloids.spookystuff.sparkbinding.StringRDDView.scala (Maven / Gradle / Ivy)
package com.tribbloids.spookystuff.sparkbinding
import org.apache.spark.rdd.RDD
import scala.collection.immutable.ListMap
/**
* Created by peng on 12/06/14.
*/
// View over an RDD of delimited text lines that adds conversions from raw lines
// to per-row maps keyed by column name.
class StringRDDView(val self: RDD[String]) {

  /**
   * Converts every line of the underlying RDD into a map from column name to field value.
   *
   * The data itself has to be headerless: the header is supplied separately because a
   * header line stored inside the RDD would be shuffled to an arbitrary position.
   * A data line that happens to be identical to the header row is not detected and is
   * converted like any other line.
   *
   * Header names and field values are paired positionally; when a line has fewer or more
   * fields than there are headers, the unmatched headers or fields are dropped by `zip`.
   *
   * @param headerRow column names joined by the same delimiter as the data lines
   * @param splitter  delimiter passed to `String.split`, so it is interpreted as a
   *                  regular expression; defaults to a comma
   * @return an RDD with one insertion-ordered map (header -> value) per input line
   */
  def csvToMap(headerRow: String, splitter: String = ","): RDD[Map[String, String]] = {
    val headers = headerRow.split(splitter)

    self.map { line =>
      // A negative limit keeps trailing empty fields. The one-argument split would
      // discard them, so a row such as "1,2," would lose its last column entirely
      // instead of mapping that column to "".
      val values = line.split(splitter, -1)
      ListMap(headers.zip(values): _*)
    }
  }

  /**
   * Tab-separated variant of [[csvToMap]].
   *
   * @param headerRow column names joined by tab characters
   * @return an RDD with one insertion-ordered map (header -> value) per input line
   */
  def tsvToMap(headerRow: String): RDD[Map[String, String]] = csvToMap(headerRow, "\t")
}
// © 2015 - 2025 Weber Informatics LLC | Privacy Policy