All Downloads are FREE. Search and download functionalities are using the official Maven repository.

com.tribbloids.spookystuff.sparkbinding.StringRDDView.scala Maven / Gradle / Ivy

package com.tribbloids.spookystuff.sparkbinding

import org.apache.spark.rdd.RDD

import scala.collection.immutable.ListMap

/**
 * Created by peng on 12/06/14.
 *
 * Wraps an `RDD[String]` and parses each delimiter-separated line into a map
 * keyed by column name.
 *
 * @param self the RDD of raw text lines to parse
 */
class StringRDDView(val self: RDD[String]) {

  /**
   * Parses every line of the wrapped RDD into a map from header name to field value.
   *
   * The data must be headerless: the header is supplied separately through `headerRow`
   * because a header line stored inside the RDD has no guaranteed position. No header
   * filtering is performed here, so a line identical to `headerRow` is parsed like any
   * other line (each header name ends up mapped to itself).
   *
   * Header names and field values are paired positionally; when their counts differ,
   * the extra header names or extra field values are dropped.
   *
   * @param headerRow column names, separated by `splitter`
   * @param splitter  field separator; it is handed to `String.split` and is therefore
   *                  interpreted as a regular expression (escape metacharacters such as `|`)
   * @return one map per input line, built as a `ListMap` from the (header, value) pairs
   */
  def csvToMap(headerRow: String, splitter: String = ","): RDD[Map[String,String]] = {
    val headers = headerRow.split(splitter)

    self.map { line =>
      // A limit of -1 keeps trailing empty fields; the default split would drop them,
      // silently removing the corresponding header keys from rows such as "1,,".
      val values = line.split(splitter, -1)

      ListMap(headers.zip(values): _*)
    }
  }

  /**
   * Tab-separated variant of [[csvToMap]].
   *
   * @param headerRow column names, separated by tab characters
   * @return one map per input line, from header name to field value
   */
  def tsvToMap(headerRow: String): RDD[Map[String,String]] = csvToMap(headerRow, "\t")
}




© 2015 - 2025 Weber Informatics LLC | Privacy Policy