All downloads are free. The search and download functionality uses the official Maven repository.

widebase.io.csv.package.scala Maven / Gradle / Ivy

The newest version!
package widebase.io

import java.io. {

  BufferedReader,
  FileInputStream,
  FileReader,
  InputStreamReader

}

import java.util.zip. { GZIPInputStream, ZipInputStream }

import java.sql.Timestamp

import org.joda.time. {

  LocalDate,
  LocalDateTime,
  LocalTime,
  Minutes,
  Seconds,
  YearMonth

}

import widebase.data.Datatype

import widebase.db.column. {

  BoolColumn,
  ByteColumn,
  CharColumn,
  DoubleColumn,
  FloatColumn,
  IntColumn,
  LongColumn,
  ShortColumn,
  MonthColumn,
  DateColumn,
  MinuteColumn,
  SecondColumn,
  TimeColumn,
  DateTimeColumn,
  TimestampColumn,
  SymbolColumn,
  StringColumn,

  TypedColumn

}

import widebase.io.csv.filter.ZipFilter

/** CSV I/O.
 *
 * Example how to import a gzip compressed file into columns:
 *
 * {{{
 * val columns = csv.columns("ZddS", ",", filter.none, "data.csv.gz", "g")
 * }}}
 *
 * @author myst3r10n
 */
package object csv {

  import widebase.data
  import widebase.data.Datatype.Datatype
  import widebase.io.csv.filter.ZipFilter.ZipFilter

  /** Reference to itself. */
  val ref = this

  /** Predefined routines to processing CSV files. */
  object table extends TableProcessor

  /** Reads a CSV file into columns, resolving the column types from tokens.
   *
   * The tokens are resolved with `widebase.data.by` and the call is then
   * forwarded to the overload that takes resolved types.
   *
   * @param tokens tokenized types, one per CSV field
   * @param delimiter of CSV file, see the other overload for its semantics
   * @param filter individual processing, applied to the fields of each line
   * @param filename to CSV file
   * @param zipped compression of the file: a value containing `g` selects
   *  gzip, otherwise a value containing `z` selects ZIP, anything else
   *  (including the default empty string) reads the file uncompressed
   *
   * @return processed array of [[widebase.db.column.TypedColumn]],
   *  or `null` if the file contains no lines
   */
  def columns(
    tokens: String,
    delimiter: String,
    filter: Array[String] => Array[String],
    filename: String,
    zipped: String = ""): Array[TypedColumn[_]] = {

    // "g" is tested first, so it wins when both letters are present.
    val zipFilter =
      if(zipped.contains("g"))
        ZipFilter.Gzip
      else if(zipped.contains("z"))
        ZipFilter.Zlib
      else
        ZipFilter.None

    columns(data.by(tokens), delimiter, filter, filename, zipFilter)

  }

  /** Reads a CSV file into columns.
   *
   * Every line is split by `delimiter`, passed through `filter` and each
   * resulting field is appended to the column with the same index, parsed
   * according to the entry of `valueTypes` with that index. The number of
   * columns is taken from the first processed line.
   *
   * The reader is closed even if parsing fails.
   *
   * @param valueTypes resolved types, one per CSV field
   * @param delimiter of CSV file; handed to `String.split` and therefore
   *  interpreted as a regular expression
   * @param filter individual processing, applied to the fields of each line
   * @param filename to CSV file
   * @param zipFilter compression filter
   *
   * @return processed array of [[widebase.db.column.TypedColumn]],
   *  or `null` if the file contains no lines
   *
   * @throws TypeMismatchException if a `Bool` field is neither `true` nor
   *  `false` and does not start with `1` or `0`
   */
  def columns(
    valueTypes: Array[Datatype],
    delimiter: String,
    filter: Array[String] => Array[String],
    filename: String,
    zipFilter: ZipFilter): Array[TypedColumn[_]] = {

    val reader = openReader(filename, zipFilter)

    // Stays null until the first line was read, its size is the field count
    // of that line.
    var result: Array[TypedColumn[_]] = null

    // Column at index `i` typed as `C`, created by `create` on first use.
    def column[C <: TypedColumn[_]](i: Int)(create: => C): C = {

      if(result(i) == null)
        result(i) = create

      result(i).asInstanceOf[C]

    }

    try {

      var line = reader.readLine()

      while(line != null) {

        val fields = filter(line.split(delimiter))

        if(result == null)
          result = Array.fill(fields.size)(null)

        fields.zipWithIndex.foreach { case (value, i) =>

          valueTypes(i) match {

            case Datatype.Bool =>
              val bools = column(i)(new BoolColumn)

              // Besides the literals only the first character is inspected.
              // An empty or unknown value is reported with its original text.
              if(value == "true")
                bools += true
              else if(value == "false")
                bools += false
              else if(value.startsWith("1"))
                bools += true
              else if(value.startsWith("0"))
                bools += false
              else
                throw TypeMismatchException(Datatype.Bool, value)

            case Datatype.Byte =>
              column(i)(new ByteColumn) += java.lang.Byte.valueOf(value)

            case Datatype.Char =>
              column(i)(new CharColumn) += value.head

            case Datatype.Double =>
              column(i)(new DoubleColumn) += value.toDouble

            case Datatype.Float =>
              column(i)(new FloatColumn) += value.toFloat

            case Datatype.Int =>
              column(i)(new IntColumn) += value.toInt

            case Datatype.Long =>
              column(i)(new LongColumn) += value.toLong

            case Datatype.Short =>
              column(i)(new ShortColumn) += value.toShort

            // The Joda-Time and Timestamp based types expect a numeric field
            // that is handed to the respective constructor or factory.
            case Datatype.Month =>
              column(i)(new MonthColumn) += new YearMonth(value.toLong)

            case Datatype.Date =>
              column(i)(new DateColumn) += new LocalDate(value.toLong)

            case Datatype.Minute =>
              column(i)(new MinuteColumn) += Minutes.minutes(value.toInt)

            case Datatype.Second =>
              column(i)(new SecondColumn) += Seconds.seconds(value.toInt)

            case Datatype.Time =>
              column(i)(new TimeColumn) += new LocalTime(value.toLong)

            case Datatype.DateTime =>
              column(i)(new DateTimeColumn) += new LocalDateTime(value.toLong)

            case Datatype.Timestamp =>
              column(i)(new TimestampColumn) += new Timestamp(value.toLong)

            case Datatype.Symbol =>
              column(i)(new SymbolColumn) += Symbol(value)

            case Datatype.String =>
              column(i)(new StringColumn) += value

          }
        }

        line = reader.readLine()

      }
    } finally {

      // Release the file handle on failure as well.
      reader.close()

    }

    result

  }

  /** Opens a buffered reader on a file, decompressing it if requested.
   *
   * The underlying file stream is closed again if the decompression
   * stream cannot be set up.
   *
   * @param filename to CSV file
   * @param zipFilter compression filter
   *
   * @return reader on the (decompressed) content
   */
  private def openReader(
    filename: String,
    zipFilter: ZipFilter): BufferedReader =
    zipFilter match {

      case ZipFilter.None =>
        new BufferedReader(new FileReader(filename))

      case ZipFilter.Gzip =>
        val file = new FileInputStream(filename)

        try {

          new BufferedReader(new InputStreamReader(new GZIPInputStream(file)))

        } catch {

          case e: java.io.IOException =>
            file.close()
            throw e

        }

      case ZipFilter.Zlib =>
        val file = new FileInputStream(filename)

        try {

          val archive = new ZipInputStream(file)

          // A ZipInputStream reports end of stream until it is positioned
          // on an entry, only the first entry of the archive is read.
          archive.getNextEntry()

          new BufferedReader(new InputStreamReader(archive))

        } catch {

          case e: java.io.IOException =>
            file.close()
            throw e

        }
    }

  /** Scope properties. */
  def props = Props

}





© 2015 - 2025 Weber Informatics LLC | Privacy Policy