All downloads are free. The search and download functions use the official Maven repository.

widebase.io.table.FileTableSave.scala Maven / Gradle / Ivy

The newest version!
package widebase.io.table

import java.io. { File, RandomAccessFile }
import java.sql.Timestamp

import org.joda.time. {

  LocalDate,
  LocalDateTime,
  LocalTime,
  Minutes,
  Seconds,
  YearMonth

}

import scala.collection.mutable. { ArrayBuffer, Buffer, Map }

import widebase.data.Datatype

import widebase.db.column. {

  BoolColumn,
  ByteColumn,
  CharColumn,
  DoubleColumn,
  FloatColumn,
  IntColumn,
  LongColumn,
  ShortColumn,
  MonthColumn,
  DateColumn,
  MinuteColumn,
  SecondColumn,
  TimeColumn,
  DateTimeColumn,
  TimestampColumn,
  SymbolColumn,
  StringColumn

}

import widebase.db.table.Table
import widebase.io.VariantWriter
import widebase.io.column. { ColumnWriter, FileColumnSaver }
import widebase.io.file.FileVariantWriter
import widebase.io.filter. { MagicId, StreamFilter }

/** Saves tables into database.
 *
 * Offers three entry points: `apply` writes a table into one file, `dir`
 * hands labels and columns to a [[widebase.io.column.FileColumnSaver]] and
 * `parted` splits a table into partition directories while writing one file
 * per column.
 *
 * @param path of database
 *
 * @author myst3r10n
 */
abstract class FileTableSave(path: String) {

  import PartitionDomain.PartitionDomain

  import widebase.io.filter.StreamFilter.StreamFilter

  /** Saves table into database.
    *
    * The target is `<directory>/<name><extension>`, where the directory is
    * `segmented` if given and the database path otherwise, and the extension
    * follows from the filter. An existing file is deleted first. Labels are
    * written before the column values. The file channel is closed on every
    * exit path, including failures.
    *
    * @param name of table
    * @param table self-explanatory
    * @param filter self-explanatory
    * @param level of compression 0-9
    * @param segmented path of segment, `null` to use the database path
   */
  def apply(
    name: String,
    table: Table,
    filter: StreamFilter = props.filters.saver,
    level: Int = props.levels.saver)
    (implicit segmented: File = null): Unit = {

    // NOTE(review): Zlib output is named ".zip"; presumably the loader looks
    // for the same extension -- confirm before changing it.
    val fileExtension =
      filter match {

        case StreamFilter.Gzip => ".gz"
        case StreamFilter.Zlib => ".zip"
        case _ => ""

      }

    val dir =
      if(segmented == null)
        path
      else
        segmented.getPath

    val file = new File(dir + "/" + name + fileExtension)

    if(file.exists)
      file.delete

    val channel = new RandomAccessFile(file.getPath, "rw").getChannel

    try {

      channel.tryLock

      val vwriter = new VariantWriter(channel, filter, level) {

        override val charset = props.charsets.saver

        override def capacity = props.capacities.saver
        override def order = props.orders.saver

      }

      val writer = new ColumnWriter(vwriter)

      // Write column labels
      writer.write(table.labels)

      // Write column values
      table.columns.foreach(column => writer.write(column))

      writer.close

    } finally {

      // Releases file handle and lock even if a write failed; closing a
      // channel that is already closed has no effect.
      channel.close()

    }
  }

  /** Saves table into directory table.
    *
    * @note Supports optional partitioned tables. With a partition symbol the
    * call is forwarded to `parted`; accepted symbols are `d`/`daily`,
    * `m`/`monthly` and `y`/`yearly`, any other symbol fails with a
    * `MatchError`.
    *
    * @param name of table
    * @param table self-explanatory
    * @param parted partition symbol, `null` for an unpartitioned table
    * @param segmented path of segment
   */
  def dir(
    name: String,
    table: Table)
    (implicit parted: Symbol = null, segmented: File = null): Unit = {

    if(parted != null) {

      val domain = parted.name match {

        case "d" | "daily" => PartitionDomain.Daily
        case "m" | "monthly" => PartitionDomain.Monthly
        case "y" | "yearly" => PartitionDomain.Yearly

      }

      // `this.` is required: the parameter `parted` shadows the method
      this.parted(domain, name, table)(segmented)

    } else {

      val saver = new FileColumnSaver(path)

      // Save column labels
      saver.save(name, ".d", table.labels)(null, segmented)

      // Save column values
      table.foreach { case (label, column) =>
        saver.save(name, label, column, true)(null, segmented) }

    }
  }

  /** Saves table into partitioned directory table.
    *
    * Records are processed in table order and the first column supplies the
    * partition key. A new partition is started on the first record and each
    * time the key of a record differs from the key of the current partition.
    * For every partition the labels are saved, one file per column is
    * created below `<path or segmented>/<partition>/<name>` and the number
    * of records is patched into the header of each column file once the
    * partition ends. Symbol and string columns store `Long` offsets in the
    * column file and the values themselves in a `.sym` / `.str` companion.
    *
    * For the date based domains the first column must be of type `Long`,
    * `Month`, `Date`, `Time`, `DateTime` or `Timestamp`; other types fail
    * with a `MatchError`.
    *
    * All writers are released on every exit path, including failures.
    *
    * @param domain of partition
    * @param name of table
    * @param table self-explanatory
    * @param segmented path of segment
   */
  def parted(
    domain: PartitionDomain,
    name: String,
    table: Table)(implicit segmented: File = null): Unit = {

    // Writer of a companion file plus the accumulated end offset written so far
    class Companion(var writer: FileVariantWriter, var lastEnded: Long)

    // Directory of the partition currently written
    var dir: File = null

    // Column writers of the current partition, in column order; companions
    // are keyed by the index of their column writer.
    val writers = ArrayBuffer[FileVariantWriter]()
    val symbolCompanions = Map[Int, Companion]()
    val stringCompanions = Map[Int, Companion]()

    /** Creates column (and companion) files within `dir` and writes headers. */
    def initWriters(): Unit = {

      table.foreach { case (label, column) =>

        val file = new File(dir.getPath + "/" + label)

        if(file.exists)
          file.delete

        val channel = new RandomAccessFile(file.getPath, "rw").getChannel
        channel.tryLock
        writers += new FileVariantWriter(channel) {

          override val charset = props.charsets.parted

          override def capacity = props.capacities.parted
          override def order = props.orders.parted

          // Write magic
          mode = Datatype.String
          write(MagicId.Column.toString)

          // Write column type
          mode = Datatype.Byte
          write(column.typeOf.id.toByte)

          // Write column length, a placeholder patched by writeRecords
          mode = Datatype.Int
          write(0)

          // Set column value type, variable length values are stored as offsets
          if(
            column.typeOf == Datatype.Symbol ||
            column.typeOf == Datatype.String)
            mode = Datatype.Long
          else
            mode = column.typeOf

        }

        if(
          column.typeOf == Datatype.String ||
          column.typeOf == Datatype.Symbol) {

          val isSymbol = column.typeOf == Datatype.Symbol

          val companion =
            new File(file.getPath + (if(isSymbol) ".sym" else ".str"))

          if(companion.exists)
            companion.delete

          val companionChannel =
            new RandomAccessFile(companion.getPath, "rw").getChannel
          companionChannel.tryLock

          val writer = new FileVariantWriter(companionChannel) {

            override val charset = props.charsets.parted

            override def capacity = props.capacities.parted
            override def order = props.orders.parted

            mode = column.typeOf

          }

          // The column writer was appended last, so its index is size - 1
          val entry = (writers.size - 1) -> new Companion(writer, 0L)

          if(isSymbol)
            symbolCompanions += entry
          else
            stringCompanions += entry

        }
      }
    }

    /** Patches the record count into the header of every open column file. */
    def writeRecords(records: Int): Unit = {

      writers.foreach { writer =>

        if(writer.isOpen) {

          writer.flush

          // Jump behind the magic id and the type byte written by initWriters
          writer.mode = Datatype.Byte
          writer.position =
            (MagicId.Column.toString.getBytes(writer.charset)).size + 1

          writer.mode = Datatype.Int
          writer.write(records)

        }
      }
    }

    /** Closes and forgets all column and companion writers. */
    def releaseWriters(): Unit = {

      writers.foreach(writer =>
        if(writer.isOpen) writer.close)
      writers.clear

      symbolCompanions.values.foreach(companion =>
        if(companion.writer.isOpen)
          companion.writer.close)
      symbolCompanions.clear

      stringCompanions.values.foreach(companion =>
        if(companion.writer.isOpen)
          companion.writer.close)
      stringCompanions.clear

    }

    val saver = new FileColumnSaver(path)
    var numberOfRecordsOnPart = 0
    var lastInt: Option[Int] = scala.None
    var lastDate: LocalDate = null

    try {

      for(r <- 0 until table.records.length) {

        // Stays null unless this record opens a new partition
        var partition: String = null

        domain match {

          case PartitionDomain.Int =>
            val key = table.columns.head(r).asInstanceOf[Int]

            val newPartition = lastInt match {

              case Some(previous) if previous == key => false

              case Some(previous) =>
                // Report the key that just ended, like the date domains do
                println("End of int " + previous + " passed, +" +
                  numberOfRecordsOnPart + " records")

                true

              case _ => true // very first record

            }

            if(newPartition) {

              lastInt = Some(key)
              partition = key.toString

            }

          case PartitionDomain.Yearly |
               PartitionDomain.Monthly |
               PartitionDomain.Daily =>

            val keyColumn = table.columns.head

            // Partition key as milliseconds.
            // NOTE(review): a Time key only yields the milliseconds of the
            // day, so it always maps onto the same calendar date -- confirm
            // this is intended.
            val dateTime: Long = keyColumn.typeOf match {

              case Datatype.Long =>
                keyColumn(r).asInstanceOf[Long]
              case Datatype.Month =>
                keyColumn(r).asInstanceOf[YearMonth]
                  .toLocalDate(1).toDateMidnight.getMillis
              case Datatype.Date =>
                keyColumn(r).asInstanceOf[LocalDate].toDateMidnight.getMillis
              case Datatype.Time =>
                keyColumn(r).asInstanceOf[LocalTime].getMillisOfDay
              case Datatype.DateTime =>
                keyColumn(r).asInstanceOf[LocalDateTime].toDateTime.getMillis
              case Datatype.Timestamp =>
                keyColumn(r).asInstanceOf[Timestamp].getTime

            }

            val today = new LocalDate(dateTime)

            // Unit used in the log message and pattern of the partition name
            val (unit, pattern) = domain match {

              case PartitionDomain.Yearly => ("year", "yyyy")
              case PartitionDomain.Monthly => ("month", "yyyy-MM")
              case PartitionDomain.Daily => ("day", "yyyy-MM-dd")

            }

            val newPartition =
              if(lastDate == null)
                true // very first record
              else {

                val yearPassed = lastDate.getYear != today.getYear
                val monthPassed =
                  yearPassed ||
                  lastDate.getMonthOfYear != today.getMonthOfYear
                val dayPassed =
                  monthPassed ||
                  lastDate.getDayOfMonth != today.getDayOfMonth

                val passed = domain match {

                  case PartitionDomain.Yearly => yearPassed
                  case PartitionDomain.Monthly => monthPassed
                  case PartitionDomain.Daily => dayPassed

                }

                if(passed)
                  println("End of " + unit + " " + lastDate.toString(pattern) +
                    " passed, +" + numberOfRecordsOnPart + " records")

                passed

              }

            if(newPartition) {

              lastDate = today
              partition = lastDate.toString(pattern)

            }
        }

        if(partition != null) {

          // Finish the previous partition (no-op on the first record)
          writeRecords(numberOfRecordsOnPart)
          releaseWriters()

          // Set column labels
          saver.save(name, ".d", table.labels)(partition, segmented)

          dir =
            if(segmented == null)
              new File(path + "/" + partition + "/" + name)
            else
              new File(segmented.getPath + "/" + partition + "/" + name)

          initWriters()
          numberOfRecordsOnPart = 0

        }

        // Index of the current column, equals the index of its writer
        var i = 0

        // Write column values
        table.columns.foreach { column =>

          // foreach { case column: BoolColumn => ... } not working!

          column match {

            case column: BoolColumn => writers(i).write(column(r))
            case column: ByteColumn => writers(i).write(column(r))
            case column: CharColumn => writers(i).write(column(r))
            case column: DoubleColumn => writers(i).write(column(r))
            case column: FloatColumn => writers(i).write(column(r))
            case column: IntColumn => writers(i).write(column(r))
            case column: LongColumn => writers(i).write(column(r))
            case column: ShortColumn => writers(i).write(column(r))
            case column: MonthColumn => writers(i).write(column(r))
            case column: DateColumn => writers(i).write(column(r))
            case column: MinuteColumn => writers(i).write(column(r))
            case column: SecondColumn => writers(i).write(column(r))
            case column: TimeColumn => writers(i).write(column(r))
            case column: DateTimeColumn => writers(i).write(column(r))
            case column: TimestampColumn => writers(i).write(column(r))

            case column: SymbolColumn =>
              val companion = symbolCompanions(i)

              // NOTE(review): "- 1" presumably drops the leading quote that
              // Symbol.toString emits on older Scala versions -- confirm
              // when upgrading Scala.
              companion.lastEnded +=
                column(r).toString.getBytes(companion.writer.charset).size - 1

              // Column file gets the end offset, companion file the value
              writers(i).write(companion.lastEnded)
              companion.writer.write(column(r))

            case column: StringColumn =>
              val companion = stringCompanions(i)

              companion.lastEnded +=
                column(r).getBytes(companion.writer.charset).size

              // Column file gets the end offset, companion file the value
              writers(i).write(companion.lastEnded)
              companion.writer.write(column(r))
          }

          i += 1

        }

        numberOfRecordsOnPart += 1

      }

      // Finish the last partition
      writeRecords(numberOfRecordsOnPart)

    } finally {

      // Also reached on failure, so no file handle or lock stays behind
      releaseWriters()

    }
  }
}





© 2015 - 2025 Weber Informatics LLC | Privacy Policy