All Downloads are FREE. The search and download functionality uses the official Maven repository.

com.github.mrpowers.spark.daria.utils.DirHelpers.scala Maven / Gradle / Ivy

package com.github.mrpowers.spark.daria.utils

import org.apache.spark.sql.SparkSession

/**
 * Helpers for measuring the size of a path and turning that size into a
 * partition count (one partition per whole gigabyte).
 */
object DirHelpers {

  /** Number of bytes in one gigabyte as used by [[bytesToGb]] (2^30). */
  private val BytesPerGb: Long = 1073741824L

  /**
   * Session whose Hadoop configuration is used to resolve file systems.
   * Built on first access through `SparkSession.builder()...getOrCreate()`.
   */
  lazy val spark: SparkSession = {
    SparkSession
      .builder()
      .master("local")
      .appName("spark session")
      .getOrCreate()
  }

  /**
   * Returns the length, in bytes, that the file system's content summary
   * reports for the given path.
   *
   * @param dirname path string; its file system is looked up with the
   *                Hadoop configuration of [[spark]]
   * @return the `getLength` value of the path's content summary
   */
  def numBytes(dirname: String): Long = {
    val filePath   = new org.apache.hadoop.fs.Path(dirname)
    val fileSystem = filePath.getFileSystem(spark.sparkContext.hadoopConfiguration)
    fileSystem.getContentSummary(filePath).getLength
  }

  /**
   * Converts a byte count to whole gigabytes using integer division, so any
   * fractional gigabyte is discarded (e.g. anything below 2^30 bytes gives 0).
   *
   * @param bytes size in bytes
   * @return number of whole gigabytes
   */
  def bytesToGb(bytes: Long): Long = {
    bytes / BytesPerGb
  }

  /**
   * Picks a partition count of one partition per gigabyte, never less than 1.
   *
   * The result is clamped to `[1, Int.MaxValue]` before narrowing: a plain
   * `Long.toInt` keeps only the low 32 bits, which would turn very large
   * inputs into zero or negative counts, and negative inputs would otherwise
   * be returned as-is.
   *
   * @param gigabytes size in whole gigabytes
   * @return `gigabytes` as an `Int`, bounded to the range `1..Int.MaxValue`
   */
  def num1GBPartitions(gigabytes: Long): Int = {
    val atLeastOne = math.max(gigabytes, 1L)
    math.min(atLeastOne, Int.MaxValue.toLong).toInt
  }

}




© 2015 - 2024 Weber Informatics LLC | Privacy Policy