All Downloads are FREE. Search and download functionalities are using the official Maven repository.

io.github.seabow.datax.common.ObjectStorageUtils.scala Maven / Gradle / Ivy

package io.github.seabow.datax.common

import org.apache.hadoop.fs.{ContentSummary, FileStatus, Path}

import scala.collection.mutable.ListBuffer
import scala.concurrent.ExecutionContext.Implicits.global
import scala.concurrent.{Await, ExecutionContext, Future}
import scala.concurrent.duration.Duration
/**
 *  Some object storages perform poorly when removing files. This object offers a workaround:
 *  recursively list the files under directories and clean them up concurrently.
 */
object ObjectStorageUtils {

  /**
   * Combines futures that are already running into a single future that completes once
   * every one of them has completed successfully, or fails with the first failure found
   * in iteration order.
   *
   * The supplied `ec` is passed explicitly so the combinators never fall back to the
   * implicitly imported global execution context.
   *
   * @param tasks futures to wait for; may be empty
   * @param ec    execution context used to chain the futures
   * @return a future that completes after all `tasks` have completed
   */
  private def whenAllDone(tasks: Seq[Future[Any]], ec: ExecutionContext): Future[Any] =
    tasks.foldLeft(Future.successful(()): Future[Any]) { (done, task) =>
      done.flatMap(_ => task)(ec)
    }

  /**
   * Deletes `status` and, when it is a directory, everything listed beneath it.
   *
   * Children are always listed for a directory. The previous `getLen > 0` guard is gone
   * because file systems commonly report a length of 0 for directories, which skipped the
   * concurrent child deletion. The directory itself is deleted only after all of its
   * children have been deleted successfully; no thread is blocked while waiting.
   *
   * Listing runs on the calling thread; the individual delete calls run on `ec`.
   *
   * @param status file or directory to delete
   * @param ec     execution context on which the delete calls run
   * @return a future that completes once `status` and all listed descendants are deleted
   */
  private def recurseDelete(status: FileStatus, ec: ExecutionContext): Future[Any] = {
    val path = status.getPath.toString
    val childrenDeleted: Future[Any] =
      if (status.isDirectory) {
        val childTasks = HdfsUtils.listStatus(path).map(child => recurseDelete(child, ec)).toSeq
        whenAllDone(childTasks, ec)
      } else {
        Future.successful(())
      }
    childrenDeleted.map { _ =>
      HdfsUtils.delete(path)
      println(s"Deleted $path")
    }(ec)
  }

  /**
   * Copies `srcStatus` to `dstPath`, descending into directories.
   *
   * For a directory, every listed child is copied to `dstPath` + separator + child name,
   * and the returned future completes only when all of those copies have completed.
   * Previously the child futures were dropped, so the result completed immediately.
   * For a file, the copy is skipped when the destination already exists with the same
   * length as the source.
   *
   * @param srcStatus source file or directory
   * @param dstPath   destination path corresponding to `srcStatus`
   * @param ec        execution context on which the copy calls run
   * @return a future that completes when every file below `srcStatus` has been handled
   */
  def recurseCopy(srcStatus:FileStatus,dstPath:String,ec:ExecutionContext): Future[Any]= {
    val srcPath = srcStatus.getPath.toString
    if (srcStatus.isDirectory) {
      val childTasks = HdfsUtils.listStatus(srcPath).map { child =>
        recurseCopy(child, dstPath + Path.SEPARATOR + child.getPath.getName, ec)
      }.toSeq
      whenAllDone(childTasks, ec)
    } else {
      Future {
        // Equal lengths are treated as "already copied"; content is not compared.
        val upToDate = HdfsUtils.exist(dstPath) &&
          HdfsUtils.getStatus(dstPath).getLen == HdfsUtils.getStatus(srcPath).getLen
        if (!upToDate) {
          HdfsUtils.copy(srcPath, dstPath)
        }
        // NOTE(review): this line is printed even when the copy was skipped above.
        println(s"copied $srcPath -> $dstPath")
      }(ec)
    }
  }

  /**
   * Deletes `dirPath` and everything listed beneath it, blocking until finished.
   *
   * @param dirPath path to delete
   * @param ec      execution context on which the delete calls run
   */
  def deleteObjectStorageDir(dirPath:String,ec:ExecutionContext):Unit={
    Await.result(recurseDelete(HdfsUtils.getStatus(dirPath), ec), Duration.Inf)
  }

  /**
   * Deletes each of `dirPaths` and everything listed beneath them, blocking until all
   * deletions have finished.
   *
   * @param dirPaths paths to delete; may be empty
   * @param ec       execution context on which the delete calls run
   */
  def deleteObjectStorageDirs(dirPaths:Seq[String],ec:ExecutionContext):Unit={
    val delTasks = dirPaths.map(dirPath => recurseDelete(HdfsUtils.getStatus(dirPath), ec))
    Await.result(whenAllDone(delTasks, ec), Duration.Inf)
  }

  /**
   * Copies `srcDir` to `dstDir` recursively, blocking until every file has been handled.
   *
   * @param srcDir           source path
   * @param dstDir           destination path
   * @param executionContext execution context on which the copy calls run
   */
  def  copyObjectStorageDir(srcDir:String,dstDir:String,executionContext: ExecutionContext):Unit={
    Await.result(recurseCopy(HdfsUtils.getStatus(srcDir), dstDir, executionContext), Duration.Inf)
  }
}




© 2015 - 2025 Weber Informatics LLC | Privacy Policy