All downloads are FREE. The search and download features use the official Maven repository.
Please wait. This can take a few minutes ...
Downloading a project requires many resources. Please understand that we have to cover our server costs. Thank you in advance.
Project price: only $1
You can buy this project and download/modify it as often as you want.
com.ebiznext.comet.schema.handlers.StorageHandler.scala Maven / Gradle / Ivy
/*
*
* * Licensed to the Apache Software Foundation (ASF) under one or more
* * contributor license agreements. See the NOTICE file distributed with
* * this work for additional information regarding copyright ownership.
* * The ASF licenses this file to You under the Apache License, Version 2.0
* * (the "License"); you may not use this file except in compliance with
* * the License. You may obtain a copy of the License at
* *
* * http://www.apache.org/licenses/LICENSE-2.0
* *
* * Unless required by applicable law or agreed to in writing, software
* * distributed under the License is distributed on an "AS IS" BASIS,
* * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* * See the License for the specific language governing permissions and
* * limitations under the License.
*
*
*/
package com.ebiznext.comet.schema.handlers
import java.io.ByteArrayOutputStream
import java.time.{Instant, LocalDateTime, ZoneId}
import com.ebiznext.comet.config.Settings
import com.typesafe.scalalogging.StrictLogging
import org.apache.commons.io.IOUtils
import org.apache.hadoop.conf.Configuration
import org.apache.hadoop.fs._
import org.apache.spark.sql.execution.streaming.FileStreamSource.Timestamp
import scala.concurrent.duration.FiniteDuration
import scala.util.Try
/**
* Interface required by any filesystem manager
*/
trait StorageHandler extends StrictLogging {

  // ---------------------------------------------------------------------------
  // Existence, creation and removal
  // ---------------------------------------------------------------------------

  /** Tells whether `path` exists on the underlying storage. */
  def exists(path: Path): Boolean

  /** Creates the folder `path`; the Boolean is the implementation's success flag. */
  def mkdirs(path: Path): Boolean

  /** Deletes `path`; the Boolean is the implementation's success flag. */
  def delete(path: Path): Boolean

  /** Creates the file at `path`; any failure is captured in the returned `Try`. */
  def touchz(path: Path): Try[Unit]

  /** Refreshes the timestamp of `path`; any failure is captured in the returned `Try`. */
  def touch(path: Path): Try[Unit]

  // ---------------------------------------------------------------------------
  // Moving and copying
  // ---------------------------------------------------------------------------

  /** Moves `src` to `dst`; the Boolean is the implementation's success flag. */
  def move(src: Path, dst: Path): Boolean

  /** Copies `source`, expected on the local file system, to `dest` on the target storage. */
  def copyFromLocal(source: Path, dest: Path): Unit

  /** Moves `source`, expected on the local file system, to `dest` on the target storage. */
  def moveFromLocal(source: Path, dest: Path): Unit

  /** Expands the zip archive `source` into `targetDir`; failures are captured in the `Try`. */
  def unzip(source: Path, targetDir: Path): Try[Unit]

  // ---------------------------------------------------------------------------
  // Reading and writing content
  // ---------------------------------------------------------------------------

  /** Returns the textual content of the file at `path`. */
  def read(path: Path): String

  /** Stores `data` as the content of the file at `path`. */
  def write(data: String, path: Path): Unit

  /** Opens an output stream on `path`; the caller is responsible for closing it. */
  def getOutputStream(path: Path): FSDataOutputStream

  // ---------------------------------------------------------------------------
  // Listing
  // ---------------------------------------------------------------------------

  /** Lists the directories located directly under `path`. */
  def listDirectories(path: Path): List[Path]

  /**
    * Lists the files located under `path`.
    *
    * @param path      folder to inspect
    * @param extension suffix the file names must end with; the default `""` matches every file
    * @param since     lower bound on the modification time; the default `LocalDateTime.MIN`
    *                  matches every file
    * @return the matching paths
    */
  def list(path: Path, extension: String = "", since: LocalDateTime = LocalDateTime.MIN): List[Path]

  // ---------------------------------------------------------------------------
  // Metadata
  // ---------------------------------------------------------------------------

  /** Block size the storage reports for `path`. */
  def blockSize(path: Path): Long

  /** Content summary the storage reports for `path`. */
  def contentSummary(path: Path): ContentSummary

  /** Space consumed by `path`, as reported by the storage. */
  def spaceConsumed(path: Path): Long

  /** Last modification time of `path`. */
  def lastModified(path: Path): Timestamp

  // ---------------------------------------------------------------------------
  // Lock timing
  // ---------------------------------------------------------------------------

  /** Polling period used while waiting to acquire a lock. */
  def lockAcquisitionPollTime: FiniteDuration

  /** Period at which a held lock is refreshed. */
  def lockRefreshPollTime: FiniteDuration
}
/**
* HDFS Filesystem Handler
*/
class HdfsStorageHandler(fileSystem: Option[String])(implicit
  settings: Settings
) extends StorageHandler {

  import scala.util.control.NonFatal

  /** Hadoop configuration used to obtain [[fs]]. */
  val conf: Configuration = new Configuration()

  /**
    * The `fileSystem` constructor argument rewritten into the form stored in `fs.defaultFS`:
    *
    *  - a name ending with `:` gets `///` appended (`file:` becomes `file:///`);
    *  - a name ending with `://` gets `/.` appended;
    *  - any other name ending with `/` loses that trailing slash;
    *  - everything else is kept untouched.
    *
    * An empty name is treated as "not provided" instead of failing on `last` of an empty string.
    */
  lazy val normalizedFileSystem: Option[String] =
    fileSystem.filter(_.nonEmpty).map { fsName =>
      if (fsName.endsWith(":")) fsName + "///"
      else if (fsName.endsWith("://")) fsName + "/."
      else if (fsName.endsWith("/")) fsName.dropRight(1)
      else fsName
    }

  /** Poll time read from `settings.comet.lock.pollTime`. */
  override def lockAcquisitionPollTime: FiniteDuration = settings.comet.lock.pollTime

  /** Refresh time read from `settings.comet.lock.refreshTime`. */
  override def lockRefreshPollTime: FiniteDuration = settings.comet.lock.refreshTime

  // Order matters: the default file system is set first so that the entries of
  // settings.comet.hadoop, applied afterwards, can still override it.
  normalizedFileSystem.foreach(defaultFs => conf.set("fs.defaultFS", defaultFs))

  import scala.collection.JavaConverters._

  settings.comet.hadoop.asScala.toMap.foreach { case (key, value) =>
    conf.set(key, value)
  }

  /** File system resolved from [[conf]]; every operation of this handler goes through it. */
  val fs: FileSystem = FileSystem.get(conf)

  logger.info(s"fs=$fs")
  logger.info(s"fs.getHomeDirectory=${fs.getHomeDirectory}")
  logger.info(s"fs.getUri=${fs.getUri}")

  /** Exposes a Hadoop `RemoteIterator` as a Scala `Iterator` without needing an implicit conversion. */
  private def remoteToScala[T](remote: RemoteIterator[T]): Iterator[T] =
    new Iterator[T] {
      override def hasNext: Boolean = remote.hasNext
      override def next(): T = remote.next()
    }

  /**
    * Gets the output stream of a path. Any existing file at `path` is deleted
    * (non recursively) before the new one is created.
    *
    * @param path : path
    * @return FSDataOutputStream, to be closed by the caller
    */
  override def getOutputStream(path: Path): FSDataOutputStream = {
    fs.delete(path, false)
    fs.create(path)
  }

  /**
    * Read a UTF-8 text file into a string. Used to load yml configuration files.
    *
    * @param path : Absolute file path
    * @return file content as a string
    */
  override def read(path: Path): String = {
    val stream = fs.open(path)
    // Close the stream even when decoding fails, otherwise the handle leaks.
    try IOUtils.toString(stream, "UTF-8")
    finally stream.close()
  }

  /**
    * Write a string to a UTF-8 text file. Used for yml configuration files.
    *
    * @param data file content as a string
    * @param path : Absolute file path
    */
  override def write(data: String, path: Path): Unit = {
    val os: FSDataOutputStream = getOutputStream(path)
    // DataOutputStream.writeBytes keeps only the low byte of each char and therefore
    // corrupts non-ASCII text; encode explicitly as UTF-8 instead.
    try os.write(data.getBytes(java.nio.charset.StandardCharsets.UTF_8))
    finally os.close()
  }

  /**
    * Write bytes to a binary file. Used for zip / gzip input test files.
    *
    * @param data file content as bytes
    * @param path : Absolute file path
    */
  def writeBinary(data: Array[Byte], path: Path): Unit = {
    val os: FSDataOutputStream = getOutputStream(path)
    try os.write(data, 0, data.length)
    finally os.close()
  }

  /**
    * List the directories located directly under a folder.
    *
    * @param path Absolute folder path
    * @return paths of the direct sub-directories
    */
  override def listDirectories(path: Path): List[Path] =
    fs.listStatus(path).filter(_.isDirectory).map(_.getPath).toList

  /**
    * List all files in folder (non recursive), sorted by modification time then by name.
    * When the listing fails with a non-fatal error, a warning is logged and an empty list
    * is returned.
    *
    * @param path      Absolute folder path
    * @param extension : Files should end with this string. To list all files, simply provide an empty string
    * @param since     Listed files must have been modified strictly after this time (compared in
    *                  the system default time zone). To list all files, simply provide the
    *                  beginning of all times
    * @return List of Path
    */
  override def list(path: Path, extension: String, since: LocalDateTime): List[Path] = {
    logger.info(s"list($path, $extension, $since)")
    try {
      remoteToScala(fs.listFiles(path, false))
        .filter { status =>
          logger.info(s"found file=$status")
          val modifiedAt = LocalDateTime.ofInstant(
            Instant.ofEpochMilli(status.getModificationTime),
            ZoneId.systemDefault
          )
          modifiedAt.isAfter(since) && status.getPath.getName.endsWith(extension)
        }
        .toList
        .sortBy(status => (status.getModificationTime, status.getPath.getName))
        .map(_.getPath)
    } catch {
      // Only recover from non-fatal errors; OutOfMemoryError, InterruptedException and
      // the like must keep propagating.
      case NonFatal(e) =>
        logger.warn(s"Ignoring folder $path", e)
        Nil
    }
  }

  /**
    * Move file. Any existing destination is deleted first and the parent folder of the
    * destination is created when missing.
    *
    * @param path source path (file or folder)
    * @param dest destination path (file or folder)
    * @return the result of the underlying rename
    */
  override def move(path: Path, dest: Path): Boolean = {
    delete(dest)
    mkdirs(dest.getParent)
    fs.rename(path, dest)
  }

  /**
    * Delete file or folder recursively (skip trash).
    *
    * @param path : Absolute path of file to delete
    * @return the result of the underlying delete
    */
  override def delete(path: Path): Boolean =
    fs.delete(path, true)

  /**
    * Create folder if it does not exist, including any intermediary non existent folder.
    *
    * @param path Absolute path of folder to create
    * @return the result of the underlying mkdirs
    */
  override def mkdirs(path: Path): Boolean =
    fs.mkdirs(path)

  /**
    * Copy file from local filesystem to target file system.
    *
    * @param source Local file path
    * @param dest   destination file path
    */
  override def copyFromLocal(source: Path, dest: Path): Unit =
    fs.copyFromLocalFile(source, dest)

  /**
    * Move file from local filesystem to target file system.
    * When the scheme of this handler's file system is "file" the move is delegated to
    * `moveFromLocalFile`, otherwise a regular [[move]] is issued.
    *
    * NOTE(review): the test is made on the scheme of the target file system, not on the
    * scheme of `source` - confirm this is the intent.
    *
    * @param source Local file path
    * @param dest   destination file path
    */
  override def moveFromLocal(source: Path, dest: Path): Unit = {
    if (fs.getScheme() == "file")
      fs.moveFromLocalFile(source, dest)
    else
      move(source, dest)
  }

  /** Tells whether `path` exists on the file system. */
  override def exists(path: Path): Boolean =
    fs.exists(path)

  /** Default block size of the file system for `path`. */
  override def blockSize(path: Path): Long =
    fs.getDefaultBlockSize(path)

  /** Content summary of `path` as reported by the file system. */
  override def contentSummary(path: Path): ContentSummary =
    fs.getContentSummary(path)

  /** Space consumed by `path`, taken from its content summary. */
  override def spaceConsumed(path: Path): Long =
    contentSummary(path).getSpaceConsumed

  /** Modification time of `path` as found in its file status. */
  override def lastModified(path: Path): Timestamp =
    fs.getFileStatus(path).getModificationTime

  /**
    * Create an empty file without overwriting: the returned `Try` is a failure when the
    * file cannot be created.
    *
    * @param path Absolute path of the file to create
    */
  override def touchz(path: Path): Try[Unit] =
    Try(fs.create(path, false).close())

  /**
    * Set the modification time of `path` to the current time. The access time argument
    * is -1, which the Hadoop API documents as "do not change".
    *
    * @param path Absolute path of the file to touch
    */
  override def touch(path: Path): Try[Unit] =
    Try(fs.setTimes(path, System.currentTimeMillis(), -1))

  /**
    * Extract every entry of a zip archive under a target folder.
    *
    * Directory entries are created as folders and extraction continues with the entries
    * that follow them. File entries are written to `targetDir/<entry name>`, overwriting
    * any existing file. Entries whose name is absolute or contains a `..` segment are
    * rejected so that nothing can be written outside of `targetDir`.
    *
    * @param sourceFile Absolute path of the zip archive
    * @param targetDir  Folder receiving the extracted entries
    * @return a failure when the archive does not exist, holds an illegal entry name or
    *         cannot be read / written
    */
  override def unzip(sourceFile: Path, targetDir: Path): Try[Unit] = {
    import java.util.zip.ZipInputStream
    Try {
      if (!fs.exists(sourceFile)) throw new Exception(sourceFile.toString + " does not exist")
      val zipInputStream = new ZipInputStream(fs.open(sourceFile))
      try {
        Iterator
          .continually(zipInputStream.getNextEntry)
          .takeWhile(_ != null)
          .foreach { zipEntry =>
            val entryName = zipEntry.getName
            // Guard against "zip slip": an entry must never resolve outside of targetDir.
            if (entryName.startsWith("/") || entryName.split("[/\\\\]").contains(".."))
              throw new IllegalArgumentException(s"Illegal zip entry name: $entryName")
            val target = new Path(targetDir, entryName)
            if (zipEntry.isDirectory)
              fs.mkdirs(target)
            else {
              fs.mkdirs(target.getParent)
              val entryOutput = fs.create(target, true)
              // Reading the zip stream stops at the end of the current entry.
              try IOUtils.copy(zipInputStream, entryOutput)
              finally entryOutput.close()
            }
            zipInputStream.closeEntry()
          }
      } finally zipInputStream.close()
    }
  }
}