All Downloads are FREE. Search and download functionality uses the official Maven repository.

com.johnsnowlabs.client.util.CloudHelper.scala Maven / Gradle / Ivy

The newest version!
/*
 * Copyright 2017-2023 John Snow Labs
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *    http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package com.johnsnowlabs.client.util

import com.johnsnowlabs.nlp.util.io.CloudStorageType
import com.johnsnowlabs.nlp.util.io.CloudStorageType.CloudStorageType

import java.net.{URI, URL}

/** String-level helpers for recognising and taking apart cloud storage URIs
  * (S3, GCP Storage and Azure Blob).
  */
object CloudHelper {

  /** Splits the part of `uri` that follows `prefix` into `(bucket, path)`.
    *
    * The bucket is everything up to the first `/`; the path is whatever follows that single
    * separator and is empty when the URI names only a bucket.
    *
    * @param uri
    *   full storage URI
    * @param prefix
    *   scheme prefix whose length is skipped at the start of `uri`
    * @param emptyBucketMessage
    *   message of the `IllegalArgumentException` raised when no bucket is present
    */
  private def splitBucketAndPath(
      uri: String,
      prefix: String,
      emptyBucketMessage: String): (String, String) = {
    // `drop` (unlike `substring`) tolerates inputs shorter than the prefix.
    val (bucketName, rest) = uri.drop(prefix.length).span(_ != '/')
    // Validate before touching the path so a missing bucket is always reported via `require`.
    require(bucketName.nonEmpty, emptyBucketMessage)
    // Only the one separator slash is removed; any further slashes belong to the path.
    (bucketName, rest.stripPrefix("/"))
  }

  /** Splits an S3 URI into its bucket name and object key.
    *
    * URIs starting with `s3:` are parsed with the `s3://` prefix; every other input is parsed
    * as if it started with the six-character `s3a://` prefix. A URI that names only a bucket
    * (e.g. `s3://bucket`) yields an empty key.
    *
    * @param s3URI
    *   URI of the form `s3://bucket/key` or `s3a://bucket/key`
    * @param includePrefixInKey
    *   when true, the scheme prefix (without the bucket) is prepended to the returned key
    * @return
    *   `(bucketName, key)`
    * @throws IllegalArgumentException
    *   if the bucket name is empty
    */
  def parseS3URI(s3URI: String, includePrefixInKey: Boolean = false): (String, String) = {
    val prefix = if (s3URI.startsWith("s3:")) "s3://" else "s3a://"
    val (bucketName, key) = splitBucketAndPath(s3URI, prefix, "S3 bucket name is empty!")
    (bucketName, if (includePrefixInKey) prefix + key else key)
  }

  /** Splits a GCP Storage URI into its bucket name and storage path.
    *
    * The first five characters are skipped as the `gs://` prefix. A URI that names only a
    * bucket (e.g. `gs://bucket`) yields an empty storage path.
    *
    * @param gcpStorageURI
    *   URI of the form `gs://bucket/path`
    * @return
    *   `(bucketName, storagePath)`
    * @throws IllegalArgumentException
    *   if the bucket name is empty
    */
  def parseGCPStorageURI(gcpStorageURI: String): (String, String) =
    splitBucketAndPath(gcpStorageURI, "gs://", "GCP Storage bucket name is empty!")

  /** Splits the path of an Azure Blob URI into container name and blob path.
    *
    * The container is the first path segment; the blob path is everything after it and is
    * empty when the URI path contains only the container.
    *
    * @param azureBlobURI
    *   URI whose path has the form `/container/blob/path`
    * @return
    *   `(containerName, blobPath)`
    * @throws IllegalArgumentException
    *   if the container name is empty
    */
  def parseAzureBlobURI(azureBlobURI: String): (String, String) = {
    // `URI.getPath` is null for opaque URIs; treat that as an empty path.
    val path = Option(new URI(azureBlobURI).getPath).getOrElse("")
    val parts = path.stripPrefix("/").split("/", 2)
    val containerName = parts(0)
    require(containerName.nonEmpty, "Azure container name is empty!")
    val blobPath = if (parts.length > 1) parts(1) else ""

    (containerName, blobPath)
  }

  /** Returns the host of the URI with a trailing `.blob.core.windows.net` removed.
    *
    * @param azureBlobURI
    *   URI of the form `https://account.blob.core.windows.net/...`
    * @throws IllegalArgumentException
    *   if the URI has no host or the resulting account name is empty
    */
  def getAccountNameFromAzureBlobURI(azureBlobURI: String): String = {
    // `URI.getHost` is null when the URI has no server-based authority.
    val host = Option(new URI(azureBlobURI).getHost).getOrElse("")
    val accountName = host.stripSuffix(".blob.core.windows.net")
    require(accountName.nonEmpty, "Azure storage account name is empty!")
    accountName
  }

  /** Rewrites a URL of the form `scheme://host/container/path` as
    * `wasbs://container@host/path/`.
    *
    * Empty path segments are discarded before the container (first segment) and the
    * remaining path are extracted. The result always ends with `/`.
    *
    * @param azureURI
    *   URL accepted by `java.net.URL`
    * @throws IllegalArgumentException
    *   if the URL path contains no container segment
    */
  def transformURIToWASB(azureURI: String): String = {
    val url = new URL(azureURI)
    val host = url.getHost
    val pathParts = url.getPath.split("/").filter(_.nonEmpty)
    val container = pathParts.headOption.getOrElse("")
    require(container.nonEmpty, "Azure container name is empty!")
    val pathWithoutContainer = pathParts.drop(1).mkString("/")

    s"wasbs://$container@$host/$pathWithoutContainer/"
  }

  /** True when `uri` is recognised as an S3, GCP Storage or Azure Blob location. */
  def isCloudPath(uri: String): Boolean =
    isS3Path(uri) || isGCPStoragePath(uri) || isAzureBlobPath(uri)

  /** True when `uri` starts with `s3://` or `s3a://`. */
  def isS3Path(uri: String): Boolean =
    uri.startsWith("s3://") || uri.startsWith("s3a://")

  /** True when `uri` starts with `gs://`. */
  private def isGCPStoragePath(uri: String): Boolean = uri.startsWith("gs://")

  /** True when `uri` starts with `https://` and contains `.blob.core.windows.net/`. */
  private def isAzureBlobPath(uri: String): Boolean =
    uri.startsWith("https://") && uri.contains(".blob.core.windows.net/")

  /** Maps `uri` to the storage type matched by the path predicates of this object.
    *
    * @throws UnsupportedOperationException
    *   if `uri` is not recognised as S3, GCP Storage or Azure Blob
    */
  def cloudType(uri: String): CloudStorageType = {
    if (isS3Path(uri)) CloudStorageType.S3
    else if (isGCPStoragePath(uri)) CloudStorageType.GCP
    else if (isAzureBlobPath(uri)) CloudStorageType.Azure
    else throw new UnsupportedOperationException(s"Unsupported URI scheme: $uri")
  }

}




© 2015 - 2024 Weber Informatics LLC | Privacy Policy