All Downloads are FREE. Search and download functionalities are using the official Maven repository.

au.id.tmm.fetch.aws.s3.S3Store.scala Maven / Gradle / Ivy

The newest version!
package au.id.tmm.fetch.aws.s3

import au.id.tmm.digest4s.binarycodecs.syntax._
import au.id.tmm.digest4s.digest.MD5Digest
import au.id.tmm.digest4s.digest.syntax._
import au.id.tmm.fetch.aws.s3.S3Store.{S3CheckResult, S3KeyResolvedAgainstPrefix, Source}
import au.id.tmm.fetch.aws.{makeClientAsyncConfiguration, toIO}
import au.id.tmm.fetch.cache.KVStore
import au.id.tmm.fetch.files.Bytes
import au.id.tmm.utilities.errors.{ExceptionOr, GenericException}
import cats.effect.{IO, Resource}
import cats.syntax.traverse._
import software.amazon.awssdk.core.async.AsyncRequestBody
import software.amazon.awssdk.services.s3.S3AsyncClient
import software.amazon.awssdk.services.s3.model._
import sttp.client3.Response
import sttp.model.{HeaderNames, MediaType}

import scala.collection.immutable.ArraySeq
import scala.collection.mutable
import scala.jdk.CollectionConverters.MapHasAsScala

class S3Store private (
  bucket: S3BucketName,
  namePrefix: Option[S3Key],
  s3Client: S3AsyncClient,
) extends KVStore[IO, S3Key, S3Store.Source, S3ObjectRef] {
  override def get(k: S3Key): IO[Option[S3ObjectRef]] =
    contains(k).map {
      case true  => Some(makeObjectRefFor(resolve(k)))
      case false => None
    }

  override def contains(k: S3Key): IO[Boolean] = checkKey(resolve(k)).map {
    case S3CheckResult.NoObjectAtKey                                                => false
    case S3CheckResult.ObjectAtKeyMissingMetadata | S3CheckResult.ObjectAtKey(_, _) => true
  }

  override def put(k: S3Key, source: S3Store.Source): IO[S3ObjectRef] =
    for {
      resolvedKey <- IO.pure(resolve(k))
      checkResult <- checkKey(resolvedKey)
      _ <- checkResult match {
        case S3CheckResult.NoObjectAtKey | S3CheckResult.ObjectAtKeyMissingMetadata =>
          s3Put(resolvedKey, source, source.bytes.md5)

        case S3CheckResult.ObjectAtKey(existingObjectContentType, existingObjectChecksum) =>
          for {
            digest <- IO.pure(source.bytes.md5)
            _ <-
              if (existingObjectChecksum == digest && source.contentType.contains(existingObjectContentType)) {
                IO.unit
              } else {
                s3Put(resolvedKey, source, digest)
              }
          } yield ()
      }
    } yield makeObjectRefFor(resolvedKey)

  override def drop(k: S3Key): IO[Unit] = {
    val request = DeleteObjectRequest
      .builder()
      .bucket(bucket.asString)
      .key(resolve(k).asKey.toRaw)
      .build()

    toIO(IO(s3Client.deleteObject(request))).as(())
  }

  private def resolve(givenKey: S3Key): S3KeyResolvedAgainstPrefix = namePrefix match {
    case Some(prefix) => S3KeyResolvedAgainstPrefix(prefix.resolve(givenKey))
    case None         => S3KeyResolvedAgainstPrefix(givenKey)
  }

  private def makeObjectRefFor(resolvedKey: S3KeyResolvedAgainstPrefix) =
    S3ObjectRef(bucket, resolvedKey.asKey)

  private def checkKey(key: S3KeyResolvedAgainstPrefix): IO[S3CheckResult] = {
    val request: HeadObjectRequest = HeadObjectRequest
      .builder()
      .bucket(bucket.asString)
      .key(key.asKey.toRaw)
      .build()

    val maybeSdkHeadResponse: IO[Option[HeadObjectResponse]] =
      toIO(IO(s3Client.headObject(request)))
        .map[Option[HeadObjectResponse]](Some(_))
        .recover { case _: NoSuchKeyException =>
          None
        }

    maybeSdkHeadResponse.flatMap {
      case Some(response) => IO.fromEither(checkResultFrom(response))
      case None           => IO.pure(S3CheckResult.NoObjectAtKey)
    }
  }

  private def checkResultFrom(sdkHeadResponse: HeadObjectResponse): ExceptionOr[S3CheckResult] = {
    val metadataMap: mutable.Map[String, String] = sdkHeadResponse.metadata.asScala

    val contentType: Option[String] = metadataMap.get("Content-Type")

    val errorOrDigest: ExceptionOr[Option[MD5Digest]] = metadataMap
      .get("Content-MD5")
      .traverse(_.parseBase64.map(MD5Digest.apply))

    errorOrDigest.map { digest =>
      (contentType, digest) match {
        case (Some(contentType), Some(digest)) => S3CheckResult.ObjectAtKey(contentType, digest)
        case _                                 => S3CheckResult.ObjectAtKeyMissingMetadata
      }
    }
  }

  private def s3Put(
    key: S3KeyResolvedAgainstPrefix,
    source: Source,
    sourceMd5: MD5Digest,
  ): IO[Unit] = {
    val request = PutObjectRequest
      .builder()
      .bucket(bucket.asString)
      .key(key.asKey.toRaw)
      .contentType(source.contentType.map(_.toString).orNull)
      .contentMD5(sourceMd5.asBase64String)
      .build()

    val body = AsyncRequestBody.fromBytes(Bytes.toByteArrayUnsafe(source.bytes))

    toIO(IO(s3Client.putObject(request, body))).as(())
  }

}

object S3Store {

  /**
    * Attempts to use the `ExecutionContext` of `IO` as the `Executor` for the `S3AsyncClient`. Just uses the default if
    * this fails.
    */
  def apply(bucket: S3BucketName, namePrefix: Option[S3Key]): Resource[IO, S3Store] =
    for {
      s3Client <- s3ClientResource
    } yield new S3Store(bucket, namePrefix, s3Client)

  private val s3ClientResource: Resource[IO, S3AsyncClient] =
    Resource.fromAutoCloseable {
      for {
        clientAsyncConfiguration <- makeClientAsyncConfiguration
      } yield S3AsyncClient.builder().asyncConfiguration(clientAsyncConfiguration).build()
    }

  // TODO this is too generic to be specifically on the S3 store companion
  final case class Source(
    bytes: ArraySeq[Byte], // TODO not sure about the type here
    contentType: Option[MediaType],
  )

  object Source {
    def fromSttpResponse(response: Response[Either[_, Array[Byte]]]): ExceptionOr[Source] =
      for {
        bytes <- response.body
          .map(bytes => new ArraySeq.ofByte(bytes))
          .left
          .map(errorResponse =>
            GenericException(s"Http response code was ${response.code.code}, message was $errorResponse"),
          )
        contentType = response
          .header(HeaderNames.ContentType)
          .flatMap(s => MediaType.parse(s).toOption)
      } yield Source(bytes, contentType)
  }

  private sealed trait S3CheckResult

  private object S3CheckResult {
    case object NoObjectAtKey                                                    extends S3CheckResult
    case object ObjectAtKeyMissingMetadata                                       extends S3CheckResult
    final case class ObjectAtKey(objectContentType: String, checksum: MD5Digest) extends S3CheckResult
  }

  private final case class S3KeyResolvedAgainstPrefix(asKey: S3Key)

}




© 2015 - 2024 Weber Informatics LLC | Privacy Policy