akka.stream.alpakka.s3.impl.S3Stream.scala
Alpakka is a Reactive Enterprise Integration library for Java and Scala, based on Reactive Streams and Akka.
/*
* Copyright (C) 2016-2018 Lightbend Inc.
*/
package akka.stream.alpakka.s3.impl
import java.time.{Instant, LocalDate}
import scala.collection.immutable.Seq
import scala.concurrent.{ExecutionContext, Future}
import scala.util.{Failure, Success}
import akka.{Done, NotUsed}
import akka.actor.ActorSystem
import akka.http.scaladsl.Http
import akka.http.scaladsl.model.StatusCodes.{NoContent, NotFound, OK}
import akka.http.scaladsl.model.headers.{`Content-Length`, ByteRange, CustomHeader}
import akka.http.scaladsl.model._
import akka.http.scaladsl.unmarshalling.{Unmarshal, Unmarshaller}
import akka.stream.Materializer
import akka.stream.alpakka.s3.auth.{CredentialScope, Signer, SigningKey}
import akka.stream.alpakka.s3.scaladsl.{ListBucketResultContents, ObjectMetadata}
import akka.stream.alpakka.s3.{DiskBufferType, MemoryBufferType, S3Exception, S3Settings}
import akka.stream.scaladsl.{Flow, Keep, RunnableGraph, Sink, Source}
import akka.util.ByteString
final case class S3Location(bucket: String, key: String)
final case class MultipartUpload(s3Location: S3Location, uploadId: String)
sealed trait UploadPartResponse {
def multipartUpload: MultipartUpload
def index: Int
}
final case class SuccessfulUploadPart(multipartUpload: MultipartUpload, index: Int, etag: String)
extends UploadPartResponse
final case class FailedUploadPart(multipartUpload: MultipartUpload, index: Int, exception: Throwable)
extends UploadPartResponse
final case class FailedUpload(reasons: Seq[Throwable]) extends Exception(reasons.map(_.getMessage).mkString(", "))
final case class CompleteMultipartUploadResult(location: Uri, bucket: String, key: String, etag: String)
final case class ListBucketResult(isTruncated: Boolean,
continuationToken: Option[String],
contents: Seq[ListBucketResultContents])
sealed trait ApiVersion {
def getInstance: ApiVersion
}
case object ListBucketVersion1 extends ApiVersion {
override val getInstance: ApiVersion = ListBucketVersion1
}
case object ListBucketVersion2 extends ApiVersion {
override val getInstance: ApiVersion = ListBucketVersion2
}
final case class CopyPartResult(lastModified: Instant, eTag: String)
final case class CopyPartition(partNumber: Int, sourceLocation: S3Location, range: Option[ByteRange.Slice] = None)
final case class MultipartCopy(multipartUpload: MultipartUpload, copyPartition: CopyPartition)
object S3Stream {
def apply(settings: S3Settings)(implicit system: ActorSystem, mat: Materializer): S3Stream =
new S3Stream(settings)
}
private[alpakka] final class S3Stream(settings: S3Settings)(implicit system: ActorSystem, mat: Materializer) {
import HttpRequests._
import Marshalling._
implicit val conf = settings
val MinChunkSize = 5242880 // 5 MiB in bytes, the minimum part size S3 accepts for multipart uploads
// a def (re-evaluated on each use) because credentials and session tokens can expire
def signingKey = SigningKey(
settings.credentialsProvider,
CredentialScope(LocalDate.now(), settings.s3RegionProvider.getRegion, "s3")
)
def download(s3Location: S3Location,
range: Option[ByteRange],
sse: Option[ServerSideEncryption]): (Source[ByteString, NotUsed], Future[ObjectMetadata]) = {
import mat.executionContext
val s3Headers = S3Headers(sse.fold[Seq[HttpHeader]](Seq.empty) { _.headersFor(GetObject) })
val future = request(s3Location, rangeOption = range, s3Headers = s3Headers)
val source = Source
.fromFuture(future.flatMap(entityForSuccess))
.map(_.dataBytes)
.flatMapConcat(identity)
val meta = future.map(resp ⇒ computeMetaData(resp.headers, resp.entity))
(source, meta)
}
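// Illustrative usage sketch (added for clarity, not part of the original source); assumes an
// implicit Materializer and ExecutionContext in scope and that `stream` is an instance of this class:
//
//   val (data, metaF) = stream.download(S3Location("my-bucket", "my-key"), range = None, sse = None)
//   val bytesF: Future[ByteString] = data.runFold(ByteString.empty)(_ ++ _)
//   metaF.foreach(meta => println(s"object is ${meta.contentLength} bytes"))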
def listBucket(bucket: String, prefix: Option[String] = None): Source[ListBucketResultContents, NotUsed] = {
sealed trait ListBucketState
case object Starting extends ListBucketState
case class Running(continuationToken: String) extends ListBucketState
case object Finished extends ListBucketState
import system.dispatcher
def listBucketCall(token: Option[String]): Future[Option[(ListBucketState, Seq[ListBucketResultContents])]] =
signAndGetAs[ListBucketResult](HttpRequests.listBucket(bucket, prefix, token))
.map { (res: ListBucketResult) =>
Some(
res.continuationToken
.fold[(ListBucketState, Seq[ListBucketResultContents])]((Finished, res.contents))(
t => (Running(t), res.contents)
)
)
}
Source
.unfoldAsync[ListBucketState, Seq[ListBucketResultContents]](Starting) {
case Finished => Future.successful(None)
case Starting => listBucketCall(None)
case Running(token) => listBucketCall(Some(token))
}
.mapConcat(identity)
}
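// Illustrative usage sketch (added for clarity, not part of the original source): listBucket pages
// through the bucket via unfoldAsync, following the continuation token until the listing is no
// longer truncated, and flattens each page's contents into a single stream:
//
//   stream.listBucket("my-bucket", prefix = Some("logs/2018/"))
//     .runForeach(c => println(s"${c.key} -> ${c.size} bytes"))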
def getObjectMetadata(bucket: String,
key: String,
sse: Option[ServerSideEncryption]): Future[Option[ObjectMetadata]] = {
implicit val ec = mat.executionContext
val s3Headers = S3Headers(sse.fold[Seq[HttpHeader]](Seq.empty) { _.headersFor(HeadObject) })
request(S3Location(bucket, key), HttpMethods.HEAD, s3Headers = s3Headers).flatMap {
case HttpResponse(OK, headers, entity, _) =>
entity.discardBytes().future().map { _ =>
Some(computeMetaData(headers, entity))
}
case HttpResponse(NotFound, _, entity, _) =>
entity.discardBytes().future().map(_ => None)
case HttpResponse(_, _, entity, _) =>
Unmarshal(entity).to[String].map { err =>
throw new S3Exception(err)
}
}
}
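// Illustrative usage sketch (added for clarity, not part of the original source): the HEAD request
// surfaces a missing object as None rather than as a failed future:
//
//   stream.getObjectMetadata("my-bucket", "maybe-missing-key", sse = None).foreach {
//     case Some(meta) => println(s"exists, ${meta.contentLength} bytes")
//     case None       => println("no such object")
//   }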
def deleteObject(s3Location: S3Location): Future[Done] = {
implicit val ec = mat.executionContext
request(s3Location, HttpMethods.DELETE).flatMap {
case HttpResponse(NoContent, _, entity, _) =>
entity.discardBytes().future().map(_ => Done)
case HttpResponse(_, _, entity, _) =>
Unmarshal(entity).to[String].map { err =>
throw new S3Exception(err)
}
}
}
def putObject(s3Location: S3Location,
contentType: ContentType,
data: Source[ByteString, _],
contentLength: Long,
s3Headers: S3Headers,
sse: Option[ServerSideEncryption]): Future[ObjectMetadata] = {
// TODO can we take in a Source[ByteString, NotUsed] without forcing chunking
// chunked requests are causing S3 to think this is a multipart upload
implicit val ec: ExecutionContext = mat.executionContext
val headers = S3Headers(
s3Headers.headers ++ sse.fold[Seq[HttpHeader]](Seq.empty) { _.headersFor(PutObject) }
)
val req = uploadRequest(s3Location, data, contentLength, contentType, headers)
val resp = for {
signedRequest <- Signer.signedRequest(req, signingKey)
resp <- Http().singleRequest(signedRequest)
} yield resp
resp.flatMap {
case HttpResponse(OK, h, entity, _) =>
entity.discardBytes().future().map { _ =>
ObjectMetadata(h :+ `Content-Length`(entity.contentLengthOption.getOrElse(0)))
}
case HttpResponse(_, _, entity, _) =>
Unmarshal(entity).to[String].map { err =>
throw new S3Exception(err)
}
}
}
def request(s3Location: S3Location,
method: HttpMethod = HttpMethods.GET,
rangeOption: Option[ByteRange] = None,
s3Headers: S3Headers = S3Headers.empty): Future[HttpResponse] =
signAndGet(requestHeaders(getDownloadRequest(s3Location, method, s3Headers), rangeOption))
private def requestHeaders(downloadRequest: HttpRequest, rangeOption: Option[ByteRange]): HttpRequest =
rangeOption match {
case Some(range) => downloadRequest.addHeader(headers.Range(range))
case _ => downloadRequest
}
/**
* Uploads a stream of ByteStrings to a specified location as a multipart upload.
*/
def multipartUpload(
s3Location: S3Location,
contentType: ContentType = ContentTypes.`application/octet-stream`,
s3Headers: S3Headers,
sse: Option[ServerSideEncryption] = None,
chunkSize: Int = MinChunkSize,
chunkingParallelism: Int = 4
): Sink[ByteString, Future[CompleteMultipartUploadResult]] =
chunkAndRequest(s3Location, contentType, s3Headers, chunkSize, sse)(chunkingParallelism)
.toMat(completionSink(s3Location))(Keep.right)
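// Illustrative usage sketch (added for clarity, not part of the original source); the file path is
// made up and FileIO/Paths imports are assumed:
//
//   val result: Future[CompleteMultipartUploadResult] =
//     FileIO.fromPath(Paths.get("/tmp/big-file.bin"))
//       .runWith(stream.multipartUpload(S3Location("my-bucket", "big-file.bin"), s3Headers = S3Headers.empty))
//
// Each accumulated chunk of at least `chunkSize` bytes becomes one upload-part request, and the
// completion sink issues the CompleteMultipartUpload call once all parts have succeeded.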
private def initiateMultipartUpload(s3Location: S3Location,
contentType: ContentType,
s3Headers: S3Headers): Future[MultipartUpload] = {
import mat.executionContext
val req = initiateMultipartUploadRequest(s3Location, contentType, s3Headers)
val response = for {
signedReq <- Signer.signedRequest(req, signingKey)
response <- Http().singleRequest(signedReq)
} yield response
response.flatMap {
case HttpResponse(status, _, entity, _) if status.isSuccess() =>
Unmarshal(entity).to[MultipartUpload]
case HttpResponse(_, _, entity, _) =>
Unmarshal(entity).to[String].map { err =>
throw new S3Exception(err)
}
}
}
def multipartCopy(
sourceLocation: S3Location,
targetLocation: S3Location,
sourceVersionId: Option[String] = None,
contentType: ContentType = ContentTypes.`application/octet-stream`,
s3Headers: S3Headers,
sse: Option[ServerSideEncryption] = None,
chunkSize: Int = MinChunkSize,
chunkingParallelism: Int = 4
): RunnableGraph[Future[CompleteMultipartUploadResult]] = {
import mat.executionContext
// Pre-step: fetch the source object's metadata to obtain its content length (object size)
val eventualMaybeObjectSize: Future[Option[Long]] =
getObjectMetadata(sourceLocation.bucket, sourceLocation.key, sse).map(_.map(_.contentLength))
val eventualPartitions =
eventualMaybeObjectSize.map(_.map(createPartitions(chunkSize, sourceLocation)).getOrElse(Nil))
val partitions = Source.fromFuture(eventualPartitions)
// All multipart copy requests (except for the completion API call) are created here.
// The initial upload-initiation request is also executed within this function;
// the individual upload-part-copy requests are only created, not yet executed.
val copyRequests =
createCopyRequests(targetLocation, sourceVersionId, contentType, s3Headers, sse, partitions)(chunkingParallelism)
// The individual copy upload part requests are processed here
processUploadCopyPartRequests(copyRequests)(chunkingParallelism)
.toMat(completionSink(targetLocation))(Keep.right)
}
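// Illustrative usage sketch (added for clarity, not part of the original source): the copy is
// performed server-side by S3 via upload-part-copy requests, so no object data flows through this
// client; `run()` assumes an implicit Materializer in scope:
//
//   val copied: Future[CompleteMultipartUploadResult] =
//     stream.multipartCopy(
//       sourceLocation = S3Location("src-bucket", "src-key"),
//       targetLocation = S3Location("dst-bucket", "dst-key"),
//       s3Headers = S3Headers.empty
//     ).run()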
private def computeMetaData(headers: Seq[HttpHeader], entity: ResponseEntity): ObjectMetadata =
ObjectMetadata(
headers ++
Seq(
`Content-Length`(entity.contentLengthOption.getOrElse(0)),
CustomContentTypeHeader(entity.contentType)
)
)
// The `Content-Type` header is by design not accessible as a plain header in Akka HTTP,
// so a custom header implementation is needed to expose it.
private case class CustomContentTypeHeader(contentType: ContentType) extends CustomHeader {
override def name(): String = "Content-Type"
override def value(): String = contentType.value
override def renderInRequests(): Boolean = true
override def renderInResponses(): Boolean = true
}
private def completeMultipartUpload(s3Location: S3Location,
parts: Seq[SuccessfulUploadPart]): Future[CompleteMultipartUploadResult] = {
import mat.executionContext
for (req <- completeMultipartUploadRequest(parts.head.multipartUpload, parts.map(p => p.index -> p.etag));
res <- signAndGetAs[CompleteMultipartUploadResult](req)) yield res
}
/**
* Initiates a multipart upload. Returns a source of the initiated upload paired with upload part indices.
*/
private def initiateUpload(s3Location: S3Location,
contentType: ContentType,
s3Headers: S3Headers): Source[(MultipartUpload, Int), NotUsed] =
Source
.single(s3Location)
.mapAsync(1)(initiateMultipartUpload(_, contentType, s3Headers))
.mapConcat(r => Stream.continually(r))
.zip(Source.fromIterator(() => Iterator.from(1)))
val atLeastOneByteString = Flow[ByteString].orElse(Source.single(ByteString.empty))
private def createRequests(
s3Location: S3Location,
contentType: ContentType,
s3Headers: S3Headers,
chunkSize: Int,
parallelism: Int,
sse: Option[ServerSideEncryption]
): Flow[ByteString, (HttpRequest, (MultipartUpload, Int)), NotUsed] = {
assert(
chunkSize >= MinChunkSize,
"Chunk size must be at least 5242880B. See http://docs.aws.amazon.com/AmazonS3/latest/API/mpUploadUploadPart.html"
)
// The first step of the multipart upload process is performed here.
// The response is then used to construct the subsequent individual upload-part requests.
val requestInfo: Source[(MultipartUpload, Int), NotUsed] =
initiateUpload(s3Location,
contentType,
S3Headers(
s3Headers.headers ++
sse.fold[Seq[HttpHeader]](Seq.empty) { _.headersFor(InitiateMultipartUpload) }
))
// use the same key for all sub-requests (chunks)
val key: SigningKey = signingKey
val headers: S3Headers = S3Headers(sse.fold[Seq[HttpHeader]](Seq.empty) { _.headersFor(UploadPart) })
SplitAfterSize(chunkSize)(atLeastOneByteString)
.via(getChunkBuffer(chunkSize)) //creates the chunks
.concatSubstreams
.zipWith(requestInfo) {
case (chunkedPayload, (uploadInfo, chunkIndex)) =>
//each payload part request is created here
val partRequest =
uploadPartRequest(uploadInfo, chunkIndex, chunkedPayload.data, chunkedPayload.size, headers)
(partRequest, (uploadInfo, chunkIndex))
}
.mapAsync(parallelism) { case (req, info) => Signer.signedRequest(req, key).zip(Future.successful(info)) }
}
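// Note (added for clarity, not part of the original source): SplitAfterSize emits one substream per
// accumulated chunk of at least `chunkSize` bytes (only the final chunk may be smaller), and zipping
// with `requestInfo` pairs every chunk with the shared upload id and its 1-based part number.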
private def getChunkBuffer(chunkSize: Int) = settings.bufferType match {
case MemoryBufferType =>
new MemoryBuffer(chunkSize * 2)
case d @ DiskBufferType(_) =>
new DiskBuffer(2, chunkSize * 2, d.path)
}
private def chunkAndRequest(
s3Location: S3Location,
contentType: ContentType,
s3Headers: S3Headers,
chunkSize: Int,
sse: Option[ServerSideEncryption]
)(parallelism: Int): Flow[ByteString, UploadPartResponse, NotUsed] = {
// All multipart upload requests (except for the completion API call) are created here.
// The initial upload-initiation request is also executed within this function;
// the individual upload-part requests are only created, not yet executed.
val requestFlow = createRequests(s3Location, contentType, s3Headers, chunkSize, parallelism, sse)
// The individual upload part requests are processed here
requestFlow
.via(Http().superPool[(MultipartUpload, Int)]())
.map {
case (Success(r), (upload, index)) =>
r.entity.dataBytes.runWith(Sink.ignore)
val etag = r.headers.find(_.lowercaseName() == "etag").map(_.value)
etag
.map((t) => SuccessfulUploadPart(upload, index, t))
.getOrElse(FailedUploadPart(upload, index, new RuntimeException(s"Cannot find etag in ${r}")))
case (Failure(e), (upload, index)) => FailedUploadPart(upload, index, e)
}
}
private def completionSink(
s3Location: S3Location
): Sink[UploadPartResponse, Future[CompleteMultipartUploadResult]] = {
import mat.executionContext
Sink.seq[UploadPartResponse].mapMaterializedValue { responseFuture: Future[Seq[UploadPartResponse]] =>
responseFuture
.flatMap { responses: Seq[UploadPartResponse] =>
val successes = responses.collect { case r: SuccessfulUploadPart => r }
val failures = responses.collect { case r: FailedUploadPart => r }
if (responses.isEmpty) {
Future.failed(new RuntimeException("No Responses"))
} else if (failures.isEmpty) {
Future.successful(successes.sortBy(_.index))
} else {
Future.failed(FailedUpload(failures.map(_.exception)))
}
}
.flatMap(completeMultipartUpload(s3Location, _))
}
}
private def signAndGetAs[T](request: HttpRequest)(implicit um: Unmarshaller[ResponseEntity, T]): Future[T] = {
import mat.executionContext
for (response <- signAndGet(request);
entity <- entityForSuccess(response);
t <- Unmarshal(entity).to[T]) yield t
}
private def signAndGet(request: HttpRequest): Future[HttpResponse] = {
import mat.executionContext
for {
req <- Signer.signedRequest(request, signingKey)
res <- Http().singleRequest(req)
} yield res
}
private def entityForSuccess(resp: HttpResponse)(implicit ctx: ExecutionContext): Future[ResponseEntity] =
resp match {
case HttpResponse(status, _, entity, _) if status.isSuccess() && !status.isRedirection() =>
Future.successful(entity)
case HttpResponse(_, _, entity, _) =>
Unmarshal(entity).to[String].map { err =>
throw new S3Exception(err)
}
}
private[impl] def createPartitions(chunkSize: Int,
sourceLocation: S3Location)(objectSize: Long): List[CopyPartition] =
if (objectSize <= 0 || objectSize < chunkSize) CopyPartition(1, sourceLocation) :: Nil
else {
((0L until objectSize by chunkSize).toList :+ objectSize)
.sliding(2)
.toList
.zipWithIndex
.map {
case (ls, index) => CopyPartition(index + 1, sourceLocation, Some(ByteRange(ls.head, ls.last)))
}
}
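// Worked example (added for clarity, not part of the original source): with chunkSize = 5242880 and
// objectSize = 12582912 (12 MiB) the partitions produced are
//   CopyPartition(1, src, Some(ByteRange(0, 5242880)))
//   CopyPartition(2, src, Some(ByteRange(5242880, 10485760)))
//   CopyPartition(3, src, Some(ByteRange(10485760, 12582912)))
// whereas an object smaller than one chunk yields a single CopyPartition with no range.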
private def createCopyRequests(
location: S3Location,
sourceVersionId: Option[String],
contentType: ContentType,
s3Headers: S3Headers,
sse: Option[ServerSideEncryption],
partitions: Source[List[CopyPartition], NotUsed]
)(parallelism: Int) = {
val requestInfo: Source[(MultipartUpload, Int), NotUsed] =
initiateUpload(location,
contentType,
S3Headers(
s3Headers.headers ++
sse.fold[Seq[HttpHeader]](Seq.empty) { _.headersFor(InitiateMultipartUpload) }
))
// use the same key for all sub-requests (chunks)
val key: SigningKey = signingKey
val headers: S3Headers = S3Headers(sse.fold[Seq[HttpHeader]](Seq.empty) { _.headersFor(CopyPart) })
requestInfo
.zipWith(partitions) {
case ((upload, _), ls) =>
ls.map { cp =>
val multipartCopy = MultipartCopy(upload, cp)
val request = uploadCopyPartRequest(multipartCopy, sourceVersionId, headers)
(request, multipartCopy)
}
}
.mapConcat(identity)
.mapAsync(parallelism) {
case (req, info) => Signer.signedRequest(req, key).zip(Future.successful(info))
}
}
private def processUploadCopyPartRequests(
requests: Source[(HttpRequest, MultipartCopy), NotUsed]
)(parallelism: Int) = {
import mat.executionContext
requests
.via(Http().superPool[MultipartCopy]())
.map {
case (Success(r), multipartCopy) =>
val entity = r.entity
val upload = multipartCopy.multipartUpload
val index = multipartCopy.copyPartition.partNumber
import StatusCodes._
r.status match {
case OK =>
Unmarshal(entity).to[CopyPartResult].map(cp => SuccessfulUploadPart(upload, index, cp.eTag))
case statusCode: StatusCode =>
Unmarshal(entity).to[String].map { err =>
val response = Option(err).getOrElse(s"Failed to upload part into S3, status code was: $statusCode")
throw new S3Exception(response)
}
}
case (Failure(ex), multipartCopy) =>
Future.successful(FailedUploadPart(multipartCopy.multipartUpload, multipartCopy.copyPartition.partNumber, ex))
}
.mapAsync(parallelism)(identity)
}
}
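// End-to-end usage sketch (added for illustration, not part of the original source). S3Stream is
// private[alpakka] and is normally reached through the public scaladsl/javadsl facades; the
// S3Settings construction below is an assumption and may differ between Alpakka versions:
//
//   implicit val system: ActorSystem = ActorSystem("s3-example")
//   implicit val mat: Materializer = ActorMaterializer()
//   val stream = S3Stream(S3Settings(system.settings.config))
//
//   val (data, _) = stream.download(S3Location("my-bucket", "my-key"), range = None, sse = None)
//   data.runWith(Sink.foreach(bytes => println(s"received ${bytes.length} bytes")))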