/*
 * Copyright 2020-2024 The Developers Team.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package com.github.cloudfiles.onedrive

import com.github.cloudfiles.core.http.HttpRequestSender
import com.github.cloudfiles.core.http.auth.AuthExtension
import com.github.cloudfiles.onedrive.OneDriveJsonProtocol._
import com.github.cloudfiles.onedrive.OneDriveUpload.UploadStreamCoordinatorActor.{NextUploadChunk, UploadChunk, UploadStreamCoordinationMessage}
import org.apache.pekko.actor.typed.scaladsl.AskPattern._
import org.apache.pekko.actor.typed.scaladsl.Behaviors
import org.apache.pekko.actor.typed.{ActorRef, ActorSystem, Behavior}
import org.apache.pekko.http.scaladsl.marshallers.sprayjson.SprayJsonSupport._
import org.apache.pekko.http.scaladsl.model._
import org.apache.pekko.http.scaladsl.model.headers.{ModeledCustomHeader, ModeledCustomHeaderCompanion}
import org.apache.pekko.http.scaladsl.unmarshalling.Unmarshal
import org.apache.pekko.stream._
import org.apache.pekko.stream.scaladsl.{Sink, Source}
import org.apache.pekko.stream.stage._
import org.apache.pekko.util.{ByteString, Timeout}

import java.util.concurrent.atomic.AtomicInteger
import scala.concurrent.{ExecutionContext, Future}
import scala.util.{Failure, Success, Try}

/**
 * A module implementing functionality related to file uploads to a OneDrive
 * server.
 *
 * Uploads to OneDrive are pretty complicated because they require multiple
 * steps. (There is also a direct upload mechanism, but this is supported for
 * small files only; as the files to be processed can have an arbitrary size,
 * the complex mechanism is used here.)
 *
 * In a first step a so-called ''upload session'' has to be created. This is
 * done by sending a special HTTP request to the server. The response is a
 * JSON document that - among other information - contains an upload URL. With
 * the upload URL available, the file's content can be sent to this URL in
 * one or multiple requests. There is a size restriction for a single request
 * (of ~60 MB, but the chunk size can be configured in the OneDrive config);
 * so it may be necessary to split the file into multiple requests.
 *
 * This module provides a function that creates a source of HTTP requests to
 * upload the individual chunks of a file. The source is populated from a stream
 * with the content of the file to be uploaded. This is pretty tricky because
 * multiple streams have to be coordinated. The stream with the file content
 * is mapped by a custom flow stage to a stream of HTTP requests. The entity
 * of each request is defined by a source that is fed by the main stream.
 * Unfortunately, there is no default operator for this use case available;
 * therefore, a custom flow stage and a custom source have been implemented
 * that use a special actor for their coordination.
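 *
 * As a rough usage sketch (the values are placeholders; the upload URI is
 * the one obtained from a previously created upload session):
 *
 * {{{
 * implicit val system: ActorSystem[Nothing] = ???
 * implicit val ec: ExecutionContext = system.executionContext
 * implicit val timeout: Timeout = config.timeout
 *
 * val futItemId: Future[String] =
 *   OneDriveUpload.upload(config, fileSize, fileSource, uploadUri, httpSender)
 * }}}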
 */
private object OneDriveUpload {
  private val NameCounter = new AtomicInteger

  /**
   * A source implementation that provides the data for a single request to
   * upload a chunk of a file.
   *
   * The stream with the file's content is split into multiple requests with
   * a configurable chunk size. For each request, an instance of this class
   * is created. The instance uses the stream coordinator provided to obtain
   * blocks of data.
   *
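   * A sketch of how such a source ends up as the entity of a chunk upload
   * request, mirroring ''createNextRequest'' below (''coordinator'' and
   * ''chunkLength'' are placeholders):
   *
   * {{{
   * val chunkData = Source.fromGraph(new UploadRequestSource(config, coordinator))
   * val entity = HttpEntity(ContentTypes.`application/octet-stream`, chunkLength, chunkData)
   * }}}
   *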
   * @param config            the OneDrive configuration
   * @param streamCoordinator the stream coordinator actor
   * @param ec                the execution context
   * @param system            the actor system
   */
  class UploadRequestSource(config: OneDriveConfig, streamCoordinator: ActorRef[UploadStreamCoordinationMessage])
                           (implicit ec: ExecutionContext, system: ActorSystem[_])
    extends GraphStage[SourceShape[ByteString]] {
    val out: Outlet[ByteString] = Outlet("UploadRequestSource")

    /** Timeout for communication with the coordinator actor. */
    private implicit val timeout: Timeout = config.timeout

    override def shape: SourceShape[ByteString] = SourceShape(out)

    override def createLogic(inheritedAttributes: Attributes): GraphStageLogic =
      new GraphStageLogic(shape) {
        setHandler(out, new OutHandler {
          override def onPull(): Unit = {
            val callback = getAsyncCallback[Try[UploadChunk]](chunkAvailable)
            requestUploadChunk() onComplete callback.invoke
          }
        })

        /**
         * Asks the stream coordinator actor for the next block of data.
         *
         * @return a future with the response from the actor
         */
        private def requestUploadChunk(): Future[UploadChunk] =
          streamCoordinator.ask(ref => NextUploadChunk(ref))

        /**
         * Handles a response from the stream coordinator actor. If new data
         * is available, it is passed downstream. If the current chunk is
         * complete (indicated by an empty block of data), the source is
         * completed. Failures from upstream are also handled.
         *
         * @param triedChunk a ''Try'' with the next block of data
         */
        private def chunkAvailable(triedChunk: Try[UploadChunk]): Unit = {
          triedChunk match {
            case Success(chunk) =>
              if (chunk.data.nonEmpty) {
                push(out, chunk.data)
              } else {
                completeStage()
              }

            case Failure(exception) =>
              failStage(exception)
          }
        }
      }
  }

  /**
   * A custom flow stage implementation to split the stream with the content
   * of a file into multiple upload requests.
   *
   * The class receives blocks of data from upstream and passes them to the
   * stream coordinator actor. From there they can be queried by the source
   * that produces the content of upload requests.
   *
   * Each pull signal from downstream generates another HTTP request to upload
   * a chunk of the file affected. The requests have a special header to
   * indicate which part of the file is uploaded. The stream coordinator actor
   * keeps track of when a chunk is complete, so that the next upload request
   * can be started.
   *
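   * The stage is plugged into the stream with the file content; a sketch:
   *
   * {{{
   * val requests: Source[HttpRequest, Any] =
   *   fileSource.via(new UploadBytesToRequestFlow(config, uploadUri, fileSize))
   * }}}
   *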
   * @param config    the OneDrive configuration
   * @param uploadUri the URI where to upload the data
   * @param fileSize  the size of the file to be uploaded
   * @param ec        the execution context
   * @param system    the actor system
   */
  class UploadBytesToRequestFlow(config: OneDriveConfig, uploadUri: Uri, fileSize: Long)
                                (implicit ec: ExecutionContext, system: ActorSystem[_])
    extends GraphStage[FlowShape[ByteString, HttpRequest]] {
    val in: Inlet[ByteString] = Inlet("UploadBytesToRequestFlow.in")
    val out: Outlet[HttpRequest] = Outlet("UploadBytesToRequestFlow.out")

    override def shape: FlowShape[ByteString, HttpRequest] = FlowShape(in, out)

    override def createLogic(inheritedAttributes: Attributes): GraphStageLogic =
      new GraphStageLogic(shape) with StageLogging {
        /**
         * The actor to coordinate between this flow stage and the sources
         * for the upload requests.
         */
        private var streamCoordinator: ActorRef[UploadStreamCoordinationMessage] = _

        /**
         * Keeps track of the number of bytes that have already been uploaded.
         * This is used to find out when stream processing is complete.
         */
        private var bytesUploaded = 0L

        /** Records that an upload finished signal has been received. */
        private var finished = false

        override def preStart(): Unit = {
          super.preStart()
          val callback = getAsyncCallback[Unit] { _ =>
            pollFromCoordinator()
          }
          streamCoordinator = createCoordinatorActor(callback)
        }

        setHandler(in, new InHandler {
          override def onPush(): Unit = {
            streamCoordinator ! UploadChunk(grab(in))
          }

          override def onUpstreamFinish(): Unit = {
            finished = true
          }

          override def onUpstreamFailure(ex: Throwable): Unit = {
            super.onUpstreamFailure(ex)
            stopStreamCoordinator()
          }
        })

        setHandler(out, new OutHandler {
          override def onPull(): Unit = {
            if (bytesUploaded >= fileSize) {
              complete(out)
              stopStreamCoordinator()
            } else {
              push(out, createNextRequest())
            }
          }
        })

        /**
         * Creates the request to upload a chunk of bytes for the current file.
         *
         * @return the upload request
         */
        private def createNextRequest(): HttpRequest = {
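          // The end index is inclusive; for the last chunk it is capped at the
          // file size, so this chunk may be smaller than the configured size.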
          val chunkEnd = math.min(bytesUploaded + config.uploadChunkSize, fileSize) - 1
          log.debug("Uploading chunk {}-{}/{} to {}.", bytesUploaded, chunkEnd, fileSize, uploadUri)
          val dataSource = new UploadRequestSource(config, streamCoordinator)
          val request = createUploadRequest(uploadUri, bytesUploaded, chunkEnd, fileSize,
            Source.fromGraph(dataSource))
          bytesUploaded += config.uploadChunkSize
          request
        }

        /**
         * Handler function for the callback invoked by the coordinator actor.
         * This function is called when the actor requests new data. If
         * upstream is already finished, this graph stage can now be
         * completed; otherwise, data is pulled from the in channel.
         */
        private def pollFromCoordinator(): Unit = {
          if (finished) {
            log.debug("Upload to {} completed; sent {} bytes.", uploadUri, bytesUploaded)
            complete(out)
          } else pull(in)
        }

        /**
         * Stops the stream coordinator actor by sending it an empty chunk
         * message. This is the signal for the actor to stop itself after all
         * pending messages have been delivered.
         */
        private def stopStreamCoordinator(): Unit = {
          streamCoordinator ! UploadStreamCoordinatorActor.EmptyChunk
        }
      }

    /**
     * Creates the actor that does the stream coordination.
     *
     * @param callback the callback for the actor
     * @return the stream coordinator actor
     */
    private[onedrive] def createCoordinatorActor(callback: AsyncCallback[Unit]):
    ActorRef[UploadStreamCoordinationMessage] =
      system.systemActorOf(UploadStreamCoordinatorActor(config.uploadChunkSize, fileSize, callback),
        "OneDriveUploadActor" + NameCounter.incrementAndGet())
  }

  /**
   * An actor implementation that coordinates the interactions between the
   * custom flow stage ([[UploadBytesToRequestFlow]]) and the custom source
   * ([[UploadRequestSource]]) implementations.
   *
   * This actor class passes data from the stream with the content of the
   * file to be uploaded to the sources representing the entities of upload
   * requests. It also makes sure that chunks of the correct size are
   * uploaded.
   *
   * The basic idea behind this actor is that data flow is controlled from
   * downstream to upstream. A source for the entity of an upload request
   * sends a [[NextUploadChunk]] message to this actor to query the next block
   * of data. This request is forwarded to the custom flow stage via an
   * asynchronous callback. As a reaction to this callback, the flow pulls its
   * upstream source and receives a block of data, which it passes to this
   * actor. The actor can then pass this block to the entity source.
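   *
   * A single round trip of this protocol boils down to an ''ask'' interaction
   * (as implemented by [[UploadRequestSource]]):
   *
   * {{{
   * implicit val timeout: Timeout = config.timeout
   * val futChunk: Future[UploadChunk] =
   *   streamCoordinator.ask(ref => NextUploadChunk(ref))
   * // an empty chunk in the response marks the end of the current request
   * }}}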
   */
  object UploadStreamCoordinatorActor {

    /**
     * The base trait for the messages processed by this actor.
     */
    sealed trait UploadStreamCoordinationMessage

    /**
     * A message processed by [[UploadStreamCoordinatorActor]] that requests
     * the next block of data. The message is sent by [[UploadRequestSource]].
     *
     * @param replyTo the actor to send the response to
     */
    case class NextUploadChunk(replyTo: ActorRef[UploadChunk]) extends UploadStreamCoordinationMessage

    /**
     * A message that contains a block of data.
     *
     * The message is both received and sent by
     * [[UploadStreamCoordinatorActor]]. [[UploadBytesToRequestFlow]] sends
     * this message to pass data from the uploaded file to this actor. It is
     * then also sent as the response to a [[NextUploadChunk]] message to
     * [[UploadRequestSource]]. Note that messages with an empty block of data
     * have a special meaning indicating the end of a chunk or the whole
     * stream.
     *
     * @param data the block of data
     */
    case class UploadChunk(data: ByteString) extends UploadStreamCoordinationMessage

    /**
     * Constant for an empty chunk of data. This is used to indicate the end of
     * the stream for a single upload request.
     */
    final val EmptyChunk: UploadChunk = UploadChunk(ByteString.empty)

    /**
     * Returns the ''Behavior'' for creating a new instance of this actor
     * class.
     *
     * @param chunkSize the upload chunk size
     * @param fileSize  the size of the file to be uploaded
     * @param callback  a callback to request more data from
     *                  [[UploadBytesToRequestFlow]]
     * @return the ''Behavior'' for a new instance
     */
    def apply(chunkSize: Int, fileSize: Long, callback: AsyncCallback[Unit]):
    Behavior[UploadStreamCoordinationMessage] =
      handle(chunkSize, fileSize, callback, List.empty, null, 0, 0, finished = false)

    /**
     * The actual message handling function that manages and updates the state
     * of this actor.
     *
     * @param chunkSize           the upload chunk size
     * @param fileSize            the size of the file to be uploaded
     * @param callback            a callback to request more data
     * @param pendingData         a list with data that can be queried from
     *                            downstream
     * @param client              the client of the latest data request
     * @param bytesUploaded       number of bytes that have been uploaded
     * @param bytesInCurrentChunk number of bytes in the current chunk
     * @param finished            indicates whether all data is processed
     * @return the next behavior function
     */
    private def handle(chunkSize: Int, fileSize: Long, callback: AsyncCallback[Unit], pendingData: List[UploadChunk],
                       client: ActorRef[UploadChunk], bytesUploaded: Long,
                       bytesInCurrentChunk: Int, finished: Boolean): Behavior[UploadStreamCoordinationMessage] =
      Behaviors.receivePartial {
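        // A request source asks for the next block of data: serve it from the
        // pending list if possible; otherwise, trigger the flow stage via the
        // async callback and record the caller as the pending client.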
        case (_, NextUploadChunk(replyTo)) =>
          pendingData match {
            case h :: t =>
              replyTo ! h
              if (finished && t.isEmpty) Behaviors.stopped
              else handle(chunkSize, fileSize, callback, t, client, bytesUploaded, bytesInCurrentChunk, finished)
            case _ =>
              callback.invoke(())
              handle(chunkSize, fileSize, callback, pendingData, replyTo, bytesUploaded, bytesInCurrentChunk, finished)
          }

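        // An empty chunk from the flow stage signals the end of the whole
        // stream: stop immediately if no data is pending; otherwise, remember
        // that the upload is finished.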
        case (_, UploadChunk(data)) if data.isEmpty =>
          if (pendingData.isEmpty) Behaviors.stopped
          else handle(chunkSize, fileSize, callback, pendingData, client, bytesUploaded, bytesInCurrentChunk,
            finished = true)

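        // A block of data from the flow stage: if it crosses a chunk boundary,
        // split it and queue an EmptyChunk to terminate the entity source of
        // the current request; then serve the waiting client.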
        case (_, c@UploadChunk(data)) =>
          val nextBytesUploaded = bytesUploaded + data.length
          val (nextPendingData, nextBytesInCurrentChunk, chunk) = if (bytesInCurrentChunk + data.length > chunkSize) {
            val (last, next) = data.splitAt(chunkSize - bytesInCurrentChunk)
            (List(EmptyChunk, UploadChunk(next)), next.length, UploadChunk(last))
          } else {
            if (bytesUploaded >= fileSize || bytesInCurrentChunk == chunkSize)
              (List(EmptyChunk), 0, c)
            else (pendingData, bytesInCurrentChunk + data.length, c)
          }

          client ! chunk
          handle(chunkSize, fileSize, callback, nextPendingData, client, nextBytesUploaded,
            nextBytesInCurrentChunk, finished)
      }
  }

  /**
   * The class representing the ''Content-Range'' custom header.
   *
   * This header is required for upload requests to a OneDrive server. Large
   * files can be uploaded in multiple chunks, and the header describes the
   * current chunk.
   *
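   * For example, with a chunk size of 60 MiB (62,914,560 bytes), a file of
   * 150 MiB (157,286,400 bytes) is described by three headers:
   *
   * {{{
   * ContentRangeHeader.fromChunk(0L, 62914559L, 157286400L)
   *   // Content-Range: bytes 0-62914559/157286400
   * ContentRangeHeader.fromChunk(62914560L, 125829119L, 157286400L)
   *   // Content-Range: bytes 62914560-125829119/157286400
   * ContentRangeHeader.fromChunk(125829120L, 157286399L, 157286400L)
   *   // Content-Range: bytes 125829120-157286399/157286400
   * }}}
   *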
   * @param value the value of this header
   */
  class ContentRangeHeader(override val value: String) extends ModeledCustomHeader[ContentRangeHeader] {
    override def companion: ModeledCustomHeaderCompanion[ContentRangeHeader] = ContentRangeHeader

    override def renderInRequests(): Boolean = true

    override def renderInResponses(): Boolean = true
  }

  object ContentRangeHeader extends ModeledCustomHeaderCompanion[ContentRangeHeader] {
    override def name: String = "Content-Range"

    override def parse(value: String): Try[ContentRangeHeader] =
      Try(new ContentRangeHeader(value))

    /**
     * Returns a ''Content-Range'' header from the parameters of the current
     * upload chunk.
     *
     * @param from  the start byte of the current chunk
     * @param to    the end byte of the current chunk
     * @param total the total file size
     * @return the header with these parameters
     */
    def fromChunk(from: Long, to: Long, total: Long): ContentRangeHeader =
      apply(s"bytes $from-$to/$total")
  }

  /**
   * Executes the upload of a file to a given upload URI. If successful, a
   * future with the ID of the uploaded file is returned.
   *
   * @param config     the OneDrive configuration
   * @param fileSize   the size of the file to be uploaded
   * @param fileSource the source with the content of the file
   * @param uploadUri  the URI where to upload the file
   * @param httpSender the actor for sending HTTP requests
   * @param ec         the execution context
   * @param system     the actor system
   * @param timeout    the timeout for requests to the server
   * @return a future with the ID of the drive item affected
   */
  def upload(config: OneDriveConfig, fileSize: Long, fileSource: Source[ByteString, Any], uploadUri: Uri,
             httpSender: ActorRef[HttpRequestSender.HttpCommand])
            (implicit ec: ExecutionContext, system: ActorSystem[_], timeout: Timeout): Future[String] = {
    val requestSource = createUploadRequestsSource(config, fileSize, fileSource, uploadUri)
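    // Per the upload session protocol, only the response to the final chunk
    // contains the metadata of the resulting drive item; therefore, only the
    // last response is kept.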
    val sink = Sink.last[UploadChunkResponse]
    requestSource.mapAsync(1) { req =>
        HttpRequestSender.sendRequestSuccess(httpSender, req)
      }.mapAsync(1) { result =>
        Unmarshal(result.response).to[UploadChunkResponse]
      }.runWith(sink)
      .flatMap { response =>
        response.id match {
          case Some(id) => Future.successful(id)
          case None => Future.failed(new IllegalStateException(s"No ID found in upload response to $uploadUri."))
        }
      }
  }

  /**
   * Generates a source that produces HTTP requests to upload the individual
   * chunks of the file that is the target of this upload operation. If the
   * file fits into a single chunk, a simple source is created that yields only
   * a single request. Otherwise, the complex chunking logic has to be applied.
   *
   * @param config     the OneDrive configuration
   * @param fileSize   the size of the file to be uploaded
   * @param fileSource the source with the content of the file
   * @param uploadUri  the URI where to upload the file
   * @param ec         the execution context
   * @param system     the actor system
   * @return the ''Source'' with upload requests
   */
  private def createUploadRequestsSource(config: OneDriveConfig, fileSize: Long, fileSource: Source[ByteString, Any],
                                         uploadUri: Uri)
                                        (implicit ec: ExecutionContext, system: ActorSystem[_]):
  Source[HttpRequest, Any] =
    if (fileSize <= config.uploadChunkSize)
      Source.single(createUploadRequest(uploadUri, 0, fileSize - 1, fileSize, fileSource))
    else fileSource.via(new UploadBytesToRequestFlow(config, uploadUri, fileSize))

  /**
   * Creates a request to upload a specific chunk of data of a file.
   *
   * @param uploadUri  the upload URI
   * @param chunkStart the start position of the current chunk
   * @param chunkEnd   the end position of the current chunk
   * @param totalSize  the total file size
   * @param dataSource the source with the binary data of the chunk
   * @return the upload request for this chunk
   */
  private def createUploadRequest(uploadUri: Uri, chunkStart: Long, chunkEnd: Long, totalSize: Long,
                                  dataSource: Source[ByteString, Any]): HttpRequest =
    HttpRequest(method = HttpMethods.PUT,
      uri = uploadUri,
      headers = List(
        ContentRangeHeader.fromChunk(chunkStart, chunkEnd, totalSize),
        AuthExtension.EmptyAuthHeader
      ),
      entity = HttpEntity(ContentTypes.`application/octet-stream`, chunkEnd - chunkStart + 1, dataSource))
}
