
// org.apache.jackrabbit.api.binary.BinaryUpload (Maven / Gradle / Ivy)
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*
*/
package org.apache.jackrabbit.api.binary;
import java.net.URI;
import org.apache.jackrabbit.api.JackrabbitValueFactory;
import org.jetbrains.annotations.NotNull;
import org.osgi.annotation.versioning.ProviderType;
/**
* Describes uploading a binary through HTTP requests in a single or multiple
* parts. This will be returned by
* {@link JackrabbitValueFactory#initiateBinaryUpload(long, int)}. A high-level
* overview of the process can be found in {@link JackrabbitValueFactory}.
*
* <p>
* Note that although the API allows URI schemes other than "http(s)", the
* upload functionality is currently only defined for HTTP.
*
* <p>
* A caller usually needs to pass the information provided by this interface to
* a remote client that is in possession of the actual binary, who then has to
* upload the binary using HTTP according to the logic described below. A remote
* client is expected to support multi-part uploads as per the logic described
* below, in case multiple URIs are returned.
*
* <p>
* Once a remote client finishes uploading the binary data, the application must
* be notified and must then call
* {@link JackrabbitValueFactory#completeBinaryUpload(String)} to complete the
* upload. This completion requires the exact upload token obtained from
* {@link #getUploadToken()}.
*
*
* <h2 id="upload.algorithm">Upload algorithm</h2>
*
* A remote client will have to follow this algorithm to upload a binary based
* on the information provided by this interface.
*
* <p>
* Please be aware that if the size passed to
* {@link JackrabbitValueFactory#initiateBinaryUpload(long, int)} was an
* estimation, but the actual binary is larger, there is no guarantee the
* upload will be possible using all {@link #getUploadURIs()} and the
* {@link #getMaxPartSize()}. In such cases, the application should restart the
* transaction using the correct size.
*
*
* <h3>Variables used</h3>
*
* <ul>
* <li>{@code fileSize}: the actual binary size (must be known at this
* point)</li>
* <li>{@code minPartSize}: the value from {@link #getMinPartSize()}</li>
* <li>{@code maxPartSize}: the value from {@link #getMaxPartSize()}</li>
* <li>{@code numUploadURIs}: the number of entries in {@link
* #getUploadURIs()}</li>
* <li>{@code uploadURIs}: the entries in {@link #getUploadURIs()}</li>
* <li>{@code partSize}: the part size to be used in the upload (to be
* determined in the algorithm)</li>
* </ul>
*
*
* <h3>Steps</h3>
*
* <ol>
* <li>
* If {@code (fileSize / maxPartSize) > numUploadURIs}, then the
* client cannot proceed and will have to request a new set of URIs
* with the right fileSize as {@code maxSize}
* </li>
* <li>
* If {@code fileSize < minPartSize}, then take the first provided
* upload URI to upload the entire binary, with
* {@code partSize = fileSize}
* </li>
* <li>
* (optional) If the client has more information to optimize, the
* {@code partSize} can be chosen, under the condition that all of these are
* true:
* <ul>
* <li>{@code partSize >= minPartSize}</li>
* <li>{@code partSize <= maxPartSize}
* (unless {@code maxPartSize = -1} meaning unlimited)</li>
* <li>{@code partSize > (fileSize / numUploadURIs)}</li>
* </ul>
* </li>
* <li>
* Otherwise all part URIs are to be used. The {@code partSize}
* to use for all parts except the last would be calculated using:
* <pre>partSize = (fileSize + numUploadURIs - 1) / numUploadURIs</pre>
* </li>
* <li>
* Upload: segment the binary into {@code partSize}, for each segment take the
* next URI from {@code uploadURIs} (strictly in order), proceed with a standard
* HTTP PUT for each, and for the last part use whatever segment size is left
* </li>
* <li>
* If a segment fails during upload, retry (up to a certain timeout)
* </li>
* <li>
* After the upload has finished successfully, notify the application,
* for example through a complete request, passing the {@link
* #getUploadToken() upload token}, and the application will call {@link
* JackrabbitValueFactory#completeBinaryUpload(String)} with the token
* </li>
* </ol>
*
*
*
* <h2>Example JSON view</h2>
*
* A JSON representation of this interface as passed back to a remote client
* might look like this:
*
* <pre>
* {
*     "uploadToken": "aaaa-bbbb-cccc-dddd-eeee-ffff-gggg-hhhh",
*     "minPartSize": 10485760,
*     "maxPartSize": 104857600,
*     "uploadURIs": [
*         "http://server.com/upload/1",
*         "http://server.com/upload/2",
*         "http://server.com/upload/3",
*         "http://server.com/upload/4"
*     ]
* }
* </pre>
*/
@ProviderType
public interface BinaryUpload {

    /**
     * Returns a list of URIs that can be used for uploading binary data
     * directly to a storage location in one or more parts.
     *
     * <p>
     * Remote clients must support multi-part uploading as per the upload
     * algorithm described in the {@link BinaryUpload} interface
     * documentation. Clients are not necessarily required to use all of the
     * URIs provided. A client may choose to use fewer, or even only one of
     * the URIs. However, it must always ensure the part size is between
     * {@link #getMinPartSize()} and {@link #getMaxPartSize()}. These can
     * reflect strict limitations of the storage provider.
     *
     * <p>
     * Regardless of the number of URIs used, they must be consumed in
     * sequence, without skipping any, and the order of parts the original
     * binary is split into must correspond exactly with the order of URIs.
     *
     * <p>
     * For example, if a client wishes to upload a binary in three parts and
     * there are five URIs returned, the client must use the first URI to
     * upload the first part, the second URI to upload the second part, and
     * the third URI to upload the third part. The client is not required to
     * use the fourth and fifth URIs. However, using the second URI to upload
     * the third part may result in either an upload failure or a corrupted
     * upload; likewise, skipping the second URI to use subsequent URIs may
     * result in either an upload failure or a corrupted upload.
     *
     * <p>
     * While the API supports multi-part uploading via multiple upload URIs,
     * implementations are not required to support multi-part uploading. If
     * the underlying implementation does not support multi-part uploading, a
     * single URI will be returned regardless of the size of the data being
     * uploaded.
     *
     * <p>
     * Security considerations:
     *
     * <ul>
     * <li>
     * The URIs cannot be shared with other users. They must only be returned
     * to authenticated requests corresponding to this session user or
     * trusted system components.
     * </li>
     * <li>
     * The URIs must not be persisted for later use and will typically be
     * time limited.
     * </li>
     * <li>
     * The URIs will only grant access to this particular binary.
     * </li>
     * <li>
     * The client cannot infer any semantics from the URI structure and path
     * names. It would typically include a cryptographic signature. Any
     * change to the URIs will likely result in a failing request.
     * </li>
     * </ul>
     *
     * @return Iterable of URIs that can be used for uploading directly to a
     *         storage location; never {@code null}.
     */
    @NotNull
    Iterable<URI> getUploadURIs();

    /**
     * Return the smallest possible part size in bytes. If a consumer wants to
     * choose a custom part size, it cannot be smaller than this value. This
     * does not apply to the final part. This value will be equal to or larger
     * than zero.
     *
     * <p>
     * Note that the API offers no guarantees that using this minimal part
     * size is possible with the number of available
     * {@link #getUploadURIs()}. This might not be the case if the binary is
     * too large. Please refer to the upload algorithm in the
     * {@link BinaryUpload} interface documentation for the correct use of
     * this value.
     *
     * @return The smallest part size acceptable for multi-part uploads.
     */
    long getMinPartSize();

    /**
     * Return the largest possible part size in bytes. If a consumer wants to
     * choose a custom part size, it cannot be larger than this value.
     * If this returns -1, the maximum is unlimited.
     *
     * <p>
     * The API guarantees that a client can split the binary of the requested
     * size using this maximum part size and there will be sufficient URIs
     * available in {@link #getUploadURIs()}. Please refer to the upload
     * algorithm in the {@link BinaryUpload} interface documentation for the
     * correct use of this value.
     *
     * @return The maximum part size acceptable for multi-part uploads or -1
     *         if there is no limit.
     */
    long getMaxPartSize();

    /**
     * Returns a token identifying this upload. This is required to finalize
     * the upload at the end by calling
     * {@link JackrabbitValueFactory#completeBinaryUpload(String)}.
     *
     * <p>
     * The format of this string is implementation-dependent. Implementations
     * must ensure that clients cannot guess tokens for existing binaries.
     *
     * @return A unique token identifying this upload; never {@code null}.
     */
    @NotNull
    String getUploadToken();
}