/*
* Copyright 2023 Google LLC
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package com.google.cloud.storage;
import com.google.api.core.BetaApi;
import com.google.cloud.storage.GrpcStorageOptions.GrpcStorageDefaults;
import com.google.cloud.storage.ParallelCompositeUploadBlobWriteSessionConfig.PartCleanupStrategy;
import com.google.cloud.storage.Storage.BlobTargetOption;
import com.google.cloud.storage.Storage.BlobWriteOption;
import com.google.cloud.storage.TransportCompatibility.Transport;
import com.google.common.collect.ImmutableList;
import java.io.IOException;
import java.nio.channels.WritableByteChannel;
import java.nio.file.Path;
import java.nio.file.Paths;
import java.util.Collection;
/**
* Factory class to select and construct {@link BlobWriteSessionConfig}s.
*
 * <p>There are several strategies which can be used to upload a {@link Blob} to Google Cloud
* Storage. This class provides factories which allow you to select the appropriate strategy for
* your workload.
*
 * <table>
 *   <caption>Comparison of Strategies</caption>
 *   <tr>
 *     <th>Strategy</th>
 *     <th>Factory Method(s)</th>
 *     <th>Description</th>
 *     <th>Transport(s) Supported</th>
 *     <th>Considerations</th>
 *     <th>Retry Support</th>
 *     <th>Cloud Storage API used</th>
 *   </tr>
 *   <tr>
 *     <td>Default (Chunk based upload)</td>
 *     <td>{@link #getDefault()}</td>
 *     <td>Buffer up to a configurable amount of bytes in memory, write to Cloud Storage when
 *       full or closed. Buffer size is configurable via
 *       {@link DefaultBlobWriteSessionConfig#withChunkSize(int)}</td>
 *     <td>gRPC</td>
 *     <td>The network will only be used for the following operations:
 *       <ol>
 *         <li>Creating the Resumable Upload Session</li>
 *         <li>Transmitting zero or more incremental chunks</li>
 *         <li>Transmitting the final chunk and finalizing the Resumable Upload Session</li>
 *         <li>If any of the above are interrupted with a retryable error, the Resumable Upload
 *           Session will be queried to reconcile client side state with Cloud Storage</li>
 *       </ol>
 *     </td>
 *     <td>Each chunk is retried up to the limitations specified in
 *       {@link StorageOptions#getRetrySettings()}</td>
 *     <td>Resumable Upload</td>
 *   </tr>
 *   <tr>
 *     <td>Buffer to disk then upload</td>
 *     <td>
 *       <ul>
 *         <li>{@link #bufferToDiskThenUpload(Path)}</li>
 *         <li>{@link #bufferToDiskThenUpload(Collection) bufferToDiskThenUpload(Collection&lt;Path&gt;)}</li>
 *         <li>{@link #bufferToTempDirThenUpload()}</li>
 *       </ul>
 *     </td>
 *     <td>Buffer bytes to a temporary file on disk. On {@link WritableByteChannel#close()
 *       close()} upload the entire file's contents to Cloud Storage. Delete the temporary
 *       file.</td>
 *     <td>gRPC</td>
 *     <td>
 *       <ol>
 *         <li>A Resumable Upload Session will be used to upload the file on disk.</li>
 *         <li>If the upload is interrupted with a retryable error, the Resumable Upload Session
 *           will be queried to restart the upload from Cloud Storage's last received byte</li>
 *       </ol>
 *     </td>
 *     <td>Upload the file in the fewest number of RPCs possible, retrying within the
 *       limitations specified in {@link StorageOptions#getRetrySettings()}</td>
 *     <td>Resumable Upload</td>
 *   </tr>
 *   <tr>
 *     <td>Journal to disk while uploading</td>
 *     <td>{@link #journaling(Collection) journaling(Collection&lt;Path&gt;)}</td>
 *     <td>Create a Resumable Upload Session; before transmitting bytes to Cloud Storage, write
 *       them to a recovery file on disk. If the stream to Cloud Storage is interrupted with a
 *       retryable error, query the offset of the Resumable Upload Session, then open the
 *       recovery file from that offset and transmit the bytes to Cloud Storage.</td>
 *     <td>gRPC</td>
 *     <td>
 *       <ol>
 *         <li>The stream to Cloud Storage will be held open until a) the write is complete,
 *           or b) the stream is interrupted</li>
 *         <li>Because the bytes are journaled to disk, the upload to Cloud Storage can only be
 *           as fast as the disk.</li>
 *         <li>The use of Compute Engine Local NVMe SSD is strongly encouraged compared to
 *           Compute Engine Persistent Disk.</li>
 *       </ol>
 *     </td>
 *     <td>Opening the stream for upload will be retried up to the limitations specified in
 *       {@link StorageOptions#getRetrySettings()}. All bytes are buffered to disk, allowing
 *       recovery from any arbitrary offset.</td>
 *     <td>Resumable Upload</td>
 *   </tr>
 *   <tr>
 *     <td>Parallel Composite Upload</td>
 *     <td>{@link #parallelCompositeUpload()}</td>
 *     <td>Break the stream of bytes into smaller part objects, uploading each part in parallel,
 *       then compose the parts together to make the ultimate object.</td>
 *     <td>gRPC</td>
 *     <td>
 *       <ol>
 *         <li>Performing parallel composite uploads costs more money. Class A operations are
 *           performed to create each part and to perform each compose. If a storage tier other
 *           than {@code STANDARD} is used, early deletion fees apply to deletion of the parts.
 *           <p>An illustrative example: upload a 5GiB object using 64MiB as the max size per
 *           part.
 *           <ol>
 *             <li>80 Parts will be created (Class A)</li>
 *             <li>3 compose calls will be performed (Class A)</li>
 *             <li>Delete 80 Parts along with 2 intermediary Compose objects (Free tier as long
 *               as {@code STANDARD} class)</li>
 *           </ol>
 *           Once the parts and intermediary compose objects are deleted, there will be no
 *           storage charges related to those temporary objects.</li>
 *         <li>The service account/credentials used to perform the parallel composite upload
 *           require {@code storage.objects.delete} in order to clean up the temporary part and
 *           intermediary compose objects.
 *           <p>To handle part and intermediary compose object deletion out of band, passing
 *           {@link PartCleanupStrategy#never()} to
 *           {@link ParallelCompositeUploadBlobWriteSessionConfig#withPartCleanupStrategy(PartCleanupStrategy)}
 *           will prevent automatic cleanup.</li>
 *         <li>Please see the Parallel composite uploads documentation for a more in-depth
 *           explanation of the limitations of Parallel composite uploads.</li>
 *         <li>A failed upload can leave part and intermediary compose objects behind which will
 *           count as storage usage, and you will be billed for it.
 *           <p>By default, if an upload fails an attempt to clean up the part and intermediary
 *           compose objects will be made. However, if the program were to crash there is no
 *           means for the client to perform the cleanup.
 *           <p>Every part and intermediary compose object will be created with a name which
 *           ends in {@code .part}. An Object Lifecycle Management rule can be set up on your
 *           bucket to automatically clean up objects with this suffix after some period of
 *           time. See Object Lifecycle Management for full details and a guide on how to set up
 *           a Delete rule with a suffix match condition.</li>
 *         <li>Parallel composite uploads are not a one-size-fits-all solution. They have very
 *           real overhead until uploading a large enough object. The inflection point is
 *           dependent upon many factors, and there is no one-size-fits-all value. You will need
 *           to experiment with your deployment and workload to determine if parallel composite
 *           uploads are useful to you.</li>
 *       </ol>
 *     </td>
 *     <td>Automatic retries will be applied for the following:
 *       <ol>
 *         <li>Creation of each individual part</li>
 *         <li>Performing an intermediary compose</li>
 *         <li>Performing a delete to clean up each part and intermediary compose object</li>
 *       </ol>
 *       <p>Retrying the creation of the final object is contingent upon whether an appropriate
 *       precondition is supplied when calling
 *       {@link Storage#blobWriteSession(BlobInfo, BlobWriteOption...)}. Either
 *       {@link BlobTargetOption#doesNotExist()} or
 *       {@link Storage.BlobTargetOption#generationMatch(long)} should be specified in order to
 *       make the final request idempotent.
 *       <p>Each operation will be retried up to the limitations specified in
 *       {@link StorageOptions#getRetrySettings()}</td>
 *     <td></td>
 *   </tr>
 * </table>
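 *
 * <p>As a minimal, illustrative sketch (the bucket, object, and data below are placeholders), a
 * strategy is selected once when building the {@code Storage} instance and then applies to every
 * {@link BlobWriteSession} created from it:
 *
 * <pre>{@code
 * Storage storage =
 *     StorageOptions.grpc()
 *         .setBlobWriteSessionConfig(BlobWriteSessionConfigs.getDefault())
 *         .build()
 *         .getService();
 *
 * BlobInfo info = BlobInfo.newBuilder(BlobId.of("my-bucket", "my-object")).build();
 * BlobWriteSession session = storage.blobWriteSession(info, BlobWriteOption.doesNotExist());
 * try (WritableByteChannel channel = session.open()) {
 *   channel.write(ByteBuffer.wrap("hello".getBytes(StandardCharsets.UTF_8)));
 * }
 * BlobInfo written = session.getResult().get(); // completes once the object is finalized
 * }</pre>
 *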
* @see BlobWriteSessionConfig
* @see GrpcStorageOptions.Builder#setBlobWriteSessionConfig(BlobWriteSessionConfig)
* @see Storage#blobWriteSession(BlobInfo, BlobWriteOption...)
* @since 2.26.0 This new api is in preview and is subject to breaking changes.
*/
@BetaApi
public final class BlobWriteSessionConfigs {
private BlobWriteSessionConfigs() {}
/**
* Factory to produce the default configuration for uploading an object to Cloud Storage.
*
 * <p>Configuration of the chunk size can be performed via {@link
* DefaultBlobWriteSessionConfig#withChunkSize(int)}.
*
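 * <p>For example, a minimal sketch of selecting the default strategy with a larger buffer (the
 * 32 MiB value is purely illustrative):
 *
 * <pre>{@code
 * StorageOptions options =
 *     StorageOptions.grpc()
 *         .setBlobWriteSessionConfig(
 *             BlobWriteSessionConfigs.getDefault().withChunkSize(32 * 1024 * 1024))
 *         .build();
 * }</pre>
 *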
* @see GrpcStorageDefaults#getDefaultStorageWriterConfig()
* @since 2.26.0 This new api is in preview and is subject to breaking changes.
*/
@BetaApi
@TransportCompatibility({Transport.GRPC})
public static DefaultBlobWriteSessionConfig getDefault() {
return new DefaultBlobWriteSessionConfig(ByteSizeConstants._16MiB);
}
/**
* Create a new {@link BlobWriteSessionConfig} which will first buffer the content of the object
* to a temporary file under {@code java.io.tmpdir}.
*
 * <p>Once the file on disk is closed, the entire file will then be uploaded to Cloud Storage.
*
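 * <p>A minimal sketch of selecting this strategy:
 *
 * <pre>{@code
 * StorageOptions options =
 *     StorageOptions.grpc()
 *         .setBlobWriteSessionConfig(BlobWriteSessionConfigs.bufferToTempDirThenUpload())
 *         .build();
 * }</pre>
 *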
* @see Storage#blobWriteSession(BlobInfo, BlobWriteOption...)
* @see GrpcStorageOptions.Builder#setBlobWriteSessionConfig(BlobWriteSessionConfig)
* @since 2.26.0 This new api is in preview and is subject to breaking changes.
*/
@BetaApi
@TransportCompatibility({Transport.GRPC})
public static BlobWriteSessionConfig bufferToTempDirThenUpload() throws IOException {
return bufferToDiskThenUpload(
Paths.get(System.getProperty("java.io.tmpdir"), "google-cloud-storage"));
}
/**
* Create a new {@link BlobWriteSessionConfig} which will first buffer the content of the object
* to a temporary file under the specified {@code path}.
*
 * <p>Once the file on disk is closed, the entire file will then be uploaded to Cloud Storage.
*
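 * <p>For example, buffering to a dedicated scratch directory (the path is illustrative):
 *
 * <pre>{@code
 * StorageOptions options =
 *     StorageOptions.grpc()
 *         .setBlobWriteSessionConfig(
 *             BlobWriteSessionConfigs.bufferToDiskThenUpload(Paths.get("/mnt/scratch/uploads")))
 *         .build();
 * }</pre>
 *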
* @see Storage#blobWriteSession(BlobInfo, BlobWriteOption...)
* @see GrpcStorageOptions.Builder#setBlobWriteSessionConfig(BlobWriteSessionConfig)
* @since 2.26.0 This new api is in preview and is subject to breaking changes.
*/
@BetaApi
@TransportCompatibility({Transport.GRPC})
public static BufferToDiskThenUpload bufferToDiskThenUpload(Path path) throws IOException {
return bufferToDiskThenUpload(ImmutableList.of(path));
}
/**
* Create a new {@link BlobWriteSessionConfig} which will first buffer the content of the object
* to a temporary file under one of the specified {@code paths}.
*
 * <p>Once the file on disk is closed, the entire file will then be uploaded to Cloud Storage.
 *
 * <p>The specifics of how the work is spread across multiple paths are undefined and subject to
 * change.
*
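 * <p>For example, spreading temporary files across two disks (the paths are illustrative):
 *
 * <pre>{@code
 * StorageOptions options =
 *     StorageOptions.grpc()
 *         .setBlobWriteSessionConfig(
 *             BlobWriteSessionConfigs.bufferToDiskThenUpload(
 *                 ImmutableList.of(Paths.get("/mnt/disk-a/tmp"), Paths.get("/mnt/disk-b/tmp"))))
 *         .build();
 * }</pre>
 *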
* @see Storage#blobWriteSession(BlobInfo, BlobWriteOption...)
* @see GrpcStorageOptions.Builder#setBlobWriteSessionConfig(BlobWriteSessionConfig)
* @since 2.26.0 This new api is in preview and is subject to breaking changes.
*/
@BetaApi
@TransportCompatibility({Transport.GRPC})
public static BufferToDiskThenUpload bufferToDiskThenUpload(Collection<Path> paths)
throws IOException {
return new BufferToDiskThenUpload(ImmutableList.copyOf(paths), false);
}
/**
* Create a new {@link BlobWriteSessionConfig} which will journal writes to a temporary file under
* one of the specified {@code paths} before transmitting the bytes to Cloud Storage.
*
 * <p>The specifics of how the work is spread across multiple paths are undefined and subject to
 * change.
*
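 * <p>For example, journaling to a Local SSD mount (the path is illustrative):
 *
 * <pre>{@code
 * StorageOptions options =
 *     StorageOptions.grpc()
 *         .setBlobWriteSessionConfig(
 *             BlobWriteSessionConfigs.journaling(
 *                 ImmutableList.of(Paths.get("/mnt/local-ssd/journal"))))
 *         .build();
 * }</pre>
 *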
* @see Storage#blobWriteSession(BlobInfo, BlobWriteOption...)
* @see GrpcStorageOptions.Builder#setBlobWriteSessionConfig(BlobWriteSessionConfig)
* @since 2.27.0 This new api is in preview and is subject to breaking changes.
*/
@BetaApi
@TransportCompatibility(Transport.GRPC)
public static JournalingBlobWriteSessionConfig journaling(Collection<Path> paths) {
return new JournalingBlobWriteSessionConfig(ImmutableList.copyOf(paths), false);
}
/**
* Create a new {@link BlobWriteSessionConfig} which will perform a Parallel Composite
* Upload by breaking the stream into parts and composing the parts together to make the
* ultimate object.
*
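 * <p>For example, a sketch which also disables automatic part cleanup so part deletion can be
 * handled out of band:
 *
 * <pre>{@code
 * StorageOptions options =
 *     StorageOptions.grpc()
 *         .setBlobWriteSessionConfig(
 *             BlobWriteSessionConfigs.parallelCompositeUpload()
 *                 .withPartCleanupStrategy(PartCleanupStrategy.never()))
 *         .build();
 * }</pre>
 *
 * <p>Remember to supply a precondition such as {@link BlobWriteOption#doesNotExist()} when
 * opening the session so the final compose request is idempotent and can be retried.
 *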
* @see Storage#blobWriteSession(BlobInfo, BlobWriteOption...)
* @see GrpcStorageOptions.Builder#setBlobWriteSessionConfig(BlobWriteSessionConfig)
* @since 2.28.0 This new api is in preview and is subject to breaking changes.
*/
@BetaApi
@TransportCompatibility({Transport.GRPC})
public static ParallelCompositeUploadBlobWriteSessionConfig parallelCompositeUpload() {
return ParallelCompositeUploadBlobWriteSessionConfig.withDefaults();
}
}