All Downloads are FREE. Search and download functionalities are using the official Maven repository.

com.google.tsunami.common.io.archiving.GoogleCloudStorageArchiver Maven / Gradle / Ivy

There is a newer version: 0.0.26
Show newest version
/*
 * Copyright 2020 Google LLC
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package com.google.tsunami.common.io.archiving;

import static com.google.common.base.Preconditions.checkArgument;
import static com.google.common.base.Preconditions.checkNotNull;

import com.beust.jcommander.Parameter;
import com.beust.jcommander.Parameters;
import com.beust.jcommander.validators.PositiveInteger;
import com.google.cloud.WriteChannel;
import com.google.cloud.storage.BlobInfo;
import com.google.cloud.storage.Storage;
import com.google.common.flogger.GoogleLogger;
import com.google.inject.assistedinject.Assisted;
import com.google.tsunami.common.cli.CliOption;
import java.io.IOException;
import java.nio.ByteBuffer;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import javax.inject.Inject;

/**
 * An {@link Archiver} implementation that archives data into Google Cloud Storage.
 *
 * <p>Payloads whose size is at most {@link Options#chunkUploadThresholdInBytes} are uploaded with
 * a single {@code Storage.create} call; larger payloads are streamed through a {@link
 * WriteChannel} in pieces of at most {@link Options#chunkSizeInBytes} bytes.
 */
public class GoogleCloudStorageArchiver implements Archiver {
  private static final GoogleLogger logger = GoogleLogger.forEnclosingClass();

  /**
   * Pattern used both to sanity-check a GCS URL and to extract its parts: group 1 is the bucket
   * name (3 to 63 non-slash characters) and group 2 is the object name (everything after the first
   * slash following the bucket, possibly empty).
   *
   * <p>See https://cloud.google.com/storage/docs/bucket-naming
   */
  public static final Pattern GS_URL_PATTERN = Pattern.compile("gs://([^/]{3,63})/(.*)");

  private final Options options;
  private final Storage storage;

  /** All command line options for {@link GoogleCloudStorageArchiver}. */
  @Parameters(separators = "=")
  public static final class Options implements CliOption {
    /** Maximum number of bytes handed to the write channel per call. Defaults to 1,000. */
    @Parameter(
        names = "--gcs-archiver-chunk-size-in-bytes",
        description = "The size of the data chunk when GCS archiver uploads data to Cloud Storage.",
        validateWith = PositiveInteger.class)
    int chunkSizeInBytes = 1_000;

    /**
     * Payloads strictly larger than this many bytes are uploaded in chunks. Defaults to 1,000,000.
     */
    @Parameter(
        names = "--gcs-archiver-chunk-upload-threshold-in-bytes",
        description = "The default data size threshold in bytes to enable chunk upload to GCS.",
        validateWith = PositiveInteger.class)
    int chunkUploadThresholdInBytes = 1_000_000;

    /** Performs no additional validation. */
    @Override
    public void validate() {}
  }

  /**
   * Creates an archiver that uploads through the given {@link Storage} client.
   *
   * @param options the chunking options; must not be null
   * @param storage the Cloud Storage client used for all uploads; must not be null
   */
  @Inject
  public GoogleCloudStorageArchiver(Options options, @Assisted Storage storage) {
    this.options = checkNotNull(options);
    this.storage = checkNotNull(storage);
  }

  /**
   * Parses a {@code gs://bucket/object} URL into a {@link BlobInfo}.
   *
   * @param gcsUrl the URL to parse
   * @return a {@link BlobInfo} built from the bucket and object names found in {@code gcsUrl}
   * @throws IllegalArgumentException if {@code gcsUrl} does not match {@link #GS_URL_PATTERN}
   */
  private static BlobInfo parseBlobInfo(String gcsUrl) {
    Matcher matcher = GS_URL_PATTERN.matcher(gcsUrl);
    checkArgument(matcher.matches(), "Invalid GCS URL: '%s'", gcsUrl);

    String bucketName = matcher.group(1);
    String objectName = matcher.group(2);
    return BlobInfo.newBuilder(bucketName, objectName).build();
  }

  /**
   * Uploads {@code data} to the object identified by {@code gcsUrl}.
   *
   * @param gcsUrl destination in the form {@code gs://bucket/object}
   * @param data the bytes to upload; must not be null
   * @return {@code true} once the upload calls have completed, {@code false} if an {@link
   *     IOException} was raised during a chunked upload. Exceptions other than {@link IOException}
   *     are not caught here and propagate to the caller.
   * @throws IllegalArgumentException if {@code gcsUrl} is not a valid GCS URL, or if a chunked
   *     upload is required while the configured chunk size is not positive
   */
  @Override
  public boolean archive(String gcsUrl, byte[] data) {
    BlobInfo blobInfo = parseBlobInfo(gcsUrl);

    if (data.length <= options.chunkUploadThresholdInBytes) {
      // Create the blob in one request.
      logger.atInfo().log("Archiving data to GCS at '%s' in one request.", gcsUrl);
      storage.create(blobInfo, data);
      return true;
    }

    // Content larger than the configured threshold is written in chunks via the blob's channel
    // writer.
    logger.atInfo().log(
        "Content is larger than threshold, archiving data to GCS at '%s' in chunks.", gcsUrl);

    // A non-positive chunk size would make the loop below spin forever (0) or hand a negative
    // length to ByteBuffer.wrap, so reject it up front with a clear message.
    checkArgument(
        options.chunkSizeInBytes > 0,
        "GCS archiver chunk size must be positive, got %s",
        options.chunkSizeInBytes);

    try (WriteChannel writer = storage.writer(blobInfo)) {
      int chunkOffset = 0;
      while (chunkOffset < data.length) {
        int chunkSize = Math.min(data.length - chunkOffset, options.chunkSizeInBytes);
        writer.write(ByteBuffer.wrap(data, chunkOffset, chunkSize));
        // Advance by the bytes actually handed over rather than by the configured chunk size:
        // the offset can then never exceed data.length, so it cannot overflow int.
        chunkOffset += chunkSize;
      }
      return true;
    } catch (IOException e) {
      logger.atSevere().withCause(e).log("Unable to archive data to GCS at '%s'.", gcsUrl);
      return false;
    }
  }

  /** The factory of {@link GoogleCloudStorageArchiver} types for usage with assisted injection. */
  // TODO(b/145315535): consider wrapping the Storage API into a client library. The current
  // implementation is not easily testable.
  public interface Factory {
    /**
     * Creates a {@link GoogleCloudStorageArchiver} bound to the given client.
     *
     * @param storage the Cloud Storage client the archiver should upload through
     */
    GoogleCloudStorageArchiver create(Storage storage);
  }
}




© 2015 - 2025 Weber Informatics LLC | Privacy Policy