All Downloads are FREE. Search and download functionalities are using the official Maven repository.

com.google.cloud.genomics.dataflow.utils.GCSHelper Maven / Gradle / Ivy

/*
 * Copyright (C) 2014 Google Inc.
 *
 * Licensed under the Apache License, Version 2.0 (the "License"); you may not
 * use this file except in compliance with the License. You may obtain a copy of
 * the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
 * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
 * License for the specific language governing permissions and limitations under
 * the License.
 */
package com.google.cloud.genomics.dataflow.utils;

import com.google.api.client.auth.oauth2.Credential;
import com.google.api.client.http.HttpHeaders;
import com.google.api.client.http.HttpTransport;
import com.google.api.client.json.JsonFactory;
import com.google.api.client.json.jackson2.JacksonFactory;
import com.google.api.services.storage.Storage;
import com.google.api.services.storage.model.StorageObject;
import com.google.cloud.genomics.utils.CredentialFactory;
import com.google.cloud.genomics.utils.GenomicsFactory;
import com.google.cloud.genomics.utils.OfflineAuth;
import com.google.common.annotations.VisibleForTesting;
import com.google.common.base.Preconditions;

import java.io.ByteArrayOutputStream;
import java.io.File;
import java.io.FileOutputStream;
import java.io.IOException;
import java.io.InputStream;
import java.io.OutputStream;
import java.math.BigInteger;
import java.security.GeneralSecurityException;
import java.util.logging.Level;
import java.util.logging.Logger;

import javax.annotation.Nullable;

/**
 * A helper class to download from Google Cloud Storage.
 *
 * <p>Each instance wraps one {@link Storage} client, built by one of the public constructors
 * from pipeline options, serialized offline credentials, or a client-secrets file.
 */
public class GCSHelper {

  private static final Logger LOGGER = Logger.getLogger(GCSHelper.class.getName());

  /**
   * Hard-coded to {@code false}; its negation is what gets passed to
   * {@code setDirectDownloadEnabled}, so partial reads always request a direct download.
   */
  private static final boolean IS_APP_ENGINE = false;

  /**
   * Global instance of the JSON factory.
   */
  private static final JsonFactory JSON_FACTORY = JacksonFactory.getDefaultInstance();

  /** Client used for every request; stays {@code null} when the testing constructor is used. */
  private Storage storage;

  /**
   * Connects to storage (this is the preferred way).
   *
   * @param popts already-filled options.
   * @throws NullPointerException if {@code popts} is null
   */
  public GCSHelper(GenomicsOptions popts) throws GeneralSecurityException, IOException {
    Preconditions.checkNotNull(popts, "popts");
    GenomicsFactory factory = GenomicsFactory.builder(popts.getAppName()).build();
    // The transport is taken straight from this instance's factory instead of being parked in
    // a static field that every constructor call would overwrite.
    Storage.Builder builder =
        new Storage.Builder(factory.getHttpTransport(), JSON_FACTORY, null)
            .setApplicationName(popts.getAppName());
    OfflineAuth auth = GenomicsOptions.Methods.getGenomicsAuth(popts);
    storage = factory.fromOfflineAuth(builder, auth).build();
  }

  /**
   * Connects to storage
   * (use this if you're a Dataflow worker, as you don't have access to the clients-secrets.json
   * from there).
   *
   * @param offlineAuth serialized credentials
   * @throws NullPointerException if {@code offlineAuth} is null
   */
  public GCSHelper(OfflineAuth offlineAuth) throws GeneralSecurityException, IOException {
    Preconditions.checkNotNull(offlineAuth, "offlineAuth");
    GenomicsFactory factory = GenomicsFactory.builder().build();
    Storage.Builder builder =
        new Storage.Builder(factory.getHttpTransport(), JSON_FACTORY, null);
    storage = factory.fromOfflineAuth(builder, offlineAuth).build();
  }

  /**
   * Connects to storage.
   *
   * @param appName     name of your app
   * @param secretsFile path to clients-secrets.json
   */
  public GCSHelper(String appName, String secretsFile) throws GeneralSecurityException, IOException {
    // cf https://groups.google.com/forum/#!msg/google-genomics-discuss/P9A9odUXwaM/ISdIzOXNS3YJ
    GenomicsFactory factory = GenomicsFactory.builder(appName).build();
    Credential creds = CredentialFactory.getCredentialFromClientSecrets(secretsFile, appName);
    Storage.Builder builder =
        new Storage.Builder(factory.getHttpTransport(), JSON_FACTORY, null)
            .setApplicationName(appName);
    storage = factory.fromCredential(builder, creds).build();
  }

  /**
   * Creates a helper without a storage client. Every method that talks to GCS dereferences the
   * client, so an instance built this way cannot perform requests.
   */
  @VisibleForTesting
  GCSHelper() {
  }

  /**
   * Get the underlying GCS Storage object, for advanced uses
   * (e.g. a download progressbar).
   */
  public Storage getStorage() {
    return this.storage;
  }

  /**
   * Looks up the size of an object from its metadata.
   *
   * @param bucket bucket that holds the file
   * @param name of the file we're interested in
   * @return size of the file, in bytes
   * @throws IOException if the metadata request fails or the response carries no size
   * @throws RuntimeException if the reported size does not fit in a {@code long}
   */
  public long getFileSize(String bucket, String name) throws IOException {
    StorageObject object = storage.objects().get(bucket, name).execute();
    BigInteger size = object.getSize();
    if (size == null) {
      // Fail with context instead of a bare NullPointerException on the comparison below.
      throw new IOException("No size reported for gs://" + bucket + "/" + name);
    }
    if (size.compareTo(BigInteger.valueOf(Long.MAX_VALUE)) > 0) {
      throw new RuntimeException("File size is too big for a long!");
    }
    return size.longValue();
  }


  /**
   * Retrieve part of the file into a freshly allocated buffer.
   *
   * @param bucket bucket that holds the file
   * @param fname name of the file
   * @param start offset of the first byte to fetch
   * @param endIncl offset of the last byte to fetch (inclusive)
   * @return a buffer holding exactly the requested bytes
   * @throws IllegalArgumentException if the range is negative, backwards, or too large to buffer
   * @throws IOException if the download fails or yields an unexpected number of bytes
   */
  public ByteArrayOutputStream getPartialObjectData(String bucket, String fname, long start, long endIncl) throws IOException {
    return getPartialObjectData(bucket, fname, start, endIncl, null);
  }

  /**
   * Retrieve part of the file.
   *
   * <p>Example thing you may want to do with the result:
   * {@code String str = new String(out.toByteArray(), StandardCharsets.UTF_8);}
   *
   * @param bucket bucket that holds the file
   * @param fname name of the file
   * @param start offset of the first byte to fetch
   * @param endIncl offset of the last byte to fetch (inclusive)
   * @param optionalOldOutputToReuse buffer to reset and fill instead of allocating a new one;
   *     may be null
   * @return the buffer that was filled (the reused one when it was supplied)
   * @throws IllegalArgumentException if the range is negative, backwards, or too large to buffer
   * @throws IOException if the download fails or yields an unexpected number of bytes
   */
  public ByteArrayOutputStream getPartialObjectData(String bucket, String fname, long start, long endIncl,
                                                    @Nullable ByteArrayOutputStream optionalOldOutputToReuse) throws IOException {
    // Reject bad ranges before touching the network or the caller's buffer. Previously the
    // length was cast to int unchecked, so an oversized range was silently truncated.
    Preconditions.checkArgument(start >= 0, "start must be non-negative, got %s", start);
    Preconditions.checkArgument(endIncl >= start,
        "endIncl (%s) must not be smaller than start (%s)", endIncl, start);
    // Compared before adding 1 so the subtraction cannot overflow a long.
    Preconditions.checkArgument(endIncl - start < Integer.MAX_VALUE,
        "range %s-%s is too large for an in-memory buffer", start, endIncl);
    int expectedBytes = (int) (endIncl - start + 1);

    ByteArrayOutputStream out;
    if (null == optionalOldOutputToReuse) {
      out = new ByteArrayOutputStream(expectedBytes);
    } else {
      out = optionalOldOutputToReuse;
      out.reset();
    }
    Storage.Objects.Get getObject = storage.objects().get(bucket, fname);

    // Plain concatenation keeps the header ASCII; String.format("%d") localizes digits under
    // some default locales, which would produce an invalid Range value.
    getObject.setRequestHeaders(new HttpHeaders().setRange("bytes=" + start + "-" + endIncl));

    getObject.getMediaHttpDownloader().setDirectDownloadEnabled(!IS_APP_ENGINE);
    getObject.executeMediaAndDownloadTo(out);

    if (out.size() != expectedBytes) {
      String err = "getPartialObjectData failed! Expected " + expectedBytes + " bytes, got " + out.size();
      LOGGER.log(Level.WARNING, err);
      throw new IOException(err);
    }

    return out;
  }

  /**
   * Retrieve the whole file as a stream.
   *
   * <p>The returned stream comes straight from the media request; the caller should close it
   * once done reading.
   *
   * @param bucket bucket that holds the file
   * @param fname name of the file
   * @return stream over the object's content
   * @throws IOException if the request fails
   */
  public InputStream getWholeObject(String bucket, String fname) throws IOException {
    Storage.Objects.Get getObject = storage.objects().get(bucket, fname);
    return getObject.executeMediaAsInputStream();
  }

  /**
   * Retrieve the whole file (to a temporary file on disk).
   *
   * <p>The temporary file is not removed by this class after a successful download; if the
   * download fails, the partially written file is deleted before the exception propagates.
   *
   * @param bucket bucket that holds the file
   * @param fname name of the file
   * @return the temporary file holding the object's content
   * @throws IOException if the temporary file cannot be created or the download fails
   */
  public File getAsFile(String bucket, String fname) throws IOException {
    Storage.Objects.Get request = storage.objects().get(bucket, fname);
    File file = File.createTempFile("gcsdownload", "obj");
    boolean downloaded = false;
    try {
      try (OutputStream out = new FileOutputStream(file)) {
        request.executeMediaAndDownloadTo(out);
      }
      // Only reached once the stream has also been closed without error.
      downloaded = true;
    } finally {
      // Don't leave a partial temp file behind when the download (or the close) failed.
      if (!downloaded && !file.delete()) {
        LOGGER.log(Level.WARNING, "Could not delete temporary file " + file);
      }
    }
    return file;
  }

}




© 2015 - 2025 Weber Informatics LLC | Privacy Policy