All downloads are free. The search and download functionality uses the official Maven repository.

loci.common.S3Handle Maven / Gradle / Ivy

The newest version!
/*
 * #%L
 * Common package for I/O and related utilities
 * %%
 * Copyright (C) 2018 Open Microscopy Environment:
 *   - Board of Regents of the University of Wisconsin-Madison
 *   - Glencoe Software, Inc.
 *   - University of Dundee
 * %%
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions are met:
 *
 * 1. Redistributions of source code must retain the above copyright notice,
 *    this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright notice,
 *    this list of conditions and the following disclaimer in the documentation
 *    and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
 * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDERS OR CONTRIBUTORS BE
 * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 * POSSIBILITY OF SUCH DAMAGE.
 * #L%
 */

package loci.common;

import java.io.BufferedInputStream;
import java.io.DataInputStream;
import java.io.IOException;
import java.net.URI;
import java.net.URISyntaxException;
import java.nio.file.Files;
import java.nio.file.Path;
import java.nio.file.Paths;
import java.util.regex.Pattern;

import loci.common.services.DependencyException;
import loci.common.services.S3ClientService;
import loci.common.services.S3ClientServiceException;
import loci.common.services.S3ClientStat;

import loci.common.services.ServiceFactory;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

/**
 * Provides random access to S3 buckets using the IRandomAccess interface.
 * Instances of S3Handle are read-only.
 *
 * <p>The connection string has the form
 * {@code s3[+protocol]://[access[:secret]@]host[:port]/bucket[/key]}. A plain
 * {@code s3} scheme uses {@link #DEFAULT_S3_PROTOCOL}; {@code s3+http} selects
 * {@code http}, and so on.
 *
 * @see IRandomAccess
 * @see StreamHandle
 * @see java.net.URLConnection
 *
 */
public class S3Handle extends StreamHandle {

  /**
   * An S3 IOException that was not thrown immediately.
   *
   * Declared static so that the exception does not carry a hidden reference to
   * the enclosing handle; everything it needs is read from the handle passed
   * to the constructor.
   */
  static class DelayedObjectNotFound extends IOException {
    private static final long serialVersionUID = 1L;

    DelayedObjectNotFound(S3Handle s3) {
      super(String.format("Object not found: [%s] %s", s3, s3.objectNotFound), s3.objectNotFound);
    }
  }

  /** Default protocol for fetching s3:// */
  public final static String DEFAULT_S3_PROTOCOL = "https";

  private static final Logger LOGGER = LoggerFactory.getLogger(S3Handle.class);

  /** Matches {@code s3}, {@code s3+<protocol>} and either followed by {@code ://...} */
  protected final static Pattern SCHEME_PARSER = Pattern.compile("s3(\\+\\p{Alnum}+)?(://.*)?");

  /** S3 configuration */
  private final Settings settings;

  /** Parsed URI used to configure this handle */
  private final URI uri;

  /** access key, if provided */
  private final String accessKey;

  /** secret key, if provided */
  private final String secretKey;

  /** name of the bucket */
  private final String bucket;

  /** endpoint to which requests will be sent */
  private final String server;

  /** port at the given server */
  private final int port;

  /** remaining path, or key, for this accessed resource */
  private final String path;

  /** S3 client */
  private S3ClientService s3Client;

  /** Remote file stat */
  private S3ClientStat stat;

  /** Is this a directory (currently only buckets are considered directories) */
  private boolean isBucket;

  /**
   * Exception if thrown during construction
   */
  private Throwable objectNotFound;

  /** If seeking more than this distance reset and reopen at offset */
  protected static final int S3_MAX_FORWARD_SEEK = 1048576;

  /**
   * Return true if this is a URL with an s3 scheme
   * @param url URL
   * @return true if this class can handle url
   */
  public static boolean canHandleScheme(String url) {
    return SCHEME_PARSER.matcher(url).matches();
  }

  /**
   * Open an S3 file
   *
   * @param url the full URL to the S3 resource
   * @throws IOException if there is an error during opening
   */
  public S3Handle(String url) throws IOException {
    this(url, true, null);
  }

  /**
   * Open an S3 file
   *
   * <p>Note that when {@code initialize} is true this constructor calls the
   * overridable methods {@link #connect()} and {@link #initialize()}.
   *
   * @param uristr the full URL to the S3 resource
   * @param initialize If true open the stream, otherwise just parse connection
   *        string
   * @param s custom settings object
   * @throws IOException if there is an error during opening
   * @throws IllegalArgumentException if the URI has no scheme
   * @throws RuntimeException if {@code uristr} is not a syntactically valid URI
   */
  public S3Handle(String uristr, boolean initialize, Settings s) throws
      IOException {
    if (s == null) {
      this.settings = new StreamHandle.Settings();
    }
    else {
      this.settings = s;
    }

    // NOTE(review): uristr may embed access/secret keys in its user-info part,
    // so it can end up in this exception message.
    try {
      this.uri = new URI(uristr);
    } catch (URISyntaxException e) {
      throw new RuntimeException("Invalid URI " + uristr, e);
    }

    // access[:secret]
    String auth = this.uri.getUserInfo();
    String accessKey = null;
    String secretKey = null;
    if (auth != null) {
      String[] authparts = auth.split(":", 2);
      accessKey = authparts[0];
      if (authparts.length > 1) {
        secretKey = authparts[1];
      }
    }
    this.accessKey = accessKey;
    this.secretKey = secretKey;

    // A relative reference such as "bucket/key" has no scheme; fail with a
    // descriptive error instead of a bare NullPointerException.
    String scheme = this.uri.getScheme();
    if (scheme == null) {
      throw new IllegalArgumentException("Missing scheme in URI " + uristr);
    }
    String protocol;
    if (scheme.equals("s3")) {
      protocol = DEFAULT_S3_PROTOCOL;
    }
    else if (scheme.startsWith("s3+")) {
      protocol = scheme.substring(3);
    }
    else {
      protocol = scheme;
    }
    this.server = protocol + "://" + this.uri.getHost();

    // URI reports an undefined port as -1; this class stores that as 0
    int uriPort = this.uri.getPort();
    this.port = (uriPort == -1) ? 0 : uriPort;

    // First path component is the bucket
    // TODO: Parsing this seems way more complicated than it should be
    String fullpath = this.uri.getPath();
    if (fullpath == null || fullpath.length() == 0) {
      fullpath = "/";
    }
    // Leading / means first element is always ""
    String[] pathparts = fullpath.split("/", 3);
    if (pathparts[1].length() > 0) {
      this.bucket = pathparts[1];
    }
    else {
      this.bucket = null;
    }
    if (pathparts.length > 2 && pathparts[2].length() > 0) {
      this.path = pathparts[2];
    }
    else {
      this.path = null;
    }

    this.isBucket = false;
    this.stat = null;

    if (initialize) {
      // Throw if there is an IOException, otherwise save the exception and only throw if a method
      // that requires a valid object is called
      this.connect();
      try {
        this.initialize();
      }
      catch (S3ClientServiceException e) {
        this.objectNotFound = e;
        LOGGER.debug("Object not found: [{}] {}", this, e);
      }
      LOGGER.trace("isBucket:{} stat:{}", isBucket, stat);
    }
  }

  /**
   * Connect to the server
   * @throws IOException if there was an error connecting to the server
   */
  protected void connect() throws IOException {
    final String appName = "Bio-Formats";
    // TODO: Replace "dev" with a version
    final String appVersion = "dev";
    try {
      ServiceFactory factory = new ServiceFactory();
      s3Client = factory.getInstance(S3ClientService.class);
      s3Client.initialize(server, port, accessKey, secretKey, appName, appVersion);
    }
    catch (S3ClientServiceException e) {
      throw new IOException(String.format(
          "Failed to connect: %s", this), e);
    }
    catch (DependencyException e) {
      throw new IOException(String.format(
          "S3 requires additional dependencies: %s", this), e);
    }
    LOGGER.trace("connected: server:{} port:{}", server, port);
  }

  /**
   * Check bucket or object exists.
   *
   * With no object key only the bucket is checked; otherwise the object is
   * stat-ed and a stream is opened at offset 0.
   *
   * @throws IOException if unable to get the object
   * @throws S3ClientServiceException if unable to get the object
   */
  protected void initialize() throws
      IOException,
      S3ClientServiceException {
    if (path == null) {
      isBucket = s3Client.bucketExists(bucket);
    }
    else {
      isBucket = false;
      stat = s3Client.statObject(bucket, path);
      resetStream();
    }
  }

  /**
   * @return the endpoint as {@code protocol://host}
   */
  public String getServer() {
    return server;
  }

  /**
   * @return the port from the URI, or 0 if the URI did not specify one
   */
  public int getPort() {
    return port;
  }

  /**
   * @return the bucket name, or null if the URI has no bucket component
   */
  public String getBucket() {
    return bucket;
  }

  /**
   * @return the object key within the bucket, or null if the URI has none
   */
  public String getPath() {
    return path;
  }

  /**
   * Download an S3 object to a file system cache if it doesn't already exist
   *
   * <p>The handle opened to stat the object is closed before returning, and
   * the cache location is rejected if the object key would place it outside
   * the configured cache root.
   *
   * @param url the full URL to the S3 resource
   * @param s custom settings object
   * @return File path to the cached object
   * @throws IOException if there is an error during reading or writing
   * @throws HandleException if no destination for the cache is provided, or
   *         the cache location would fall outside the cache root
   */
  public static String cacheObject(String url, Settings s) throws
      IOException,
      HandleException {
    String cacheroot = s.getRemoteCacheRootDir();
    if (cacheroot == null) {
      throw new HandleException("Remote cache root dir is not set");
    }
    S3Handle s3 = new S3Handle(url, true, s);
    try {
      String cacheobj = s3.getCacheKey();
      // Hopefully creates a cross-platform path
      Path cachepath = Paths.get(cacheroot, cacheobj);

      // The key comes from the URL, so ".." segments could point outside the
      // cache root. Compare normalized absolute paths before touching disk.
      Path root = Paths.get(cacheroot).toAbsolutePath().normalize();
      Path resolved = cachepath.toAbsolutePath().normalize();
      if (resolved.equals(root) || !resolved.startsWith(root)) {
        throw new HandleException("Unsafe cache path for " + s3);
      }

      if (Files.exists(cachepath)) {
        LOGGER.debug("Found existing cache for {} at {}", s3, cachepath);
      }
      else {
        LOGGER.debug("Caching {} to {}", s3, cachepath);
        s3.downloadObject(cachepath);
        LOGGER.debug("Downloaded {}", cachepath);
      }
      return cachepath.toString();
    }
    finally {
      // The constructor opened a remote stream that is not needed any more
      s3.closeQuietly();
    }
  }

  /**
   * Build the relative cache location for this object.
   *
   * @return {@code protocol/host/port/bucket/path}; absent components appear
   *         as the string "null"
   */
  public String getCacheKey(){
    String cachekey =
      getServer().replace("://", "/") + "/" +
      getPort() + "/" +
      getBucket() + "/" +
      getPath();
    return cachekey;
  }

  /**
   * Download this object to a local file, creating parent directories.
   *
   * @param destination local file to write
   * @throws HandleException if this handle has no object key or the download failed
   * @throws IOException if the object was not found or directories could not be created
   */
  protected void downloadObject(Path destination) throws HandleException, IOException {
    LOGGER.trace("destination:{}", destination);
    if (this.stat == null || this.objectNotFound != null) {
      throw new IOException("Object not found " + this, this.objectNotFound);
    }
    if (path == null) {
      throw new HandleException("Download path=null not allowed");
    }
    // A single-element destination has no parent; nothing to create then
    Path parent = destination.getParent();
    if (parent != null) {
      Files.createDirectories(parent);
    }
    try {
      s3Client.getObject(bucket, path, destination.toString());
    }
    catch (S3ClientServiceException e) {
      throw new HandleException("Download failed " + toString(), e);
    }
  }

  /**
   * Is this an accessible bucket?
   * TODO: If this bucket doesn't exist do we return false or thrown an exception?
   * Currently a failure recorded during construction is not rethrown here.
   *
   * @return True if a bucket
   */
  public boolean isBucket() {
    return isBucket;
  }

  /* @see IRandomAccess#length() */
  @Override
  public long length() throws IOException {
    if (this.stat == null || this.objectNotFound != null) {
      throw new DelayedObjectNotFound(this);
    }
    return length;
  }

  /**
   * Seek to a position. Backward seeks, and forward seeks of more than
   * {@link #S3_MAX_FORWARD_SEEK} bytes, reopen the object at the target
   * offset; shorter forward seeks are delegated to the superclass.
   *
   * @see StreamHandle#seek(long)
   */
  @Override
  public void seek(long pos) throws IOException {
    LOGGER.trace("{}", pos);
    if (this.stat == null || this.objectNotFound != null) {
      throw new DelayedObjectNotFound(this);
    }
    long diff = pos - fp;

    if (diff < 0 || diff > S3_MAX_FORWARD_SEEK) {
      resetStream(pos);
    }
    else {
      super.seek(pos);
    }
  }

  /**
   * @see StreamHandle#resetStream()
   */
  @Override
  protected void resetStream() throws IOException {
    resetStream(0);
  }

  /**
   * Does this represent an accessible location?
   * @return true if this location is accessible
   * @throws IOException if unable to determine whether this location is accessible
   */
  @Override
  public boolean exists() throws IOException {
    return isFound();
  }

  /**
   * Reset the stream to an offset position.
   *
   * The replacement stream is opened first; only once that succeeds is the
   * previous stream closed, so a failed reopen leaves the old stream in place.
   *
   * @param offset Offset into object
   * @throws IOException if there is an error during reading or writing
   */
  protected void resetStream(long offset) throws IOException {
    LOGGER.trace("Resetting {}", offset);
    if (this.stat == null || this.objectNotFound != null) {
      throw new DelayedObjectNotFound(this);
    }
    try {
      length = stat.length();
      DataInputStream replacement = new DataInputStream(new BufferedInputStream(
          s3Client.getObject(bucket, path, offset)));
      // Release the remote stream being replaced, otherwise it is leaked
      closeCurrentStream();
      stream = replacement;
      fp = offset;
      mark = offset;
    }
    catch (S3ClientServiceException e) {
      // NOTE(review): uri may contain credentials in its user-info part
      throw new IOException(String.format(
            "failed to load s3: %s\n\t%s", uri, this), e);
    }
  }

  @Override
  public String toString() {
    return String.format("server:%s port:%d bucket:%s path:%s found:%s",
                          server, port, bucket, path, isFound());
  }

  /** True if no error was recorded and either the bucket or the object was found. */
  private boolean isFound() {
    return (objectNotFound == null) && (isBucket || stat != null);
  }

  /** Close the currently open input stream, if any, logging rather than throwing on failure. */
  private void closeCurrentStream() {
    if (stream == null) {
      return;
    }
    try {
      stream.close();
    }
    catch (IOException e) {
      LOGGER.debug("Failed to close previous stream for {}", this, e);
    }
  }

  /** Close this handle, logging rather than throwing on failure. */
  private void closeQuietly() {
    try {
      close();
    }
    catch (IOException e) {
      LOGGER.debug("Failed to close {}", this, e);
    }
  }
}




© 2015 - 2024 Weber Informatics LLC | Privacy Policy