All Downloads are FREE. Search and download functionalities are using the official Maven repository.

io.druid.firehose.s3.StaticS3FirehoseFactory Maven / Gradle / Ivy

There is a newer version: 0.12.3
Show newest version
/*
 * Licensed to Metamarkets Group Inc. (Metamarkets) under one
 * or more contributor license agreements. See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership. Metamarkets licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License. You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing,
 * software distributed under the License is distributed on an
 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
 * KIND, either express or implied. See the License for the
 * specific language governing permissions and limitations
 * under the License.
 */

package io.druid.firehose.s3;

import com.fasterxml.jackson.annotation.JacksonInject;
import com.fasterxml.jackson.annotation.JsonCreator;
import com.fasterxml.jackson.annotation.JsonProperty;
import com.google.common.base.Preconditions;
import io.druid.data.input.impl.PrefetchableTextFilesFirehoseFactory;
import io.druid.java.util.common.CompressionUtils;
import io.druid.java.util.common.IAE;
import io.druid.java.util.common.logger.Logger;
import org.jets3t.service.ServiceException;
import org.jets3t.service.StorageObjectsChunk;
import org.jets3t.service.impl.rest.httpclient.RestS3Service;
import org.jets3t.service.model.S3Object;

import java.io.IOException;
import java.io.InputStream;
import java.net.URI;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collection;
import java.util.List;
import java.util.stream.Collectors;

/**
 * Builds firehoses that read from a predefined list of S3 objects and then dry up.
 *
 * <p>The objects to read are given either as explicit {@code uris} or as {@code prefixes} that are
 * expanded by listing the bucket. Exactly one of the two lists must be non-empty, and every URI in
 * either list must use the {@code s3} scheme. The bucket is taken from the URI authority and the
 * key (or key prefix) from the URI path without its leading slash.
 */
public class StaticS3FirehoseFactory extends PrefetchableTextFilesFirehoseFactory<S3Object>
{
  private static final Logger log = new Logger(StaticS3FirehoseFactory.class);
  // Passed as the listing-length argument of each listObjectsChunked() call.
  private static final long MAX_LISTING_LENGTH = 1024;

  private final RestS3Service s3Client;
  private final List<URI> uris;
  private final List<URI> prefixes;

  /**
   * Creates the factory and validates its configuration.
   *
   * @param s3Client              injected S3 client; must not be null
   * @param uris                  explicit object URIs; null is treated as an empty list
   * @param prefixes              prefix URIs to list objects under; null is treated as an empty list
   * @param maxCacheCapacityBytes forwarded unchanged to the superclass constructor
   * @param maxFetchCapacityBytes forwarded unchanged to the superclass constructor
   * @param prefetchTriggerBytes  forwarded unchanged to the superclass constructor
   * @param fetchTimeout          forwarded unchanged to the superclass constructor
   * @param maxFetchRetry         forwarded unchanged to the superclass constructor
   *
   * @throws NullPointerException     if {@code s3Client} is null
   * @throws IllegalArgumentException if both or neither of {@code uris} and {@code prefixes} are given,
   *                                  or if any URI does not use the {@code s3} scheme
   */
  @JsonCreator
  public StaticS3FirehoseFactory(
      @JacksonInject("s3Client") RestS3Service s3Client,
      @JsonProperty("uris") List<URI> uris,
      @JsonProperty("prefixes") List<URI> prefixes,
      @JsonProperty("maxCacheCapacityBytes") Long maxCacheCapacityBytes,
      @JsonProperty("maxFetchCapacityBytes") Long maxFetchCapacityBytes,
      @JsonProperty("prefetchTriggerBytes") Long prefetchTriggerBytes,
      @JsonProperty("fetchTimeout") Long fetchTimeout,
      @JsonProperty("maxFetchRetry") Integer maxFetchRetry
  )
  {
    super(maxCacheCapacityBytes, maxFetchCapacityBytes, prefetchTriggerBytes, fetchTimeout, maxFetchRetry);
    this.s3Client = Preconditions.checkNotNull(s3Client, "null s3Client");
    this.uris = uris == null ? new ArrayList<>() : uris;
    this.prefixes = prefixes == null ? new ArrayList<>() : prefixes;

    if (!this.uris.isEmpty() && !this.prefixes.isEmpty()) {
      throw new IAE("uris and prefixes cannot be used together");
    }

    if (this.uris.isEmpty() && this.prefixes.isEmpty()) {
      throw new IAE("uris or prefixes must be specified");
    }

    checkS3Scheme(this.uris);
    checkS3Scheme(this.prefixes);
  }

  /**
   * Verifies that every given URI uses the {@code s3} scheme.
   *
   * @throws IllegalArgumentException if a URI has a different scheme or no scheme at all
   */
  private static void checkS3Scheme(List<URI> inputURIs)
  {
    for (final URI inputURI : inputURIs) {
      // Constant-first equals: a scheme-less URI yields a null scheme, which must be reported as an
      // invalid argument rather than surfacing as a NullPointerException.
      Preconditions.checkArgument("s3".equals(inputURI.getScheme()), "input uri scheme == s3 (%s)", inputURI);
    }
  }

  /**
   * @return the configured object URIs; empty when prefixes are used instead
   */
  @JsonProperty
  public List<URI> getUris()
  {
    return uris;
  }

  /**
   * @return the configured prefix URIs; empty when explicit uris are used instead
   */
  @JsonProperty("prefixes")
  public List<URI> getPrefixes()
  {
    return prefixes;
  }

  /**
   * Resolves the configured uris or prefixes into the S3 objects to read.
   *
   * @return one {@link S3Object} per configured uri, or every object returned by listing each prefix
   *
   * @throws IOException if listing a prefix fails with a {@link ServiceException}
   */
  @Override
  protected Collection<S3Object> initObjects() throws IOException
  {
    // Here, the returned s3 objects contain minimal information without data.
    // Getting data is deferred until openObjectStream() is called for each object.
    if (!uris.isEmpty()) {
      return uris.stream()
                 .map(
                     uri -> {
                       final String s3Bucket = uri.getAuthority();
                       final S3Object s3Object = new S3Object(extractS3Key(uri));
                       s3Object.setBucketName(s3Bucket);
                       return s3Object;
                     }
                 )
                 .collect(Collectors.toList());
    } else {
      final List<S3Object> objects = new ArrayList<>();
      for (final URI uri : prefixes) {
        final String bucket = uri.getAuthority();
        final String prefix = extractS3Key(uri);
        try {
          // Page through the listing: each call resumes after the last key of the previous chunk
          // until the service reports that the listing is complete.
          String lastKey = null;
          StorageObjectsChunk objectsChunk;
          do {
            objectsChunk = s3Client.listObjectsChunked(
                bucket,
                prefix,
                null,
                MAX_LISTING_LENGTH,
                lastKey
            );
            Arrays.stream(objectsChunk.getObjects()).forEach(storageObject -> objects.add((S3Object) storageObject));
            lastKey = objectsChunk.getPriorLastKey();
          } while (!objectsChunk.isListingComplete());
        }
        catch (ServiceException e) {
          throw new IOException(e);
        }
      }
      return objects;
    }
  }

  /**
   * Returns the path of the given URI with a single leading {@code '/'} removed, if present.
   */
  private static String extractS3Key(URI uri)
  {
    final String path = uri.getPath();
    return path.startsWith("/") ? path.substring(1) : path;
  }

  /**
   * Fetches the given object from S3 and opens a stream over its data.
   *
   * @throws IOException if the S3 request fails with a {@link ServiceException}
   */
  @Override
  protected InputStream openObjectStream(S3Object object) throws IOException
  {
    log.info("Reading from bucket[%s] object[%s] (%s)", object.getBucketName(), object.getKey(), object);

    try {
      // Get data of the given object and open an input stream
      return s3Client.getObject(object.getBucketName(), object.getKey()).getDataInputStream();
    }
    catch (ServiceException e) {
      throw new IOException(e);
    }
  }

  /**
   * Wraps the stream in a gzip-decoding stream when the object key ends with {@code .gz};
   * otherwise returns the stream unchanged.
   */
  @Override
  protected InputStream wrapObjectStream(S3Object object, InputStream stream) throws IOException
  {
    return object.getKey().endsWith(".gz") ? CompressionUtils.gzipInputStream(stream) : stream;
  }

  /**
   * Two factories are equal when they are of the same class and have equal uris and equal prefixes.
   */
  @Override
  public boolean equals(Object o)
  {
    if (this == o) {
      return true;
    }
    if (o == null || getClass() != o.getClass()) {
      return false;
    }

    final StaticS3FirehoseFactory that = (StaticS3FirehoseFactory) o;

    // Prefixes must take part in the comparison: factories configured only with prefixes all have
    // empty uris and would otherwise compare equal regardless of what they read.
    return getUris().equals(that.getUris()) && getPrefixes().equals(that.getPrefixes());
  }

  @Override
  public int hashCode()
  {
    // Combines the same fields that equals() compares.
    return 31 * getUris().hashCode() + getPrefixes().hashCode();
  }
}




© 2015 - 2025 Weber Informatics LLC | Privacy Policy