All Downloads are FREE. Search and download functionality uses the official Maven repository.

org.apache.druid.data.input.InputEntity Maven / Gradle / Ivy

There is a newer version: 31.0.0
Show newest version
/*
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *   http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing,
 * software distributed under the License is distributed on an
 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
 * KIND, either express or implied.  See the License for the
 * specific language governing permissions and limitations
 * under the License.
 */

package org.apache.druid.data.input;

import com.google.common.base.Predicate;
import com.google.common.base.Predicates;
import org.apache.druid.guice.annotations.UnstableApi;
import org.apache.druid.java.util.common.FileUtils;
import org.apache.druid.java.util.common.StringUtils;
import org.apache.druid.java.util.common.logger.Logger;

import javax.annotation.Nullable;
import java.io.Closeable;
import java.io.File;
import java.io.IOException;
import java.io.InputStream;
import java.net.URI;

/**
 * InputEntity abstracts an input entity and knows how to read bytes from the given entity.
 * Since the implementations of this interface assume that the given entity is not empty, the InputSources
 * should not create InputEntities for empty entities.
 */
@UnstableApi
public interface InputEntity
{
  Logger LOG = new Logger(InputEntity.class);

  int DEFAULT_FETCH_BUFFER_SIZE = 4 * 1024; // 4 KiB
  int DEFAULT_MAX_NUM_FETCH_TRIES = 3; // 3 tries including the initial try

  /**
   * CleanableFile is the result type of {@link #fetch}.
   * It should clean up any temporary resource on {@link #close()}.
   */
  interface CleanableFile extends Closeable
  {
    /**
     * Returns the local file this handle refers to.
     */
    File file();
  }

  /**
   * Returns a URI to identify the input entity. Implementations can return null if they don't have
   * a unique URI.
   *
   * @return the URI identifying this entity, or null if there is none
   */
  @Nullable
  URI getUri();

  /**
   * Opens an {@link InputStream} on the input entity directly.
   * This is the basic way to read the given entity.
   * This method may be called multiple times to re-read the data from the entity.
   *
   * @return a stream over the bytes of this entity
   *
   * @throws IOException if the entity cannot be opened
   *
   * @see #fetch
   */
  InputStream open() throws IOException;

  /**
   * Fetches the input entity into the local storage.
   * This method might be preferred instead of {@link #open()}, for example
   *
   * - {@link InputFormat} requires expensive random access on remote storage.
   * - Holding a connection until you consume the entire InputStream is expensive.
   *
   * The default implementation creates a temporary file under {@code temporaryDirectory} and copies the
   * content of {@link #open()} into it via {@link FileUtils#copyLarge}, passing {@link #getRetryCondition()}
   * and {@link #DEFAULT_MAX_NUM_FETCH_TRIES}.
   *
   * @param temporaryDirectory to store temp data. This directory will be removed automatically once
   *                           the task finishes.
   * @param fetchBuffer        is used to fetch remote entity into local storage.
   *
   * @return a handle on the fetched local file; closing it deletes the file
   *
   * @throws IOException if the temporary file cannot be created or the copy fails
   *
   * @see FileUtils#copyLarge
   */
  default CleanableFile fetch(File temporaryDirectory, byte[] fetchBuffer) throws IOException
  {
    final File tempFile = File.createTempFile("druid-input-entity", ".tmp", temporaryDirectory);
    LOG.debug("Fetching entity into file[%s]", tempFile.getAbsolutePath());

    // Message handed to FileUtils.copyLarge; it names the source URI only when the entity has one.
    final URI uri = getUri();
    final String message;

    if (uri == null) {
      message = StringUtils.format("Failed to fetch entity into local file [%s]", tempFile.getAbsolutePath());
    } else {
      message = StringUtils.format("Failed to fetch entity [%s] into local file [%s]", uri, tempFile.getAbsolutePath());
    }

    FileUtils.copyLarge(
        this::open,
        tempFile,
        fetchBuffer,
        getRetryCondition(),
        DEFAULT_MAX_NUM_FETCH_TRIES,
        message
    );

    return new CleanableFile()
    {
      @Override
      public File file()
      {
        return tempFile;
      }

      @Override
      public void close()
      {
        // Closeable#close must be safe to call repeatedly: only warn when the file is still present
        // after a failed delete, not when it has already been removed.
        if (!tempFile.delete() && tempFile.exists()) {
          LOG.warn("Failed to remove file[%s]", tempFile.getAbsolutePath());
        }
      }
    };
  }

  /**
   * Returns a retry condition that the caller should retry on.
   * The returned condition should be used when reading data from this InputEntity such as in {@link #fetch}
   * or {@link RetryingInputEntity}.
   *
   * The default implementation returns {@link Predicates#alwaysFalse()}, i.e. no failure is retried.
   *
   * @return a predicate over the failure that tells whether the read should be retried
   */
  default Predicate<Throwable> getRetryCondition()
  {
    return Predicates.alwaysFalse();
  }

}




© 2015 - 2024 Weber Informatics LLC | Privacy Policy