All Downloads are FREE. Search and download functionalities are using the official Maven repository.

org.apache.hudi.storage.HoodieStorage Maven / Gradle / Ivy

The newest version!
/*
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *   http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing,
 * software distributed under the License is distributed on an
 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
 * KIND, either express or implied.  See the License for the
 * specific language governing permissions and limitations
 * under the License.
 */

package org.apache.hudi.storage;

import org.apache.hudi.ApiMaturityLevel;
import org.apache.hudi.PublicAPIClass;
import org.apache.hudi.PublicAPIMethod;
import org.apache.hudi.common.util.Option;
import org.apache.hudi.exception.HoodieIOException;
import org.apache.hudi.io.SeekableDataInputStream;

import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import java.io.Closeable;
import java.io.FileNotFoundException;
import java.io.IOException;
import java.io.InputStream;
import java.io.OutputStream;
import java.net.URI;
import java.util.ArrayList;
import java.util.List;
import java.util.UUID;

/**
 * Provides I/O APIs on files and directories on storage.
 * The APIs are mainly based on {@code org.apache.hadoop.fs.FileSystem} class.
 */
@PublicAPIClass(maturity = ApiMaturityLevel.EVOLVING)
public abstract class HoodieStorage implements Closeable {
  public static final Logger LOG = LoggerFactory.getLogger(HoodieStorage.class);

  protected final StorageConfiguration storageConf;

  public HoodieStorage(StorageConfiguration storageConf) {
    this.storageConf = storageConf;
  }

  /**
   * @param path        path to instantiate the storage.
   * @param storageConf new storage configuration.
   * @return new {@link HoodieStorage} instance with the configuration.
   */
  @PublicAPIMethod(maturity = ApiMaturityLevel.EVOLVING)
  public abstract HoodieStorage newInstance(StoragePath path,
                                            StorageConfiguration storageConf);

  /**
   * @return the scheme of the storage.
   */
  @PublicAPIMethod(maturity = ApiMaturityLevel.EVOLVING)
  public abstract String getScheme();

  /**
   * @return the default block size.
   */
  @PublicAPIMethod(maturity = ApiMaturityLevel.EVOLVING)
  public abstract int getDefaultBlockSize(StoragePath path);

  /**
   * @return the default buffer size.
   */
  @PublicAPIMethod(maturity = ApiMaturityLevel.EVOLVING)
  public abstract int getDefaultBufferSize();

  /**
   * @return the default block replication
   */
  @PublicAPIMethod(maturity = ApiMaturityLevel.EVOLVING)
  public abstract short getDefaultReplication(StoragePath path);

  /**
   * Returns a URI which identifies this HoodieStorage.
   *
   * @return the URI of this storage instance.
   */
  @PublicAPIMethod(maturity = ApiMaturityLevel.EVOLVING)
  public abstract URI getUri();

  /**
   * Creates an OutputStream at the indicated path.
   *
   * @param path      the file to create.
   * @param overwrite if a file with this name already exists, then if {@code true},
   *                  the file will be overwritten, and if {@code false} an exception will be thrown.
   * @return the OutputStream to write to.
   * @throws IOException IO error.
   */
  @PublicAPIMethod(maturity = ApiMaturityLevel.EVOLVING)
  public abstract OutputStream create(StoragePath path, boolean overwrite) throws IOException;

  /**
   * Creates an OutputStream at the indicated path.
   *
   * @param path          the file to create
   * @param overwrite     if a file with this name already exists, then if {@code true},
   *                      the file will be overwritten, and if {@code false} an exception will be thrown.
   * @param bufferSize    the size of the buffer to be used
   * @param replication   required block replication for the file
   * @param sizeThreshold block size
   * @return the OutputStream to write to.
   * @throws IOException IO error.
   */
  @PublicAPIMethod(maturity = ApiMaturityLevel.EVOLVING)
  public abstract OutputStream create(StoragePath path, boolean overwrite, Integer bufferSize, Short replication, Long sizeThreshold) throws IOException;

  /**
   * Opens an InputStream at the indicated path.
   *
   * @param path the file to open.
   * @return the InputStream to read from.
   * @throws IOException IO error.
   */
  @PublicAPIMethod(maturity = ApiMaturityLevel.EVOLVING)
  public abstract InputStream open(StoragePath path) throws IOException;

  /**
   * Opens an SeekableDataInputStream at the indicated path with seeks supported.
   *
   * @param path       the file to open.
   * @param bufferSize buffer size to use.
   * @param wrapStream true if we want to wrap the inputstream based on filesystem specific criteria
   * @return the InputStream to read from.
   * @throws IOException IO error.
   */
  @PublicAPIMethod(maturity = ApiMaturityLevel.EVOLVING)
  public abstract SeekableDataInputStream openSeekable(StoragePath path, int bufferSize, boolean wrapStream) throws IOException;

  /**
   * Appends to an existing file (optional operation).
   *
   * @param path the file to append.
   * @return the OutputStream to write to.
   * @throws IOException IO error.
   */
  @PublicAPIMethod(maturity = ApiMaturityLevel.EVOLVING)
  public abstract OutputStream append(StoragePath path) throws IOException;

  /**
   * Checks if a path exists.
   *
   * @param path to check.
   * @return {@code true} if the path exists.
   * @throws IOException IO error.
   */
  @PublicAPIMethod(maturity = ApiMaturityLevel.EVOLVING)
  public abstract boolean exists(StoragePath path) throws IOException;

  /**
   * Returns a {@link StoragePathInfo} object that represents the path.
   *
   * @param path to check.
   * @return a {@link StoragePathInfo} object.
   * @throws FileNotFoundException when the path does not exist.
   * @throws IOException           IO error.
   */
  @PublicAPIMethod(maturity = ApiMaturityLevel.EVOLVING)
  public abstract StoragePathInfo getPathInfo(StoragePath path) throws IOException;

  /**
   * Creates the directory and non-existent parent directories.
   *
   * @param path to create.
   * @return {@code true} if the directory was created.
   * @throws IOException IO error.
   */
  @PublicAPIMethod(maturity = ApiMaturityLevel.EVOLVING)
  public abstract boolean createDirectory(StoragePath path) throws IOException;

  /**
   * Lists the path info of the direct files/directories in the given path if the path is a directory.
   *
   * @param path given path.
   * @return the list of path info of the files/directories in the given path.
   * @throws FileNotFoundException when the path does not exist.
   * @throws IOException           IO error.
   */
  @PublicAPIMethod(maturity = ApiMaturityLevel.EVOLVING)
  public abstract List listDirectEntries(StoragePath path) throws IOException;

  /**
   * Lists the path info of all files under the give path recursively.
   *
   * @param path given path.
   * @return the list of path info of the files under the given path.
   * @throws FileNotFoundException when the path does not exist.
   * @throws IOException           IO error.
   */
  @PublicAPIMethod(maturity = ApiMaturityLevel.EVOLVING)
  public abstract List listFiles(StoragePath path) throws IOException;

  /**
   * Lists the path info of the direct files/directories in the given path
   * and filters the results, if the path is a directory.
   *
   * @param path   given path.
   * @param filter filter to apply.
   * @return the list of path info of the files/directories in the given path.
   * @throws FileNotFoundException when the path does not exist.
   * @throws IOException           IO error.
   */
  @PublicAPIMethod(maturity = ApiMaturityLevel.EVOLVING)
  public abstract List listDirectEntries(StoragePath path,
                                                          StoragePathFilter filter) throws IOException;

  /**
   * Sets Modification Time for the storage Path
   * @param path
   * @param modificationTimeInMillisEpoch Millis since Epoch
   * @throws IOException
   */
  @PublicAPIMethod(maturity = ApiMaturityLevel.EVOLVING)
  public abstract void setModificationTime(StoragePath path, long modificationTimeInMillisEpoch) throws IOException;

  /**
   * Returns all the files that match the pathPattern and are not checksum files,
   * and filters the results.
   *
   * @param pathPattern given pattern.
   * @param filter      filter to apply.
   * @return the list of path info of the files.
   * @throws IOException IO error.
   */
  @PublicAPIMethod(maturity = ApiMaturityLevel.EVOLVING)
  public abstract List globEntries(StoragePath pathPattern,
                                                    StoragePathFilter filter) throws IOException;

  /**
   * Renames the path from old to new.
   *
   * @param oldPath source path.
   * @param newPath destination path.
   * @return {@true} if rename is successful.
   * @throws IOException IO error.
   */
  @PublicAPIMethod(maturity = ApiMaturityLevel.EVOLVING)
  public abstract boolean rename(StoragePath oldPath,
                                 StoragePath newPath) throws IOException;

  /**
   * Deletes a directory at path.
   *
   * @param path directory to delete.
   * @return {@code true} if successful.
   * @throws IOException IO error.
   */
  @PublicAPIMethod(maturity = ApiMaturityLevel.EVOLVING)
  public abstract boolean deleteDirectory(StoragePath path) throws IOException;

  /**
   * Deletes a file at path.
   *
   * @param path file to delete.
   * @return {@code true} if successful.
   * @throws IOException IO error.
   */
  @PublicAPIMethod(maturity = ApiMaturityLevel.EVOLVING)
  public abstract boolean deleteFile(StoragePath path) throws IOException;

  /**
   * @return the underlying file system instance if exists.
   */
  @PublicAPIMethod(maturity = ApiMaturityLevel.EVOLVING)
  public abstract Object getFileSystem();

  /**
   * @return the raw storage.
   */
  @PublicAPIMethod(maturity = ApiMaturityLevel.EVOLVING)
  public abstract HoodieStorage getRawStorage();

  /**
   * @return the storage configuration.
   */
  @PublicAPIMethod(maturity = ApiMaturityLevel.EVOLVING)
  public final StorageConfiguration getConf() {
    return storageConf;
  }

  /**
   * Creates a new file with overwrite set to false. This ensures files are created
   * only once and never rewritten, also, here we take care if the content is not
   * empty, will first write the content to a temp file if {needCreateTempFile} is
   * true, and then rename it back after the content is written.
   *
   * 

CAUTION: if this method is invoked in multi-threads for concurrent write of the same file, * an existence check of the file is recommended. * * @param path File path. * @param content Content to be stored. */ @PublicAPIMethod(maturity = ApiMaturityLevel.EVOLVING) public final void createImmutableFileInPath(StoragePath path, Option content) throws HoodieIOException { OutputStream fsout = null; StoragePath tmpPath = null; boolean needTempFile = needCreateTempFile(); try { if (!content.isPresent()) { fsout = create(path, false); } if (content.isPresent() && needTempFile) { StoragePath parent = path.getParent(); tmpPath = new StoragePath(parent, path.getName() + "." + UUID.randomUUID()); fsout = create(tmpPath, false); fsout.write(content.get()); } if (content.isPresent() && !needTempFile) { fsout = create(path, false); fsout.write(content.get()); } } catch (IOException e) { String errorMsg = "Failed to create file " + (tmpPath != null ? tmpPath : path); throw new HoodieIOException(errorMsg, e); } finally { try { if (null != fsout) { fsout.close(); } } catch (IOException e) { String errorMsg = "Failed to close file " + (needTempFile ? tmpPath : path); throw new HoodieIOException(errorMsg, e); } boolean renameSuccess = false; try { if (null != tmpPath) { renameSuccess = rename(tmpPath, path); } } catch (IOException e) { throw new HoodieIOException( "Failed to rename " + tmpPath + " to the target " + path, e); } finally { if (!renameSuccess && null != tmpPath) { try { deleteFile(tmpPath); LOG.warn("Fail to rename " + tmpPath + " to " + path + ", target file exists: " + exists(path)); } catch (IOException e) { throw new HoodieIOException("Failed to delete tmp file " + tmpPath, e); } } } } } /** * @return whether a temporary file needs to be created for immutability. */ @PublicAPIMethod(maturity = ApiMaturityLevel.EVOLVING) public final boolean needCreateTempFile() { return StorageSchemes.HDFS.getScheme().equals(getScheme()); } /** * Create an OutputStream at the indicated path. * The file is overwritten by default. * * @param path the file to create. * @return the OutputStream to write to. * @throws IOException IO error. */ @PublicAPIMethod(maturity = ApiMaturityLevel.EVOLVING) public OutputStream create(StoragePath path) throws IOException { return create(path, true); } /** * Creates an empty new file at the indicated path. * * @param path the file to create. * @return {@code true} if successfully created; {@code false} if already exists. * @throws IOException IO error. */ @PublicAPIMethod(maturity = ApiMaturityLevel.EVOLVING) public boolean createNewFile(StoragePath path) throws IOException { if (exists(path)) { return false; } else { create(path, false).close(); return true; } } /** * Opens an SeekableDataInputStream at the indicated path with seeks supported. * * @param path the file to open. * @param wrapStream true if we want to wrap the inputstream based on filesystem specific criteria * @return the InputStream to read from. * @throws IOException IO error. */ @PublicAPIMethod(maturity = ApiMaturityLevel.EVOLVING) public SeekableDataInputStream openSeekable(StoragePath path, boolean wrapStream) throws IOException { return openSeekable(path, getDefaultBlockSize(path), wrapStream); } /** * Lists the file info of the direct files/directories in the given list of paths, * if the paths are directory. * * @param pathList given path list. * @return the list of path info of the files/directories in the given paths. * @throws FileNotFoundException when the path does not exist. * @throws IOException IO error. */ @PublicAPIMethod(maturity = ApiMaturityLevel.EVOLVING) public List listDirectEntries(List pathList) throws IOException { List result = new ArrayList<>(); for (StoragePath path : pathList) { result.addAll(listDirectEntries(path)); } return result; } /** * Lists the file info of the direct files/directories in the given list of paths * and filters the results, if the paths are directory. * * @param pathList given path list. * @param filter filter to apply. * @return the list of path info of the files/directories in the given paths. * @throws FileNotFoundException when the path does not exist. * @throws IOException IO error. */ @PublicAPIMethod(maturity = ApiMaturityLevel.EVOLVING) public List listDirectEntries(List pathList, StoragePathFilter filter) throws IOException { List result = new ArrayList<>(); for (StoragePath path : pathList) { result.addAll(listDirectEntries(path, filter)); } return result; } /** * Returns all the files that match the pathPattern and are not checksum files. * * @param pathPattern given pattern. * @return the list of file info of the files. * @throws IOException IO error. */ @PublicAPIMethod(maturity = ApiMaturityLevel.EVOLVING) public List globEntries(StoragePath pathPattern) throws IOException { return globEntries(pathPattern, e -> true); } }





© 2015 - 2025 Weber Informatics LLC | Privacy Policy