All downloads are free. Search and download functionality uses the official Maven repository.

org.apache.hadoop.fs.s3a.s3guard.MetadataStore Maven / Gradle / Ivy

Go to download

This module contains code to support integration with Amazon Web Services. It also declares the dependencies needed to work with AWS services.

There is a newer version: 3.4.0
Show newest version
/**
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package org.apache.hadoop.fs.s3a.s3guard;

import javax.annotation.Nullable;
import java.io.Closeable;
import java.io.IOException;
import java.util.Collection;
import java.util.List;
import java.util.Map;

import org.apache.hadoop.thirdparty.com.google.common.annotations.VisibleForTesting;
import org.apache.hadoop.classification.InterfaceAudience;
import org.apache.hadoop.classification.InterfaceStability;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.fs.s3a.Retries;
import org.apache.hadoop.fs.s3a.Retries.RetryTranslated;
import org.apache.hadoop.fs.s3a.S3AFileStatus;
import org.apache.hadoop.fs.s3a.impl.StoreContext;

/**
 * {@code MetadataStore} defines the set of operations that any metadata store
 * implementation must provide.  Note that all {@link Path} objects provided
 * to methods must be absolute, not relative paths.
 * Implementations must implement any retries needed internally, such that
 * transient errors are generally recovered from without throwing exceptions
 * from this API.
 */
@InterfaceAudience.Private
@InterfaceStability.Evolving
public interface MetadataStore extends Closeable {

  /**
   * Performs one-time initialization of the metadata store.
   *
   * @param fs {@code FileSystem} associated with the MetadataStore
   * @param ttlTimeProvider the time provider to use for metadata expiry
   * @throws IOException if there is an error
   */
  void initialize(FileSystem fs, ITtlTimeProvider ttlTimeProvider)
      throws IOException;

  /**
   * Performs one-time initialization of the metadata store via configuration.
   * @see #initialize(FileSystem, ITtlTimeProvider)
   * @param conf Configuration.
   * @param ttlTimeProvider the time provider to use for metadata expiry
   * @throws IOException if there is an error
   */
  void initialize(Configuration conf,
      ITtlTimeProvider ttlTimeProvider) throws IOException;

  /**
   * Deletes exactly one path, leaving a tombstone to prevent lingering,
   * inconsistent copies of it from being listed.
   *
   * Deleting an entry with a tombstone needs a
   * {@link org.apache.hadoop.fs.s3a.s3guard.S3Guard.TtlTimeProvider} because
   * the lastUpdated field of the record has to be updated to 
now
. * * @param path the path to delete * @param operationState (nullable) operational state for a bulk update * @throws IOException if there is an error */ void delete(Path path, @Nullable BulkOperationState operationState) throws IOException; /** * Removes the record of exactly one path. Does not leave a tombstone (see * {@link MetadataStore#delete(Path, BulkOperationState)}. It is currently * intended for testing only, and a need to use it as part of normal * FileSystem usage is not anticipated. * * @param path the path to delete * @throws IOException if there is an error */ @VisibleForTesting void forgetMetadata(Path path) throws IOException; /** * Deletes the entire sub-tree rooted at the given path, leaving tombstones * to prevent lingering, inconsistent copies of it from being listed. * * In addition to affecting future calls to {@link #get(Path)}, * implementations must also update any stored {@code DirListingMetadata} * objects which track the parent of this file. * * Deleting a subtree with a tombstone needs a * {@link org.apache.hadoop.fs.s3a.s3guard.S3Guard.TtlTimeProvider} because * the lastUpdated field of all records have to be updated to
now
. * * @param path the root of the sub-tree to delete * @param operationState (nullable) operational state for a bulk update * @throws IOException if there is an error */ @Retries.RetryTranslated void deleteSubtree(Path path, @Nullable BulkOperationState operationState) throws IOException; /** * Delete the paths. * There's no attempt to order the paths: they are * deleted in the order passed in. * @param paths paths to delete. * @param operationState Nullable operation state * @throws IOException failure */ @RetryTranslated void deletePaths(Collection paths, @Nullable BulkOperationState operationState) throws IOException; /** * Gets metadata for a path. * * @param path the path to get * @return metadata for {@code path}, {@code null} if not found * @throws IOException if there is an error */ PathMetadata get(Path path) throws IOException; /** * Gets metadata for a path. Alternate method that includes a hint * whether or not the MetadataStore should do work to compute the value for * {@link PathMetadata#isEmptyDirectory()}. Since determining emptiness * may be an expensive operation, this can save wasted work. * * @param path the path to get * @param wantEmptyDirectoryFlag Set to true to give a hint to the * MetadataStore that it should try to compute the empty directory flag. * @return metadata for {@code path}, {@code null} if not found * @throws IOException if there is an error */ PathMetadata get(Path path, boolean wantEmptyDirectoryFlag) throws IOException; /** * Lists metadata for all direct children of a path. * * @param path the path to list * @return metadata for all direct children of {@code path} which are being * tracked by the MetadataStore, or {@code null} if the path was not found * in the MetadataStore. * @throws IOException if there is an error */ @Retries.RetryTranslated DirListingMetadata listChildren(Path path) throws IOException; /** * This adds all new ancestors of a path as directories. *

* Important: to propagate TTL information, any new ancestors added * must have their last updated timestamps set through * {@link S3Guard#patchLastUpdated(Collection, ITtlTimeProvider)}. * @param qualifiedPath path to update * @param operationState (nullable) operational state for a bulk update * @throws IOException failure */ @RetryTranslated void addAncestors(Path qualifiedPath, @Nullable BulkOperationState operationState) throws IOException; /** * Record the effects of a {@link FileSystem#rename(Path, Path)} in the * MetadataStore. Clients provide explicit enumeration of the affected * paths (recursively), before and after the rename. * * This operation is not atomic, unless specific implementations claim * otherwise. * * On the need to provide an enumeration of directory trees instead of just * source and destination paths: * Since a MetadataStore does not have to track all metadata for the * underlying storage system, and a new MetadataStore may be created on an * existing underlying filesystem, this move() may be the first time the * MetadataStore sees the affected paths. Therefore, simply providing src * and destination paths may not be enough to record the deletions (under * src path) and creations (at destination) that are happening during the * rename(). * * @param pathsToDelete Collection of all paths that were removed from the * source directory tree of the move. * @param pathsToCreate Collection of all PathMetadata for the new paths * that were created at the destination of the rename(). * @param operationState Any ongoing state supplied to the rename tracker * which is to be passed in with each move operation. * @throws IOException if there is an error */ void move(@Nullable Collection pathsToDelete, @Nullable Collection pathsToCreate, @Nullable BulkOperationState operationState) throws IOException; /** * Saves metadata for exactly one path. * * Implementations may pre-create all the path's ancestors automatically. 
* Implementations must update any {@code DirListingMetadata} objects which * track the immediate parent of this file. * * @param meta the metadata to save * @throws IOException if there is an error */ @RetryTranslated void put(PathMetadata meta) throws IOException; /** * Saves metadata for exactly one path, potentially * using any bulk operation state to eliminate duplicate work. * * Implementations may pre-create all the path's ancestors automatically. * Implementations must update any {@code DirListingMetadata} objects which * track the immediate parent of this file. * * @param meta the metadata to save * @param operationState operational state for a bulk update * @throws IOException if there is an error */ @RetryTranslated void put(PathMetadata meta, @Nullable BulkOperationState operationState) throws IOException; /** * Saves metadata for any number of paths. * * Semantics are otherwise the same as single-path puts. * * @param metas the metadata to save * @param operationState (nullable) operational state for a bulk update * @throws IOException if there is an error */ void put(Collection metas, @Nullable BulkOperationState operationState) throws IOException; /** * Save directory listing metadata. Callers may save a partial directory * listing for a given path, or may store a complete and authoritative copy * of the directory listing. {@code MetadataStore} implementations may * subsequently keep track of all modifications to the directory contents at * this path, and return authoritative results from subsequent calls to * {@link #listChildren(Path)}. See {@link DirListingMetadata}. * * Any authoritative results returned are only authoritative for the scope * of the {@code MetadataStore}: A per-process {@code MetadataStore}, for * example, would only show results visible to that process, potentially * missing metadata updates (create, delete) made to the same path by * another process. 
* * To optimize updates and avoid overwriting existing entries which * may contain extra data, entries in the list of unchangedEntries may * be excluded. That is: the listing metadata has the full list of * what it believes are children, but implementations can opt to ignore * some. * @param meta Directory listing metadata. * @param unchangedEntries list of entries in the dir listing which have * not changed since the directory was list scanned on s3guard. * @param operationState operational state for a bulk update * @throws IOException if there is an error */ void put(DirListingMetadata meta, final List unchangedEntries, @Nullable BulkOperationState operationState) throws IOException; /** * Destroy all resources associated with the metadata store. * * The destroyed resources can be DynamoDB tables, MySQL databases/tables, or * HDFS directories. Any operations after calling this method may possibly * fail. * * This operation is idempotent. * * @throws IOException if there is an error */ void destroy() throws IOException; /** * Prune method with two modes of operation: *

    *
  • * {@link PruneMode#ALL_BY_MODTIME} * Clear any metadata older than a specified mod_time from the store. * Note that this modification time is the S3 modification time from the * object's metadata - from the object store. * Implementations MUST clear file metadata, and MAY clear directory * metadata (s3a itself does not track modification time for directories). * Implementations may also choose to throw UnsupportedOperationException * instead. Note that modification times must be in UTC, as returned by * System.currentTimeMillis at the time of modification. *
  • *
* *
    *
  • * {@link PruneMode#TOMBSTONES_BY_LASTUPDATED} * Clear any tombstone updated earlier than a specified time from the * store. Note that this last_updated is the time when the metadata * entry was last updated and maintained by the metadata store. * Implementations MUST clear file metadata, and MAY clear directory * metadata (s3a itself does not track modification time for directories). * Implementations may also choose to throw UnsupportedOperationException * instead. Note that last_updated must be in UTC, as returned by * System.currentTimeMillis at the time of modification. *
  • *
* * @param pruneMode Prune Mode * @param cutoff Oldest time to allow (UTC) * @throws IOException if there is an error * @throws UnsupportedOperationException if not implemented */ void prune(PruneMode pruneMode, long cutoff) throws IOException, UnsupportedOperationException; /** * Same as {@link MetadataStore#prune(PruneMode, long)}, but with an * additional keyPrefix parameter to filter the pruned keys with a prefix. * * @param pruneMode Prune Mode * @param cutoff Oldest time in milliseconds to allow (UTC) * @param keyPrefix The prefix for the keys that should be removed * @throws IOException if there is an error * @throws UnsupportedOperationException if not implemented * @return the number of pruned entries */ long prune(PruneMode pruneMode, long cutoff, String keyPrefix) throws IOException, UnsupportedOperationException; /** * Get any diagnostics information from a store, as a list of (key, value) * tuples for display. Arbitrary values; no guarantee of stability. * These are for debugging and testing only. * @return a map of strings. * @throws IOException if there is an error */ Map getDiagnostics() throws IOException; /** * Tune/update parameters for an existing table. * @param parameters map of params to change. * @throws IOException if there is an error */ void updateParameters(Map parameters) throws IOException; /** * Mark all directories created/touched in an operation as authoritative. * The metastore can now update that path with any authoritative * flags it chooses. * The store may assume that therefore the operation state is complete. * This holds for rename and needs to be documented for import. * @param dest destination path. * @param operationState active state. * @throws IOException failure. * @return the number of directories marked. */ default int markAsAuthoritative(Path dest, BulkOperationState operationState) throws IOException { return 0; } /** * Modes of operation for prune. 
* For details see {@link MetadataStore#prune(PruneMode, long)} */ enum PruneMode { ALL_BY_MODTIME, TOMBSTONES_BY_LASTUPDATED } /** * Start a rename operation. * * @param storeContext store context. * @param source source path * @param sourceStatus status of the source file/dir * @param dest destination path. * @return the rename tracker * @throws IOException Failure. */ RenameTracker initiateRenameOperation( StoreContext storeContext, Path source, S3AFileStatus sourceStatus, Path dest) throws IOException; /** * Initiate a bulk update and create an operation state for it. * This may then be passed into put operations. * @param operation the type of the operation. * @param dest path under which updates will be explicitly put. * @return null or a store-specific state to pass into the put operations. * @throws IOException failure */ default BulkOperationState initiateBulkWrite( BulkOperationState.OperationType operation, Path dest) throws IOException { return new BulkOperationState(operation); } /** * The TtlTimeProvider has to be set during the initialization for the * metadatastore, but this method can be used for testing, and change the * instance during runtime. * * @param ttlTimeProvider */ void setTtlTimeProvider(ITtlTimeProvider ttlTimeProvider); /** * Get any S3GuardInstrumentation for this store...must not be null. * @return any store instrumentation. */ default MetastoreInstrumentation getInstrumentation() { return new MetastoreInstrumentationImpl(); } }




© 2015 - 2024 Weber Informatics LLC | Privacy Policy