All Downloads are FREE. Search and download functionalities are using the official Maven repository.

org.apache.hadoop.fs.s3a.s3guard.LocalMetadataStore Maven / Gradle / Ivy

Go to download

This module contains code to support integration with Amazon Web Services. It also declares the dependencies needed to work with AWS services.

The newest version!
/*
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package org.apache.hadoop.fs.s3a.s3guard;

import com.google.common.annotations.VisibleForTesting;
import com.google.common.base.Preconditions;

import com.google.common.cache.Cache;
import com.google.common.cache.CacheBuilder;
import org.apache.commons.lang3.StringUtils;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.fs.s3a.Tristate;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import java.io.IOException;
import java.net.URI;
import java.util.Collection;
import java.util.HashMap;
import java.util.LinkedList;
import java.util.Map;
import java.util.concurrent.TimeUnit;

import static org.apache.hadoop.fs.s3a.Constants.*;

/**
 * This is a local, in-memory implementation of MetadataStore.
 * This is not a coherent cache across processes.  It is only
 * locally-coherent.
 *
 * The purpose of this is for unit and integration testing.
 * It could also be used to accelerate local-only operations where only one
 * process is operating on a given object store, or multiple processes are
 * accessing a read-only storage bucket.
 *
 * This MetadataStore does not enforce filesystem rules such as disallowing
 * non-recursive removal of non-empty directories.  It is assumed the caller
 * already has to perform these sorts of checks.
 *
 * Contains one cache internally with time based eviction.
 */
public class LocalMetadataStore implements MetadataStore {

  public static final Logger LOG = LoggerFactory.getLogger(MetadataStore.class);

  /** Contains directory and file listings. */
  private Cache localCache;

  private FileSystem fs;
  /* Null iff this FS does not have an associated URI host. */
  private String uriHost;

  @Override
  public void initialize(FileSystem fileSystem) throws IOException {
    Preconditions.checkNotNull(fileSystem);
    fs = fileSystem;
    URI fsURI = fs.getUri();
    uriHost = fsURI.getHost();
    if (uriHost != null && uriHost.equals("")) {
      uriHost = null;
    }

    initialize(fs.getConf());
  }

  @Override
  public void initialize(Configuration conf) throws IOException {
    Preconditions.checkNotNull(conf);
    int maxRecords = conf.getInt(S3GUARD_METASTORE_LOCAL_MAX_RECORDS,
        DEFAULT_S3GUARD_METASTORE_LOCAL_MAX_RECORDS);
    if (maxRecords < 4) {
      maxRecords = 4;
    }
    int ttl = conf.getInt(S3GUARD_METASTORE_LOCAL_ENTRY_TTL,
        DEFAULT_S3GUARD_METASTORE_LOCAL_ENTRY_TTL);

    CacheBuilder builder = CacheBuilder.newBuilder().maximumSize(maxRecords);
    if (ttl >= 0) {
      builder.expireAfterAccess(ttl, TimeUnit.MILLISECONDS);
    }

    localCache = builder.build();
  }

  @Override
  public String toString() {
    final StringBuilder sb = new StringBuilder(
        "LocalMetadataStore{");
    sb.append("uriHost='").append(uriHost).append('\'');
    sb.append('}');
    return sb.toString();
  }

  @Override
  public void delete(Path p) throws IOException {
    doDelete(p, false, true);
  }

  @Override
  public void forgetMetadata(Path p) throws IOException {
    doDelete(p, false, false);
  }

  @Override
  public void deleteSubtree(Path path) throws IOException {
    doDelete(path, true, true);
  }

  private synchronized void doDelete(Path p, boolean recursive, boolean
      tombstone) {

    Path path = standardize(p);

    // Delete entry from file cache, then from cached parent directory, if any

    deleteCacheEntries(path, tombstone);

    if (recursive) {
      // Remove all entries that have this dir as path prefix.
      deleteEntryByAncestor(path, localCache, tombstone);
    }
  }

  @Override
  public synchronized PathMetadata get(Path p) throws IOException {
    return get(p, false);
  }

  @Override
  public PathMetadata get(Path p, boolean wantEmptyDirectoryFlag)
      throws IOException {
    Path path = standardize(p);
    synchronized (this) {
      PathMetadata m = getFileMeta(path);

      if (wantEmptyDirectoryFlag && m != null &&
          m.getFileStatus().isDirectory()) {
        m.setIsEmptyDirectory(isEmptyDirectory(p));
      }

      if (LOG.isDebugEnabled()) {
        LOG.debug("get({}) -> {}", path, m == null ? "null" : m.prettyPrint());
      }
      return m;
    }
  }

  /**
   * Determine if directory is empty.
   * Call with lock held.
   * @param p a Path, already filtered through standardize()
   * @return TRUE / FALSE if known empty / not-empty, UNKNOWN otherwise.
   */
  private Tristate isEmptyDirectory(Path p) {
    DirListingMetadata dlm = getDirListingMeta(p);
    return dlm.withoutTombstones().isEmpty();
  }

  @Override
  public synchronized DirListingMetadata listChildren(Path p) throws
      IOException {
    Path path = standardize(p);
    DirListingMetadata listing = getDirListingMeta(path);
    if (LOG.isDebugEnabled()) {
      LOG.debug("listChildren({}) -> {}", path,
          listing == null ? "null" : listing.prettyPrint());
    }
    // Make a copy so callers can mutate without affecting our state
    return listing == null ? null : new DirListingMetadata(listing);
  }

  @Override
  public void move(Collection pathsToDelete,
      Collection pathsToCreate) throws IOException {
    LOG.info("Move {} to {}", pathsToDelete, pathsToCreate);

    Preconditions.checkNotNull(pathsToDelete, "pathsToDelete is null");
    Preconditions.checkNotNull(pathsToCreate, "pathsToCreate is null");
    Preconditions.checkArgument(pathsToDelete.size() == pathsToCreate.size(),
        "Must supply same number of paths to delete/create.");

    // I feel dirty for using reentrant lock. :-|
    synchronized (this) {

      // 1. Delete pathsToDelete
      for (Path meta : pathsToDelete) {
        LOG.debug("move: deleting metadata {}", meta);
        delete(meta);
      }

      // 2. Create new destination path metadata
      for (PathMetadata meta : pathsToCreate) {
        LOG.debug("move: adding metadata {}", meta);
        put(meta);
      }

      // 3. We now know full contents of all dirs in destination subtree
      for (PathMetadata meta : pathsToCreate) {
        FileStatus status = meta.getFileStatus();
        if (status == null || status.isDirectory()) {
          continue;
        }
        DirListingMetadata dir = listChildren(status.getPath());
        if (dir != null) {  // could be evicted already
          dir.setAuthoritative(true);
        }
      }
    }
  }

  @Override
  public void put(PathMetadata meta) throws IOException {

    Preconditions.checkNotNull(meta);
    FileStatus status = meta.getFileStatus();
    Path path = standardize(status.getPath());
    synchronized (this) {

      /* Add entry for this file. */
      if (LOG.isDebugEnabled()) {
        LOG.debug("put {} -> {}", path, meta.prettyPrint());
      }
      LocalMetadataEntry entry = localCache.getIfPresent(path);
      if(entry == null){
        entry = new LocalMetadataEntry(meta);
      } else {
        entry.setPathMetadata(meta);
      }

      /* Directory case:
       * We also make sure we have an entry in the dirCache, so subsequent
       * listStatus(path) at least see the directory.
       *
       * If we had a boolean flag argument "isNew", we would know whether this
       * is an existing directory the client discovered via getFileStatus(),
       * or if it is a newly-created directory.  In the latter case, we would
       * be able to mark the directory as authoritative (fully-cached),
       * saving round trips to underlying store for subsequent listStatus()
       */

      // only create DirListingMetadata if the entry does not have one
      if (status.isDirectory() && !entry.hasDirMeta()) {
        DirListingMetadata dlm =
            new DirListingMetadata(path, DirListingMetadata.EMPTY_DIR, false);
        entry.setDirListingMetadata(dlm);
      }
      localCache.put(path, entry);

      /* Update cached parent dir. */
      Path parentPath = path.getParent();
      if (parentPath != null) {
        LocalMetadataEntry parentMeta = localCache.getIfPresent(parentPath);
        DirListingMetadata parentDirMeta =
            new DirListingMetadata(parentPath, DirListingMetadata.EMPTY_DIR,
                false);
        parentDirMeta.put(status);

        getDirListingMeta(parentPath);

        if (parentMeta == null){
          localCache.put(parentPath, new LocalMetadataEntry(parentDirMeta));
        } else if (!parentMeta.hasDirMeta()) {
          parentMeta.setDirListingMetadata(parentDirMeta);
        } else {
          parentMeta.getDirListingMeta().put(status);
        }
      }
    }
  }

  @Override
  public synchronized void put(DirListingMetadata meta) throws IOException {
    if (LOG.isDebugEnabled()) {
      LOG.debug("put dirMeta {}", meta.prettyPrint());
    }
    LocalMetadataEntry entry =
        localCache.getIfPresent(standardize(meta.getPath()));
    if(entry == null){
      localCache.put(standardize(meta.getPath()), new LocalMetadataEntry(meta));
    } else {
      entry.setDirListingMetadata(meta);
    }
    put(meta.getListing());
  }

  public synchronized void put(Collection metas) throws
      IOException {
    for (PathMetadata meta : metas) {
      put(meta);
    }
  }

  @Override
  public void close() throws IOException {

  }

  @Override
  public void destroy() throws IOException {
    if (localCache != null) {
      localCache.invalidateAll();
    }
  }

  @Override
  public void prune(long modTime) throws IOException{
    prune(modTime, "");
  }

  @Override
  public synchronized void prune(long modTime, String keyPrefix) {
    // prune files
    // filter path_metadata (files), filter expired, remove expired
    localCache.asMap().entrySet().stream()
        .filter(entry -> entry.getValue().hasPathMeta())
        .filter(entry -> expired(
            entry.getValue().getFileMeta().getFileStatus(), modTime, keyPrefix))
        .forEach(entry -> localCache.invalidate(entry.getKey()));


    // prune dirs
    // filter DIR_LISTING_METADATA, remove expired, remove authoritative bit
    localCache.asMap().entrySet().stream()
        .filter(entry -> entry.getValue().hasDirMeta())
        .forEach(entry -> {
          Path path = entry.getKey();
          DirListingMetadata metadata = entry.getValue().getDirListingMeta();
          Collection oldChildren = metadata.getListing();
          Collection newChildren = new LinkedList<>();

          for (PathMetadata child : oldChildren) {
            FileStatus status = child.getFileStatus();
            if (!expired(status, modTime, keyPrefix)) {
              newChildren.add(child);
            }
          }
          if (newChildren.size() != oldChildren.size()) {
            DirListingMetadata dlm =
                new DirListingMetadata(path, newChildren, false);
            localCache.put(path, new LocalMetadataEntry(dlm));
            if (!path.isRoot()) {
              DirListingMetadata parent = getDirListingMeta(path.getParent());
              if (parent != null) {
                parent.setAuthoritative(false);
              }
            }
          }
        });
  }

  private boolean expired(FileStatus status, long expiry, String keyPrefix) {
    // remove the protocol from path string to be able to compare
    String bucket = status.getPath().toUri().getHost();
    String statusTranslatedPath = "";
    if(bucket != null && !bucket.isEmpty()){
      // if there's a bucket, (well defined host in Uri) the pathToParentKey
      // can be used to get the path from the status
      statusTranslatedPath =
          PathMetadataDynamoDBTranslation.pathToParentKey(status.getPath());
    } else {
      // if there's no bucket in the path the pathToParentKey will fail, so
      // this is the fallback to get the path from status
      statusTranslatedPath = status.getPath().toUri().getPath();
    }

    // Note: S3 doesn't track modification time on directories, so for
    // consistency with the DynamoDB implementation we ignore that here
    return status.getModificationTime() < expiry && !status.isDirectory()
      && statusTranslatedPath.startsWith(keyPrefix);
  }

  @VisibleForTesting
  static void deleteEntryByAncestor(Path ancestor,
      Cache cache, boolean tombstone) {

    cache.asMap().entrySet().stream()
        .filter(entry -> isAncestorOf(ancestor, entry.getKey()))
        .forEach(entry -> {
          LocalMetadataEntry meta = entry.getValue();
          Path path = entry.getKey();
          if(meta.hasDirMeta()){
            cache.invalidate(path);
          } else if(tombstone && meta.hasPathMeta()){
            meta.setPathMetadata(PathMetadata.tombstone(path));
          } else {
            cache.invalidate(path);
          }
        });
  }

  /**
   * @return true if 'ancestor' is ancestor dir in path 'f'.
   * All paths here are absolute.  Dir does not count as its own ancestor.
   */
  private static boolean isAncestorOf(Path ancestor, Path f) {
    String aStr = ancestor.toString();
    if (!ancestor.isRoot()) {
      aStr += "/";
    }
    String fStr = f.toString();
    return (fStr.startsWith(aStr));
  }

  /**
   * Update fileCache and dirCache to reflect deletion of file 'f'.  Call with
   * lock held.
   */
  private void deleteCacheEntries(Path path, boolean tombstone) {
    LocalMetadataEntry entry = localCache.getIfPresent(path);
    // If there's no entry, delete should silently succeed
    // (based on MetadataStoreTestBase#testDeleteNonExisting)
    if(entry == null){
      LOG.warn("Delete: path {} is missing from cache.", path);
      return;
    }

    // Remove target file entry
    LOG.debug("delete file entry for {}", path);
    if(entry.hasPathMeta()){
      if (tombstone) {
        PathMetadata pmd = PathMetadata.tombstone(path);
        entry.setPathMetadata(pmd);
      } else {
        entry.setPathMetadata(null);
      }
    }

    // If this path is a dir, remove its listing
    if(entry.hasDirMeta()) {
      LOG.debug("removing listing of {}", path);
      entry.setDirListingMetadata(null);
    }

    // If the entry is empty (contains no dirMeta or pathMeta) remove it from
    // the cache.
    if(!entry.hasDirMeta() && !entry.hasPathMeta()){
      localCache.invalidate(entry);
    }

    /* Remove this path from parent's dir listing */
    Path parent = path.getParent();
    if (parent != null) {
      DirListingMetadata dir = getDirListingMeta(parent);
      if (dir != null) {
        LOG.debug("removing parent's entry for {} ", path);
        if (tombstone) {
          dir.markDeleted(path);
        } else {
          dir.remove(path);
        }
      }
    }
  }

  /**
   * Return a "standardized" version of a path so we always have a consistent
   * hash value.  Also asserts the path is absolute, and contains host
   * component.
   * @param p input Path
   * @return standardized version of Path, suitable for hash key
   */
  private Path standardize(Path p) {
    Preconditions.checkArgument(p.isAbsolute(), "Path must be absolute");
    URI uri = p.toUri();
    if (uriHost != null) {
      Preconditions.checkArgument(StringUtils.isNotEmpty(uri.getHost()));
    }
    return p;
  }

  @Override
  public Map getDiagnostics() throws IOException {
    Map map = new HashMap<>();
    map.put("name", "local://metadata");
    map.put("uriHost", uriHost);
    map.put("description", "Local in-VM metadata store for testing");
    map.put(MetadataStoreCapabilities.PERSISTS_AUTHORITATIVE_BIT,
        Boolean.toString(true));
    return map;
  }

  @Override
  public void updateParameters(Map parameters)
      throws IOException {
  }

  PathMetadata getFileMeta(Path p){
    LocalMetadataEntry entry = localCache.getIfPresent(p);
    if(entry != null && entry.hasPathMeta()){
      return entry.getFileMeta();
    } else {
      return null;
    }
  }

  DirListingMetadata getDirListingMeta(Path p){
    LocalMetadataEntry entry = localCache.getIfPresent(p);
    if(entry != null && entry.hasDirMeta()){
      return entry.getDirListingMeta();
    } else {
      return null;
    }
  }

}




© 2015 - 2024 Weber Informatics LLC | Privacy Policy