All downloads are free. The search and download functionality uses the official Maven repository.

alluxio.master.file.meta.AsyncUfsAbsentPathCache Maven / Gradle / Ivy

The newest version!
/*
 * The Alluxio Open Foundation licenses this work under the Apache License, version 2.0
 * (the "License"). You may not use this work except in compliance with the License, which is
 * available at www.apache.org/licenses/LICENSE-2.0
 *
 * This software is distributed on an "AS IS" basis, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND,
 * either express or implied, as more fully set forth in the License.
 *
 * See the NOTICE file distributed with this work for information regarding copyright ownership.
 */

package alluxio.master.file.meta;

import alluxio.AlluxioURI;
import alluxio.collections.Pair;
import alluxio.conf.Configuration;
import alluxio.conf.PropertyKey;
import alluxio.exception.InvalidPathException;
import alluxio.master.file.meta.options.MountInfo;
import alluxio.metrics.MetricKey;
import alluxio.metrics.MetricsSystem;
import alluxio.resource.CloseableResource;
import alluxio.underfs.UnderFileSystem;
import alluxio.util.ThreadFactoryUtils;
import alluxio.util.io.PathUtils;

import com.google.common.annotations.VisibleForTesting;
import com.google.common.cache.Cache;
import com.google.common.cache.CacheBuilder;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import java.io.IOException;
import java.time.Clock;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collections;
import java.util.List;
import java.util.concurrent.ConcurrentHashMap;
import java.util.concurrent.LinkedBlockingQueue;
import java.util.concurrent.ThreadPoolExecutor;
import java.util.concurrent.TimeUnit;
import java.util.concurrent.locks.Lock;
import java.util.concurrent.locks.ReadWriteLock;
import java.util.concurrent.locks.ReentrantReadWriteLock;
import javax.annotation.concurrent.ThreadSafe;

/**
 * This is a {@link UfsAbsentPathCache} which implements asynchronous addition and removal to the
 * cache, since the processing of the path may be slow.
 */
@ThreadSafe
public class AsyncUfsAbsentPathCache implements UfsAbsentPathCache {
  private static final Logger LOG = LoggerFactory.getLogger(AsyncUfsAbsentPathCache.class);
  /** Number of seconds to keep threads alive. */
  private static final int THREAD_KEEP_ALIVE_SECONDS = 60;
  /** Number of paths to cache. */
  private static final int MAX_PATHS =
      Configuration.getInt(PropertyKey.MASTER_UFS_PATH_CACHE_CAPACITY);

  /** The mount table. */
  private final MountTable mMountTable;
  /** Paths currently being processed. This is used to prevent duplicate processing. */
  private final ConcurrentHashMap mCurrentPaths;
  /** Cache of paths which are absent in the ufs, maps an alluxio path to a Pair
   *  which is the sync time and the mount id.
   */
  private final Cache> mCache;
  /** A thread pool for the async tasks. */
  private final ThreadPoolExecutor mPool;

  private final Clock mClock;

  /**
   * Creates a new instance of {@link AsyncUfsAbsentPathCache}.
   *
   * @param mountTable the mount table
   * @param numThreads the maximum number of threads for the async thread pool
   * @param clock the clock to use to compute the sync times
   */
  public AsyncUfsAbsentPathCache(MountTable mountTable, int numThreads, Clock clock) {
    mMountTable = mountTable;
    mClock = clock;
    mCurrentPaths = new ConcurrentHashMap<>(8, 0.95f, 8);
    mCache = CacheBuilder.newBuilder().maximumSize(MAX_PATHS).concurrencyLevel(Configuration.getInt(
        PropertyKey.MASTER_UFS_PATH_CACHE_THREADS)).recordStats().build();
    /* Number of threads for the async pool. */

    mPool = new ThreadPoolExecutor(numThreads, numThreads, THREAD_KEEP_ALIVE_SECONDS,
        TimeUnit.SECONDS, new LinkedBlockingQueue<>(),
        ThreadFactoryUtils.build("UFS-Absent-Path-Cache-%d", true));
    mPool.allowCoreThreadTimeOut(true);
    long timeout = getCachedGaugeTimeoutMillis();
    MetricsSystem.registerCachedGaugeIfAbsent(MetricKey.MASTER_ABSENT_CACHE_SIZE.getName(),
        mCache::size, timeout, TimeUnit.MILLISECONDS);
    MetricsSystem.registerCachedGaugeIfAbsent(MetricKey.MASTER_ABSENT_CACHE_MISSES.getName(),
        () -> mCache.stats().missCount(), timeout, TimeUnit.MILLISECONDS);
    MetricsSystem.registerCachedGaugeIfAbsent(MetricKey.MASTER_ABSENT_CACHE_HITS.getName(),
        () -> mCache.stats().hitCount(), timeout, TimeUnit.MILLISECONDS);
    MetricsSystem.registerCachedGaugeIfAbsent(
        MetricKey.MASTER_ABSENT_PATH_CACHE_QUEUE_SIZE.getName(),
        () -> mPool.getQueue().size(), timeout, TimeUnit.MILLISECONDS);
  }

  @VisibleForTesting
  protected long getCachedGaugeTimeoutMillis() {
    return 2000;
  }

  @Override
  public void processAsync(AlluxioURI path, List prefixInodes) {
    mPool.submit(() -> processPathSync(path, prefixInodes));
  }

  @Override
  public void addSinglePath(AlluxioURI path) {
    MountInfo mountInfo = getMountInfo(path);
    if (mountInfo == null) {
      return;
    }
    addCacheEntry(path.getPath(), mountInfo);
  }

  @Override
  public void processExisting(AlluxioURI path) {
    MountInfo mountInfo = getMountInfo(path);
    if (mountInfo == null) {
      return;
    }
    // This is called when we create a persisted path in Alluxio. The path components need to be
    // invalidated so the cache does not incorrectly think a path is absent.
    // As an optimization, this method avoids holding locks, to prevent waiting on UFS. However,
    // since the locks are not being used in this code path, there could be a race between this
    // invalidating thread, and a processing of the path from the thread pool. To avoid the race,
    // this invalidating thread must set the intention to invalidate before invalidating.
    for (AlluxioURI alluxioUri : getNestedPaths(path, mountInfo.getAlluxioUri().getDepth())) {
      PathLock pathLock = mCurrentPaths.get(alluxioUri.getPath());
      if (pathLock != null) {
        pathLock.setInvalidate();
      }
      removeCacheEntry(alluxioUri.getPath());
    }
  }

  @Override
  public boolean isAbsentSince(AlluxioURI path, long absentSince) {
    MountInfo mountInfo = getMountInfo(path);
    if (mountInfo == null) {
      return false;
    }
    AlluxioURI mountBaseUri = mountInfo.getAlluxioUri();

    while (path != null && !path.equals(mountBaseUri)) {
      Pair cacheResult = mCache.getIfPresent(path.getPath());

      if (cacheResult != null && cacheResult.getFirst() != null
          && cacheResult.getSecond() != null
          && cacheResult.getFirst() >= absentSince
          && cacheResult.getSecond() == mountInfo.getMountId()) {
        return true;
      }
      path = path.getParent();
    }
    // Reached the root, without finding anything in the cache.
    return false;
  }

  /**
   * Processes and checks the existence of the corresponding ufs path for the given Alluxio path.
   *
   * @param alluxioUri the Alluxio path to process
   * @param mountInfo the associated {@link MountInfo} for the Alluxio path
   * @return if true, further traversal of the descendant paths should continue
   */
  private boolean processSinglePath(AlluxioURI alluxioUri, MountInfo mountInfo) {
    PathLock pathLock = new PathLock();
    Lock writeLock = pathLock.writeLock();
    Lock readLock = null;
    try {
      // Write lock this path, to only enable a single task per path
      writeLock.lock();
      PathLock existingLock = mCurrentPaths.putIfAbsent(alluxioUri.getPath(), pathLock);
      if (existingLock != null) {
        // Another thread already locked this path and is processing it. Wait for the other
        // thread to finish, by locking the existing read lock.
        writeLock.unlock();
        writeLock = null;
        readLock = existingLock.readLock();
        readLock.lock();

        if (mCache.getIfPresent(alluxioUri.getPath()) != null) {
          // This path is already in the cache (is absent). Further traversal is unnecessary.
          return false;
        }
      } else {
        // This thread has the exclusive lock for this path.

        // Resolve this Alluxio uri. It should match the original mount id.
        MountTable.Resolution resolution = mMountTable.resolve(alluxioUri);
        if (resolution.getMountId() != mountInfo.getMountId()) {
          // This mount point has changed. Further traversal is unnecessary.
          return false;
        }

        boolean existsInUfs;
        try (CloseableResource ufsResource = resolution.acquireUfsResource()) {
          UnderFileSystem ufs = ufsResource.get();
          existsInUfs = ufs.exists(resolution.getUri().toString());
        }
        if (existsInUfs) {
          // This ufs path exists. Remove the cache entry.
          removeCacheEntry(alluxioUri.getPath());
        } else {
          // This is the first ufs path which does not exist. Add it to the cache.
          addCacheEntry(alluxioUri.getPath(), mountInfo);

          if (pathLock.isInvalidate()) {
            // This path was marked to be invalidated, meaning this UFS path was just created,
            // and now exists. Invalidate the entry.
            // This check is necessary to avoid the race with the invalidating thread.
            removeCacheEntry(alluxioUri.getPath());
          } else {
            // Further traversal is unnecessary.
            return false;
          }
        }
      }
    } catch (InvalidPathException | IOException e) {
      LOG.warn("Processing path failed: " + alluxioUri, e);
      return false;
    } finally {
      // Unlock the path
      if (readLock != null) {
        readLock.unlock();
      }
      if (writeLock != null) {
        mCurrentPaths.remove(alluxioUri.getPath(), pathLock);
        writeLock.unlock();
      }
    }
    return true;
  }

  /**
   * @param alluxioUri the Alluxio path to get the mount info for
   * @return the {@link MountInfo} of the given Alluxio path, or null if it doesn't exist
   */
  private MountInfo getMountInfo(AlluxioURI alluxioUri) {
    try {
      MountTable.Resolution resolution = mMountTable.resolve(alluxioUri);
      return mMountTable.getMountInfo(resolution.getMountId());
    } catch (Exception e) {
      // Catch Exception in case the mount point doesn't exist currently.
      LOG.warn("Failed to get mount info for path {}. message: {}", alluxioUri, e.toString());
      return null;
    }
  }

  /**
   * Returns a sequence of Alluxio paths for a specified path, starting from the path component at
   * a specific index, to the specified path.
   *
   * @param alluxioUri the Alluxio path to get the nested paths for
   * @param startComponentIndex the index to the starting path component,
   *        root directory has index 0
   * @return a list of nested paths from the starting component to the given path
   */
  private List getNestedPaths(AlluxioURI alluxioUri, int startComponentIndex) {
    try {
      String[] fullComponents = PathUtils.getPathComponents(alluxioUri.getPath());
      String[] baseComponents = Arrays.copyOfRange(fullComponents, 0, startComponentIndex);
      AlluxioURI uri = new AlluxioURI(
          PathUtils.concatPath(AlluxioURI.SEPARATOR, baseComponents));
      List components = new ArrayList<>(fullComponents.length - startComponentIndex);
      for (int i = startComponentIndex; i < fullComponents.length; i++) {
        uri = uri.joinUnsafe(fullComponents[i]);
        components.add(uri);
      }
      return components;
    } catch (InvalidPathException e) {
      return Collections.emptyList();
    }
  }

  /**
   * This represents a lock for a path component.
   */
  private static final class PathLock {
    private final ReadWriteLock mRwLock;
    private volatile boolean mInvalidate;

    private PathLock() {
      mRwLock = new ReentrantReadWriteLock();
      mInvalidate = false;
    }

    /**
     * @return the write lock
     */
    private Lock writeLock() {
      return mRwLock.writeLock();
    }

    /**
     * @return the read lock
     */
    private Lock readLock() {
      return mRwLock.readLock();
    }

    /**
     * Sets the intention to invalidate this path.
     */
    private void setInvalidate() {
      mInvalidate = true;
    }

    /**
     * @return true if the path was marked to be invalidated
     */
    private boolean isInvalidate() {
      return mInvalidate;
    }
  }

  private void addCacheEntry(String path, MountInfo mountInfo) {
    LOG.debug("Add cacheEntry={}", path);
    mCache.put(path, new Pair<>(mClock.millis(), mountInfo.getMountId()));
  }

  private void removeCacheEntry(String path) {
    LOG.debug("Remove cacheEntry={}", path);
    mCache.invalidate(path);
  }

  /**
   * Processes a path synchronously.
   *
   * @param path the path to add
   * @param prefixInodes the existing inodes for the path prefix
   */
  @VisibleForTesting
  void processPathSync(AlluxioURI path, List prefixInodes) {
    MountInfo mountInfo = getMountInfo(path);
    if (mountInfo == null) {
      return;
    }

    // baseIndex should be the index of the first non-persisted inode under the mount point.
    int baseIndex = mountInfo.getAlluxioUri().getDepth();
    while (baseIndex < prefixInodes.size()) {
      if (prefixInodes.get(baseIndex).isPersisted()) {
        baseIndex++;
      } else {
        break;
      }
    }

    for (AlluxioURI alluxioUri : getNestedPaths(path, baseIndex)) {
      if (!processSinglePath(alluxioUri, mountInfo)) {
        break;
      }
    }
  }
}




© 2015 - 2025 Weber Informatics LLC | Privacy Policy