alluxio.underfs.UfsStatusCache Maven / Gradle / Ivy
The newest version!
/*
* The Alluxio Open Foundation licenses this work under the Apache License, version 2.0
* (the "License"). You may not use this work except in compliance with the License, which is
* available at www.apache.org/licenses/LICENSE-2.0
*
* This software is distributed on an "AS IS" basis, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND,
* either express or implied, as more fully set forth in the License.
*
* See the NOTICE file distributed with this work for information regarding copyright ownership.
*/
package alluxio.underfs;
import alluxio.AlluxioURI;
import alluxio.collections.UnmodifiableArrayList;
import alluxio.conf.Configuration;
import alluxio.conf.PropertyKey;
import alluxio.exception.InvalidPathException;
import alluxio.master.file.DefaultFileSystemMaster;
import alluxio.master.file.RpcContext;
import alluxio.master.file.meta.MountTable;
import alluxio.master.file.meta.UfsAbsentPathCache;
import alluxio.resource.CloseableResource;
import alluxio.util.LogUtils;
import com.google.common.base.Preconditions;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import java.io.FileNotFoundException;
import java.io.IOException;
import java.util.Collection;
import java.util.Optional;
import java.util.concurrent.ConcurrentHashMap;
import java.util.concurrent.ExecutionException;
import java.util.concurrent.ExecutorService;
import java.util.concurrent.Future;
import java.util.concurrent.RejectedExecutionException;
import java.util.concurrent.TimeUnit;
import java.util.concurrent.TimeoutException;
import javax.annotation.Nullable;
import javax.annotation.concurrent.ThreadSafe;
/**
* This class is a cache from an Alluxio namespace URI ({@link AlluxioURI}, i.e. /path/to/inode) to
* UFS statuses.
*
* It also allows associating a path with child inodes, so that the statuses for a specific path can
* be searched for later.
*/
@ThreadSafe
public class UfsStatusCache {
private static final Logger LOG = LoggerFactory.getLogger(UfsStatusCache.class);
private final ConcurrentHashMap mStatuses;
private final ConcurrentHashMap>> mActivePrefetchJobs;
private final ConcurrentHashMap> mChildren;
private final UfsAbsentPathCache mAbsentCache;
private final long mCacheValidTime;
private final ExecutorService mPrefetchExecutor;
private final long mUfsFetchTimeout;
/**
* Create a new instance of {@link UfsStatusCache}.
*
* @param prefetchExecutor the executor service used to prefetch statuses. If set to null, then
* calls to {@link #prefetchChildren(AlluxioURI, MountTable)} will not
* schedule any tasks.
* @param absentPathCache the absent cache that ufsStatusCache should consult
* @param cacheValidTime the time when the absent cache entry would be considered valid
*/
public UfsStatusCache(@Nullable ExecutorService prefetchExecutor,
UfsAbsentPathCache absentPathCache, long cacheValidTime) {
mStatuses = new ConcurrentHashMap<>();
mChildren = new ConcurrentHashMap<>();
mActivePrefetchJobs = new ConcurrentHashMap<>();
mAbsentCache = absentPathCache;
mCacheValidTime = cacheValidTime;
mPrefetchExecutor = prefetchExecutor;
mUfsFetchTimeout =
Configuration.getMs(PropertyKey.MASTER_METADATA_SYNC_UFS_PREFETCH_TIMEOUT);
}
/**
 * Caches a UFS status under the given Alluxio path, replacing any status already cached there.
 *
 * The name reported by {@link UfsStatus#getName()} has to equal the last component of
 * {@code path}; when it does not, nothing is stored and an exception is thrown.
 *
 * @param path the Alluxio path used as the cache key
 * @param status the UFS status to cache
 * @return the status previously cached for the path, or null if there was none
 * @throws IllegalArgumentException if the status name differs from the last path component
 */
@Nullable
public UfsStatus addStatus(AlluxioURI path, UfsStatus status) {
  if (path.getName().equals(status.getName())) {
    return addStatusUnchecked(path, status);
  }
  throw new IllegalArgumentException(
      String.format("path name %s does not match ufs status name %s",
          path.getName(), status.getName()));
}
/**
 * Stores a status without validating it. Callers must guarantee that the last component of
 * {@code path} equals the name of {@code status}.
 *
 * @param path the Alluxio path used as the cache key
 * @param status the UFS status to cache
 * @return the status previously cached for the path, or null if there was none
 */
private UfsStatus addStatusUnchecked(AlluxioURI path, UfsStatus status) {
  // the absent cache is notified about the path before the status is stored
  mAbsentCache.processExisting(path);
  UfsStatus replaced = mStatuses.put(path, status);
  if (replaced != null) {
    // an existing entry was overwritten, so the cache did not grow
    return replaced;
  }
  // a new entry grows the global counter shared by all InodeSyncStream
  DefaultFileSystemMaster.Metrics.UFS_STATUS_CACHE_SIZE_TOTAL.inc();
  return null;
}
/**
 * Add a parent-child mapping to the status cache.
 *
 * Every child status is also stored individually under {@code path} joined with the child's
 * name, so it becomes available via {@link #getStatus(AlluxioURI)}. Any mapping previously
 * recorded for {@code path} is replaced.
 *
 * @param path the directory inode path which contains the children
 * @param children the children of the {@code path}
 * @return the previous set of children if the mapping existed, null otherwise
 */
@Nullable
public Collection<UfsStatus> addChildren(AlluxioURI path, Collection<UfsStatus> children) {
  for (UfsStatus child : children) {
    // childPath is derived from its UFS status, therefore has the same name
    AlluxioURI childPath = path.joinUnsafe(child.getName());
    addStatusUnchecked(childPath, child);
  }
  Collection<UfsStatus> previousStatuses = mChildren.put(path, children);
  // Update global counters for all InodeSyncStream: only the net change in children is applied
  int previousSize = previousStatuses != null ? previousStatuses.size() : 0;
  int sizeChange = children.size() - previousSize;
  DefaultFileSystemMaster.Metrics.UFS_STATUS_CACHE_CHILDREN_SIZE_TOTAL.inc(sizeChange);
  return previousStatuses;
}
/**
 * Remove a status from the cache.
 *
 * Removes the status stored for {@code path} and the parent-to-children mapping recorded for
 * {@code path}. The children's own entries in the status map are not touched by this method.
 *
 * @param path the path corresponding to the {@link UfsStatus} to remove
 * @return the removed UfsStatus, or null if no status was cached for the path
 * @throws NullPointerException if {@code path} is null
 */
@Nullable
public UfsStatus remove(AlluxioURI path) {
  Preconditions.checkNotNull(path, "can't remove null status cache path");
  UfsStatus removed = mStatuses.remove(path);
  if (removed != null) {
    // Update global counters for all InodeSyncStream
    DefaultFileSystemMaster.Metrics.UFS_STATUS_CACHE_SIZE_TOTAL.dec();
  }
  Collection<UfsStatus> removedChildren = mChildren.remove(path);
  if (removedChildren != null) {
    // NOTE(review): the status counter is lowered by the number of children although their
    // entries stay in mStatuses; a later remove() of a child lowers it again. Confirm that
    // this accounting is intended.
    DefaultFileSystemMaster.Metrics.UFS_STATUS_CACHE_SIZE_TOTAL.dec(removedChildren.size());
    DefaultFileSystemMaster.Metrics
        .UFS_STATUS_CACHE_CHILDREN_SIZE_TOTAL.dec(removedChildren.size());
  }
  return removed;
}
/**
 * Fails when the absent cache reports the path as absent.
 *
 * @param path the Alluxio path to check
 * @throws FileNotFoundException if the absent cache reports the path as absent with respect to
 *         the configured cache valid time
 */
private void checkAbsentCache(AlluxioURI path) throws FileNotFoundException {
  boolean knownAbsent = mAbsentCache.isAbsentSince(path, mCacheValidTime);
  if (!knownAbsent) {
    return;
  }
  String message = "UFS Status not found for path " + path.toString();
  throw new FileNotFoundException(message);
}
/**
 * Looks up the cached UfsStatus of an AlluxioURI, after consulting the absent cache.
 *
 * @param path the path to retrieve
 * @return the cached {@link UfsStatus}, or {@code null} if none is stored for the path
 * @throws FileNotFoundException if the absent cache reports the path as absent
 */
@Nullable
public UfsStatus getStatus(AlluxioURI path) throws FileNotFoundException {
  // throws when the absent cache already lists the path as missing
  checkAbsentCache(path);
  UfsStatus cached = mStatuses.get(path);
  return cached;
}
/**
 * Check if a status has been loaded in the cache for the given AlluxioURI.
 * Same as {@link UfsStatusCache#getStatus(AlluxioURI)} except that it does not
 * check the absent cache and therefore never throws.
 *
 * @param path the path to retrieve
 * @return an {@link Optional} holding the cached {@link UfsStatus}, or an empty one if no
 *         status is stored for the path
 */
public Optional<UfsStatus> hasStatus(AlluxioURI path) {
  return Optional.ofNullable(mStatuses.get(path));
}
/**
 * Attempts to return a status from the cache. If it doesn't exist, reaches to the UFS for it.
 *
 * When the absent cache reports the path as absent, null is returned without contacting the
 * UFS. A status obtained from the UFS is renamed to the last component of the Alluxio path and
 * stored in the cache. When the UFS returns no status or reports the file as not found, the
 * path is added to the absent cache. Other I/O failures are logged and result in null.
 *
 * @param path the path to retrieve
 * @param mountTable the Alluxio mount table
 * @return the corresponding {@link UfsStatus}, or {@code null} if the path is absent or the
 *         status could not be fetched
 * @throws InvalidPathException if the alluxio path can't be resolved to a UFS mount
 */
@Nullable
public UfsStatus fetchStatusIfAbsent(AlluxioURI path, MountTable mountTable)
    throws InvalidPathException {
  UfsStatus status;
  try {
    status = getStatus(path);
  } catch (FileNotFoundException e) {
    // the absent cache already lists the path as missing, so the UFS is not queried
    return null;
  }
  if (status != null) {
    return status;
  }
  MountTable.Resolution resolution = mountTable.resolve(path);
  AlluxioURI ufsUri = resolution.getUri();
  try (CloseableResource<UnderFileSystem> ufsResource = resolution.acquireUfsResource()) {
    UnderFileSystem ufs = ufsResource.get();
    UfsStatus ufsStatus = ufs.getStatus(ufsUri.toString());
    mountTable.getUfsSyncMetric(resolution.getMountId()).inc();
    if (ufsStatus == null) {
      mAbsentCache.addSinglePath(path);
      return null;
    }
    // key and status must agree on the name, so the status takes the Alluxio path's name
    ufsStatus.setName(path.getName());
    addStatus(path, ufsStatus);
    return ufsStatus;
  } catch (FileNotFoundException e) {
    // If the ufs can not find the file, we explicitly mark it absent so we do not recheck it
    mAbsentCache.addSinglePath(path);
  } catch (IllegalArgumentException | IOException e) {
    LogUtils.warnWithException(LOG, "Failed to fetch status for {}", path, e);
  }
  return null;
}
/**
 * Fetches children of a given alluxio path, stores them in the cache, then returns them.
 *
 * Children can be returned in a few ways
 * 1. If a prefetch job is registered for this path, wait for the job to finish and return its
 * result. The wait is repeated in slices of the configured prefetch timeout; between slices
 * the rpc context, if any, is checked for cancellation.
 * 2. If there is no prefetch job, or the job failed, and the children already exist in the
 * internal index, simply return them.
 * 3. Otherwise, if the fallback parameter is true, fetch them from the UFS and store them in
 * the cache. If it is false, simply return null.
 *
 * A {@link java.util.concurrent.CancellationException} raised by the prefetch job's
 * {@link Future#get(long, TimeUnit)} is not caught here and propagates to the caller.
 *
 * @param rpcContext the rpcContext of the source of this call, may be null
 * @param path the Alluxio path to get the children of
 * @param mountTable the Alluxio mount table
 * @param useFallback whether or not to fall back to calling the UFS
 * @return child UFS statuses of the alluxio path, or null if no prefetch job and fallback
 *         specified as false
 * @throws InterruptedException if the thread is interrupted while waiting for the prefetch job
 * @throws InvalidPathException if the alluxio path can't be resolved to a UFS mount
 */
@Nullable
public Collection<UfsStatus> fetchChildrenIfAbsent(RpcContext rpcContext, AlluxioURI path,
    MountTable mountTable, boolean useFallback)
    throws InterruptedException, InvalidPathException {
  Future<Collection<UfsStatus>> prefetchJob = mActivePrefetchJobs.get(path);
  if (prefetchJob != null) {
    while (true) {
      try {
        Collection<UfsStatus> statuses = prefetchJob.get(
            mUfsFetchTimeout, TimeUnit.MILLISECONDS);
        if (statuses != null) {
          DefaultFileSystemMaster.Metrics.METADATA_SYNC_PREFETCH_PATHS.inc(statuses.size());
        }
        DefaultFileSystemMaster.Metrics.METADATA_SYNC_PREFETCH_SUCCESS.inc();
        return statuses;
      } catch (TimeoutException e) {
        // the job is still running: give a cancelled RPC the chance to abort, then wait again
        if (rpcContext != null) {
          rpcContext.throwIfCancelled();
        }
        DefaultFileSystemMaster.Metrics.METADATA_SYNC_PREFETCH_RETRIES.inc();
      } catch (InterruptedException | ExecutionException e) {
        LogUtils.warnWithException(LOG,
            "Failed to get result for prefetch job on alluxio path {}", path, e);
        DefaultFileSystemMaster.Metrics.METADATA_SYNC_PREFETCH_FAIL.inc();
        if (e instanceof InterruptedException) {
          // restore the interrupt flag before handing the exception to the caller
          Thread.currentThread().interrupt();
          throw (InterruptedException) e;
        }
        // the job failed: leave the loop and try the cache / fallback below
        break;
      } finally {
        // Unregister only the job this call picked up. A plain remove(path) could also drop a
        // different job that another thread registered for the same path in the meantime.
        mActivePrefetchJobs.remove(path, prefetchJob);
      }
    }
  }
  Collection<UfsStatus> children = getChildren(path);
  if (children != null) {
    return children;
  }
  if (useFallback) {
    if (prefetchJob != null) {
      prefetchJob.cancel(false);
    }
    return getChildrenIfAbsent(path, mountTable);
  }
  return null;
}
/**
 * Fetches children of a given alluxio path, stores them in the cache, then returns them.
 *
 * Equivalent to
 * {@link #fetchChildrenIfAbsent(RpcContext, AlluxioURI, MountTable, boolean)} with the fallback
 * enabled: the result of a registered prefetch job or already cached children are returned
 * when available, and the UFS is called otherwise.
 *
 * @param rpcContext the rpcContext of the source of this call
 * @param path the Alluxio path
 * @param mountTable the Alluxio mount table
 * @return child UFS statuses of the alluxio path, or null if they could not be obtained
 * @throws InterruptedException if the thread is interrupted while waiting for a prefetch job
 * @throws InvalidPathException if the alluxio path can't be resolved to a UFS mount
 */
@Nullable
public Collection<UfsStatus> fetchChildrenIfAbsent(RpcContext rpcContext, AlluxioURI path,
    MountTable mountTable)
    throws InterruptedException, InvalidPathException {
  return fetchChildrenIfAbsent(rpcContext, path, mountTable, true);
}
/**
 * Retrieves the child UFS statuses for a given path and stores them in the cache.
 *
 * This method first checks if the children have already been retrieved, and if not, then
 * retrieves them by listing the path in the UFS. Nothing is listed when the absent cache
 * reports the path as absent. When the UFS listing returns null, the path is added to the
 * absent cache.
 *
 * @param path the path to get the children for
 * @param mountTable the Alluxio mount table
 * @return the child statuses that were stored in the cache, or null if the UFS couldn't list the
 *         statuses
 * @throws InvalidPathException when the table can't resolve the mount for the given URI
 */
@Nullable
Collection<UfsStatus> getChildrenIfAbsent(AlluxioURI path, MountTable mountTable)
    throws InvalidPathException {
  Collection<UfsStatus> children = getChildren(path);
  if (children != null) {
    return children;
  }
  if (mAbsentCache.isAbsentSince(path, mCacheValidTime)) {
    return null;
  }
  MountTable.Resolution resolution = mountTable.resolve(path);
  AlluxioURI ufsUri = resolution.getUri();
  try (CloseableResource<UnderFileSystem> ufsResource = resolution.acquireUfsResource()) {
    UnderFileSystem ufs = ufsResource.get();
    UfsStatus[] statuses = ufs.listStatus(ufsUri.toString());
    mountTable.getUfsSyncMetric(resolution.getMountId()).inc();
    if (statuses == null) {
      mAbsentCache.addSinglePath(path);
      return null;
    }
    children = new UnmodifiableArrayList<>(statuses);
    addChildren(path, children);
  } catch (IllegalArgumentException | IOException e) {
    // best effort: a failed listing is only logged and the caller receives what is available
    LOG.debug("Failed to add status to cache {}", path, e);
  }
  return children;
}
/**
 * Get the child {@link UfsStatus}es recorded for a given {@link AlluxioURI}.
 *
 * Only the internal index is consulted; neither the absent cache nor the UFS is queried.
 *
 * @param path the path to retrieve
 * @return the child statuses recorded for the path, or {@code null} if there are none stored
 */
@Nullable
public Collection<UfsStatus> getChildren(AlluxioURI path) {
  return mChildren.get(path);
}
/**
* Submit a request to asynchronously fetch the statuses corresponding to a given directory.
*
* Retrieve any fetched statuses by calling
* {@link #fetchChildrenIfAbsent(RpcContext, AlluxioURI, MountTable)} with the same Alluxio path.
*
* If no {@link ExecutorService} was provided to this object before instantiation, this method is
* a no-op.
*
* @param path the path to prefetch
* @param mountTable the Alluxio mount table
* @return the future corresponding to the fetch task
*/
@Nullable
public Future> prefetchChildren(AlluxioURI path, MountTable mountTable) {
if (mPrefetchExecutor == null) {
return null;
}
Future> prev = mActivePrefetchJobs.get(path);
if (prev != null) {
return prev;
}
try {
Future> job =
mPrefetchExecutor.submit(() -> getChildrenIfAbsent(path, mountTable));
DefaultFileSystemMaster.Metrics.METADATA_SYNC_PREFETCH_OPS_COUNT.inc();
prev = mActivePrefetchJobs.put(path, job);
if (prev != null) {
prev.cancel(true);
DefaultFileSystemMaster.Metrics.METADATA_SYNC_PREFETCH_CANCEL.inc();
}
return job;
} catch (RejectedExecutionException e) {
LOG.debug("Failed to submit prefetch job for path {}", path, e);
return null;
}
}
/**
* Interrupts and cancels any currently running prefetch jobs.
*/
public void cancelAllPrefetch() {
for (Future> f : mActivePrefetchJobs.values()) {
f.cancel(false);
}
DefaultFileSystemMaster.Metrics.METADATA_SYNC_PREFETCH_CANCEL.inc(mActivePrefetchJobs.size());
mActivePrefetchJobs.clear();
}
}
© 2015 - 2025 Weber Informatics LLC | Privacy Policy