
/*
* The Alluxio Open Foundation licenses this work under the Apache License, version 2.0
* (the "License"). You may not use this work except in compliance with the License, which is
* available at www.apache.org/licenses/LICENSE-2.0
*
* This software is distributed on an "AS IS" basis, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND,
* either express or implied, as more fully set forth in the License.
*
* See the NOTICE file distributed with this work for information regarding copyright ownership.
*/
package alluxio.master.file;
import alluxio.AlluxioURI;
import alluxio.client.WriteType;
import alluxio.collections.Pair;
import alluxio.conf.Configuration;
import alluxio.conf.PropertyKey;
import alluxio.exception.AccessControlException;
import alluxio.exception.BlockInfoException;
import alluxio.exception.DirectoryNotEmptyException;
import alluxio.exception.FileAlreadyCompletedException;
import alluxio.exception.FileAlreadyExistsException;
import alluxio.exception.FileDoesNotExistException;
import alluxio.exception.InvalidFileSizeException;
import alluxio.exception.InvalidPathException;
import alluxio.exception.status.UnavailableException;
import alluxio.file.options.DescendantType;
import alluxio.grpc.CompleteFilePOptions;
import alluxio.grpc.DeletePOptions;
import alluxio.grpc.FileSystemMasterCommonPOptions;
import alluxio.grpc.GrpcUtils;
import alluxio.grpc.LoadDescendantPType;
import alluxio.grpc.LoadMetadataPOptions;
import alluxio.grpc.SetAttributePOptions;
import alluxio.master.file.contexts.CompleteFileContext;
import alluxio.master.file.contexts.CreateDirectoryContext;
import alluxio.master.file.contexts.CreateFileContext;
import alluxio.master.file.contexts.DeleteContext;
import alluxio.master.file.contexts.LoadMetadataContext;
import alluxio.master.file.contexts.SetAttributeContext;
import alluxio.master.file.meta.Inode;
import alluxio.master.file.meta.InodeFile;
import alluxio.master.file.meta.InodeLockManager;
import alluxio.master.file.meta.InodeTree;
import alluxio.master.file.meta.InodeTree.LockPattern;
import alluxio.master.file.meta.LockedInodePath;
import alluxio.master.file.meta.LockingScheme;
import alluxio.master.file.meta.MountTable;
import alluxio.master.file.meta.MutableInodeFile;
import alluxio.master.file.meta.SyncCheck;
import alluxio.master.file.meta.SyncCheck.SyncResult;
import alluxio.master.file.meta.UfsAbsentPathCache;
import alluxio.master.file.meta.UfsSyncPathCache;
import alluxio.master.file.meta.UfsSyncUtils;
import alluxio.master.journal.FileSystemMergeJournalContext;
import alluxio.master.journal.JournalContext;
import alluxio.master.journal.MergeJournalContext;
import alluxio.master.journal.MetadataSyncMergeJournalContext;
import alluxio.master.journal.NoopJournalContext;
import alluxio.master.metastore.ReadOnlyInodeStore;
import alluxio.proto.journal.File;
import alluxio.proto.journal.Journal;
import alluxio.resource.CloseableIterator;
import alluxio.resource.CloseableResource;
import alluxio.security.authorization.AccessControlList;
import alluxio.security.authorization.DefaultAccessControlList;
import alluxio.security.authorization.Mode;
import alluxio.underfs.Fingerprint;
import alluxio.underfs.UfsFileStatus;
import alluxio.underfs.UfsManager;
import alluxio.underfs.UfsStatus;
import alluxio.underfs.UfsStatusCache;
import alluxio.underfs.UnderFileSystem;
import alluxio.util.LogUtils;
import alluxio.util.interfaces.Scoped;
import alluxio.util.io.PathUtils;
import com.codahale.metrics.Counter;
import com.google.common.base.MoreObjects;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import java.io.FileNotFoundException;
import java.io.IOException;
import java.time.Clock;
import java.util.ArrayList;
import java.util.Collection;
import java.util.ConcurrentModificationException;
import java.util.HashMap;
import java.util.LinkedList;
import java.util.List;
import java.util.Map;
import java.util.Optional;
import java.util.Queue;
import java.util.concurrent.Callable;
import java.util.concurrent.ConcurrentLinkedDeque;
import java.util.concurrent.ExecutionException;
import java.util.concurrent.ExecutorService;
import java.util.concurrent.Future;
import java.util.concurrent.TimeUnit;
import java.util.concurrent.TimeoutException;
import java.util.function.Function;
import javax.annotation.Nullable;
/**
* This class is responsible for maintaining the logic which surrounds syncing metadata between
* Alluxio and its UFSes.
*
* This implementation uses a BFS-based approach to crawl the inode tree. In order to speed up
* the sync process we use an {@link ExecutorService} which we submit inode paths to using
* {@link #processSyncPath(AlluxioURI, RpcContext)}.
* The processing of inode paths will discover new paths to
* sync depending on the {@link #mDescendantType}. Syncing is finished when all submitted tasks
* are completed and there are no new inodes left in the queue.
*
* Syncing inode metadata requires making calls to the UFS. This implementation will schedule UFS
* RPCs using {@link UfsStatusCache#prefetchChildren(AlluxioURI, MountTable)}. Then, once the
* inode begins processing, it can retrieve the results. After processing, it can then remove its
* {@link UfsStatus} from the cache. This strategy helps reduce memory pressure on the master
* while performing a sync for a large tree. Additionally, by using a prefetch mechanism we can
* concurrently process other inodes while waiting for UFS RPCs to complete.
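*
* A conceptual sketch of this prefetch pattern (only {@code prefetchChildren} and
* {@code remove} are named after methods referenced in this class; the surrounding
* flow and variable names are illustrative):
* <pre>{@code
* statusCache.prefetchChildren(path, mountTable); // schedule the UFS listing asynchronously
* // ... other inodes are processed concurrently while the UFS RPC is in flight ...
* // once this inode is processed, its UfsStatus results are consumed from the cache
* // and then dropped to reduce memory pressure on the master:
* statusCache.remove(path);
* }</pre>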
*
* With regard to locking, this class expects to be able to take a write lock on any inode, and
* then subsequently downgrades or unlocks after the sync is finished. Even though we use
* {@link java.util.concurrent.locks.ReentrantReadWriteLock}, because we concurrently process
* inodes on separate threads, we cannot utilize the reentrant behavior. This implies that
* the caller of this class must not hold a write lock while calling {@link #sync()}.
*
* A user of this class is expected to create a new instance for each path that they would like
* to process. This is because the Lock on the {@link #mRootScheme} may be changed after calling
* {@link #sync()}.
*
* When a sync happens on a directory, only the sync timestamp of the directory itself will be
* updated (including information on whether the sync was recursive), and not those of its
* children. Then whenever a path is checked, its ancestors' sync times up to the root are
* also checked. There are currently two reasons for this. First, the sync cache is updated
* only at the parent directory level, and not for every synced child, meaning there will be
* fewer entries in the cache. Second, updating the children's times individually would
* require a redesign, because sync paths are currently not tracked in a way that lets them
* know when their children finish (apart from the root sync path).
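*
* For example, whether a path still needs a sync is decided through
* {@link UfsSyncPathCache#shouldSyncPath}, the same call used elsewhere in this class,
* which accounts for the recorded sync times of the path's ancestors (a sketch;
* variable names are illustrative):
* <pre>{@code
* SyncCheck check = syncPathCache.shouldSyncPath(
*     new AlluxioURI("/a/b"), syncIntervalMs, DescendantType.ONE);
* // false may mean /a/b itself was synced recently, or that a recursive sync
* // recorded on an ancestor such as /a already covers it
* boolean needsSync = check.isShouldSync();
* }</pre>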
*
* When checking if a child of the root sync path needs to be synced, the following
* two items are considered:
* 1. If a child directory does not need to be synced, then it will not be synced.
* The parent will then update its sync time only to the oldest sync time of a child
* that was not synced (or the current clock time if all children were synced).
* 2. If a child file does not need to be synced, but its updated state has already
* been loaded from the UFS due to the listing of the parent directory, then the
* sync is still performed, because no additional UFS operations are needed.
* The exception is when ACL is enabled for the UFS: an additional UFS call would
* be needed, so the sync is skipped and the time is calculated as in 1.
*
* To go through an example (assuming every path here is a directory):
* if the sync interval is 100s, and the last synced timestamps are:
* /a 0
* /a/b 10
* /a/c 0
* /a/d 0
* /a/e 0
* /a/f 0
* Then at current timestamp 100 a sync will be triggered, with the sync for /a/b skipped
* (it was synced only 90s ago, within the 100s interval).
* Then the timestamps look like the following:
* /a 10
* /a/b 10
* /a/c 0
* /a/d 0
* /a/e 0
* /a/f 0
*
* Now if we do a sync at timestamp 110, a metadata sync for /a will be triggered again and
* all children are synced. After the operation, the timestamps look like the following
* (only /a's recorded time changes, but since checks also consult the parent's sync time,
* every path effectively has a sync time of 110):
* /a 110
* /a/b 10
* /a/c 0
* /a/d 0
* /a/e 0
* /a/f 0
*
* Here is a second example:
* Say the interval is 100s and the last synced timestamps are:
* /a 0
* /a/b 0
* /a/c 0
* /a/d 0
* /a/e 0
* /a/f 0
* Now say at time 90 some children are synced individually.
* Then the timestamps look like the following:
* /a 0
* /a/b 0
* /a/c 90
* /a/d 90
* /a/e 90
* /a/f 90
*
* and if we do a sync at timestamp 100, a sync will only happen on /a/b,
* and /a's recorded time will be updated to 90 (the oldest sync time among
* the skipped children):
* /a 90
* /a/b 0
* /a/c 90
* /a/d 90
* /a/e 90
* /a/f 90
*
* Note that we may consider different ways of deciding how to sync children
* (see https://github.com/Alluxio/alluxio/pull/16081).
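*
* A minimal usage sketch, assuming a {@link DefaultFileSystemMaster} {@code fsMaster},
* a {@link UfsSyncPathCache} {@code syncPathCache} and a caller-supplied
* {@link RpcContext} {@code rpcContext} (variable names are illustrative):
* <pre>{@code
* LockingScheme scheme =
*     new LockingScheme(new AlluxioURI("/a"), LockPattern.WRITE_EDGE, true);
* InodeSyncStream stream = new InodeSyncStream(scheme, fsMaster, syncPathCache, rpcContext,
*     DescendantType.ALL, FileSystemMasterCommonPOptions.getDefaultInstance(),
*     false, false, false); // forceSync, loadOnly, loadAlways
* InodeSyncStream.SyncStatus status = stream.sync();
* }</pre>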
*/
public class InodeSyncStream {
/**
* Return status of a sync result.
*/
public enum SyncStatus {
/** The sync completed and paths were synced successfully. */
OK,
/** The sync failed to sync some or all paths. */
FAILED,
/** The sync was skipped, e.g. due to a recent or concurrent successful sync. */
NOT_NEEDED
}
private static final Logger LOG = LoggerFactory.getLogger(InodeSyncStream.class);
private static final FileSystemMasterCommonPOptions NO_TTL_OPTION =
FileSystemMasterCommonPOptions.newBuilder()
.setTtl(-1)
.build();
/** The root path. Should be locked with a write lock. */
private final LockingScheme mRootScheme;
/** A {@link UfsSyncPathCache} maintained from the {@link DefaultFileSystemMaster}. */
private final UfsSyncPathCache mUfsSyncPathCache;
/** Object holding the {@link UfsStatus}es which may be required for syncing. */
private final UfsStatusCache mStatusCache;
/** Inode tree to lock new paths. */
private final InodeTree mInodeTree;
/** Determines how deep in the tree we need to load. */
private final DescendantType mDescendantType;
/** The {@link RpcContext} from the caller. */
private final RpcContext mRpcContext;
/** The inode store to look up children. */
private final ReadOnlyInodeStore mInodeStore;
/** The mount table for looking up the proper UFS client based on the Alluxio path. */
private final MountTable mMountTable;
/** The lock manager used to try acquiring the persisting lock. */
private final InodeLockManager mInodeLockManager;
/** The FS master creating this object. */
private final DefaultFileSystemMaster mFsMaster;
/** Set this to true to force a sync regardless of the {@link UfsSyncPathCache}. */
private final boolean mForceSync;
/** The sync options on the RPC. */
private final FileSystemMasterCommonPOptions mSyncOptions;
/** To determine if we should use the MergeJournalContext to merge journals. */
private final boolean mUseFileSystemMergeJournalContext = Configuration.getBoolean(
PropertyKey.MASTER_FILE_SYSTEM_MERGE_INODE_JOURNALS
);
/** To determine whether we should only let the UFS sync happen once
* for concurrent metadata sync requests syncing the same directory.
*/
private final boolean mDedupConcurrentSync = Configuration.getBoolean(
PropertyKey.MASTER_METADATA_CONCURRENT_SYNC_DEDUP
);
private static final MetadataSyncLockManager SYNC_METADATA_LOCK_MANAGER =
new MetadataSyncLockManager();
/** Whether to only read+create metadata from the UFS, or to update metadata as well. */
private final boolean mLoadOnly;
/** Deque used to keep track of paths that still need to be synced. */
private final ConcurrentLinkedDeque<AlluxioURI> mPendingPaths;
/** The traversal order of {@link #mPendingPaths}. */
private final MetadataSyncTraversalOrder mTraverseType;
/** Queue of paths that have been submitted to the executor. */
private final Queue<Future<SyncResult>> mSyncPathJobs;
/** The executor enabling concurrent processing. */
private final ExecutorService mMetadataSyncService;
/** The interval of time (in ms) that must pass before a new sync is required. */
private final long mSyncInterval;
/** The maximum number of concurrent paths that can be syncing at any moment. */
private final int mConcurrencyLevel =
Configuration.getInt(PropertyKey.MASTER_METADATA_SYNC_CONCURRENCY_LEVEL);
private final boolean mGetDirectoryStatusSkipLoadingChildren =
Configuration.getBoolean(
PropertyKey.MASTER_METADATA_SYNC_GET_DIRECTORY_STATUS_SKIP_LOADING_CHILDREN);
private final FileSystemMasterAuditContext mAuditContext;
private final Function<LockedInodePath, Inode> mAuditContextSrcInodeFunc;
private final Clock mClock;
/**
* Create a new instance of {@link InodeSyncStream}.
*
* The root path should already be locked with {@link LockPattern#WRITE_EDGE} unless the
* caller is only planning on loading metadata. The desired pattern should always be
* {@link LockPattern#READ}.
*
* It is an error to initiate sync without a WRITE_EDGE lock when loadOnly is {@code false}.
* If loadOnly is set to {@code true}, then the root path may have a read lock.
*
* @param rootPath The root path to begin syncing
* @param fsMaster the {@link FileSystemMaster} calling this method
* @param syncPathCache the {@link UfsSyncPathCache} for the given path
* @param rpcContext the caller's {@link RpcContext}
* @param descendantType determines the number of descendant inodes to sync
* @param options the RPC's {@link FileSystemMasterCommonPOptions}
* @param auditContext the audit context to use when loading
* @param auditContextSrcInodeFunc the inode to set as the audit context source
* @param forceSync whether to sync inode metadata no matter what
* @param loadOnly whether to only load new metadata, rather than update existing metadata
* @param loadAlways whether to always load new metadata from the ufs, even if a file or
* directory has previously been found not to exist
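*
* For example, a load-only invocation may use a read lock pattern on the root (a sketch;
* {@code uri}, {@code syncPathCache}, {@code rpcContext} and {@code options} are assumed
* to be supplied by the caller):
* <pre>{@code
* LockingScheme scheme = new LockingScheme(uri, LockPattern.READ, true);
* new InodeSyncStream(scheme, fsMaster, syncPathCache, rpcContext, DescendantType.NONE,
*     options, null, null, false, true, false) // forceSync=false, loadOnly=true, loadAlways=false
*     .sync();
* }</pre>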
*/
public InodeSyncStream(LockingScheme rootPath, DefaultFileSystemMaster fsMaster,
UfsSyncPathCache syncPathCache,
RpcContext rpcContext, DescendantType descendantType, FileSystemMasterCommonPOptions options,
@Nullable FileSystemMasterAuditContext auditContext,
@Nullable Function<LockedInodePath, Inode> auditContextSrcInodeFunc,
boolean forceSync, boolean loadOnly, boolean loadAlways)
{
mPendingPaths = new ConcurrentLinkedDeque<>();
mTraverseType = Configuration.getEnum(PropertyKey.MASTER_METADATA_SYNC_TRAVERSAL_ORDER,
MetadataSyncTraversalOrder.class);
mDescendantType = descendantType;
mRpcContext = rpcContext;
mMetadataSyncService = fsMaster.mSyncMetadataExecutorIns;
mClock = fsMaster.mClock;
mForceSync = forceSync;
mRootScheme = rootPath;
mSyncOptions = options;
mLoadOnly = loadOnly;
mSyncPathJobs = new LinkedList<>();
mFsMaster = fsMaster;
mInodeLockManager = fsMaster.getInodeLockManager();
mInodeStore = fsMaster.getInodeStore();
mInodeTree = fsMaster.getInodeTree();
mMountTable = fsMaster.getMountTable();
mUfsSyncPathCache = syncPathCache;
mAuditContext = auditContext;
mAuditContextSrcInodeFunc = auditContextSrcInodeFunc;
mSyncInterval = options.hasSyncIntervalMs() ? options.getSyncIntervalMs() :
Configuration.getMs(PropertyKey.USER_FILE_METADATA_SYNC_INTERVAL);
// If an absent cache entry was more recent than this value, then it is valid for this sync
long validCacheTime;
if (loadOnly) {
if (loadAlways) {
validCacheTime = UfsAbsentPathCache.NEVER;
} else {
validCacheTime = UfsAbsentPathCache.ALWAYS;
}
} else {
validCacheTime = mClock.millis() - mSyncInterval;
}
mStatusCache = new UfsStatusCache(fsMaster.mSyncPrefetchExecutorIns,
fsMaster.getAbsentPathCache(), validCacheTime);
// Maintain a global counter of active sync streams
DefaultFileSystemMaster.Metrics.INODE_SYNC_STREAM_COUNT.inc();
}
/**
* Create a new instance of {@link InodeSyncStream} without any audit or permission checks.
*
* @param rootScheme The root path to begin syncing
* @param fsMaster the {@link FileSystemMaster} calling this method
* @param syncPathCache the {@link UfsSyncPathCache} for this path
* @param rpcContext the caller's {@link RpcContext}
* @param descendantType determines the number of descendant inodes to sync
* @param options the RPC's {@link FileSystemMasterCommonPOptions}
* @param forceSync whether to sync inode metadata no matter what
* @param loadOnly whether to only load new metadata, rather than update existing metadata
* @param loadAlways whether to always load new metadata from the ufs, even if a file or
* directory has previously been found not to exist
*/
public InodeSyncStream(LockingScheme rootScheme, DefaultFileSystemMaster fsMaster,
UfsSyncPathCache syncPathCache,
RpcContext rpcContext, DescendantType descendantType, FileSystemMasterCommonPOptions options,
boolean forceSync, boolean loadOnly, boolean loadAlways)
{
this(rootScheme, fsMaster, syncPathCache, rpcContext, descendantType, options, null, null,
forceSync, loadOnly, loadAlways);
}
/**
* Sync the metadata according to the root path the stream was created with.
* [WARNING]:
* To avoid deadlock, please do not obtain any inode path lock before calling this method.
*
* @return the resulting {@link SyncStatus}
*/
public SyncStatus sync() throws AccessControlException, InvalidPathException {
LOG.debug("Running InodeSyncStream on path {}, with status {}, and force sync {}",
mRootScheme.getPath(), mRootScheme.shouldSync(), mForceSync);
if (!mRootScheme.shouldSync().isShouldSync() && !mForceSync) {
DefaultFileSystemMaster.Metrics.INODE_SYNC_STREAM_SKIPPED.inc();
return SyncStatus.NOT_NEEDED;
}
if (!mDedupConcurrentSync) {
return syncInternal();
}
try (MetadataSyncLockManager.MetadataSyncPathList ignored = SYNC_METADATA_LOCK_MANAGER.lockPath(
mRootScheme.getPath())) {
mRpcContext.throwIfCancelled();
return syncInternal();
} catch (IOException e) {
throw new RuntimeException(e);
}
}
private SyncStatus syncInternal() throws
AccessControlException, InvalidPathException {
// The high-level process for the syncing is:
// 1. Given an Alluxio path, determine if it is not consistent with the corresponding UFS path;
// this means the UFS path does not exist, or has metadata which differs from Alluxio
// 2. If only the metadata changed, update the inode with the new metadata
// 3. If the path does not exist in the UFS, delete the inode in Alluxio
// 4. If not deleted, load metadata from the UFS
// 5. If a recursive sync, add children inodes to sync queue
int syncPathCount = 0;
int failedSyncPathCount = 0;
int skippedSyncPathCount = 0;
int stopNum = -1; // stop syncing when we've processed this many paths. -1 for infinite
if (mDedupConcurrentSync && mRootScheme.shouldSync() != SyncCheck.SHOULD_SYNC) {
/*
* If a concurrent sync on the same path completes successfully after this sync has
* already been initialized, then there is no need to sync again. This is done by
* checking whether the last successful sync time for the path has increased since
* this sync was started.
* e.g.
* First assume the last sync time for path /aaa is 0
* 1. [TS=100] the sync() method is called by thread A for path /aaa with sync
* interval 50, so a sync starts
* 2. [TS=110] the sync() method is called by thread B for path /aaa,
* using syncInterval 100, so a sync starts, but
* thread B is blocked by the metadata sync lock,
* 3. [TS=180] thread A finishes the metadata sync and updates the SyncPathCache,
* setting the last sync timestamp to 100.
* 4. [TS=182] thread B acquires the lock and can start its sync
* 5. [TS=182] since the sync time for the path was 0 when thread B started,
* and is now 100, thread B can skip the sync and return NOT_NEEDED.
* Note that this still applies if A syncs path /aaa recursively while B syncs
* path /aaa/bbb, as A's sync scope covers B's.
*/
boolean shouldSkipSync =
mUfsSyncPathCache.shouldSyncPath(mRootScheme.getPath(), mSyncInterval,
mDescendantType).getLastSyncTime() > mRootScheme.shouldSync().getLastSyncTime();
if (shouldSkipSync) {
DefaultFileSystemMaster.Metrics.INODE_SYNC_STREAM_SKIPPED.inc();
LOG.debug("Skipped sync on {} due to successful concurrent sync", mRootScheme.getPath());
return SyncStatus.NOT_NEEDED;
}
}
LOG.debug("Running InodeSyncStream on path {}", mRootScheme.getPath());
long startTime = mUfsSyncPathCache.recordStartSync();
boolean rootPathIsFile = false;
RpcContext rpcContext = getMetadataSyncRpcContext();
try (LockedInodePath path =
mInodeTree.lockInodePath(mRootScheme, rpcContext.getJournalContext())) {
if (mAuditContext != null && mAuditContextSrcInodeFunc != null) {
mAuditContext.setSrcInode(mAuditContextSrcInodeFunc.apply(path));
}
syncInodeMetadata(path, rpcContext);
syncPathCount++;
if (mDescendantType == DescendantType.ONE) {
// If descendantType is ONE, then we shouldn't process any more paths except for those
// currently in the queue
stopNum = mPendingPaths.size();
} else if (mGetDirectoryStatusSkipLoadingChildren && mDescendantType == DescendantType.NONE) {
// If descendantType is NONE, do not process any path in the queue after
// the inode itself is loaded.
stopNum = 0;
}
// process the sync result for the original path
try {
path.traverse();
if (path.fullPathExists()) {
rootPathIsFile = !path.getInode().isDirectory();
}
} catch (InvalidPathException e) {
updateMetrics(false, startTime, syncPathCount, failedSyncPathCount);
throw new RuntimeException(e);
}
} catch (FileDoesNotExistException e) {
LOG.warn("Failed to sync metadata on root path {} because it"
+ " does not exist on the UFS or in Alluxio", this);
failedSyncPathCount++;
} catch (BlockInfoException | FileAlreadyCompletedException
| InterruptedException | InvalidFileSizeException
| IOException e) {
LogUtils.warnWithException(LOG, "Failed to sync metadata on root path {}",
toString(), e);
failedSyncPathCount++;
} catch (InvalidPathException | AccessControlException e) {
// Catch and re-throw just to update metrics before exit
LogUtils.warnWithException(LOG, "Failed to sync metadata on root path {}",
toString(), e);
updateMetrics(false, startTime, syncPathCount, failedSyncPathCount);
throw e;
} finally {
// regardless of the outcome, remove the UfsStatus for this path from the cache
mStatusCache.remove(mRootScheme.getPath());
// add the remaining journals into the async journal writer
maybeFlushJournalToAsyncJournalWriter(rpcContext);
}
// For any children that skipped syncing because of a recent sync time,
// we only update the root path's sync time to the oldest of those times
Long childOldestSkippedSync = null;
// Process any children after the root.
while (!mPendingPaths.isEmpty() || !mSyncPathJobs.isEmpty()) {
if (Thread.currentThread().isInterrupted()) {
LOG.warn("Metadata syncing was interrupted before completion; {}", this);
break;
}
if (mRpcContext.isCancelled()) {
LOG.warn("Metadata syncing was cancelled before completion; {}", this);
break;
}
// There are still paths to process
// First, remove any futures which have completed. Add to the sync path count if they sync'd
// successfully
while (true) {
Future<SyncResult> job = mSyncPathJobs.peek();
if (job == null || !job.isDone()) {
break;
}
// remove the job because we know it is done.
if (mSyncPathJobs.poll() != job) {
updateMetrics(false, startTime, syncPathCount, failedSyncPathCount);
throw new ConcurrentModificationException("Head of queue modified while executing");
}
// Update a global counter
DefaultFileSystemMaster.Metrics.INODE_SYNC_STREAM_ACTIVE_PATHS_TOTAL.dec();
try {
// retrieve the result of the completed path sync job
// This shouldn't block because we checked job.isDone() earlier
SyncResult result = job.get();
if (!result.isResultValid()) {
failedSyncPathCount++;
} else if (result.wasSyncPerformed()) {
syncPathCount++;
} else {
skippedSyncPathCount++;
}
if (result.isResultValid() && !result.wasSyncPerformed()) {
childOldestSkippedSync = childOldestSkippedSync == null ? result.getLastSyncTime()
: Math.min(childOldestSkippedSync, result.getLastSyncTime());
}
} catch (InterruptedException | ExecutionException e) {
failedSyncPathCount++;
LogUtils.warnWithException(
LOG, "metadata sync failed while polling for finished paths; {}",
toString(), e);
if (e instanceof InterruptedException) {
Thread.currentThread().interrupt();
break;
}
}
}
// When using descendant type ONE (or NONE, when loading children is skipped),
// we need to stop prematurely once enough paths have been processed.
if (stopNum != -1 && (syncPathCount + failedSyncPathCount + skippedSyncPathCount) > stopNum) {
break;
}
// We can submit up to (max_concurrency - current job queue size) jobs back into the queue
int submissions = mConcurrencyLevel - mSyncPathJobs.size();
for (int i = 0; i < submissions; i++) {
AlluxioURI path = pollItem();
if (path == null) {
// no paths left to sync
break;
}
RpcContext rpcContextForSyncPath = getMetadataSyncRpcContext();
Future<SyncResult> job =
mMetadataSyncService.submit(() -> processSyncPath(path, rpcContextForSyncPath));
mSyncPathJobs.offer(job);
// Update global counters for all sync streams
DefaultFileSystemMaster.Metrics.INODE_SYNC_STREAM_PENDING_PATHS_TOTAL.dec();
DefaultFileSystemMaster.Metrics.INODE_SYNC_STREAM_ACTIVE_PATHS_TOTAL.inc();
}
// After submitting all jobs wait for the job at the head of the queue to finish.
Future<SyncResult> oldestJob = mSyncPathJobs.peek();
if (oldestJob == null) { // There might not be any jobs, restart the loop.
continue;
}
try {
oldestJob.get(); // block until the oldest job finishes.
} catch (InterruptedException | ExecutionException e) {
LogUtils.warnWithException(
LOG, "Exception while waiting for oldest metadata sync job to finish: {}",
toString(), e);
if (e instanceof InterruptedException) {
Thread.currentThread().interrupt();
}
}
}
boolean success = syncPathCount > 0;
if (Configuration.getBoolean(PropertyKey.MASTER_METADATA_SYNC_REPORT_FAILURE)) {
// There should not be any failed or outstanding jobs
success = (failedSyncPathCount == 0) && mSyncPathJobs.isEmpty() && mPendingPaths.isEmpty();
}
if (success) {
// update the sync path cache for the root of the sync
// TODO(gpang): Do we need special handling for failures and thread interrupts?
mUfsSyncPathCache.notifySyncedPath(mRootScheme.getPath(), mDescendantType,
startTime, childOldestSkippedSync, rootPathIsFile);
}
mStatusCache.cancelAllPrefetch();
mSyncPathJobs.forEach(f -> f.cancel(true));
if (!mPendingPaths.isEmpty() || !mSyncPathJobs.isEmpty()) {
DefaultFileSystemMaster.Metrics.INODE_SYNC_STREAM_SYNC_PATHS_CANCEL.inc(
mPendingPaths.size() + mSyncPathJobs.size());
}
if (!mSyncPathJobs.isEmpty()) {
DefaultFileSystemMaster.Metrics
.INODE_SYNC_STREAM_ACTIVE_PATHS_TOTAL.dec(mSyncPathJobs.size());
}
if (!mPendingPaths.isEmpty()) {
DefaultFileSystemMaster.Metrics
.INODE_SYNC_STREAM_PENDING_PATHS_TOTAL.dec(mPendingPaths.size());
}
maybeFlushJournalToAsyncJournalWriter(rpcContext);
// Update metrics at the end of operation
updateMetrics(success, startTime, syncPathCount, failedSyncPathCount);
return success ? SyncStatus.OK : SyncStatus.FAILED;
}
private void updateMetrics(boolean success, long startTime,
int successPathCount, int failedPathCount) {
long duration = mClock.millis() - startTime;
DefaultFileSystemMaster.Metrics.INODE_SYNC_STREAM_TIME_MS.inc(duration);
if (success) {
DefaultFileSystemMaster.Metrics.INODE_SYNC_STREAM_SUCCESS.inc();
} else {
DefaultFileSystemMaster.Metrics.INODE_SYNC_STREAM_FAIL.inc();
}
DefaultFileSystemMaster.Metrics.INODE_SYNC_STREAM_SYNC_PATHS_SUCCESS.inc(successPathCount);
DefaultFileSystemMaster.Metrics.INODE_SYNC_STREAM_SYNC_PATHS_FAIL.inc(failedPathCount);
if (LOG.isDebugEnabled()) {
LOG.debug("synced {} paths ({} success, {} failed) in {} ms on {}",
successPathCount + failedPathCount, successPathCount, failedPathCount,
duration, mRootScheme);
}
}
/**
* Process a path to sync.
*
* This can update metadata for the inode, delete the inode, and/or queue any children that
* should be synced as well.
*
* @param path the path to sync
* @param rpcContext the RPC context used for journaling
* @return the {@link SyncResult} for this path
*/
private SyncResult processSyncPath(AlluxioURI path, RpcContext rpcContext)
throws InvalidPathException {
try {
return processSyncPathInternal(path, rpcContext);
} finally {
maybeFlushJournalToAsyncJournalWriter(rpcContext);
}
}
private SyncResult processSyncPathInternal(AlluxioURI path, RpcContext rpcContext)
throws InvalidPathException {
if (path == null) {
return SyncResult.INVALID_RESULT;
}
// if we have already loaded the path from the UFS, and the path
// is not a directory and ACL is disabled, then we will always finish the sync
// (even if it is not needed) since we already have all the data we need
boolean forceSync = !mFsMaster.isAclEnabled() && mStatusCache.hasStatus(path).map(
ufsStatus -> !ufsStatus.isDirectory()).orElse(false);
LockingScheme scheme;
// If forceSync is true, it means listStatus has already prefetched the metadata of the
// children, so update the metadata in such cases
if (mForceSync || forceSync) {
scheme = new LockingScheme(path, LockPattern.READ, true);
} else {
scheme = new LockingScheme(path, LockPattern.READ, mSyncOptions,
mUfsSyncPathCache, mDescendantType);
}
if (!scheme.shouldSync().isShouldSync() && !mForceSync) {
return scheme.shouldSync().skippedSync();
}
try (LockedInodePath inodePath =
mInodeTree.tryLockInodePath(scheme, rpcContext.getJournalContext())) {
if (Thread.currentThread().isInterrupted()) {
LOG.warn("Thread syncing {} was interrupted before completion", inodePath.getUri());
return SyncResult.INVALID_RESULT;
}
syncInodeMetadata(inodePath, rpcContext);
return scheme.shouldSync().syncSuccess();
} catch (AccessControlException | BlockInfoException | FileAlreadyCompletedException
| FileDoesNotExistException | InterruptedException | InvalidFileSizeException
| InvalidPathException | IOException e) {
LogUtils.warnWithException(LOG, "Failed to process sync path: {}", path, e);
} finally {
// regardless of the outcome, remove the UfsStatus for this path from the cache
mStatusCache.remove(path);
}
return SyncResult.INVALID_RESULT;
}
private void syncInodeMetadata(LockedInodePath inodePath, RpcContext rpcContext)
throws InvalidPathException, AccessControlException, IOException, FileDoesNotExistException,
FileAlreadyCompletedException, InvalidFileSizeException, BlockInfoException,
InterruptedException {
if (!inodePath.fullPathExists()) {
loadMetadataForPath(inodePath, rpcContext);
// skip the load metadata step in the sync if it has been just loaded
syncExistingInodeMetadata(inodePath, rpcContext, true);
} else {
syncExistingInodeMetadata(inodePath, rpcContext, false);
}
}
private Object getFromUfs(Callable