All Downloads are FREE. Search and download functionalities are using the official Maven repository.

alluxio.master.file.meta.LockedInodePath Maven / Gradle / Ivy

The newest version!
/*
 * The Alluxio Open Foundation licenses this work under the Apache License, version 2.0
 * (the "License"). You may not use this work except in compliance with the License, which is
 * available at www.apache.org/licenses/LICENSE-2.0
 *
 * This software is distributed on an "AS IS" basis, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND,
 * either express or implied, as more fully set forth in the License.
 *
 * See the NOTICE file distributed with this work for information regarding copyright ownership.
 */

package alluxio.master.file.meta;

import alluxio.AlluxioURI;
import alluxio.concurrent.LockMode;
import alluxio.conf.Configuration;
import alluxio.conf.PropertyKey;
import alluxio.exception.ExceptionMessage;
import alluxio.exception.FileDoesNotExistException;
import alluxio.exception.InvalidPathException;
import alluxio.exception.status.UnavailableException;
import alluxio.master.file.meta.InodeTree.LockPattern;
import alluxio.master.journal.FileSystemMergeJournalContext;
import alluxio.master.journal.JournalContext;
import alluxio.master.metastore.ReadOnlyInodeStore;
import alluxio.resource.AlluxioResourceLeakDetectorFactory;
import alluxio.util.io.PathUtils;

import com.google.common.base.Preconditions;
import io.netty.util.ResourceLeakDetector;
import io.netty.util.ResourceLeakTracker;

import java.io.Closeable;
import java.util.List;
import java.util.Optional;
import java.util.concurrent.locks.Lock;
import javax.annotation.Nullable;
import javax.annotation.concurrent.NotThreadSafe;

/**
 * This class represents a locked path within the inode tree, starting from the root.
 *
 * The mUri and mPathComponents fields are immutable and contain the "target" path represented by
 * the LockedInodePath.
 *
 * mLockList manages the locks held by the LockedInodePath.
 *
 * The mExistingInodes list holds inodes that are locked and known to exist. The inodes might not be
 * individually locked - if one inode is write-locked, the inodes after it are implicitly
 * write-locked and may be added to mExistingInodes. The inodes in mExistingInodes are a prefix of
 * the inodes referenced by mPathComponents.
 *
 * If the full path exists, mExistingInodes will have size equal to mPathComponents.length after
 * traversal. mExistingInodes is always at least as long as the list of inodes directly locked by
 * mLockList.
 *
 * To create new inode paths from an existing path, use one of the lock*() methods. They return new
 * locked inode paths that can be modified and closed without affecting the original path. Modifying
 * the original inode path invalidates any subpaths that it has created.
 *
 * Locked inode paths are not threadsafe and should not be shared across threads.
 */
@NotThreadSafe
public class LockedInodePath implements Closeable {

  private static final ResourceLeakDetector DETECTOR =
      AlluxioResourceLeakDetectorFactory.instance().newResourceLeakDetector(LockedInodePath.class);
  /**
   * The root inode of the inode tree. This is needed to bootstrap the inode path.
   */
  private final Inode mRoot;
  /** Inode store for looking up children. */
  private final ReadOnlyInodeStore mInodeStore;

  /** Uri for the path represented. */
  protected final AlluxioURI mUri;
  /** The components of mUri. */
  protected final String[] mPathComponents;
  /** Lock list locking some portion of the path according to mLockPattern. */
  protected final InodeLockList mLockList;
  /** The locking pattern. */
  protected LockPattern mLockPattern;
  /** Whether to use {@link Lock#tryLock()} or {@link Lock#lock()}. */
  private final boolean mUseTryLock;
  /** Tracker used for logging leaked resources. */
  @Nullable
  private final ResourceLeakTracker mTracker;
  /** To determine if we should flush the journals when lock is released or scope reduced. */
  private final boolean mMergeInodeJournals;

  /**
   * Keeps a reference of JournalContext and flushes it before the lock is released.
   * This is used to prevent inconsistency between primary and standby.
   * A typical write operation in FileSystem looks like:
   * 
   * try (createJournalContext()) {
   *   try (lockInodePath(uri)) {
   *     // File System Operations
   *     // ...
   *   }
   * }
   * 
* * * Because the inode path lock is released ahead of the close of the journal context, * other requests can do other file system operations on the inode path we locked * before the journals are actually flushed and committed. If a failover happens, * the client might observe an inconsistent view comparing to the one on the previous master. * * We keep the journal context instance in the LockedInodePath to mitigate the issue, * by forcing to flush journals before the lock is released. * {@link JournalContext#flush()} always commit the journals except the ones used in * metadata sync. * For performance consideration, * {@link alluxio.master.journal.MetadataSyncMergeJournalContext} only * appends journals to the async journal writer and these journals will be committed later. * * This also helps keep the ordering of the committed journals * if {@link alluxio.master.journal.FileSystemMergeJournalContext} is used because * in its implement, journals will not be queued until the context is closed or the flush method * is called. Given the LockedInodePath is closed ahead of the FileSystemMergeJournalContext, * another request can potentially quickly do a file system operation on the same file * and commits its journals, before the current thread commits its journal, which * resulted in the journals being committed in an incorrect order. * Flushing journals before closing the LockedInodePath object solves this issue. * * Note that this still doesn't solve the inconsistency between primary and standby * if the primary crashes when it has been doing a file system operation but hasn't committed * journals to standby. * * When MASTER_FILE_SYSTEM_MERGE_INODE_JOURNALS is enabled, releasing inode tree locks * will flush the entire journal context for the operation. Hence whenever a lock is released, * the operation must be sure that it has made all necessary updates to * any journal modifications that should be visible atomically. */ private final JournalContext mJournalContext; /** * Creates a new locked inode path. * * @param uri the uri for the path * @param inodeStore the inode store for looking up inode children * @param inodeLockManager the inode lock manager * @param root the root inode * @param lockPattern the pattern to lock in * @param tryLock whether or not use {@link Lock#tryLock()} or {@link Lock#lock()} * @param journalContext the journal context to flush when the lock is released */ public LockedInodePath(AlluxioURI uri, ReadOnlyInodeStore inodeStore, InodeLockManager inodeLockManager, InodeDirectory root, LockPattern lockPattern, boolean tryLock, JournalContext journalContext) throws InvalidPathException { mUri = uri; mPathComponents = PathUtils.getPathComponents(uri.getPath()); mInodeStore = inodeStore; mLockPattern = lockPattern; mRoot = root; mUseTryLock = tryLock; mLockList = new SimpleInodeLockList(inodeLockManager, mUseTryLock); mTracker = DETECTOR.track(this); mJournalContext = journalContext; mMergeInodeJournals = Configuration.getBoolean( PropertyKey.MASTER_FILE_SYSTEM_MERGE_INODE_JOURNALS ) && mJournalContext instanceof FileSystemMergeJournalContext; } /** * Creates a new locked inode path, using a prefix locked inode path as a starting point. * * @param uri the uri for the new path * @param path the path to use as a starting point * @param pathComponents components of the uri * @param lockPattern the pattern to lock in */ private LockedInodePath(AlluxioURI uri, LockedInodePath path, String[] pathComponents, LockPattern lockPattern, boolean tryLock) { Preconditions.checkState(!path.mLockList.isEmpty()); mUri = uri; mPathComponents = pathComponents; mInodeStore = path.mInodeStore; mLockList = new CompositeInodeLockList(path.mLockList, tryLock); mLockPattern = lockPattern; mRoot = path.mLockList.get(0); mUseTryLock = tryLock; mTracker = DETECTOR.track(this); // LockedInodePath is not thread safe and should not be shared across threads. // So the new created LockInodePath instance must be on the same thread with // the original one and hence they will use the same JournalContext. mJournalContext = path.mJournalContext; mMergeInodeJournals = Configuration.getBoolean( PropertyKey.MASTER_FILE_SYSTEM_MERGE_INODE_JOURNALS ) && mJournalContext instanceof FileSystemMergeJournalContext; } /** * @return the full uri of the path */ public AlluxioURI getUri() { return mUri; } /** * @return the target inode * @throws FileDoesNotExistException if the target inode does not exist */ public Inode getInode() throws FileDoesNotExistException { Inode inode = getInodeOrNull(); if (inode == null) { throw new FileDoesNotExistException(ExceptionMessage.PATH_DOES_NOT_EXIST.getMessage(mUri)); } return inode; } /** * @return the target inode, or null if it does not exist */ @Nullable public Inode getInodeOrNull() { if (!fullPathExists()) { return null; } return mLockList.get(mLockList.numInodes() - 1); } /** * @return the target inode as an {@link MutableInodeFile} * @throws FileDoesNotExistException if the target inode does not exist, or it is not a file */ public InodeFile getInodeFile() throws FileDoesNotExistException { Inode inode = getInode(); if (!inode.isFile()) { throw new FileDoesNotExistException(ExceptionMessage.PATH_MUST_BE_FILE.getMessage(mUri)); } return inode.asFile(); } /** * @return the parent of the target inode * @throws InvalidPathException if the parent inode is not a directory * @throws FileDoesNotExistException if the parent of the target does not exist */ public InodeDirectory getParentInodeDirectory() throws InvalidPathException, FileDoesNotExistException { Inode inode = getParentInodeOrNull(); if (inode == null) { throw new FileDoesNotExistException( ExceptionMessage.PATH_DOES_NOT_EXIST.getMessage(mUri.getParent())); } if (!inode.isDirectory()) { throw new InvalidPathException( ExceptionMessage.PATH_MUST_HAVE_VALID_PARENT.getMessage(mUri)); } return (InodeDirectory) inode; } /** * @return the parent of the target inode, or null if the parent does not exist */ @Nullable public Inode getParentInodeOrNull() { if (mPathComponents.length < 2 || mLockList.numInodes() < (mPathComponents.length - 1)) { // The path is only the root, or the list of inodes is not long enough to contain the parent return null; } return mLockList.get(mPathComponents.length - 2); } /** * @return the last existing inode on the inode path. This could be out of date if the current * thread has added or deleted inodes since the last call to traverse() */ public Inode getLastExistingInode() { return mLockList.get(mLockList.numInodes() - 1); } /** * @return a copy of the list of existing inodes, from the root */ public List getInodeList() { return mLockList.getLockedInodes(); } /** * @return a copy of the list of existing inodes, from the root */ public List getInodeViewList() { return mLockList.getLockedInodeViews(); } /** * @return the number of existing inodes in this path. This could be out of date if the current * thread has added or deleted inodes since the last call to traverse() */ public int getExistingInodeCount() { return mLockList.numInodes(); } /** * @return number of components in this locked path */ public int size() { return mPathComponents.length; } /** * @return true if the entire path of inodes exists, false otherwise. This could be out of date if * the current thread has added or deleted inodes since the last call to traverse() */ public boolean fullPathExists() { return mLockList.numInodes() == mPathComponents.length; } /** * @return the {@link LockPattern} of this path */ public LockPattern getLockPattern() { return mLockPattern; } /** * Removes the last inode from the list. This is necessary when the last inode is deleted and we * want to continue using the inodepath. This operation is only supported when the path is * complete. */ public void removeLastInode() { Preconditions.checkState(fullPathExists()); maybeFlushJournals(); mLockList.unlockLastInode(); } /** * Adds the next inode to the path. This tries to reduce the scope of locking by moving the write * lock forward to the new final edge, downgrading the previous write lock to a read lock. * * @param inode the inode to add */ public void addNextInode(Inode inode) { Preconditions.checkState(mLockPattern == LockPattern.WRITE_EDGE); Preconditions.checkState(!fullPathExists()); Preconditions.checkState(inode.getName().equals(mPathComponents[mLockList.numInodes()])); int nextInodeIndex = mLockList.numInodes() + 1; if (nextInodeIndex < mPathComponents.length) { // We need to flush the pending journals into the writer // before the lock scope is reduced. maybeFlushJournals(); mLockList.pushWriteLockedEdge(inode, mPathComponents[nextInodeIndex]); } else { mLockList.lockInode(inode, LockMode.WRITE); } } /** * Downgrades all locks in this list to read locks. */ public void downgradeToRead() { maybeFlushJournals(); mLockList.downgradeToReadLocks(); mLockPattern = LockPattern.READ; } /** * Returns the closest ancestor of the target inode (last inode in the full path). * * @return the closest ancestor inode * @throws FileDoesNotExistException if an ancestor does not exist */ public Inode getAncestorInode() throws FileDoesNotExistException { int ancestorIndex = mPathComponents.length - 2; if (ancestorIndex < 0) { throw new FileDoesNotExistException(ExceptionMessage.PATH_DOES_NOT_EXIST.getMessage(mUri)); } ancestorIndex = Math.min(ancestorIndex, mLockList.numInodes() - 1); return mLockList.get(ancestorIndex); } /** * Locks a descendant of the current path and returns a new locked inode path. The path is * traversed according to the lock pattern. Closing the new path will have no effect on the * current path. * * On failure, all locks taken by this method will be released. * * @param descendantUri the full descendent uri starting from the root * @param lockPattern the lock pattern to lock in * @return the new locked path */ public LockedInodePath lockDescendant(AlluxioURI descendantUri, LockPattern lockPattern) throws InvalidPathException { LockedInodePath path = new LockedInodePath(descendantUri, this, PathUtils.getPathComponents(descendantUri.getPath()), lockPattern, mUseTryLock); path.traverseOrClose(); return path; } /** * Returns a new locked inode path composed of the current path plus the child inode. The path is * traversed according to the lock pattern. The original locked inode path is unaffected. * * childComponentsHint can be used to save the work of computing path components when the path * components for the new path are already known. * * On failure, all locks taken by this method will be released. * * @param child the child inode * @param lockPattern the lock pattern * @return the new locked path */ public LockedInodePath lockChild(Inode child, LockPattern lockPattern) throws InvalidPathException { return lockChild(child, lockPattern, addComponent(mPathComponents, child.getName())); } /** * Efficient version of {@link #lockChild(Inode, LockPattern)} for when the child path * components are already known. * * @param child the child inode * @param lockPattern the lock pattern * @param childComponentsHint path components for the new path * @return the new locked path */ public LockedInodePath lockChild(Inode child, LockPattern lockPattern, String[] childComponentsHint) throws InvalidPathException { return lockChildByName(child.getName(), lockPattern, childComponentsHint); } /** * Efficient version of {@link #lockChild(Inode, LockPattern)} for when the child path * components are already known. * * @param childName the name of the child inode * @param lockPattern the lock pattern * @param childComponentsHint path components for the new path * @return the new locked path */ public LockedInodePath lockChildByName(String childName, LockPattern lockPattern, String[] childComponentsHint) throws InvalidPathException { LockedInodePath path = new LockedInodePath(mUri.joinUnsafe(childName), this, childComponentsHint, lockPattern, mUseTryLock); path.traverseOrClose(); return path; } private static String[] addComponent(String[] components, String component) { String[] newComponents = new String[components.length + 1]; System.arraycopy(components, 0, newComponents, 0, components.length); newComponents[components.length] = component; return newComponents; } /** * Returns a copy of the path with the final edge write locked. This requires that we haven't * already locked the final edge, i.e. the path is incomplete. * * @return the new locked path */ public LockedInodePath lockFinalEdgeWrite() throws InvalidPathException { Preconditions.checkState(!fullPathExists()); LockedInodePath newPath = new LockedInodePath(mUri, this, mPathComponents, LockPattern.WRITE_EDGE, mUseTryLock); newPath.traverse(); return newPath; } private void traverseOrClose() throws InvalidPathException { try { traverse(); } catch (Throwable t) { close(); throw t; } } /** * Traverses the inode path according to its lock pattern. If the inode path is already partially * traversed, this method will pick up where the previous traversal left off. * * On return, all existing inodes in the path are added to mExistingInodes and the inodes are * locked according to {@link LockPattern}. * * Journals are not flushed in this method because: * 1. When a LockedInodePath is created, it will be traversed first before any journal is written * 2. The only use cases that call this method independently are in metadata sync * {@link alluxio.master.file.InodeSyncStream}. Where traverse() is called right before the * lock is released, where the journals will be flushed anyway. */ public void traverse() throws InvalidPathException { // This locks the root edge and inode. bootstrapTraversal(); // Each iteration either locks a new inode/edge or hits a missing inode and returns. while (!fullPathExists()) { int lastInodeIndex = mLockList.numInodes() - 1; String nextComponent = mPathComponents[lastInodeIndex + 1]; boolean isFinalComponent = lastInodeIndex == mPathComponents.length - 2; Inode lastInode = mLockList.get(lastInodeIndex); if (mLockList.endsInInode()) { // Lock an edge next. if (mLockPattern == LockPattern.WRITE_EDGE && isFinalComponent) { mLockList.lockEdge(lastInode, nextComponent, LockMode.WRITE); } else { mLockList.lockEdge(lastInode, nextComponent, LockMode.READ); } } else { // Lock an inode next. if (!lastInode.isDirectory()) { throw new InvalidPathException(String.format( "Traversal failed for path %s. Component %s(%s) is a file, not a directory.", mUri, lastInodeIndex, lastInode.getName())); } Optional nextInodeOpt = mInodeStore.getChild(lastInode.asDirectory(), nextComponent); if (!nextInodeOpt.isPresent() && mLockPattern == LockPattern.WRITE_EDGE && !isFinalComponent) { // This pattern requires that we obtain a write lock on the final edge, so we must // upgrade to a write lock. mLockList.unlockLastEdge(); mLockList.lockEdge(lastInode, nextComponent, LockMode.WRITE); nextInodeOpt = mInodeStore.getChild(lastInode.asDirectory(), nextComponent); if (nextInodeOpt.isPresent()) { // The component must have been created between releasing the read lock and acquiring // the write lock. Downgrade and continue as normal. mLockList.downgradeLastEdge(); } } if (!nextInodeOpt.isPresent()) { if (mLockPattern == LockPattern.READ) { // WRITE_INODE and WRITE_EDGE should lock the last non-existing edge. mLockList.unlockLastEdge(); } return; } Inode nextInode = nextInodeOpt.get(); if (isFinalComponent && mLockPattern.isWrite()) { mLockList.lockInode(nextInode, LockMode.WRITE); } else { mLockList.lockInode(nextInode, LockMode.READ); } } } } private void bootstrapTraversal() { if (!mLockList.isEmpty()) { return; } LockMode edgeLock = LockMode.READ; LockMode inodeLock = LockMode.READ; if (mPathComponents.length == 1) { if (mLockPattern == LockPattern.WRITE_EDGE) { edgeLock = LockMode.WRITE; inodeLock = LockMode.WRITE; } else if (mLockPattern == LockPattern.WRITE_INODE) { inodeLock = LockMode.WRITE; } } mLockList.lockRootEdge(edgeLock); mLockList.lockInode(mRoot, inodeLock); } @Override public void close() { try { maybeFlushJournals(); } finally { // releases the locks in case journal flush failed if (mTracker != null) { mTracker.close(this); } mLockList.close(); } } @Override public String toString() { return mUri.toString(); } private void maybeFlushJournals() { if (mMergeInodeJournals) { try { mJournalContext.flush(); } catch (UnavailableException e) { throw new RuntimeException(e); } } } }




© 2015 - 2025 Weber Informatics LLC | Privacy Policy