alluxio.master.file.meta.LockedInodePath Maven / Gradle / Ivy

Go to download
Show more of this group Show more artifacts with this name
Show all versions of alluxio-core-server-master Show documentation
Alluxio master service
The newest version!
/*
 * The Alluxio Open Foundation licenses this work under the Apache License, version 2.0
 * (the "License"). You may not use this work except in compliance with the License, which is
 * available at www.apache.org/licenses/LICENSE-2.0
 *
 * This software is distributed on an "AS IS" basis, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND,
 * either express or implied, as more fully set forth in the License.
 *
 * See the NOTICE file distributed with this work for information regarding copyright ownership.
 */

package alluxio.master.file.meta;

import alluxio.AlluxioURI;
import alluxio.concurrent.LockMode;
import alluxio.conf.Configuration;
import alluxio.conf.PropertyKey;
import alluxio.exception.ExceptionMessage;
import alluxio.exception.FileDoesNotExistException;
import alluxio.exception.InvalidPathException;
import alluxio.exception.status.UnavailableException;
import alluxio.master.file.meta.InodeTree.LockPattern;
import alluxio.master.journal.FileSystemMergeJournalContext;
import alluxio.master.journal.JournalContext;
import alluxio.master.metastore.ReadOnlyInodeStore;
import alluxio.resource.AlluxioResourceLeakDetectorFactory;
import alluxio.util.io.PathUtils;

import com.google.common.base.Preconditions;
import io.netty.util.ResourceLeakDetector;
import io.netty.util.ResourceLeakTracker;

import java.io.Closeable;
import java.util.List;
import java.util.Optional;
import java.util.concurrent.locks.Lock;
import javax.annotation.Nullable;
import javax.annotation.concurrent.NotThreadSafe;

/**
 * This class represents a locked path within the inode tree, starting from the root.
 *
 * The mUri and mPathComponents fields are immutable and contain the "target" path represented by
 * the LockedInodePath.
 *
 * mLockList manages the locks held by the LockedInodePath.
 *
 * The mExistingInodes list holds inodes that are locked and known to exist. The inodes might not be
 * individually locked - if one inode is write-locked, the inodes after it are implicitly
 * write-locked and may be added to mExistingInodes. The inodes in mExistingInodes are a prefix of
 * the inodes referenced by mPathComponents.
 *
 * If the full path exists, mExistingInodes will have size equal to mPathComponents.length after
 * traversal. mExistingInodes is always at least as long as the list of inodes directly locked by
 * mLockList.
 *
 * To create new inode paths from an existing path, use one of the lock*() methods. They return new
 * locked inode paths that can be modified and closed without affecting the original path. Modifying
 * the original inode path invalidates any subpaths that it has created.
 *
 * Locked inode paths are not threadsafe and should not be shared across threads.
 */
@NotThreadSafe
public class LockedInodePath implements Closeable {

  private static final ResourceLeakDetector DETECTOR =
      AlluxioResourceLeakDetectorFactory.instance().newResourceLeakDetector(LockedInodePath.class);
  /**
   * The root inode of the inode tree. This is needed to bootstrap the inode path.
   */
  private final Inode mRoot;
  /** Inode store for looking up children. */
  private final ReadOnlyInodeStore mInodeStore;

  /** Uri for the path represented. */
  protected final AlluxioURI mUri;
  /** The components of mUri. */
  protected final String[] mPathComponents;
  /** Lock list locking some portion of the path according to mLockPattern. */
  protected final InodeLockList mLockList;
  /** The locking pattern. */
  protected LockPattern mLockPattern;
  /** Whether to use {@link Lock#tryLock()} or {@link Lock#lock()}. */
  private final boolean mUseTryLock;
  /** Tracker used for logging leaked resources. */
  @Nullable
  private final ResourceLeakTracker mTracker;
  /** To determine if we should flush the journals when lock is released or scope reduced. */
  private final boolean mMergeInodeJournals;

  /**
   * Keeps a reference of JournalContext and flushes it before the lock is released.
   * This is used to prevent inconsistency between primary and standby.
   * A typical write operation in FileSystem looks like:
   *    * try (createJournalContext()) {
   *   try (lockInodePath(uri)) {
   *     // File System Operations
   *     // ...
   *   }
   * }
   * 
   * *
   * Because the inode path lock is released ahead of the close of the journal context,
   * other requests can do other file system operations on the inode path we locked
   * before the journals are actually flushed and committed. If a failover happens,
   * the client might observe an inconsistent view comparing to the one on the previous master.
   *
   * We keep the journal context instance in the LockedInodePath to mitigate the issue,
   * by forcing to flush journals before the lock is released.
   * {@link JournalContext#flush()} always commit the journals except the ones used in
   * metadata sync.
   * For performance consideration,
   * {@link alluxio.master.journal.MetadataSyncMergeJournalContext} only
   * appends journals to the async journal writer and these journals will be committed later.
   *
   * This also helps keep the ordering of the committed journals
   * if {@link alluxio.master.journal.FileSystemMergeJournalContext} is used because
   * in its implement, journals will not be queued until the context is closed or the flush method
   * is called. Given the LockedInodePath is closed ahead of the FileSystemMergeJournalContext,
   * another request can potentially quickly do a file system operation on the same file
   * and commits its journals, before the current thread commits its journal, which
   * resulted in the journals being committed in an incorrect order.
   * Flushing journals before closing the LockedInodePath object solves this issue.
   *
   * Note that this still doesn't solve the inconsistency between primary and standby
   * if the primary crashes when it has been doing a file system operation but hasn't committed
   * journals to standby.
   *
   * When MASTER_FILE_SYSTEM_MERGE_INODE_JOURNALS is enabled, releasing inode tree locks
   * will flush the entire journal context for the operation. Hence whenever a lock is released,
   * the operation must be sure that it has made all necessary updates to
   * any journal modifications that should be visible atomically.
   */
  private final JournalContext mJournalContext;

  /**
   * Creates a new locked inode path.
   *
   * @param uri the uri for the path
   * @param inodeStore the inode store for looking up inode children
   * @param inodeLockManager the inode lock manager
   * @param root the root inode
   * @param lockPattern the pattern to lock in
   * @param tryLock whether or not use {@link Lock#tryLock()} or {@link Lock#lock()}
   * @param journalContext the journal context to flush when the lock is released
   */
  public LockedInodePath(AlluxioURI uri, ReadOnlyInodeStore inodeStore,
      InodeLockManager inodeLockManager, InodeDirectory root, LockPattern lockPattern,
      boolean tryLock, JournalContext journalContext)
      throws InvalidPathException {
    mUri = uri;
    mPathComponents = PathUtils.getPathComponents(uri.getPath());
    mInodeStore = inodeStore;
    mLockPattern = lockPattern;
    mRoot = root;
    mUseTryLock = tryLock;
    mLockList = new SimpleInodeLockList(inodeLockManager, mUseTryLock);
    mTracker = DETECTOR.track(this);
    mJournalContext = journalContext;
    mMergeInodeJournals = Configuration.getBoolean(
        PropertyKey.MASTER_FILE_SYSTEM_MERGE_INODE_JOURNALS
    ) && mJournalContext instanceof FileSystemMergeJournalContext;
  }

  /**
   * Creates a new locked inode path, using a prefix locked inode path as a starting point.
   *
   * @param uri the uri for the new path
   * @param path the path to use as a starting point
   * @param pathComponents components of the uri
   * @param lockPattern the pattern to lock in
   */
  private LockedInodePath(AlluxioURI uri, LockedInodePath path, String[] pathComponents,
      LockPattern lockPattern, boolean tryLock) {
    Preconditions.checkState(!path.mLockList.isEmpty());
    mUri = uri;
    mPathComponents = pathComponents;
    mInodeStore = path.mInodeStore;
    mLockList = new CompositeInodeLockList(path.mLockList, tryLock);
    mLockPattern = lockPattern;
    mRoot = path.mLockList.get(0);
    mUseTryLock = tryLock;
    mTracker = DETECTOR.track(this);
    // LockedInodePath is not thread safe and should not be shared across threads.
    // So the new created LockInodePath instance must be on the same thread with
    // the original one and hence they will use the same JournalContext.
    mJournalContext = path.mJournalContext;
    mMergeInodeJournals = Configuration.getBoolean(
        PropertyKey.MASTER_FILE_SYSTEM_MERGE_INODE_JOURNALS
    ) && mJournalContext instanceof FileSystemMergeJournalContext;
  }

  /**
   * @return the full uri of the path
   */
  public AlluxioURI getUri() {
    return mUri;
  }

  /**
   * @return the target inode
   * @throws FileDoesNotExistException if the target inode does not exist
   */
  public Inode getInode() throws FileDoesNotExistException {
    Inode inode = getInodeOrNull();
    if (inode == null) {
      throw new FileDoesNotExistException(ExceptionMessage.PATH_DOES_NOT_EXIST.getMessage(mUri));
    }
    return inode;
  }

  /**
   * @return the target inode, or null if it does not exist
   */
  @Nullable
  public Inode getInodeOrNull() {
    if (!fullPathExists()) {
      return null;
    }
    return mLockList.get(mLockList.numInodes() - 1);
  }

  /**
   * @return the target inode as an {@link MutableInodeFile}
   * @throws FileDoesNotExistException if the target inode does not exist, or it is not a file
   */
  public InodeFile getInodeFile() throws FileDoesNotExistException {
    Inode inode = getInode();
    if (!inode.isFile()) {
      throw new FileDoesNotExistException(ExceptionMessage.PATH_MUST_BE_FILE.getMessage(mUri));
    }
    return inode.asFile();
  }

  /**
   * @return the parent of the target inode
   * @throws InvalidPathException if the parent inode is not a directory
   * @throws FileDoesNotExistException if the parent of the target does not exist
   */
  public InodeDirectory getParentInodeDirectory()
      throws InvalidPathException, FileDoesNotExistException {
    Inode inode = getParentInodeOrNull();
    if (inode == null) {
      throw new FileDoesNotExistException(
          ExceptionMessage.PATH_DOES_NOT_EXIST.getMessage(mUri.getParent()));
    }
    if (!inode.isDirectory()) {
      throw new InvalidPathException(
          ExceptionMessage.PATH_MUST_HAVE_VALID_PARENT.getMessage(mUri));
    }
    return (InodeDirectory) inode;
  }

  /**
   * @return the parent of the target inode, or null if the parent does not exist
   */
  @Nullable
  public Inode getParentInodeOrNull() {
    if (mPathComponents.length < 2 || mLockList.numInodes() < (mPathComponents.length - 1)) {
      // The path is only the root, or the list of inodes is not long enough to contain the parent
      return null;
    }
    return mLockList.get(mPathComponents.length - 2);
  }

  /**
   * @return the last existing inode on the inode path. This could be out of date if the current
   *         thread has added or deleted inodes since the last call to traverse()
   */
  public Inode getLastExistingInode() {
    return mLockList.get(mLockList.numInodes() - 1);
  }

  /**
   * @return a copy of the list of existing inodes, from the root
   */
  public List getInodeList() {
    return mLockList.getLockedInodes();
  }

  /**
   * @return a copy of the list of existing inodes, from the root
   */
  public List getInodeViewList() {
    return mLockList.getLockedInodeViews();
  }

  /**
   * @return the number of existing inodes in this path. This could be out of date if the current
   *         thread has added or deleted inodes since the last call to traverse()
   */
  public int getExistingInodeCount() {
    return mLockList.numInodes();
  }

  /**
   * @return number of components in this locked path
   */
  public int size() {
    return mPathComponents.length;
  }

  /**
   * @return true if the entire path of inodes exists, false otherwise. This could be out of date if
   *         the current thread has added or deleted inodes since the last call to traverse()
   */
  public boolean fullPathExists() {
    return mLockList.numInodes() == mPathComponents.length;
  }

  /**
   * @return the {@link LockPattern} of this path
   */
  public LockPattern getLockPattern() {
    return mLockPattern;
  }

  /**
   * Removes the last inode from the list. This is necessary when the last inode is deleted and we
   * want to continue using the inodepath. This operation is only supported when the path is
   * complete.
   */
  public void removeLastInode() {
    Preconditions.checkState(fullPathExists());

    maybeFlushJournals();

    mLockList.unlockLastInode();
  }

  /**
   * Adds the next inode to the path. This tries to reduce the scope of locking by moving the write
   * lock forward to the new final edge, downgrading the previous write lock to a read lock.
   *
   * @param inode the inode to add
   */
  public void addNextInode(Inode inode) {
    Preconditions.checkState(mLockPattern == LockPattern.WRITE_EDGE);
    Preconditions.checkState(!fullPathExists());
    Preconditions.checkState(inode.getName().equals(mPathComponents[mLockList.numInodes()]));

    int nextInodeIndex = mLockList.numInodes() + 1;
    if (nextInodeIndex < mPathComponents.length) {
      // We need to flush the pending journals into the writer
      // before the lock scope is reduced.
      maybeFlushJournals();
      mLockList.pushWriteLockedEdge(inode, mPathComponents[nextInodeIndex]);
    } else {
      mLockList.lockInode(inode, LockMode.WRITE);
    }
  }

  /**
   * Downgrades all locks in this list to read locks.
   */
  public void downgradeToRead() {
    maybeFlushJournals();
    mLockList.downgradeToReadLocks();
    mLockPattern = LockPattern.READ;
  }

  /**
   * Returns the closest ancestor of the target inode (last inode in the full path).
   *
   * @return the closest ancestor inode
   * @throws FileDoesNotExistException if an ancestor does not exist
   */
  public Inode getAncestorInode() throws FileDoesNotExistException {
    int ancestorIndex = mPathComponents.length - 2;
    if (ancestorIndex < 0) {
      throw new FileDoesNotExistException(ExceptionMessage.PATH_DOES_NOT_EXIST.getMessage(mUri));
    }
    ancestorIndex = Math.min(ancestorIndex, mLockList.numInodes() - 1);
    return mLockList.get(ancestorIndex);
  }

  /**
   * Locks a descendant of the current path and returns a new locked inode path. The path is
   * traversed according to the lock pattern. Closing the new path will have no effect on the
   * current path.
   *
   * On failure, all locks taken by this method will be released.
   *
   * @param descendantUri the full descendent uri starting from the root
   * @param lockPattern the lock pattern to lock in
   * @return the new locked path
   */
  public LockedInodePath lockDescendant(AlluxioURI descendantUri, LockPattern lockPattern)
      throws InvalidPathException {
    LockedInodePath path = new LockedInodePath(descendantUri, this,
        PathUtils.getPathComponents(descendantUri.getPath()), lockPattern, mUseTryLock);
    path.traverseOrClose();
    return path;
  }

  /**
   * Returns a new locked inode path composed of the current path plus the child inode. The path is
   * traversed according to the lock pattern. The original locked inode path is unaffected.
   *
   * childComponentsHint can be used to save the work of computing path components when the path
   * components for the new path are already known.
   *
   * On failure, all locks taken by this method will be released.
   *
   * @param child the child inode
   * @param lockPattern the lock pattern
   * @return the new locked path
   */
  public LockedInodePath lockChild(Inode child, LockPattern lockPattern)
      throws InvalidPathException {
    return lockChild(child, lockPattern, addComponent(mPathComponents, child.getName()));
  }

  /**
   * Efficient version of {@link #lockChild(Inode, LockPattern)} for when the child path
   * components are already known.
   *
   * @param child the child inode
   * @param lockPattern the lock pattern
   * @param childComponentsHint path components for the new path
   * @return the new locked path
   */
  public LockedInodePath lockChild(Inode child, LockPattern lockPattern,
      String[] childComponentsHint) throws InvalidPathException {
    return lockChildByName(child.getName(), lockPattern, childComponentsHint);
  }

  /**
   * Efficient version of {@link #lockChild(Inode, LockPattern)} for when the child path
   * components are already known.
   *
   * @param childName the name of the child inode
   * @param lockPattern the lock pattern
   * @param childComponentsHint path components for the new path
   * @return the new locked path
   */
  public LockedInodePath lockChildByName(String childName, LockPattern lockPattern,
      String[] childComponentsHint) throws InvalidPathException {
    LockedInodePath path = new LockedInodePath(mUri.joinUnsafe(childName), this,
        childComponentsHint, lockPattern, mUseTryLock);
    path.traverseOrClose();
    return path;
  }

  private static String[] addComponent(String[] components, String component) {
    String[] newComponents = new String[components.length + 1];
    System.arraycopy(components, 0, newComponents, 0, components.length);
    newComponents[components.length] = component;
    return newComponents;
  }

  /**
   * Returns a copy of the path with the final edge write locked. This requires that we haven't
   * already locked the final edge, i.e. the path is incomplete.
   *
   * @return the new locked path
   */
  public LockedInodePath lockFinalEdgeWrite() throws InvalidPathException {
    Preconditions.checkState(!fullPathExists());

    LockedInodePath newPath =
        new LockedInodePath(mUri, this, mPathComponents, LockPattern.WRITE_EDGE, mUseTryLock);
    newPath.traverse();
    return newPath;
  }

  private void traverseOrClose() throws InvalidPathException {
    try {
      traverse();
    } catch (Throwable t) {
      close();
      throw t;
    }
  }

  /**
   * Traverses the inode path according to its lock pattern. If the inode path is already partially
   * traversed, this method will pick up where the previous traversal left off.
   *
   * On return, all existing inodes in the path are added to mExistingInodes and the inodes are
   * locked according to {@link LockPattern}.
   *
   * Journals are not flushed in this method because:
   * 1. When a LockedInodePath is created, it will be traversed first before any journal is written
   * 2. The only use cases that call this method independently are in metadata sync
   * {@link alluxio.master.file.InodeSyncStream}. Where traverse() is called right before the
   * lock is released, where the journals will be flushed anyway.
   */
  public void traverse() throws InvalidPathException {
    // This locks the root edge and inode.
    bootstrapTraversal();

    // Each iteration either locks a new inode/edge or hits a missing inode and returns.
    while (!fullPathExists()) {
      int lastInodeIndex = mLockList.numInodes() - 1;
      String nextComponent = mPathComponents[lastInodeIndex + 1];
      boolean isFinalComponent = lastInodeIndex == mPathComponents.length - 2;

      Inode lastInode = mLockList.get(lastInodeIndex);
      if (mLockList.endsInInode()) { // Lock an edge next.
        if (mLockPattern == LockPattern.WRITE_EDGE && isFinalComponent) {
          mLockList.lockEdge(lastInode, nextComponent, LockMode.WRITE);
        } else {
          mLockList.lockEdge(lastInode, nextComponent, LockMode.READ);
        }
      } else { // Lock an inode next.
        if (!lastInode.isDirectory()) {
          throw new InvalidPathException(String.format(
              "Traversal failed for path %s. Component %s(%s) is a file, not a directory.", mUri,
              lastInodeIndex, lastInode.getName()));
        }
        Optional nextInodeOpt =
            mInodeStore.getChild(lastInode.asDirectory(), nextComponent);
        if (!nextInodeOpt.isPresent() && mLockPattern == LockPattern.WRITE_EDGE
            && !isFinalComponent) {
          // This pattern requires that we obtain a write lock on the final edge, so we must
          // upgrade to a write lock.
          mLockList.unlockLastEdge();
          mLockList.lockEdge(lastInode, nextComponent, LockMode.WRITE);
          nextInodeOpt = mInodeStore.getChild(lastInode.asDirectory(), nextComponent);
          if (nextInodeOpt.isPresent()) {
            // The component must have been created between releasing the read lock and acquiring
            // the write lock. Downgrade and continue as normal.
            mLockList.downgradeLastEdge();
          }
        }
        if (!nextInodeOpt.isPresent()) {
          if (mLockPattern == LockPattern.READ) {
            // WRITE_INODE and WRITE_EDGE should lock the last non-existing edge.
            mLockList.unlockLastEdge();
          }
          return;
        }
        Inode nextInode = nextInodeOpt.get();

        if (isFinalComponent && mLockPattern.isWrite()) {
          mLockList.lockInode(nextInode, LockMode.WRITE);
        } else {
          mLockList.lockInode(nextInode, LockMode.READ);
        }
      }
    }
  }

  private void bootstrapTraversal() {
    if (!mLockList.isEmpty()) {
      return;
    }
    LockMode edgeLock = LockMode.READ;
    LockMode inodeLock = LockMode.READ;
    if (mPathComponents.length == 1) {
      if (mLockPattern == LockPattern.WRITE_EDGE) {
        edgeLock = LockMode.WRITE;
        inodeLock = LockMode.WRITE;
      } else if (mLockPattern == LockPattern.WRITE_INODE) {
        inodeLock = LockMode.WRITE;
      }
    }
    mLockList.lockRootEdge(edgeLock);
    mLockList.lockInode(mRoot, inodeLock);
  }

  @Override
  public void close() {
    try {
      maybeFlushJournals();
    } finally {
      // releases the locks in case journal flush failed
      if (mTracker != null) {
        mTracker.close(this);
      }
      mLockList.close();
    }
  }

  @Override
  public String toString() {
    return mUri.toString();
  }

  private void maybeFlushJournals() {
    if (mMergeInodeJournals) {
      try {
        mJournalContext.flush();
      } catch (UnavailableException e) {
        throw new RuntimeException(e);
      }
    }
  }
}