// alluxio.master.file.meta.InodeTree Maven / Gradle / Ivy
/*
* The Alluxio Open Foundation licenses this work under the Apache License, version 2.0
* (the "License"). You may not use this work except in compliance with the License, which is
* available at www.apache.org/licenses/LICENSE-2.0
*
* This software is distributed on an "AS IS" basis, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND,
* either express or implied, as more fully set forth in the License.
*
* See the NOTICE file distributed with this work for information regarding copyright ownership.
*/
package alluxio.master.file.meta;
import alluxio.AlluxioURI;
import alluxio.client.WriteType;
import alluxio.collections.Pair;
import alluxio.concurrent.LockMode;
import alluxio.conf.PropertyKey;
import alluxio.conf.ServerConfiguration;
import alluxio.exception.BlockInfoException;
import alluxio.exception.ExceptionMessage;
import alluxio.exception.FileAlreadyExistsException;
import alluxio.exception.FileDoesNotExistException;
import alluxio.exception.InvalidPathException;
import alluxio.exception.PreconditionMessage;
import alluxio.exception.status.UnavailableException;
import alluxio.grpc.CreateDirectoryPOptions;
import alluxio.grpc.FileSystemMasterCommonPOptions;
import alluxio.master.block.ContainerIdGenerable;
import alluxio.master.file.RpcContext;
import alluxio.master.file.contexts.CreateDirectoryContext;
import alluxio.master.file.contexts.CreateFileContext;
import alluxio.master.file.contexts.CreatePathContext;
import alluxio.master.journal.DelegatingJournaled;
import alluxio.master.journal.JournalContext;
import alluxio.master.journal.Journaled;
import alluxio.master.metastore.DelegatingReadOnlyInodeStore;
import alluxio.master.metastore.InodeStore;
import alluxio.master.metastore.ReadOnlyInodeStore;
import alluxio.proto.journal.File.DeleteFileEntry;
import alluxio.proto.journal.File.NewBlockEntry;
import alluxio.proto.journal.File.RenameEntry;
import alluxio.proto.journal.File.SetAclEntry;
import alluxio.proto.journal.File.UpdateInodeDirectoryEntry;
import alluxio.proto.journal.File.UpdateInodeEntry;
import alluxio.proto.journal.File.UpdateInodeFileEntry;
import alluxio.resource.CloseableResource;
import alluxio.resource.LockResource;
import alluxio.retry.ExponentialBackoffRetry;
import alluxio.retry.RetryPolicy;
import alluxio.security.authorization.AccessControlList;
import alluxio.security.authorization.DefaultAccessControlList;
import alluxio.security.authorization.Mode;
import alluxio.underfs.UfsStatus;
import alluxio.underfs.UnderFileSystem;
import alluxio.underfs.options.MkdirsOptions;
import alluxio.util.CommonUtils;
import alluxio.util.interfaces.Scoped;
import com.google.common.base.Preconditions;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import java.io.IOException;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.HashSet;
import java.util.List;
import java.util.Map;
import java.util.Optional;
import java.util.Set;
import java.util.function.Supplier;
import javax.annotation.Nullable;
import javax.annotation.concurrent.NotThreadSafe;
/**
* Represents the tree of Inodes.
*/
@NotThreadSafe
// TODO(jiri): Make this class thread-safe.
public class InodeTree implements DelegatingJournaled {
/** Logger for this class. */
private static final Logger LOG = LoggerFactory.getLogger(InodeTree.class);
/**
 * The base amount (exponential backoff) to sleep before retrying persisting an inode, in
 * milliseconds.
 */
private static final int PERSIST_WAIT_BASE_SLEEP_MS = 2;
/**
 * Maximum amount (exponential backoff) to sleep before retrying persisting an inode, in
 * milliseconds.
 */
private static final int PERSIST_WAIT_MAX_SLEEP_MS = 1000;
/** The maximum retries for persisting an inode. */
private static final int PERSIST_WAIT_MAX_RETRIES = 50;
/** Value to be used as the parent id of an inode with no parent. */
public static final long NO_PARENT = -1;
/**
 * The ways in which the inodes and edges along a path can be locked.
 */
public enum LockPattern {
  /**
   * Takes a read lock on every inode and edge that exists along the path. Intended for reading
   * an inode without changing anything.
   *
   * Examples
   *
   * path to lock: /a/b/c
   * existing inodes: /a/b
   * result: Read locks on [a, a->b, b]
   *
   * path to lock: /a/b/c
   * existing inodes: /a/b/c
   * result: Read locks on [a, a->b, b, b->c, c]
   *
   * path to lock: /a/b/c
   * existing inodes: /a
   * result: Read locks on [a]
   */
  READ,

  /**
   * Takes a read lock on every existing inode and edge along the path, except that the final
   * inode, when it exists, is write locked. When the final inode does not exist, the edge
   * leaving the last existing ancestor is read locked. Intended for changing an inode's metadata
   * while leaving the shape of the inode tree alone (no create/rename/delete).
   *
   * Examples
   *
   * path to lock: /a/b/c
   * existing inodes: /a/b
   * result: Read locks on [a, a->b, b, b->c]
   *
   * path to lock: /a/b/c
   * existing inodes: /a/b/c
   * result: Read locks on [a, a->b, b, b->c], Write locks on [c]
   *
   * path to lock: /a/b/c
   * existing inodes: /a
   * result: Read locks on [a, a->b]
   */
  WRITE_INODE,

  /**
   * Takes a read lock on every existing inode and edge along the path, except that the edge
   * leaving the last existing ancestor is write locked. Intended for operations that change the
   * shape of the inode tree, such as creating, deleting, or renaming inodes.
   *
   * Examples
   *
   * path to lock: /a/b/c
   * existing inodes: /a/b
   * result: Read locks on [a, a->b, b], Write locks on [b->c]
   *
   * path to lock: /a/b/c
   * existing inodes: /a/b/c
   * result: Read locks on [a, a->b, b], Write locks on [b->c, c]
   *
   * path to lock: /a/b/c
   * existing inodes: /a
   * result: Read locks on [a], Write locks on [a->b]
   */
  WRITE_EDGE;

  /**
   * @return true for {@link #WRITE_INODE} and {@link #WRITE_EDGE}, false otherwise
   */
  public boolean isWrite() {
    switch (this) {
      case WRITE_INODE:
      case WRITE_EDGE:
        return true;
      default:
        return false;
    }
  }
}
/** Only the root inode should have the empty string as its name. */
public static final String ROOT_INODE_NAME = "";
/** Path of the root inode. */
public static final String ROOT_PATH = "/";
/** Number of retries when trying to lock a path, from a given id. */
public static final int PATH_TRAVERSAL_RETRIES = 1000;
/** Mount table manages the file system mount points. */
private final MountTable mMountTable;
/** TTL buckets for tracking inode TTLs; exposed through getTtlBuckets(). */
private final TtlBucketList mTtlBuckets;
/** Manager for inode locking. */
private final InodeLockManager mInodeLockManager;
/**
 * Read-only handle on the inode store. The constructor wraps the supplied store in a
 * DelegatingReadOnlyInodeStore before assigning it here.
 */
private final ReadOnlyInodeStore mInodeStore;
/**
 * Class for managing the persistent state of the inode tree. All metadata changes must go
 * through this class by calling mState.applyAndJournal(context, entry).
 */
private final InodeTreePersistentState mState;
/**
 * Inode id management. Inode ids are essentially block ids.
 *
 * inode files: Each file id will be composed of a unique block container id, with the maximum
 * sequence number.
 *
 * inode directories: Each directory id will be a unique block id, in order to avoid any collision
 * with file ids.
 */
private final ContainerIdGenerable mContainerIdGenerator;
/** Source of ids for newly created directory inodes (see the id scheme described above). */
private final InodeDirectoryIdGenerator mDirectoryIdGenerator;
/**
 * Builds an inode tree on top of the given store and id generators.
 *
 * @param inodeStore the inode store backing the tree
 * @param containerIdGenerator the generator used to obtain new container ids
 * @param directoryIdGenerator the generator used to obtain new directory ids
 * @param mountTable the mount table managing the file system mount points
 * @param lockManager the inode lock manager
 */
public InodeTree(InodeStore inodeStore, ContainerIdGenerable containerIdGenerator,
    InodeDirectoryIdGenerator directoryIdGenerator, MountTable mountTable,
    InodeLockManager lockManager) {
  // Plain collaborators first.
  mMountTable = mountTable;
  mContainerIdGenerator = containerIdGenerator;
  mDirectoryIdGenerator = directoryIdGenerator;
  mInodeLockManager = lockManager;
  // Derived members, in dependency order: the read-only store feeds the TTL buckets, which in
  // turn feed the persistent state. The persistent state gets the writable store.
  mInodeStore = new DelegatingReadOnlyInodeStore(inodeStore);
  mTtlBuckets = new TtlBucketList(mInodeStore);
  mState = new InodeTreePersistentState(inodeStore, lockManager, mTtlBuckets);
}
/**
 * Creates and journals the root directory of the inode tree, unless a root already exists.
 *
 * @param owner the root owner
 * @param group the root group
 * @param mode the root mode
 * @param context the journal context to journal the initialization to
 * @throws UnavailableException if a new directory id cannot be obtained
 */
public void initializeRoot(String owner, String group, Mode mode, JournalContext context)
    throws UnavailableException {
  if (mState.getRoot() != null) {
    // The tree already has a root; nothing to do.
    return;
  }
  // The id is requested before the options are built, as in the original call ordering.
  long rootId = mDirectoryIdGenerator.getNewDirectoryId(context);
  CreateDirectoryContext rootContext = CreateDirectoryContext
      .mergeFrom(CreateDirectoryPOptions.newBuilder().setMode(mode.toProto()))
      .setOwner(owner)
      .setGroup(group);
  MutableInodeDirectory rootDir =
      MutableInodeDirectory.create(rootId, NO_PARENT, ROOT_INODE_NAME, rootContext);
  rootDir.setPersistenceState(PersistenceState.PERSISTED);
  mState.applyAndJournal(context, rootDir, ROOT_PATH);
}
/**
 * Exposes the TTL bucket list owned by this tree (the same instance that is handed to the
 * persistent state in the constructor).
 *
 * @return the list of TTL buckets for tracking inode TTLs
 */
public TtlBucketList getTtlBuckets() {
return mTtlBuckets;
}
/**
 * Reports the inode count as tracked by the persistent state.
 *
 * @return the number of inodes in the inode tree
 */
public long getInodeCount() {
return mState.getInodeCount();
}
/**
 * Marks an inode directory as having its direct children loaded by applying and journaling an
 * {@link UpdateInodeDirectoryEntry} for the directory's id.
 *
 * @param context journal context supplier
 * @param dir the inode directory
 */
public void setDirectChildrenLoaded(Supplier<JournalContext> context, InodeDirectory dir) {
  mState.applyAndJournal(context, UpdateInodeDirectoryEntry.newBuilder()
      .setId(dir.getId())
      .setDirectChildrenLoaded(true)
      .build());
}
/**
 * Applies and journals an add-block operation.
 *
 * @param context journal context supplier
 * @param entry an entry representing an add block operation
 * @return the new block id
 */
public long newBlock(Supplier<JournalContext> context, NewBlockEntry entry) {
  return mState.applyAndJournal(context, entry);
}
/**
 * Applies and journals an update to an inode file.
 *
 * @param context journal context supplier
 * @param entry an entry representing an update inode file operation
 */
public void updateInodeFile(Supplier<JournalContext> context, UpdateInodeFileEntry entry) {
  mState.applyAndJournal(context, entry);
}
/**
 * Applies and journals an update to an inode.
 *
 * @param context journal context supplier
 * @param entry an entry representing an update inode operation
 */
public void updateInode(Supplier<JournalContext> context, UpdateInodeEntry entry) {
  mState.applyAndJournal(context, entry);
}
/**
 * Applies a new access time to an inode through the persistent state. Unlike the other update
 * methods of this class, no journal context is passed to the underlying call.
 * NOTE(review): presumably the caller is expected to journal the returned entry itself - confirm.
 *
 * @param inodeId id of inode to be updated
 * @param accessTime the new access time
 * @return the applied inode entry
 */
public UpdateInodeEntry updateInodeAccessTimeNoJournal(long inodeId, long accessTime) {
return mState.applyInodeAccessTime(inodeId, accessTime);
}
/**
 * Applies and journals a rename operation.
 *
 * @param context journal context supplier
 * @param entry an entry representing a rename operation
 */
public void rename(Supplier<JournalContext> context, RenameEntry entry) {
  mState.applyAndJournal(context, entry);
}
/**
 * Applies and journals a set-ACL operation.
 *
 * @param context journal context supplier
 * @param entry an entry representing a set acl operation
 */
public void setAcl(Supplier<JournalContext> context, SetAclEntry entry) {
  mState.applyAndJournal(context, entry);
}
/**
 * Looks up the owner of the root inode.
 *
 * @return username of root of inode tree, null if the inode tree is not initialized
 */
@Nullable
public String getRootUserName() {
  InodeDirectory root = mState.getRoot();
  return root == null ? null : root.getOwner();
}
/**
 * Reports the size of the pinned inode file id collection held by the persistent state.
 *
 * @return the number of pinned inodes
 */
public int getPinnedSize() {
return mState.getPinnedInodeFileIds().size();
}
/**
 * Checks whether the inode store currently holds an inode with the given id. No inode locks are
 * taken by this method itself.
 *
 * @param id the id to look up
 * @return whether the inode exists
 */
public boolean inodeIdExists(long id) {
return mInodeStore.get(id).isPresent();
}
/**
 * Locks existing inodes on the specified path, in the specified {@link LockPattern}. The target
 * inode is not required to exist.
 *
 * If traversal fails for any reason, the partially locked path is closed before the failure is
 * propagated, so no locks are leaked to the caller.
 *
 * @param uri the uri to lock
 * @param lockPattern the {@link LockPattern} to lock the inodes with
 * @return the {@link LockedInodePath} representing the locked path of inodes
 * @throws InvalidPathException if the path is invalid
 */
public LockedInodePath lockInodePath(AlluxioURI uri, LockPattern lockPattern)
    throws InvalidPathException {
  LockedInodePath inodePath =
      new LockedInodePath(uri, mInodeStore, mInodeLockManager, getRoot(), lockPattern);
  try {
    inodePath.traverse();
  } catch (Throwable t) {
    // Release whatever was locked so far, for unchecked failures as well as
    // InvalidPathException. Precise rethrow keeps the declared checked exceptions unchanged.
    inodePath.close();
    throw t;
  }
  return inodePath;
}
/**
 * Checks whether every component of the given path exists, read-locking the path for the
 * duration of the check. An invalid path is reported as not existing.
 *
 * @param uri the {@link AlluxioURI} to check for existence
 * @return whether the inode exists
 */
public boolean inodePathExists(AlluxioURI uri) {
  boolean exists;
  try (LockedInodePath lockedPath = lockInodePath(uri, LockPattern.READ)) {
    exists = lockedPath.fullPathExists();
  } catch (InvalidPathException e) {
    exists = false;
  }
  return exists;
}
/**
 * Locks a path and requires that the full path exists. If it does not, the locked path is
 * closed before the exception is thrown.
 *
 * @param uri a uri to lock
 * @param lockPattern the pattern to lock with
 * @return a locked inode path for the uri
 * @throws InvalidPathException if the path is invalid
 * @throws FileDoesNotExistException if the full path does not exist
 */
public LockedInodePath lockFullInodePath(AlluxioURI uri, LockPattern lockPattern)
    throws InvalidPathException, FileDoesNotExistException {
  LockedInodePath lockedPath = lockInodePath(uri, lockPattern);
  if (lockedPath.fullPathExists()) {
    return lockedPath;
  }
  lockedPath.close();
  throw new FileDoesNotExistException(ExceptionMessage.PATH_DOES_NOT_EXIST.getMessage(uri));
}
/**
 * Locks the path leading to the inode with the given id and requires that the full path exists.
 * If it does not, the locked path is closed before the exception is thrown.
 *
 * @param id the inode id to lock
 * @param lockPattern the pattern to lock with
 * @return a locked inode path for the inode id
 * @throws FileDoesNotExistException if the inode or its full path does not exist
 */
public LockedInodePath lockFullInodePath(long id, LockPattern lockPattern)
    throws FileDoesNotExistException {
  LockedInodePath lockedPath = lockInodePathById(id, lockPattern);
  if (lockedPath.fullPathExists()) {
    return lockedPath;
  }
  lockedPath.close();
  throw new FileDoesNotExistException(ExceptionMessage.INODE_DOES_NOT_EXIST.getMessage(id));
}
/**
 * Locks existing inodes on the path to the inode specified by an id, in the specified
 * {@link LockPattern}. The target inode must exist. This may require multiple traversals of the
 * tree, so may be inefficient.
 *
 * Each attempt looks the inode up by id, computes its path, locks that path and then verifies
 * that the locked path really ends at the requested id. A mismatch or an invalid computed path
 * triggers another attempt, up to {@link #PATH_TRAVERSAL_RETRIES} retries.
 *
 * @param id the inode id
 * @param lockPattern the {@link LockPattern} to lock the inodes with
 * @return the {@link LockedInodePath} representing the locked path of inodes
 * @throws FileDoesNotExistException if the target inode does not exist, or the retries are
 *         exhausted
 */
private LockedInodePath lockInodePathById(long id, LockPattern lockPattern)
    throws FileDoesNotExistException {
  int count = 0;
  while (true) {
    Inode inode = mInodeStore.get(id).orElseThrow(
        () -> new FileDoesNotExistException(ExceptionMessage.INODE_DOES_NOT_EXIST.getMessage(id)));
    // Compute the path given the target inode.
    StringBuilder builder = new StringBuilder();
    computePathForInode(inode, builder);
    AlluxioURI uri = new AlluxioURI(builder.toString());

    boolean valid = false;
    LockedInodePath inodePath = null;
    try {
      inodePath = lockInodePath(uri, lockPattern);
      if (inodePath.getInode().getId() == id) {
        // Set to true, so the path is not unlocked before returning.
        valid = true;
        return inodePath;
      }
      // The path does not end up at the target inode id. Repeat the traversal.
    } catch (InvalidPathException e) {
      // ignore and repeat the loop
      LOG.debug("Inode lookup id {} computed path {} mismatch id. Repeating.", id, uri);
    } finally {
      // Any exit other than the successful return must release the locks taken above.
      if (!valid && inodePath != null) {
        inodePath.close();
      }
    }
    count++;
    if (count > PATH_TRAVERSAL_RETRIES) {
      throw new FileDoesNotExistException(
          ExceptionMessage.INODE_DOES_NOT_EXIST_RETRIES.getMessage(id));
    }
  }
}
/**
 * Locks existing inodes on the two specified paths. The path whose string compares lower is
 * always locked first, so the locking order is deterministic regardless of argument order. The
 * target inodes are not required to exist.
 *
 * @param path1 the first path to lock
 * @param lockPattern1 the locking pattern for the first path
 * @param path2 the second path to lock
 * @param lockPattern2 the locking pattern for the second path
 * @return a {@link InodePathPair} representing the two locked paths
 * @throws InvalidPathException if a path is invalid
 */
public InodePathPair lockInodePathPair(AlluxioURI path1, LockPattern lockPattern1,
    AlluxioURI path2, LockPattern lockPattern2) throws InvalidPathException {
  LockedInodePath first = null;
  LockedInodePath second = null;
  boolean bothLocked = false;
  try {
    boolean firstPathGoesFirst = path1.getPath().compareTo(path2.getPath()) <= 0;
    if (firstPathGoesFirst) {
      first = lockInodePath(path1, lockPattern1);
      second = lockInodePath(path2, lockPattern2);
    } else {
      second = lockInodePath(path2, lockPattern2);
      first = lockInodePath(path1, lockPattern1);
    }
    bothLocked = true;
    return new InodePathPair(first, second);
  } finally {
    // On failure, release whichever of the two paths was already locked.
    if (!bothLocked) {
      if (first != null) {
        first.close();
      }
      if (second != null) {
        second.close();
      }
    }
  }
}
/**
 * Makes sure an existing {@link LockedInodePath} reaches its target inode (the last inode of
 * the full path), re-traversing the path once if it currently does not.
 *
 * @param inodePath the {@link LockedInodePath} to extend to the target inode
 * @throws InvalidPathException if the path is invalid
 * @throws FileDoesNotExistException if the target inode still does not exist after traversal
 */
public void ensureFullInodePath(LockedInodePath inodePath)
    throws InvalidPathException, FileDoesNotExistException {
  if (!inodePath.fullPathExists()) {
    inodePath.traverse();
    if (!inodePath.fullPathExists()) {
      throw new FileDoesNotExistException(
          ExceptionMessage.PATH_DOES_NOT_EXIST.getMessage(inodePath.getUri()));
    }
  }
}
/**
 * Appends components of the path from a given inode. The method recurses up through the parent
 * ids first, so components are appended from the root down to the given inode.
 *
 * @param inode the inode to compute the path for
 * @param builder a {@link StringBuilder} that is updated with the path components
 * @throws FileDoesNotExistException if an inode in the path does not exist
 */
private void computePathForInode(InodeView inode, StringBuilder builder)
    throws FileDoesNotExistException {
  long id;
  long parentId;
  String name;
  // Read the fields under the inode's read lock; the lock is released before recursing.
  try (LockResource lr = mInodeLockManager.lockInode(inode, LockMode.READ)) {
    id = inode.getId();
    parentId = inode.getParentId();
    name = inode.getName();
  }

  if (isRootId(id)) {
    builder.append(AlluxioURI.SEPARATOR);
  } else if (isRootId(parentId)) {
    builder.append(AlluxioURI.SEPARATOR);
    builder.append(name);
  } else {
    InodeView parentInode = mInodeStore.get(parentId).orElseThrow(
        () -> new FileDoesNotExistException(
            ExceptionMessage.INODE_DOES_NOT_EXIST.getMessage(parentId)));
    computePathForInode(parentInode, builder);
    builder.append(AlluxioURI.SEPARATOR);
    builder.append(name);
  }
}
/**
 * Builds the path of a particular inode. The inode and the path to the inode must already be
 * locked.
 *
 * @param inode the inode to get the path for
 * @return the {@link AlluxioURI} for the path of the inode
 * @throws FileDoesNotExistException if the path does not exist
 */
public AlluxioURI getPath(InodeView inode) throws FileDoesNotExistException {
  StringBuilder pathBuilder = new StringBuilder();
  computePathForInode(inode, pathBuilder);
  String path = pathBuilder.toString();
  return new AlluxioURI(path);
}
/**
 * Returns the root directory as held by the persistent state. Other code in this class treats
 * a null result as "root not initialized yet" (see initializeRoot and getRootUserName).
 *
 * @return the root inode
 */
public InodeDirectory getRoot() {
return mState.getRoot();
}
/**
* Creates a file or directory at path.
*
* This method expects that the last edge leading to the target inode to be write-locked. If the
* last existing inode in the path is /a/b/c and we want to create /a/b/c/d/e, the c->d edge must
* be write locked.
*
* On success, createPath attempts to push the write lock forward as far as possible. For the
* above example, createPath would take a write lock on d->e, and downgrade the c->d lock from a
* write lock to a read lock. This may not be possible if inodePath is a composite path which
* doesn't own the write lock. In that case no downgrade will occur.
*
* @param rpcContext the rpc context
* @param inodePath the path
* @param context method context
* @return a list of created inodes
* @throws FileAlreadyExistsException when there is already a file at path if we want to create a
* directory there
* @throws BlockInfoException when blockSizeBytes is invalid
* @throws InvalidPathException when path is invalid, for example, (1) when there is nonexistent
* necessary parent directories and recursive is false, (2) when one of the necessary
* parent directories is actually a file
* @throws FileDoesNotExistException if the parent of the path does not exist and the recursive
* option is false
*/
public List createPath(RpcContext rpcContext, LockedInodePath inodePath,
CreatePathContext, ?> context) throws FileAlreadyExistsException, BlockInfoException,
InvalidPathException, IOException, FileDoesNotExistException {
Preconditions.checkState(inodePath.getLockPattern() == LockPattern.WRITE_EDGE);
// TODO(gpang): consider splitting this into createFilePath and createDirectoryPath, with a
// helper method for the shared logic.
AlluxioURI path = inodePath.getUri();
if (path.isRoot()) {
String errorMessage = ExceptionMessage.FILE_ALREADY_EXISTS.getMessage(path);
LOG.error(errorMessage);
throw new FileAlreadyExistsException(errorMessage);
}
if (inodePath.fullPathExists()) {
if (context instanceof CreateDirectoryContext
&& ((CreateDirectoryContext) context).getOptions().getAllowExists()) {
return new ArrayList<>();
} else {
throw new FileAlreadyExistsException(path);
}
}
if (context instanceof CreateFileContext) {
CreateFileContext fileContext = (CreateFileContext) context;
if (fileContext.getOptions().getBlockSizeBytes() < 1) {
throw new BlockInfoException(
"Invalid block size " + fileContext.getOptions().getBlockSizeBytes());
}
}
LOG.debug("createPath {}", path);
String[] pathComponents = inodePath.mPathComponents;
String name = path.getName();
// pathIndex is the index into pathComponents where we start filling in the path from the inode.
int pathIndex = inodePath.getExistingInodeCount();
if (pathIndex < pathComponents.length - 1) {
// The immediate parent was not found. If it's not recursive, we throw an exception here.
// Otherwise we add the remaining path components to the list of components to create.
if (!context.isRecursive()) {
throw new FileDoesNotExistException(String.format(
"File %s creation failed. Component %d(%s) does not exist",
path, pathIndex, pathComponents[pathIndex]));
}
}
// The ancestor inode (parent or ancestor) of the target path.
Inode ancestorInode = inodePath.getAncestorInode();
if (!ancestorInode.isDirectory()) {
throw new InvalidPathException("Could not traverse to parent directory of path " + path
+ ". Component " + pathComponents[pathIndex - 1] + " is not a directory.");
}
InodeDirectoryView currentInodeDirectory = ancestorInode.asDirectory();
List createdInodes = new ArrayList<>();
if (context.isPersisted()) {
// Synchronously persist directories. These inodes are already READ locked.
for (Inode inode : inodePath.getInodeList()) {
if (!inode.isPersisted()) {
// This cast is safe because we've already verified that the file inode doesn't exist.
syncPersistExistingDirectory(rpcContext, inode.asDirectory());
}
}
}
if ((pathIndex < (pathComponents.length - 1)
|| !mInodeStore.getChild(currentInodeDirectory, name).isPresent())
&& context.getOperationTimeMs() > currentInodeDirectory.getLastModificationTimeMs()) {
// (1) There are components in parent paths that need to be created. Or
// (2) The last component of the path needs to be created.
// In these two cases, the last traversed Inode will be modified if the new timestamp is after
// the existing last modified time.
long currentId = currentInodeDirectory.getId();
try (LockResource lr = mInodeLockManager.lockUpdate(currentId)) {
long updatedLastModified = mInodeStore.get(currentId).get().getLastModificationTimeMs();
if (updatedLastModified < context.getOperationTimeMs()) {
UpdateInodeEntry.Builder updateInodeEntry = UpdateInodeEntry.newBuilder()
.setId(currentId)
.setLastModificationTimeMs(context.getOperationTimeMs())
.setLastAccessTimeMs(context.getOperationTimeMs());
if (context.getXAttr() != null) {
updateInodeEntry.putAllXAttr(CommonUtils.convertToByteString(context.getXAttr()));
}
mState.applyAndJournal(rpcContext, updateInodeEntry.build());
}
}
}
// Fill in the ancestor directories that were missing.
// NOTE, we set the mode of missing ancestor directories to be the default value, rather
// than inheriting the option of the final file to create, because it may not have
// "execute" permission.
CreateDirectoryContext missingDirContext = CreateDirectoryContext.defaults();
missingDirContext.getOptions().setCommonOptions(FileSystemMasterCommonPOptions.newBuilder()
.setTtl(context.getTtl()).setTtlAction(context.getTtlAction()));
missingDirContext.setWriteType(context.getWriteType());
missingDirContext.setOperationTimeMs(context.getOperationTimeMs());
missingDirContext.setMountPoint(false);
missingDirContext.setOwner(context.getOwner());
missingDirContext.setGroup(context.getGroup());
missingDirContext.setXAttr(context.getXAttr());
StringBuilder pathBuilder = new StringBuilder().append(
String.join(AlluxioURI.SEPARATOR, Arrays.asList(pathComponents).subList(0, pathIndex))
);
for (int k = pathIndex; k < (pathComponents.length - 1); k++) {
MutableInodeDirectory newDir = MutableInodeDirectory.create(
mDirectoryIdGenerator.getNewDirectoryId(rpcContext.getJournalContext()),
currentInodeDirectory.getId(), pathComponents[k], missingDirContext);
newDir.setPinned(currentInodeDirectory.isPinned());
inheritOwnerAndGroupIfEmpty(newDir, currentInodeDirectory);
// if the parent has default ACL, copy that default ACL as the new directory's default
// and access acl, ANDed with the umask
// if it is part of a metadata load operation, we ignore the umask and simply inherit
// the default ACL as the directory's new default and access ACL
short mode = context.isMetadataLoad() ? Mode.createFullAccess().toShort()
: newDir.getMode();
DefaultAccessControlList dAcl = currentInodeDirectory.getDefaultACL();
if (!dAcl.isEmpty()) {
Pair pair =
dAcl.generateChildDirACL(mode);
newDir.setInternalAcl(pair.getFirst());
newDir.setDefaultACL(pair.getSecond());
}
String newDirPath = k == 0 ? ROOT_PATH
: pathBuilder.append(AlluxioURI.SEPARATOR).append(pathComponents[k]).toString();
mState.applyAndJournal(rpcContext, newDir,
newDirPath);
inodePath.addNextInode(Inode.wrap(newDir));
// Persist the directory *after* it exists in the inode tree. This prevents multiple
// concurrent creates from trying to persist the same directory name.
if (context.isPersisted()) {
syncPersistExistingDirectory(rpcContext, newDir);
}
createdInodes.add(Inode.wrap(newDir));
currentInodeDirectory = newDir;
}
// Create the final path component.
MutableInode> newInode;
// create the new inode, with a write lock
if (context instanceof CreateDirectoryContext) {
CreateDirectoryContext directoryContext = (CreateDirectoryContext) context;
MutableInodeDirectory newDir = MutableInodeDirectory.create(
mDirectoryIdGenerator.getNewDirectoryId(rpcContext.getJournalContext()),
currentInodeDirectory.getId(), name, directoryContext);
inheritOwnerAndGroupIfEmpty(newDir, currentInodeDirectory);
// if the parent has default ACL, take the default ACL ANDed with the umask as the new
// directory's default and access acl
// When it is a metadata load operation, do not take the umask into account
short mode = context.isMetadataLoad() ? Mode.createFullAccess().toShort()
: newDir.getMode();
DefaultAccessControlList dAcl = currentInodeDirectory.getDefaultACL();
if (!dAcl.isEmpty()) {
Pair pair =
dAcl.generateChildDirACL(mode);
newDir.setInternalAcl(pair.getFirst());
newDir.setDefaultACL(pair.getSecond());
}
if (directoryContext.isPersisted()) {
// Do not journal the persist entry, since a creation entry will be journaled instead.
if (context.isMetadataLoad()) {
// if we are creating the file as a result of loading metadata, the newDir is already
// persisted, and we got the permissions info from the ufs.
newDir.setOwner(context.getOwner())
.setGroup(context.getGroup())
.setMode(context.getMode().toShort());
Long operationTimeMs = context.getOperationTimeMs();
if (operationTimeMs != null) {
newDir.setLastModificationTimeMs(operationTimeMs, true);
newDir.setLastAccessTimeMs(operationTimeMs, true);
}
newDir.setPersistenceState(PersistenceState.PERSISTED);
} else {
syncPersistNewDirectory(newDir);
}
}
newInode = newDir;
} else if (context instanceof CreateFileContext) {
CreateFileContext fileContext = (CreateFileContext) context;
MutableInodeFile newFile = MutableInodeFile.create(mContainerIdGenerator.getNewContainerId(),
currentInodeDirectory.getId(), name, System.currentTimeMillis(), fileContext);
inheritOwnerAndGroupIfEmpty(newFile, currentInodeDirectory);
// if the parent has a default ACL, copy that default ACL ANDed with the umask as the new
// file's access ACL.
// If it is a metadata load operation, do not consider the umask.
DefaultAccessControlList dAcl = currentInodeDirectory.getDefaultACL();
short mode = context.isMetadataLoad() ? Mode.createFullAccess().toShort() : newFile.getMode();
if (!dAcl.isEmpty()) {
AccessControlList acl = dAcl.generateChildFileACL(mode);
newFile.setInternalAcl(acl);
}
if (fileContext.isCacheable()) {
newFile.setCacheable(true);
}
if (fileContext.getWriteType() == WriteType.ASYNC_THROUGH) {
newFile.setPersistenceState(PersistenceState.TO_BE_PERSISTED);
}
newInode = newFile;
} else {
throw new IllegalStateException(String.format("Unrecognized create options: %s", context));
}
newInode.setPinned(currentInodeDirectory.isPinned());
mState.applyAndJournal(rpcContext, newInode,
inodePath.getUri().getPath());
Inode inode = Inode.wrap(newInode);
inodePath.addNextInode(inode);
createdInodes.add(inode);
LOG.debug("createFile: File Created: {} parent: {}", newInode, currentInodeDirectory);
return createdInodes;
}
// Inherit owner and group from ancestor if both are empty
private static void inheritOwnerAndGroupIfEmpty(MutableInode> newInode,
InodeDirectoryView ancestorInode) {
if (ServerConfiguration.getBoolean(PropertyKey.MASTER_METASTORE_INODE_INHERIT_OWNER_AND_GROUP)
&& newInode.getOwner().isEmpty() && newInode.getGroup().isEmpty()) {
// Inherit owner / group if empty
newInode.setOwner(ancestorInode.getOwner());
newInode.setGroup(ancestorInode.getGroup());
}
}
/**
 * Returns {@link LockedInodePath}s for all descendants of inodePath. If gathering fails part
 * way through, every descendant path locked so far is closed before the failure is rethrown.
 *
 * @param inodePath a locked inode path. It must be locked with {@link LockPattern#WRITE_EDGE},
 *        or with {@link LockPattern#WRITE_INODE} when the full path exists
 * @return all descendants
 */
public LockedInodePathList getDescendants(LockedInodePath inodePath) {
  Preconditions.checkState(inodePath.getLockPattern() == LockPattern.WRITE_EDGE
      || (inodePath.getLockPattern() == LockPattern.WRITE_INODE && inodePath.fullPathExists()));

  List<LockedInodePath> descendants = new ArrayList<>();
  try {
    gatherDescendants(inodePath, descendants);
  } catch (Throwable t) {
    // Do not leak the locks that were already acquired for descendants.
    descendants.forEach(LockedInodePath::close);
    throw t;
  }
  return new LockedInodePathList(descendants);
}
/**
 * Recursively locks every descendant of the given path with {@link LockPattern#WRITE_EDGE} and
 * appends the resulting locked paths to {@code descendants}. Each child is added before its own
 * descendants. Paths without an inode and file inodes contribute nothing.
 *
 * @param inodePath the locked path whose descendants are gathered
 * @param descendants the list that receives the locked descendant paths
 */
private void gatherDescendants(LockedInodePath inodePath, List<LockedInodePath> descendants) {
  Inode inode = inodePath.getInodeOrNull();
  if (inode == null || inode.isFile()) {
    return;
  }
  for (Inode child : mInodeStore.getChildren(inode.asDirectory())) {
    LockedInodePath childPath;
    try {
      childPath = inodePath.lockChild(child, LockPattern.WRITE_EDGE);
    } catch (InvalidPathException e) {
      // Child does not exist.
      continue;
    }
    // Register the child before recursing so the caller can close it if recursion fails.
    descendants.add(childPath);
    gatherDescendants(childPath, descendants);
  }
}
/**
 * Deletes a single inode from the inode tree by applying and journaling a non-recursive
 * {@link DeleteFileEntry}. For file inodes, the file's blocks are afterwards registered for
 * deletion with the rpc context's block deletion context.
 *
 * @param rpcContext the rpc context
 * @param inodePath the {@link LockedInodePath} to delete; must be locked with
 *        {@link LockPattern#WRITE_EDGE}
 * @param opTimeMs the operation time
 * @throws FileDoesNotExistException if the Inode cannot be retrieved
 */
public void deleteInode(RpcContext rpcContext, LockedInodePath inodePath, long opTimeMs)
    throws FileDoesNotExistException {
  Preconditions.checkState(inodePath.getLockPattern() == LockPattern.WRITE_EDGE);
  Inode target = inodePath.getInode();

  DeleteFileEntry deleteEntry = DeleteFileEntry.newBuilder()
      .setId(target.getId())
      .setRecursive(false)
      .setOpTimeMs(opTimeMs)
      .setPath(inodePath.getUri().getPath())
      .build();
  mState.applyAndJournal(rpcContext, deleteEntry);

  if (!target.isFile()) {
    return;
  }
  rpcContext.getBlockDeletionContext().registerBlocksForDeletion(target.asFile().getBlockIds());
}
/**
 * Checks that every requested pinned medium type is one of the medium types configured under
 * {@code MASTER_TIERED_STORE_GLOBAL_MEDIUMTYPE} (a comma-separated list).
 *
 * @param pinnedMediumTypes the medium types requested for pinning
 * @return true if all requested medium types are configured (trivially true for an empty set),
 *         false if at least one is not
 */
private boolean checkPinningValidity(Set<String> pinnedMediumTypes) {
  List<String> mediumTypeList = ServerConfiguration.getList(
      PropertyKey.MASTER_TIERED_STORE_GLOBAL_MEDIUMTYPE, ",");
  return mediumTypeList.containsAll(pinnedMediumTypes);
}
/**
 * Sets the pinned state of an inode. If the inode is a directory, the pinned state will be set
 * recursively.
 *
 * The path must be locked with a write-type pattern and every medium type must be one of the
 * configured global medium types; both are enforced with {@code Preconditions.checkState}.
 *
 * @param rpcContext the rpc context
 * @param inodePath the {@link LockedInodePath} to set the pinned state for
 * @param pinned the pinned state to set for the inode (and possible descendants)
 * @param mediumTypes the list of pinned media that the file can reside in
 * @param opTimeMs the operation time
 * @throws FileDoesNotExistException if inode does not exist
 * @throws InvalidPathException if a child path cannot be locked
 */
public void setPinned(RpcContext rpcContext, LockedInodePath inodePath, boolean pinned,
    List<String> mediumTypes, long opTimeMs)
    throws FileDoesNotExistException, InvalidPathException {
  Preconditions.checkState(inodePath.getLockPattern().isWrite());
  // De-duplicate the requested media before validating and journaling them.
  Set<String> mediumSet = new HashSet<>(mediumTypes);
  Preconditions.checkState(checkPinningValidity(mediumSet));

  Inode inode = inodePath.getInode();

  mState.applyAndJournal(rpcContext, UpdateInodeEntry.newBuilder()
      .setId(inode.getId())
      .setPinned(pinned)
      .addAllMediumType(mediumSet)
      .setLastModificationTimeMs(opTimeMs)
      .build());

  if (inode.isDirectory()) {
    assert inode instanceof InodeDirectory;
    // inode is a directory. Set the pinned state for all children.
    for (Inode child : mInodeStore.getChildren(inode.asDirectory())) {
      try (LockedInodePath childPath =
          inodePath.lockChild(child, LockPattern.WRITE_INODE)) {
        // No need for additional locking since the parent is write-locked.
        setPinned(rpcContext, childPath, pinned, mediumTypes, opTimeMs);
      }
    }
  }
}
/**
 * Sets the min and/or max replication level of an inode. If the inode is a directory, the state
 * will be set recursively. Arguments replicationMax and replicationMin can be null if they are
 * not meant to be set.
 *
 * For a file, the value that is not supplied is taken from the file's current setting, and the
 * file's pinned flag is journaled as {@code true} exactly when the resulting min replication is
 * greater than zero.
 *
 * @param rpcContext the rpc context
 * @param inodePath the {@link LockedInodePath} to set the replication level for; must hold a
 *        write lock pattern
 * @param replicationMax the max replication level to set for the inode (and possible descendants)
 * @param replicationMin the min replication level to set for the inode (and possible descendants)
 * @param opTimeMs the operation time
 * @throws FileDoesNotExistException if inode does not exist
 * @throws InvalidPathException if thrown while locking a child path
 */
public void setReplication(RpcContext rpcContext, LockedInodePath inodePath,
    Integer replicationMax, Integer replicationMin, long opTimeMs)
    throws FileDoesNotExistException, InvalidPathException {
  Preconditions.checkArgument(replicationMin != null || replicationMax != null,
      PreconditionMessage.INVALID_REPLICATION_MAX_MIN_VALUE_NULL);
  Preconditions.checkArgument(replicationMin == null || replicationMin >= 0,
      PreconditionMessage.INVALID_REPLICATION_MIN_VALUE);
  Preconditions.checkState(inodePath.getLockPattern().isWrite());

  Inode inode = inodePath.getInode();

  if (inode.isFile()) {
    InodeFile inodeFile = inode.asFile();
    // Fall back to the file's current value for whichever bound is not being changed.
    int newMax = (replicationMax == null) ? inodeFile.getReplicationMax() : replicationMax;
    int newMin = (replicationMin == null) ? inodeFile.getReplicationMin() : replicationMin;

    // Report the effective min and max that failed the check (previously the max was passed
    // twice and the min was never shown).
    // NOTE(review): placeholder order of the template is assumed to be (min, max) - confirm
    // against PreconditionMessage.
    Preconditions.checkArgument(newMax == alluxio.Constants.REPLICATION_MAX_INFINITY
        || newMax >= newMin,
        PreconditionMessage.INVALID_REPLICATION_MAX_SMALLER_THAN_MIN.toString(),
        newMin, newMax);

    mState.applyAndJournal(rpcContext, UpdateInodeFileEntry.newBuilder()
        .setId(inode.getId())
        .setReplicationMax(newMax)
        .setReplicationMin(newMin)
        .build());
    mState.applyAndJournal(rpcContext, UpdateInodeEntry.newBuilder()
        .setId(inode.getId())
        .setPinned(newMin > 0)
        .setLastModificationTimeMs(opTimeMs)
        .build());
  } else {
    for (Inode child : mInodeStore.getChildren(inode.asDirectory())) {
      try (LockedInodePath tempInodePath =
          inodePath.lockChild(child, LockPattern.WRITE_INODE)) {
        // No need for additional locking since the parent is write-locked.
        setReplication(rpcContext, tempInodePath, replicationMax, replicationMin, opTimeMs);
      }
    }
  }
}
/**
 * @return the set of file ids whose replication max is not infinity, as reported by the tree's
 *         persistent state
 */
public Set<Long> getReplicationLimitedFileIds() {
  return mState.getReplicationLimitedFileIds();
}
/**
 * @return an unmodifiable view of the files with persistence state
 *         {@link PersistenceState#TO_BE_PERSISTED}
 */
public Set<Long> getToBePersistedIds() {
  return mState.getToBePersistedIds();
}
/**
 * @return the set of file ids which are pinned; the result is a fresh {@link HashSet} copy, so
 *         modifying it does not affect the tree's own state
 */
public Set<Long> getPinIdSet() {
  return new HashSet<>(mState.getPinnedInodeFileIds());
}
/**
 * Exposes the lock manager used by this inode tree.
 *
 * @return the inode lock manager for the inode tree
 */
public InodeLockManager getInodeLockManager() {
  return this.mInodeLockManager;
}
/**
 * Tells whether an id refers to the root of this inode tree.
 *
 * @param fileId the file id to check
 * @return true if the given file id is the root id
 * @throws NullPointerException if the tree has no root yet
 */
public boolean isRootId(long fileId) {
  // Fail fast when the tree has not been initialized with a root.
  Preconditions.checkNotNull(mState.getRoot(),
      PreconditionMessage.INODE_TREE_UNINITIALIZED_IS_ROOT_ID);
  final long rootId = mState.getRoot().getId();
  return rootId == fileId;
}
/**
 * @return the tree state object ({@code mState}), exposed as this tree's {@link Journaled}
 *         delegate
 */
@Override
public Journaled getDelegate() {
  return this.mState;
}
/**
 * Synchronously persists an inode directory to the UFS. If concurrent calls are made, only
 * one thread will persist to UFS, and the others will wait until it is persisted.
 *
 * The thread that obtains the persisting lock journals the directory as
 * {@link PersistenceState#TO_BE_PERSISTED}, creates it in the UFS, and then journals it as
 * {@link PersistenceState#PERSISTED}. If the directory already existed in the UFS, its owner,
 * group, mode, extended attributes and last modification time are copied into that final journal
 * entry, except when the directory is the root.
 *
 * @param context journal context supplier
 * @param dir the inode directory to persist
 * @throws IOException if the UFS operation fails, or if the directory is still not persisted
 *         once all retry attempts are used up
 * @throws InvalidPathException if the path for the inode is invalid
 * @throws FileDoesNotExistException if thrown while persisting the directory to the UFS
 */
public void syncPersistExistingDirectory(Supplier<JournalContext> context,
    InodeDirectoryView dir)
    throws IOException, InvalidPathException, FileDoesNotExistException {
  RetryPolicy retry =
      new ExponentialBackoffRetry(PERSIST_WAIT_BASE_SLEEP_MS, PERSIST_WAIT_MAX_SLEEP_MS,
          PERSIST_WAIT_MAX_RETRIES);
  while (retry.attempt()) {
    if (dir.getPersistenceState() == PersistenceState.PERSISTED) {
      // The directory is persisted
      return;
    }
    Optional<Scoped> persisting = mInodeLockManager.tryAcquirePersistingLock(dir.getId());
    if (!persisting.isPresent()) {
      // Someone else is doing this persist. Continue and wait for them to finish.
      continue;
    }
    // The persisting lock is released when this try block exits.
    try (Scoped persistingLock = persisting.get()) {
      // Re-check under the lock: another thread may have finished persisting in the meantime.
      if (dir.getPersistenceState() == PersistenceState.PERSISTED) {
        // The directory is persisted
        return;
      }
      mState.applyAndJournal(context, UpdateInodeEntry.newBuilder()
          .setId(dir.getId())
          .setPersistenceState(PersistenceState.TO_BE_PERSISTED.name())
          .build());
      UpdateInodeEntry.Builder entry = UpdateInodeEntry.newBuilder()
          .setId(dir.getId());
      syncPersistDirectory(dir).ifPresent(status -> {
        if (isRootId(dir.getId())) {
          // Don't load the root dir metadata from UFS
          return;
        }
        entry.setOwner(status.getOwner())
            .setGroup(status.getGroup())
            .setMode(status.getMode());
        Map<String, byte[]> xattr = status.getXAttr();
        if (xattr != null) {
          entry.putAllXAttr(CommonUtils.convertToByteString(xattr));
        }
        Long lastModificationTime = status.getLastModifiedTime();
        if (lastModificationTime != null) {
          entry.setLastModificationTimeMs(lastModificationTime)
              .setOverwriteModificationTime(true);
        }
      });
      entry.setPersistenceState(PersistenceState.PERSISTED.name());
      mState.applyAndJournal(context, entry.build());
      return;
    }
  }
  // All retry attempts were used without the directory becoming persisted.
  throw new IOException(ExceptionMessage.FAILED_UFS_CREATE.getMessage(dir.getName()));
}
/**
 * Synchronously persists an inode directory to the UFS.
 *
 * This method does not handle concurrent modification to the given inode, so the inode must not
 * yet be added to the inode tree. The inode is marked
 * {@link PersistenceState#TO_BE_PERSISTED} before the UFS call and
 * {@link PersistenceState#PERSISTED} after it.
 *
 * @param dir the inode directory to persist
 * @throws InvalidPathException if thrown while persisting the directory to the UFS
 * @throws FileDoesNotExistException if thrown while persisting the directory to the UFS
 * @throws IOException if thrown while persisting the directory to the UFS
 */
public void syncPersistNewDirectory(MutableInodeDirectory dir)
    throws InvalidPathException, FileDoesNotExistException, IOException {
  dir.setPersistenceState(PersistenceState.TO_BE_PERSISTED);

  final Optional<UfsStatus> existingUfsDir = syncPersistDirectory(dir);
  existingUfsDir.ifPresent(ufsStatus -> {
    // The directory was already present in the UFS, so adopt the UFS metadata.
    dir.setOwner(ufsStatus.getOwner())
        .setGroup(ufsStatus.getGroup())
        .setMode(ufsStatus.getMode())
        .setXAttr(ufsStatus.getXAttr());
    final Long ufsModifiedMs = ufsStatus.getLastModifiedTime();
    if (ufsModifiedMs == null) {
      return;
    }
    dir.setLastModificationTimeMs(ufsModifiedMs, true);
    // TODO(feng): update last access time from UFS
    dir.setLastAccessTimeMs(ufsModifiedMs, true);
  });

  dir.setPersistenceState(PersistenceState.PERSISTED);
}
/**
 * Persists the directory to the UFS, returning the UFS status if the directory is found to
 * already exist in the UFS.
 *
 * The directory is created without creating parents, using the inode's owner, group and mode.
 * When the UFS reports that nothing was created, the status at that location is loaded instead.
 *
 * @param dir the directory to persist
 * @return optional ufs status if the directory already existed, empty if it was newly created
 * @throws FileDoesNotExistException if thrown while resolving the path of the directory
 * @throws IOException if the directory was not created and its UFS status cannot be loaded
 * @throws InvalidPathException if a file, rather than a directory, exists at the UFS location
 */
private Optional<UfsStatus> syncPersistDirectory(InodeDirectoryView dir)
    throws FileDoesNotExistException, IOException, InvalidPathException {
  AlluxioURI uri = getPath(dir);
  MountTable.Resolution resolution = mMountTable.resolve(uri);
  String ufsUri = resolution.getUri().toString();
  try (CloseableResource<UnderFileSystem> ufsResource = resolution.acquireUfsResource()) {
    UnderFileSystem ufs = ufsResource.get();
    MkdirsOptions mkdirsOptions =
        MkdirsOptions.defaults(ServerConfiguration.global()).setCreateParent(false)
            .setOwner(dir.getOwner()).setGroup(dir.getGroup()).setMode(new Mode(dir.getMode()));
    if (!ufs.mkdirs(ufsUri, mkdirsOptions)) {
      // Directory might already exist. Try loading the status from ufs.
      UfsStatus status;
      try {
        status = ufs.getStatus(ufsUri);
      } catch (Exception e) {
        // Keep the original failure as the cause so the root problem is not lost.
        throw new IOException(String.format("Cannot create or load UFS directory %s: %s.",
            ufsUri, e.toString()), e);
      }
      if (status.isFile()) {
        throw new InvalidPathException(String.format(
            "Error persisting directory. A file exists at the UFS location %s.", ufsUri));
      }
      return Optional.of(status);
    }
  }
  return Optional.empty();
}
/**
 * Close resources associated with this tree instance. This closes the inode store.
 *
 * @throws IOException if closing the inode store fails
 */
public void close() throws IOException {
  this.mInodeStore.close();
}
}
// © 2015 - 2025 Weber Informatics LLC | Privacy Policy