// Source listing header (Maven / Gradle / Ivy): alluxio.master.file.DefaultFileSystemMaster
/*
* The Alluxio Open Foundation licenses this work under the Apache License, version 2.0
* (the "License"). You may not use this work except in compliance with the License, which is
* available at www.apache.org/licenses/LICENSE-2.0
*
* This software is distributed on an "AS IS" basis, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND,
* either express or implied, as more fully set forth in the License.
*
* See the NOTICE file distributed with this work for information regarding copyright ownership.
*/
package alluxio.master.file;
import alluxio.AlluxioURI;
import alluxio.ClientContext;
import alluxio.Constants;
import alluxio.Server;
import alluxio.client.WriteType;
import alluxio.client.job.JobMasterClient;
import alluxio.client.job.JobMasterClientPool;
import alluxio.clock.SystemClock;
import alluxio.collections.Pair;
import alluxio.collections.PrefixList;
import alluxio.conf.PropertyKey;
import alluxio.conf.ServerConfiguration;
import alluxio.exception.AccessControlException;
import alluxio.exception.AlluxioException;
import alluxio.exception.BlockInfoException;
import alluxio.exception.ConnectionFailedException;
import alluxio.exception.DirectoryNotEmptyException;
import alluxio.exception.ExceptionMessage;
import alluxio.exception.FileAlreadyCompletedException;
import alluxio.exception.FileAlreadyExistsException;
import alluxio.exception.FileDoesNotExistException;
import alluxio.exception.InvalidFileSizeException;
import alluxio.exception.InvalidPathException;
import alluxio.exception.PreconditionMessage;
import alluxio.exception.UnexpectedAlluxioException;
import alluxio.exception.status.FailedPreconditionException;
import alluxio.exception.status.InvalidArgumentException;
import alluxio.exception.status.NotFoundException;
import alluxio.exception.status.PermissionDeniedException;
import alluxio.exception.status.ResourceExhaustedException;
import alluxio.exception.status.UnavailableException;
import alluxio.file.options.DescendantType;
import alluxio.grpc.CompleteFilePOptions;
import alluxio.grpc.DeletePOptions;
import alluxio.grpc.FileSystemMasterCommonPOptions;
import alluxio.grpc.GrpcService;
import alluxio.grpc.GrpcUtils;
import alluxio.grpc.LoadDescendantPType;
import alluxio.grpc.LoadMetadataPOptions;
import alluxio.grpc.LoadMetadataPType;
import alluxio.grpc.MountPOptions;
import alluxio.grpc.ServiceType;
import alluxio.grpc.SetAclAction;
import alluxio.grpc.SetAttributePOptions;
import alluxio.grpc.TtlAction;
import alluxio.heartbeat.HeartbeatContext;
import alluxio.heartbeat.HeartbeatThread;
import alluxio.master.CoreMaster;
import alluxio.master.CoreMasterContext;
import alluxio.master.ProtobufUtils;
import alluxio.master.audit.AsyncUserAccessAuditLogWriter;
import alluxio.master.audit.AuditContext;
import alluxio.master.block.BlockId;
import alluxio.master.block.BlockMaster;
import alluxio.master.block.DefaultBlockMaster;
import alluxio.master.file.activesync.ActiveSyncManager;
import alluxio.master.file.contexts.CheckConsistencyContext;
import alluxio.master.file.contexts.CompleteFileContext;
import alluxio.master.file.contexts.CreateDirectoryContext;
import alluxio.master.file.contexts.CreateFileContext;
import alluxio.master.file.contexts.DeleteContext;
import alluxio.master.file.contexts.FreeContext;
import alluxio.master.file.contexts.GetStatusContext;
import alluxio.master.file.contexts.ListStatusContext;
import alluxio.master.file.contexts.LoadMetadataContext;
import alluxio.master.file.contexts.MountContext;
import alluxio.master.file.contexts.RenameContext;
import alluxio.master.file.contexts.ScheduleAsyncPersistenceContext;
import alluxio.master.file.contexts.SetAclContext;
import alluxio.master.file.contexts.SetAttributeContext;
import alluxio.master.file.contexts.WorkerHeartbeatContext;
import alluxio.master.file.meta.FileSystemMasterView;
import alluxio.master.file.meta.Inode;
import alluxio.master.file.meta.InodeDirectory;
import alluxio.master.file.meta.InodeDirectoryIdGenerator;
import alluxio.master.file.meta.InodeDirectoryView;
import alluxio.master.file.meta.InodeFile;
import alluxio.master.file.meta.InodeLockManager;
import alluxio.master.file.meta.InodePathPair;
import alluxio.master.file.meta.InodeTree;
import alluxio.master.file.meta.InodeTree.LockPattern;
import alluxio.master.file.meta.LockedInodePath;
import alluxio.master.file.meta.LockedInodePathList;
import alluxio.master.file.meta.LockingScheme;
import alluxio.master.file.meta.MountTable;
import alluxio.master.file.meta.PersistenceState;
import alluxio.master.file.meta.UfsAbsentPathCache;
import alluxio.master.file.meta.UfsBlockLocationCache;
import alluxio.master.file.meta.UfsSyncPathCache;
import alluxio.master.file.meta.UfsSyncUtils;
import alluxio.master.file.meta.options.MountInfo;
import alluxio.master.journal.DelegatingJournaled;
import alluxio.master.journal.JournaledGroup;
import alluxio.master.journal.JournalContext;
import alluxio.master.journal.Journaled;
import alluxio.master.journal.checkpoint.CheckpointName;
import alluxio.master.metastore.DelegatingReadOnlyInodeStore;
import alluxio.master.metastore.InodeStore;
import alluxio.master.metastore.ReadOnlyInodeStore;
import alluxio.master.metrics.TimeSeriesStore;
import alluxio.metrics.MasterMetrics;
import alluxio.metrics.MetricsSystem;
import alluxio.metrics.TimeSeries;
import alluxio.proto.journal.File;
import alluxio.proto.journal.File.AddSyncPointEntry;
import alluxio.proto.journal.File.NewBlockEntry;
import alluxio.proto.journal.File.RemoveSyncPointEntry;
import alluxio.proto.journal.File.RenameEntry;
import alluxio.proto.journal.File.SetAclEntry;
import alluxio.proto.journal.File.UpdateInodeEntry;
import alluxio.proto.journal.File.UpdateInodeFileEntry;
import alluxio.proto.journal.File.UpdateInodeFileEntry.Builder;
import alluxio.proto.journal.Journal.JournalEntry;
import alluxio.resource.CloseableResource;
import alluxio.resource.LockResource;
import alluxio.retry.CountingRetry;
import alluxio.retry.RetryPolicy;
import alluxio.security.authentication.AuthType;
import alluxio.security.authentication.AuthenticatedClientUser;
import alluxio.security.authentication.ClientIpAddressInjector;
import alluxio.security.authorization.AccessControlList;
import alluxio.security.authorization.AclEntry;
import alluxio.security.authorization.AclEntryType;
import alluxio.security.authorization.DefaultAccessControlList;
import alluxio.security.authorization.Mode;
import alluxio.underfs.Fingerprint;
import alluxio.underfs.Fingerprint.Tag;
import alluxio.underfs.MasterUfsManager;
import alluxio.underfs.UfsFileStatus;
import alluxio.underfs.UfsManager;
import alluxio.underfs.UfsMode;
import alluxio.underfs.UfsStatus;
import alluxio.underfs.UnderFileSystem;
import alluxio.underfs.UnderFileSystemConfiguration;
import alluxio.underfs.options.ListOptions;
import alluxio.util.CommonUtils;
import alluxio.util.IdUtils;
import alluxio.util.ModeUtils;
import alluxio.util.SecurityUtils;
import alluxio.util.UnderFileSystemUtils;
import alluxio.util.executor.ExecutorServiceFactories;
import alluxio.util.executor.ExecutorServiceFactory;
import alluxio.util.interfaces.Scoped;
import alluxio.util.io.PathUtils;
import alluxio.util.proto.ProtoUtils;
import alluxio.wire.BlockInfo;
import alluxio.wire.BlockLocation;
import alluxio.wire.CommandType;
import alluxio.wire.FileBlockInfo;
import alluxio.wire.FileInfo;
import alluxio.wire.FileSystemCommand;
import alluxio.wire.FileSystemCommandOptions;
import alluxio.wire.MountPointInfo;
import alluxio.wire.PersistCommandOptions;
import alluxio.wire.PersistFile;
import alluxio.wire.SyncPointInfo;
import alluxio.wire.UfsInfo;
import alluxio.wire.WorkerInfo;
import alluxio.worker.job.JobMasterClientContext;
import com.codahale.metrics.Counter;
import com.codahale.metrics.MetricRegistry;
import com.google.common.annotations.VisibleForTesting;
import com.google.common.base.Preconditions;
import com.google.common.base.Throwables;
import com.google.common.collect.ImmutableSet;
import com.google.common.collect.Iterables;
import com.google.common.collect.Sets;
import io.grpc.ServerInterceptors;
import org.apache.commons.lang.StringUtils;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import java.io.FileNotFoundException;
import java.io.IOException;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collection;
import java.util.Collections;
import java.util.Comparator;
import java.util.HashMap;
import java.util.HashSet;
import java.util.List;
import java.util.Map;
import java.util.Optional;
import java.util.Set;
import java.util.SortedMap;
import java.util.Stack;
import java.util.TreeMap;
import java.util.concurrent.Callable;
import java.util.concurrent.ExecutorService;
import java.util.concurrent.LinkedBlockingQueue;
import java.util.function.Supplier;
import java.util.stream.Collectors;
import java.util.stream.Stream;
import javax.annotation.Nullable;
import javax.annotation.concurrent.NotThreadSafe;
/**
* The master that handles all file system metadata management.
*/
@NotThreadSafe // TODO(jiri): make thread-safe (c.f. ALLUXIO-1664)
public final class DefaultFileSystemMaster extends CoreMaster
implements FileSystemMaster, DelegatingJournaled {
private static final Logger LOG = LoggerFactory.getLogger(DefaultFileSystemMaster.class);
private static final Set> DEPS = ImmutableSet.of(BlockMaster.class);
/** The number of threads to use in the {@link #mPersistCheckerPool}. */
private static final int PERSIST_CHECKER_POOL_THREADS = 128;
/**
* Locking in DefaultFileSystemMaster
*
* Individual paths are locked in the inode tree. In order to read or write any inode, the path
* must be locked. A path is locked via one of the lock methods in {@link InodeTree}, such as
* {@link InodeTree#lockInodePath(AlluxioURI, LockMode)} or
* {@link InodeTree#lockFullInodePath(AlluxioURI, LockMode)}. These lock methods return
* an {@link LockedInodePath}, which represents a locked path of inodes. These locked paths
* ({@link LockedInodePath}) must be unlocked. In order to ensure a locked
* {@link LockedInodePath} is always unlocked, the following paradigm is recommended:
*
*
* try (LockedInodePath inodePath = mInodeTree.lockInodePath(path, LockPattern.READ)) {
* ...
* }
*
*
* When locking a path in the inode tree, it is possible that other concurrent operations have
* modified the inode tree while a thread is waiting to acquire a lock on the inode. Lock
* acquisitions throw {@link InvalidPathException} to indicate that the inode structure is no
* longer consistent with what the caller originally expected, for example if the inode
* previously obtained at /pathA has been renamed to /pathB during the wait for the inode lock.
* Methods which specifically act on a path will propagate this exception to the caller, while
* methods which iterate over child nodes can safely ignore the exception and treat the inode
* as no longer a child.
*
* JournalContext, BlockDeletionContext, and RpcContext
*
* RpcContext is an aggregator for various contexts which get passed around through file system
* master methods.
*
* Currently there are two types of contexts that get passed around: {@link JournalContext} and
* {@link BlockDeletionContext}. These contexts are used to register work that should be done when
* the context closes. The journal context tracks journal entries which need to be flushed, while
* the block deletion context tracks which blocks need to be deleted in the {@link BlockMaster}.
*
* File system master journal entries should be written before blocks are deleted in the block
* master, so journal context should always be closed before block deletion context. In order to
* ensure that contexts are closed and closed in the right order, the following paradigm is
* recommended:
*
*
* try (RpcContext rpcContext = createRpcContext()) {
* // access journal context with rpcContext.getJournalContext()
* // access block deletion context with rpcContext.getBlockDeletionContext()
* ...
* }
*
*
* When used in conjunction with {@link LockedInodePath} and {@link AuditContext}, the usage
* should look like
*
*
* try (RpcContext rpcContext = createRpcContext();
* LockedInodePath inodePath = mInodeTree.lockInodePath(...);
* FileSystemMasterAuditContext auditContext = createAuditContext(...)) {
* ...
* }
*
*
* NOTE: Because resources are released in the opposite order they are acquired, the
* {@link JournalContext}, {@link BlockDeletionContext}, or {@link RpcContext} resources should be
* always created before any {@link LockedInodePath} resources to avoid holding an inode path lock
* while waiting for journal IO.
*
* User access audit logging in the FileSystemMaster
*
* User accesses to file system metadata should be audited. The intent to write audit log and the
* actual writing of the audit log is decoupled so that operations are not holding metadata locks
* waiting on the audit log IO. In particular {@link AsyncUserAccessAuditLogWriter} uses a
* separate thread to perform actual audit log IO. In order for audit log entries to preserve
* the order of file system operations, the intention of auditing should be submitted to
* {@link AsyncUserAccessAuditLogWriter} while holding locks on the inode path. That said, the
* {@link AuditContext} resources should always live within the scope of {@link LockedInodePath},
* i.e. created after {@link LockedInodePath}. Otherwise, the order of audit log entries may not
* reflect the actual order of the user accesses.
* Resources are released in the opposite order they are acquired, the
* {@link AuditContext#close()} method is called before {@link LockedInodePath#close()}, thus
* guaranteeing the order.
*
* Method Conventions in the FileSystemMaster
*
* All of the flow of the FileSystemMaster follow a convention. There are essentially 2 main
* types of methods:
* (A) public api methods
* (B) private (or package private) internal methods
*
* (A) public api methods:
* These methods are public and are accessed by the RPC and REST APIs. These methods lock all
* the required paths, and also perform all permission checking.
* (A) cannot call (A)
* (A) can call (B)
*
* (B) private (or package private) internal methods:
* These methods perform the rest of the work. The names of these
* methods are suffixed by "Internal". These are typically called by the (A) methods.
* (B) cannot call (A)
* (B) can call (B)
*/
/** Handle to the block master. */
private final BlockMaster mBlockMaster;
/** This manages the file system inode structure. This must be journaled. */
private final InodeTree mInodeTree;
/** Store for holding inodes. */
private final ReadOnlyInodeStore mInodeStore;
/** This manages inode locking. */
private final InodeLockManager mInodeLockManager;
/** This manages the file system mount points. */
private final MountTable mMountTable;
/** This generates unique directory ids. This must be journaled. */
private final InodeDirectoryIdGenerator mDirectoryIdGenerator;
/** This checks user permissions on different operations. */
private final PermissionChecker mPermissionChecker;
/** List of paths to always keep in memory. */
private final PrefixList mWhitelist;
/** A pool of job master clients. */
private final JobMasterClientPool mJobMasterClientPool;
/** Map from file IDs to the retry timers governing their pending persist requests. */
private final Map<Long, alluxio.time.ExponentialTimer> mPersistRequests;
/** Map from file IDs to persist jobs. */
private final Map<Long, PersistJob> mPersistJobs;
/** The manager of all ufs. */
private final MasterUfsManager mUfsManager;
/** This caches absent paths in the UFS. */
private final UfsAbsentPathCache mUfsAbsentPathCache;
/** This caches block locations in the UFS. */
private final UfsBlockLocationCache mUfsBlockLocationCache;
/** This caches paths which have been synced with UFS. */
private final UfsSyncPathCache mUfsSyncPathCache;
/** The {@link JournaledGroup} representing all the subcomponents which require journaling. */
private final JournaledGroup mJournaledGroup;
/** List of strings which are blacklisted from async persist. */
private final List<String> mPersistBlacklist;
/** Thread pool which asynchronously handles the completion of persist jobs. */
private java.util.concurrent.ThreadPoolExecutor mPersistCheckerPool;
/** Manages active UFS syncing; journaled as part of {@link #mJournaledGroup}. */
private ActiveSyncManager mSyncManager;
/** Log writer for user access audit log. */
private AsyncUserAccessAuditLogWriter mAsyncAuditLogWriter;
/** Stores the time series for various metrics which are exposed in the UI. */
private TimeSeriesStore mTimeSeriesStore;
/** Asynchronously journals inode access-time updates; started/stopped with this master. */
private AccessTimeUpdater mAccessTimeUpdater;
/**
 * Creates a new instance of {@link DefaultFileSystemMaster}.
 *
 * <p>Delegates to the full constructor, using a cached thread pool factory for the
 * maintenance threads.
 *
 * @param blockMaster a block master handle
 * @param masterContext the context for Alluxio master
 */
public DefaultFileSystemMaster(BlockMaster blockMaster, CoreMasterContext masterContext) {
this(blockMaster, masterContext,
ExecutorServiceFactories.cachedThreadPool(Constants.FILE_SYSTEM_MASTER_NAME));
}
/**
 * Creates a new instance of {@link DefaultFileSystemMaster}.
 *
 * @param blockMaster a block master handle
 * @param masterContext the context for Alluxio master
 * @param executorServiceFactory a factory for creating the executor service to use for running
 *        maintenance threads
 */
public DefaultFileSystemMaster(BlockMaster blockMaster, CoreMasterContext masterContext,
    ExecutorServiceFactory executorServiceFactory) {
  super(masterContext, new SystemClock(), executorServiceFactory);
  mBlockMaster = blockMaster;
  mDirectoryIdGenerator = new InodeDirectoryIdGenerator(mBlockMaster);
  mUfsManager = masterContext.getUfsManager();
  mMountTable = new MountTable(mUfsManager, getRootMountInfo(mUfsManager));
  mInodeLockManager = new InodeLockManager();
  InodeStore inodeStore = masterContext.getInodeStoreFactory().apply(mInodeLockManager);
  // Expose the inode store to the rest of this class through a read-only view.
  mInodeStore = new DelegatingReadOnlyInodeStore(inodeStore);
  mInodeTree = new InodeTree(inodeStore, mBlockMaster,
      mDirectoryIdGenerator, mMountTable, mInodeLockManager);
  // TODO(gene): Handle default config value for whitelist.
  mWhitelist = new PrefixList(ServerConfiguration.getList(PropertyKey.MASTER_WHITELIST, ","));
  mPersistBlacklist = ServerConfiguration.isSet(PropertyKey.MASTER_PERSISTENCE_BLACKLIST)
      ? ServerConfiguration.getList(PropertyKey.MASTER_PERSISTENCE_BLACKLIST, ",")
      : Collections.emptyList();
  mPermissionChecker = new DefaultPermissionChecker(mInodeTree);
  mJobMasterClientPool = new JobMasterClientPool(JobMasterClientContext
      .newBuilder(ClientContext.create(ServerConfiguration.global())).build());
  mPersistRequests = new java.util.concurrent.ConcurrentHashMap<>();
  mPersistJobs = new java.util.concurrent.ConcurrentHashMap<>();
  mUfsAbsentPathCache = UfsAbsentPathCache.Factory.create(mMountTable);
  mUfsBlockLocationCache = UfsBlockLocationCache.Factory.create(mMountTable);
  mUfsSyncPathCache = new UfsSyncPathCache();
  mSyncManager = new ActiveSyncManager(mMountTable, this);
  mTimeSeriesStore = new TimeSeriesStore();
  mAccessTimeUpdater = new AccessTimeUpdater(this, mInodeTree, masterContext.getJournalSystem());
  // The mount table should come after the inode tree because restoring the mount table requires
  // that the inode tree is already restored. (Previously built via a raw, double-brace
  // initialized ArrayList; replaced with a typed list to avoid the anonymous subclass.)
  ArrayList<Journaled> journaledComponents = new ArrayList<>(Arrays.asList(
      mInodeTree, mDirectoryIdGenerator, mMountTable, mUfsManager, mSyncManager));
  mJournaledGroup = new JournaledGroup(journaledComponents, CheckpointName.FILE_SYSTEM_MASTER);
  resetState();
  Metrics.registerGauges(this, mUfsManager);
}
/**
 * Builds the {@link MountInfo} for the Alluxio root mount point from server configuration.
 *
 * @param ufsManager the UFS manager from which the root UFS is acquired
 * @return the mount information describing the root mount
 */
private static MountInfo getRootMountInfo(MasterUfsManager ufsManager) {
  try (CloseableResource<UnderFileSystem> resource = ufsManager.getRoot().acquireUfsResource()) {
    String rootUfsUri = ServerConfiguration.get(PropertyKey.MASTER_MOUNT_TABLE_ROOT_UFS);
    // Object stores may be configured to be mounted shared (publicly readable).
    boolean shared = resource.get().isObjectStorage()
        && ServerConfiguration.getBoolean(PropertyKey.UNDERFS_OBJECT_STORE_MOUNT_SHARED_PUBLICLY);
    Map<String, String> rootUfsConf =
        ServerConfiguration.getNestedProperties(PropertyKey.MASTER_MOUNT_TABLE_ROOT_OPTION);
    MountPOptions mountOptions = MountContext
        .mergeFrom(MountPOptions.newBuilder().setShared(shared).putAllProperties(rootUfsConf))
        .getOptions().build();
    return new MountInfo(new AlluxioURI(MountTable.ROOT),
        new AlluxioURI(rootUfsUri), IdUtils.ROOT_MOUNT_ID, mountOptions);
  }
}
@Override
public Map<ServiceType, GrpcService> getServices() {
  Map<ServiceType, GrpcService> services = new HashMap<>();
  // The client service is wrapped with an interceptor that records the caller's IP address
  // (used by the audit log).
  services.put(ServiceType.FILE_SYSTEM_MASTER_CLIENT_SERVICE, new GrpcService(ServerInterceptors
      .intercept(new FileSystemMasterClientServiceHandler(this), new ClientIpAddressInjector())));
  services.put(ServiceType.FILE_SYSTEM_MASTER_JOB_SERVICE,
      new GrpcService(new FileSystemMasterJobServiceHandler(this)));
  services.put(ServiceType.FILE_SYSTEM_MASTER_WORKER_SERVICE,
      new GrpcService(new FileSystemMasterWorkerServiceHandler(this)));
  return services;
}
@Override
public String getName() {
// Fixed service name under which this master is registered.
return Constants.FILE_SYSTEM_MASTER_NAME;
}
@Override
public Set> getDependencies() {
return DEPS;
}
@Override
public Journaled getDelegate() {
// All journaling for this master is delegated to the group of journaled subcomponents.
return mJournaledGroup;
}
/**
 * Starts the file system master. When starting as the primary, this initializes the root inode
 * (on first startup) or verifies root ownership, restores UFS mounts from the mount table,
 * rebuilds pending persist requests/jobs, and launches the periodic maintenance threads.
 *
 * @param isPrimary whether this master starts as the primary
 */
@Override
public void start(Boolean isPrimary) throws IOException {
  super.start(isPrimary);
  if (isPrimary) {
    LOG.info("Starting fs master as primary");
    InodeDirectory root = mInodeTree.getRoot();
    if (root == null) {
      // First startup: create the root directory with the server's owner/group and a
      // umask-adjusted full-access mode, journaled via the journal context.
      try (JournalContext context = createJournalContext()) {
        mInodeTree.initializeRoot(
            SecurityUtils.getOwner(mMasterContext.getUserState()),
            SecurityUtils.getGroup(mMasterContext.getUserState(), ServerConfiguration.global()),
            ModeUtils.applyDirectoryUMask(Mode.createFullAccess(),
                ServerConfiguration.get(PropertyKey.SECURITY_AUTHORIZATION_PERMISSION_UMASK)),
            context);
      }
    } else if (!ServerConfiguration.getBoolean(PropertyKey.MASTER_SKIP_ROOT_ACL_CHECK)) {
      // For backwards-compatibility:
      // Empty root owner indicates that previously the master had no security. In this case, the
      // master is allowed to be started with security turned on.
      String serverOwner = SecurityUtils.getOwner(mMasterContext.getUserState());
      if (SecurityUtils.isSecurityEnabled(ServerConfiguration.global())
          && !root.getOwner().isEmpty() && !root.getOwner().equals(serverOwner)) {
        // user is not the previous owner
        throw new PermissionDeniedException(ExceptionMessage.PERMISSION_DENIED.getMessage(String
            .format("Unauthorized user on root. inode owner: %s current user: %s",
                root.getOwner(), serverOwner)));
      }
    }
    // Initialize the ufs manager from the mount table.
    for (String key : mMountTable.getMountTable().keySet()) {
      if (key.equals(MountTable.ROOT)) {
        continue;
      }
      MountInfo mountInfo = mMountTable.getMountTable().get(key);
      UnderFileSystemConfiguration ufsConf =
          UnderFileSystemConfiguration.defaults(ServerConfiguration.global())
              .createMountSpecificConf(mountInfo.getOptions().getPropertiesMap())
              .setReadOnly(mountInfo.getOptions().getReadOnly())
              .setShared(mountInfo.getOptions().getShared());
      mUfsManager.addMount(mountInfo.getMountId(), mountInfo.getUfsUri(), ufsConf);
    }
    // Startup Checks and Periodic Threads.
    // Rebuild the list of persist jobs (mPersistJobs) and map of pending persist requests
    // (mPersistRequests)
    long persistInitialIntervalMs =
        ServerConfiguration.getMs(PropertyKey.MASTER_PERSISTENCE_INITIAL_INTERVAL_MS);
    long persistMaxIntervalMs =
        ServerConfiguration.getMs(PropertyKey.MASTER_PERSISTENCE_MAX_INTERVAL_MS);
    long persistMaxWaitMs =
        ServerConfiguration.getMs(PropertyKey.MASTER_PERSISTENCE_MAX_TOTAL_WAIT_TIME_MS);
    for (Long id : mInodeTree.getToBePersistedIds()) {
      Inode inode = mInodeStore.get(id).get();
      if (inode.isDirectory()
          || !inode.asFile().isCompleted() // When file is completed it is added to persist reqs
          || inode.getPersistenceState() != PersistenceState.TO_BE_PERSISTED
          || inode.asFile().getShouldPersistTime() == Constants.NO_AUTO_PERSIST) {
        continue;
      }
      InodeFile inodeFile = inode.asFile();
      if (inodeFile.getPersistJobId() == Constants.PERSISTENCE_INVALID_JOB_ID) {
        // No persist job was scheduled before the restart; re-queue the request with a
        // fresh exponential retry timer.
        mPersistRequests.put(inodeFile.getId(),
            new alluxio.time.ExponentialTimer(
                persistInitialIntervalMs,
                persistMaxIntervalMs,
                getPersistenceWaitTime(inodeFile.getShouldPersistTime()),
                persistMaxWaitMs));
      } else {
        // A persist job was already scheduled; re-attach to it so completion can be tracked.
        AlluxioURI path;
        try {
          path = mInodeTree.getPath(inodeFile);
        } catch (FileDoesNotExistException e) {
          LOG.error("Failed to determine path for inode with id {}", id, e);
          continue;
        }
        addPersistJob(id, inodeFile.getPersistJobId(),
            getPersistenceWaitTime(inodeFile.getShouldPersistTime()),
            path, inodeFile.getTempUfsPath());
      }
    }
    if (ServerConfiguration
        .getBoolean(PropertyKey.MASTER_STARTUP_BLOCK_INTEGRITY_CHECK_ENABLED)) {
      validateInodeBlocks(true);
    }
    int blockIntegrityCheckInterval = (int) ServerConfiguration
        .getMs(PropertyKey.MASTER_PERIODIC_BLOCK_INTEGRITY_CHECK_INTERVAL);
    if (blockIntegrityCheckInterval > 0) { // negative or zero interval implies disabled
      getExecutorService().submit(
          new HeartbeatThread(HeartbeatContext.MASTER_BLOCK_INTEGRITY_CHECK,
              new BlockIntegrityChecker(this), blockIntegrityCheckInterval,
              ServerConfiguration.global(), mMasterContext.getUserState()));
    }
    getExecutorService().submit(
        new HeartbeatThread(HeartbeatContext.MASTER_TTL_CHECK,
            new InodeTtlChecker(this, mInodeTree),
            (int) ServerConfiguration.getMs(PropertyKey.MASTER_TTL_CHECKER_INTERVAL_MS),
            ServerConfiguration.global(), mMasterContext.getUserState()));
    getExecutorService().submit(
        new HeartbeatThread(HeartbeatContext.MASTER_LOST_FILES_DETECTION,
            new LostFileDetector(this, mInodeTree),
            (int) ServerConfiguration.getMs(PropertyKey.MASTER_WORKER_HEARTBEAT_INTERVAL),
            ServerConfiguration.global(), mMasterContext.getUserState()));
    getExecutorService().submit(new HeartbeatThread(
        HeartbeatContext.MASTER_REPLICATION_CHECK,
        new alluxio.master.file.replication.ReplicationChecker(mInodeTree, mBlockMaster,
            mSafeModeManager, mJobMasterClientPool),
        (int) ServerConfiguration.getMs(PropertyKey.MASTER_REPLICATION_CHECK_INTERVAL_MS),
        ServerConfiguration.global(), mMasterContext.getUserState()));
    getExecutorService().submit(
        new HeartbeatThread(HeartbeatContext.MASTER_PERSISTENCE_SCHEDULER,
            new PersistenceScheduler(),
            (int) ServerConfiguration.getMs(PropertyKey.MASTER_PERSISTENCE_SCHEDULER_INTERVAL_MS),
            ServerConfiguration.global(), mMasterContext.getUserState()));
    // Fixed-size pool; the unbounded queue buffers persist-check tasks. Core threads are
    // allowed to time out so the pool drains when idle.
    mPersistCheckerPool =
        new java.util.concurrent.ThreadPoolExecutor(PERSIST_CHECKER_POOL_THREADS,
            PERSIST_CHECKER_POOL_THREADS, 1, java.util.concurrent.TimeUnit.MINUTES,
            new LinkedBlockingQueue<Runnable>(),
            alluxio.util.ThreadFactoryUtils.build("Persist-Checker-%d", true));
    mPersistCheckerPool.allowCoreThreadTimeOut(true);
    getExecutorService().submit(
        new HeartbeatThread(HeartbeatContext.MASTER_PERSISTENCE_CHECKER,
            new PersistenceChecker(),
            (int) ServerConfiguration.getMs(PropertyKey.MASTER_PERSISTENCE_CHECKER_INTERVAL_MS),
            ServerConfiguration.global(), mMasterContext.getUserState()));
    getExecutorService().submit(
        new HeartbeatThread(HeartbeatContext.MASTER_METRICS_TIME_SERIES,
            new TimeSeriesRecorder(),
            (int) ServerConfiguration.getMs(PropertyKey.MASTER_METRICS_TIME_SERIES_INTERVAL),
            ServerConfiguration.global(), mMasterContext.getUserState()));
    if (ServerConfiguration.getBoolean(PropertyKey.MASTER_AUDIT_LOGGING_ENABLED)) {
      mAsyncAuditLogWriter = new AsyncUserAccessAuditLogWriter();
      mAsyncAuditLogWriter.start();
    }
    if (ServerConfiguration.getBoolean(PropertyKey.UNDERFS_CLEANUP_ENABLED)) {
      getExecutorService().submit(
          new HeartbeatThread(HeartbeatContext.MASTER_UFS_CLEANUP, new UfsCleaner(this),
              (int) ServerConfiguration.getMs(PropertyKey.UNDERFS_CLEANUP_INTERVAL),
              ServerConfiguration.global(), mMasterContext.getUserState()));
    }
    mAccessTimeUpdater.start();
    mSyncManager.start();
  }
}
@Override
public void stop() throws IOException {
// Shut down the async audit log writer, if audit logging was enabled at start.
if (mAsyncAuditLogWriter != null) {
mAsyncAuditLogWriter.stop();
mAsyncAuditLogWriter = null;
}
// Stop the subcomponents started in start() before stopping the base master.
mSyncManager.stop();
mAccessTimeUpdater.stop();
super.stop();
}
@Override
public void close() throws IOException {
super.close();
// Close the inode tree after the base master resources are released.
mInodeTree.close();
}
@Override
public void validateInodeBlocks(boolean repair) throws UnavailableException {
  // A block is considered valid iff the file it belongs to still exists in the inode tree.
  mBlockMaster.validateBlocks(
      blockId -> mInodeTree.inodeIdExists(IdUtils.fileIdFromBlockId(blockId)), repair);
}
/**
 * Invokes {@code cleanup()} on the UFS of every non-read-only mount point. Failures are
 * logged per mount and do not abort the sweep over the remaining mounts.
 */
@Override
public void cleanupUfs() {
  for (Map.Entry<String, MountInfo> mountPoint : mMountTable.getMountTable().entrySet()) {
    MountInfo info = mountPoint.getValue();
    if (info.getOptions().getReadOnly()) {
      // Never touch a read-only mount.
      continue;
    }
    try (CloseableResource<UnderFileSystem> ufsResource =
        mUfsManager.get(info.getMountId()).acquireUfsResource()) {
      ufsResource.get().cleanup();
    } catch (UnavailableException | NotFoundException e) {
      LOG.error("No UFS cached for {}", info, e);
    } catch (IOException e) {
      LOG.error("Failed in cleanup UFS {}.", info, e);
    }
  }
}
@Override
public long getFileId(AlluxioURI path) throws AccessControlException, UnavailableException {
// RpcContext is acquired before the inode path lock (see the class comment on lock ordering).
try (RpcContext rpcContext = createRpcContext();
LockedInodePath inodePath = mInodeTree.lockInodePath(path, LockPattern.READ)) {
mPermissionChecker.checkPermission(Mode.Bits.READ, inodePath);
// Attempt to load metadata from the UFS when the path is not present in Alluxio yet.
loadMetadataIfNotExist(rpcContext, inodePath, LoadMetadataContext
.mergeFrom(LoadMetadataPOptions.newBuilder().setCreateAncestors(true)));
mInodeTree.ensureFullInodePath(inodePath);
return inodePath.getInode().getId();
} catch (InvalidPathException | FileDoesNotExistException e) {
// Invalid or absent paths are mapped to INVALID_FILE_ID rather than an exception.
return IdUtils.INVALID_FILE_ID;
}
}
@Override
public FileInfo getFileInfo(long fileId)
throws FileDoesNotExistException, AccessControlException, UnavailableException {
Metrics.GET_FILE_INFO_OPS.inc();
// Lock the complete inode path corresponding to the id before reading its info.
try (LockedInodePath inodePath = mInodeTree.lockFullInodePath(fileId, LockPattern.READ)) {
return getFileInfoInternal(inodePath);
}
}
@Override
public FileInfo getFileInfo(AlluxioURI path, GetStatusContext context)
throws FileDoesNotExistException, InvalidPathException, AccessControlException, IOException {
Metrics.GET_FILE_INFO_OPS.inc();
long opTimeMs = System.currentTimeMillis();
LockingScheme lockingScheme =
createLockingScheme(path, context.getOptions().getCommonOptions(), LockPattern.READ, true);
// Acquire the RpcContext before the inode path lock so journal IO never happens under the lock
// (see the class comment on resource ordering).
try (RpcContext rpcContext = createRpcContext();
LockedInodePath inodePath = mInodeTree
.lockInodePath(lockingScheme.getPath(), lockingScheme.getPattern());
FileSystemMasterAuditContext auditContext =
createAuditContext("getFileInfo", path, null, inodePath.getInodeOrNull())) {
try {
mPermissionChecker.checkPermission(Mode.Bits.READ, inodePath);
} catch (AccessControlException e) {
// Record the denied access in the audit log before propagating.
auditContext.setAllowed(false);
throw e;
}
// Possible ufs sync.
if (syncMetadata(rpcContext, inodePath, lockingScheme, DescendantType.ONE)) {
// If synced, do not load metadata.
context.getOptions().setLoadMetadataType(LoadMetadataPType.NEVER);
}
// If the file already exists, then metadata does not need to be loaded,
// otherwise load metadata.
if (!inodePath.fullPathExists()) {
checkLoadMetadataOptions(context.getOptions().getLoadMetadataType(), inodePath.getUri());
loadMetadataIfNotExist(rpcContext, inodePath,
LoadMetadataContext.mergeFrom(LoadMetadataPOptions.newBuilder().setCreateAncestors(true)
.setCommonOptions(FileSystemMasterCommonPOptions.newBuilder()
.setTtl(context.getOptions().getCommonOptions().getTtl())
.setTtlAction(context.getOptions().getCommonOptions().getTtlAction()))));
ensureFullPathAndUpdateCache(inodePath);
}
FileInfo fileInfo = getFileInfoInternal(inodePath);
Mode.Bits accessMode = Mode.Bits.fromProto(context.getOptions().getAccessMode());
// Only update the access time when the caller requested it with a read/write access mode.
if (context.getOptions().getUpdateTimestamps() && context.getOptions().hasAccessMode()
&& (accessMode.imply(Mode.Bits.READ) || accessMode.imply(Mode.Bits.WRITE))) {
mAccessTimeUpdater.updateAccessTime(rpcContext.getJournalContext(),
inodePath.getInode(), opTimeMs);
}
auditContext.setSrcInode(inodePath.getInode()).setSucceeded(true);
return fileInfo;
}
}
/**
 * Builds the {@link FileInfo} for an already-locked inode, including block locations (for
 * files) and the resolved UFS path.
 *
 * @param inodePath the {@link LockedInodePath} to get the {@link FileInfo} for
 * @return the {@link FileInfo} for the given inode
 * @throws FileDoesNotExistException if the inode is gone or its path can no longer be resolved
 * @throws UnavailableException if required master state is unavailable
 */
private FileInfo getFileInfoInternal(LockedInodePath inodePath)
throws FileDoesNotExistException, UnavailableException {
Inode inode = inodePath.getInode();
AlluxioURI uri = inodePath.getUri();
FileInfo fileInfo = inode.generateClientFileInfo(uri.toString());
if (fileInfo.isFolder()) {
// For directories, "length" reports the number of children rather than a byte count.
fileInfo.setLength(inode.asDirectory().getChildCount());
}
fileInfo.setInMemoryPercentage(getInMemoryPercentage(inode));
fileInfo.setInAlluxioPercentage(getInAlluxioPercentage(inode));
if (inode.isFile()) {
try {
fileInfo.setFileBlockInfos(getFileBlockInfoListInternal(inodePath));
} catch (InvalidPathException e) {
// An invalid path at this point means the file vanished under us.
throw new FileDoesNotExistException(e.getMessage(), e);
}
}
fileInfo.setXAttr(inode.getXAttr());
MountTable.Resolution resolution;
try {
// Resolve the Alluxio path to its UFS counterpart to report the UFS path and mount id.
resolution = mMountTable.resolve(uri);
} catch (InvalidPathException e) {
throw new FileDoesNotExistException(e.getMessage(), e);
}
AlluxioURI resolvedUri = resolution.getUri();
fileInfo.setUfsPath(resolvedUri.toString());
fileInfo.setMountId(resolution.getMountId());
Metrics.FILE_INFOS_GOT.inc();
return fileInfo;
}
/**
 * Returns the persistence state of the inode with the given id.
 *
 * @param fileId the id of the inode to query
 * @return the {@link PersistenceState} of the inode
 * @throws FileDoesNotExistException if no inode with the given id exists
 */
@Override
public PersistenceState getPersistenceState(long fileId) throws FileDoesNotExistException {
  try (LockedInodePath inodePath = mInodeTree.lockFullInodePath(fileId, LockPattern.READ)) {
    return inodePath.getInode().getPersistenceState();
  }
}
/**
 * Streams the status of the path (and, for a directory, its descendants) to the given
 * {@link ResultStream}, optionally syncing/loading metadata from the UFS first.
 *
 * @param path the path to list
 * @param context the method context carrying load-metadata and recursive options
 * @param resultStream the stream that receives each resulting status entry
 */
@Override
public void listStatus(AlluxioURI path, ListStatusContext context,
    ResultStream resultStream)
    throws AccessControlException, FileDoesNotExistException, InvalidPathException, IOException {
  Metrics.GET_FILE_INFO_OPS.inc();
  LockingScheme lockingScheme =
      createLockingScheme(path, context.getOptions().getCommonOptions(), LockPattern.READ);
  try (RpcContext rpcContext = createRpcContext();
      LockedInodePath inodePath = mInodeTree
          .lockInodePath(lockingScheme.getPath(), lockingScheme.getPattern());
      FileSystemMasterAuditContext auditContext =
          createAuditContext("listStatus", path, null, inodePath.getInodeOrNull())) {
    try {
      mPermissionChecker.checkPermission(Mode.Bits.READ, inodePath);
    } catch (AccessControlException e) {
      auditContext.setAllowed(false);
      throw e;
    }
    DescendantType descendantType = context.getOptions().getRecursive() ? DescendantType.ALL
        : DescendantType.ONE;
    // Possible ufs sync.
    if (syncMetadata(rpcContext, inodePath, lockingScheme, descendantType)) {
      // If synced, do not load metadata.
      context.getOptions().setLoadMetadataType(LoadMetadataPType.NEVER);
    }
    // Decide how deep metadata loading should go: none, one level, or the whole subtree.
    DescendantType loadDescendantType;
    if (context.getOptions().getLoadMetadataType() == LoadMetadataPType.NEVER) {
      loadDescendantType = DescendantType.NONE;
    } else if (context.getOptions().getRecursive()) {
      loadDescendantType = DescendantType.ALL;
    } else {
      loadDescendantType = DescendantType.ONE;
    }
    // load metadata for 1 level of descendants, or all descendants if recursive
    LoadMetadataContext loadMetadataContext =
        LoadMetadataContext.mergeFrom(LoadMetadataPOptions.newBuilder().setCreateAncestors(true)
            .setLoadDescendantType(GrpcUtils.toProto(loadDescendantType))
            .setCommonOptions(FileSystemMasterCommonPOptions.newBuilder()
                .setTtl(context.getOptions().getCommonOptions().getTtl())
                .setTtlAction(context.getOptions().getCommonOptions().getTtlAction())));
    Inode inode;
    if (inodePath.fullPathExists()) {
      inode = inodePath.getInode();
      if (inode.isDirectory()
          && context.getOptions().getLoadMetadataType() != LoadMetadataPType.ALWAYS) {
        InodeDirectory inodeDirectory = inode.asDirectory();
        boolean isLoaded = inodeDirectory.isDirectChildrenLoaded();
        if (context.getOptions().getRecursive()) {
          // Recursive listing requires the whole subtree to be loaded, not just one level.
          isLoaded = areDescendantsLoaded(inodeDirectory);
        }
        if (isLoaded) {
          // no need to load again.
          loadMetadataContext.getOptions().setLoadDescendantType(LoadDescendantPType.NONE);
        }
      }
    } else {
      checkLoadMetadataOptions(context.getOptions().getLoadMetadataType(), inodePath.getUri());
    }
    loadMetadataIfNotExist(rpcContext, inodePath, loadMetadataContext);
    ensureFullPathAndUpdateCache(inodePath);
    inode = inodePath.getInode();
    auditContext.setSrcInode(inode);
    DescendantType descendantTypeForListStatus =
        (context.getOptions().getRecursive()) ? DescendantType.ALL : DescendantType.ONE;
    listStatusInternal(rpcContext, inodePath, auditContext, descendantTypeForListStatus,
        resultStream, 0);
    auditContext.setSucceeded(true);
    Metrics.FILE_INFOS_GOT.inc();
  }
}
/**
 * Collecting variant of the streaming listStatus: gathers every streamed status entry into a
 * list and returns it.
 *
 * @param path the path to list
 * @param context the method context
 * @return the collected status entries
 */
@Override
public List listStatus(AlluxioURI path, ListStatusContext context)
    throws AccessControlException, FileDoesNotExistException, InvalidPathException, IOException {
  final List collected = new ArrayList<>();
  listStatus(path, context, collected::add);
  return collected;
}
/**
 * Lists the status of the path in {@link LockedInodePath}, possibly recursively depending on the
 * descendantType. Results are emitted to {@code resultStream} in postorder traversal order.
 *
 * @param rpcContext the context for the RPC call
 * @param currInodePath the inode path to find the status
 * @param auditContext the audit context to return any access exceptions
 * @param descendantType if the currInodePath is a directory, how many levels of its descendant
 *        should be returned
 * @param resultStream the stream to receive individual results
 * @param depth internal use field for tracking depth relative to root item
 */
private void listStatusInternal(RpcContext rpcContext, LockedInodePath currInodePath,
    AuditContext auditContext, DescendantType descendantType, ResultStream resultStream,
    int depth) throws FileDoesNotExistException, UnavailableException, AccessControlException,
    InvalidPathException {
  Inode inode = currInodePath.getInode();
  if (inode.isDirectory() && descendantType != DescendantType.NONE) {
    try {
      // TODO(david): Return the error message when we do not have permission
      mPermissionChecker.checkPermission(Mode.Bits.EXECUTE, currInodePath);
    } catch (AccessControlException e) {
      auditContext.setAllowed(false);
      if (descendantType == DescendantType.ALL) {
        // Recursive listing silently skips unreadable subtrees instead of failing the call.
        return;
      } else {
        throw e;
      }
    }
    mAccessTimeUpdater.updateAccessTime(rpcContext.getJournalContext(), inode,
        CommonUtils.getCurrentMs());
    DescendantType nextDescendantType = (descendantType == DescendantType.ALL)
        ? DescendantType.ALL : DescendantType.NONE;
    // This is to generate a parsed child path components to be passed to lockChildPath
    String[] childComponentsHint = null;
    for (Inode child : mInodeStore.getChildren(inode.asDirectory())) {
      if (childComponentsHint == null) {
        String[] parentComponents = PathUtils.getPathComponents(currInodePath.getUri().getPath());
        childComponentsHint = new String[parentComponents.length + 1];
        System.arraycopy(parentComponents, 0, childComponentsHint, 0, parentComponents.length);
      }
      // TODO(david): Make extending InodePath more efficient
      childComponentsHint[childComponentsHint.length - 1] = child.getName();
      try (LockedInodePath childInodePath =
          currInodePath.lockChild(child, LockPattern.READ, childComponentsHint)) {
        listStatusInternal(rpcContext, childInodePath, auditContext, nextDescendantType,
            resultStream, depth + 1);
      } catch (InvalidPathException | FileDoesNotExistException e) {
        // BUGFIX: SLF4J uses "{}" placeholders, not MessageFormat-style "{0}"; with "{0}"
        // the path was never substituted into the log message.
        LOG.debug("Path \"{}\" is invalid, has been ignored.",
            PathUtils.concatPath("/", childComponentsHint));
      }
    }
  }
  // Listing a directory should not emit item for the directory itself.
  if (depth != 0 || inode.isFile()) {
    resultStream.submit(getFileInfoInternal(currInodePath));
  }
}
/**
 * Checks the {@link LoadMetadataPType} to determine whether or not to proceed in loading
 * metadata. This method assumes that the path does not exist in Alluxio namespace, and will
 * throw an exception if metadata should not be loaded.
 *
 * @param loadMetadataType the {@link LoadMetadataPType} to check
 * @param path the path that does not exist in Alluxio namespace (used for exception message)
 * @throws FileDoesNotExistException if metadata must not be loaded for this path
 */
private void checkLoadMetadataOptions(LoadMetadataPType loadMetadataType, AlluxioURI path)
    throws FileDoesNotExistException {
  boolean neverLoad = loadMetadataType == LoadMetadataPType.NEVER;
  boolean onceButKnownAbsent =
      loadMetadataType == LoadMetadataPType.ONCE && mUfsAbsentPathCache.isAbsent(path);
  if (neverLoad || onceButKnownAbsent) {
    throw new FileDoesNotExistException(ExceptionMessage.PATH_DOES_NOT_EXIST.getMessage(path));
  }
}
/**
 * Returns whether metadata for the whole subtree rooted at the given directory has been loaded.
 *
 * @param inode the directory to inspect
 * @return true if this directory and every transitive subdirectory report their direct
 *         children as loaded
 */
private boolean areDescendantsLoaded(InodeDirectoryView inode) {
  if (!inode.isDirectChildrenLoaded()) {
    return false;
  }
  for (Inode child : mInodeStore.getChildren(inode)) {
    // Only directories carry a children-loaded flag; files need no recursion.
    if (child.isDirectory() && !areDescendantsLoaded(child.asDirectory())) {
      return false;
    }
  }
  return true;
}
/**
 * Checks to see if the entire path exists in Alluxio. If any component is missing, records the
 * path in the UFS absent-path cache before propagating the failure.
 *
 * @param inodePath the path to ensure
 */
private void ensureFullPathAndUpdateCache(LockedInodePath inodePath)
    throws InvalidPathException, FileDoesNotExistException {
  boolean fullPathPresent = false;
  try {
    mInodeTree.ensureFullInodePath(inodePath);
    fullPathPresent = true;
  } finally {
    // A finally (rather than a catch) also covers unchecked failures from ensureFullInodePath.
    if (!fullPathPresent) {
      mUfsAbsentPathCache.process(inodePath.getUri(), inodePath.getInodeList());
    }
  }
}
/**
 * @return a new {@link FileSystemMasterView} wrapping this master
 */
@Override
public FileSystemMasterView getFileSystemMasterView() {
  return new FileSystemMasterView(this);
}
/**
 * Checks the consistency of the subtree rooted at {@code path} against the under storage.
 *
 * @param path the root of the subtree to check
 * @param context the method context
 * @return the list of URIs found inconsistent with the under storage
 */
@Override
public List checkConsistency(AlluxioURI path, CheckConsistencyContext context)
    throws AccessControlException, FileDoesNotExistException, InvalidPathException, IOException {
  LockingScheme lockingScheme =
      createLockingScheme(path, context.getOptions().getCommonOptions(), LockPattern.READ);
  List inconsistentUris = new ArrayList<>();
  try (RpcContext rpcContext = createRpcContext();
      LockedInodePath parent =
          mInodeTree.lockInodePath(lockingScheme.getPath(), lockingScheme.getPattern());
      FileSystemMasterAuditContext auditContext =
          createAuditContext("checkConsistency", path, null, parent.getInodeOrNull())) {
    try {
      mPermissionChecker.checkPermission(Mode.Bits.READ, parent);
    } catch (AccessControlException e) {
      auditContext.setAllowed(false);
      throw e;
    }
    // Possible ufs sync.
    syncMetadata(rpcContext, parent, lockingScheme, DescendantType.ALL);
    checkConsistencyRecursive(parent, inconsistentUris);
    auditContext.setSucceeded(true);
  }
  return inconsistentUris;
}
/**
 * Recursively checks the consistency of the subtree rooted at {@code inodePath}, appending
 * every inconsistent URI to {@code inconsistentUris}.
 *
 * @param inodePath the root of the subtree to check; must be locked
 * @param inconsistentUris accumulator for URIs found inconsistent with the UFS
 */
private void checkConsistencyRecursive(LockedInodePath inodePath,
    List inconsistentUris) throws IOException, FileDoesNotExistException {
  Inode inode = inodePath.getInode();
  try {
    if (!checkConsistencyInternal(inodePath)) {
      inconsistentUris.add(inodePath.getUri());
    }
    if (inode.isDirectory()) {
      InodeDirectory inodeDir = inode.asDirectory();
      for (Inode child : mInodeStore.getChildren(inodeDir)) {
        try (LockedInodePath childPath = inodePath.lockChild(child, LockPattern.READ)) {
          checkConsistencyRecursive(childPath, inconsistentUris);
        }
      }
    }
  } catch (InvalidPathException e) {
    // BUGFIX: SLF4J uses "{}" placeholders, not MessageFormat-style "{0}"; with "{0}" the
    // path was never substituted into the log message.
    LOG.debug("Path \"{}\" is invalid, has been ignored.",
        PathUtils.concatPath(inodePath.getUri().getPath()));
  }
}
/**
 * Checks if a path is consistent between Alluxio and the underlying storage.
 *
 * A path without a backing under storage is always consistent.
 *
 * A not persisted path is considered consistent if:
 * 1. It does not shadow an object in the underlying storage.
 *
 * A persisted path is considered consistent if:
 * 1. An equivalent object exists for its under storage path.
 * 2. The metadata of the Alluxio and under storage object are equal.
 *
 * @param inodePath the path to check. This must exist and be read-locked
 * @return true if the path is consistent, false otherwise
 */
private boolean checkConsistencyInternal(LockedInodePath inodePath) throws InvalidPathException,
    IOException {
  Inode inode;
  try {
    inode = inodePath.getInode();
  } catch (FileDoesNotExistException e) {
    throw new RuntimeException(e); // already checked existence when creating the inodePath
  }
  MountTable.Resolution resolution = mMountTable.resolve(inodePath.getUri());
  try (CloseableResource ufsResource = resolution.acquireUfsResource()) {
    UnderFileSystem ufs = ufsResource.get();
    String ufsPath = resolution.getUri().getPath();
    if (ufs == null) {
      // No backing under storage: trivially consistent.
      return true;
    }
    if (!inode.isPersisted()) {
      // A non-persisted path is consistent iff it does not shadow a UFS object.
      return !ufs.exists(ufsPath);
    }
    UfsStatus ufsStatus;
    try {
      ufsStatus = ufs.getStatus(ufsPath);
    } catch (FileNotFoundException e) {
      // FIX(readability): the inode is known to be persisted at this point (checked above),
      // so the previous "return !inode.isPersisted();" was a constant false in disguise.
      // A persisted inode whose UFS object is missing is inconsistent.
      return false;
    }
    // TODO(calvin): Evaluate which other metadata fields should be validated.
    if (inode.isDirectory()) {
      return ufsStatus.isDirectory();
    } else {
      String ufsFingerprint = Fingerprint.create(ufs.getUnderFSType(), ufsStatus).serialize();
      return ufsStatus.isFile()
          && (ufsFingerprint.equals(inode.asFile().getUfsFingerprint()));
    }
  }
}
/**
 * Completes the file at the given path, then schedules async persistence if the options
 * request it.
 *
 * @param path the path of the file to complete
 * @param context the method context
 */
@Override
public void completeFile(AlluxioURI path, CompleteFileContext context)
    throws BlockInfoException, FileDoesNotExistException, InvalidPathException,
    InvalidFileSizeException, FileAlreadyCompletedException, AccessControlException,
    UnavailableException {
  Metrics.COMPLETE_FILE_OPS.inc();
  // No need to syncMetadata before complete.
  try (RpcContext rpcContext = createRpcContext();
      LockedInodePath inodePath = mInodeTree.lockFullInodePath(path, LockPattern.WRITE_INODE);
      FileSystemMasterAuditContext auditContext =
          createAuditContext("completeFile", path, null, inodePath.getInodeOrNull())) {
    try {
      mPermissionChecker.checkPermission(Mode.Bits.WRITE, inodePath);
    } catch (AccessControlException e) {
      auditContext.setAllowed(false);
      throw e;
    }
    // Even readonly mount points should be able to complete a file, for UFS reads in CACHE mode.
    completeFileInternal(rpcContext, inodePath, context);
    // Schedule async persistence if requested.
    if (context.getOptions().hasAsyncPersistOptions()) {
      scheduleAsyncPersistenceInternal(inodePath, ScheduleAsyncPersistenceContext
          .create(context.getOptions().getAsyncPersistOptionsBuilder()), rpcContext);
    }
    auditContext.setSucceeded(true);
  }
}
/**
 * Completes a file. After a file is completed, it cannot be written to.
 *
 * @param rpcContext the rpc context
 * @param inodePath the {@link LockedInodePath} to complete
 * @param context the method context
 * @throws FileDoesNotExistException if the path does not refer to a file
 * @throws BlockInfoException if a non-persisted file is missing committed blocks, or a
 *         non-final block does not match the file block size
 */
private void completeFileInternal(RpcContext rpcContext, LockedInodePath inodePath,
    CompleteFileContext context)
    throws InvalidPathException, FileDoesNotExistException, BlockInfoException,
    FileAlreadyCompletedException, InvalidFileSizeException, UnavailableException {
  Inode inode = inodePath.getInode();
  if (!inode.isFile()) {
    throw new FileDoesNotExistException(
        ExceptionMessage.PATH_MUST_BE_FILE.getMessage(inodePath.getUri()));
  }
  InodeFile fileInode = inode.asFile();
  List blockIdList = fileInode.getBlockIds();
  List blockInfoList = mBlockMaster.getBlockInfoList(blockIdList);
  // A non-persisted file must have every block committed before it can be completed; a
  // persisted file's length comes from the UFS instead, so missing blocks are tolerated.
  if (!fileInode.isPersisted() && blockInfoList.size() != blockIdList.size()) {
    throw new BlockInfoException("Cannot complete a file without all the blocks committed");
  }
  // Iterate over all file blocks committed to Alluxio, computing the length and verify that all
  // the blocks (except the last one) is the same size as the file block size.
  long inAlluxioLength = 0;
  long fileBlockSize = fileInode.getBlockSizeBytes();
  for (int i = 0; i < blockInfoList.size(); i++) {
    BlockInfo blockInfo = blockInfoList.get(i);
    inAlluxioLength += blockInfo.getLength();
    if (i < blockInfoList.size() - 1 && blockInfo.getLength() != fileBlockSize) {
      throw new BlockInfoException(
          "Block index " + i + " has a block size smaller than the file block size (" + fileInode
          .getBlockSizeBytes() + ")");
    }
  }
  // If the file is persisted, its length is determined by UFS. Otherwise, its length is
  // determined by its size in Alluxio.
  long length = fileInode.isPersisted() ? context.getOptions().getUfsLength() : inAlluxioLength;
  String ufsFingerprint = Constants.INVALID_UFS_FINGERPRINT;
  if (fileInode.isPersisted()) {
    UfsStatus ufsStatus = context.getUfsStatus();
    // Retrieve the UFS fingerprint for this file.
    MountTable.Resolution resolution = mMountTable.resolve(inodePath.getUri());
    AlluxioURI resolvedUri = resolution.getUri();
    try (CloseableResource ufsResource = resolution.acquireUfsResource()) {
      UnderFileSystem ufs = ufsResource.get();
      if (ufsStatus == null) {
        // No status supplied by the caller; ask the UFS directly.
        ufsFingerprint = ufs.getFingerprint(resolvedUri.toString());
      } else {
        ufsFingerprint = Fingerprint.create(ufs.getUnderFSType(), ufsStatus).serialize();
      }
    }
  }
  completeFileInternal(rpcContext, inodePath, length, context.getOperationTimeMs(),
      ufsFingerprint);
}
/**
 * Journals the completion of a file: records its block list and final length, and updates
 * the inode's fingerprint and timestamps.
 *
 * @param rpcContext the rpc context
 * @param inodePath the {@link LockedInodePath} to complete
 * @param length the length to use
 * @param opTimeMs the operation time (in milliseconds)
 * @param ufsFingerprint the ufs fingerprint
 */
private void completeFileInternal(RpcContext rpcContext, LockedInodePath inodePath, long length,
    long opTimeMs, String ufsFingerprint)
    throws FileDoesNotExistException, InvalidPathException, InvalidFileSizeException,
    FileAlreadyCompletedException, UnavailableException {
  Preconditions.checkState(inodePath.getLockPattern().isWrite());
  InodeFile inode = inodePath.getInodeFile();
  if (inode.isCompleted() && inode.getLength() != Constants.UNKNOWN_SIZE) {
    // BUGFIX: the message previously called getName() (inherited from Server, i.e. the
    // master's own service name), reporting e.g. "File FileSystemMaster has already been
    // completed." Use the file inode's name, consistent with the message below.
    throw new FileAlreadyCompletedException(
        "File " + inode.getName() + " has already been completed.");
  }
  if (length < 0 && length != Constants.UNKNOWN_SIZE) {
    throw new InvalidFileSizeException(
        "File " + inode.getName() + " cannot have negative length: " + length);
  }
  Builder entry = UpdateInodeFileEntry.newBuilder()
      .setId(inode.getId())
      .setPath(inodePath.getUri().getPath())
      .setCompleted(true)
      .setLength(length);
  if (length == Constants.UNKNOWN_SIZE) {
    // TODO(gpang): allow unknown files to be multiple blocks.
    // If the length of the file is unknown, only allow 1 block to the file.
    length = inode.getBlockSizeBytes();
  }
  // Derive the block ids for the file from its container id and sequential block indices.
  int sequenceNumber = 0;
  long remainingBytes = length;
  while (remainingBytes > 0) {
    entry.addSetBlocks(BlockId.createBlockId(inode.getBlockContainerId(), sequenceNumber));
    remainingBytes -= Math.min(remainingBytes, inode.getBlockSizeBytes());
    sequenceNumber++;
  }
  if (inode.isPersisted()) {
    // Commit all the file blocks (without locations) so the metadata for the block exists.
    long currLength = length;
    for (long blockId : entry.getSetBlocksList()) {
      long blockSize = Math.min(currLength, inode.getBlockSizeBytes());
      mBlockMaster.commitBlockInUFS(blockId, blockSize);
      currLength -= blockSize;
    }
    // The path exists in UFS, so it is no longer absent
    mUfsAbsentPathCache.processExisting(inodePath.getUri());
  }
  // We could introduce a concept of composite entries, so that these two entries could
  // be applied in a single call to applyAndJournal.
  mInodeTree.updateInode(rpcContext, UpdateInodeEntry.newBuilder()
      .setId(inode.getId())
      .setUfsFingerprint(ufsFingerprint)
      .setLastModificationTimeMs(opTimeMs)
      .setLastAccessTimeMs(opTimeMs)
      .setOverwriteModificationTime(true)
      .build());
  mInodeTree.updateInodeFile(rpcContext, entry.build());
  Metrics.FILES_COMPLETED.inc();
}
/**
 * Creates a file at the given path and returns its {@link FileInfo}.
 *
 * @param path the path of the file to create
 * @param context the method context
 * @return the info of the newly created file
 */
@Override
public FileInfo createFile(AlluxioURI path, CreateFileContext context)
    throws AccessControlException, InvalidPathException, FileAlreadyExistsException,
    BlockInfoException, IOException, FileDoesNotExistException {
  Metrics.CREATE_FILES_OPS.inc();
  LockingScheme lockingScheme = createLockingScheme(path, context.getOptions().getCommonOptions(),
      LockPattern.WRITE_EDGE);
  try (RpcContext rpcContext = createRpcContext();
      LockedInodePath inodePath = mInodeTree
          .lockInodePath(lockingScheme.getPath(), lockingScheme.getPattern());
      FileSystemMasterAuditContext auditContext =
          createAuditContext("createFile", path, null, inodePath.getParentInodeOrNull())) {
    if (context.getOptions().getRecursive()) {
      // For recursive creates, audit against the last existing ancestor.
      auditContext.setSrcInode(inodePath.getLastExistingInode());
    }
    try {
      mPermissionChecker.checkParentPermission(Mode.Bits.WRITE, inodePath);
    } catch (AccessControlException e) {
      auditContext.setAllowed(false);
      throw e;
    }
    // Possible ufs sync.
    syncMetadata(rpcContext, inodePath, lockingScheme, DescendantType.ONE);
    mMountTable.checkUnderWritableMountPoint(path);
    if (context.isPersisted()) {
      // Check if ufs is writable
      checkUfsMode(path, OperationType.WRITE);
    }
    createFileInternal(rpcContext, inodePath, context);
    auditContext.setSrcInode(inodePath.getInode()).setSucceeded(true);
    return getFileInfoInternal(inodePath);
  }
}
/**
 * Creates the inode(s) for the file at {@code inodePath}.
 *
 * @param rpcContext the rpc context
 * @param inodePath the path to be created
 * @param context the method context
 * @return the list of created inodes
 */
List createFileInternal(RpcContext rpcContext, LockedInodePath inodePath,
    CreateFileContext context)
    throws InvalidPathException, FileAlreadyExistsException, BlockInfoException, IOException,
    FileDoesNotExistException {
  // Whitelisted paths are always marked cacheable.
  if (mWhitelist.inList(inodePath.getUri().toString())) {
    context.setCacheable(true);
  }
  // A successful create always yields a non-empty inode list.
  List newInodes = mInodeTree.createPath(rpcContext, inodePath, context);
  if (context.isPersisted()) {
    // The ancestors now exist in UFS (the file itself does not yet), so the parent path is
    // no longer absent.
    mUfsAbsentPathCache.processExisting(inodePath.getUri().getParent());
  }
  Metrics.FILES_CREATED.inc();
  return newInodes;
}
/**
 * Allocates and journals a new block id for the file at the given path.
 *
 * @param path the path of the file
 * @return the newly allocated block id
 */
@Override
public long getNewBlockIdForFile(AlluxioURI path) throws FileDoesNotExistException,
    InvalidPathException, AccessControlException, UnavailableException {
  Metrics.GET_NEW_BLOCK_OPS.inc();
  try (RpcContext rpcContext = createRpcContext();
      LockedInodePath inodePath = mInodeTree.lockFullInodePath(path, LockPattern.WRITE_INODE);
      FileSystemMasterAuditContext auditContext =
          createAuditContext("getNewBlockIdForFile", path, null, inodePath.getInodeOrNull())) {
    try {
      mPermissionChecker.checkPermission(Mode.Bits.WRITE, inodePath);
    } catch (AccessControlException e) {
      auditContext.setAllowed(false);
      throw e;
    }
    Metrics.NEW_BLOCKS_GOT.inc();
    // Journal the new block against the file's inode id.
    long blockId = mInodeTree.newBlock(rpcContext, NewBlockEntry.newBuilder()
        .setId(inodePath.getInode().getId())
        .build());
    auditContext.setSucceeded(true);
    return blockId;
  }
}
/**
 * Builds a sorted map from each Alluxio mount path to its display information.
 *
 * @return the mount table, transformed to display form
 */
@Override
public Map getMountTable() {
  SortedMap displayInfos = new TreeMap<>();
  for (Map.Entry entry : mMountTable.getMountTable().entrySet()) {
    displayInfos.put(entry.getKey(), getDisplayMountPointInfo(entry.getValue()));
  }
  return displayInfos;
}
/**
 * Returns the display information for the mount point at the given path.
 *
 * @param path the path of the mount point
 * @return the display information for the mount point
 * @throws InvalidPathException if the path is not a mount point
 */
@Override
public MountPointInfo getDisplayMountPointInfo(AlluxioURI path) throws InvalidPathException {
  if (mMountTable.isMountPoint(path)) {
    return getDisplayMountPointInfo(mMountTable.getMountTable().get(path.toString()));
  }
  throw new InvalidPathException(
      ExceptionMessage.PATH_MUST_BE_MOUNT_POINT.getMessage(path));
}
/**
 * Gets the mount point information for display from a mount information.
 *
 * @param mountInfo the mount information to transform
 * @return the mount point information
 */
private MountPointInfo getDisplayMountPointInfo(MountInfo mountInfo) {
  MountPointInfo info = mountInfo.toDisplayMountPointInfo();
  try (CloseableResource ufsResource =
      mUfsManager.get(mountInfo.getMountId()).acquireUfsResource()) {
    UnderFileSystem ufs = ufsResource.get();
    info.setUfsType(ufs.getUnderFSType());
    // Space lookups are best-effort: log and continue on failure so the mount point is
    // still displayed.
    try {
      info.setUfsCapacityBytes(
          ufs.getSpace(info.getUfsUri(), UnderFileSystem.SpaceType.SPACE_TOTAL));
    } catch (IOException e) {
      LOG.warn("Cannot get total capacity of {}", info.getUfsUri(), e);
    }
    try {
      info.setUfsUsedBytes(
          ufs.getSpace(info.getUfsUri(), UnderFileSystem.SpaceType.SPACE_USED));
    } catch (IOException e) {
      LOG.warn("Cannot get used capacity of {}", info.getUfsUri(), e);
    }
  } catch (UnavailableException | NotFoundException e) {
    // We should never reach here
    LOG.error("No UFS cached for {}", info, e);
  }
  return info;
}
/**
 * @return the total number of inodes tracked by the inode tree
 */
@Override
public long getInodeCount() {
  return mInodeTree.getInodeCount();
}
/**
 * @return the number of pinned inodes as reported by the inode tree
 */
@Override
public int getNumberOfPinnedFiles() {
  return mInodeTree.getPinnedSize();
}
/**
 * Deletes the given path, optionally recursively, after an optional UFS metadata sync.
 *
 * @param path the path to delete
 * @param context the method context
 */
@Override
public void delete(AlluxioURI path, DeleteContext context)
    throws IOException, FileDoesNotExistException, DirectoryNotEmptyException,
    InvalidPathException, AccessControlException {
  Metrics.DELETE_PATHS_OPS.inc();
  LockingScheme lockingScheme =
      createLockingScheme(path, context.getOptions().getCommonOptions(), LockPattern.WRITE_EDGE);
  try (RpcContext rpcContext = createRpcContext();
      LockedInodePath inodePath =
          mInodeTree.lockInodePath(lockingScheme.getPath(), lockingScheme.getPattern());
      FileSystemMasterAuditContext auditContext =
          createAuditContext("delete", path, null, inodePath.getInodeOrNull())) {
    // NOTE(review): unlike most other operations in this class, a failed parent-permission
    // check here is not caught to mark the audit context as disallowed -- confirm intended.
    mPermissionChecker.checkParentPermission(Mode.Bits.WRITE, inodePath);
    if (context.getOptions().getRecursive()) {
      // For recursive deletes, verify write permission on every descendant up front so the
      // error can name all unwritable children at once.
      List failedChildren = new ArrayList<>();
      try (LockedInodePathList descendants = mInodeTree.getDescendants(inodePath)) {
        for (LockedInodePath childPath : descendants) {
          try {
            mPermissionChecker.checkPermission(Mode.Bits.WRITE, childPath);
          } catch (AccessControlException e) {
            failedChildren.add(e.getMessage());
          }
        }
        if (failedChildren.size() > 0) {
          throw new AccessControlException(ExceptionMessage.DELETE_FAILED_DIR_CHILDREN
              .getMessage(path, StringUtils.join(failedChildren, ",")));
        }
      } catch (AccessControlException e) {
        auditContext.setAllowed(false);
        throw e;
      }
    }
    mMountTable.checkUnderWritableMountPoint(path);
    // Possible ufs sync.
    syncMetadata(rpcContext, inodePath, lockingScheme,
        context.getOptions().getRecursive() ? DescendantType.ALL : DescendantType.ONE);
    // The sync may have discovered that the path no longer exists.
    if (!inodePath.fullPathExists()) {
      throw new FileDoesNotExistException(ExceptionMessage.PATH_DOES_NOT_EXIST.getMessage(path));
    }
    deleteInternal(rpcContext, inodePath, context);
    auditContext.setSucceeded(true);
  }
}
/**
 * Implements file deletion.
 *
 * This method does not delete blocks. Instead, it returns deleted inodes so that their blocks can
 * be deleted after the inode deletion journal entry has been written. We cannot delete blocks
 * earlier because the inode deletion may fail, leaving us with inode containing deleted blocks.
 *
 * @param rpcContext the rpc context
 * @param inodePath the file {@link LockedInodePath}
 * @param deleteContext the method options
 */
@VisibleForTesting
public void deleteInternal(RpcContext rpcContext, LockedInodePath inodePath,
    DeleteContext deleteContext) throws FileDoesNotExistException, IOException,
    DirectoryNotEmptyException, InvalidPathException {
  Preconditions.checkState(inodePath.getLockPattern() == LockPattern.WRITE_EDGE);
  // TODO(jiri): A crash after any UFS object is deleted and before the delete operation is
  // journaled will result in an inconsistency between Alluxio and UFS.
  if (!inodePath.fullPathExists()) {
    return;
  }
  long opTimeMs = System.currentTimeMillis();
  Inode inode = inodePath.getInode();
  if (inode == null) {
    return;
  }
  boolean recursive = deleteContext.getOptions().getRecursive();
  if (inode.isDirectory() && !recursive && mInodeStore.hasChildren(inode.asDirectory())) {
    // inode is nonempty, and we don't want to delete a nonempty directory unless recursive is
    // true
    throw new DirectoryNotEmptyException(ExceptionMessage.DELETE_NONEMPTY_DIRECTORY_NONRECURSIVE,
        inode.getName());
  }
  if (mInodeTree.isRootId(inode.getId())) {
    // The root cannot be deleted.
    throw new InvalidPathException(ExceptionMessage.DELETE_ROOT_DIRECTORY.getMessage());
  }
  // Inodes for which deletion will be attempted
  List> inodesToDelete = new ArrayList<>();
  // Add root of sub-tree to delete
  inodesToDelete.add(new Pair<>(inodePath.getUri(), inodePath));
  try (LockedInodePathList descendants = mInodeTree.getDescendants(inodePath)) {
    for (LockedInodePath childPath : descendants) {
      inodesToDelete.add(new Pair<>(mInodeTree.getPath(childPath.getInode()), childPath));
    }
    // Prepare to delete persisted inodes
    UfsDeleter ufsDeleter = NoopUfsDeleter.INSTANCE;
    if (!deleteContext.getOptions().getAlluxioOnly()) {
      ufsDeleter = new SafeUfsDeleter(mMountTable, mInodeStore, inodesToDelete,
          deleteContext.getOptions().build());
    }
    // Inodes to delete from tree after attempting to delete from UFS
    List> revisedInodesToDelete = new ArrayList<>();
    // Inodes that are not safe for recursive deletes
    Set unsafeInodes = new HashSet<>();
    // Alluxio URIs (and the reason for failure) which could not be deleted
    List> failedUris = new ArrayList<>();
    // We go through each inode, removing it from its parent set and from mDelInodes. If it's a
    // file, we deal with the checkpoints and blocks as well.
    // Iterate in reverse (deepest first) so children are processed before their parents.
    for (int i = inodesToDelete.size() - 1; i >= 0; i--) {
      Pair inodePairToDelete = inodesToDelete.get(i);
      AlluxioURI alluxioUriToDelete = inodePairToDelete.getFirst();
      Inode inodeToDelete = inodePairToDelete.getSecond().getInode();
      String failureReason = null;
      if (unsafeInodes.contains(inodeToDelete.getId())) {
        failureReason = ExceptionMessage.DELETE_FAILED_DIR_NONEMPTY.getMessage();
      } else if (inodeToDelete.isPersisted()) {
        // If this is a mount point, we have deleted all the children and can unmount it
        // TODO(calvin): Add tests (ALLUXIO-1831)
        if (mMountTable.isMountPoint(alluxioUriToDelete)) {
          mMountTable.delete(rpcContext, alluxioUriToDelete, true);
        } else {
          if (!deleteContext.getOptions().getAlluxioOnly()) {
            try {
              checkUfsMode(alluxioUriToDelete, OperationType.WRITE);
              // Attempt to delete node if all children were deleted successfully
              ufsDeleter.delete(alluxioUriToDelete, inodeToDelete);
            } catch (AccessControlException e) {
              // In case ufs is not writable, we will still attempt to delete other entries
              // if any as they may be from a different mount point
              LOG.warn(e.getMessage());
              failureReason = e.getMessage();
            } catch (IOException e) {
              LOG.warn(e.getMessage());
              failureReason = e.getMessage();
            }
          }
        }
      }
      if (failureReason == null) {
        if (inodeToDelete.isFile()) {
          long fileId = inodeToDelete.getId();
          // Remove the file from the set of files to persist.
          mPersistRequests.remove(fileId);
          // Cancel any ongoing jobs.
          PersistJob job = mPersistJobs.get(fileId);
          if (job != null) {
            job.setCancelState(PersistJob.CancelState.TO_BE_CANCELED);
          }
        }
        revisedInodesToDelete.add(new Pair<>(alluxioUriToDelete, inodePairToDelete.getSecond()));
      } else {
        unsafeInodes.add(inodeToDelete.getId());
        // Propagate 'unsafe-ness' to parent as one of its descendants can't be deleted
        unsafeInodes.add(inodeToDelete.getParentId());
        failedUris.add(new Pair<>(alluxioUriToDelete.toString(), failureReason));
      }
    }
    // Stop any active sync on the deleted path if it was a sync point.
    MountTable.Resolution resolution = mSyncManager.resolveSyncPoint(inodePath.getUri());
    if (resolution != null) {
      mSyncManager.stopSyncInternal(inodePath.getUri(), resolution.getMountId());
    }
    // Delete Inodes
    for (Pair delInodePair : revisedInodesToDelete) {
      LockedInodePath tempInodePath = delInodePair.getSecond();
      mInodeTree.deleteInode(rpcContext, tempInodePath, opTimeMs);
    }
    if (!failedUris.isEmpty()) {
      Collection messages = failedUris.stream()
          .map(pair -> String.format("%s (%s)", pair.getFirst(), pair.getSecond()))
          .collect(Collectors.toList());
      throw new FailedPreconditionException(
          ExceptionMessage.DELETE_FAILED_UFS.getMessage(StringUtils.join(messages, ", ")));
    }
  }
  Metrics.PATHS_DELETED.inc(inodesToDelete.size());
}
/**
 * Returns the per-file block information for every block of the file at {@code path}.
 *
 * @param path the path of the file
 * @return the list of {@link FileBlockInfo} for the file's blocks
 */
@Override
public List getFileBlockInfoList(AlluxioURI path)
    throws FileDoesNotExistException, InvalidPathException, AccessControlException,
    UnavailableException {
  Metrics.GET_FILE_BLOCK_INFO_OPS.inc();
  try (LockedInodePath inodePath = mInodeTree.lockFullInodePath(path, LockPattern.READ);
      FileSystemMasterAuditContext auditContext =
          createAuditContext("getFileBlockInfoList", path, null, inodePath.getInodeOrNull())) {
    try {
      mPermissionChecker.checkPermission(Mode.Bits.READ, inodePath);
    } catch (AccessControlException e) {
      auditContext.setAllowed(false);
      throw e;
    }
    List ret = getFileBlockInfoListInternal(inodePath);
    Metrics.FILE_BLOCK_INFOS_GOT.inc();
    auditContext.setSucceeded(true);
    return ret;
  }
}
/**
 * Generates a {@link FileBlockInfo} for each block of the file at the given locked path.
 *
 * @param inodePath the {@link LockedInodePath} to get the info for
 * @return a list of {@link FileBlockInfo} for all the blocks of the given inode
 */
private List getFileBlockInfoListInternal(LockedInodePath inodePath)
    throws InvalidPathException, FileDoesNotExistException, UnavailableException {
  InodeFile file = inodePath.getInodeFile();
  List fileBlockInfos = new ArrayList<>();
  for (BlockInfo info : mBlockMaster.getBlockInfoList(file.getBlockIds())) {
    fileBlockInfos.add(generateFileBlockInfo(inodePath, info));
  }
  return fileBlockInfos;
}
/**
 * Generates a {@link FileBlockInfo} object from internal metadata. This adds file information to
 * the block, such as the file offset, and additional UFS locations for the block.
 *
 * @param inodePath the file the block is a part of
 * @param blockInfo the {@link BlockInfo} to generate the {@link FileBlockInfo} from
 * @return a new {@link FileBlockInfo} for the block
 */
private FileBlockInfo generateFileBlockInfo(LockedInodePath inodePath, BlockInfo blockInfo)
    throws FileDoesNotExistException {
  InodeFile file = inodePath.getInodeFile();
  FileBlockInfo result = new FileBlockInfo();
  result.setBlockInfo(blockInfo);
  result.setUfsLocations(new ArrayList<>());
  // The sequence number part of the block id is the block index.
  result.setOffset(file.getBlockSizeBytes() * BlockId.getSequenceNumber(blockInfo.getBlockId()));
  boolean noAlluxioReplicas = result.getBlockInfo().getLocations().isEmpty();
  if (noAlluxioReplicas && file.isPersisted()) {
    // No Alluxio locations, but there is a checkpoint in the under storage system. Add the
    // locations from the under storage system.
    List ufsLocations = mUfsBlockLocationCache.get(result.getBlockInfo().getBlockId(),
        inodePath.getUri(), result.getOffset());
    if (ufsLocations != null) {
      result.setUfsLocations(ufsLocations);
    }
  }
  return result;
}
/**
 * Returns whether the inodeFile is fully in Alluxio or not. The file is fully in Alluxio only if
 * all the blocks of the file are in Alluxio, in other words, the in-Alluxio percentage is 100.
 *
 * @param inode the inode file to check
 * @return true if the file is fully in Alluxio, false otherwise
 */
private boolean isFullyInAlluxio(InodeFile inode) throws UnavailableException {
  int percentage = getInAlluxioPercentage(inode);
  return percentage == 100;
}
/**
 * Returns whether the inodeFile is fully in memory or not. The file is fully in memory only if
 * all the blocks of the file are in memory, in other words, the in-memory percentage is 100.
 *
 * @param inode the inode file to check
 * @return true if the file is fully in memory, false otherwise
 */
private boolean isFullyInMemory(InodeFile inode) throws UnavailableException {
  return getInMemoryPercentage(inode) == 100;
}
/**
 * @return the URIs of all files that are fully cached in Alluxio, found by walking the
 *         inode tree from the root
 */
@Override
public List getInAlluxioFiles() throws UnavailableException {
  List result = new ArrayList<>();
  LockedInodePath root;
  try {
    root = mInodeTree.lockFullInodePath(new AlluxioURI(AlluxioURI.SEPARATOR), LockPattern.READ);
  } catch (FileDoesNotExistException | InvalidPathException e) {
    // The root inode must always exist, so this cannot legitimately happen.
    throw new RuntimeException(e);
  }
  try (LockedInodePath lockedRoot = root) {
    getInAlluxioFilesInternal(lockedRoot, result);
  }
  return result;
}
/**
 * @return the URIs of all files that are fully resident in memory, found by walking the
 *         inode tree from the root
 */
@Override
public List getInMemoryFiles() throws UnavailableException {
  List result = new ArrayList<>();
  LockedInodePath root;
  try {
    root = mInodeTree.lockFullInodePath(new AlluxioURI(AlluxioURI.SEPARATOR), LockPattern.READ);
  } catch (FileDoesNotExistException | InvalidPathException e) {
    // The root inode must always exist, so this cannot legitimately happen.
    throw new RuntimeException(e);
  }
  try (LockedInodePath lockedRoot = root) {
    getInMemoryFilesInternal(lockedRoot, result);
  }
  return result;
}
/**
 * Recursively collects in-Alluxio files under the given path into the supplied list. This
 * method assumes the inode passed in is already read locked.
 *
 * @param inodePath the inode path to search
 * @param files the list to accumulate the results in
 */
private void getInAlluxioFilesInternal(LockedInodePath inodePath, List files)
    throws UnavailableException {
  Inode inode = inodePath.getInodeOrNull();
  if (inode == null) {
    return;
  }
  if (!inode.isFile()) {
    // Directory: descend into each child, read-locking it for the duration of the visit.
    for (Inode child : mInodeStore.getChildren(inode.asDirectory())) {
      try (LockedInodePath childPath = inodePath.lockChild(child, LockPattern.READ)) {
        getInAlluxioFilesInternal(childPath, files);
      } catch (InvalidPathException e) {
        // The inode was concurrently removed from under this directory; skip it.
        continue;
      }
    }
    return;
  }
  // File: record it only when every block is cached in Alluxio.
  if (isFullyInAlluxio(inode.asFile())) {
    files.add(inodePath.getUri());
  }
}
/**
 * Recursively collects in-memory files under the given path into the supplied list. This
 * method assumes the inode passed in is already read locked.
 *
 * @param inodePath the inode path to search
 * @param files the list to accumulate the results in
 */
private void getInMemoryFilesInternal(LockedInodePath inodePath, List files)
    throws UnavailableException {
  Inode inode = inodePath.getInodeOrNull();
  if (inode == null) {
    return;
  }
  if (!inode.isFile()) {
    // Directory: descend into each child, read-locking it for the duration of the visit.
    for (Inode child : mInodeStore.getChildren(inode.asDirectory())) {
      try (LockedInodePath childPath = inodePath.lockChild(child, LockPattern.READ)) {
        getInMemoryFilesInternal(childPath, files);
      } catch (InvalidPathException e) {
        // The inode was concurrently removed from under this directory; skip it.
        continue;
      }
    }
    return;
  }
  // File: record it only when every block is resident in memory.
  if (isFullyInMemory(inode.asFile())) {
    files.add(inodePath.getUri());
  }
}
/**
 * Gets the in-memory percentage of an Inode. For a file that has all blocks in memory, it
 * returns 100; for a file that has no block in memory, it returns 0. Returns 0 for a directory.
 *
 * @param inode the inode
 * @return the in memory percentage
 */
private int getInMemoryPercentage(Inode inode) throws UnavailableException {
  if (!inode.isFile()) {
    return 0;
  }
  InodeFile inodeFile = inode.asFile();
  long length = inodeFile.getLength();
  // An empty file is trivially 100% in memory (and avoids dividing by zero below).
  if (length == 0) {
    return 100;
  }
  long inMemoryLength = 0;
  // Sum the lengths of all blocks that have at least one copy in the top storage tier.
  for (BlockInfo info : mBlockMaster.getBlockInfoList(inodeFile.getBlockIds())) {
    if (isInTopStorageTier(info)) {
      inMemoryLength += info.getLength();
    }
  }
  return (int) (inMemoryLength * 100 / length);
}
/**
 * Gets the in-Alluxio percentage of an Inode. For a file that has all blocks in Alluxio, it
 * returns 100; for a file that has no block in Alluxio, it returns 0. Returns 0 for a directory.
 *
 * @param inode the inode
 * @return the in alluxio percentage
 */
private int getInAlluxioPercentage(Inode inode) throws UnavailableException {
  if (!inode.isFile()) {
    return 0;
  }
  InodeFile file = inode.asFile();
  long fileLength = file.getLength();
  // An empty file counts as fully cached (and avoids a division by zero below).
  if (fileLength == 0) {
    return 100;
  }
  long cachedBytes = 0;
  for (BlockInfo blockInfo : mBlockMaster.getBlockInfoList(file.getBlockIds())) {
    // A block with any location is cached somewhere in Alluxio.
    if (!blockInfo.getLocations().isEmpty()) {
      cachedBytes += blockInfo.getLength();
    }
  }
  return (int) (cachedBytes * 100 / fileLength);
}
/**
 * @param blockInfo the block to examine
 * @return true if the given block is in the top storage level in some worker, false otherwise
 */
private boolean isInTopStorageTier(BlockInfo blockInfo) {
  for (BlockLocation location : blockInfo.getLocations()) {
    int tierOrdinal =
        mBlockMaster.getGlobalStorageTierAssoc().getOrdinal(location.getTierAlias());
    // Ordinal 0 is the highest (fastest) storage tier.
    if (tierOrdinal == 0) {
      return true;
    }
  }
  return false;
}
/**
 * Creates a directory at the given path, auditing the operation and enforcing WRITE
 * permission on the parent.
 *
 * @param path the path of the directory to create
 * @param context the method context, carrying the create options
 * @return the id of the created directory inode
 */
@Override
public long createDirectory(AlluxioURI path, CreateDirectoryContext context)
    throws InvalidPathException, FileAlreadyExistsException, IOException, AccessControlException,
    FileDoesNotExistException {
  LOG.debug("createDirectory {} ", path);
  Metrics.CREATE_DIRECTORIES_OPS.inc();
  LockingScheme lockingScheme = createLockingScheme(path, context.getOptions().getCommonOptions(),
      LockPattern.WRITE_EDGE);
  try (RpcContext rpcContext = createRpcContext();
      LockedInodePath inodePath = mInodeTree
          .lockInodePath(lockingScheme.getPath(), lockingScheme.getPattern());
      FileSystemMasterAuditContext auditContext =
          createAuditContext("mkdir", path, null, inodePath.getParentInodeOrNull())) {
    if (context.getOptions().getRecursive()) {
      // For a recursive create, audit against the deepest ancestor that already exists.
      auditContext.setSrcInode(inodePath.getLastExistingInode());
    }
    try {
      mPermissionChecker.checkParentPermission(Mode.Bits.WRITE, inodePath);
    } catch (AccessControlException e) {
      // Record the denial in the audit log before propagating.
      auditContext.setAllowed(false);
      throw e;
    }
    // Possible ufs sync.
    syncMetadata(rpcContext, inodePath, lockingScheme, DescendantType.ONE);
    mMountTable.checkUnderWritableMountPoint(path);
    if (context.isPersisted()) {
      // A persisted directory requires the UFS to currently allow writes.
      checkUfsMode(path, OperationType.WRITE);
    }
    createDirectoryInternal(rpcContext, inodePath, context);
    auditContext.setSrcInode(inodePath.getInode()).setSucceeded(true);
    return inodePath.getInode().getId();
  }
}
/**
 * Implementation of directory creation for a given path. Requires the path to be locked with
 * the WRITE_EDGE pattern.
 *
 * @param rpcContext the rpc context
 * @param inodePath the path of the directory
 * @param context method context
 * @return a list of created inodes
 */
private List createDirectoryInternal(RpcContext rpcContext, LockedInodePath inodePath,
    CreateDirectoryContext context) throws InvalidPathException, FileAlreadyExistsException,
    IOException, FileDoesNotExistException {
  Preconditions.checkState(inodePath.getLockPattern() == LockPattern.WRITE_EDGE);
  try {
    List createResult = mInodeTree.createPath(rpcContext, inodePath, context);
    InodeDirectory inodeDirectory = inodePath.getInode().asDirectory();
    String ufsFingerprint = Constants.INVALID_UFS_FINGERPRINT;
    if (inodeDirectory.isPersisted()) {
      UfsStatus ufsStatus = context.getUfsStatus();
      // Retrieve the UFS fingerprint for this file.
      MountTable.Resolution resolution = mMountTable.resolve(inodePath.getUri());
      AlluxioURI resolvedUri = resolution.getUri();
      try (CloseableResource ufsResource = resolution.acquireUfsResource()) {
        UnderFileSystem ufs = ufsResource.get();
        if (ufsStatus == null) {
          // No status supplied by the caller; ask the UFS directly.
          ufsFingerprint = ufs.getFingerprint(resolvedUri.toString());
        } else {
          // Derive the fingerprint from the status we already have, avoiding another UFS call.
          ufsFingerprint = Fingerprint.create(ufs.getUnderFSType(), ufsStatus).serialize();
        }
      }
    }
    // Record the (possibly invalid) fingerprint on the new directory inode.
    mInodeTree.updateInode(rpcContext, UpdateInodeEntry.newBuilder()
        .setId(inodeDirectory.getId())
        .setUfsFingerprint(ufsFingerprint)
        .build());
    if (context.isPersisted()) {
      // The path exists in UFS, so it is no longer absent.
      mUfsAbsentPathCache.processExisting(inodePath.getUri());
    }
    Metrics.DIRECTORIES_CREATED.inc();
    return createResult;
  } catch (BlockInfoException e) {
    // Since we are creating a directory, the block size is ignored, no such exception should
    // happen.
    throw new RuntimeException(e);
  }
}
/**
 * Renames srcPath to dstPath, auditing the operation and enforcing WRITE permission on both
 * parents. Both paths are locked together as a pair to avoid deadlock between concurrent
 * renames.
 *
 * @param srcPath the source path to rename
 * @param dstPath the destination path
 * @param context the method context, carrying the rename options
 */
@Override
public void rename(AlluxioURI srcPath, AlluxioURI dstPath, RenameContext context)
    throws FileAlreadyExistsException, FileDoesNotExistException, InvalidPathException,
    IOException, AccessControlException {
  Metrics.RENAME_PATH_OPS.inc();
  LockingScheme srcLockingScheme = createLockingScheme(srcPath,
      context.getOptions().getCommonOptions(), LockPattern.WRITE_EDGE);
  LockingScheme dstLockingScheme = createLockingScheme(dstPath,
      context.getOptions().getCommonOptions(), LockPattern.WRITE_EDGE);
  try (RpcContext rpcContext = createRpcContext();
      InodePathPair inodePathPair = mInodeTree
          .lockInodePathPair(srcLockingScheme.getPath(), srcLockingScheme.getPattern(),
              dstLockingScheme.getPath(), dstLockingScheme.getPattern());
      FileSystemMasterAuditContext auditContext =
          createAuditContext("rename", srcPath, dstPath, null)) {
    LockedInodePath srcInodePath = inodePathPair.getFirst();
    LockedInodePath dstInodePath = inodePathPair.getSecond();
    auditContext.setSrcInode(srcInodePath.getParentInodeOrNull());
    try {
      // Rename needs write access to both the source's and the destination's parent.
      mPermissionChecker.checkParentPermission(Mode.Bits.WRITE, srcInodePath);
      mPermissionChecker.checkParentPermission(Mode.Bits.WRITE, dstInodePath);
    } catch (AccessControlException e) {
      // Record the denial in the audit log before propagating.
      auditContext.setAllowed(false);
      throw e;
    }
    // Possible ufs sync.
    syncMetadata(rpcContext, srcInodePath, srcLockingScheme, DescendantType.ONE);
    syncMetadata(rpcContext, dstInodePath, dstLockingScheme, DescendantType.ONE);
    mMountTable.checkUnderWritableMountPoint(srcPath);
    mMountTable.checkUnderWritableMountPoint(dstPath);
    renameInternal(rpcContext, srcInodePath, dstInodePath, context);
    auditContext.setSrcInode(srcInodePath.getInode()).setSucceeded(true);
    LOG.debug("Renamed {} to {}", srcPath, dstPath);
  }
}
/**
 * Decides whether a path is eligible for async persistence: a path is excluded as soon as it
 * contains any of the configured blacklist fragments.
 *
 * @param path the path to test
 * @return true if the path may be persisted, false if it matches the persistence blacklist
 */
private boolean shouldPersistPath(String path) {
  for (String blacklisted : mPersistBlacklist) {
    if (!path.contains(blacklisted)) {
      continue;
    }
    LOG.debug("Not persisting path {} because it is in {}: {}", path,
        PropertyKey.Name.MASTER_PERSISTENCE_BLACKLIST, mPersistBlacklist);
    return false;
  }
  return true;
}
/**
 * Renames a file to a destination. Validates the rename (existence, mount-point constraints,
 * no rename into own subtree, destination absence), performs the rename, and then optionally
 * schedules async persistence when the context asks for persist-on-rename.
 *
 * @param rpcContext the rpc context
 * @param srcInodePath the source path to rename
 * @param dstInodePath the destination path to rename the file to
 * @param context method options
 */
private void renameInternal(RpcContext rpcContext, LockedInodePath srcInodePath,
    LockedInodePath dstInodePath, RenameContext context) throws InvalidPathException,
    FileDoesNotExistException, FileAlreadyExistsException, IOException, AccessControlException {
  if (!srcInodePath.fullPathExists()) {
    throw new FileDoesNotExistException(
        ExceptionMessage.PATH_DOES_NOT_EXIST.getMessage(srcInodePath.getUri()));
  }
  Inode srcInode = srcInodePath.getInode();
  // Renaming path to itself is a no-op.
  if (srcInodePath.getUri().equals(dstInodePath.getUri())) {
    return;
  }
  // Renaming the root is not allowed.
  if (srcInodePath.getUri().isRoot()) {
    throw new InvalidPathException(ExceptionMessage.ROOT_CANNOT_BE_RENAMED.getMessage());
  }
  if (dstInodePath.getUri().isRoot()) {
    throw new InvalidPathException(ExceptionMessage.RENAME_CANNOT_BE_TO_ROOT.getMessage());
  }
  // Renaming across mount points is not allowed.
  String srcMount = mMountTable.getMountPoint(srcInodePath.getUri());
  String dstMount = mMountTable.getMountPoint(dstInodePath.getUri());
  // The condition covers: exactly one side under a mount, or both sides under different mounts.
  if ((srcMount == null && dstMount != null) || (srcMount != null && dstMount == null) || (
      srcMount != null && dstMount != null && !srcMount.equals(dstMount))) {
    throw new InvalidPathException(ExceptionMessage.RENAME_CANNOT_BE_ACROSS_MOUNTS
        .getMessage(srcInodePath.getUri(), dstInodePath.getUri()));
  }
  // Renaming onto a mount point is not allowed.
  if (mMountTable.isMountPoint(dstInodePath.getUri())) {
    throw new InvalidPathException(
        ExceptionMessage.RENAME_CANNOT_BE_ONTO_MOUNT_POINT.getMessage(dstInodePath.getUri()));
  }
  // Renaming a path to one of its subpaths is not allowed. Check for that, by making sure
  // srcComponents isn't a prefix of dstComponents.
  if (PathUtils.hasPrefix(dstInodePath.getUri().getPath(), srcInodePath.getUri().getPath())) {
    throw new InvalidPathException(ExceptionMessage.RENAME_CANNOT_BE_TO_SUBDIRECTORY
        .getMessage(srcInodePath.getUri(), dstInodePath.getUri()));
  }
  // Get the inodes of the src and dst parents.
  Inode srcParentInode = srcInodePath.getParentInodeDirectory();
  if (!srcParentInode.isDirectory()) {
    throw new InvalidPathException(
        ExceptionMessage.PATH_MUST_HAVE_VALID_PARENT.getMessage(srcInodePath.getUri()));
  }
  Inode dstParentInode = dstInodePath.getParentInodeDirectory();
  if (!dstParentInode.isDirectory()) {
    throw new InvalidPathException(
        ExceptionMessage.PATH_MUST_HAVE_VALID_PARENT.getMessage(dstInodePath.getUri()));
  }
  // Make sure destination path does not exist
  if (dstInodePath.fullPathExists()) {
    throw new FileAlreadyExistsException(
        ExceptionMessage.FILE_ALREADY_EXISTS.getMessage(dstInodePath.getUri()));
  }
  // Now we remove srcInode from its parent and insert it into dstPath's parent
  renameInternal(rpcContext, srcInodePath, dstInodePath, false, context);
  // Check options and determine if we should schedule async persist. This is helpful for compute
  // frameworks that use rename as a commit operation.
  if (context.getPersist() && srcInode.isFile() && !srcInode.isPersisted()
      && shouldPersistPath(dstInodePath.toString())) {
    LOG.debug("Schedule Async Persist on rename for File {}", srcInodePath);
    mInodeTree.updateInode(rpcContext, UpdateInodeEntry.newBuilder()
        .setId(srcInode.getId())
        .setPersistenceState(PersistenceState.TO_BE_PERSISTED.name())
        .build());
    long shouldPersistTime = srcInode.asFile().getShouldPersistTime();
    // NO_AUTO_PERSIST means persist immediately; otherwise honor the configured wait.
    long persistenceWaitTime = shouldPersistTime == Constants.NO_AUTO_PERSIST ? 0
        : getPersistenceWaitTime(shouldPersistTime);
    mPersistRequests.put(srcInode.getId(), new alluxio.time.ExponentialTimer(
        ServerConfiguration.getMs(PropertyKey.MASTER_PERSISTENCE_INITIAL_INTERVAL_MS),
        ServerConfiguration.getMs(PropertyKey.MASTER_PERSISTENCE_MAX_INTERVAL_MS),
        persistenceWaitTime,
        ServerConfiguration.getMs(PropertyKey.MASTER_PERSISTENCE_MAX_TOTAL_WAIT_TIME_MS)));
  }
  // If a directory is being renamed with persist on rename, attempt to persist children
  if (srcInode.isDirectory() && context.getPersist()
      && shouldPersistPath(dstInodePath.toString())) {
    LOG.debug("Schedule Async Persist on rename for Dir: {}", dstInodePath);
    try (LockedInodePathList descendants = mInodeTree.getDescendants(srcInodePath)) {
      for (LockedInodePath childPath : descendants) {
        Inode childInode = childPath.getInode();
        // TODO(apc999): Resolve the child path legitimately
        // The blacklist check uses the child's path relative to the renamed directory.
        if (childInode.isFile() && !childInode.isPersisted()
            && shouldPersistPath(
                childPath.toString().substring(srcInodePath.toString().length()))) {
          LOG.debug("Schedule Async Persist on rename for Child File: {}", childPath);
          mInodeTree.updateInode(rpcContext, UpdateInodeEntry.newBuilder()
              .setId(childInode.getId())
              .setPersistenceState(PersistenceState.TO_BE_PERSISTED.name())
              .build());
          long shouldPersistTime = childInode.asFile().getShouldPersistTime();
          long persistenceWaitTime = shouldPersistTime == Constants.NO_AUTO_PERSIST ? 0
              : getPersistenceWaitTime(shouldPersistTime);
          mPersistRequests.put(childInode.getId(), new alluxio.time.ExponentialTimer(
              ServerConfiguration.getMs(PropertyKey.MASTER_PERSISTENCE_INITIAL_INTERVAL_MS),
              ServerConfiguration.getMs(PropertyKey.MASTER_PERSISTENCE_MAX_INTERVAL_MS),
              persistenceWaitTime,
              ServerConfiguration.getMs(PropertyKey.MASTER_PERSISTENCE_MAX_TOTAL_WAIT_TIME_MS)));
        }
      }
    }
  }
}
/**
 * Implements renaming. Updates the inode tree first, then performs the UFS rename when the
 * source is persisted; on any UFS failure the inode-tree rename is reverted before the error
 * is rethrown.
 *
 * @param rpcContext the rpc context
 * @param srcInodePath the path of the rename source
 * @param dstInodePath the path to the rename destination
 * @param replayed whether the operation is a result of replaying the journal
 * @param context method options
 */
private void renameInternal(RpcContext rpcContext, LockedInodePath srcInodePath,
    LockedInodePath dstInodePath, boolean replayed, RenameContext context)
    throws FileDoesNotExistException, InvalidPathException, IOException, AccessControlException {
  // Rename logic:
  // 1. Change the source inode name to the destination name.
  // 2. Insert the source inode into the destination parent.
  // 3. Do UFS operations if necessary.
  // 4. Remove the source inode (reverting the name) from the source parent.
  // 5. Set the last modification times for both source and destination parent inodes.
  Inode srcInode = srcInodePath.getInode();
  AlluxioURI srcPath = srcInodePath.getUri();
  AlluxioURI dstPath = dstInodePath.getUri();
  InodeDirectory srcParentInode = srcInodePath.getParentInodeDirectory();
  InodeDirectory dstParentInode = dstInodePath.getParentInodeDirectory();
  String srcName = srcPath.getName();
  String dstName = dstPath.getName();
  LOG.debug("Renaming {} to {}", srcPath, dstPath);
  if (dstInodePath.fullPathExists()) {
    throw new InvalidPathException("Destination path: " + dstPath + " already exists.");
  }
  mInodeTree.rename(rpcContext, RenameEntry.newBuilder()
      .setId(srcInode.getId())
      .setOpTimeMs(context.getOperationTimeMs())
      .setNewParentId(dstParentInode.getId())
      .setNewName(dstName)
      .setPath(srcPath.getPath())
      .setNewPath(dstPath.getPath())
      .build());
  // 3. Do UFS operations if necessary.
  // If the source file is persisted, rename it in the UFS.
  try {
    if (!replayed && srcInode.isPersisted()) {
      // Check if ufs is writable
      checkUfsMode(srcPath, OperationType.WRITE);
      checkUfsMode(dstPath, OperationType.WRITE);
      MountTable.Resolution resolution = mMountTable.resolve(srcPath);
      // Persist ancestor directories from top to the bottom. We cannot use recursive create
      // parents here because the permission for the ancestors can be different.
      // inodes from the same mount point as the dst
      Stack sameMountDirs = new Stack<>();
      List dstInodeList = dstInodePath.getInodeList();
      // Walk upward from the destination, collecting directories until the mount point.
      for (int i = dstInodeList.size() - 1; i >= 0; i--) {
        // Since dstInodePath is guaranteed not to be a full path, all inodes in the incomplete
        // path are guaranteed to be a directory.
        InodeDirectory dir = dstInodeList.get(i).asDirectory();
        sameMountDirs.push(dir);
        if (dir.isMountPoint()) {
          break;
        }
      }
      // Popping the stack yields the directories top-down, so parents persist before children.
      while (!sameMountDirs.empty()) {
        InodeDirectory dir = sameMountDirs.pop();
        if (!dir.isPersisted()) {
          mInodeTree.syncPersistExistingDirectory(rpcContext, dir);
        }
      }
      String ufsSrcPath = resolution.getUri().toString();
      try (CloseableResource ufsResource = resolution.acquireUfsResource()) {
        UnderFileSystem ufs = ufsResource.get();
        String ufsDstUri = mMountTable.resolve(dstPath).getUri().toString();
        boolean success;
        if (srcInode.isFile()) {
          success = ufs.renameRenamableFile(ufsSrcPath, ufsDstUri);
        } else {
          success = ufs.renameRenamableDirectory(ufsSrcPath, ufsDstUri);
        }
        if (!success) {
          throw new IOException(
              ExceptionMessage.FAILED_UFS_RENAME.getMessage(ufsSrcPath, ufsDstUri));
        }
      }
      // The destination was persisted in ufs.
      mUfsAbsentPathCache.processExisting(dstPath);
    }
  } catch (Throwable t) {
    // On failure, revert changes and throw exception.
    // The reverse rename restores the original name and parent recorded above.
    mInodeTree.rename(rpcContext, RenameEntry.newBuilder()
        .setId(srcInode.getId())
        .setOpTimeMs(context.getOperationTimeMs())
        .setNewName(srcName)
        .setNewParentId(srcParentInode.getId())
        .setPath(dstPath.getPath())
        .setNewPath(srcPath.getPath())
        .build());
    throw t;
  }
  Metrics.PATHS_RENAMED.inc();
}
/**
 * Propagates the persisted status to all parents of the given inode in the same mount partition.
 * Walks from the target's immediate parent toward the root, marking each ancestor PERSISTED,
 * and stops at the first mount point or the first already-persisted ancestor.
 *
 * @param journalContext the journal context
 * @param inodePath the inode to start the propagation at
 * @throws FileDoesNotExistException if the inode at the given path no longer exists
 */
private void propagatePersistedInternal(Supplier journalContext,
    LockedInodePath inodePath) throws FileDoesNotExistException {
  // Resolve the target inode up front; this validates that the path still exists.
  inodePath.getInode();
  List inodes = inodePath.getInodeList();
  // Traverse the inodes from target inode to the root.
  Collections.reverse(inodes);
  // Skip the first, to not examine the target inode itself.
  inodes = inodes.subList(1, inodes.size());
  for (Inode ancestor : inodes) {
    // the path is already locked.
    AlluxioURI path = mInodeTree.getPath(ancestor);
    if (mMountTable.isMountPoint(path)) {
      // Stop propagating the persisted status at mount points.
      break;
    }
    if (ancestor.isPersisted()) {
      // Stop if a persisted directory is encountered.
      break;
    }
    mInodeTree.updateInode(journalContext, UpdateInodeEntry.newBuilder()
        .setId(ancestor.getId())
        .setPersistenceState(PersistenceState.PERSISTED.name())
        .build());
  }
}
/**
 * Frees the data of the path (and optionally its descendants) from Alluxio storage, auditing
 * the access and enforcing READ permission on the path.
 *
 * @param path the path to free
 * @param context the method context, carrying the free options
 */
@Override
public void free(AlluxioURI path, FreeContext context)
    throws FileDoesNotExistException, InvalidPathException, AccessControlException,
    UnexpectedAlluxioException, IOException {
  Metrics.FREE_FILE_OPS.inc();
  // No need to syncMetadata before free.
  try (RpcContext rpcContext = createRpcContext();
      LockedInodePath inodePath = mInodeTree.lockFullInodePath(path, LockPattern.WRITE_INODE);
      FileSystemMasterAuditContext auditContext =
          createAuditContext("free", path, null, inodePath.getInodeOrNull())) {
    try {
      mPermissionChecker.checkPermission(Mode.Bits.READ, inodePath);
    } catch (AccessControlException ace) {
      // Record the denial in the audit log before propagating.
      auditContext.setAllowed(false);
      throw ace;
    }
    freeInternal(rpcContext, inodePath, context);
    auditContext.setSucceeded(true);
  }
}
/**
 * Implements free operation: releases the Alluxio-cached blocks of the target file, or of
 * every file under the target directory when the recursive option is set.
 *
 * @param rpcContext the rpc context
 * @param inodePath inode of the path to free
 * @param context context to free method
 */
private void freeInternal(RpcContext rpcContext, LockedInodePath inodePath, FreeContext context)
    throws FileDoesNotExistException, UnexpectedAlluxioException,
    IOException, InvalidPathException, AccessControlException {
  Inode targetInode = inodePath.getInode();
  // A non-empty directory may only be freed when the caller asked for a recursive free.
  if (targetInode.isDirectory() && !context.getOptions().getRecursive()
      && mInodeStore.hasChildren(targetInode.asDirectory())) {
    throw new UnexpectedAlluxioException(
        ExceptionMessage.CANNOT_FREE_NON_EMPTY_DIR.getMessage(mInodeTree.getPath(targetInode)));
  }
  long operationTimeMs = System.currentTimeMillis();
  List freed = new ArrayList<>();
  freed.add(targetInode);
  try (LockedInodePathList descendants = mInodeTree.getDescendants(inodePath)) {
    // Visit every descendant, and finally the target path itself.
    for (LockedInodePath candidatePath : Iterables.concat(descendants,
        Collections.singleton(inodePath))) {
      Inode candidate = candidatePath.getInodeOrNull();
      if (candidate == null || !candidate.isFile()) {
        continue;
      }
      // Only persisted files may be freed, otherwise their data would be lost.
      if (candidate.getPersistenceState() != PersistenceState.PERSISTED) {
        throw new UnexpectedAlluxioException(ExceptionMessage.CANNOT_FREE_NON_PERSISTED_FILE
            .getMessage(mInodeTree.getPath(candidate)));
      }
      if (candidate.isPinned()) {
        if (!context.getOptions().getForced()) {
          throw new UnexpectedAlluxioException(ExceptionMessage.CANNOT_FREE_PINNED_FILE
              .getMessage(mInodeTree.getPath(candidate)));
        }
        // Forced free: unpin the file before releasing its blocks.
        SetAttributeContext unpinContext = SetAttributeContext
            .mergeFrom(SetAttributePOptions.newBuilder().setRecursive(false).setPinned(false));
        setAttributeSingleFile(rpcContext, candidatePath, true, operationTimeMs, unpinContext);
      }
      // Remove corresponding blocks from workers (block metadata is kept; delete=false).
      mBlockMaster.removeBlocks(candidate.asFile().getBlockIds(), false /* delete */);
    }
  }
  // NOTE(review): only the root inode is ever added to this list, so the metric increments
  // by one per free operation regardless of how many files were freed — preserved as-is.
  Metrics.FILES_FREED.inc(freed.size());
}
/**
 * @param fileId the id of the file to look up
 * @return the Alluxio path of the inode with the given id
 */
@Override
public AlluxioURI getPath(long fileId) throws FileDoesNotExistException {
  try (LockedInodePath lockedPath = mInodeTree.lockFullInodePath(fileId, LockPattern.READ)) {
    // The path is already locked, so resolving it is safe.
    return mInodeTree.getPath(lockedPath.getInode());
  }
}
/**
 * @return the ids of all inodes that must not be evicted: both explicitly pinned inodes and
 *         inodes that are not yet persisted
 */
@Override
public Set getPinIdList() {
  Set pinned = mInodeTree.getPinIdSet();
  Set toBePersisted = mInodeTree.getToBePersistedIds();
  // Not-yet-persisted files count as pinned, since evicting them would lose data.
  return Sets.union(pinned, toBePersisted);
}
/**
 * @return the configured UFS address of the root mount point
 */
@Override
public String getUfsAddress() {
  return ServerConfiguration.get(PropertyKey.MASTER_MOUNT_TABLE_ROOT_UFS);
}
/**
 * Looks up the UFS information for a mount id. Returns an empty {@link UfsInfo} when the
 * mount id is unknown.
 *
 * @param mountId the id of the mount to describe
 * @return the UFS information for the mount, or an empty UfsInfo if the mount does not exist
 */
@Override
public UfsInfo getUfsInfo(long mountId) {
  MountInfo mountInfo = mMountTable.getMountInfo(mountId);
  if (mountInfo == null) {
    // Unknown mount id: return an empty descriptor rather than null.
    return new UfsInfo();
  }
  MountPOptions mountOptions = mountInfo.getOptions();
  MountPOptions.Builder optionsBuilder = MountPOptions.newBuilder()
      .putAllProperties(mountOptions.getPropertiesMap())
      .setReadOnly(mountOptions.getReadOnly())
      .setShared(mountOptions.getShared());
  return new UfsInfo().setUri(mountInfo.getUfsUri())
      .setMountOptions(MountContext.mergeFrom(optionsBuilder).getOptions().build());
}
/**
 * @return the list of path prefixes in the whitelist
 */
@Override
public List getWhiteList() {
  return mWhitelist.getList();
}
/**
 * @return the ids of all files that have at least one lost block, derived from the block
 *         master's lost-block set
 */
@Override
public List getLostFiles() {
  Set fileIds = new HashSet<>();
  for (long lostBlockId : mBlockMaster.getLostBlocks()) {
    // A block's container id doubles as the id of the file the block belongs to.
    long containerId = BlockId.getContainerId(lostBlockId);
    fileIds.add(IdUtils.createFileId(containerId));
  }
  return new ArrayList<>(fileIds);
}
/**
 * Loads metadata for the object identified by the given path from UFS into Alluxio.
 *
 * This operation requires users to have WRITE permission on the path
 * and its parent path if path is a file, or WRITE permission on the
 * parent path if path is a directory.
 *
 * @param rpcContext the rpc context
 * @param inodePath the path for which metadata should be loaded
 * @param context the load metadata context
 * @throws AccessControlException if permission checking fails
 * @throws BlockInfoException if an invalid block size is encountered
 * @throws FileAlreadyCompletedException if the file is already completed
 * @throws FileDoesNotExistException if there is no UFS path
 * @throws InvalidFileSizeException if invalid file size is encountered
 * @throws InvalidPathException if invalid path is encountered
 */
private void loadMetadataInternal(RpcContext rpcContext, LockedInodePath inodePath,
    LoadMetadataContext context)
    throws AccessControlException, BlockInfoException, FileAlreadyCompletedException,
    FileDoesNotExistException, InvalidFileSizeException, InvalidPathException, IOException {
  AlluxioURI path = inodePath.getUri();
  MountTable.Resolution resolution = mMountTable.resolve(path);
  AlluxioURI ufsUri = resolution.getUri();
  try (CloseableResource ufsResource = resolution.acquireUfsResource()) {
    UnderFileSystem ufs = ufsResource.get();
    if (context.getUfsStatus() == null && !ufs.exists(ufsUri.toString())) {
      // uri does not exist in ufs
      InodeDirectory inode = inodePath.getInode().asDirectory();
      mInodeTree.setDirectChildrenLoaded(rpcContext, inode);
      return;
    }
    boolean isFile;
    if (context.getUfsStatus() != null) {
      // Use the status supplied by the caller to avoid an extra UFS round trip.
      isFile = context.getUfsStatus().isFile();
    } else {
      isFile = ufs.isFile(ufsUri.toString());
    }
    if (isFile) {
      loadFileMetadataInternal(rpcContext, inodePath, resolution, context);
    } else {
      loadDirectoryMetadata(rpcContext, inodePath, context);
      if (context.getOptions().getLoadDescendantType() != LoadDescendantPType.NONE) {
        // Listing is recursive only when ALL descendants were requested.
        ListOptions listOptions = ListOptions.defaults();
        if (context.getOptions().getLoadDescendantType() == LoadDescendantPType.ALL) {
          listOptions.setRecursive(true);
        } else {
          listOptions.setRecursive(false);
        }
        UfsStatus[] children = ufs.listStatus(ufsUri.toString(), listOptions);
        // children can be null if the pathname does not denote a directory
        // or if the we do not have permission to listStatus on the directory in the ufs.
        if (children == null) {
          throw new IOException("Failed to loadMetadata because ufs can not listStatus at path "
              + ufsUri.toString());
        }
        // Sort by name so parent directories are visited before their contents.
        Arrays.sort(children, Comparator.comparing(UfsStatus::getName));
        for (UfsStatus childStatus : children) {
          if (PathUtils.isTemporaryFileName(childStatus.getName())) {
            continue;
          }
          AlluxioURI childURI = new AlluxioURI(
              PathUtils.concatPath(inodePath.getUri(), childStatus.getName()));
          if (mInodeTree.inodePathExists(childURI) && (childStatus.isFile()
              || context.getOptions().getLoadDescendantType() != LoadDescendantPType.ALL)) {
            // stop traversing if this is an existing file, or an existing directory without
            // loading all descendants.
            continue;
          }
          try (LockedInodePath descendant = inodePath.lockDescendant(
              inodePath.getUri().joinUnsafe(childStatus.getName()), LockPattern.READ)) {
            // Children are loaded one level at a time; recursion below handles deeper levels.
            LoadMetadataContext loadMetadataContext = LoadMetadataContext
                .mergeFrom(LoadMetadataPOptions.newBuilder()
                    .setLoadDescendantType(LoadDescendantPType.NONE).setCreateAncestors(false))
                .setUfsStatus(childStatus);
            try {
              loadMetadataInternal(rpcContext, descendant, loadMetadataContext);
            } catch (FileNotFoundException e) {
              // The child disappeared from the UFS between listing and loading; skip it.
              LOG.debug(
                  "Failed to loadMetadata because file is not in ufs:"
                      + " inodePath={}, options={}.",
                  descendant.getUri(), loadMetadataContext, e);
              continue;
            } catch (Exception e) {
              // Best-effort: a failure on one child must not abort loading its siblings.
              LOG.info("Failed to loadMetadata: inodePath={}, options={}.", descendant.getUri(),
                  loadMetadataContext, e);
              continue;
            }
            if (context.getOptions().getLoadDescendantType() == LoadDescendantPType.ALL
                && descendant.getInode().isDirectory()) {
              mInodeTree.setDirectChildrenLoaded(rpcContext, descendant.getInode().asDirectory());
            }
          }
        }
        mInodeTree.setDirectChildrenLoaded(rpcContext, inodePath.getInode().asDirectory());
      }
    }
  } catch (IOException e) {
    LOG.debug("Failed to loadMetadata: inodePath={}, context={}.", inodePath.getUri(),
        context, e);
    throw e;
  }
}
  /**
   * Loads metadata for the file identified by the given path from UFS into Alluxio.
   *
   * This method doesn't require any specific type of locking on inodePath. If the path needs to be
   * loaded, we will acquire a write-edge lock.
   *
   * @param rpcContext the rpc context
   * @param inodePath the path for which metadata should be loaded
   * @param resolution the UFS resolution of path
   * @param context the load metadata context
   */
  private void loadFileMetadataInternal(RpcContext rpcContext, LockedInodePath inodePath,
      MountTable.Resolution resolution, LoadMetadataContext context)
      throws BlockInfoException, FileDoesNotExistException, InvalidPathException,
      FileAlreadyCompletedException, InvalidFileSizeException, IOException {
    // Nothing to do if the file is already present in the inode tree.
    if (inodePath.fullPathExists()) {
      return;
    }
    AlluxioURI ufsUri = resolution.getUri();
    long ufsBlockSizeByte;
    long ufsLength;
    AccessControlList acl = null;
    try (CloseableResource ufsResource = resolution.acquireUfsResource()) {
      UnderFileSystem ufs = ufsResource.get();
      // Fetch the UFS status lazily if the caller did not supply one.
      if (context.getUfsStatus() == null) {
        context.setUfsStatus(ufs.getExistingFileStatus(ufsUri.toString()));
      }
      ufsLength = ((UfsFileStatus) context.getUfsStatus()).getContentLength();
      long blockSize = ((UfsFileStatus) context.getUfsStatus()).getBlockSize();
      // Fall back to the UFS-wide block size when the status does not carry one.
      ufsBlockSizeByte = blockSize != UfsFileStatus.UNKNOWN_BLOCK_SIZE
          ? blockSize : ufs.getBlockSizeByte(ufsUri.toString());
      if (isAclEnabled()) {
        Pair aclPair
            = ufs.getAclPair(ufsUri.toString());
        if (aclPair != null) {
          acl = aclPair.getFirst();
          // DefaultACL should be null, because it is a file
          if (aclPair.getSecond() != null) {
            LOG.warn("File {} has default ACL in the UFS", inodePath.getUri());
          }
        }
      }
    }
    // Metadata loaded from UFS has no TTL set.
    CreateFileContext createFileContext = CreateFileContext.defaults();
    createFileContext.getOptions().setBlockSizeBytes(ufsBlockSizeByte);
    createFileContext.getOptions().setRecursive(context.getOptions().getCreateAncestors());
    createFileContext.getOptions()
        .setCommonOptions(FileSystemMasterCommonPOptions.newBuilder()
            .setTtl(context.getOptions().getCommonOptions().getTtl())
            .setTtlAction(context.getOptions().getCommonOptions().getTtlAction()));
    createFileContext.setWriteType(WriteType.THROUGH); // set as through since already in UFS
    createFileContext.setMetadataLoad(true);
    createFileContext.setOwner(context.getUfsStatus().getOwner());
    createFileContext.setGroup(context.getUfsStatus().getGroup());
    createFileContext.setXAttr(context.getUfsStatus().getXAttr());
    short ufsMode = context.getUfsStatus().getMode();
    Mode mode = new Mode(ufsMode);
    Long ufsLastModified = context.getUfsStatus().getLastModifiedTime();
    // For shared mounts, grant "other" users the same access bits as the owner.
    if (resolution.getShared()) {
      mode.setOtherBits(mode.getOtherBits().or(mode.getOwnerBits()));
    }
    createFileContext.getOptions().setMode(mode.toProto());
    if (acl != null) {
      createFileContext.setAcl(acl.getEntries());
    }
    if (ufsLastModified != null) {
      createFileContext.setOperationTimeMs(ufsLastModified);
    }
    // Hold the write lock on the final edge only for the create + complete steps.
    try (LockedInodePath writeLockedPath = inodePath.lockFinalEdgeWrite()) {
      createFileInternal(rpcContext, writeLockedPath, createFileContext);
      CompleteFileContext completeContext =
          CompleteFileContext.mergeFrom(CompleteFilePOptions.newBuilder().setUfsLength(ufsLength))
              .setUfsStatus(context.getUfsStatus());
      if (ufsLastModified != null) {
        completeContext.setOperationTimeMs(ufsLastModified);
      }
      completeFileInternal(rpcContext, writeLockedPath, completeContext);
    } catch (FileAlreadyExistsException e) {
      // This may occur if a thread created or loaded the file before we got the write lock.
      // The file already exists, so nothing needs to be loaded.
      LOG.debug("Failed to load file metadata: {}", e.toString());
    }
    // Re-traverse the path to pick up any newly created inodes.
    inodePath.traverse();
  }
  /**
   * Loads metadata for the directory identified by the given path from UFS into Alluxio. This does
   * not actually require looking at the UFS path.
   * It is a no-op if the directory exists.
   *
   * This method doesn't require any specific type of locking on inodePath. If the path needs to be
   * loaded, we will acquire a write-edge lock if necessary.
   *
   * @param rpcContext the rpc context
   * @param inodePath the path for which metadata should be loaded
   * @param context the load metadata context
   */
  private void loadDirectoryMetadata(RpcContext rpcContext, LockedInodePath inodePath,
      LoadMetadataContext context)
      throws FileDoesNotExistException, InvalidPathException, AccessControlException, IOException {
    // No-op if the directory is already present in the inode tree.
    if (inodePath.fullPathExists()) {
      return;
    }
    CreateDirectoryContext createDirectoryContext = CreateDirectoryContext.defaults();
    createDirectoryContext.getOptions()
        .setRecursive(context.getOptions().getCreateAncestors()).setAllowExists(false)
        .setCommonOptions(FileSystemMasterCommonPOptions.newBuilder()
            .setTtl(context.getOptions().getCommonOptions().getTtl())
            .setTtlAction(context.getOptions().getCommonOptions().getTtlAction()));
    createDirectoryContext.setMountPoint(mMountTable.isMountPoint(inodePath.getUri()));
    createDirectoryContext.setMetadataLoad(true);
    // THROUGH since the directory already exists in the UFS.
    createDirectoryContext.setWriteType(WriteType.THROUGH);
    MountTable.Resolution resolution = mMountTable.resolve(inodePath.getUri());
    AlluxioURI ufsUri = resolution.getUri();
    AccessControlList acl = null;
    DefaultAccessControlList defaultAcl = null;
    try (CloseableResource ufsResource = resolution.acquireUfsResource()) {
      UnderFileSystem ufs = ufsResource.get();
      // Fetch the UFS status lazily if the caller did not supply one.
      if (context.getUfsStatus() == null) {
        context.setUfsStatus(ufs.getExistingDirectoryStatus(ufsUri.toString()));
      }
      // Directories may carry both an access ACL and a default ACL.
      Pair aclPair =
          ufs.getAclPair(ufsUri.toString());
      if (aclPair != null) {
        acl = aclPair.getFirst();
        defaultAcl = aclPair.getSecond();
      }
    }
    String ufsOwner = context.getUfsStatus().getOwner();
    String ufsGroup = context.getUfsStatus().getGroup();
    short ufsMode = context.getUfsStatus().getMode();
    Long lastModifiedTime = context.getUfsStatus().getLastModifiedTime();
    Mode mode = new Mode(ufsMode);
    // For shared mounts, grant "other" users the same access bits as the owner.
    if (resolution.getShared()) {
      mode.setOtherBits(mode.getOtherBits().or(mode.getOwnerBits()));
    }
    createDirectoryContext.getOptions().setMode(mode.toProto());
    createDirectoryContext.setOwner(ufsOwner).setGroup(ufsGroup)
        .setUfsStatus(context.getUfsStatus());
    createDirectoryContext.setXAttr(context.getUfsStatus().getXAttr());
    if (acl != null) {
      createDirectoryContext.setAcl(acl.getEntries());
    }
    if (defaultAcl != null) {
      createDirectoryContext.setDefaultAcl(defaultAcl.getEntries());
    }
    if (lastModifiedTime != null) {
      createDirectoryContext.setOperationTimeMs(lastModifiedTime);
    }
    // Hold the write lock on the final edge only while creating the inode.
    try (LockedInodePath writeLockedPath = inodePath.lockFinalEdgeWrite()) {
      createDirectoryInternal(rpcContext, writeLockedPath, createDirectoryContext);
    } catch (FileAlreadyExistsException e) {
      // This may occur if a thread created or loaded the directory before we got the write lock.
      // The directory already exists, so nothing needs to be loaded.
    }
    // Re-traverse the path to pick up any newly created inodes.
    inodePath.traverse();
  }
/**
* Loads metadata for the path if it is (non-existing || load direct children is set).
*
* @param rpcContext the rpc context
* @param inodePath the {@link LockedInodePath} to load the metadata for
* @param context the load metadata context
*/
private void loadMetadataIfNotExist(RpcContext rpcContext, LockedInodePath inodePath,
LoadMetadataContext context) {
Preconditions.checkState(inodePath.getLockPattern() == LockPattern.READ);
boolean inodeExists = inodePath.fullPathExists();
boolean loadDirectChildren = false;
if (inodeExists) {
try {
Inode inode = inodePath.getInode();
loadDirectChildren = inode.isDirectory()
&& (context.getOptions().getLoadDescendantType() != LoadDescendantPType.NONE);
} catch (FileDoesNotExistException e) {
// This should never happen.
throw new RuntimeException(e);
}
}
if (!inodeExists || loadDirectChildren) {
try {
loadMetadataInternal(rpcContext, inodePath, context);
} catch (AlluxioException | IOException e) {
LOG.debug("Failed to load metadata for path from UFS: {}", inodePath.getUri(), e);
}
}
}
private void prepareForMount(AlluxioURI ufsPath, long mountId, MountContext context)
throws IOException {
MountPOptions.Builder mountOption = context.getOptions();
try (CloseableResource ufsResource =
mUfsManager.get(mountId).acquireUfsResource()) {
UnderFileSystem ufs = ufsResource.get();
// Check that the ufsPath exists and is a directory
if (!ufs.isDirectory(ufsPath.toString())) {
throw new IOException(
ExceptionMessage.UFS_PATH_DOES_NOT_EXIST.getMessage(ufsPath.getPath()));
}
if (UnderFileSystemUtils.isWeb(ufs)) {
mountOption.setReadOnly(true);
}
}
}
  /**
   * Updates an existing mount point with new options, swapping in a freshly validated UFS client.
   *
   * On any failure the new UFS client is removed again so no half-updated state remains.
   *
   * @param journalContext supplier of the journal context used to journal the update
   * @param inodePath the Alluxio mount point being updated
   * @param ufsPath the UFS path of the mount
   * @param mountInfo the existing mount info for the mount point
   * @param context the mount context carrying the new options
   */
  private void updateMountInternal(Supplier journalContext,
      LockedInodePath inodePath, AlluxioURI ufsPath, MountInfo mountInfo, MountContext context)
      throws FileAlreadyExistsException, InvalidPathException, IOException {
    long newMountId = IdUtils.createMountId();
    // lock sync manager to ensure no sync point is added before the mount point is removed
    try (LockResource r = new LockResource(mSyncManager.getSyncManagerLock())) {
      List syncPoints = mSyncManager.getFilterList(mountInfo.getMountId());
      if (syncPoints != null && !syncPoints.isEmpty()) {
        throw new InvalidArgumentException("Updating a mount point with ActiveSync enabled is not"
            + " supported. Please remove all sync'ed paths from the mount point and try again.");
      }
      AlluxioURI alluxioPath = inodePath.getUri();
      // validate new UFS client before updating the mount table
      mUfsManager.addMount(newMountId, new AlluxioURI(ufsPath.toString()),
          UnderFileSystemConfiguration.defaults(ServerConfiguration.global())
              .setReadOnly(context.getOptions().getReadOnly())
              .setShared(context.getOptions().getShared())
              .createMountSpecificConf(context.getOptions().getPropertiesMap()));
      prepareForMount(ufsPath, newMountId, context);
      // old ufsClient is removed as part of the mount table update process
      mMountTable.update(journalContext, alluxioPath, newMountId, context.getOptions().build());
    } catch (FileAlreadyExistsException | InvalidPathException | IOException e) {
      // revert everything
      mUfsManager.removeMount(newMountId);
      throw e;
    }
  }
  @Override
  public void updateMount(AlluxioURI alluxioPath, MountContext context)
      throws FileAlreadyExistsException, FileDoesNotExistException, InvalidPathException,
      IOException, AccessControlException {
    // Updating a mount point requires a write-edge lock on the mount path.
    LockingScheme lockingScheme = createLockingScheme(alluxioPath,
        context.getOptions().getCommonOptions(), LockPattern.WRITE_EDGE);
    try (RpcContext rpcContext = createRpcContext();
        LockedInodePath inodePath = mInodeTree
            .lockInodePath(lockingScheme.getPath(), lockingScheme.getPattern());
        FileSystemMasterAuditContext auditContext = createAuditContext(
            "updateMount", alluxioPath, null, inodePath.getParentInodeOrNull())) {
      try {
        mPermissionChecker.checkParentPermission(Mode.Bits.WRITE, inodePath);
      } catch (AccessControlException e) {
        // Record the denial in the audit log before propagating.
        auditContext.setAllowed(false);
        throw e;
      }
      MountInfo mountInfo = mMountTable.getMountTable().get(alluxioPath.getPath());
      if (mountInfo == null) {
        throw new InvalidPathException("Failed to update mount properties for "
            + inodePath.getUri() + ". Please ensure the path is an existing mount point.");
      }
      updateMountInternal(rpcContext, inodePath, mountInfo.getUfsUri(), mountInfo, context);
      auditContext.setSucceeded(true);
    }
  }
  @Override
  public void mount(AlluxioURI alluxioPath, AlluxioURI ufsPath, MountContext context)
      throws FileAlreadyExistsException, FileDoesNotExistException, InvalidPathException,
      IOException, AccessControlException {
    Metrics.MOUNT_OPS.inc();
    // Mounting creates the mount-point inode, so a write-edge lock is required.
    LockingScheme lockingScheme = createLockingScheme(alluxioPath,
        context.getOptions().getCommonOptions(), LockPattern.WRITE_EDGE);
    try (RpcContext rpcContext = createRpcContext();
        LockedInodePath inodePath = mInodeTree
            .lockInodePath(lockingScheme.getPath(), lockingScheme.getPattern());
        FileSystemMasterAuditContext auditContext =
            createAuditContext("mount", alluxioPath, null, inodePath.getParentInodeOrNull())) {
      try {
        mPermissionChecker.checkParentPermission(Mode.Bits.WRITE, inodePath);
      } catch (AccessControlException e) {
        // Record the denial in the audit log before propagating.
        auditContext.setAllowed(false);
        throw e;
      }
      mMountTable.checkUnderWritableMountPoint(alluxioPath);
      // Possible ufs sync.
      syncMetadata(rpcContext, inodePath, lockingScheme, DescendantType.ONE);
      mountInternal(rpcContext, inodePath, ufsPath, context);
      auditContext.setSucceeded(true);
      Metrics.PATHS_MOUNTED.inc();
    }
  }
  /**
   * Mounts a UFS path onto an Alluxio path.
   *
   * @param rpcContext the rpc context
   * @param inodePath the Alluxio path to mount to
   * @param ufsPath the UFS path to mount
   * @param context the mount context
   */
  private void mountInternal(RpcContext rpcContext, LockedInodePath inodePath, AlluxioURI ufsPath,
      MountContext context) throws InvalidPathException, FileAlreadyExistsException,
      FileDoesNotExistException, IOException, AccessControlException {
    // Check that the Alluxio Path does not exist
    if (inodePath.fullPathExists()) {
      // TODO(calvin): Add a test to validate this (ALLUXIO-1831)
      throw new InvalidPathException(
          ExceptionMessage.MOUNT_POINT_ALREADY_EXISTS.getMessage(inodePath.getUri()));
    }
    long mountId = IdUtils.createMountId();
    // Register the mount in the mount table and the UFS manager.
    mountInternal(rpcContext, inodePath, ufsPath, mountId, context);
    boolean loadMetadataSucceeded = false;
    try {
      // This will create the directory at alluxioPath
      loadDirectoryMetadata(rpcContext, inodePath, LoadMetadataContext
          .mergeFrom(LoadMetadataPOptions.newBuilder().setCreateAncestors(false)));
      loadMetadataSucceeded = true;
    } finally {
      // Roll back the mount table entry if the mount-point directory could not be created.
      if (!loadMetadataSucceeded) {
        mMountTable.delete(rpcContext, inodePath.getUri(), true);
      }
    }
  }
  /**
   * Updates the mount table with the specified mount point. The mount options may be updated during
   * this method.
   *
   * @param journalContext the journal context
   * @param inodePath the Alluxio mount point
   * @param ufsPath the UFS endpoint to mount
   * @param mountId the mount id
   * @param context the mount context (may be updated)
   */
  private void mountInternal(Supplier journalContext, LockedInodePath inodePath,
      AlluxioURI ufsPath, long mountId, MountContext context)
      throws FileAlreadyExistsException, InvalidPathException, IOException {
    AlluxioURI alluxioPath = inodePath.getUri();
    // Adding the mount point will not create the UFS instance and thus not connect to UFS
    mUfsManager.addMount(mountId, new AlluxioURI(ufsPath.toString()),
        UnderFileSystemConfiguration.defaults(ServerConfiguration.global())
            .setReadOnly(context.getOptions().getReadOnly())
            .setShared(context.getOptions().getShared())
            .createMountSpecificConf(context.getOptions().getPropertiesMap()));
    try {
      // Validate the UFS path and possibly force the mount options to read-only.
      prepareForMount(ufsPath, mountId, context);
      // Check that the alluxioPath we're creating doesn't shadow a path in the parent UFS
      MountTable.Resolution resolution = mMountTable.resolve(alluxioPath);
      try (CloseableResource ufsResource =
          resolution.acquireUfsResource()) {
        String ufsResolvedPath = resolution.getUri().getPath();
        if (ufsResource.get().exists(ufsResolvedPath)) {
          throw new IOException(
              ExceptionMessage.MOUNT_PATH_SHADOWS_PARENT_UFS.getMessage(alluxioPath,
                  ufsResolvedPath));
        }
      }
      // Add the mount point. This will only succeed if we are not mounting a prefix of an existing
      // mount.
      mMountTable.add(journalContext, alluxioPath, ufsPath, mountId, context.getOptions().build());
    } catch (Exception e) {
      // Unregister the UFS client on any failure so no orphaned client remains.
      mUfsManager.removeMount(mountId);
      throw e;
    }
  }
  @Override
  public void unmount(AlluxioURI alluxioPath) throws FileDoesNotExistException,
      InvalidPathException, IOException, AccessControlException {
    Metrics.UNMOUNT_OPS.inc();
    // Unmount should lock the parent to remove the child inode.
    try (RpcContext rpcContext = createRpcContext();
        LockedInodePath inodePath = mInodeTree
            .lockInodePath(alluxioPath, LockPattern.WRITE_EDGE);
        FileSystemMasterAuditContext auditContext =
            createAuditContext("unmount", alluxioPath, null, inodePath.getInodeOrNull())) {
      try {
        mPermissionChecker.checkParentPermission(Mode.Bits.WRITE, inodePath);
      } catch (AccessControlException e) {
        // Record the denial in the audit log before propagating.
        auditContext.setAllowed(false);
        throw e;
      }
      unmountInternal(rpcContext, inodePath);
      auditContext.setSucceeded(true);
      Metrics.PATHS_UNMOUNTED.inc();
    }
  }
  /**
   * Unmounts a UFS path previously mounted onto an Alluxio path.
   *
   * This method does not delete blocks. Instead, it adds them to the passed-in block deletion
   * context so that the blocks can be deleted after the inode deletion journal entry has been
   * written. We cannot delete blocks earlier because the inode deletion may fail, leaving us with
   * an inode containing deleted blocks.
   *
   * @param rpcContext the rpc context
   * @param inodePath the Alluxio path to unmount, must be a mount point
   */
  private void unmountInternal(RpcContext rpcContext, LockedInodePath inodePath)
      throws InvalidPathException, FileDoesNotExistException, IOException {
    if (!inodePath.fullPathExists()) {
      throw new FileDoesNotExistException(
          "Failed to unmount: Path " + inodePath.getUri() + " does not exist");
    }
    MountInfo mountInfo = mMountTable.getMountTable().get(inodePath.getUri().getPath());
    if (mountInfo == null) {
      throw new InvalidPathException("Failed to unmount " + inodePath.getUri() + ". Please ensure"
          + " the path is an existing mount point.");
    }
    // Stop any active syncing on this mount before removing it.
    mSyncManager.stopSyncForMount(mountInfo.getMountId());
    if (!mMountTable.delete(rpcContext, inodePath.getUri(), true)) {
      throw new InvalidPathException("Failed to unmount " + inodePath.getUri() + ". Please ensure"
          + " the path is an existing mount point and not root.");
    }
    try {
      // Use the internal delete API, setting {@code alluxioOnly} to true to prevent the delete
      // operations from being persisted in the UFS.
      deleteInternal(rpcContext, inodePath, DeleteContext
          .mergeFrom(DeletePOptions.newBuilder().setRecursive(true).setAlluxioOnly(true)));
    } catch (DirectoryNotEmptyException e) {
      throw new RuntimeException(String.format(
          "We should never see this exception because %s should never be thrown when recursive "
              + "is true.",
          e.getClass()));
    }
  }
  @Override
  public void setAcl(AlluxioURI path, SetAclAction action, List entries,
      SetAclContext context)
      throws FileDoesNotExistException, AccessControlException, InvalidPathException, IOException {
    Metrics.SET_ACL_OPS.inc();
    LockingScheme lockingScheme =
        createLockingScheme(path, context.getOptions().getCommonOptions(), LockPattern.WRITE_INODE);
    try (RpcContext rpcContext = createRpcContext();
        LockedInodePath inodePath =
            mInodeTree.lockInodePath(lockingScheme.getPath(), lockingScheme.getPattern());
        FileSystemMasterAuditContext auditContext =
            createAuditContext("setAcl", path, null, inodePath.getInodeOrNull())) {
      mPermissionChecker.checkSetAttributePermission(inodePath, false, true, false);
      // For a recursive setAcl, the caller must also be allowed to change every descendant.
      if (context.getOptions().getRecursive()) {
        try (LockedInodePathList descendants = mInodeTree.getDescendants(inodePath)) {
          for (LockedInodePath child : descendants) {
            mPermissionChecker.checkSetAttributePermission(child, false, true, false);
          }
        } catch (AccessControlException e) {
          // Record the denial in the audit log before propagating.
          auditContext.setAllowed(false);
          throw e;
        }
      }
      // Possible ufs sync.
      syncMetadata(rpcContext, inodePath, lockingScheme,
          context.getOptions().getRecursive() ? DescendantType.ALL : DescendantType.NONE);
      if (!inodePath.fullPathExists()) {
        throw new FileDoesNotExistException(ExceptionMessage.PATH_DOES_NOT_EXIST.getMessage(path));
      }
      setAclInternal(rpcContext, action, inodePath, entries, context);
      auditContext.setSucceeded(true);
    }
  }
  /**
   * Validates the requested ACL change for the given action and then applies it (recursively if
   * the context requests it).
   *
   * @param rpcContext the rpc context
   * @param action the kind of ACL modification to perform
   * @param inodePath the write-locked path whose ACL is changed
   * @param entries the ACL entries for the action
   * @param context the setAcl context
   */
  private void setAclInternal(RpcContext rpcContext, SetAclAction action, LockedInodePath inodePath,
      List entries, SetAclContext context)
      throws IOException, FileDoesNotExistException {
    Preconditions.checkState(inodePath.getLockPattern().isWrite());
    long opTimeMs = System.currentTimeMillis();
    // Check inputs for setAcl
    switch (action) {
      case REPLACE:
        Set types =
            entries.stream().map(AclEntry::getType).collect(Collectors.toSet());
        Set requiredTypes =
            Sets.newHashSet(AclEntryType.OWNING_USER, AclEntryType.OWNING_GROUP,
                AclEntryType.OTHER);
        requiredTypes.removeAll(types);
        // make sure the required entries are present
        if (!requiredTypes.isEmpty()) {
          throw new IOException(ExceptionMessage.ACL_BASE_REQUIRED.getMessage(
              String.join(", ", requiredTypes.stream().map(AclEntryType::toString).collect(
                  Collectors.toList()))));
        }
        break;
      case MODIFY: // fall through
      case REMOVE:
        if (entries.isEmpty()) {
          // Nothing to do.
          return;
        }
        break;
      case REMOVE_ALL:
        break;
      case REMOVE_DEFAULT:
        break;
      default:
        // Other actions require no extra validation here.
    }
    setAclRecursive(rpcContext, action, inodePath, entries, false, opTimeMs, context);
  }
private void setUfsAcl(LockedInodePath inodePath)
throws InvalidPathException, AccessControlException {
Inode inode = inodePath.getInodeOrNull();
checkUfsMode(inodePath.getUri(), OperationType.WRITE);
MountTable.Resolution resolution = mMountTable.resolve(inodePath.getUri());
String ufsUri = resolution.getUri().toString();
try (CloseableResource ufsResource = resolution.acquireUfsResource()) {
UnderFileSystem ufs = ufsResource.get();
if (ufs.isObjectStorage()) {
LOG.warn("SetACL is not supported to object storage UFS via Alluxio. "
+ "UFS: " + ufsUri + ". This has no effect on the underlying object.");
} else {
try {
List entries = new ArrayList<>(inode.getACL().getEntries());
if (inode.isDirectory()) {
entries.addAll(inode.asDirectory().getDefaultACL().getEntries());
}
ufs.setAclEntries(ufsUri, entries);
} catch (IOException e) {
throw new AccessControlException("Could not setAcl for UFS file: " + ufsUri);
}
}
}
}
  /**
   * Applies an ACL change to a single inode, journaling the update and best-effort propagating the
   * new ACL to the UFS for persisted inodes.
   *
   * @param rpcContext the rpc context
   * @param action the kind of ACL modification to perform
   * @param inodePath the write-locked path whose ACL is changed
   * @param entries the ACL entries for the action
   * @param replay whether this is a journal replay (skips the UFS update)
   * @param opTimeMs the operation timestamp in milliseconds
   */
  private void setAclSingleInode(RpcContext rpcContext, SetAclAction action,
      LockedInodePath inodePath, List entries, boolean replay, long opTimeMs)
      throws IOException, FileDoesNotExistException {
    Preconditions.checkState(inodePath.getLockPattern().isWrite());
    Inode inode = inodePath.getInode();
    // Check that we are not removing an extended mask.
    if (action == SetAclAction.REMOVE) {
      for (AclEntry entry : entries) {
        if ((entry.isDefault() && inode.getDefaultACL().hasExtended())
            || (!entry.isDefault() && inode.getACL().hasExtended())) {
          if (entry.getType() == AclEntryType.MASK) {
            throw new InvalidArgumentException(
                "Deleting the mask for an extended ACL is not allowed. entry: " + entry);
          }
        }
      }
    }
    // Check that we are not setting default ACL to a file
    if (inode.isFile()) {
      for (AclEntry entry : entries) {
        if (entry.isDefault()) {
          throw new UnsupportedOperationException("Can not set default ACL for a file");
        }
      }
    }
    // Journal and apply the ACL change to the inode tree.
    mInodeTree.setAcl(rpcContext, SetAclEntry.newBuilder()
        .setId(inode.getId())
        .setOpTimeMs(opTimeMs)
        .setAction(ProtoUtils.toProto(action))
        .addAllEntries(entries.stream().map(ProtoUtils::toProto).collect(Collectors.toList()))
        .build());
    try {
      // Skip the UFS update during journal replay; only persisted inodes exist in the UFS.
      if (!replay && inode.isPersisted()) {
        setUfsAcl(inodePath);
      }
    } catch (InvalidPathException | AccessControlException e) {
      LOG.warn("Setting ufs ACL failed for path: {}", inodePath.getUri(), e);
      // TODO(david): revert the acl and default acl to the initial state if writing to ufs failed.
    }
  }
private void setAclRecursive(RpcContext rpcContext, SetAclAction action,
LockedInodePath inodePath, List entries, boolean replay, long opTimeMs,
SetAclContext context) throws IOException, FileDoesNotExistException {
Preconditions.checkState(inodePath.getLockPattern().isWrite());
setAclSingleInode(rpcContext, action, inodePath, entries, replay, opTimeMs);
if (context.getOptions().getRecursive()) {
try (LockedInodePathList descendants = mInodeTree.getDescendants(inodePath)) {
for (LockedInodePath childPath : descendants) {
setAclSingleInode(rpcContext, action, childPath, entries, replay, opTimeMs);
}
}
}
}
  @Override
  public void setAttribute(AlluxioURI path, SetAttributeContext context)
      throws FileDoesNotExistException, AccessControlException, InvalidPathException, IOException {
    SetAttributePOptions.Builder options = context.getOptions();
    Metrics.SET_ATTRIBUTE_OPS.inc();
    // for chown
    boolean rootRequired = options.hasOwner();
    // for chgrp, chmod
    boolean ownerRequired = (options.hasGroup()) || (options.hasMode());
    // for other attributes
    boolean writeRequired = !rootRequired && !ownerRequired;
    if (options.hasOwner() && options.hasGroup()) {
      try {
        checkUserBelongsToGroup(options.getOwner(), options.getGroup());
      } catch (IOException e) {
        // Wrap with context about the attempted owner:group change; keep the cause.
        throw new IOException(String.format("Could not update owner:group for %s to %s:%s. %s",
            path.toString(), options.getOwner(), options.getGroup(), e.toString()), e);
      }
    }
    // Pick the audit-log command name that matches the shell operation being performed.
    String commandName;
    boolean checkWritableMountPoint = false;
    if (options.hasOwner()) {
      commandName = "chown";
      checkWritableMountPoint = true;
    } else if (options.hasGroup()) {
      commandName = "chgrp";
      checkWritableMountPoint = true;
    } else if (options.hasMode()) {
      commandName = "chmod";
      checkWritableMountPoint = true;
    } else {
      commandName = "setAttribute";
    }
    LockingScheme lockingScheme =
        createLockingScheme(path, options.getCommonOptions(), LockPattern.WRITE_INODE);
    try (RpcContext rpcContext = createRpcContext();
        LockedInodePath inodePath = mInodeTree
            .lockInodePath(lockingScheme.getPath(), lockingScheme.getPattern());
        FileSystemMasterAuditContext auditContext =
            createAuditContext(commandName, path, null, inodePath.getInodeOrNull())) {
      if (checkWritableMountPoint) {
        mMountTable.checkUnderWritableMountPoint(path);
      }
      // Force recursive sync metadata if it is a pinning and unpinning operation
      boolean recursiveSync = options.hasPinned() || options.getRecursive();
      // Possible ufs sync.
      syncMetadata(rpcContext, inodePath, lockingScheme,
          recursiveSync ? DescendantType.ALL : DescendantType.ONE);
      if (!inodePath.fullPathExists()) {
        throw new FileDoesNotExistException(ExceptionMessage.PATH_DOES_NOT_EXIST.getMessage(path));
      }
      try {
        mPermissionChecker.checkSetAttributePermission(inodePath, rootRequired, ownerRequired,
            writeRequired);
        // For a recursive change the caller needs permission on every descendant as well.
        if (context.getOptions().getRecursive()) {
          try (LockedInodePathList descendants = mInodeTree.getDescendants(inodePath)) {
            for (LockedInodePath childPath : descendants) {
              mPermissionChecker.checkSetAttributePermission(childPath, rootRequired,
                  ownerRequired, writeRequired);
            }
          }
        }
      } catch (AccessControlException e) {
        // Record the denial in the audit log before propagating.
        auditContext.setAllowed(false);
        throw e;
      }
      setAttributeInternal(rpcContext, inodePath, context);
      auditContext.setSucceeded(true);
    }
  }
/**
* Checks whether the owner belongs to the group.
*
* @param owner the owner to check
* @param group the group to check
* @throws FailedPreconditionException if owner does not belong to group
*/
private void checkUserBelongsToGroup(String owner, String group)
throws IOException {
List groups = CommonUtils.getGroups(owner, ServerConfiguration.global());
if (groups == null || !groups.contains(group)) {
throw new FailedPreconditionException("Owner " + owner
+ " does not belong to the group " + group);
}
}
  /**
   * Sets the file attribute.
   *
   * @param rpcContext the rpc context
   * @param inodePath the {@link LockedInodePath} to set attribute for
   * @param context attributes to be set, see {@link SetAttributePOptions}
   */
  private void setAttributeInternal(RpcContext rpcContext, LockedInodePath inodePath,
      SetAttributeContext context)
      throws InvalidPathException, FileDoesNotExistException, AccessControlException, IOException {
    Inode targetInode = inodePath.getInode();
    // Use a single timestamp for the whole (possibly recursive) operation.
    long opTimeMs = System.currentTimeMillis();
    if (context.getOptions().getRecursive() && targetInode.isDirectory()) {
      // Apply the attribute change to every descendant first.
      try (LockedInodePathList descendants = mInodeTree.getDescendants(inodePath)) {
        for (LockedInodePath childPath : descendants) {
          setAttributeSingleFile(rpcContext, childPath, true, opTimeMs, context);
        }
      }
    }
    // Finally apply the change to the target inode itself.
    setAttributeSingleFile(rpcContext, inodePath, true, opTimeMs, context);
  }
  @Override
  public void scheduleAsyncPersistence(AlluxioURI path, ScheduleAsyncPersistenceContext context)
      throws AlluxioException, UnavailableException {
    // Hold a write-inode lock on the full path while marking the file for persistence.
    try (RpcContext rpcContext = createRpcContext();
        LockedInodePath inodePath = mInodeTree.lockFullInodePath(path, LockPattern.WRITE_INODE)) {
      scheduleAsyncPersistenceInternal(inodePath, context, rpcContext);
    }
  }
  /**
   * Marks a completed file as TO_BE_PERSISTED and registers a retry timer for the persist job.
   *
   * @param inodePath the write-locked path of the file to persist
   * @param context the schedule-async-persistence context (carries the persistence wait time)
   * @param rpcContext the rpc context
   * @throws InvalidPathException if the file is not yet completed
   * @throws FileDoesNotExistException if the path does not point to an existing file
   */
  private void scheduleAsyncPersistenceInternal(LockedInodePath inodePath,
      ScheduleAsyncPersistenceContext context, RpcContext rpcContext)
      throws InvalidPathException, FileDoesNotExistException {
    InodeFile inode = inodePath.getInodeFile();
    // Only completed files can be persisted.
    if (!inode.isCompleted()) {
      throw new InvalidPathException(
          "Cannot persist an incomplete Alluxio file: " + inodePath.getUri());
    }
    if (shouldPersistPath(inodePath.toString())) {
      mInodeTree.updateInode(rpcContext, UpdateInodeEntry.newBuilder().setId(inode.getId())
          .setPersistenceState(PersistenceState.TO_BE_PERSISTED.name()).build());
      // Schedule retries with exponential backoff between the configured intervals.
      mPersistRequests.put(inode.getId(),
          new alluxio.time.ExponentialTimer(
              ServerConfiguration.getMs(PropertyKey.MASTER_PERSISTENCE_INITIAL_INTERVAL_MS),
              ServerConfiguration.getMs(PropertyKey.MASTER_PERSISTENCE_MAX_INTERVAL_MS),
              context.getPersistenceWaitTime(),
              ServerConfiguration.getMs(PropertyKey.MASTER_PERSISTENCE_MAX_TOTAL_WAIT_TIME_MS)));
    }
  }
/**
* Actively sync metadata, based on a list of changed files.
*
* @param path the path to sync
* @param changedFiles collection of files that are changed under the path to sync, if this is
* null, force sync the entire directory
* @param executorService executor to execute the parallel incremental sync
*/
public void activeSyncMetadata(AlluxioURI path, Collection changedFiles,
ExecutorService executorService) throws IOException {
if (changedFiles == null) {
LOG.info("Start an active full sync of {}", path.toString());
} else {
LOG.info("Start an active incremental sync of {} files", changedFiles.size());
}
if (changedFiles != null && changedFiles.isEmpty()) {
return;
}
Map statusCache;
try (RpcContext rpcContext = createRpcContext()) {
statusCache = populateStatusCache(path, DescendantType.ALL);
if (changedFiles == null) {
LockingScheme lockingScheme = new LockingScheme(path, LockPattern.READ, true);
try (LockedInodePath inodePath =
mInodeTree.lockInodePath(lockingScheme.getPath(), lockingScheme.getPattern())) {
syncMetadataInternal(rpcContext, inodePath, lockingScheme, DescendantType.ALL,
statusCache);
}
LOG.info("Ended an active full sync of {}", path.toString());
return;
} else {
Set> callables = new HashSet<>();
for (AlluxioURI changedFile : changedFiles) {
callables.add(() -> {
LockingScheme lockingScheme = new LockingScheme(path, LockPattern.READ, true);
try (LockedInodePath changedFilePath =
mInodeTree.lockInodePath(changedFile, lockingScheme.getPattern())) {
syncMetadataInternal(rpcContext, changedFilePath, lockingScheme, DescendantType.NONE,
statusCache);
} catch (InvalidPathException e) {
LOG.info("forceSyncMetadata processed an invalid path {}", changedFile.getPath());
}
return null;
});
}
executorService.invokeAll(callables);
}
} catch (InvalidPathException e) {
LOG.warn("InvalidPathException during active sync {}", e);
} catch (InterruptedException e) {
LOG.warn("InterruptedException during active sync {}", e);
Thread.currentThread().interrupt();
return;
}
LOG.info("Ended an active incremental sync of {} files", changedFiles.size());
}
@Override
public boolean recordActiveSyncTxid(long txId, long mountId) {
MountInfo mountInfo = mMountTable.getMountInfo(mountId);
if (mountInfo == null) {
return false;
}
AlluxioURI mountPath = mountInfo.getAlluxioUri();
try (RpcContext rpcContext = createRpcContext();
LockedInodePath inodePath = mInodeTree
.lockFullInodePath(mountPath, LockPattern.READ)) {
File.ActiveSyncTxIdEntry txIdEntry =
File.ActiveSyncTxIdEntry.newBuilder().setTxId(txId).setMountId(mountId).build();
rpcContext.journal(JournalEntry.newBuilder().setActiveSyncTxId(txIdEntry).build());
} catch (UnavailableException | InvalidPathException | FileDoesNotExistException e) {
LOG.warn("Exception when recording activesync txid, path {}, exception {}",
mountPath, e);
return false;
}
return true;
}
private boolean syncMetadata(RpcContext rpcContext, LockedInodePath inodePath,
LockingScheme lockingScheme, DescendantType syncDescendantType) {
boolean result;
if (!lockingScheme.shouldSync()) {
return false;
}
try {
result = syncMetadataInternal(rpcContext, inodePath, lockingScheme,
syncDescendantType, populateStatusCache(inodePath.getUri(), syncDescendantType));
} catch (Exception e) {
LOG.warn("Sync metadata for path {} encountered exception {}", inodePath.getUri(),
Throwables.getStackTraceAsString(e));
return false;
}
return result;
}
  /**
   * Builds a cache of UFS statuses for the children of the given path, used during metadata sync
   * to construct fingerprints without re-querying the UFS per child.
   *
   * Failures are swallowed on purpose: the cache is a best-effort optimization, and an empty map
   * simply makes the sync fall back to direct UFS calls.
   *
   * @param path the Alluxio path whose UFS children should be listed
   * @param syncDescendantType when {@code ALL}, the UFS listing is recursive
   * @return a map from child Alluxio URI to its UFS status (possibly empty)
   */
  private Map populateStatusCache(AlluxioURI path,
      DescendantType syncDescendantType) {
    Map statusCache = new HashMap<>();
    try {
      MountTable.Resolution resolution = mMountTable.resolve(path);
      AlluxioURI ufsUri = resolution.getUri();
      try (CloseableResource ufsResource = resolution.acquireUfsResource()) {
        UnderFileSystem ufs = ufsResource.get();
        ListOptions listOptions = ListOptions.defaults();
        // statusCache stores uri to ufsstatus mapping that is used to construct fingerprint
        listOptions.setRecursive(syncDescendantType == DescendantType.ALL);
        try {
          UfsStatus[] children = ufs.listStatus(ufsUri.toString(), listOptions);
          if (children != null) {
            for (UfsStatus childStatus : children) {
              statusCache.put(path.joinUnsafe(childStatus.getName()),
                  childStatus);
            }
          }
        } catch (Exception e) {
          // Best-effort: listing may legitimately fail (e.g. the path is absent in the UFS).
          LOG.debug("ListStatus failed as an preparation step for syncMetadata {}", path, e);
        }
        return statusCache;
      }
    } catch (InvalidPathException e) {
      // The path is not under any mount point; return the (empty) cache.
      return statusCache;
    }
  }
/**
* Syncs the Alluxio metadata with UFS.
*
* This method expects that the last existing edge leading to inodePath has been write-locked.
*
* @param rpcContext the rpcContext
* @param inodePath the Alluxio inode path to sync with UFS
* @param lockingScheme the locking scheme used to lock the inode path
* @param syncDescendantType how to sync descendants
* @param statusCache a cache provided to the sync method which stores the UfsStatus of files
* @return true if the sync was performed successfully, false otherwise (including errors)
*/
private boolean syncMetadataInternal(RpcContext rpcContext, LockedInodePath inodePath,
LockingScheme lockingScheme, DescendantType syncDescendantType,
Map statusCache)
throws IOException {
// The sync may delete or mutate the last inode, so a write-edge lock is required on entry.
Preconditions.checkState(inodePath.getLockPattern() == LockPattern.WRITE_EDGE);
// The high-level process for the syncing is:
// 1. Find all Alluxio paths which are not consistent with the corresponding UFS path.
// This means the UFS path does not exist, or is different from the Alluxio metadata.
// 2. If only the metadata changed for a file or a directory, update the inode with
// new metadata from the UFS.
// 3. Delete any Alluxio path whose content is not consistent with UFS, or not in UFS. After
// this step, all the paths in Alluxio are consistent with UFS, and there may be additional
// UFS paths to load.
// 4. Load metadata from UFS.
Set pathsToLoad = new HashSet<>();
try {
if (!inodePath.fullPathExists()) {
// The requested path does not exist in Alluxio, so just load metadata.
pathsToLoad.add(inodePath.getUri().getPath());
} else {
SyncResult result =
syncInodeMetadata(rpcContext, inodePath, syncDescendantType, statusCache);
if (result.getDeletedInode()) {
// If the inode was deleted, then the inode path should reflect the delete.
inodePath.removeLastInode();
}
pathsToLoad = result.getPathsToLoad();
}
} catch (InvalidPathException | FileDoesNotExistException | AccessControlException e) {
LOG.warn("Exception encountered when syncing metadata for {}, exception is {}",
inodePath.getUri(), e);
return false;
} finally {
// Mutation is finished; drop back to the lock pattern the caller asked for.
inodePath.downgradeToPattern(lockingScheme.getDesiredPattern());
}
// Update metadata for all the mount points
for (String mountPoint : pathsToLoad) {
AlluxioURI mountPointUri = new AlluxioURI(mountPoint);
try {
if (PathUtils.hasPrefix(inodePath.getUri().getPath(), mountPointUri.getPath())) {
// one of the mountpoint is above the original inodePath, we start loading from the
// original inodePath. It is already locked. so we proceed to load metadata.
try {
loadMetadataInternal(rpcContext, inodePath,
LoadMetadataContext
.mergeFrom(LoadMetadataPOptions.newBuilder().setCreateAncestors(true)
.setLoadDescendantType(GrpcUtils.toProto(syncDescendantType))));
mUfsSyncPathCache.notifySyncedPath(inodePath.getUri().getPath(), syncDescendantType);
} catch (Exception e) {
// This may be expected. For example, when creating a new file, the UFS file is not
// expected to exist.
LOG.debug("Failed to load metadata for path: {}", inodePath.getUri(), e);
continue;
}
} else {
// The mount point is below inodePath; lock down to it before loading.
try (LockedInodePath descendantPath =
inodePath.lockDescendant(mountPointUri, LockPattern.READ)) {
try {
loadMetadataInternal(rpcContext, descendantPath,
LoadMetadataContext
.mergeFrom(LoadMetadataPOptions.newBuilder().setCreateAncestors(true)
.setLoadDescendantType(GrpcUtils.toProto(syncDescendantType))));
} catch (Exception e) {
LOG.debug("Failed to load metadata for mount point: {}", mountPointUri, e);
}
mUfsSyncPathCache.notifySyncedPath(mountPoint, syncDescendantType);
}
}
} catch (InvalidPathException e) {
LOG.warn("Tried to update metadata from an invalid path : {}", mountPointUri.getPath(), e);
}
}
try {
// Re-traverse to pick up newly created inodes on the path.
inodePath.traverse();
} catch (InvalidPathException e) {
// The path was valid on entry; a failure here is an internal inconsistency, so surface it.
throw new RuntimeException(e);
}
if (pathsToLoad.isEmpty()) {
// Nothing needed loading, so the path is now known to be in sync; record the sync time.
mUfsSyncPathCache.notifySyncedPath(inodePath.getUri().getPath(), syncDescendantType);
}
return true;
}
/**
 * Exposes the master's inode store for tests.
 *
 * @return the read-only view of the inode store
 */
@VisibleForTesting
ReadOnlyInodeStore getInodeStore() {
return mInodeStore;
}
/**
* This class represents the result for a sync. The following are returned:
* - deleted: if true, the inode was already deleted as part of the syncing process
* - pathsToLoad: a set of paths that need to be loaded from UFS.
*/
private static class SyncResult {
  /** Whether the synced inode was deleted as part of the syncing process. */
  private final boolean mDeletedInode;
  /** Paths which still need their metadata loaded from the UFS. */
  private final Set<String> mPathsToLoad;

  /**
   * @return a result representing a no-op sync: nothing deleted, nothing to load
   */
  static SyncResult defaults() {
    return new SyncResult(false, new HashSet<>());
  }

  /**
   * @param deletedInode whether the inode was deleted during the sync
   * @param pathsToLoad the set of paths to load from the UFS; defensively copied
   */
  SyncResult(boolean deletedInode, Set<String> pathsToLoad) {
    mDeletedInode = deletedInode;
    mPathsToLoad = new HashSet<>(pathsToLoad);
  }

  boolean getDeletedInode() {
    return mDeletedInode;
  }

  Set<String> getPathsToLoad() {
    return mPathsToLoad;
  }
}
/**
* Syncs an inode with the UFS.
*
* @param rpcContext the rpc context
* @param inodePath the Alluxio inode path to sync with UFS
* @param syncDescendantType how to sync descendants
* @param statusCache a pre-populated cache of ufs statuses that can be used to construct
* fingerprint
* @return the result of the sync, including if the inode was deleted, and if further load
* metadata is required
*/
private SyncResult syncInodeMetadata(RpcContext rpcContext, LockedInodePath inodePath,
DescendantType syncDescendantType, Map statusCache)
throws FileDoesNotExistException, InvalidPathException, IOException, AccessControlException {
// The inode may be updated or deleted below, so the caller must hold a write-edge lock.
Preconditions.checkState(inodePath.getLockPattern() == LockPattern.WRITE_EDGE);
// Set to true if the given inode was deleted.
boolean deletedInode = false;
// Set of paths to sync
Set pathsToLoad = new HashSet<>();
LOG.debug("Syncing inode metadata {}", inodePath.getUri());
// The options for deleting.
DeleteContext syncDeleteContext = DeleteContext.mergeFrom(
DeletePOptions.newBuilder().setRecursive(true).setAlluxioOnly(true).setUnchecked(true));
// The requested path already exists in Alluxio.
Inode inode = inodePath.getInode();
if (inode instanceof InodeFile && !inode.asFile().isCompleted()) {
// Do not sync an incomplete file, since the UFS file is expected to not exist.
return SyncResult.defaults();
}
Optional persistingLock = mInodeLockManager.tryAcquirePersistingLock(inode.getId());
if (!persistingLock.isPresent()) {
// Do not sync a file in the process of being persisted, since the UFS file is being
// written.
return SyncResult.defaults();
}
// The lock was only needed to probe that no persist is in flight; release it right away.
persistingLock.get().close();
MountTable.Resolution resolution = mMountTable.resolve(inodePath.getUri());
AlluxioURI ufsUri = resolution.getUri();
try (CloseableResource ufsResource = resolution.acquireUfsResource()) {
UnderFileSystem ufs = ufsResource.get();
String ufsFingerprint;
Fingerprint ufsFpParsed;
// Prefer the pre-fetched status to avoid an extra UFS round trip per path.
UfsStatus cachedStatus = statusCache.get(inodePath.getUri());
if (cachedStatus == null) {
// TODO(david): change the interface so that getFingerprint returns a parsed fingerprint
ufsFingerprint = ufs.getFingerprint(ufsUri.toString());
ufsFpParsed = Fingerprint.parse(ufsFingerprint);
} else {
// ACLs are not part of the cached status, so they must be fetched separately to build
// a complete fingerprint.
Pair aclPair
= ufs.getAclPair(ufsUri.toString());
if (aclPair == null || aclPair.getFirst() == null || !aclPair.getFirst().hasExtended()) {
ufsFpParsed = Fingerprint.create(ufs.getUnderFSType(), cachedStatus);
ufsFingerprint = ufsFpParsed.serialize();
} else {
ufsFpParsed = Fingerprint.create(ufs.getUnderFSType(), cachedStatus,
aclPair.getFirst());
ufsFingerprint = ufsFpParsed.serialize();
}
}
// A path containing a nested mount point must not be deleted by the sync plan.
boolean containsMountPoint = mMountTable.containsMountPoint(inodePath.getUri(), true);
UfsSyncUtils.SyncPlan syncPlan =
UfsSyncUtils.computeSyncPlan(inode, ufsFpParsed, containsMountPoint);
if (syncPlan.toUpdateMetaData()) {
// UpdateMetadata is used when a file or a directory only had metadata change.
// It works by calling SetAttributeInternal on the inodePath.
if (ufsFpParsed.isValid()) {
short mode = Short.parseShort(ufsFpParsed.getTag(Tag.MODE));
long opTimeMs = System.currentTimeMillis();
setAttributeSingleFile(rpcContext, inodePath, false, opTimeMs, SetAttributeContext
.mergeFrom(SetAttributePOptions.newBuilder().setOwner(ufsFpParsed.getTag(Tag.OWNER))
.setGroup(ufsFpParsed.getTag(Tag.GROUP)).setMode(new Mode(mode).toProto()))
.setUfsFingerprint(ufsFingerprint));
}
}
if (syncPlan.toDelete()) {
try {
deleteInternal(rpcContext, inodePath, syncDeleteContext);
deletedInode = true;
} catch (DirectoryNotEmptyException | IOException e) {
// Should not happen, since it is an unchecked delete.
LOG.error("Unexpected error for unchecked delete.", e);
}
}
if (syncPlan.toLoadMetadata()) {
// Schedule a (re)load from the mount point that owns this path.
AlluxioURI mountUri = new AlluxioURI(mMountTable.getMountPoint(inodePath.getUri()));
pathsToLoad.add(mountUri.getPath());
}
if (syncPlan.toSyncChildren() && inode.isDirectory()
&& syncDescendantType != DescendantType.NONE) {
// maps children name to inode
Map inodeChildren = new HashMap<>();
for (Inode child : mInodeStore.getChildren(inode.asDirectory())) {
inodeChildren.put(child.getName(), child);
}
UfsStatus[] listStatus = ufs.listStatus(ufsUri.toString(),
ListOptions.defaults());
// Iterate over UFS listings and process UFS children.
if (listStatus != null) {
for (UfsStatus ufsChildStatus : listStatus) {
if (!inodeChildren.containsKey(ufsChildStatus.getName()) && !PathUtils
.isTemporaryFileName(ufsChildStatus.getName())) {
// Ufs child exists, but Alluxio child does not. Must load metadata.
AlluxioURI mountUri = new AlluxioURI(mMountTable.getMountPoint(inodePath.getUri()));
pathsToLoad.add(mountUri.getPath());
break;
}
}
}
// Iterate over Alluxio children and process persisted children.
for (Map.Entry inodeEntry : inodeChildren.entrySet()) {
if (!inodeEntry.getValue().isPersisted()) {
// Ignore non-persisted inodes.
continue;
}
// Technically we don't need to lock here since inodePath is already write-locked. We can
// improve this by implementing a way to traverse an inode path without locking.
try (LockedInodePath descendant = inodePath.lockDescendant(
inodePath.getUri().joinUnsafe(inodeEntry.getKey()), LockPattern.WRITE_EDGE)) {
// Recursively sync children
if (syncDescendantType != DescendantType.ALL) {
// Only one level was requested, so children themselves sync non-recursively.
syncDescendantType = DescendantType.NONE;
}
SyncResult syncResult =
syncInodeMetadata(rpcContext, descendant, syncDescendantType, statusCache);
pathsToLoad.addAll(syncResult.getPathsToLoad());
}
}
}
}
return new SyncResult(deletedInode, pathsToLoad);
}
@Override
public FileSystemCommand workerHeartbeat(long workerId, List<Long> persistedFiles,
    WorkerHeartbeatContext context) throws IOException {
  // Fingerprints are optional; only trust them when the worker reported exactly one per file.
  List<String> persistedUfsFingerprints = context.getOptions().getPersistedFileFingerprintsList();
  boolean hasPersistedFingerprints = persistedUfsFingerprints.size() == persistedFiles.size();
  for (int i = 0; i < persistedFiles.size(); i++) {
    long fileId = persistedFiles.get(i);
    String ufsFingerprint = hasPersistedFingerprints ? persistedUfsFingerprints.get(i) :
        Constants.INVALID_UFS_FINGERPRINT;
    try {
      // Permission checking for each file is performed inside setAttribute
      setAttribute(getPath(fileId),
          SetAttributeContext
              .mergeFrom(SetAttributePOptions.newBuilder().setPersisted(true))
              .setUfsFingerprint(ufsFingerprint));
    } catch (FileDoesNotExistException | AccessControlException | InvalidPathException e) {
      // Pass the exception as the last argument (no placeholder) so SLF4J logs the full
      // stack trace; a "{}" placeholder would only log e.toString().
      LOG.error("Failed to set file {} as persisted", fileId, e);
    }
  }
  // TODO(zac) Clean up master and worker code since this is taken care of by job service now.
  // Worker should not persist any files. Instead, files are persisted through job service.
  List filesToPersist = new ArrayList<>();
  FileSystemCommandOptions commandOptions = new FileSystemCommandOptions();
  commandOptions.setPersistOptions(new PersistCommandOptions(filesToPersist));
  return new FileSystemCommand(CommandType.Persist, commandOptions);
}
/**
 * Applies a set-attribute change to a single inode and, when requested, propagates
 * owner/group/mode changes to the UFS copy of a persisted file.
 *
 * @param rpcContext the rpc context used for journaling the inode update
 * @param inodePath the {@link LockedInodePath} to use
 * @param updateUfs whether to update the UFS with the attribute change
 * @param opTimeMs the operation time (in milliseconds)
 * @param context the method context
 * @throws AccessControlException if propagating owner/mode to the UFS fails
 */
private void setAttributeSingleFile(RpcContext rpcContext, LockedInodePath inodePath,
boolean updateUfs, long opTimeMs, SetAttributeContext context)
throws FileDoesNotExistException, InvalidPathException, AccessControlException {
Inode inode = inodePath.getInode();
SetAttributePOptions.Builder protoOptions = context.getOptions();
if (protoOptions.hasPinned()) {
mInodeTree.setPinned(rpcContext, inodePath, context.getOptions().getPinned(),
context.getOptions().getPinnedMediaList(), opTimeMs);
}
UpdateInodeEntry.Builder entry = UpdateInodeEntry.newBuilder().setId(inode.getId());
if (protoOptions.hasReplicationMax() || protoOptions.hasReplicationMin()) {
Integer replicationMax =
protoOptions.hasReplicationMax() ? protoOptions.getReplicationMax() : null;
Integer replicationMin =
protoOptions.hasReplicationMin() ? protoOptions.getReplicationMin() : null;
mInodeTree.setReplication(rpcContext, inodePath, replicationMax, replicationMin, opTimeMs);
}
// protoOptions may not have both fields set
if (protoOptions.hasCommonOptions()) {
FileSystemMasterCommonPOptions commonOpts = protoOptions.getCommonOptions();
TtlAction action = commonOpts.hasTtlAction() ? commonOpts.getTtlAction() : null;
Long ttl = commonOpts.hasTtl() ? commonOpts.getTtl() : null;
boolean modified = false;
if (ttl != null && inode.getTtl() != ttl) {
entry.setTtl(ttl);
modified = true;
}
if (action != null && inode.getTtlAction() != action) {
entry.setTtlAction(ProtobufUtils.toProtobuf(action));
modified = true;
}
if (modified) {
entry.setLastModificationTimeMs(opTimeMs);
}
}
if (protoOptions.hasPersisted()) {
Preconditions.checkArgument(inode.isFile(), PreconditionMessage.PERSIST_ONLY_FOR_FILE);
Preconditions.checkArgument(inode.asFile().isCompleted(),
PreconditionMessage.FILE_TO_PERSIST_MUST_BE_COMPLETE);
// TODO(manugoyal) figure out valid behavior in the un-persist case
Preconditions
.checkArgument(protoOptions.getPersisted(), PreconditionMessage.ERR_SET_STATE_UNPERSIST);
if (!inode.asFile().isPersisted()) {
entry.setPersistenceState(PersistenceState.PERSISTED.name());
entry.setLastModificationTimeMs(context.getOperationTimeMs());
propagatePersistedInternal(rpcContext, inodePath);
Metrics.FILES_PERSISTED.inc();
}
}
boolean ownerGroupChanged = (protoOptions.hasOwner()) || (protoOptions.hasGroup());
boolean modeChanged = protoOptions.hasMode();
// If the file is persisted in UFS, also update corresponding owner/group/permission.
if ((ownerGroupChanged || modeChanged) && updateUfs && inode.isPersisted()) {
if ((inode instanceof InodeFile) && !inode.asFile().isCompleted()) {
LOG.debug("Alluxio does not propagate chown/chgrp/chmod to UFS for incomplete files.");
} else {
checkUfsMode(inodePath.getUri(), OperationType.WRITE);
MountTable.Resolution resolution = mMountTable.resolve(inodePath.getUri());
String ufsUri = resolution.getUri().toString();
try (CloseableResource ufsResource = resolution.acquireUfsResource()) {
UnderFileSystem ufs = ufsResource.get();
if (ufs.isObjectStorage()) {
LOG.debug("setOwner/setMode is not supported to object storage UFS via Alluxio. "
+ "UFS: " + ufsUri + ". This has no effect on the underlying object.");
} else {
String owner = null;
String group = null;
String mode = null;
if (ownerGroupChanged) {
try {
// Fall back to the inode's current owner/group when the option is unset. Proto
// getters never return null (unset strings are ""), so presence must be tested
// with hasOwner()/hasGroup() rather than a null check.
owner = protoOptions.hasOwner() ? protoOptions.getOwner() : inode.getOwner();
group = protoOptions.hasGroup() ? protoOptions.getGroup() : inode.getGroup();
ufs.setOwner(ufsUri, owner, group);
} catch (IOException e) {
throw new AccessControlException("Could not setOwner for UFS file " + ufsUri
+ " . Aborting the setAttribute operation in Alluxio.", e);
}
}
if (modeChanged) {
try {
// Store the numeric mode so the MODE fingerprint tag remains parseable by
// Short.parseShort on the sync path; the proto's toString() is not.
mode = String.valueOf(ModeUtils.protoToShort(protoOptions.getMode()));
ufs.setMode(ufsUri, ModeUtils.protoToShort(protoOptions.getMode()));
} catch (IOException e) {
throw new AccessControlException("Could not setMode for UFS file " + ufsUri
+ " . Aborting the setAttribute operation in Alluxio.", e);
}
}
// Retrieve the ufs fingerprint after the ufs changes.
String existingFingerprint = inode.getUfsFingerprint();
if (!existingFingerprint.equals(Constants.INVALID_UFS_FINGERPRINT)) {
// Update existing fingerprint, since contents did not change
Fingerprint fp = Fingerprint.parse(existingFingerprint);
fp.putTag(Fingerprint.Tag.OWNER, owner);
fp.putTag(Fingerprint.Tag.GROUP, group);
fp.putTag(Fingerprint.Tag.MODE, mode);
context.setUfsFingerprint(fp.serialize());
} else {
// Need to retrieve the fingerprint from ufs.
context.setUfsFingerprint(ufs.getFingerprint(ufsUri));
}
}
}
}
}
if (!context.getUfsFingerprint().equals(Constants.INVALID_UFS_FINGERPRINT)) {
entry.setUfsFingerprint(context.getUfsFingerprint());
}
// Only commit the set permission to inode after the propagation to UFS succeeded.
if (protoOptions.hasOwner()) {
entry.setOwner(protoOptions.getOwner());
}
if (protoOptions.hasGroup()) {
entry.setGroup(protoOptions.getGroup());
}
if (modeChanged) {
entry.setMode(ModeUtils.protoToShort(protoOptions.getMode()));
}
mInodeTree.updateInode(rpcContext, entry.build());
}
@Override
public List getSyncPathList() {
// Delegates to the active sync manager, which owns the list of registered sync points.
return mSyncManager.getSyncPathList();
}
/**
 * Registers {@code uri} as an active-sync point: journals the addition first, then starts
 * syncing, reverting the journaled state if starting fails.
 *
 * @param rpcContext the rpc context used for journaling
 * @param uri the URI to register as a sync point
 * @throws InvalidPathException if the URI is already a sync point
 * @throws UnsupportedOperationException if the backing UFS does not support active sync
 */
private void startSyncAndJournal(RpcContext rpcContext, AlluxioURI uri)
throws InvalidPathException, IOException {
// Serialize with other sync-point mutations.
try (LockResource r = new LockResource(mSyncManager.getSyncManagerLock())) {
MountTable.Resolution resolution = mMountTable.resolve(uri);
long mountId = resolution.getMountId();
try (CloseableResource ufsResource = resolution.acquireUfsResource()) {
if (!ufsResource.get().supportsActiveSync()) {
throw new UnsupportedOperationException(
"Active Syncing is not supported on this UFS type: "
+ ufsResource.get().getUnderFSType());
}
}
if (mSyncManager.isActivelySynced(uri)) {
throw new InvalidPathException("URI " + uri + " is already a sync point");
}
// Journal the addition first so the new sync point survives a master failover.
AddSyncPointEntry addSyncPoint =
AddSyncPointEntry.newBuilder()
.setSyncpointPath(uri.toString())
.setMountId(mountId)
.build();
mSyncManager.applyAndJournal(rpcContext, addSyncPoint);
try {
mSyncManager.startSyncPostJournal(uri);
} catch (Throwable e) {
LOG.warn("Start sync failed on {}", uri, e);
// revert state;
RemoveSyncPointEntry removeSyncPoint =
File.RemoveSyncPointEntry.newBuilder()
.setSyncpointPath(uri.toString()).build();
mSyncManager.applyAndJournal(rpcContext, removeSyncPoint);
mSyncManager.recoverFromStartSync(uri, resolution.getMountId());
throw e;
}
}
}
@Override
public void startSync(AlluxioURI syncPoint)
    throws IOException, InvalidPathException, AccessControlException, ConnectionFailedException {
  // Lock the sync point with a write-edge pattern; registration mutates master state.
  LockingScheme scheme = new LockingScheme(syncPoint, LockPattern.WRITE_EDGE, true);
  try (RpcContext rpcContext = createRpcContext();
      LockedInodePath lockedPath =
          mInodeTree.lockInodePath(scheme.getPath(), scheme.getPattern());
      FileSystemMasterAuditContext audit =
          createAuditContext("startSync", syncPoint, null, lockedPath.getParentInodeOrNull())) {
    // The caller needs write access on the parent directory; record denials in the audit log.
    try {
      mPermissionChecker.checkParentPermission(Mode.Bits.WRITE, lockedPath);
    } catch (AccessControlException e) {
      audit.setAllowed(false);
      throw e;
    }
    startSyncAndJournal(rpcContext, syncPoint);
    audit.setSucceeded(true);
  }
}
/**
 * Unregisters a sync point: journals the removal first, then stops syncing, reverting the
 * journaled state if stopping fails.
 *
 * @param rpcContext the rpc context used for journaling
 * @param lockingScheme unused; retained for signature compatibility with callers
 * @param lockedInodePath the locked path of the sync point to remove
 * @throws InvalidPathException if the path is not a sync point
 */
private void stopSyncAndJournal(RpcContext rpcContext,
LockingScheme lockingScheme, LockedInodePath lockedInodePath)
throws IOException, InvalidPathException {
// Serialize with other sync-point mutations.
try (LockResource r = new LockResource(mSyncManager.getSyncManagerLock())) {
MountTable.Resolution resolution = mSyncManager.resolveSyncPoint(lockedInodePath.getUri());
if (resolution == null) {
throw new InvalidPathException(lockedInodePath.getUri() + " is not a sync point.");
}
AlluxioURI uri = lockedInodePath.getUri();
// Journal the removal first so the state change survives a master failover.
RemoveSyncPointEntry removeSyncPoint = File.RemoveSyncPointEntry.newBuilder()
.setSyncpointPath(lockedInodePath.getUri().toString())
.setMountId(resolution.getMountId())
.build();
mSyncManager.applyAndJournal(rpcContext, removeSyncPoint);
try {
mSyncManager.stopSyncPostJournal(lockedInodePath.getUri());
} catch (Throwable e) {
LOG.warn("Stop sync failed on {}", uri, e);
// Revert state: journal a matching add entry and restore the sync manager bookkeeping.
AddSyncPointEntry addSyncPoint =
File.AddSyncPointEntry.newBuilder()
.setSyncpointPath(uri.toString()).build();
mSyncManager.applyAndJournal(rpcContext, addSyncPoint);
mSyncManager.recoverFromStopSync(uri, resolution.getMountId());
throw e;
}
}
}
@Override
public void stopSync(AlluxioURI syncPoint)
    throws IOException, InvalidPathException, AccessControlException {
  // Removal does not create new inodes, so the scheme is created with shouldSync=false.
  LockingScheme scheme = new LockingScheme(syncPoint, LockPattern.WRITE_EDGE, false);
  try (RpcContext rpcContext = createRpcContext();
      LockedInodePath lockedPath =
          mInodeTree.lockInodePath(scheme.getPath(), scheme.getPattern());
      FileSystemMasterAuditContext audit =
          createAuditContext("stopSync", syncPoint, null, lockedPath.getParentInodeOrNull())) {
    // The caller needs write access on the parent directory; record denials in the audit log.
    try {
      mPermissionChecker.checkParentPermission(Mode.Bits.WRITE, lockedPath);
    } catch (AccessControlException e) {
      audit.setAllowed(false);
      throw e;
    }
    stopSyncAndJournal(rpcContext, scheme, lockedPath);
    audit.setSucceeded(true);
  }
}
@Override
public List getWorkerInfoList() throws UnavailableException {
// Worker membership is owned by the block master; delegate directly.
return mBlockMaster.getWorkerInfoList();
}
/**
* @param fileId file ID
* @param jobId persist job ID
* @param persistenceWaitTime persistence initial wait time
* @param uri Alluxio Uri of the file
* @param tempUfsPath temp UFS path
*/
private void addPersistJob(long fileId, long jobId, long persistenceWaitTime, AlluxioURI uri,
    String tempUfsPath) {
  // Carry over the request's existing backoff timer when there is one; otherwise start a
  // fresh exponential timer configured from the master persistence properties.
  alluxio.time.ExponentialTimer existing = mPersistRequests.remove(fileId);
  final alluxio.time.ExponentialTimer timer = (existing != null) ? existing
      : new alluxio.time.ExponentialTimer(
          ServerConfiguration.getMs(PropertyKey.MASTER_PERSISTENCE_INITIAL_INTERVAL_MS),
          ServerConfiguration.getMs(PropertyKey.MASTER_PERSISTENCE_MAX_INTERVAL_MS),
          persistenceWaitTime,
          ServerConfiguration.getMs(PropertyKey.MASTER_PERSISTENCE_MAX_TOTAL_WAIT_TIME_MS));
  mPersistJobs.put(fileId, new PersistJob(jobId, fileId, uri, tempUfsPath, timer));
}
/**
 * @param shouldPersistTime the absolute time (ms) at which persistence should start
 * @return the remaining wait in milliseconds, or 0 if that time has already passed
 */
private long getPersistenceWaitTime(long shouldPersistTime) {
  long now = System.currentTimeMillis();
  return (shouldPersistTime >= now) ? (shouldPersistTime - now) : 0;
}
/**
* Periodically schedules jobs to persist files and updates metadata accordingly.
*/
@NotThreadSafe
private final class PersistenceScheduler implements alluxio.heartbeat.HeartbeatExecutor {
// Upper bound for the exponential backoff applied when the job service is overloaded.
private static final long MAX_QUIET_PERIOD_SECONDS = 64;
/**
* Quiet period for job service flow control (in seconds). When job service refuses starting new
* jobs, we use exponential backoff to alleviate the job service pressure.
*/
private long mQuietPeriodSeconds;
/**
* Creates a new instance of {@link PersistenceScheduler}.
*/
PersistenceScheduler() {
mQuietPeriodSeconds = 0;
}
@Override
public void close() {} // Nothing to clean up
/**
* Updates the file system metadata to reflect the fact that the persist file request expired.
*
* @param fileId the file ID
*/
private void handleExpired(long fileId) throws AlluxioException, UnavailableException {
try (JournalContext journalContext = createJournalContext();
LockedInodePath inodePath = mInodeTree
.lockFullInodePath(fileId, LockPattern.WRITE_INODE)) {
InodeFile inode = inodePath.getInodeFile();
switch (inode.getPersistenceState()) {
case LOST:
// fall through
case NOT_PERSISTED:
// fall through
case PERSISTED:
LOG.warn("File {} (id={}) persistence state is {} and will not be changed.",
inodePath.getUri(), fileId, inode.getPersistenceState());
return;
case TO_BE_PERSISTED:
// Roll the inode back to NOT_PERSISTED and clear the persist-job bookkeeping.
mInodeTree.updateInode(journalContext, UpdateInodeEntry.newBuilder()
.setId(inode.getId())
.setPersistenceState(PersistenceState.NOT_PERSISTED.name())
.build());
mInodeTree.updateInodeFile(journalContext, UpdateInodeFileEntry.newBuilder()
.setId(inode.getId())
.setPersistJobId(Constants.PERSISTENCE_INVALID_JOB_ID)
.setTempUfsPath(Constants.PERSISTENCE_INVALID_UFS_PATH)
.build());
break;
default:
throw new IllegalStateException(
"Unrecognized persistence state: " + inode.getPersistenceState());
}
}
}
/**
* Attempts to schedule a persist job and updates the file system metadata accordingly.
*
* @param fileId the file ID
*/
private void handleReady(long fileId) throws AlluxioException, IOException {
alluxio.time.ExponentialTimer timer = mPersistRequests.get(fileId);
// Lookup relevant file information.
AlluxioURI uri;
String tempUfsPath;
try (LockedInodePath inodePath = mInodeTree.lockFullInodePath(fileId, LockPattern.READ)) {
InodeFile inode = inodePath.getInodeFile();
uri = inodePath.getUri();
switch (inode.getPersistenceState()) {
case LOST:
// fall through
case NOT_PERSISTED:
// fall through
case PERSISTED:
LOG.warn("File {} (id={}) persistence state is {} and will not be changed.",
inodePath.getUri(), fileId, inode.getPersistenceState());
return;
case TO_BE_PERSISTED:
tempUfsPath = inodePath.getInodeFile().getTempUfsPath();
break;
default:
throw new IllegalStateException(
"Unrecognized persistence state: " + inode.getPersistenceState());
}
}
MountTable.Resolution resolution = mMountTable.resolve(uri);
try (CloseableResource ufsResource = resolution.acquireUfsResource()) {
// If previous persist job failed, clean up the temporary file.
cleanup(ufsResource.get(), tempUfsPath);
// Generate a temporary path to be used by the persist job.
// If the persist destination is on object store, let persist job copy files to destination
// directly
if (ServerConfiguration.getBoolean(PropertyKey.MASTER_UNSAFE_DIRECT_PERSIST_OBJECT_ENABLED)
&& ufsResource.get().isObjectStorage()) {
tempUfsPath = resolution.getUri().toString();
} else {
tempUfsPath = PathUtils.temporaryFileName(
System.currentTimeMillis(), resolution.getUri().toString());
}
}
alluxio.job.persist.PersistConfig config =
new alluxio.job.persist.PersistConfig(uri.getPath(), resolution.getMountId(), false,
tempUfsPath);
// Schedule the persist job.
long jobId;
JobMasterClient client = mJobMasterClientPool.acquire();
try {
jobId = client.run(config);
} finally {
mJobMasterClientPool.release(client);
}
// A successful submission relaxes the flow-control quiet period.
mQuietPeriodSeconds /= 2;
mPersistJobs.put(fileId, new PersistJob(jobId, fileId, uri, tempUfsPath, timer));
// Update the inode and journal the change.
try (JournalContext journalContext = createJournalContext();
LockedInodePath inodePath = mInodeTree
.lockFullInodePath(fileId, LockPattern.WRITE_INODE)) {
InodeFile inode = inodePath.getInodeFile();
mInodeTree.updateInodeFile(journalContext, UpdateInodeFileEntry.newBuilder()
.setId(inode.getId())
.setPersistJobId(jobId)
.setTempUfsPath(tempUfsPath)
.build());
}
}
/**
* {@inheritDoc}
*
* The method iterates through the set of files to be persisted (identified by their ID) and
* attempts to schedule a file persist job. Each iteration removes the file ID from the set
* of files to be persisted unless the execution sets the {@code remove} flag to false.
*
* @throws InterruptedException if the thread is interrupted
*/
@Override
public void heartbeat() throws InterruptedException {
// Honor the flow-control quiet period before scheduling any new jobs.
java.util.concurrent.TimeUnit.SECONDS.sleep(mQuietPeriodSeconds);
// Process persist requests.
for (long fileId : mPersistRequests.keySet()) {
// Throw if interrupted.
if (Thread.interrupted()) {
throw new InterruptedException("PersistenceScheduler interrupted.");
}
boolean remove = true;
alluxio.time.ExponentialTimer timer = mPersistRequests.get(fileId);
if (timer == null) {
// This could occur if a key is removed from mPersistRequests while we are iterating.
continue;
}
alluxio.time.ExponentialTimer.Result timerResult = timer.tick();
if (timerResult == alluxio.time.ExponentialTimer.Result.NOT_READY) {
// operation is not ready to be scheduled
continue;
}
AlluxioURI uri = null;
try {
try (LockedInodePath inodePath = mInodeTree
.lockFullInodePath(fileId, LockPattern.READ)) {
uri = inodePath.getUri();
}
try {
checkUfsMode(uri, OperationType.WRITE);
} catch (Exception e) {
LOG.warn("Unable to schedule persist request for path {}: {}", uri, e.getMessage());
// Retry when ufs mode permits operation
remove = false;
continue;
}
switch (timerResult) {
case EXPIRED:
handleExpired(fileId);
break;
case READY:
handleReady(fileId);
break;
default:
throw new IllegalStateException("Unrecognized timer state: " + timerResult);
}
} catch (FileDoesNotExistException | InvalidPathException e) {
LOG.warn("The file {} (id={}) to be persisted was not found : {}", uri, fileId,
e.getMessage());
LOG.debug("Exception: ", e);
} catch (UnavailableException e) {
LOG.warn("Failed to persist file {}, will retry later: {}", uri, e.toString());
remove = false;
} catch (ResourceExhaustedException e) {
LOG.warn("The job service is busy, will retry later: {}", e.toString());
LOG.debug("Exception: ", e);
// Double the quiet period (capped) to back off from the overloaded job service.
mQuietPeriodSeconds = (mQuietPeriodSeconds == 0) ? 1 :
Math.min(MAX_QUIET_PERIOD_SECONDS, mQuietPeriodSeconds * 2);
remove = false;
// End the method here until the next heartbeat. No more jobs should be scheduled during
// the current heartbeat if the job master is at full capacity.
return;
} catch (Exception e) {
LOG.warn("Unexpected exception encountered when scheduling the persist job for file {} "
+ "(id={}) : {}", uri, fileId, e.getMessage());
LOG.debug("Exception: ", e);
} finally {
if (remove) {
mPersistRequests.remove(fileId);
}
}
}
}
}
/**
* Periodically polls for the result of the jobs and updates metadata accordingly.
*/
@NotThreadSafe
private final class PersistenceChecker implements alluxio.heartbeat.HeartbeatExecutor {
/**
* Creates a new instance of {@link PersistenceChecker}.
*/
PersistenceChecker() {}
@Override
public void close() {} // nothing to clean up
/**
* Updates the file system metadata to reflect the fact that the persist job succeeded.
*
* NOTE: It is the responsibility of the caller to update {@link #mPersistJobs}.
*
* @param job the successful job
*/
private void handleSuccess(PersistJob job) {
long fileId = job.getFileId();
String tempUfsPath = job.getTempUfsPath();
// Block IDs are saved so that staged UFS blocks from durable-write fallback can be cleaned.
List<Long> blockIds = new ArrayList<>();
UfsManager.UfsClient ufsClient = null;
try (JournalContext journalContext = createJournalContext();
LockedInodePath inodePath = mInodeTree
.lockFullInodePath(fileId, LockPattern.WRITE_INODE)) {
InodeFile inode = inodePath.getInodeFile();
MountTable.Resolution resolution = mMountTable.resolve(inodePath.getUri());
ufsClient = mUfsManager.get(resolution.getMountId());
switch (inode.getPersistenceState()) {
case LOST:
// fall through
case NOT_PERSISTED:
// fall through
case PERSISTED:
LOG.warn("File {} (id={}) persistence state is {}. Successful persist has no effect.",
job.getUri(), fileId, inode.getPersistenceState());
break;
case TO_BE_PERSISTED:
try (CloseableResource ufsResource = resolution.acquireUfsResource()) {
UnderFileSystem ufs = ufsResource.get();
String ufsPath = resolution.getUri().toString();
if (!ufsPath.equals(tempUfsPath)) {
// Make rename only when tempUfsPath is different from final ufsPath. Note that,
// on object store, we take the optimization to skip the rename by having
// tempUfsPath the same as final ufsPath.
if (!ufs.renameRenamableFile(tempUfsPath, ufsPath)) {
throw new IOException(
String.format("Failed to rename %s to %s.", tempUfsPath, ufsPath));
}
}
// Propagate Alluxio ownership and permissions to the newly persisted UFS file.
ufs.setOwner(ufsPath, inode.getOwner(), inode.getGroup());
ufs.setMode(ufsPath, inode.getMode());
}
// Clear job bookkeeping and journal the PERSISTED state transition.
mInodeTree.updateInodeFile(journalContext, UpdateInodeFileEntry.newBuilder()
.setId(inode.getId())
.setPersistJobId(Constants.PERSISTENCE_INVALID_JOB_ID)
.setTempUfsPath(Constants.PERSISTENCE_INVALID_UFS_PATH)
.build());
mInodeTree.updateInode(journalContext, UpdateInodeEntry.newBuilder()
.setId(inode.getId())
.setPersistenceState(PersistenceState.PERSISTED.name())
.build());
propagatePersistedInternal(journalContext, inodePath);
Metrics.FILES_PERSISTED.inc();
// Save state for possible cleanup
blockIds.addAll(inode.getBlockIds());
break;
default:
throw new IllegalStateException(
"Unrecognized persistence state: " + inode.getPersistenceState());
}
} catch (FileDoesNotExistException | InvalidPathException e) {
LOG.warn("The file {} (id={}) to be persisted was not found: {}", job.getUri(), fileId,
e.getMessage());
LOG.debug("Exception: ", e);
// Cleanup the temporary file.
if (ufsClient != null) {
try (CloseableResource ufsResource = ufsClient.acquireUfsResource()) {
cleanup(ufsResource.get(), tempUfsPath);
}
}
} catch (Exception e) {
LOG.warn(
"Unexpected exception encountered when trying to complete persistence of a file {} "
+ "(id={}) : {}",
job.getUri(), fileId, e.getMessage());
LOG.debug("Exception: ", e);
if (ufsClient != null) {
try (CloseableResource ufsResource = ufsClient.acquireUfsResource()) {
cleanup(ufsResource.get(), tempUfsPath);
}
}
// Re-enqueue the request so persistence is retried on a later heartbeat.
mPersistRequests.put(fileId, job.getTimer());
}
// Cleanup possible staging UFS blocks files due to fast durable write fallback.
// Note that this is best effort
if (ufsClient != null) {
for (long blockId : blockIds) {
String ufsBlockPath = alluxio.worker.BlockUtils.getUfsBlockPath(ufsClient, blockId);
try (CloseableResource ufsResource = ufsClient.acquireUfsResource()) {
alluxio.util.UnderFileSystemUtils.deleteFileIfExists(ufsResource.get(), ufsBlockPath);
} catch (Exception e) {
// Pass the exception itself as the final argument so SLF4J logs the stack trace.
// Previously e.getMessage() was passed as an extra non-Throwable argument after the
// single placeholder was filled, so it was silently dropped.
LOG.warn("Failed to clean up staging UFS block file {}", ufsBlockPath, e);
}
}
}
}
/**
 * Polls the job master for the state of every outstanding persist job and updates
 * master-side bookkeeping accordingly: jobs flagged for cancellation are cancelled,
 * failed/errored jobs are re-queued as persist requests, and completed jobs are handed
 * to {@code handleSuccess} on the checker pool.
 *
 * @throws InterruptedException if the heartbeat thread is interrupted
 */
@Override
public void heartbeat() throws InterruptedException {
  // If the completion-handler pool already has queued work, skip status polling below
  // (cancellations are still processed) to avoid piling more tasks onto the queue.
  boolean queueEmpty = mPersistCheckerPool.getQueue().isEmpty();
  // Check the progress of persist jobs.
  for (long fileId : mPersistJobs.keySet()) {
    // Throw if interrupted.
    if (Thread.interrupted()) {
      throw new InterruptedException("PersistenceChecker interrupted.");
    }
    final PersistJob job = mPersistJobs.get(fileId);
    if (job == null) {
      // This could happen if a key is removed from mPersistJobs while we are iterating.
      continue;
    }
    // Cancel any jobs marked as canceled
    switch (job.getCancelState()) {
      case NOT_CANCELED:
        break;
      case TO_BE_CANCELED:
        // Send the message to cancel this job
        JobMasterClient client = mJobMasterClientPool.acquire();
        try {
          client.cancel(job.getId());
          job.setCancelState(PersistJob.CancelState.CANCELING);
        } catch (alluxio.exception.status.NotFoundException e) {
          LOG.warn("Persist job (id={}) for file {} (id={}) to cancel was not found: {}",
              job.getId(), job.getUri(), fileId, e.getMessage());
          LOG.debug("Exception: ", e);
          // The job master no longer knows this job; drop our bookkeeping for it.
          mPersistJobs.remove(fileId);
          continue;
        } catch (Exception e) {
          LOG.warn("Unexpected exception encountered when cancelling a persist job (id={}) for "
              + "file {} (id={}) : {}", job.getId(), job.getUri(), fileId, e.getMessage());
          LOG.debug("Exception: ", e);
        } finally {
          // Always return the client to the pool, even when cancellation failed.
          mJobMasterClientPool.release(client);
        }
        continue;
      case CANCELING:
        break;
      default:
        throw new IllegalStateException("Unrecognized cancel state: " + job.getCancelState());
    }
    if (!queueEmpty) {
      // There are tasks waiting in the queue, so do not try to schedule anything
      continue;
    }
    long jobId = job.getId();
    JobMasterClient client = mJobMasterClientPool.acquire();
    try {
      alluxio.job.wire.JobInfo jobInfo = client.getStatus(jobId);
      switch (jobInfo.getStatus()) {
        case RUNNING:
          // fall through
        case CREATED:
          break;
        case FAILED:
          // Re-queue a persist request so persistence is retried later.
          LOG.warn("The persist job (id={}) for file {} (id={}) failed: {}", jobId,
              job.getUri(), fileId, jobInfo.getErrorMessage());
          mPersistJobs.remove(fileId);
          mPersistRequests.put(fileId, job.getTimer());
          break;
        case CANCELED:
          mPersistJobs.remove(fileId);
          break;
        case COMPLETED:
          // Finish persistence bookkeeping asynchronously on the checker pool.
          mPersistJobs.remove(fileId);
          mPersistCheckerPool.execute(() -> handleSuccess(job));
          break;
        default:
          throw new IllegalStateException("Unrecognized job status: " + jobInfo.getStatus());
      }
    } catch (Exception e) {
      LOG.warn("Exception encountered when trying to retrieve the status of a "
          + " persist job (id={}) for file {} (id={}): {}.", jobId, job.getUri(), fileId,
          e.getMessage());
      LOG.debug("Exception: ", e);
      // Status lookup failed; forget the job and retry persistence from scratch later.
      mPersistJobs.remove(fileId);
      mPersistRequests.put(fileId, job.getTimer());
    } finally {
      mJobMasterClientPool.release(client);
    }
  }
}
}
/**
 * Heartbeat executor that periodically samples capacity gauges from the metrics registry and
 * records them as "% used" time series in {@code mTimeSeriesStore}.
 */
@NotThreadSafe
private final class TimeSeriesRecorder implements alluxio.heartbeat.HeartbeatExecutor {
  @Override
  public void heartbeat() throws InterruptedException {
    // TODO(calvin): Provide a better way to keep track of metrics collected as time series
    MetricRegistry registry = MetricsSystem.METRIC_REGISTRY;
    // % Alluxio space used
    Long masterCapacityTotal = (Long) registry.getGauges()
        .get(MetricsSystem.getMetricName(DefaultBlockMaster.Metrics.CAPACITY_TOTAL)).getValue();
    Long masterCapacityUsed = (Long) registry.getGauges()
        .get(MetricsSystem.getMetricName(DefaultBlockMaster.Metrics.CAPACITY_USED)).getValue();
    mTimeSeriesStore.record("% Alluxio Space Used",
        usedPercentage(masterCapacityUsed, masterCapacityTotal));
    // % UFS space used
    Long masterUnderfsCapacityTotal =
        (Long) registry.getGauges()
            .get(MetricsSystem.getMetricName(MasterMetrics.UFS_CAPACITY_TOTAL)).getValue();
    Long masterUnderfsCapacityUsed =
        (Long) registry.getGauges()
            .get(MetricsSystem.getMetricName(MasterMetrics.UFS_CAPACITY_USED)).getValue();
    mTimeSeriesStore.record("% UFS Space Used",
        usedPercentage(masterUnderfsCapacityUsed, masterUnderfsCapacityTotal));
  }

  /**
   * Computes an integer percentage, shared by the Alluxio and UFS space series above.
   *
   * @param used the used capacity in bytes
   * @param total the total capacity in bytes
   * @return {@code used} as an integer percentage of {@code total}, or 0 when {@code total}
   *         is not positive
   */
  private int usedPercentage(Long used, Long total) {
    return (total > 0) ? (int) (100L * used / total) : 0;
  }

  @Override
  public void close() {} // Nothing to clean up.
}
/**
 * Best-effort deletion of a file in the UFS. Failures are logged and never propagated.
 *
 * @param ufs the under file system to delete from
 * @param ufsPath the UFS path to delete; an empty path is a no-op
 */
private static void cleanup(UnderFileSystem ufs, String ufsPath) {
  if (ufsPath.isEmpty()) {
    // Nothing to clean up.
    return;
  }
  try {
    if (!ufs.deleteExistingFile(ufsPath)) {
      LOG.warn("Failed to delete UFS file {}.", ufsPath);
    }
  } catch (IOException e) {
    LOG.warn("Failed to delete UFS file {}.", ufsPath, e);
  }
}
/**
 * Sets the operation mode for the given physical UFS path, recording the change in an
 * audit context and journaling it via the RPC context.
 *
 * @param ufsPath the physical UFS path whose mode is updated
 * @param ufsMode the new operation mode
 */
@Override
public void updateUfsMode(AlluxioURI ufsPath, UfsMode ufsMode) throws InvalidPathException,
    InvalidArgumentException, UnavailableException, AccessControlException {
  // TODO(adit): Create new fsadmin audit context
  try (RpcContext rpc = createRpcContext();
      FileSystemMasterAuditContext audit =
          createAuditContext("updateUfsMode", ufsPath, null, null)) {
    mUfsManager.setUfsMode(rpc, ufsPath, ufsMode);
    audit.setSucceeded(true);
  }
}
/**
 * Verifies that the given operation type is currently permitted against the UFS backing
 * the given Alluxio path.
 *
 * @param alluxioPath the Alluxio path
 * @param opType the operation type
 * @throws AccessControlException if the UFS is in NO_ACCESS mode, or in READ_ONLY mode and
 *         the operation is a write
 * @throws InvalidPathException if the path cannot be resolved against the mount table
 */
private void checkUfsMode(AlluxioURI alluxioPath, OperationType opType)
    throws AccessControlException, InvalidPathException {
  MountTable.Resolution resolution = mMountTable.resolve(alluxioPath);
  try (CloseableResource ufsResource = resolution.acquireUfsResource()) {
    UnderFileSystem ufs = ufsResource.get();
    UfsMode mode =
        ufs.getOperationMode(mUfsManager.getPhysicalUfsState(ufs.getPhysicalStores()));
    if (mode == UfsMode.NO_ACCESS) {
      // Nothing is allowed under maintenance with no access.
      throw new AccessControlException(ExceptionMessage.UFS_OP_NOT_ALLOWED.getMessage(opType,
          resolution.getUri(), UfsMode.NO_ACCESS));
    }
    if (mode == UfsMode.READ_ONLY && opType == OperationType.WRITE) {
      // Reads are still allowed in read-only mode; writes are not.
      throw new AccessControlException(ExceptionMessage.UFS_OP_NOT_ALLOWED.getMessage(opType,
          resolution.getUri(), UfsMode.READ_ONLY));
    }
    // All remaining mode/operation combinations are allowed.
  }
}
/**
 * The operation type. This class is used to check if an operation to the under storage is allowed
 * during maintenance.
 */
enum OperationType {
  /** A read against the under storage; allowed unless the UFS is in NO_ACCESS mode. */
  READ,
  /** A mutating operation against the under storage; disallowed in READ_ONLY and NO_ACCESS modes. */
  WRITE,
}
/**
* Class that contains metrics for FileSystemMaster.
* This class is public because the counter names are referenced in
* {@link alluxio.web.WebInterfaceAbstractMetricsServlet}.
*/
public static final class Metrics {
private static final Counter DIRECTORIES_CREATED
= MetricsSystem.counter(MasterMetrics.DIRECTORIES_CREATED);
private static final Counter FILE_BLOCK_INFOS_GOT
= MetricsSystem.counter(MasterMetrics.FILE_BLOCK_INFOS_GOT);
private static final Counter FILE_INFOS_GOT
= MetricsSystem.counter(MasterMetrics.FILE_INFOS_GOT);
private static final Counter FILES_COMPLETED
= MetricsSystem.counter(MasterMetrics.FILES_COMPLETED);
private static final Counter FILES_CREATED
= MetricsSystem.counter(MasterMetrics.FILES_CREATED);
private static final Counter FILES_FREED
= MetricsSystem.counter(MasterMetrics.FILES_FREED);
private static final Counter FILES_PERSISTED
= MetricsSystem.counter(MasterMetrics.FILES_PERSISTED);
private static final Counter NEW_BLOCKS_GOT
= MetricsSystem.counter(MasterMetrics.NEW_BLOCKS_GOT);
private static final Counter PATHS_DELETED
= MetricsSystem.counter(MasterMetrics.PATHS_DELETED);
private static final Counter PATHS_MOUNTED
= MetricsSystem.counter(MasterMetrics.PATHS_MOUNTED);
private static final Counter PATHS_RENAMED
= MetricsSystem.counter(MasterMetrics.PATHS_RENAMED);
private static final Counter PATHS_UNMOUNTED
= MetricsSystem.counter(MasterMetrics.PATHS_UNMOUNTED);
// TODO(peis): Increment the RPCs OPs at the place where we receive the RPCs.
private static final Counter COMPLETE_FILE_OPS
= MetricsSystem.counter(MasterMetrics.COMPLETE_FILE_OPS);
private static final Counter CREATE_DIRECTORIES_OPS
= MetricsSystem.counter(MasterMetrics.CREATE_DIRECTORIES_OPS);
private static final Counter CREATE_FILES_OPS
= MetricsSystem.counter(MasterMetrics.CREATE_FILES_OPS);
private static final Counter DELETE_PATHS_OPS
= MetricsSystem.counter(MasterMetrics.DELETE_PATHS_OPS);
private static final Counter FREE_FILE_OPS
= MetricsSystem.counter(MasterMetrics.FREE_FILE_OPS);
private static final Counter GET_FILE_BLOCK_INFO_OPS
= MetricsSystem.counter(MasterMetrics.GET_FILE_BLOCK_INFO_OPS);
private static final Counter GET_FILE_INFO_OPS
= MetricsSystem.counter(MasterMetrics.GET_FILE_INFO_OPS);
private static final Counter GET_NEW_BLOCK_OPS
= MetricsSystem.counter(MasterMetrics.GET_NEW_BLOCK_OPS);
private static final Counter MOUNT_OPS
= MetricsSystem.counter(MasterMetrics.MOUNT_OPS);
private static final Counter RENAME_PATH_OPS
= MetricsSystem.counter(MasterMetrics.RENAME_PATH_OPS);
private static final Counter SET_ACL_OPS
= MetricsSystem.counter(MasterMetrics.SET_ACL_OPS);
private static final Counter SET_ATTRIBUTE_OPS
= MetricsSystem.counter(MasterMetrics.SET_ATTRIBUTE_OPS);
private static final Counter UNMOUNT_OPS
= MetricsSystem.counter(MasterMetrics.UNMOUNT_OPS);
/**
* Register some file system master related gauges.
*
* @param master the file system master
* @param ufsManager the under filesystem manager
*/
@VisibleForTesting
public static void registerGauges(
final FileSystemMaster master, final UfsManager ufsManager) {
MetricsSystem.registerGaugeIfAbsent(MetricsSystem
.getMetricName(MasterMetrics.FILES_PINNED),
master::getNumberOfPinnedFiles);
MetricsSystem.registerGaugeIfAbsent(MetricsSystem
.getMetricName(MasterMetrics.TOTAL_PATHS),
() -> master.getInodeCount());
final String ufsDataFolder = ServerConfiguration.get(PropertyKey.MASTER_MOUNT_TABLE_ROOT_UFS);
MetricsSystem.registerGaugeIfAbsent(MetricsSystem
.getMetricName(MasterMetrics.UFS_CAPACITY_TOTAL),
() -> {
try (CloseableResource ufsResource =
ufsManager.getRoot().acquireUfsResource()) {
UnderFileSystem ufs = ufsResource.get();
return ufs.getSpace(ufsDataFolder, UnderFileSystem.SpaceType.SPACE_TOTAL);
} catch (IOException e) {
LOG.error(e.getMessage(), e);
return Stream.empty();
}
});
MetricsSystem.registerGaugeIfAbsent(MetricsSystem
.getMetricName(MasterMetrics.UFS_CAPACITY_USED),
() -> {
try (CloseableResource ufsResource =
ufsManager.getRoot().acquireUfsResource()) {
UnderFileSystem ufs = ufsResource.get();
return ufs.getSpace(ufsDataFolder, UnderFileSystem.SpaceType.SPACE_USED);
} catch (IOException e) {
LOG.error(e.getMessage(), e);
return Stream.empty();
}
});
MetricsSystem.registerGaugeIfAbsent(MetricsSystem
.getMetricName(MasterMetrics.UFS_CAPACITY_FREE),
() -> {
long ret = 0L;
try (CloseableResource ufsResource =
ufsManager.getRoot().acquireUfsResource()) {
UnderFileSystem ufs = ufsResource.get();
ret = ufs.getSpace(ufsDataFolder, UnderFileSystem.SpaceType.SPACE_FREE);
} catch (IOException e) {
LOG.error(e.getMessage(), e);
}
return ret;
});
}
private Metrics() {} // prevent instantiation
}
/**
 * Creates a {@link FileSystemMasterAuditContext} instance.
 *
 * <p>When audit logging is enabled ({@code mAsyncAuditLogWriter} is non-null), the context is
 * populated with the caller's "user,group" identity (or "N/A" components when the user or
 * group cannot be determined), the configured authentication type, the client IP, and the
 * command details; otherwise an unpopulated context is returned.
 *
 * @param command the command to be logged by this {@link AuditContext}
 * @param srcPath the source path of this command
 * @param dstPath the destination path of this command
 * @param srcInode the source inode of this command
 * @return newly-created {@link FileSystemMasterAuditContext} instance
 */
private FileSystemMasterAuditContext createAuditContext(String command, AlluxioURI srcPath,
    @Nullable AlluxioURI dstPath, @Nullable Inode srcInode) {
  FileSystemMasterAuditContext auditContext =
      new FileSystemMasterAuditContext(mAsyncAuditLogWriter);
  if (mAsyncAuditLogWriter != null) {
    String user = null;
    String ugi = "";
    try {
      user = AuthenticatedClientUser.getClientUser(ServerConfiguration.global());
    } catch (AccessControlException e) {
      // No authenticated client user is available; audit the identity as "N/A".
      ugi = "N/A";
    }
    if (user != null) {
      try {
        String primaryGroup = CommonUtils.getPrimaryGroupName(user, ServerConfiguration.global());
        ugi = user + "," + primaryGroup;
      } catch (IOException e) {
        // Pass the exception to the logger so group-resolution failures are diagnosable;
        // previously the throwable was silently dropped.
        LOG.debug("Failed to get primary group for user {}.", user, e);
        ugi = user + ",N/A";
      }
    }
    AuthType authType =
        ServerConfiguration.getEnum(PropertyKey.SECURITY_AUTHENTICATION_TYPE, AuthType.class);
    auditContext.setUgi(ugi)
        .setAuthType(authType)
        .setIp(ClientIpAddressInjector.getIpAddress())
        .setCommand(command).setSrcPath(srcPath).setDstPath(dstPath)
        .setSrcInode(srcInode).setAllowed(true);
  }
  return auditContext;
}
/**
 * Creates a {@link BlockDeletionContext} wired with two handlers: one that removes the
 * deleted blocks from the block master ({@code removeBlocks}) and one that invalidates
 * their entries in the UFS block location cache.
 *
 * @return a new block deletion context
 */
private BlockDeletionContext createBlockDeletionContext() {
  return new DefaultBlockDeletionContext(this::removeBlocks,
      blocks -> blocks.forEach(mUfsBlockLocationCache::invalidate));
}
/**
 * Removes the given blocks from the block master, retrying up to three times when the
 * block master is temporarily unavailable.
 *
 * @param blocks the block ids to remove; an empty list is a no-op
 * @throws IOException if all attempts fail, wrapping the last {@link UnavailableException}
 */
private void removeBlocks(List blocks) throws IOException {
  if (blocks.isEmpty()) {
    return;
  }
  IOException lastException = null;
  RetryPolicy retryPolicy = new CountingRetry(3);
  while (retryPolicy.attempt()) {
    try {
      mBlockMaster.removeBlocks(blocks, true);
      return;
    } catch (UnavailableException e) {
      // Remember the failure and let the retry policy decide whether to try again.
      lastException = e;
    }
  }
  throw new IOException("Failed to remove deleted blocks from block master", lastException);
}
/**
 * Creates a fresh {@link RpcContext} bundling a block-deletion context and a journal context.
 *
 * @return a context for executing an RPC
 * @throws UnavailableException propagated from journal context creation
 */
@VisibleForTesting
public RpcContext createRpcContext() throws UnavailableException {
  return new RpcContext(createBlockDeletionContext(), createJournalContext());
}
/**
 * Creates a {@link LockingScheme} for the given path, delegating to the four-argument
 * overload with {@code isGetFileInfo} set to false.
 *
 * @param path the path to lock
 * @param options the common options carrying an optional sync interval
 * @param desiredLockMode the desired lock pattern
 * @return the locking scheme for the path
 */
private LockingScheme createLockingScheme(AlluxioURI path, FileSystemMasterCommonPOptions options,
    LockPattern desiredLockMode) {
  return createLockingScheme(path, options, desiredLockMode, false);
}
/**
 * Creates a {@link LockingScheme} for the given path, consulting the UFS sync path cache
 * to decide whether a metadata sync should accompany the operation.
 *
 * @param path the path to lock
 * @param options the common options carrying an optional sync interval
 * @param desiredLockMode the desired lock pattern
 * @param isGetFileInfo whether the caller is a getFileInfo operation
 * @return the locking scheme for the path
 */
private LockingScheme createLockingScheme(AlluxioURI path, FileSystemMasterCommonPOptions options,
    LockPattern desiredLockMode, boolean isGetFileInfo) {
  // If client options didn't specify the interval, fallback to whatever the server has
  // configured to prevent unnecessary syncing due to the default value being 0
  long syncIntervalMs;
  if (options.hasSyncIntervalMs()) {
    syncIntervalMs = options.getSyncIntervalMs();
  } else {
    syncIntervalMs = ServerConfiguration.getMs(PropertyKey.USER_FILE_METADATA_SYNC_INTERVAL);
  }
  boolean syncRequired =
      mUfsSyncPathCache.shouldSyncPath(path.getPath(), syncIntervalMs, isGetFileInfo);
  return new LockingScheme(path, desiredLockMode, syncRequired);
}
/**
 * @return whether permission checking is enabled, per
 *         {@link PropertyKey#SECURITY_AUTHORIZATION_PERMISSION_ENABLED}
 */
private boolean isAclEnabled() {
  return ServerConfiguration.getBoolean(PropertyKey.SECURITY_AUTHORIZATION_PERMISSION_ENABLED);
}
/**
 * @return the time series recorded by this master (populated by the TimeSeriesRecorder
 *         heartbeat via {@code mTimeSeriesStore})
 */
@Override
public List getTimeSeries() {
  return mTimeSeriesStore.getTimeSeries();
}
/**
 * Maps a UFS URI back to the Alluxio URI it is mounted at.
 *
 * @param ufsUri the UFS URI to reverse-resolve
 * @return the corresponding Alluxio URI
 * @throws InvalidPathException if no mount point covers the given UFS URI
 */
@Override
public AlluxioURI reverseResolve(AlluxioURI ufsUri) throws InvalidPathException {
  MountTable.ReverseResolution resolution = mMountTable.reverseResolve(ufsUri);
  if (resolution != null) {
    return resolution.getUri();
  }
  throw new InvalidPathException(ufsUri.toString() + " is not a valid ufs uri");
}
}