
// alluxio.master.meta.DefaultMetaMaster Maven / Gradle / Ivy
/*
* The Alluxio Open Foundation licenses this work under the Apache License, version 2.0
* (the "License"). You may not use this work except in compliance with the License, which is
* available at www.apache.org/licenses/LICENSE-2.0
*
* This software is distributed on an "AS IS" basis, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND,
* either express or implied, as more fully set forth in the License.
*
* See the NOTICE file distributed with this work for information regarding copyright ownership.
*/
package alluxio.master.meta;
import alluxio.AlluxioURI;
import alluxio.ClientContext;
import alluxio.Constants;
import alluxio.Server;
import alluxio.clock.SystemClock;
import alluxio.collections.IndexDefinition;
import alluxio.collections.IndexedSet;
import alluxio.conf.ConfigurationValueOptions;
import alluxio.conf.PropertyKey;
import alluxio.conf.ServerConfiguration;
import alluxio.conf.Source;
import alluxio.exception.ExceptionMessage;
import alluxio.exception.status.NotFoundException;
import alluxio.exception.status.UnavailableException;
import alluxio.grpc.BackupPOptions;
import alluxio.grpc.GetConfigurationPOptions;
import alluxio.grpc.GrpcService;
import alluxio.grpc.MetaCommand;
import alluxio.grpc.RegisterMasterPOptions;
import alluxio.grpc.Scope;
import alluxio.grpc.ServiceType;
import alluxio.heartbeat.HeartbeatContext;
import alluxio.heartbeat.HeartbeatExecutor;
import alluxio.heartbeat.HeartbeatThread;
import alluxio.master.BackupManager;
import alluxio.master.CoreMaster;
import alluxio.master.CoreMasterContext;
import alluxio.master.MasterClientContext;
import alluxio.master.block.BlockMaster;
import alluxio.master.journal.JournalContext;
import alluxio.master.journal.checkpoint.CheckpointName;
import alluxio.master.meta.checkconf.ServerConfigurationChecker;
import alluxio.master.meta.checkconf.ServerConfigurationStore;
import alluxio.proto.journal.Journal;
import alluxio.resource.LockResource;
import alluxio.underfs.UnderFileSystem;
import alluxio.underfs.UnderFileSystemConfiguration;
import alluxio.underfs.options.MkdirsOptions;
import alluxio.util.ConfigurationUtils;
import alluxio.util.IdUtils;
import alluxio.util.ThreadFactoryUtils;
import alluxio.util.URIUtils;
import alluxio.util.executor.ExecutorServiceFactories;
import alluxio.util.executor.ExecutorServiceFactory;
import alluxio.util.io.PathUtils;
import alluxio.util.network.NetworkAddressUtils;
import alluxio.wire.Address;
import alluxio.wire.BackupResponse;
import alluxio.wire.ConfigCheckReport;
import alluxio.wire.ConfigHash;
import alluxio.wire.Configuration;
import com.google.common.collect.ImmutableSet;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import java.io.IOException;
import java.io.OutputStream;
import java.net.InetSocketAddress;
import java.time.Clock;
import java.time.Instant;
import java.time.ZoneId;
import java.time.format.DateTimeFormatter;
import java.util.HashMap;
import java.util.Iterator;
import java.util.List;
import java.util.Map;
import java.util.Set;
import java.util.concurrent.Executors;
import javax.annotation.concurrent.NotThreadSafe;
/**
* The default meta master.
*/
@NotThreadSafe
public final class DefaultMetaMaster extends CoreMaster implements MetaMaster {
private static final Logger LOG = LoggerFactory.getLogger(DefaultMetaMaster.class);
private static final Set> DEPS =
ImmutableSet.>of(BlockMaster.class);
// Master metadata management.
/**
 * Index over {@link MasterInfo} entries keyed by {@link MasterInfo#getId()}.
 * NOTE(review): the {@code true} constructor argument presumably marks the index as unique;
 * confirm against {@code IndexDefinition}.
 */
private static final IndexDefinition<MasterInfo, Long> ID_INDEX =
    new IndexDefinition<MasterInfo, Long>(true) {
      @Override
      public Long getFieldValue(MasterInfo o) {
        return o.getId();
      }
    };
/**
 * Index over {@link MasterInfo} entries keyed by {@link MasterInfo#getAddress()}.
 * NOTE(review): the {@code true} constructor argument presumably marks the index as unique;
 * confirm against {@code IndexDefinition}.
 */
private static final IndexDefinition<MasterInfo, Address> ADDRESS_INDEX =
    new IndexDefinition<MasterInfo, Address>(true) {
      @Override
      public Address getFieldValue(MasterInfo o) {
        return o.getAddress();
      }
    };
/** Handle to the block master. */
private final BlockMaster mBlockMaster;
/** The clock to use for determining the time. */
private final Clock mClock = new SystemClock();
/** The master configuration store. */
private final ServerConfigurationStore mMasterConfigStore = new ServerConfigurationStore();
/** The worker configuration store. */
private final ServerConfigurationStore mWorkerConfigStore = new ServerConfigurationStore();
/** The server-side configuration checker, fed by both configuration stores. */
private final ServerConfigurationChecker mConfigChecker =
    new ServerConfigurationChecker(mMasterConfigStore, mWorkerConfigStore);
/** Keeps track of standby masters which are in communication with the leader master. */
private final IndexedSet<MasterInfo> mMasters =
    new IndexedSet<>(ID_INDEX, ADDRESS_INDEX);
/** Keeps track of standby masters which are no longer in communication with the leader master. */
private final IndexedSet<MasterInfo> mLostMasters =
    new IndexedSet<>(ID_INDEX, ADDRESS_INDEX);
/** The connect address for the rpc server. */
private final InetSocketAddress mRpcConnectAddress
    = NetworkAddressUtils.getConnectAddress(NetworkAddressUtils.ServiceType.MASTER_RPC,
    ServerConfiguration.global());
/** The address of this master. */
private Address mMasterAddress;
/** The root ufs. */
private final UnderFileSystem mUfs;
/** The metadata daily backup; only set while a primary master runs with daily backup enabled. */
private DailyMetadataBackup mDailyBackup;
/** Path level properties. */
private PathProperties mPathProperties;
/**
 * Builds a {@link DefaultMetaMaster} that obtains its executor service from a cached thread pool
 * factory named {@link Constants#META_MASTER_NAME}.
 *
 * @param blockMaster handle to the block master
 * @param masterContext context of the Alluxio master
 */
DefaultMetaMaster(BlockMaster blockMaster, CoreMasterContext masterContext) {
  this(
      blockMaster,
      masterContext,
      ExecutorServiceFactories.cachedThreadPool(Constants.META_MASTER_NAME));
}
/**
* Creates a new instance of {@link DefaultMetaMaster}.
*
* @param blockMaster a block master handle
* @param masterContext the context for Alluxio master
* @param executorServiceFactory a factory for creating the executor service to use for running
* maintenance threads
*/
DefaultMetaMaster(BlockMaster blockMaster, CoreMasterContext masterContext,
ExecutorServiceFactory executorServiceFactory) {
super(masterContext, new SystemClock(), executorServiceFactory);
mMasterAddress =
new Address().setHost(ServerConfiguration.getOrDefault(PropertyKey.MASTER_HOSTNAME,
"localhost"))
.setRpcPort(mPort);
mBlockMaster = blockMaster;
mBlockMaster.registerLostWorkerFoundListener(mWorkerConfigStore::lostNodeFound);
mBlockMaster.registerWorkerLostListener(mWorkerConfigStore::handleNodeLost);
mBlockMaster.registerNewWorkerConfListener(mWorkerConfigStore::registerNewConf);
if (URIUtils.isLocalFilesystem(ServerConfiguration
.get(PropertyKey.MASTER_MOUNT_TABLE_ROOT_UFS))) {
mUfs = UnderFileSystem.Factory
.create("/", UnderFileSystemConfiguration.defaults(ServerConfiguration.global()));
} else {
mUfs = UnderFileSystem.Factory.createForRoot(ServerConfiguration.global());
}
mPathProperties = new PathProperties();
}
/**
 * Builds the gRPC services exposed by the meta master.
 *
 * <p>Three services are registered: the configuration service (with authentication disabled),
 * the client service and the master-to-master service.
 *
 * @return a new mutable map from service type to the gRPC service serving it
 */
@Override
public Map<ServiceType, GrpcService> getServices() {
  Map<ServiceType, GrpcService> services = new HashMap<>();
  services.put(ServiceType.META_MASTER_CONFIG_SERVICE,
      new GrpcService(new MetaMasterConfigurationServiceHandler(this)).disableAuthentication());
  services.put(ServiceType.META_MASTER_CLIENT_SERVICE,
      new GrpcService(new MetaMasterClientServiceHandler(this)));
  services.put(ServiceType.META_MASTER_MASTER_SERVICE,
      new GrpcService(new MetaMasterMasterServiceHandler(this)));
  return services;
}
/**
 * @return the name of this master, {@link Constants#META_MASTER_NAME}
 */
@Override
public String getName() {
  final String masterName = Constants.META_MASTER_NAME;
  return masterName;
}
@Override
public Set> getDependencies() {
return DEPS;
}
/**
 * Starts the meta master and the background tasks matching its role.
 *
 * <p>Both configuration stores are reset first. A primary master registers its own
 * configuration, launches the lost-master detection and config-report logging heartbeats, and
 * starts the daily metadata backup when {@link PropertyKey#MASTER_DAILY_BACKUP_ENABLED} is set.
 * A non-primary master in HA mode launches a heartbeat that syncs with the leader master.
 *
 * @param isPrimary whether this master starts as the primary
 * @throws IOException if the superclass start fails
 */
@Override
public void start(Boolean isPrimary) throws IOException {
  super.start(isPrimary);
  mWorkerConfigStore.reset();
  mMasterConfigStore.reset();

  if (!isPrimary) {
    if (!ConfigurationUtils.isHaMode(ServerConfiguration.global())) {
      return;
    }
    // Standby master should setup MetaMasterSync to communicate with the leader master
    RetryHandlingMetaMasterMasterClient leaderClient =
        new RetryHandlingMetaMasterMasterClient(MasterClientContext
            .newBuilder(ClientContext.create(ServerConfiguration.global())).build());
    getExecutorService().submit(new HeartbeatThread(
        HeartbeatContext.META_MASTER_SYNC,
        new MetaMasterSync(mMasterAddress, leaderClient),
        (int) ServerConfiguration.getMs(PropertyKey.MASTER_STANDBY_HEARTBEAT_INTERVAL),
        ServerConfiguration.global(),
        mMasterContext.getUserState()));
    LOG.info("Standby master with address {} starts sending heartbeat to leader master.",
        mMasterAddress);
    return;
  }

  // Add the configuration of the current leader master
  mMasterConfigStore.registerNewConf(mMasterAddress,
      ConfigurationUtils.getConfiguration(ServerConfiguration.global(), Scope.MASTER));

  // The service that detects lost standby master nodes
  getExecutorService().submit(new HeartbeatThread(
      HeartbeatContext.MASTER_LOST_MASTER_DETECTION,
      new LostMasterDetectionHeartbeatExecutor(),
      (int) ServerConfiguration.getMs(PropertyKey.MASTER_STANDBY_HEARTBEAT_INTERVAL),
      ServerConfiguration.global(),
      mMasterContext.getUserState()));

  // The service that periodically logs the configuration check report
  getExecutorService().submit(new HeartbeatThread(
      HeartbeatContext.MASTER_LOG_CONFIG_REPORT_SCHEDULING,
      new LogConfigReportHeartbeatExecutor(),
      (int) ServerConfiguration.getMs(PropertyKey.MASTER_LOG_CONFIG_REPORT_HEARTBEAT_INTERVAL),
      ServerConfiguration.global(),
      mMasterContext.getUserState()));

  if (ServerConfiguration.getBoolean(PropertyKey.MASTER_DAILY_BACKUP_ENABLED)) {
    mDailyBackup = new DailyMetadataBackup(
        this,
        Executors.newSingleThreadScheduledExecutor(
            ThreadFactoryUtils.build("DailyMetadataBackup-%d", true)),
        mUfs);
    mDailyBackup.start();
  }
}
/**
 * Stops the daily metadata backup, if one is running, and then stops the master itself.
 *
 * @throws IOException if the superclass stop fails
 */
@Override
public void stop() throws IOException {
  final boolean dailyBackupRunning = mDailyBackup != null;
  if (dailyBackupRunning) {
    mDailyBackup.stop();
    // Drop the reference so a later stop() does not stop the same backup twice.
    mDailyBackup = null;
  }
  super.stop();
}
/**
 * Writes a backup of the master state to a new file and reports where it was written.
 *
 * <p>The target directory comes from {@code options} when set, otherwise from
 * {@link PropertyKey#MASTER_BACKUP_DIRECTORY}. The backup is written through the root UFS,
 * unless the caller asked for the local filesystem or supplied no target directory while the
 * root UFS is not of type {@code "local"}; in that case a temporary local UFS is created for
 * the write and closed afterwards. A missing directory is created together with its parents,
 * and a partially written backup file is deleted when the write fails.
 *
 * @param options the backup options (target directory and local filesystem flag)
 * @return the URI of the backup file together with the connect host of the master RPC service
 * @throws IOException if the directory cannot be created or the backup cannot be written
 */
@Override
public BackupResponse backup(BackupPOptions options) throws IOException {
  String dir = options.hasTargetDirectory() ? options.getTargetDirectory()
      : ServerConfiguration.get(PropertyKey.MASTER_BACKUP_DIRECTORY);
  // Remember whether the write is redirected to a temporary local UFS: it decides both the
  // cleanup below and the scheme of the URI reported to the caller.
  final boolean useTemporaryLocalUfs =
      (options.getLocalFileSystem() || !options.hasTargetDirectory())
          && !mUfs.getUnderFSType().equals("local");
  UnderFileSystem ufs = mUfs;
  if (useTemporaryLocalUfs) {
    ufs = UnderFileSystem.Factory.create("/",
        UnderFileSystemConfiguration.defaults(ServerConfiguration.global()));
    LOG.info("Backing up to local filesystem in directory {}", dir);
  } else {
    LOG.info("Backing up to root UFS in directory {}", dir);
  }
  String backupFilePath;
  try {
    if (!ufs.isDirectory(dir)) {
      if (!ufs.mkdirs(dir, MkdirsOptions.defaults(ServerConfiguration.global())
          .setCreateParent(true))) {
        throw new IOException(String.format("Failed to create directory %s", dir));
      }
    }
    // The state lock is held for the duration of the write.
    try (LockResource lr = new LockResource(mMasterContext.pauseStateLock())) {
      Instant now = Instant.now();
      String backupFileName = String.format(BackupManager.BACKUP_FILE_FORMAT,
          DateTimeFormatter.ISO_LOCAL_DATE.withZone(ZoneId.of("UTC")).format(now),
          now.toEpochMilli());
      backupFilePath = PathUtils.concatPath(dir, backupFileName);
      try {
        try (OutputStream ufsStream = ufs.create(backupFilePath)) {
          mBackupManager.backup(ufsStream);
        }
      } catch (Throwable t) {
        // Do not leave a truncated backup behind; keep the original failure as the primary one.
        try {
          ufs.deleteExistingFile(backupFilePath);
        } catch (Throwable t2) {
          LOG.error("Failed to clean up failed backup at {}", backupFilePath, t2);
          t.addSuppressed(t2);
        }
        throw t;
      }
    }
  } finally {
    // Only the UFS created here is closed; mUfs is owned by the master and stays open.
    if (useTemporaryLocalUfs) {
      try {
        ufs.close();
      } catch (IOException e) {
        // Best effort: a close failure must not hide the outcome of the backup itself.
        LOG.warn("Failed to close the local UFS used for backup to {}", dir, e);
      }
    }
  }
  String rootUfs = ServerConfiguration.get(PropertyKey.MASTER_MOUNT_TABLE_ROOT_UFS);
  // Report a local URI whenever the file was written through the local filesystem, including
  // the fallback taken when no target directory was supplied.
  if (options.getLocalFileSystem() || useTemporaryLocalUfs) {
    rootUfs = "file:///";
  }
  AlluxioURI backupUri = new AlluxioURI(new AlluxioURI(rootUfs), new AlluxioURI(backupFilePath));
  return new BackupResponse(backupUri,
      NetworkAddressUtils.getConnectHost(NetworkAddressUtils.ServiceType.MASTER_RPC,
          ServerConfiguration.global()));
}
/**
 * Checkpoints the journal system while holding the state lock obtained from
 * {@code mMasterContext.pauseStateLock()}.
 *
 * @return the connect host of the master RPC service
 * @throws IOException declared by the interface
 */
@Override
public String checkpoint() throws IOException {
  try (LockResource pauseLock = new LockResource(mMasterContext.pauseStateLock())) {
    mJournalSystem.checkpoint();
  }
  final String masterHost = NetworkAddressUtils.getConnectHost(
      NetworkAddressUtils.ServiceType.MASTER_RPC, ServerConfiguration.global());
  return masterHost;
}
/**
 * @return the configuration check report produced by the server-side configuration checker
 */
@Override
public ConfigCheckReport getConfigCheckReport() {
  final ConfigCheckReport report = mConfigChecker.getConfigCheckReport();
  return report;
}
/**
 * Assembles the cluster-level and path-level configuration requested by {@code options}.
 *
 * <p>Cluster properties (built-in keys only) and their hash are included unless
 * {@code options.getIgnoreClusterConf()} is set; path properties and their hash are included
 * unless {@code options.getIgnorePathConf()} is set.
 *
 * @param options selects which parts to include and whether raw values are returned
 * @return the assembled configuration
 */
@Override
public Configuration getConfiguration(GetConfigurationPOptions options) {
  // NOTE(cc): there is no guarantee that the returned cluster and path configurations are
  // consistent snapshot of the system's state at a certain time, the path configuration might
  // be in a newer state. But it's guaranteed that the hashes are respectively correspondent to
  // the properties.
  final Configuration.Builder result = Configuration.newBuilder();

  if (!options.getIgnoreClusterConf()) {
    for (PropertyKey key : ServerConfiguration.keySet()) {
      if (!key.isBuiltIn()) {
        continue;
      }
      Source origin = ServerConfiguration.getSource(key);
      String displayValue = ServerConfiguration.getOrDefault(key, null,
          ConfigurationValueOptions.defaults().useDisplayValue(true)
              .useRawValue(options.getRawValue()));
      result.addClusterProperty(key.getName(), displayValue, origin);
    }
    // NOTE(cc): assumes that ServerConfiguration is read-only when master is running, otherwise,
    // the following hash might not correspond to the above cluster configuration.
    result.setClusterConfHash(ServerConfiguration.hash());
  }

  if (!options.getIgnorePathConf()) {
    // Properties and hash are both read from the same snapshot.
    PathPropertiesView snapshot = mPathProperties.snapshot();
    snapshot.getProperties().forEach((path, properties) -> {
      properties.forEach((name, value) -> result.addPathProperty(path, name, value));
    });
    result.setPathConfHash(snapshot.getHash());
  }

  return result.build();
}
/**
 * @return a {@link ConfigHash} built from the current hash of the server configuration and
 *         the current hash of the path level properties
 */
@Override
public ConfigHash getConfigHash() {
  return new ConfigHash(
      ServerConfiguration.hash(),
      mPathProperties.hash());
}
/**
 * Adds the given properties for a path, using a journal context that is closed when the call
 * finishes.
 *
 * <p>NOTE(review): the type arguments of {@code Map} appear to be missing in this copy of the
 * file; confirm them against the {@code MetaMaster} interface.
 *
 * @param path the path the properties apply to
 * @param properties the properties to add for the path
 * @throws UnavailableException declared by the interface
 */
@Override
public void setPathConfiguration(String path, Map properties)
    throws UnavailableException {
  try (JournalContext journalContext = createJournalContext()) {
    mPathProperties.add(journalContext, path, properties);
  }
}
/**
 * Removes the given keys from the properties of a path, using a journal context that is closed
 * when the call finishes.
 *
 * <p>NOTE(review): the type argument of {@code Set} appears to be missing in this copy of the
 * file; confirm it against the {@code MetaMaster} interface.
 *
 * @param path the path whose properties are edited
 * @param keys the property keys to remove
 * @throws UnavailableException declared by the interface
 */
@Override
public void removePathConfiguration(String path, Set keys)
    throws UnavailableException {
  try (JournalContext journalContext = createJournalContext()) {
    mPathProperties.remove(journalContext, path, keys);
  }
}
/**
 * Removes all properties of a path, using a journal context that is closed when the call
 * finishes.
 *
 * @param path the path whose properties are removed
 * @throws UnavailableException declared by the interface
 */
@Override
public void removePathConfiguration(String path) throws UnavailableException {
  try (JournalContext journalContext = createJournalContext()) {
    mPathProperties.removeAll(journalContext, path);
  }
}
/**
 * @return the live node addresses reported by the master configuration store
 */
@Override
public List getMasterAddresses() {
  final List liveMasters = mMasterConfigStore.getLiveNodeAddresses();
  return liveMasters;
}
/**
 * @return the live node addresses reported by the worker configuration store
 */
@Override
public List getWorkerAddresses() {
  final List liveWorkers = mWorkerConfigStore.getLiveNodeAddresses();
  return liveWorkers;
}
/**
 * Resolves the id of the master at the given address, allocating a new one when needed.
 *
 * <p>An address already tracked as active keeps its id. An address tracked as lost is moved
 * back to the active set with a refreshed timestamp and keeps its old id. Any other address
 * receives a freshly generated random non-negative id.
 *
 * @param address the address of the master asking for an id
 * @return the id mapped to the address
 */
@Override
public long getMasterId(Address address) {
  final MasterInfo active = mMasters.getFirstByField(ADDRESS_INDEX, address);
  if (active != null) {
    // This master address is already mapped to a master id.
    long oldMasterId = active.getId();
    LOG.warn("The master {} already exists as id {}.", address, oldMasterId);
    return oldMasterId;
  }

  final MasterInfo lost = mLostMasters.getFirstByField(ADDRESS_INDEX, address);
  if (lost == null) {
    // Generate a new master id, retrying until the set accepts the new entry.
    long masterId;
    do {
      masterId = IdUtils.getRandomNonNegativeLong();
    } while (!mMasters.add(new MasterInfo(masterId, address)));
    LOG.info("getMasterId(): MasterAddress: {} id: {}", address, masterId);
    return masterId;
  }

  // This is one of the lost masters
  mMasterConfigStore.lostNodeFound(lost.getAddress());
  synchronized (lost) {
    final long lostMasterId = lost.getId();
    LOG.warn("A lost master {} has requested its old id {}.", address, lostMasterId);
    // Update the timestamp of the master before it is considered an active master.
    lost.updateLastUpdatedTimeMs();
    mMasters.add(lost);
    mLostMasters.remove(lost);
    return lostMasterId;
  }
}
/**
 * @return the connect address of the master RPC server, resolved when this object was created
 */
@Override
public InetSocketAddress getRpcAddress() {
  final InetSocketAddress rpcAddress = mRpcConnectAddress;
  return rpcAddress;
}
/**
 * @return the value of {@code mStartTimeMs}
 */
@Override
public long getStartTimeMs() {
  final long startedAtMs = mStartTimeMs;
  return startedAtMs;
}
/**
 * @return the difference between {@link System#currentTimeMillis()} and {@code mStartTimeMs}
 */
@Override
public long getUptimeMs() {
  final long nowMs = System.currentTimeMillis();
  return nowMs - mStartTimeMs;
}
/**
 * @return the value configured for {@link PropertyKey#MASTER_WEB_PORT}
 */
@Override
public int getWebPort() {
  final int webPort = ServerConfiguration.getInt(PropertyKey.MASTER_WEB_PORT);
  return webPort;
}
/**
 * @return whether the safe mode manager reports that the master is in safe mode
 */
@Override
public boolean isInSafeMode() {
  final boolean inSafeMode = mSafeModeManager.isInSafeMode();
  return inSafeMode;
}
/**
 * Handles a heartbeat from the master with the given id.
 *
 * @param masterId the id of the master sending the heartbeat
 * @return {@link MetaCommand#MetaCommand_Register} when the id is not among the active masters,
 *         otherwise {@link MetaCommand#MetaCommand_Nothing} after refreshing its timestamp
 */
@Override
public MetaCommand masterHeartbeat(long masterId) {
  final MasterInfo sender = mMasters.getFirstByField(ID_INDEX, masterId);
  if (sender != null) {
    sender.updateLastUpdatedTimeMs();
    return MetaCommand.MetaCommand_Nothing;
  }
  LOG.warn("Could not find master id: {} for heartbeat.", masterId);
  return MetaCommand.MetaCommand_Register;
}
/**
 * Registers the configuration of an active master and refreshes its timestamp.
 *
 * @param masterId the id of the registering master
 * @param options the register options carrying the master's configuration list
 * @throws NotFoundException if no active master has the given id
 */
@Override
public void masterRegister(long masterId, RegisterMasterPOptions options)
    throws NotFoundException {
  final MasterInfo registrant = mMasters.getFirstByField(ID_INDEX, masterId);
  if (registrant != null) {
    registrant.updateLastUpdatedTimeMs();
    mMasterConfigStore.registerNewConf(registrant.getAddress(), options.getConfigsList());
    LOG.info("registerMaster(): master: {}", registrant);
    return;
  }
  throw new NotFoundException(ExceptionMessage.NO_MASTER_FOUND.getMessage(masterId));
}
/**
 * @return the checkpoint name of this master, {@link CheckpointName#META_MASTER}
 */
@Override
public CheckpointName getCheckpointName() {
  final CheckpointName checkpointName = CheckpointName.META_MASTER;
  return checkpointName;
}
/**
 * @return the journal entry iterator of the path level properties, which is the only state
 *         this method exposes for journaling
 */
@Override
public Iterator getJournalEntryIterator() {
  final Iterator entries = mPathProperties.getJournalEntryIterator();
  return entries;
}
/**
 * Applies a journal entry by delegating it to the path level properties.
 *
 * @param entry the journal entry to apply
 * @return the result reported by the path level properties for this entry
 */
@Override
public boolean processJournalEntry(Journal.JournalEntry entry) {
  final boolean handled = mPathProperties.processJournalEntry(entry);
  return handled;
}
/**
 * Resets the journaled state of this master by resetting the path level properties.
 */
@Override
public void resetState() {
mPathProperties.resetState();
}
/**
* Lost master periodic check.
*/
private final class LostMasterDetectionHeartbeatExecutor implements HeartbeatExecutor {
/**
* Constructs a new {@link LostMasterDetectionHeartbeatExecutor}.
*/
public LostMasterDetectionHeartbeatExecutor() {
}
@Override
public void heartbeat() {
long masterTimeoutMs = ServerConfiguration.getMs(PropertyKey.MASTER_HEARTBEAT_TIMEOUT);
for (MasterInfo master : mMasters) {
synchronized (master) {
final long lastUpdate = mClock.millis() - master.getLastUpdatedTimeMs();
if (lastUpdate > masterTimeoutMs) {
LOG.error("The master {}({}) timed out after {}ms without a heartbeat!", master.getId(),
master.getAddress(), lastUpdate);
mLostMasters.add(master);
mMasters.remove(master);
mMasterConfigStore.handleNodeLost(master.getAddress());
}
}
}
}
@Override
public void close() {
// Nothing to clean up
}
}
/**
 * Heartbeat task that logs the configuration check report on every heartbeat except the first.
 */
private final class LogConfigReportHeartbeatExecutor implements HeartbeatExecutor {
  /** True until the first heartbeat has been seen. */
  private volatile boolean mFirst = true;

  @Override
  public void heartbeat() {
    if (!mFirst) {
      mConfigChecker.logConfigReport();
      return;
    }
    // Skip the first heartbeat since it happens before servers have time to register their
    // configurations.
    mFirst = false;
  }

  @Override
  public void close() {
    // Nothing to clean up
  }
}
}
// © 2015 - 2025 Weber Informatics LLC | Privacy Policy