// Source listing of alluxio.master.meta.DefaultMetaMaster (page header: Maven / Gradle / Ivy)
/*
* The Alluxio Open Foundation licenses this work under the Apache License, version 2.0
* (the "License"). You may not use this work except in compliance with the License, which is
* available at www.apache.org/licenses/LICENSE-2.0
*
* This software is distributed on an "AS IS" basis, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND,
* either express or implied, as more fully set forth in the License.
*
* See the NOTICE file distributed with this work for information regarding copyright ownership.
*/
package alluxio.master.meta;
import alluxio.AlluxioURI;
import alluxio.ClientContext;
import alluxio.Constants;
import alluxio.ProjectConstants;
import alluxio.Server;
import alluxio.clock.SystemClock;
import alluxio.collections.IndexDefinition;
import alluxio.collections.IndexedSet;
import alluxio.conf.ConfigurationValueOptions;
import alluxio.conf.PropertyKey;
import alluxio.conf.ServerConfiguration;
import alluxio.conf.Source;
import alluxio.exception.ExceptionMessage;
import alluxio.exception.status.NotFoundException;
import alluxio.exception.status.UnavailableException;
import alluxio.grpc.BackupPOptions;
import alluxio.grpc.GetConfigurationPOptions;
import alluxio.grpc.GrpcService;
import alluxio.grpc.MetaCommand;
import alluxio.grpc.RegisterMasterPOptions;
import alluxio.grpc.Scope;
import alluxio.grpc.ServiceType;
import alluxio.heartbeat.HeartbeatContext;
import alluxio.heartbeat.HeartbeatExecutor;
import alluxio.heartbeat.HeartbeatThread;
import alluxio.master.BackupManager;
import alluxio.master.CoreMaster;
import alluxio.master.CoreMasterContext;
import alluxio.master.MasterClientContext;
import alluxio.master.block.BlockMaster;
import alluxio.master.journal.JournalContext;
import alluxio.master.journal.checkpoint.CheckpointName;
import alluxio.master.meta.checkconf.ServerConfigurationChecker;
import alluxio.master.meta.checkconf.ServerConfigurationStore;
import alluxio.proto.journal.Journal;
import alluxio.proto.journal.Meta;
import alluxio.resource.CloseableResource;
import alluxio.resource.LockResource;
import alluxio.underfs.UfsManager;
import alluxio.underfs.UnderFileSystem;
import alluxio.underfs.UnderFileSystemConfiguration;
import alluxio.underfs.options.MkdirsOptions;
import alluxio.util.ConfigurationUtils;
import alluxio.util.IdUtils;
import alluxio.util.ThreadFactoryUtils;
import alluxio.util.executor.ExecutorServiceFactories;
import alluxio.util.executor.ExecutorServiceFactory;
import alluxio.util.io.PathUtils;
import alluxio.util.network.NetworkAddressUtils;
import alluxio.wire.Address;
import alluxio.wire.BackupResponse;
import alluxio.wire.ConfigCheckReport;
import alluxio.wire.ConfigHash;
import alluxio.wire.Configuration;
import com.google.common.collect.ImmutableSet;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import java.io.IOException;
import java.io.OutputStream;
import java.net.InetSocketAddress;
import java.time.Clock;
import java.time.Instant;
import java.time.ZoneId;
import java.time.format.DateTimeFormatter;
import java.util.Collections;
import java.util.HashMap;
import java.util.Iterator;
import java.util.List;
import java.util.Map;
import java.util.Set;
import java.util.concurrent.Executors;
import java.util.concurrent.atomic.AtomicLong;
import javax.annotation.concurrent.NotThreadSafe;
/**
* The default meta master.
*/
@NotThreadSafe
public final class DefaultMetaMaster extends CoreMaster implements MetaMaster {
private static final Logger LOG = LoggerFactory.getLogger(DefaultMetaMaster.class);
private static final Set> DEPS =
ImmutableSet.>of(BlockMaster.class);
// Master metadata management.
/**
 * Index over {@link MasterInfo} entries keyed by the master id.
 * NOTE(review): the {@code true} constructor argument is presumably the uniqueness flag of
 * the index; confirm against {@code IndexDefinition}.
 */
private static final IndexDefinition<MasterInfo, Long> ID_INDEX =
    new IndexDefinition<MasterInfo, Long>(true) {
      @Override
      public Long getFieldValue(MasterInfo o) {
        return o.getId();
      }
    };
/**
 * Index over {@link MasterInfo} entries keyed by the master address.
 * NOTE(review): the {@code true} constructor argument is presumably the uniqueness flag of
 * the index; confirm against {@code IndexDefinition}.
 */
private static final IndexDefinition<MasterInfo, Address> ADDRESS_INDEX =
    new IndexDefinition<MasterInfo, Address>(true) {
      @Override
      public Address getFieldValue(MasterInfo o) {
        return o.getAddress();
      }
    };
/** Handle to the block master. Worker configuration listeners are registered on it in the constructor. */
private final BlockMaster mBlockMaster;
/**
* The clock to use for determining the time.
* NOTE(review): this is a separate instance from the {@code new SystemClock()} that the
* constructor passes to the superclass; within this file it is only read by the lost master
* detection heartbeat.
*/
private final Clock mClock = new SystemClock();
/** The master configuration store. Reset on every {@code start()}. */
private final ServerConfigurationStore mMasterConfigStore = new ServerConfigurationStore();
/** The worker configuration store. Reset on every {@code start()}. */
private final ServerConfigurationStore mWorkerConfigStore = new ServerConfigurationStore();
/** The server-side configuration checker, built over the master and worker configuration stores. */
private final ServerConfigurationChecker mConfigChecker =
new ServerConfigurationChecker(mMasterConfigStore, mWorkerConfigStore);
/** Keeps track of standby masters which are in communication with the leader master. */
private final IndexedSet<MasterInfo> mMasters =
    new IndexedSet<>(ID_INDEX, ADDRESS_INDEX);
/** Keeps track of standby masters which are no longer in communication with the leader master. */
private final IndexedSet<MasterInfo> mLostMasters =
    new IndexedSet<>(ID_INDEX, ADDRESS_INDEX);
/** The connect address for the rpc server, resolved once from the global server configuration. */
private final InetSocketAddress mRpcConnectAddress
= NetworkAddressUtils.getConnectAddress(NetworkAddressUtils.ServiceType.MASTER_RPC,
ServerConfiguration.global());
/**
* Indicates if newer version is available. Set to true by {@code start()} on the primary when
* the update check reports a version different from the current one.
*/
private boolean mNewerVersionAvailable;
/**
* The address of this master, built in the constructor from the configured master hostname
* (default "localhost") and the RPC port.
*/
private Address mMasterAddress;
/** The manager of all ufs, obtained from the master context. */
private final UfsManager mUfsManager;
/**
* The metadata daily backup. Created in {@code start()} on the primary when daily backup is
* enabled, and cleared again in {@code stop()}; null otherwise.
*/
private DailyMetadataBackup mDailyBackup;
/** Path level properties. */
private PathProperties mPathProperties;
/** Persisted state for MetaMaster. */
private State mState;
/** Value to be used for the cluster ID when not assigned. */
public static final String INVALID_CLUSTER_ID = "INVALID_CLUSTER_ID";
/**
 * Journaled state for MetaMaster.
 *
 * <p>Holds the cluster ID, which starts out as {@link #INVALID_CLUSTER_ID} and is replaced when a
 * cluster info journal entry is processed.
 */
@NotThreadSafe
public static final class State implements alluxio.master.journal.Journaled {
  /** A unique ID to identify the cluster. */
  private String mClusterID = INVALID_CLUSTER_ID;

  /**
   * @return the cluster ID
   */
  public String getClusterID() {
    return mClusterID;
  }

  @Override
  public CheckpointName getCheckpointName() {
    return CheckpointName.CLUSTER_INFO;
  }

  /**
   * Applies a cluster info entry to this state.
   *
   * @param entry the journal entry to process
   * @return true if the entry carried cluster info and was applied, false otherwise
   */
  @Override
  public boolean processJournalEntry(Journal.JournalEntry entry) {
    if (entry.hasClusterInfo()) {
      mClusterID = entry.getClusterInfo().getClusterId();
      return true;
    }
    return false;
  }

  /**
   * Wraps the cluster ID in a cluster info journal entry and passes it on to the entry-based
   * {@code applyAndJournal}.
   *
   * @param ctx the journal context
   * @param clusterId the cluster ID to journal
   */
  public void applyAndJournal(java.util.function.Supplier<JournalContext> ctx,
      String clusterId) {
    applyAndJournal(ctx,
        Journal.JournalEntry.newBuilder()
            .setClusterInfo(Meta.ClusterInfoEntry.newBuilder().setClusterId(clusterId).build())
            .build());
  }

  @Override
  public void resetState() {
    mClusterID = INVALID_CLUSTER_ID;
  }

  /**
   * @return an iterator over a single cluster info entry, or an empty iterator while the
   *         cluster ID is still unassigned
   */
  @Override
  public Iterator<Journal.JournalEntry> getJournalEntryIterator() {
    if (mClusterID.equals(INVALID_CLUSTER_ID)) {
      return Collections.emptyIterator();
    }
    return Collections.singleton(Journal.JournalEntry.newBuilder()
        .setClusterInfo(Meta.ClusterInfoEntry.newBuilder().setClusterId(mClusterID).build())
        .build()).iterator();
  }
}
/**
* Creates a new instance of {@link DefaultMetaMaster}, delegating to the three-argument
* constructor with a cached thread pool executor factory named after
* {@link Constants#META_MASTER_NAME}.
*
* @param blockMaster a block master handle
* @param masterContext the context for Alluxio master
*/
DefaultMetaMaster(BlockMaster blockMaster, CoreMasterContext masterContext) {
this(blockMaster, masterContext,
ExecutorServiceFactories.cachedThreadPool(Constants.META_MASTER_NAME));
}
/**
 * Creates a new instance of {@link DefaultMetaMaster}.
 *
 * <p>Besides wiring the collaborators, this builds the address of this master from the configured
 * master hostname (falling back to "localhost") and the RPC port, and subscribes the worker
 * configuration store to the block master's worker events.
 *
 * @param blockMaster a block master handle
 * @param masterContext the context for Alluxio master
 * @param executorServiceFactory a factory for creating the executor service to use for running
 *        maintenance threads
 */
DefaultMetaMaster(BlockMaster blockMaster, CoreMasterContext masterContext,
    ExecutorServiceFactory executorServiceFactory) {
  super(masterContext, new SystemClock(), executorServiceFactory);

  // Plain collaborators and fresh state.
  mBlockMaster = blockMaster;
  mUfsManager = masterContext.getUfsManager();
  mPathProperties = new PathProperties();
  mState = new State();

  // Address under which this master registers its own configuration.
  mMasterAddress = new Address()
      .setHost(ServerConfiguration.getOrDefault(PropertyKey.MASTER_HOSTNAME, "localhost"))
      .setRpcPort(mPort);

  // Keep the worker configuration store in sync with worker membership changes.
  mBlockMaster.registerLostWorkerFoundListener(mWorkerConfigStore::lostNodeFound);
  mBlockMaster.registerWorkerLostListener(mWorkerConfigStore::handleNodeLost);
  mBlockMaster.registerNewWorkerConfListener(mWorkerConfigStore::registerNewConf);
}
/**
 * Builds the gRPC services exposed by the meta master.
 *
 * @return a new map holding the configuration service (with authentication disabled), the
 *         client service and the master service, keyed by service type
 */
@Override
public Map<ServiceType, GrpcService> getServices() {
  Map<ServiceType, GrpcService> services = new HashMap<>();
  services.put(ServiceType.META_MASTER_CONFIG_SERVICE,
      new GrpcService(new MetaMasterConfigurationServiceHandler(this)).disableAuthentication());
  services.put(ServiceType.META_MASTER_CLIENT_SERVICE,
      new GrpcService(new MetaMasterClientServiceHandler(this)));
  services.put(ServiceType.META_MASTER_MASTER_SERVICE,
      new GrpcService(new MetaMasterMasterServiceHandler(this)));
  return services;
}
/**
* @return the name of this master, {@link Constants#META_MASTER_NAME}
*/
@Override
public String getName() {
return Constants.META_MASTER_NAME;
}
@Override
public Set> getDependencies() {
return DEPS;
}
/**
 * Starts the meta master.
 *
 * <p>Both configuration stores are reset on every start. As primary, this master registers its
 * own configuration, launches the lost-master detection and config-report heartbeats, optionally
 * starts the daily metadata backup, and makes sure a cluster ID exists (creating and journaling
 * a random UUID when none has been assigned yet). The update check only runs right after a new
 * cluster ID has been created. As standby in HA mode, it starts a heartbeat that syncs with the
 * leader master.
 *
 * @param isPrimary whether this master is started as the primary
 * @throws IOException if starting fails
 */
@Override
public void start(Boolean isPrimary) throws IOException {
  super.start(isPrimary);
  mWorkerConfigStore.reset();
  mMasterConfigStore.reset();
  if (isPrimary) {
    // Add the configuration of the current leader master
    mMasterConfigStore.registerNewConf(mMasterAddress,
        ConfigurationUtils.getConfiguration(ServerConfiguration.global(), Scope.MASTER));
    // The service that detects lost standby master nodes
    getExecutorService().submit(new HeartbeatThread(
        HeartbeatContext.MASTER_LOST_MASTER_DETECTION,
        new LostMasterDetectionHeartbeatExecutor(),
        (int) ServerConfiguration.getMs(PropertyKey.MASTER_STANDBY_HEARTBEAT_INTERVAL),
        ServerConfiguration.global(), mMasterContext.getUserState()));
    getExecutorService().submit(
        new HeartbeatThread(HeartbeatContext.MASTER_LOG_CONFIG_REPORT_SCHEDULING,
            new LogConfigReportHeartbeatExecutor(),
            (int) ServerConfiguration
                .getMs(PropertyKey.MASTER_LOG_CONFIG_REPORT_HEARTBEAT_INTERVAL),
            ServerConfiguration.global(), mMasterContext.getUserState()));
    if (ServerConfiguration.getBoolean(PropertyKey.MASTER_DAILY_BACKUP_ENABLED)) {
      mDailyBackup = new DailyMetadataBackup(this, Executors.newSingleThreadScheduledExecutor(
          ThreadFactoryUtils.build("DailyMetadataBackup-%d", true)), mUfsManager);
      mDailyBackup.start();
    }
    // Compare by value, not by reference: a cluster ID restored from the journal is a distinct
    // String instance, so identity comparison must not be relied upon here.
    if (INVALID_CLUSTER_ID.equals(mState.getClusterID())) {
      try (JournalContext context = createJournalContext()) {
        String clusterID = java.util.UUID.randomUUID().toString();
        mState.applyAndJournal(context, clusterID);
        LOG.info("Created new cluster ID {}", clusterID);
      }
      if (Boolean.valueOf(ProjectConstants.UPDATE_CHECK_ENABLED)
          && ServerConfiguration.getBoolean(PropertyKey.MASTER_UPDATE_CHECK_ENABLED)) {
        try {
          String latestVersion =
              UpdateCheck.getLatestVersion(mState.getClusterID(), 3000, 3000, 3000);
          if (!ProjectConstants.VERSION.equals(latestVersion)) {
            System.out.println("The latest version (" + latestVersion + ") is not the same "
                + "as the current version (" + ProjectConstants.VERSION + "). To upgrade "
                + "visit https://www.alluxio.io/download/.");
            mNewerVersionAvailable = true;
          }
        } catch (Exception e) {
          // Best effort: a failed update check must not prevent the master from starting.
          LOG.debug("Unable to check for updates: {}", e.getMessage());
        }
      }
    } else {
      LOG.info("Detected existing cluster ID {}", mState.getClusterID());
    }
  } else {
    if (ConfigurationUtils.isHaMode(ServerConfiguration.global())) {
      // Standby master should setup MetaMasterSync to communicate with the leader master
      RetryHandlingMetaMasterMasterClient metaMasterClient =
          new RetryHandlingMetaMasterMasterClient(MasterClientContext
              .newBuilder(ClientContext.create(ServerConfiguration.global())).build());
      getExecutorService().submit(new HeartbeatThread(HeartbeatContext.META_MASTER_SYNC,
          new MetaMasterSync(mMasterAddress, metaMasterClient),
          (int) ServerConfiguration.getMs(PropertyKey.MASTER_STANDBY_HEARTBEAT_INTERVAL),
          ServerConfiguration.global(), mMasterContext.getUserState()));
      LOG.info("Standby master with address {} starts sending heartbeat to leader master.",
          mMasterAddress);
    }
  }
}
/**
* Stops the meta master. A running daily backup is stopped and its reference cleared before the
* superclass is stopped.
*
* @throws IOException if stopping fails
*/
@Override
public void stop() throws IOException {
// mDailyBackup is only set when this master started as primary with daily backup enabled.
if (mDailyBackup != null) {
mDailyBackup.stop();
mDailyBackup = null;
}
super.stop();
}
/**
 * Writes a backup of the master state to a new file in the backup directory.
 *
 * <p>The directory comes from {@code options} when a target directory is set, otherwise from
 * {@link PropertyKey#MASTER_BACKUP_DIRECTORY}; it is created (with parents) when it does not
 * exist. The backup file is written while holding the master's pause-state lock. If writing
 * fails, the partially written file is deleted on a best-effort basis and the original failure
 * is rethrown with any cleanup failure attached as suppressed.
 *
 * @param options the backup options (target directory, whether to back up to the local
 *        filesystem)
 * @return a response holding the backup URI, the connect host of this master and the entry
 *         count reported by the backup manager
 * @throws IOException if the directory cannot be created or the backup cannot be written
 */
@Override
public BackupResponse backup(BackupPOptions options) throws IOException {
  String dir = options.hasTargetDirectory() ? options.getTargetDirectory()
      : ServerConfiguration.get(PropertyKey.MASTER_BACKUP_DIRECTORY);
  try (CloseableResource<UnderFileSystem> ufsResource =
      mUfsManager.getRoot().acquireUfsResource()) {
    UnderFileSystem ufs = ufsResource.get();
    if (options.getLocalFileSystem() && !ufs.getUnderFSType().equals("local")) {
      // TODO(lu) Support getting UFS based on type from UfsManager
      // NOTE(review): this UFS instance is created here and never closed in this method;
      // confirm whether it needs to be closed.
      ufs = UnderFileSystem.Factory.create("/",
          UnderFileSystemConfiguration.defaults(ServerConfiguration.global()));
      LOG.info("Backing up to local filesystem in directory {}", dir);
    } else {
      LOG.info("Backing up to root UFS in directory {}", dir);
    }
    if (!ufs.isDirectory(dir)) {
      if (!ufs.mkdirs(dir, MkdirsOptions.defaults(ServerConfiguration.global())
          .setCreateParent(true))) {
        throw new IOException(String.format("Failed to create directory %s", dir));
      }
    }
    String backupFilePath;
    AtomicLong entryCount = new AtomicLong(0);
    // The pause-state lock is held for the whole write of the backup file.
    try (LockResource lr = new LockResource(mMasterContext.pauseStateLock())) {
      Instant now = Instant.now();
      // File name is derived from the UTC date and the epoch milliseconds of "now".
      String backupFileName = String.format(BackupManager.BACKUP_FILE_FORMAT,
          DateTimeFormatter.ISO_LOCAL_DATE.withZone(ZoneId.of("UTC")).format(now),
          now.toEpochMilli());
      backupFilePath = PathUtils.concatPath(dir, backupFileName);
      try {
        try (OutputStream ufsStream = ufs.create(backupFilePath)) {
          mBackupManager.backup(ufsStream, entryCount);
        }
      } catch (Throwable t) {
        // Do not leave a partial backup behind; keep the original failure as the primary one.
        try {
          ufs.deleteExistingFile(backupFilePath);
        } catch (Throwable t2) {
          LOG.error("Failed to clean up failed backup at {}", backupFilePath, t2);
          t.addSuppressed(t2);
        }
        throw t;
      }
    }
    String rootUfs = ServerConfiguration.get(PropertyKey.MASTER_MOUNT_TABLE_ROOT_UFS);
    if (options.getLocalFileSystem()) {
      rootUfs = "file:///";
    }
    AlluxioURI backupUri =
        new AlluxioURI(new AlluxioURI(rootUfs), new AlluxioURI(backupFilePath));
    return new BackupResponse(
        backupUri,
        NetworkAddressUtils.getConnectHost(NetworkAddressUtils.ServiceType.MASTER_RPC,
            ServerConfiguration.global()),
        entryCount.get());
  }
}
/**
 * Checkpoints the journal while holding the master's pause-state lock.
 *
 * @return the connect host of this master's RPC service
 * @throws IOException if the checkpoint fails
 */
@Override
public String checkpoint() throws IOException {
  try (LockResource pauseLock = new LockResource(mMasterContext.pauseStateLock())) {
    mJournalSystem.checkpoint();
  }
  // The lock is released before the host name is resolved.
  String masterHost = NetworkAddressUtils.getConnectHost(
      NetworkAddressUtils.ServiceType.MASTER_RPC, ServerConfiguration.global());
  return masterHost;
}
/**
* @return the configuration check report produced by the server-side configuration checker
*/
@Override
public ConfigCheckReport getConfigCheckReport() {
return mConfigChecker.getConfigCheckReport();
}
/**
 * Collects the cluster-level and path-level configuration, each together with its hash.
 *
 * <p>The two parts are not guaranteed to form one consistent snapshot: the path configuration
 * may be newer than the cluster configuration. Each hash does correspond to the properties
 * returned next to it.
 *
 * @param options which parts to skip and whether to return raw values
 * @return the assembled configuration
 */
@Override
public Configuration getConfiguration(GetConfigurationPOptions options) {
  Configuration.Builder builder = Configuration.newBuilder();

  if (!options.getIgnoreClusterConf()) {
    for (PropertyKey key : ServerConfiguration.keySet()) {
      if (!key.isBuiltIn()) {
        // Only built-in keys are reported.
        continue;
      }
      Source source = ServerConfiguration.getSource(key);
      String value = ServerConfiguration.getOrDefault(key, null,
          ConfigurationValueOptions.defaults().useDisplayValue(true)
              .useRawValue(options.getRawValue()));
      builder.addClusterProperty(key.getName(), value, source);
    }
    // Assumes ServerConfiguration is read-only while the master is running; otherwise this hash
    // might not correspond to the cluster properties collected above.
    builder.setClusterConfHash(ServerConfiguration.hash());
  }

  if (!options.getIgnorePathConf()) {
    PathPropertiesView pathView = mPathProperties.snapshot();
    pathView.getProperties().forEach((path, pathProps) -> {
      pathProps.forEach((name, val) -> builder.addPathProperty(path, name, val));
    });
    builder.setPathConfHash(pathView.getHash());
  }

  return builder.build();
}
/**
* @return a {@link ConfigHash} built from the server configuration hash and the path properties
* hash
*/
@Override
public ConfigHash getConfigHash() {
return new ConfigHash(ServerConfiguration.hash(), mPathProperties.hash());
}
/**
* Adds the given properties for a path to the path level properties, passing along a journal
* context that is created for this call and closed when it returns.
*
* NOTE(review): the {@code Map} parameter is a raw type here; its type arguments appear to have
* been lost. Confirm them against the MetaMaster interface and PathProperties.
*
* @param path the path to set properties for
* @param properties the properties to add for the path
* @throws UnavailableException declared for journal context creation; presumably thrown when
* the journal is unavailable, confirm
*/
@Override
public void setPathConfiguration(String path, Map properties)
throws UnavailableException {
try (JournalContext ctx = createJournalContext()) {
mPathProperties.add(ctx, path, properties);
}
}
/**
* Removes the given keys from the properties of a path, passing along a journal context that is
* created for this call and closed when it returns.
*
* NOTE(review): the {@code Set} parameter is a raw type here; its type argument appears to have
* been lost. Confirm it against the MetaMaster interface and PathProperties.
*
* @param path the path whose properties are modified
* @param keys the property keys to remove
* @throws UnavailableException declared for journal context creation; presumably thrown when
* the journal is unavailable, confirm
*/
@Override
public void removePathConfiguration(String path, Set keys)
throws UnavailableException {
try (JournalContext ctx = createJournalContext()) {
mPathProperties.remove(ctx, path, keys);
}
}
/**
* Removes all properties of a path, passing along a journal context that is created for this
* call and closed when it returns.
*
* @param path the path whose properties are removed
* @throws UnavailableException declared for journal context creation; presumably thrown when
* the journal is unavailable, confirm
*/
@Override
public void removePathConfiguration(String path) throws UnavailableException {
try (JournalContext ctx = createJournalContext()) {
mPathProperties.removeAll(ctx, path);
}
}
/**
* @return true if the update check performed in {@code start()} found a version different from
* the current one
*/
@Override
public boolean getNewerVersionAvailable() {
return mNewerVersionAvailable;
}
/**
* NOTE(review): the return type is a raw {@code List} here; its type argument appears to have
* been lost (presumably {@code Address}). Confirm against the MetaMaster interface.
*
* @return the live node addresses tracked by the master configuration store
*/
@Override
public List getMasterAddresses() {
return mMasterConfigStore.getLiveNodeAddresses();
}
/**
* NOTE(review): the return type is a raw {@code List} here; its type argument appears to have
* been lost (presumably {@code Address}). Confirm against the MetaMaster interface.
*
* @return the live node addresses tracked by the worker configuration store
*/
@Override
public List getWorkerAddresses() {
return mWorkerConfigStore.getLiveNodeAddresses();
}
/**
 * Resolves the id for the master at the given address.
 *
 * <p>An address that is already tracked keeps its id. An address belonging to a lost master gets
 * its old id back and the master is moved from the lost set to the active set. Any other address
 * is assigned a new random non-negative id.
 *
 * @param address the address of the master asking for an id
 * @return the id mapped to the address
 */
@Override
public long getMasterId(Address address) {
  MasterInfo known = mMasters.getFirstByField(ADDRESS_INDEX, address);
  if (known != null) {
    // This master address is already mapped to a master id.
    long knownId = known.getId();
    LOG.warn("The master {} already exists as id {}.", address, knownId);
    return knownId;
  }

  MasterInfo returning = mLostMasters.getFirstByField(ADDRESS_INDEX, address);
  if (returning != null) {
    // A previously lost master is back: restore it under its old id.
    mMasterConfigStore.lostNodeFound(returning.getAddress());
    synchronized (returning) {
      final long previousId = returning.getId();
      LOG.warn("A lost master {} has requested its old id {}.", address, previousId);
      // Refresh the timestamp first so the master is not immediately considered timed out.
      returning.updateLastUpdatedTimeMs();
      mMasters.add(returning);
      mLostMasters.remove(returning);
      return previousId;
    }
  }

  // Unknown address: draw random ids until one is accepted by the active set.
  long freshId;
  do {
    freshId = IdUtils.getRandomNonNegativeLong();
  } while (!mMasters.add(new MasterInfo(freshId, address)));
  LOG.info("getMasterId(): MasterAddress: {} id: {}", address, freshId);
  return freshId;
}
/**
* @return the connect address of the RPC server, as resolved when this object was created
*/
@Override
public InetSocketAddress getRpcAddress() {
return mRpcConnectAddress;
}
/**
* @return the value of {@code mStartTimeMs}, which is not declared in this file (presumably the
* master start time in milliseconds, confirm)
*/
@Override
public long getStartTimeMs() {
return mStartTimeMs;
}
/**
* @return the difference between the current system time in milliseconds and
* {@code mStartTimeMs}
*/
@Override
public long getUptimeMs() {
// NOTE(review): reads System.currentTimeMillis() directly rather than going through mClock.
return System.currentTimeMillis() - mStartTimeMs;
}
/**
* @return the web port read from {@link PropertyKey#MASTER_WEB_PORT}
*/
@Override
public int getWebPort() {
return ServerConfiguration.getInt(PropertyKey.MASTER_WEB_PORT);
}
/**
* @return whether the safe mode manager reports that the master is in safe mode
*/
@Override
public boolean isInSafeMode() {
return mSafeModeManager.isInSafeMode();
}
/**
 * Handles a heartbeat from a standby master.
 *
 * @param masterId the id of the master sending the heartbeat
 * @return {@code MetaCommand_Nothing} after refreshing a known master's timestamp, or
 *         {@code MetaCommand_Register} when the id is not among the active masters
 */
@Override
public MetaCommand masterHeartbeat(long masterId) {
  MasterInfo sender = mMasters.getFirstByField(ID_INDEX, masterId);
  if (sender != null) {
    sender.updateLastUpdatedTimeMs();
    return MetaCommand.MetaCommand_Nothing;
  }
  // Unknown id: ask the master to register.
  LOG.warn("Could not find master id: {} for heartbeat.", masterId);
  return MetaCommand.MetaCommand_Register;
}
/**
 * Registers a standby master: refreshes its timestamp and records the configuration it sent.
 *
 * @param masterId the id of the registering master
 * @param options the register options carrying the master's configuration list
 * @throws NotFoundException if no active master with the given id exists
 */
@Override
public void masterRegister(long masterId, RegisterMasterPOptions options)
    throws NotFoundException {
  MasterInfo registrant = mMasters.getFirstByField(ID_INDEX, masterId);
  if (registrant == null) {
    throw new NotFoundException(ExceptionMessage.NO_MASTER_FOUND.getMessage(masterId));
  }

  registrant.updateLastUpdatedTimeMs();
  mMasterConfigStore.registerNewConf(registrant.getAddress(), options.getConfigsList());

  LOG.info("registerMaster(): master: {}", registrant);
}
/**
* @return the checkpoint name of the meta master, {@code CheckpointName.META_MASTER}
*/
@Override
public CheckpointName getCheckpointName() {
return CheckpointName.META_MASTER;
}
/**
* @return the cluster ID held in the journaled state; {@link #INVALID_CLUSTER_ID} while none has
* been assigned
*/
@Override
public String getClusterID() {
return mState.getClusterID();
}
/**
 * @return an iterator over the journal entries of the path properties followed by those of the
 *         cluster state
 */
@Override
public Iterator<Journal.JournalEntry> getJournalEntryIterator() {
  return com.google.common.collect.Iterators.concat(mPathProperties.getJournalEntryIterator(),
      mState.getJournalEntryIterator());
}
/**
* Offers the entry to the cluster state first and, only if it was not handled there, to the
* path properties.
*
* @param entry the journal entry to process
* @return true if either component processed the entry
*/
@Override
public boolean processJournalEntry(Journal.JournalEntry entry) {
return mState.processJournalEntry(entry) || mPathProperties.processJournalEntry(entry);
}
/**
* Resets the journaled state of this master: the cluster state and the path properties.
*/
@Override
public void resetState() {
mState.resetState();
mPathProperties.resetState();
}
/**
* Lost master periodic check.
*/
private final class LostMasterDetectionHeartbeatExecutor implements HeartbeatExecutor {
/**
* Constructs a new {@link LostMasterDetectionHeartbeatExecutor}.
*/
public LostMasterDetectionHeartbeatExecutor() {
}
@Override
public void heartbeat() {
long masterTimeoutMs = ServerConfiguration.getMs(PropertyKey.MASTER_HEARTBEAT_TIMEOUT);
for (MasterInfo master : mMasters) {
synchronized (master) {
final long lastUpdate = mClock.millis() - master.getLastUpdatedTimeMs();
if (lastUpdate > masterTimeoutMs) {
LOG.error("The master {}({}) timed out after {}ms without a heartbeat!", master.getId(),
master.getAddress(), lastUpdate);
mLostMasters.add(master);
mMasters.remove(master);
mMasterConfigStore.handleNodeLost(master.getAddress());
}
}
}
}
@Override
public void close() {
// Nothing to clean up
}
}
/**
 * Periodically logs the config check report, skipping the very first heartbeat.
 */
private final class LogConfigReportHeartbeatExecutor implements HeartbeatExecutor {
  /** True until the first heartbeat has been seen. */
  private volatile boolean mFirst = true;

  @Override
  public void heartbeat() {
    if (mFirst) {
      // The first heartbeat happens before servers have had time to register their
      // configurations, so there is nothing meaningful to report yet.
      mFirst = false;
      return;
    }
    mConfigChecker.logConfigReport();
  }

  @Override
  public void close() {
    // Nothing to clean up
  }
}
}
// (page footer from the source listing) © 2015 - 2025 Weber Informatics LLC | Privacy Policy