/*
 * The Alluxio Open Foundation licenses this work under the Apache License, version 2.0
 * (the "License"). You may not use this work except in compliance with the License, which is
 * available at www.apache.org/licenses/LICENSE-2.0
 *
 * This software is distributed on an "AS IS" basis, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND,
 * either express or implied, as more fully set forth in the License.
 *
 * See the NOTICE file distributed with this work for information regarding copyright ownership.
 */

package alluxio.master.block;

import alluxio.Constants;
import alluxio.DefaultStorageTierAssoc;
import alluxio.Server;
import alluxio.StorageTierAssoc;
import alluxio.client.block.options.GetWorkerReportOptions;
import alluxio.client.block.options.GetWorkerReportOptions.WorkerRange;
import alluxio.clock.SystemClock;
import alluxio.collections.ConcurrentHashSet;
import alluxio.collections.IndexDefinition;
import alluxio.collections.IndexedSet;
import alluxio.conf.Configuration;
import alluxio.conf.PropertyKey;
import alluxio.exception.BlockInfoException;
import alluxio.exception.ExceptionMessage;
import alluxio.exception.runtime.UnavailableRuntimeException;
import alluxio.exception.status.InvalidArgumentException;
import alluxio.exception.status.NotFoundException;
import alluxio.exception.status.UnavailableException;
import alluxio.grpc.Command;
import alluxio.grpc.CommandType;
import alluxio.grpc.ConfigProperty;
import alluxio.grpc.DecommissionWorkerPOptions;
import alluxio.grpc.GetRegisterLeasePRequest;
import alluxio.grpc.GrpcService;
import alluxio.grpc.GrpcUtils;
import alluxio.grpc.NodeState;
import alluxio.grpc.RegisterWorkerPOptions;
import alluxio.grpc.RegisterWorkerPRequest;
import alluxio.grpc.RemoveDisabledWorkerPOptions;
import alluxio.grpc.ServiceType;
import alluxio.grpc.StorageList;
import alluxio.grpc.WorkerLostStorageInfo;
import alluxio.heartbeat.FixedIntervalSupplier;
import alluxio.heartbeat.HeartbeatContext;
import alluxio.heartbeat.HeartbeatExecutor;
import alluxio.heartbeat.HeartbeatThread;
import alluxio.master.CoreMaster;
import alluxio.master.CoreMasterContext;
import alluxio.master.block.meta.MasterWorkerInfo;
import alluxio.master.block.meta.WorkerMetaLockSection;
import alluxio.master.journal.JournalContext;
import alluxio.master.journal.SingleEntryJournaled;
import alluxio.master.journal.checkpoint.CheckpointName;
import alluxio.master.journal.checkpoint.Checkpointed;
import alluxio.master.metastore.BlockMetaStore;
import alluxio.master.metastore.BlockMetaStore.Block;
import alluxio.master.metrics.MetricsMaster;
import alluxio.metrics.Metric;
import alluxio.metrics.MetricInfo;
import alluxio.metrics.MetricKey;
import alluxio.metrics.MetricsSystem;
import alluxio.proto.journal.Block.BlockContainerIdGeneratorEntry;
import alluxio.proto.journal.Block.BlockInfoEntry;
import alluxio.proto.journal.Block.DeleteBlockEntry;
import alluxio.proto.journal.Journal.JournalEntry;
import alluxio.proto.meta.Block.BlockLocation;
import alluxio.proto.meta.Block.BlockMeta;
import alluxio.resource.CloseableIterator;
import alluxio.resource.LockResource;
import alluxio.security.authentication.ClientContextServerInjector;
import alluxio.util.CommonUtils;
import alluxio.util.IdUtils;
import alluxio.util.ThreadFactoryUtils;
import alluxio.util.WaitForOptions;
import alluxio.util.executor.ExecutorServiceFactories;
import alluxio.util.executor.ExecutorServiceFactory;
import alluxio.util.network.NetworkAddressUtils;
import alluxio.util.proto.BlockLocationUtils;
import alluxio.wire.Address;
import alluxio.wire.BlockInfo;
import alluxio.wire.RegisterLease;
import alluxio.wire.WorkerInfo;
import alluxio.wire.WorkerNetAddress;
import alluxio.wire.WorkerState;

import com.google.common.annotations.VisibleForTesting;
import com.google.common.base.Preconditions;
import com.google.common.cache.CacheBuilder;
import com.google.common.cache.CacheLoader;
import com.google.common.cache.LoadingCache;
import com.google.common.collect.ImmutableSet;
import com.google.common.util.concurrent.Striped;
import io.grpc.ServerInterceptors;
import it.unimi.dsi.fastutil.longs.LongOpenHashSet;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import java.io.File;
import java.io.IOException;
import java.net.UnknownHostException;
import java.time.Clock;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collection;
import java.util.Collections;
import java.util.Comparator;
import java.util.EnumSet;
import java.util.HashMap;
import java.util.HashSet;
import java.util.Iterator;
import java.util.List;
import java.util.Map;
import java.util.NoSuchElementException;
import java.util.Optional;
import java.util.Set;
import java.util.concurrent.CompletableFuture;
import java.util.concurrent.ConcurrentHashMap;
import java.util.concurrent.ExecutionException;
import java.util.concurrent.ExecutorService;
import java.util.concurrent.Executors;
import java.util.concurrent.TimeUnit;
import java.util.concurrent.TimeoutException;
import java.util.concurrent.atomic.AtomicBoolean;
import java.util.concurrent.atomic.AtomicInteger;
import java.util.concurrent.locks.Lock;
import java.util.function.BiConsumer;
import java.util.function.Consumer;
import java.util.stream.Collectors;
import javax.annotation.Nullable;
import javax.annotation.concurrent.GuardedBy;
import javax.annotation.concurrent.NotThreadSafe;

/**
 * This block master manages the metadata for all the blocks and block workers in Alluxio.
 */
@NotThreadSafe // TODO(jiri): make thread-safe (c.f. ALLUXIO-1664)
public class DefaultBlockMaster extends CoreMaster implements BlockMaster {
  private static final Set<Class<? extends Server>> DEPS =
      ImmutableSet.<Class<? extends Server>>of(MetricsMaster.class);

  /**
   * The number of container ids to 'reserve' before having to journal container id state. This
   * allows the master to return container ids within the reservation, without having to write to
   * the journal.
   */
  private final long mContainerIdReservationSize = Configuration.getInt(
      PropertyKey.MASTER_CONTAINER_ID_RESERVATION_SIZE);

  /** The cache key for the live worker info list in {@link #mWorkerInfoCache}. */
  private static final String LIVE_WORKER_INFO_CACHE_KEY = "LiveWorkerInfoKey";

  /** The cache key for the lost worker info list in {@link #mWorkerInfoCache}. */
  private static final String LOST_WORKER_INFO_CACHE_KEY = "LostWorkerInfoKey";

  private final ExecutorService mContainerIdDetector = Executors
      .newSingleThreadExecutor(
        ThreadFactoryUtils.build("default-block-master-container-id-detection-%d", true));

  private volatile boolean mContainerIdDetectorIsIdle = true;

  // Worker metadata management.
  private static final IndexDefinition<MasterWorkerInfo, Long> ID_INDEX =
      IndexDefinition.ofUnique(MasterWorkerInfo::getId);

  private static final IndexDefinition<MasterWorkerInfo, WorkerNetAddress> ADDRESS_INDEX =
      IndexDefinition.ofUnique(MasterWorkerInfo::getWorkerAddress);

  /**
   * Mapping between all possible storage level aliases and their ordinal position. This mapping
   * forms a total ordering on all storage level aliases in the system, and must be consistent
   * across masters.
   */
  private static final StorageTierAssoc MASTER_STORAGE_TIER_ASSOC =
      new DefaultStorageTierAssoc(
          PropertyKey.MASTER_TIERED_STORE_GLOBAL_LEVELS,
          PropertyKey.Template.MASTER_TIERED_STORE_GLOBAL_LEVEL_ALIAS);

  private static final Logger LOG = LoggerFactory.getLogger(DefaultBlockMaster.class);

  private static final String WORKER_DISABLED =
      "Worker with address %s is manually decommissioned and marked not able to join "
          + "the cluster again. If you want this worker to register to the cluster again, "
          + "use `bin/alluxio fsadmin enableWorker -h ` command.";

  /**
   * Concurrency and locking in the BlockMaster
   *
   * The block master uses concurrent data structures to allow non-conflicting concurrent access.
   * This means each piece of metadata should be locked individually. There are two types of
   * metadata in the {@link DefaultBlockMaster}: block metadata and worker metadata.
   *
   * The worker metadata is represented by the {@link MasterWorkerInfo} object.
   * See javadoc of {@link MasterWorkerInfo} for details.
   *
   * To modify or read a modifiable piece of worker metadata, the {@link MasterWorkerInfo} for the
   * worker must be locked following the instructions in {@link MasterWorkerInfo}.
   * For block metadata, the id of the block must be locked.
   * This will protect the internal integrity of the block and worker metadata.
   *
   * A worker's relevant locks must be held to
   * - Check/Update the worker register status
   * - Read/Update the worker usage
   * - Read/Update the worker present/to-be-removed blocks
   * - Any combinations of the above
   *
   * A block's lock must be held to
   * - Perform any BlockStore operations on the block
   * - Add or remove the block from mLostBlocks
   *
   * Lock ordering must be preserved in order to prevent deadlock. If both worker and block
   * metadata must be locked at the same time, the worker metadata must be locked before the block
   * metadata. When the locks are released, they must be released in the opposite order.
   *
   * Locking on the worker metadata is managed by
   * {@link MasterWorkerInfo#lockWorkerMeta(EnumSet, boolean)}.
   * This guarantees when multiple parts of the worker metadata are accessed/updated,
   * the locks are acquired and released in order.
   * See javadoc of {@link MasterWorkerInfo#lockWorkerMeta(EnumSet, boolean)} for
   * example usages.
   *
   * It should not be the case that multiple worker metadata must be locked at the same time, or
   * multiple block metadata must be locked at the same time. Operations involving different workers
   * or different blocks should be able to be performed independently.
   */
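  /*
   * For illustration only (not invoked anywhere): a minimal sketch of the lock ordering
   * described above, using a hypothetical operation that touches both a worker's block list
   * and a single block's metadata. The worker metadata is locked before the block, and
   * try-with-resources releases the locks in the opposite order.
   *
   *   void exampleLockOrdering(MasterWorkerInfo worker, long blockId) {
   *     try (LockResource workerLock = worker.lockWorkerMeta(
   *              EnumSet.of(WorkerMetaLockSection.BLOCKS), false);
   *          LockResource blockLock = lockBlock(blockId)) {
   *       // Safe to update the worker's block list and the block metadata here.
   *     } // blockLock is released first, then workerLock.
   *   }
   */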

  /**
   * 10k locks balances between keeping a small memory footprint and avoiding unnecessary lock
   * contention. Each stripe is around 100 bytes, so this takes about 1MB. Block locking critical
   * sections are short, so it is acceptable to occasionally have conflicts where two different
   * blocks want to lock the same stripe.
   */
  private final Striped<Lock> mBlockLocks = Striped.lock(10_000);
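  // For illustration only: Striped hashes a key to one of the 10k stripes, so two distinct
  // block ids may occasionally map to the same Lock instance (a stripe collision), e.g.:
  //   Lock l1 = mBlockLocks.get(blockId1); // one of the 10k stripes
  //   Lock l2 = mBlockLocks.get(blockId2); // possibly the same Lock, even though the ids differ
  // Block critical sections are short, so such collisions are acceptable.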
  /** Manages block metadata and block locations. */
  private final BlockMetaStore mBlockMetaStore;

  /** Keeps track of blocks which are no longer in Alluxio storage. */
  private final ConcurrentHashSet<Long> mLostBlocks = new ConcurrentHashSet<>(64, 0.90f, 64);

  /** This state must be journaled. */
  @GuardedBy("itself")
  private final BlockContainerIdGenerator mBlockContainerIdGenerator =
      new BlockContainerIdGenerator();

  /** Keeps track of workers which are in communication with the master. */
  private final IndexedSet<MasterWorkerInfo> mWorkers =
      new IndexedSet<>(ID_INDEX, ADDRESS_INDEX);
  /** Keeps track of workers which are no longer in communication with the master. */
  private final IndexedSet<MasterWorkerInfo> mLostWorkers =
      new IndexedSet<>(ID_INDEX, ADDRESS_INDEX);
  /** A worker is not visible until its registration completes. */
  private final IndexedSet<MasterWorkerInfo> mTempWorkers =
      new IndexedSet<>(ID_INDEX, ADDRESS_INDEX);
  private final Set<WorkerNetAddress> mRejectWorkers = new ConcurrentHashSet<>();
  /**
   * Keeps track of workers which have been decommissioned.
   * We need to distinguish workers that were lost accidentally from workers that were
   * decommissioned manually.
   */
  private final IndexedSet<MasterWorkerInfo> mDecommissionedWorkers =
      new IndexedSet<>(ID_INDEX, ADDRESS_INDEX);

  /**
   * Tracks the open register streams.
   * A stream will be closed if it is completed, aborted due to an error,
   * or recycled due to inactivity by {@link WorkerRegisterStreamGCExecutor}.
   */
  private final Map<Long, WorkerRegisterContext> mActiveRegisterContexts =
      new ConcurrentHashMap<>();

  /** Listeners to call when lost workers are found. */
  private final List<Consumer<Address>> mLostWorkerFoundListeners
      = new ArrayList<>();

  /** Listeners to call when workers are lost. */
  private final List<Consumer<Address>> mWorkerLostListeners = new ArrayList<>();

  /** Listeners to call when workers are deleted. */
  private final List<Consumer<Address>> mWorkerDeleteListeners = new ArrayList<>();

  /** Listeners to call when a new worker registers. */
  private final List<BiConsumer<Address, List<ConfigProperty>>> mWorkerRegisteredListeners
      = new ArrayList<>();

  /** Handle to the metrics master. */
  private final MetricsMaster mMetricsMaster;

  /* The value of the 'next container id' last journaled. */
  @GuardedBy("mBlockContainerIdGenerator")
  private volatile long mJournaledNextContainerId = 0;

  /**
   * A loading cache for the worker info list, refreshed periodically.
   * This cache has two keys: {@link #LIVE_WORKER_INFO_CACHE_KEY} and
   * {@link #LOST_WORKER_INFO_CACHE_KEY}.
   */
  private final LoadingCache<String, List<WorkerInfo>> mWorkerInfoCache;

  private final RegisterLeaseManager mRegisterLeaseManager = new RegisterLeaseManager();

  private final HashMap mWorkerIdMap = new HashMap<>();

  private final boolean mWorkerRegisterToAllMasters = Configuration.getBoolean(
      PropertyKey.WORKER_REGISTER_TO_ALL_MASTERS);

  private final boolean mStandbyMasterRpcEnabled = Configuration.getBoolean(
      PropertyKey.STANDBY_MASTER_GRPC_ENABLED);

  /**
   * Creates a new instance of {@link DefaultBlockMaster}.
   *
   * @param metricsMaster the metrics master
   * @param masterContext the context for Alluxio master
   */
  DefaultBlockMaster(MetricsMaster metricsMaster, CoreMasterContext masterContext) {
    this(metricsMaster, masterContext, new SystemClock(),
        ExecutorServiceFactories.cachedThreadPool(Constants.BLOCK_MASTER_NAME));
  }

  private DefaultBlockMaster(MetricsMaster metricsMaster, CoreMasterContext masterContext,
      Clock clock, ExecutorServiceFactory executorServiceFactory, BlockMetaStore blockMetaStore) {
    super(masterContext, clock, executorServiceFactory);
    Preconditions.checkNotNull(metricsMaster, "metricsMaster");

    mBlockMetaStore = blockMetaStore;
    mMetricsMaster = metricsMaster;
    Metrics.registerGauges(this);

    mWorkerInfoCache = CacheBuilder.newBuilder()
        .refreshAfterWrite(Configuration
            .getMs(PropertyKey.MASTER_WORKER_INFO_CACHE_REFRESH_TIME), TimeUnit.MILLISECONDS)
        .build(new CacheLoader<String, List<WorkerInfo>>() {
          @Override
          public List<WorkerInfo> load(String key) throws UnavailableException {
            switch (key) {
              case LIVE_WORKER_INFO_CACHE_KEY:
                return constructWorkerInfoList();
              case LOST_WORKER_INFO_CACHE_KEY:
                return getLostWorkersInfoListInternal();
              default:
                return constructWorkerInfoList();
            }
          }
        });

    MetricsSystem.registerGaugeIfAbsent(MetricKey.MASTER_LOST_BLOCK_COUNT.getName(),
        this::getLostBlocksCount);
    MetricsSystem.registerCachedGaugeIfAbsent(MetricKey.MASTER_TO_REMOVE_BLOCK_COUNT.getName(),
        this::getToRemoveBlockCount, 30, TimeUnit.SECONDS);
  }

  /**
   * Creates a new instance of {@link DefaultBlockMaster}.
   * Used for tests where we manually control the clock.
   *
   * @param metricsMaster the metrics master
   * @param masterContext the context for Alluxio master
   * @param clock the clock to use for determining the time
   * @param executorServiceFactory a factory for creating the executor service to use for running
   *        maintenance threads
   */
  @VisibleForTesting
  public DefaultBlockMaster(MetricsMaster metricsMaster, CoreMasterContext masterContext,
      Clock clock, ExecutorServiceFactory executorServiceFactory) {
    this(metricsMaster, masterContext, clock, executorServiceFactory,
        masterContext.getBlockStoreFactory().get());
  }

  @Override
  public String getName() {
    return Constants.BLOCK_MASTER_NAME;
  }

  @Override
  public Map<ServiceType, GrpcService> getServices() {
    Map<ServiceType, GrpcService> services = new HashMap<>();
    services.put(ServiceType.BLOCK_MASTER_CLIENT_SERVICE,
        new GrpcService(ServerInterceptors
            .intercept(new BlockMasterClientServiceHandler(this),
                new ClientContextServerInjector())));
    services.put(ServiceType.BLOCK_MASTER_WORKER_SERVICE,
        new GrpcService(ServerInterceptors
            .intercept(new BlockMasterWorkerServiceHandler(this),
                new ClientContextServerInjector())));
    return services;
  }

  @Override
  public Map<ServiceType, GrpcService> getStandbyServices() {
    if (Configuration.getBoolean(PropertyKey.WORKER_REGISTER_TO_ALL_MASTERS)) {
      return getServices();
    }
    return Collections.emptyMap();
  }

  @Override
  public boolean processJournalEntry(JournalEntry entry) {
    // TODO(gene): A better way to process entries besides a huge switch?
    if (entry.hasBlockContainerIdGenerator()) {
      mJournaledNextContainerId = entry.getBlockContainerIdGenerator().getNextContainerId();
      mBlockContainerIdGenerator.setNextContainerId(mJournaledNextContainerId);
    } else if (entry.hasDeleteBlock()) {
      mBlockMetaStore.removeBlock(entry.getDeleteBlock().getBlockId());
    } else if (entry.hasBlockInfo()) {
      BlockInfoEntry blockInfoEntry = entry.getBlockInfo();
      long length = blockInfoEntry.getLength();
      Optional<BlockMeta> block = mBlockMetaStore.getBlock(blockInfoEntry.getBlockId());
      if (block.isPresent()) {
        // If we write multiple replicas, multiple streams will all write BlockInfoEntry
        // when they CommitBlock. We rely on idempotence to handle duplicate entries
        // and only warn when there are inconsistencies.
        long oldLen = block.get().getLength();
        if (oldLen != Constants.UNKNOWN_SIZE) {
          LOG.warn("Attempting to update block length ({}) to a different length ({}).", oldLen,
              length);
          return true;
        }
      }
      mBlockMetaStore.putBlock(blockInfoEntry.getBlockId(),
          BlockMeta.newBuilder().setLength(blockInfoEntry.getLength()).build());
      // This can be called when
      // 1. The master is replaying the journal.
      // 2. A standby master is applying a journal entry from the primary master.
      if (blockInfoEntry.hasBlockLocation()) {
        alluxio.grpc.BlockLocation blockLocation = blockInfoEntry.getBlockLocation();
        long workerId = blockLocation.getWorkerId();
        MasterWorkerInfo worker = mWorkers.getFirstByField(ID_INDEX, workerId);
        if (worker == null) {
          // The master is replaying journal or somehow the worker is not there anymore
          // We do not add the BlockLocation because the workerId is not reliable anymore
          // If the worker comes back, it will register and BlockLocation will be added then
          return true;
        }
        // The master is running and the journal is from an existing worker
        mBlockMetaStore.addLocation(blockInfoEntry.getBlockId(), BlockLocationUtils.getCached(
            workerId, blockLocation.getTierAlias(), blockLocation.getMediumType())
        );

        worker.addBlock(blockInfoEntry.getBlockId());
        LOG.debug("Added BlockLocation for {} to worker {}", blockInfoEntry.getBlockId(), workerId);
      }
    } else {
      return false;
    }
    return true;
  }
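  /*
   * For illustration only: a minimal sketch of a journal entry that the method above accepts,
   * built with the protobuf builders already imported by this file. The blockId and length
   * values are placeholders.
   *
   *   JournalEntry entry = JournalEntry.newBuilder()
   *       .setBlockInfo(BlockInfoEntry.newBuilder()
   *           .setBlockId(blockId)
   *           .setLength(length)
   *           .build())
   *       .build();
   *   boolean applied = processJournalEntry(entry); // true: the entry belongs to this master
   */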

  @Override
  public void resetState() {
    mBlockMetaStore.clear();
    mJournaledNextContainerId = 0;
    mBlockContainerIdGenerator.setNextContainerId(0);
  }

  @Override
  public CheckpointName getCheckpointName() {
    if (mBlockMetaStore instanceof Checkpointed) {
      return ((Checkpointed) mBlockMetaStore).getCheckpointName();
    }
    return CheckpointName.BLOCK_MASTER;
  }

  @Override
  public CompletableFuture<Void> writeToCheckpoint(File directory,
      ExecutorService executorService) {
    if (mBlockMetaStore instanceof Checkpointed) {
      SingleEntryJournaled containerIdJournal = new DefaultBlockMasterContainerIdJournaled();
      containerIdJournal.processJournalEntry(getContainerIdJournalEntry());
      return CompletableFuture.allOf((
          (Checkpointed) mBlockMetaStore).writeToCheckpoint(directory, executorService),
          containerIdJournal.writeToCheckpoint(directory, executorService));
    }
    return super.writeToCheckpoint(directory, executorService);
  }

  @Override
  public CompletableFuture<Void> restoreFromCheckpoint(File directory,
      ExecutorService executorService) {
    if (mBlockMetaStore instanceof Checkpointed) {
      SingleEntryJournaled containerIdJournal = new DefaultBlockMasterContainerIdJournaled();
      return CompletableFuture.allOf((
          (Checkpointed) mBlockMetaStore).restoreFromCheckpoint(directory, executorService),
          containerIdJournal.restoreFromCheckpoint(directory, executorService)
              .thenRun(() -> processJournalEntry(containerIdJournal.getEntry())));
    }
    return super.restoreFromCheckpoint(directory, executorService);
  }

  @Override
  public CloseableIterator<JournalEntry> getJournalEntryIterator() {
    CloseableIterator<Block> blockStoreIterator = mBlockMetaStore.getCloseableIterator();
    Iterator<JournalEntry> journalIterator = new Iterator<JournalEntry>() {
      @Override
      public boolean hasNext() {
        return blockStoreIterator.hasNext();
      }

      @Override
      public JournalEntry next() {
        if (!hasNext()) {
          throw new NoSuchElementException();
        }
        /*
         * When the BlockStore is RocksBlockMetaStore, thread safety is embedded in the iterator.
         * So no need to worry if the RocksDB is closed while this iterator is active.
         */
        Block block = blockStoreIterator.next();
        BlockInfoEntry blockInfoEntry =
            BlockInfoEntry.newBuilder().setBlockId(block.getId())
                .setLength(block.getMeta().getLength()).build();
        return JournalEntry.newBuilder().setBlockInfo(blockInfoEntry).build();
      }

      @Override
      public void remove() {
        throw new UnsupportedOperationException("BlockMaster#Iterator#remove is not supported.");
      }
    };

    CloseableIterator<JournalEntry> journalCloseableIterator =
        CloseableIterator.create(journalIterator, (whatever) -> blockStoreIterator.close());

    return CloseableIterator.concat(
        CloseableIterator.noopCloseable(
            CommonUtils.singleElementIterator(getContainerIdJournalEntry())),
        journalCloseableIterator);
  }
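  /*
   * For illustration only: a sketch of how a caller is expected to consume the iterator
   * above. Using try-with-resources guarantees the underlying BlockMetaStore iterator is
   * closed even if iteration fails midway.
   *
   *   try (CloseableIterator<JournalEntry> it = getJournalEntryIterator()) {
   *     while (it.hasNext()) {
   *       JournalEntry entry = it.next();
   *       // The first entry is the container id generator state,
   *       // followed by one BlockInfoEntry per block.
   *     }
   *   }
   */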

  /**
   * Periodically checks the open worker register streams.
   * If a stream has been active for a while, close the stream, recycle resources and locks,
   * and propagate an error to the worker side.
   */
  public class WorkerRegisterStreamGCExecutor implements HeartbeatExecutor {
    private final long mTimeout = Configuration.global()
        .getMs(PropertyKey.MASTER_WORKER_REGISTER_STREAM_RESPONSE_TIMEOUT);

    @Override
    public void heartbeat(long timeLimitMs) {
      AtomicInteger removedSessions = new AtomicInteger(0);
      mActiveRegisterContexts.entrySet().removeIf((entry) -> {
        WorkerRegisterContext context = entry.getValue();
        final long clockTime = mClock.millis();
        final long lastActivityTime = context.getLastActivityTimeMs();
        final long staleTime = clockTime - lastActivityTime;
        if (staleTime < mTimeout) {
          return false;
        }
        String msg = String.format(
            "ClockTime: %d, LastActivityTime: %d. Worker %d register stream hanging for %sms!"
                + " Tune up %s if this is undesired.",
            clockTime, lastActivityTime, context.getWorkerInfo().getId(), staleTime,
            PropertyKey.MASTER_WORKER_REGISTER_STREAM_RESPONSE_TIMEOUT);
        Exception e = new TimeoutException(msg);
        try {
          context.closeWithError(e);
        } catch (Throwable t) {
          t.addSuppressed(e);
          LOG.error("Failed to close an open register stream for worker {}. "
              + "The stream has been open for {}ms.", context.getWorkerId(), staleTime, t);
          // Do not remove the entry so this will be retried
          return false;
        }
        removedSessions.getAndIncrement();
        return true;
      });
      if (removedSessions.get() > 0) {
        LOG.info("Removed {} stale worker registration streams", removedSessions.get());
      }
    }

    @Override
    public void close() {
      // Nothing to clean up
    }
  }

  @Override
  public void start(Boolean isLeader) throws IOException {
    super.start(isLeader);
    if (isLeader || mWorkerRegisterToAllMasters) {
      getExecutorService().submit(new HeartbeatThread(
          HeartbeatContext.MASTER_LOST_WORKER_DETECTION, new LostWorkerDetectionHeartbeatExecutor(),
          () -> new FixedIntervalSupplier(
              Configuration.getMs(PropertyKey.MASTER_LOST_WORKER_DETECTION_INTERVAL)),
          Configuration.global(), mMasterContext.getUserState()));
    }

    // This periodically scans all open register streams and closes hanging ones
    getExecutorService().submit(new HeartbeatThread(
          HeartbeatContext.MASTER_WORKER_REGISTER_SESSION_CLEANER,
            new WorkerRegisterStreamGCExecutor(),
            () -> new FixedIntervalSupplier(Configuration.getMs(
                PropertyKey.MASTER_WORKER_REGISTER_STREAM_RESPONSE_TIMEOUT)),
            Configuration.global(), mMasterContext.getUserState()));
  }

  @Override
  public void stop() throws IOException {
    LOG.info("Next container id before close: {}", mBlockContainerIdGenerator.peekNewContainerId());
    super.stop();
  }

  @Override
  public void close() throws IOException {
    super.close();
    mBlockMetaStore.close();

    mContainerIdDetector.shutdown();
    try {
      mContainerIdDetector.awaitTermination(5000, TimeUnit.MILLISECONDS);
    } catch (InterruptedException e) {
      LOG.warn("Container id detection executor did not shut down in a timely manner: {}",
          e.toString());
    }
  }

  @Override
  public int getWorkerCount() {
    return mWorkers.size();
  }

  @Override
  public int getLostWorkerCount() {
    return mLostWorkers.size();
  }

  @Override
  public int getDecommissionedWorkerCount() {
    return mDecommissionedWorkers.size();
  }

  @Override
  public long getCapacityBytes() {
    long ret = 0;
    for (MasterWorkerInfo worker : mWorkers) {
      try (LockResource r = worker.lockWorkerMeta(
          EnumSet.of(WorkerMetaLockSection.USAGE), true)) {
        ret += worker.getCapacityBytes();
      }
    }
    return ret;
  }

  @Override
  public long getUniqueBlockCount() {
    return mBlockMetaStore.size();
  }

  @Override
  public long getBlockReplicaCount() {
    long ret = 0;
    for (MasterWorkerInfo worker : mWorkers) {
      ret += worker.getBlockCount();
    }
    return ret;
  }

  @Override
  public StorageTierAssoc getGlobalStorageTierAssoc() {
    return MASTER_STORAGE_TIER_ASSOC;
  }

  @Override
  public long getUsedBytes() {
    long ret = 0;
    for (MasterWorkerInfo worker : mWorkers) {
      try (LockResource r = worker.lockWorkerMeta(
          EnumSet.of(WorkerMetaLockSection.USAGE), true)) {
        ret += worker.getUsedBytes();
      }
    }
    return ret;
  }

  @Override
  public List<WorkerInfo> getWorkerInfoList() throws UnavailableException {
    if (mSafeModeManager.isInSafeMode()) {
      throw new UnavailableException(ExceptionMessage.MASTER_IN_SAFEMODE.getMessage());
    }
    try {
      return mWorkerInfoCache.get(LIVE_WORKER_INFO_CACHE_KEY);
    } catch (ExecutionException e) {
      throw new UnavailableException("Unable to get worker info list from cache", e);
    }
  }

  private List<WorkerInfo> constructWorkerInfoList() {
    // TODO(jiacheng): investigate why this cache is refreshed so many times by the
    //  alluxio.master.scheduler.Scheduler L239
    List<WorkerInfo> workerInfoList = new ArrayList<>(mWorkers.size());
    for (MasterWorkerInfo worker : mWorkers) {
      // extractWorkerInfo handles the locking internally
      workerInfoList.add(extractWorkerInfo(worker,
          GetWorkerReportOptions.WorkerInfoField.ALL, WorkerState.LIVE));
    }
    return workerInfoList;
  }

  @Override
  public List<WorkerInfo> getLostWorkersInfoList() throws UnavailableException {
    if (mSafeModeManager.isInSafeMode()) {
      throw new UnavailableException(ExceptionMessage.MASTER_IN_SAFEMODE.getMessage());
    }
    try {
      return mWorkerInfoCache.get(LOST_WORKER_INFO_CACHE_KEY);
    } catch (ExecutionException e) {
      throw new UnavailableException("Unable to get worker info list from cache", e);
    }
  }

  private List<WorkerInfo> getLostWorkersInfoListInternal() throws UnavailableException {
    if (mSafeModeManager.isInSafeMode()) {
      throw new UnavailableException(ExceptionMessage.MASTER_IN_SAFEMODE.getMessage());
    }
    List<WorkerInfo> workerInfoList = new ArrayList<>(mLostWorkers.size());
    for (MasterWorkerInfo worker : mLostWorkers) {
      // extractWorkerInfo handles the locking internally
      workerInfoList.add(extractWorkerInfo(worker,
          GetWorkerReportOptions.WorkerInfoField.ALL, WorkerState.LOST));
    }
    workerInfoList.sort(new WorkerInfo.LastContactSecComparator());
    return workerInfoList;
  }

  @Override
  public void removeDisabledWorker(RemoveDisabledWorkerPOptions requestOptions)
          throws NotFoundException {
    if (mStandbyMasterRpcEnabled && mPrimarySelector.getStateUnsafe() == NodeState.STANDBY) {
      throw new UnavailableRuntimeException(
          "RemoveDisabledWorker operation is not supported on standby masters");
    }
    String workerHostName = requestOptions.getWorkerHostname();
    long workerWebPort = requestOptions.getWorkerWebPort();
    AtomicBoolean found = new AtomicBoolean(false);
    mRejectWorkers.removeIf(entry -> {
      if (entry.getHost().equals(workerHostName) && entry.getWebPort() == workerWebPort) {
        LOG.info("Received admin command to re-accept worker {}. The worker should be "
            + "accepted to the cluster when it registers again.", entry);
        found.set(true);
        return true;
      }
      return false;
    });
    if (!found.get()) {
      LOG.info("Received admin command to re-accept worker {} but the worker is "
          + "not decommissioned. The worker will be able to register to the cluster normally. "
          + "No further action is required.", workerHostName);
    }
  }

  @Override
  public Set<WorkerNetAddress> getWorkerAddresses() throws UnavailableException {
    if (mSafeModeManager.isInSafeMode()) {
      throw new UnavailableException(ExceptionMessage.MASTER_IN_SAFEMODE.getMessage());
    }
    Set<WorkerNetAddress> workerAddresses = new HashSet<>(mWorkers.size());
    for (MasterWorkerInfo worker : mWorkers) {
      // worker net address is unmodifiable after initialization, no locking is needed
      workerAddresses.add(worker.getWorkerAddress());
    }
    return workerAddresses;
  }

  @Override
  public List<WorkerInfo> getWorkerReport(GetWorkerReportOptions options)
      throws UnavailableException, InvalidArgumentException {
    if (mSafeModeManager.isInSafeMode()) {
      throw new UnavailableException(ExceptionMessage.MASTER_IN_SAFEMODE.getMessage());
    }

    Set<MasterWorkerInfo> selectedLiveWorkers = new HashSet<>();
    Set<MasterWorkerInfo> selectedLostWorkers = new HashSet<>();
    Set<MasterWorkerInfo> selectedDecommissionedWorkers = new HashSet<>();
    WorkerRange workerRange = options.getWorkerRange();
    switch (workerRange) {
      case ALL:
        selectedLiveWorkers.addAll(mWorkers);
        selectedLostWorkers.addAll(mLostWorkers);
        selectedDecommissionedWorkers.addAll(mDecommissionedWorkers);
        break;
      case LIVE:
        selectedLiveWorkers.addAll(mWorkers);
        break;
      case LOST:
        selectedLostWorkers.addAll(mLostWorkers);
        break;
      case DECOMMISSIONED:
        selectedDecommissionedWorkers.addAll(mDecommissionedWorkers);
        break;
      case SPECIFIED:
        Set<String> addresses = options.getAddresses();
        Set<String> workerNames = new HashSet<>();

        selectedLiveWorkers = selectInfoByAddress(addresses, mWorkers, workerNames);
        selectedLostWorkers = selectInfoByAddress(addresses, mLostWorkers, workerNames);
        selectedDecommissionedWorkers = selectInfoByAddress(addresses,
            mDecommissionedWorkers, workerNames);

        if (!addresses.isEmpty()) {
          String info = String.format("Unrecognized worker names: %s%n"
                  + "Supported worker names: %s%n",
                  addresses, workerNames);
          throw new InvalidArgumentException(info);
        }
        break;
      default:
        throw new InvalidArgumentException("Unrecognized worker range: " + workerRange);
    }

    List<WorkerInfo> workerInfoList = new ArrayList<>(
        selectedLiveWorkers.size() + selectedLostWorkers.size()
            + selectedDecommissionedWorkers.size());
    for (MasterWorkerInfo worker : selectedLiveWorkers) {
      // extractWorkerInfo handles the locking internally
      if (mRejectWorkers.contains(worker.getWorkerAddress())) {
        workerInfoList.add(extractWorkerInfo(worker, options.getFieldRange(),
            WorkerState.DISABLED));
      } else {
        workerInfoList.add(extractWorkerInfo(worker, options.getFieldRange(), WorkerState.LIVE));
      }
    }
    for (MasterWorkerInfo worker : selectedLostWorkers) {
      // extractWorkerInfo handles the locking internally
      if (mRejectWorkers.contains(worker.getWorkerAddress())) {
        workerInfoList.add(extractWorkerInfo(worker, options.getFieldRange(),
            WorkerState.DISABLED));
      } else {
        workerInfoList.add(extractWorkerInfo(worker, options.getFieldRange(), WorkerState.LOST));
      }
    }
    for (MasterWorkerInfo worker : selectedDecommissionedWorkers) {
      // extractWorkerInfo handles the locking internally
      if (mRejectWorkers.contains(worker.getWorkerAddress())) {
        workerInfoList.add(extractWorkerInfo(worker, options.getFieldRange(),
            WorkerState.DISABLED));
      } else {
        workerInfoList.add(extractWorkerInfo(worker, options.getFieldRange(),
            WorkerState.DECOMMISSIONED));
      }
    }
    return workerInfoList;
  }

  /**
   * Locks the {@link MasterWorkerInfo} properly and converts it to a {@link WorkerInfo}.
   */
  private WorkerInfo extractWorkerInfo(MasterWorkerInfo worker,
      Set<GetWorkerReportOptions.WorkerInfoField> fieldRange, WorkerState workerState) {
    try (LockResource r = worker.lockWorkerMetaForInfo(fieldRange)) {
      return worker.generateWorkerInfo(fieldRange, workerState);
    }
  }

  @Override
  public List<WorkerLostStorageInfo> getWorkerLostStorage() {
    List<WorkerLostStorageInfo> workerLostStorageList = new ArrayList<>();
    for (MasterWorkerInfo worker : mWorkers) {
      try (LockResource r = worker.lockWorkerMeta(EnumSet.of(WorkerMetaLockSection.USAGE), true)) {
        if (worker.hasLostStorage()) {
          Map<String, StorageList> lostStorage = worker.getLostStorage().entrySet()
              .stream().collect(Collectors.toMap(Map.Entry::getKey,
                  e -> StorageList.newBuilder().addAllStorage(e.getValue()).build()));
          workerLostStorageList.add(WorkerLostStorageInfo.newBuilder()
              .setAddress(GrpcUtils.toProto(worker.getWorkerAddress()))
              .putAllLostStorage(lostStorage).build());
        }
      }
    }
    return workerLostStorageList;
  }

  @Override
  public void removeBlocks(Collection<Long> blockIds, boolean delete) throws UnavailableException {
    try (JournalContext journalContext = createJournalContext()) {
      for (long blockId : blockIds) {
        Set<Long> workerIds;
        try (LockResource r = lockBlock(blockId)) {
          Optional<BlockMeta> block = mBlockMetaStore.getBlock(blockId);
          if (!block.isPresent()) {
            continue;
          }
          List<BlockLocation> locations = mBlockMetaStore.getLocations(blockId);
          workerIds = new HashSet<>(locations.size());
          for (BlockLocation loc : locations) {
            workerIds.add(loc.getWorkerId());
          }
          // Two cases here:
          // 1) For delete: delete the block metadata.
          // 2) For free: keep the block metadata. mLostBlocks will be changed in
          // processWorkerRemovedBlocks
          if (delete) {
            // Make sure blockId is removed from mLostBlocks when the block metadata is deleted.
            // Otherwise blockId in mLostBlocks can be a dangling index if the metadata is gone.
            mLostBlocks.remove(blockId);
            mBlockMetaStore.removeBlock(blockId);
            JournalEntry entry = JournalEntry.newBuilder()
                .setDeleteBlock(DeleteBlockEntry.newBuilder().setBlockId(blockId)).build();
            journalContext.append(entry);
          }
        }

        // Outside of locking the block. This does not have to be synchronized with the block
        // metadata, since it is essentially an asynchronous signal to the worker to remove the
        // block.
        // TODO(jiacheng): if the block locations are changed (like a new worker is registered
        //  with the block), the block will not be freed ever. The locking logic in
        //  workerRegister should be changed to address this race condition.
        for (long workerId : workerIds) {
          // No need to update if the worker is lost or decommissioned
          // When that lost/decommissioned worker registers again, those removed blocks
          // will not be recognized, and the master will instruct the worker to remove them anyway
          MasterWorkerInfo worker = mWorkers.getFirstByField(ID_INDEX, workerId);
          if (worker != null) {
            try (LockResource r = worker.lockWorkerMeta(
                EnumSet.of(WorkerMetaLockSection.BLOCKS), false)) {
              worker.updateToRemovedBlock(true, blockId);
            }
          }
        }
      }
    }
  }
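  /*
   * For illustration only: the two modes of removeBlocks above, assuming the caller already
   * knows the block ids.
   *
   *   removeBlocks(blockIds, true);  // delete: drop block metadata, journal a DeleteBlockEntry,
   *                                  // and signal owning workers to remove their replicas
   *   removeBlocks(blockIds, false); // free: keep block metadata, only signal owning workers
   *                                  // to remove the cached replicas
   */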

  @Override
  public boolean isRejected(WorkerNetAddress address) {
    return mRejectWorkers.contains(address);
  }

  @Override
  public void decommissionWorker(DecommissionWorkerPOptions requestOptions)
      throws NotFoundException {
    String workerHostName = requestOptions.getWorkerHostname();
    long workerWebPort = requestOptions.getWorkerWebPort();
    boolean canRegisterAgain = requestOptions.getCanRegisterAgain();
    LOG.info("Decommissioning worker {}:{}", requestOptions.getWorkerHostname(),
        requestOptions.getWorkerWebPort());
    for (MasterWorkerInfo workerInfo : mWorkers) {
      WorkerNetAddress address = workerInfo.getWorkerAddress();
      if (workerHostName.equals(address.getHost()) && workerWebPort == address.getWebPort()) {
        LOG.info("Found worker to decommission {}", workerInfo.getWorkerAddress());
        try (LockResource r = workerInfo.lockWorkerMeta(
            EnumSet.of(WorkerMetaLockSection.BLOCKS), false)) {
          processDecommissionedWorker(workerInfo, canRegisterAgain);
        }
        LOG.info("Worker {}@{}:{} has been added to the decommissionedWorkers set.",
            workerInfo.getId(), workerHostName, workerWebPort);
        return;
      }
    }
    // The worker is not active, but it has been decommissioned from a previous call
    for (MasterWorkerInfo workerInfo : mDecommissionedWorkers) {
      WorkerNetAddress address = workerInfo.getWorkerAddress();
      if (workerHostName.equals(address.getHost()) && workerWebPort == address.getWebPort()) {
        LOG.info("Worker {}@{}:{} has been decommissioned already",
            workerInfo.getId(), workerHostName, workerWebPort);
        return;
      }
    }
    // If the worker is about to register, it may register back even if we decommission it
    // here. So we let the admin wait until the worker is registered, to reduce the number of
    // states to manage.
    for (MasterWorkerInfo workerInfo : mTempWorkers) {
      WorkerNetAddress address = workerInfo.getWorkerAddress();
      if (workerHostName.equals(address.getHost()) && workerWebPort == address.getWebPort()) {
        throw new NotFoundException(ExceptionMessage.WORKER_DECOMMISSIONED_BEFORE_REGISTER
            .getMessage(workerHostName + ":" + workerWebPort));
      }
    }
    // If the worker is lost, we guess it is more likely that the worker will not come back
    // immediately
    for (MasterWorkerInfo workerInfo : mLostWorkers) {
      WorkerNetAddress address = workerInfo.getWorkerAddress();
      if (workerHostName.equals(address.getHost()) && workerWebPort == address.getWebPort()) {
        LOG.info("Found worker to decommission {} from lost workers",
            workerInfo.getWorkerAddress());
        try (LockResource r = workerInfo.lockWorkerMeta(
            EnumSet.of(WorkerMetaLockSection.BLOCKS), false)) {
          processDecommissionedWorker(workerInfo, canRegisterAgain);
        }
        LOG.info("A lost worker {}@{}:{} has been added to the decommissionedWorkers set.",
            workerInfo.getId(), workerHostName, workerWebPort);
        return;
      }
    }
    throw new NotFoundException(ExceptionMessage.WORKER_NOT_FOUND
        .getMessage(workerHostName + ":" + workerWebPort));
  }

  /**
   * @return the last journaled value of the next container id
   */
  @Override
  public long getJournaledNextContainerId() {
    return mJournaledNextContainerId;
  }

  /**
   * @return a new block container id
   */
  @Override
  public long getNewContainerId() throws UnavailableException {
    long containerId = mBlockContainerIdGenerator.getNewContainerId();
    if (containerId >= (mJournaledNextContainerId - mContainerIdReservationSize / 2)) {
      if (containerId >= mJournaledNextContainerId) {
        synchronized (mBlockContainerIdGenerator) {
          // This container id is not safe with respect to the last journaled container id.
          // Therefore, journal the new state of the container id. This implies that when a master
          // crashes, the container ids within the reservation which have not been used yet will
          // never be used. This is a tradeoff between fully utilizing the container id space, vs.
          // improving master scalability.

          // Set the next id to journal with a reservation of container ids, to avoid having to
          // write to the journal for ids within the reservation.
          long possibleMaxContainerId = mBlockContainerIdGenerator.getNextContainerId();
          if (possibleMaxContainerId >= mJournaledNextContainerId) {
            mJournaledNextContainerId = possibleMaxContainerId + mContainerIdReservationSize;
            try (JournalContext journalContext = createJournalContext()) {
              // This must be flushed while holding the lock on mBlockContainerIdGenerator, in
              // order to prevent subsequent calls to return ids that have not been journaled
              // and flushed.
              journalContext.append(getContainerIdJournalEntry());
            }
          }
        }
      } else {
        if (mContainerIdDetectorIsIdle) {
          synchronized (mBlockContainerIdGenerator) {
            if (mContainerIdDetectorIsIdle) {
              mContainerIdDetectorIsIdle = false;
              mContainerIdDetector.submit(() -> {
                try {
                  synchronized (mBlockContainerIdGenerator) {
                    long possibleMaxContainerId = mBlockContainerIdGenerator.getNextContainerId();

                    if (possibleMaxContainerId
                        >= (mJournaledNextContainerId - mContainerIdReservationSize / 2)) {
                      mJournaledNextContainerId = possibleMaxContainerId
                          + mContainerIdReservationSize;
                      try (JournalContext journalContext = createJournalContext()) {
                        journalContext.append(getContainerIdJournalEntry());
                      }
                    }
                  }
                } catch (UnavailableException e) {
                  LOG.error("Container Id Detector failed", e);
                }

                mContainerIdDetectorIsIdle = true;
              });
            }
          }
        }
      }
    }

    return containerId;
  }
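  /*
   * Worked example of the reservation scheme above, assuming
   * PropertyKey.MASTER_CONTAINER_ID_RESERVATION_SIZE is 1000:
   * - After journaling, mJournaledNextContainerId is 1000, so ids 0..999 are returned
   *   without touching the journal.
   * - Once the generator passes id 500 (half the reservation), the background
   *   mContainerIdDetector task journals a new reservation off the critical path,
   *   advancing mJournaledNextContainerId (e.g. to roughly 1500).
   * - Only if the generator catches up to mJournaledNextContainerId does a caller journal
   *   and flush synchronously, so an id is never returned before it is covered by a
   *   journaled reservation.
   * If the master crashes, the unused ids inside the reservation are simply skipped: a
   * tradeoff of id-space density for fewer journal writes.
   */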

  /**
   * @return a {@link JournalEntry} representing the state of the container id generator
   */
  private JournalEntry getContainerIdJournalEntry() {
    synchronized (mBlockContainerIdGenerator) {
      BlockContainerIdGeneratorEntry blockContainerIdGenerator =
          BlockContainerIdGeneratorEntry.newBuilder().setNextContainerId(mJournaledNextContainerId)
              .build();
      return JournalEntry.newBuilder().setBlockContainerIdGenerator(blockContainerIdGenerator)
          .build();
    }
  }

  // TODO(binfan): check the logic is correct or not when commitBlock is a retry
  @Override
  public void commitBlock(long workerId, long usedBytesOnTier, String tierAlias,
      String mediumType, long blockId, long length)
      throws NotFoundException, UnavailableException {
    LOG.debug("Commit block from workerId: {}, usedBytesOnTier: {}, blockId: {}, length: {}",
        workerId, usedBytesOnTier, blockId, length);
    MasterWorkerInfo worker = mWorkers.getFirstByField(ID_INDEX, workerId);
    // TODO(peis): Check lost workers as well.
    if (worker == null) {
      /*
       * If the worker is not recognized:
       * 1. [Probably] The worker has been decommissioned and removed from the active worker list
       * 2. [Possible] The worker has not finished its register process. Maybe the master has
       *    failed over and the worker has not registered to this new primary.
       * 3. [Unlikely] The worker does not belong to this cluster and has never registered.
       *    This is unlikely because the worker has an ID and it must be from some master.
       * 4. [Unlikely] The worker is lost to the master. This is unlikely because the CommitBlock
       *    call is from the worker. More likely, the master was busy and did not handle the
       *    worker's heartbeat message for too long.
       */
      worker = mDecommissionedWorkers.getFirstByField(ID_INDEX, workerId);
      if (worker == null) {
        throw new NotFoundException(ExceptionMessage.NO_WORKER_FOUND.getMessage(workerId));
      } else {
        WorkerNetAddress addr = worker.getWorkerAddress();
        LOG.info("Committing blocks from a decommissioned worker {}",
            addr.getHost() + ":" + addr.getRpcPort());
        /*
         * Even though the worker is now decommissioned, the master still accepts the block
         * and updates the BlockLocation normally.
         * Updating the BlockLocation is not strictly necessary, because when the worker
         * registers again after restart, all locations will be rebuilt.
         * But for simplicity, the location is still updated.
         * A disabled worker is allowed to commit blocks, so ongoing operations will succeed.
         */
      }
    }

    try (JournalContext journalContext = createJournalContext()) {
      // Lock the worker metadata here to preserve the lock order
      // The worker metadata must be locked before the blocks
      try (LockResource lr = worker.lockWorkerMeta(
          EnumSet.of(WorkerMetaLockSection.USAGE, WorkerMetaLockSection.BLOCKS), false)) {
        try (LockResource r = lockBlock(blockId)) {
          Optional<BlockMeta> block = mBlockMetaStore.getBlock(blockId);
          if (!block.isPresent() || block.get().getLength() != length) {
            if (block.isPresent() && block.get().getLength() != Constants.UNKNOWN_SIZE) {
              LOG.warn("Rejecting attempt to change block length from {} to {}",
                  block.get().getLength(), length);
            } else {
              mBlockMetaStore.putBlock(blockId, BlockMeta.newBuilder().setLength(length).build());
              BlockInfoEntry.Builder blockInfoBuilder =
                  BlockInfoEntry.newBuilder().setBlockId(blockId).setLength(length);
              if (mWorkerRegisterToAllMasters) {
                blockInfoBuilder
                    .setBlockId(blockId)
                    .setLength(length)
                    .setBlockLocation(
                        alluxio.grpc.BlockLocation.newBuilder()
                            .setWorkerId(workerId)
                            .setMediumType(mediumType)
                            .setTierAlias(tierAlias)
                            // Worker addresses are not journaled because adding a block location
                            // into the meta store only needs a worker id.
                            .build()
                    );
              }
              journalContext.append(
                  JournalEntry.newBuilder().setBlockInfo(blockInfoBuilder.build()).build());
            }
          }
          // Update the block metadata with the new worker location.
          mBlockMetaStore.addLocation(blockId, BlockLocationUtils.getCached(
              workerId, tierAlias, mediumType));
          // This worker has this block, so it is no longer lost.
          mLostBlocks.remove(blockId);

          // Update the worker information for this new block.
          // TODO(binfan): when retry commitBlock on master is expected, make sure metrics are not
          // double counted.
          worker.addBlock(blockId);
          worker.updateUsedBytes(tierAlias, usedBytesOnTier);
        }
      }

      worker.updateLastUpdatedTimeMs();
    }
  }
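  /*
   * For illustration only: the nesting used by commitBlock above, which follows the
   * class-level locking contract (worker metadata is locked before block metadata):
   *
   *   try (JournalContext jc = createJournalContext()) {         // 1. journal context
   *     try (LockResource w = worker.lockWorkerMeta(
   *         EnumSet.of(WorkerMetaLockSection.USAGE,
   *             WorkerMetaLockSection.BLOCKS), false)) {          // 2. worker USAGE + BLOCKS
   *       try (LockResource b = lockBlock(blockId)) {             // 3. block lock
   *         // update block metadata, location, and worker usage
   *       }                                                       // locks release in reverse
   *     }
   *   }
   */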

  @Override
  public void commitBlockInUFS(long blockId, long length, JournalContext journalContext) {
    LOG.debug("Commit block in ufs. blockId: {}, length: {}", blockId, length);
    try (LockResource r = lockBlock(blockId)) {
      if (mBlockMetaStore.getBlock(blockId).isPresent()) {
        // Block metadata already exists, so do not need to create a new one.
        return;
      }
      mBlockMetaStore.putBlock(blockId, BlockMeta.newBuilder().setLength(length).build());
      BlockInfoEntry blockInfo =
          BlockInfoEntry.newBuilder().setBlockId(blockId).setLength(length).build();
      journalContext.append(JournalEntry.newBuilder().setBlockInfo(blockInfo).build());
    }
  }

  @Override
  public BlockInfo getBlockInfo(long blockId) throws BlockInfoException, UnavailableException {
    return generateBlockInfo(blockId)
        .orElseThrow(() -> new BlockInfoException(ExceptionMessage.BLOCK_META_NOT_FOUND, blockId));
  }

  @Override
  public List<BlockInfo> getBlockInfoList(List<Long> blockIds) throws UnavailableException {
    List<BlockInfo> ret = new ArrayList<>(blockIds.size());
    for (long blockId : blockIds) {
      generateBlockInfo(blockId).ifPresent(ret::add);
    }
    return ret;
  }

  @Override
  public Map<String, Long> getTotalBytesOnTiers() {
    Map<String, Long> ret = new HashMap<>();
    for (MasterWorkerInfo worker : mWorkers) {
      try (LockResource r = worker.lockWorkerMeta(EnumSet.of(WorkerMetaLockSection.USAGE), true)) {
        for (Map.Entry<String, Long> entry : worker.getTotalBytesOnTiers().entrySet()) {
          Long total = ret.get(entry.getKey());
          ret.put(entry.getKey(), (total == null ? 0L : total) + entry.getValue());
        }
      }
    }
    return ret;
  }

  @Override
  public Map<String, Long> getUsedBytesOnTiers() {
    Map<String, Long> ret = new HashMap<>();
    for (MasterWorkerInfo worker : mWorkers) {
      try (LockResource r = worker.lockWorkerMeta(
          EnumSet.of(WorkerMetaLockSection.USAGE), true)) {
        for (Map.Entry<String, Long> entry : worker.getUsedBytesOnTiers().entrySet()) {
          Long used = ret.get(entry.getKey());
          ret.put(entry.getKey(), (used == null ? 0L : used) + entry.getValue());
        }
      }
    }
    return ret;
  }

  /**
   * Finds a worker which is considered lost or has just gotten its id.
   * @param workerNetAddress the address used to find a worker
   * @return a {@link MasterWorkerInfo} which is present in the master but not registered,
   *         or null if no worker is found
   */
  @Nullable
  private MasterWorkerInfo findUnregisteredWorker(WorkerNetAddress workerNetAddress) {
    for (IndexedSet<MasterWorkerInfo> workers: Arrays.asList(mTempWorkers,
        mLostWorkers, mDecommissionedWorkers)) {
      MasterWorkerInfo worker = workers.getFirstByField(ADDRESS_INDEX, workerNetAddress);
      if (worker != null) {
        return worker;
      }
    }
    return null;
  }

  /**
   * Finds a worker which is considered lost or has just gotten its id.
   * @param workerId the id used to find a worker
   * @return a {@link MasterWorkerInfo} which is present in the master but not registered,
   *         or null if no worker is found
   */
  @Nullable
  private MasterWorkerInfo findUnregisteredWorker(long workerId) {
    for (IndexedSet<MasterWorkerInfo> workers: Arrays.asList(mTempWorkers,
        mLostWorkers, mDecommissionedWorkers)) {
      MasterWorkerInfo worker = workers.getFirstByField(ID_INDEX, workerId);
      if (worker != null) {
        return worker;
      }
    }
    return null;
  }

  /**
   * Re-registers a lost worker or completes registration after the worker gets its id.
   * This method requires no locking on {@link MasterWorkerInfo} because it is only
   * reading final fields.
   *
   * @param workerId the worker id to register
   * @return the {@link MasterWorkerInfo} that completed registration, or null if no worker
   *         with this id was found among the unregistered workers
   */
  @Nullable
  protected MasterWorkerInfo recordWorkerRegistration(long workerId) {
    for (IndexedSet<MasterWorkerInfo> workers: Arrays.asList(mTempWorkers,
        mLostWorkers, mDecommissionedWorkers)) {
      MasterWorkerInfo worker = workers.getFirstByField(ID_INDEX, workerId);
      if (worker == null) {
        continue;
      }

      mWorkers.add(worker);
      workers.remove(worker);
      if (workers == mLostWorkers) {
        for (Consumer<Address> function : mLostWorkerFoundListeners) {
          // The worker address is final, no need for locking here
          function.accept(new Address(worker.getWorkerAddress().getHost(),
              worker.getWorkerAddress().getRpcPort()));
        }
        LOG.warn("A lost worker {} has requested its old id {}.",
            worker.getWorkerAddress(), worker.getId());
      }
      return worker;
    }
    return null;
  }

  @Override
  public long getWorkerId(WorkerNetAddress workerNetAddress) {
    if (mStandbyMasterRpcEnabled && mPrimarySelector.getStateUnsafe() == NodeState.STANDBY) {
      throw new UnavailableRuntimeException(
          "GetWorkerId operation is not supported on standby masters");
    }
    if (isRejected(workerNetAddress)) {
      String msg = String.format(WORKER_DISABLED, workerNetAddress);
      LOG.warn("{}", msg);
      throw new UnavailableRuntimeException(msg);
    }
    LOG.info("Worker {} requesting an ID", workerNetAddress);
    MasterWorkerInfo existingWorker = mWorkers.getFirstByField(ADDRESS_INDEX, workerNetAddress);
    if (existingWorker != null) {
      // This worker address is already mapped to a worker id.
      long oldWorkerId = existingWorker.getId();
      LOG.warn("The worker {} already exists as id {}.", workerNetAddress, oldWorkerId);
      return oldWorkerId;
    }

    existingWorker = findUnregisteredWorker(workerNetAddress);
    if (existingWorker != null) {
      return existingWorker.getId();
    }

    // Generate a new worker id.
    long workerId = IdUtils.getRandomNonNegativeLong();
    while (!mTempWorkers.add(new MasterWorkerInfo(workerId, workerNetAddress))) {
      workerId = IdUtils.getRandomNonNegativeLong();
    }
    LOG.info("getWorkerId(): WorkerNetAddress: {} id: {}", workerNetAddress, workerId);
    return workerId;
  }

  @Override
  public void notifyWorkerId(long workerId, WorkerNetAddress workerNetAddress) {
    MasterWorkerInfo existingWorker = mWorkers.getFirstByField(ID_INDEX, workerId);
    if (existingWorker != null) {
      LOG.warn("A registered worker {} comes again from {}",
          workerId, existingWorker.getWorkerAddress());
      return;
    }
    existingWorker = findUnregisteredWorker(workerId);
    if (existingWorker != null) {
      LOG.warn("An unregistered worker {} comes again from {}",
          workerId, existingWorker.getWorkerAddress());
      return;
    }
    if (!mTempWorkers.add(new MasterWorkerInfo(workerId, workerNetAddress))) {
      throw new RuntimeException(
          "Duplicated worker ID for " + workerId + ": " + workerNetAddress);
    }
    LOG.info("notifyWorkerId(): WorkerNetAddress: {} id: {}", workerNetAddress, workerId);
  }

  @Override
  public Optional<RegisterLease> tryAcquireRegisterLease(GetRegisterLeasePRequest request) {
    return mRegisterLeaseManager.tryAcquireLease(request);
  }

  @Override
  public boolean hasRegisterLease(long workerId) {
    return mRegisterLeaseManager.hasLease(workerId);
  }

  @Override
  public void releaseRegisterLease(long workerId) {
    mRegisterLeaseManager.releaseLease(workerId);
  }

  @Override
  public void workerRegister(long workerId, List<String> storageTiers,
      Map<String, Long> totalBytesOnTiers, Map<String, Long> usedBytesOnTiers,
      Map<BlockLocation, List<Long>> currentBlocksOnLocation,
      Map<String, StorageList> lostStorage, RegisterWorkerPOptions options)
      throws NotFoundException {
    MasterWorkerInfo worker = mWorkers.getFirstByField(ID_INDEX, workerId);
    if (worker == null) {
      worker = findUnregisteredWorker(workerId);
    }
    if (worker == null) {
      throw new NotFoundException(ExceptionMessage.NO_WORKER_FOUND.getMessage(workerId));
    }
    if (isRejected(worker.getWorkerAddress())) {
      throw new UnavailableRuntimeException(String.format(WORKER_DISABLED,
          worker.getWorkerAddress()));
    }
    worker.setBuildVersion(options.getBuildVersion());

    // Gather all blocks on this worker.
int totalSize = currentBlocksOnLocation.values().stream().mapToInt(List::size).sum(); Set blocks = new LongOpenHashSet(totalSize); for (List blockIds : currentBlocksOnLocation.values()) { blocks.addAll(blockIds); } // Lock all the locks try (LockResource r = worker.lockWorkerMeta(EnumSet.of( WorkerMetaLockSection.STATUS, WorkerMetaLockSection.USAGE, WorkerMetaLockSection.BLOCKS), false)) { // Detect any lost blocks on this worker. Set removedBlocks = worker.register(MASTER_STORAGE_TIER_ASSOC, storageTiers, totalBytesOnTiers, usedBytesOnTiers, blocks); processWorkerRemovedBlocks(worker, removedBlocks, false); processWorkerAddedBlocks(worker, currentBlocksOnLocation); processWorkerOrphanedBlocks(worker); worker.addLostStorage(lostStorage); } if (options.getConfigsCount() > 0) { for (BiConsumer> function : mWorkerRegisteredListeners) { WorkerNetAddress workerAddress = worker.getWorkerAddress(); function.accept(new Address(workerAddress.getHost(), workerAddress.getRpcPort()), options.getConfigsList()); } } recordWorkerRegistration(workerId); // Update the TS at the end of the process worker.updateLastUpdatedTimeMs(); // Invalidate cache to trigger new build of worker info list mWorkerInfoCache.invalidate(LIVE_WORKER_INFO_CACHE_KEY); mWorkerInfoCache.invalidate(LOST_WORKER_INFO_CACHE_KEY); LOG.info("registerWorker(): {}", worker); } @Override public MasterWorkerInfo getWorker(long workerId) throws NotFoundException { MasterWorkerInfo worker = mWorkers.getFirstByField(ID_INDEX, workerId); if (worker == null) { worker = findUnregisteredWorker(workerId); } if (worker == null) { throw new NotFoundException(ExceptionMessage.NO_WORKER_FOUND.getMessage(workerId)); } return worker; } private MasterWorkerInfo getLiveOrDecommissionedWorker(long workerId) { MasterWorkerInfo worker = mWorkers.getFirstByField(ID_INDEX, workerId); if (worker != null) { return worker; } // If not found in the decommissioned worker, this returns null return mDecommissionedWorkers.getFirstByField(ID_INDEX, workerId); } private void processDecommissionedWorkerBlocks(MasterWorkerInfo workerInfo) { processWorkerRemovedBlocks(workerInfo, workerInfo.getBlocks(), false); } /** * Updates the metadata for the specified decommissioned worker. * @param worker the master worker info */ private void processDecommissionedWorker(MasterWorkerInfo worker, boolean canRegisterAgain) { WorkerNetAddress address = worker.getWorkerAddress(); if (canRegisterAgain) { LOG.info("Worker with address {} is decommissioned but will be accepted when it " + "registers again.", address); } else { LOG.info("Worker with address {} will be rejected on register/heartbeat", address); mRejectWorkers.add(address); } mDecommissionedWorkers.add(worker); // Remove worker from all other possible states mWorkers.remove(worker); mTempWorkers.remove(worker); mLostWorkers.remove(worker); // Invalidate cache to trigger new build of worker info list mWorkerInfoCache.invalidate(LIVE_WORKER_INFO_CACHE_KEY); mWorkerInfoCache.invalidate(LOST_WORKER_INFO_CACHE_KEY); WorkerNetAddress workerNetAddress = worker.getWorkerAddress(); // TODO(bzheng888): Maybe need a new listener such as WorkerDecommissionListener. for (Consumer
    for (Consumer<Address> function : mWorkerLostListeners) {
      function.accept(new Address(workerNetAddress.getHost(), workerNetAddress.getRpcPort()));
    }
    processDecommissionedWorkerBlocks(worker);
  }

  @Override
  public void workerRegisterStream(WorkerRegisterContext context, RegisterWorkerPRequest chunk,
      boolean isFirstMsg) {
    // TODO(jiacheng): find a place to check the lease
    if (isFirstMsg) {
      workerRegisterStart(context, chunk);
    } else {
      workerRegisterBatch(context, chunk);
    }
  }

  protected void workerRegisterStart(WorkerRegisterContext context, RegisterWorkerPRequest chunk) {
    MasterWorkerInfo workerInfo = context.getWorkerInfo();
    Preconditions.checkState(workerInfo != null,
        "No workerInfo metadata found in the WorkerRegisterContext!");
    if (isRejected(workerInfo.getWorkerAddress())) {
      throw new UnavailableRuntimeException(String.format(WORKER_DISABLED,
          workerInfo.getWorkerAddress()));
    }
    final List<String> storageTiers = chunk.getStorageTiersList();
    final Map<String, Long> totalBytesOnTiers = chunk.getTotalBytesOnTiersMap();
    final Map<String, Long> usedBytesOnTiers = chunk.getUsedBytesOnTiersMap();
    final Map<String, StorageList> lostStorage = chunk.getLostStorageMap();

    final Map<BlockLocation, List<Long>> currentBlocksOnLocation =
        BlockMasterWorkerServiceHandler.reconstructBlocksOnLocationMap(
            chunk.getCurrentBlocksList(), context.getWorkerId());
    RegisterWorkerPOptions options = chunk.getOptions();

    mActiveRegisterContexts.put(workerInfo.getId(), context);

    // The workerInfo is locked so we can operate on its blocks without race conditions
    // We start with assuming all blocks in (mBlocks + mToRemoveBlocks) do not exist.
    // With each batch we receive, we mark them not-to-be-removed.
    // Eventually what's left in mToRemoveBlocks will be the ones that do not exist anymore.
    workerInfo.markAllBlocksToRemove();
    workerInfo.updateUsage(MASTER_STORAGE_TIER_ASSOC, storageTiers, totalBytesOnTiers,
        usedBytesOnTiers);
    processWorkerAddedBlocks(workerInfo, currentBlocksOnLocation);
    processWorkerOrphanedBlocks(workerInfo);
    workerInfo.addLostStorage(lostStorage);
    workerInfo.setBuildVersion(options.getBuildVersion());

    // TODO(jiacheng): This block can be moved to a non-locked section
    if (options.getConfigsCount() > 0) {
      for (BiConsumer<Address, List<ConfigProperty>> function : mWorkerRegisteredListeners) {
        WorkerNetAddress workerAddress = workerInfo.getWorkerAddress();
        function.accept(new Address(workerAddress.getHost(), workerAddress.getRpcPort()),
            options.getConfigsList());
      }
    }
  }
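  // Subsequent chunks of a register stream carry only block locations; they are merged
  // into the same MasterWorkerInfo, which stays locked for the duration of the stream
  // (see WorkerRegisterContext), so the batches apply consistently.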
  protected void workerRegisterBatch(WorkerRegisterContext context, RegisterWorkerPRequest chunk) {
    final Map<BlockLocation, List<Long>> currentBlocksOnLocation =
        BlockMasterWorkerServiceHandler.reconstructBlocksOnLocationMap(
            chunk.getCurrentBlocksList(), context.getWorkerId());
    MasterWorkerInfo workerInfo = context.getWorkerInfo();
    Preconditions.checkState(workerInfo != null,
        "No workerInfo metadata found in the WorkerRegisterContext!");
    if (isRejected(workerInfo.getWorkerAddress())) {
      throw new UnavailableRuntimeException(String.format(WORKER_DISABLED,
          workerInfo.getWorkerAddress()));
    }

    // Even if we add the BlockLocation before the workerInfo is fully registered,
    // it should be fine because the block can be read on this workerInfo.
    // If the stream fails in the middle, the blocks recorded on the MasterWorkerInfo
    // will be removed by processLostWorker()
    processWorkerAddedBlocks(workerInfo, currentBlocksOnLocation);
    processWorkerOrphanedBlocks(workerInfo);

    // Update the TS at the end of the process
    workerInfo.updateLastUpdatedTimeMs();
  }

  @Override
  public void workerRegisterFinish(WorkerRegisterContext context) {
    MasterWorkerInfo workerInfo = context.getWorkerInfo();
    Preconditions.checkState(workerInfo != null,
        "No workerInfo metadata found in the WorkerRegisterContext!");
    if (isRejected(workerInfo.getWorkerAddress())) {
      throw new UnavailableRuntimeException(String.format(WORKER_DISABLED,
          workerInfo.getWorkerAddress()));
    }

    // Detect any lost blocks on this workerInfo.
    Set<Long> removedBlocks;
    if (workerInfo.mIsRegistered) {
      // This is a re-register of an existing workerInfo. Assume the new block ownership data is
      // more up-to-date and update the existing block information.
      LOG.info("re-registering an existing workerId: {}", workerInfo.getId());

      // The toRemoveBlocks field now contains all the updates
      // after all the blocks have been processed.
      removedBlocks = workerInfo.getToRemoveBlocks();
    } else {
      removedBlocks = Collections.emptySet();
    }
    LOG.info("Found {} blocks to remove from the workerInfo", removedBlocks.size());
    processWorkerRemovedBlocks(workerInfo, removedBlocks, true);

    // Mark registered successfully
    workerInfo.mIsRegistered = true;
    recordWorkerRegistration(workerInfo.getId());

    // Update the TS at the end of the process
    workerInfo.updateLastUpdatedTimeMs();

    // Invalidate cache to trigger new build of worker info list
    mWorkerInfoCache.invalidate(LIVE_WORKER_INFO_CACHE_KEY);
    mWorkerInfoCache.invalidate(LOST_WORKER_INFO_CACHE_KEY);
    LOG.info("Worker successfully registered: {}", workerInfo);
    mActiveRegisterContexts.remove(workerInfo.getId());
    mRegisterLeaseManager.releaseLease(workerInfo.getId());
  }

  @Override
  public Command workerHeartbeat(long workerId, Map<String, Long> capacityBytesOnTiers,
      Map<String, Long> usedBytesOnTiers, List<Long> removedBlockIds,
      Map<BlockLocation, List<Long>> addedBlocks,
      Map<String, StorageList> lostStorage,
      List<Metric> metrics) {
    MasterWorkerInfo worker = mWorkers.getFirstByField(ID_INDEX, workerId);
    if (worker == null) {
      /*
       * If the worker is not recognized:
       * 1. The worker never registered to the cluster, or the master has restarted/failed over
       * 2. The worker has been decommissioned and removed from the active worker list
       */
      worker = mDecommissionedWorkers.getFirstByField(ID_INDEX, workerId);
      if (worker != null) {
        WorkerNetAddress workerAddr = worker.getWorkerAddress();
        if (isRejected(worker.getWorkerAddress())) {
          LOG.info("Received heartbeat from a disabled worker {}",
              workerAddr.getHost() + ":" + workerAddr.getRpcPort());
          return Command.newBuilder().setCommandType(CommandType.Disabled).build();
        }
        LOG.info("Received heartbeat from a decommissioned worker {}",
            workerAddr.getHost() + ":" + workerAddr.getRpcPort());
        return Command.newBuilder().setCommandType(CommandType.Decommissioned).build();
      }
      LOG.warn("Could not find worker id: {} for heartbeat.", workerId);
      return Command.newBuilder().setCommandType(CommandType.Register).build();
    }
    if (isRejected(worker.getWorkerAddress())) {
      throw new UnavailableRuntimeException(String.format(WORKER_DISABLED,
          worker.getWorkerAddress()));
    }

    // Update the TS before the heartbeat so even if the worker heartbeat processing
    // is time-consuming or triggers GC, the worker does not get marked as lost
    // by the LostWorkerDetectionHeartbeatExecutor
    worker.updateLastUpdatedTimeMs();

    /*
     * In 2.x, a standby master needs to wait for the journal entries for those blocks to arrive
     * before it can process a worker heartbeat.
     * In 3.x, since block info is not written and propagated by the journal to a standby master,
     * this wait is unnecessary. The code is kept around just in case, until we are sure it can
     * be removed.
     */
    // if (mWorkerRegisterToAllMasters && mPrimarySelector.getState() == NodeState.STANDBY) {
    //   waitBlockIdPresent(
    //       addedBlocks.values().stream().flatMap(Collection::stream)
    //           .collect(Collectors.toList()), workerId);
    // }

    // The address is final, no need for locking
    processWorkerMetrics(worker.getWorkerAddress().getHost(), metrics);

    Command workerCommand = null;
    try (LockResource r = worker.lockWorkerMeta(
        EnumSet.of(WorkerMetaLockSection.USAGE, WorkerMetaLockSection.BLOCKS), false)) {
      worker.addLostStorage(lostStorage);

      if (capacityBytesOnTiers != null) {
        worker.updateCapacityBytes(capacityBytesOnTiers);
      }
      worker.updateUsedBytes(usedBytesOnTiers);

      // Technically, 'worker' should be confirmed to still be in the data structure. Lost worker
      // detection can remove it. However, we are intentionally ignoring this race, since the
      // worker will just re-register regardless.
      processWorkerRemovedBlocks(worker, removedBlockIds, false);
      processWorkerAddedBlocks(worker, addedBlocks);
      Set<Long> toRemoveBlocks = worker.getToRemoveBlocks();
      if (toRemoveBlocks.isEmpty() || mPrimarySelector.getStateUnsafe() == NodeState.STANDBY) {
        workerCommand = Command.newBuilder().setCommandType(CommandType.Nothing).build();
      } else {
        workerCommand = Command.newBuilder().setCommandType(CommandType.Free)
            .addAllData(toRemoveBlocks).build();
      }
    }

    // Update the TS again
    worker.updateLastUpdatedTimeMs();

    // The command should always have been set in the locked section above
    Preconditions.checkNotNull(workerCommand, "Worker heartbeat response command is null!");
    return workerCommand;
  }

  /**
   * Waits for the given block ids to be present in the block store.
   * If workers register to standby masters, a heartbeat for a newly created block
   * might arrive before the standby master has applied the corresponding journal entry.
   * To mitigate this, we make a best-effort wait before ignoring unknown block ids.
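   * The wait polls every 200ms and times out after 1s, so a heartbeat is never blocked
   * for long.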
   */
  private void waitBlockIdPresent(Collection<Long> blockIds, long workerId) {
    final List<Long> blockIdsToWait = new ArrayList<>();
    for (long addedBlockId : blockIds) {
      if (!mBlockMetaStore.getBlock(addedBlockId).isPresent()) {
        blockIdsToWait.add(addedBlockId);
      }
    }
    try {
      CommonUtils.waitFor(
          "Wait for blocks being committed on master before adding block locations",
          () -> {
            for (long blockId : blockIdsToWait) {
              if (!mBlockMetaStore.getBlock(blockId).isPresent()) {
                return false;
              }
            }
            return true;
          },
          WaitForOptions.defaults().setInterval(200).setTimeoutMs(1000)
      );
    } catch (InterruptedException | TimeoutException e) {
      StringBuilder sb = new StringBuilder();
      sb.append("[");
      for (long blockIdToWait : blockIdsToWait) {
        if (!mBlockMetaStore.getBlock(blockIdToWait).isPresent()) {
          sb.append(blockIdToWait);
          sb.append(", ");
        }
      }
      sb.append("]");
      LOG.warn("Adding block ids {} for worker {} but these blocks don't exist. "
          + "These blocks will be ignored", sb, workerId);
    }
  }

  @Override
  public Clock getClock() {
    return mClock;
  }

  private void processWorkerMetrics(String hostname, List<Metric> metrics) {
    if (metrics.isEmpty()) {
      return;
    }
    mMetricsMaster.workerHeartbeat(hostname, metrics);
  }

  /**
   * Updates the worker and block metadata for blocks removed from a worker.
   *
   * You should lock externally with {@link MasterWorkerInfo#lockWorkerMeta(EnumSet, boolean)}
   * with {@link WorkerMetaLockSection#BLOCKS} specified.
   * An exclusive lock is required.
   *
   * @param workerInfo the worker metadata object
   * @param removedBlockIds a list of block ids removed from the worker
   * @param sendCommand whether to schedule a remove command for the worker instead of
   *        updating the worker metadata directly
   */
  private void processWorkerRemovedBlocks(MasterWorkerInfo workerInfo,
      Collection<Long> removedBlockIds, boolean sendCommand) {
    for (long removedBlockId : removedBlockIds) {
      try (LockResource r = lockBlock(removedBlockId)) {
        Optional<BlockMeta> block = mBlockMetaStore.getBlock(removedBlockId);
        if (block.isPresent()) {
          LOG.debug("Block {} is removed on worker {}.", removedBlockId, workerInfo.getId());
          mBlockMetaStore.removeLocation(removedBlockId, workerInfo.getId());
          if (mBlockMetaStore.getLocations(removedBlockId).size() == 0) {
            mLostBlocks.add(removedBlockId);
          }
        }
        // Remove the block even if its metadata has been deleted already.
        if (sendCommand) {
          workerInfo.scheduleRemoveFromWorker(removedBlockId);
        } else {
          workerInfo.removeBlockFromWorkerMeta(removedBlockId);
        }
      }
    }
  }

  /**
   * Updates the worker and block metadata for blocks added to a worker.
   *
   * You should lock externally with {@link MasterWorkerInfo#lockWorkerMeta(EnumSet, boolean)}
   * with {@link WorkerMetaLockSection#BLOCKS} specified.
   * An exclusive lock is required.
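   * Block ids unknown to the {@link BlockMetaStore} are scheduled for removal from the
   * worker instead of being added.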
   *
   * @param workerInfo the worker metadata object
   * @param addedBlockIds a mapping from storage tier alias to a list of block ids added
   */
  private void processWorkerAddedBlocks(MasterWorkerInfo workerInfo,
      Map<BlockLocation, List<Long>> addedBlockIds) {
    long invalidBlockCount = 0;
    for (Map.Entry<BlockLocation, List<Long>> entry : addedBlockIds.entrySet()) {
      for (long blockId : entry.getValue()) {
        try (LockResource r = lockBlock(blockId)) {
          Optional<BlockMeta> block = mBlockMetaStore.getBlock(blockId);
          if (block.isPresent()) {
            workerInfo.addBlock(blockId);
            BlockLocation location = entry.getKey();
            Preconditions.checkState(location.getWorkerId() == workerInfo.getId(),
                "BlockLocation has a different workerId %s from the request sender's workerId %s",
                location.getWorkerId(), workerInfo.getId());
            mBlockMetaStore.addLocation(blockId, BlockLocationUtils.getCached(location));
            mLostBlocks.remove(blockId);
          } else {
            invalidBlockCount++;
            // The block is not recognized and should therefore be purged from the worker
            // The file may have been removed when the worker was lost
            workerInfo.scheduleRemoveFromWorker(blockId);
            LOG.debug("Invalid block: {} from worker {}.", blockId,
                workerInfo.getWorkerAddress().getHost());
          }
        }
      }
    }
    if (invalidBlockCount > 0) {
      LOG.warn("{} invalid blocks found on worker {} in total", invalidBlockCount,
          workerInfo.getWorkerAddress().getHost());
    }
  }

  /**
   * Checks the blocks on the worker. Blocks that are no longer present in Alluxio
   * are marked to be removed from the worker.
   *
   * You should lock externally with {@link MasterWorkerInfo#lockWorkerMeta(EnumSet, boolean)}
   * with {@link WorkerMetaLockSection#USAGE} specified.
   * A shared lock is required.
   *
   * @param workerInfo the worker metadata object
   */
  private void processWorkerOrphanedBlocks(MasterWorkerInfo workerInfo) {
    long orphanedBlockCount = 0;
    for (long block : workerInfo.getBlocks()) {
      if (!mBlockMetaStore.getBlock(block).isPresent()) {
        orphanedBlockCount++;
        LOG.debug("Requesting delete for orphaned block: {} from worker {}.", block,
            workerInfo.getWorkerAddress().getHost());
        workerInfo.updateToRemovedBlock(true, block);
      }
    }
    if (orphanedBlockCount > 0) {
      LOG.warn("{} blocks marked as orphaned from worker {}", orphanedBlockCount,
          workerInfo.getWorkerAddress().getHost());
    }
  }

  @Override
  public boolean isBlockLost(long blockId) {
    return mLostBlocks.contains(blockId);
  }

  @Override
  public Iterator<Long> getLostBlocksIterator() {
    return mLostBlocks.iterator();
  }

  @Override
  public int getLostBlocksCount() {
    return mLostBlocks.size();
  }

  private long getToRemoveBlockCount() {
    long ret = 0;
    for (MasterWorkerInfo worker : mWorkers) {
      try (LockResource r = worker.lockWorkerMeta(
          EnumSet.of(WorkerMetaLockSection.BLOCKS), true)) {
        ret += worker.getToRemoveBlockCount();
      }
    }
    return ret;
  }

  /**
   * Generates block info, including worker locations, for a block id.
   * This requires no locks on the {@link MasterWorkerInfo} because it only reads
   * final fields.
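   * Locations on lost or decommissioned workers are filtered out of the result.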
   *
   * @param blockId a block id
   * @return optional block info, empty if the block does not exist
   */
  private Optional<BlockInfo> generateBlockInfo(long blockId) throws UnavailableException {
    if (mSafeModeManager.isInSafeMode()) {
      throw new UnavailableException(ExceptionMessage.MASTER_IN_SAFEMODE.getMessage());
    }

    BlockMeta block;
    List<BlockLocation> blockLocations;
    try (LockResource r = lockBlock(blockId)) {
      Optional<BlockMeta> blockOpt = mBlockMetaStore.getBlock(blockId);
      if (!blockOpt.isPresent()) {
        return Optional.empty();
      }
      block = blockOpt.get();
      blockLocations = new ArrayList<>(mBlockMetaStore.getLocations(blockId));
    }

    // Sort the block locations by their alias ordinal in the master storage tier mapping
    blockLocations.sort(Comparator.comparingInt(
        o -> MASTER_STORAGE_TIER_ASSOC.getOrdinal(o.getTier())));

    List<alluxio.wire.BlockLocation> locations = new ArrayList<>(blockLocations.size());
    for (BlockLocation location : blockLocations) {
      // Decommissioned workers are not included in the available locations
      // Note that this may introduce a short unavailability on the block, before
      // this worker registers again (and wipes out the decommissioned state).
      MasterWorkerInfo workerInfo = mWorkers.getFirstByField(ID_INDEX, location.getWorkerId());
      if (workerInfo != null) {
        // worker metadata is intentionally not locked here because:
        // - it would be an incorrect order (correct order is lock worker first, then block)
        // - this only uses getters of final variables
        locations.add(new alluxio.wire.BlockLocation().setWorkerId(location.getWorkerId())
            .setWorkerAddress(workerInfo.getWorkerAddress())
            .setTierAlias(location.getTier()).setMediumType(location.getMediumType()));
      }
    }
    return Optional.of(
        new BlockInfo().setBlockId(blockId).setLength(block.getLength()).setLocations(locations));
  }

  @Override
  public void reportLostBlocks(List<Long> blockIds) {
    mLostBlocks.addAll(blockIds);
  }

  @Override
  public Set<Class<? extends Server>> getDependencies() {
    return DEPS;
  }

  /**
   * Lost worker periodic check.
   */
  public final class LostWorkerDetectionHeartbeatExecutor implements HeartbeatExecutor {
    /**
     * Constructs a new {@link LostWorkerDetectionHeartbeatExecutor}.
     */
    public LostWorkerDetectionHeartbeatExecutor() {}

    @Override
    public void heartbeat(long timeLimitMs) {
      long masterWorkerTimeoutMs = Configuration.getMs(PropertyKey.MASTER_WORKER_TIMEOUT_MS);
      long masterWorkerDeleteTimeoutMs =
          Configuration.getMs(PropertyKey.MASTER_LOST_WORKER_DELETION_TIMEOUT_MS);
      for (MasterWorkerInfo worker : mWorkers) {
        try (LockResource r = worker.lockWorkerMeta(
            EnumSet.of(WorkerMetaLockSection.BLOCKS), false)) {
          // No extra locking is needed because the last-updated field is atomic
          final long lastUpdate = mClock.millis() - worker.getLastUpdatedTimeMs();
          if (lastUpdate > masterWorkerTimeoutMs) {
            LOG.error("The worker {}({}) timed out after {}ms without a heartbeat!",
                worker.getId(), worker.getWorkerAddress(), lastUpdate);
            processLostWorker(worker);
          }
        }
      }
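      // A lost or decommissioned worker is only forgotten for good once it has exceeded
      // MASTER_LOST_WORKER_DELETION_TIMEOUT_MS in addition to the ordinary worker timeout.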
" + "Master will forget about this worker.", worker.getId(), worker.getWorkerAddress(), lastUpdate); deleteWorkerMetadata(worker); } } } for (MasterWorkerInfo worker : mDecommissionedWorkers) { try (LockResource r = worker.lockWorkerMeta( EnumSet.of(WorkerMetaLockSection.BLOCKS), false)) { final long lastUpdate = mClock.millis() - worker.getLastUpdatedTimeMs(); if ((lastUpdate - masterWorkerTimeoutMs) > masterWorkerDeleteTimeoutMs) { LOG.error("The decommissioned worker {}({}) timed out after {}ms without a heartbeat! " + "Master will forget about this worker.", worker.getId(), worker.getWorkerAddress(), lastUpdate); deleteWorkerMetadata(worker); } } } } @Override public void close() { // Nothing to clean up } } /** * Forces all workers to be lost. This should only be used for testing. */ @VisibleForTesting public void forgetAllWorkers() { for (MasterWorkerInfo worker : mWorkers) { try (LockResource r = worker.lockWorkerMeta( EnumSet.of(WorkerMetaLockSection.BLOCKS), false)) { processLostWorker(worker); } } } /** * Updates the metadata for the specified lost worker. * * You should lock externally with {@link MasterWorkerInfo#lockWorkerMeta(EnumSet, boolean)} * with {@link WorkerMetaLockSection#BLOCKS} specified. * An exclusive lock is required. * * @param worker the worker metadata */ private void processLostWorker(MasterWorkerInfo worker) { mLostWorkers.add(worker); mWorkers.remove(worker); // Invalidate cache to trigger new build of worker info list mWorkerInfoCache.invalidate(LIVE_WORKER_INFO_CACHE_KEY); mWorkerInfoCache.invalidate(LOST_WORKER_INFO_CACHE_KEY); // If a worker is gone before registering, avoid it getting stuck in mTempWorker forever mTempWorkers.remove(worker); WorkerNetAddress workerAddress = worker.getWorkerAddress(); for (Consumer
    for (Consumer<Address> function : mWorkerLostListeners) {
      function.accept(new Address(workerAddress.getHost(), workerAddress.getRpcPort()));
    }
    // We only remove the blocks from the master locations but do not
    // mark these blocks to-remove from the worker.
    // So if the worker comes back again the blocks are kept.
    processWorkerRemovedBlocks(worker, worker.getBlocks(), false);
    BlockLocationUtils.evictByWorkerId(worker.getId());
  }

  private void deleteWorkerMetadata(MasterWorkerInfo worker) {
    mWorkers.remove(worker);
    mLostWorkers.remove(worker);
    // If a worker is gone before registering, avoid it getting stuck in mTempWorkers forever
    mTempWorkers.remove(worker);
    mDecommissionedWorkers.remove(worker);
    WorkerNetAddress workerAddress = worker.getWorkerAddress();
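    // Unlike processLostWorker(), the worker has been dropped from every tracking set
    // above; the delete listeners let dependent components clean up their state as well.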
    for (Consumer<Address> function : mWorkerDeleteListeners) {
      function.accept(new Address(workerAddress.getHost(), workerAddress.getRpcPort()));
    }
  }

  LockResource lockBlock(long blockId) {
    return new LockResource(mBlockLocks.get(blockId));
  }

  /**
   * Selects the MasterWorkerInfo from workerInfoSet whose host or related IP address
   * exists in addresses.
   *
   * @param addresses the address set that the user passed in
   * @param workerInfoSet the MasterWorkerInfo set to select info from
   * @param workerNames the supported worker names
   */
  private Set<MasterWorkerInfo> selectInfoByAddress(Set<String> addresses,
      Set<MasterWorkerInfo> workerInfoSet, Set<String> workerNames) {
    return workerInfoSet.stream().filter(info -> {
      String host = info.getWorkerAddress().getHost();
      workerNames.add(host);

      String ip = null;
      try {
        ip = NetworkAddressUtils.resolveIpAddress(host);
        workerNames.add(ip);
      } catch (UnknownHostException e) {
        // The host may already be an IP address
      }

      if (addresses.contains(host)) {
        addresses.remove(host);
        return true;
      }

      if (ip != null) {
        if (addresses.contains(ip)) {
          addresses.remove(ip);
          return true;
        }
      }
      return false;
    }).collect(Collectors.toSet());
  }

  @Override
  public void registerLostWorkerFoundListener(Consumer<Address> function) {
    mLostWorkerFoundListeners.add(function);
  }

  @Override
  public void registerWorkerLostListener(Consumer<Address> function) {
    mWorkerLostListeners.add(function);
  }

  @Override
  public void registerWorkerDeleteListener(Consumer<Address> function) {
    mWorkerDeleteListeners.add(function);
  }

  @Override
  public void registerNewWorkerConfListener(BiConsumer<Address, List<ConfigProperty>> function) {
    mWorkerRegisteredListeners.add(function);
  }

  /**
   * Class that contains metrics related to BlockMaster.
   */
  public static final class Metrics {
    /**
     * Registers metric gauges.
     *
     * @param master the block master handle
     */
    @VisibleForTesting
    public static void registerGauges(final DefaultBlockMaster master) {
      MetricsSystem.registerGaugeIfAbsent(MetricKey.CLUSTER_CAPACITY_TOTAL.getName(),
          master::getCapacityBytes);

      MetricsSystem.registerGaugeIfAbsent(MetricKey.CLUSTER_CAPACITY_USED.getName(),
          master::getUsedBytes);

      MetricsSystem.registerGaugeIfAbsent(MetricKey.CLUSTER_CAPACITY_FREE.getName(),
          () -> master.getCapacityBytes() - master.getUsedBytes());

      MetricsSystem.registerGaugeIfAbsent(MetricKey.MASTER_UNIQUE_BLOCKS.getName(),
          master::getUniqueBlockCount);

      MetricsSystem.registerGaugeIfAbsent(MetricKey.MASTER_TOTAL_BLOCK_REPLICA_COUNT.getName(),
          master::getBlockReplicaCount);

      for (int i = 0; i < master.getGlobalStorageTierAssoc().size(); i++) {
        String alias = master.getGlobalStorageTierAssoc().getAlias(i);
        // TODO(lu) Add template to dynamically construct metric key
        MetricsSystem.registerGaugeIfAbsent(
            MetricKey.CLUSTER_CAPACITY_TOTAL.getName() + MetricInfo.TIER + alias,
            () -> master.getTotalBytesOnTiers().getOrDefault(alias, 0L));
        MetricsSystem.registerGaugeIfAbsent(
            MetricKey.CLUSTER_CAPACITY_USED.getName() + MetricInfo.TIER + alias,
            () -> master.getUsedBytesOnTiers().getOrDefault(alias, 0L));
        MetricsSystem.registerGaugeIfAbsent(
            MetricKey.CLUSTER_CAPACITY_FREE.getName() + MetricInfo.TIER + alias,
            () -> master.getTotalBytesOnTiers().getOrDefault(alias, 0L)
                - master.getUsedBytesOnTiers().getOrDefault(alias, 0L));
      }

      MetricsSystem.registerGaugeIfAbsent(MetricKey.CLUSTER_WORKERS.getName(),
          master::getWorkerCount);
      MetricsSystem.registerGaugeIfAbsent(MetricKey.CLUSTER_LOST_WORKERS.getName(),
          master::getLostWorkerCount);
      MetricsSystem.registerGaugeIfAbsent(MetricKey.MASTER_CACHED_BLOCK_LOCATIONS.getName(),
          BlockLocationUtils::getCachedBlockLocationSize);
    }

    private Metrics() {} // prevent instantiation
  }

  /**
   * @return the block meta store
   */
  @VisibleForTesting
  public BlockMetaStore getBlockMetaStore() {
    return mBlockMetaStore;
  }
}