alluxio.master.block.DefaultBlockMaster Maven / Gradle / Ivy
/*
* The Alluxio Open Foundation licenses this work under the Apache License, version 2.0
* (the "License"). You may not use this work except in compliance with the License, which is
* available at www.apache.org/licenses/LICENSE-2.0
*
* This software is distributed on an "AS IS" basis, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND,
* either express or implied, as more fully set forth in the License.
*
* See the NOTICE file distributed with this work for information regarding copyright ownership.
*/
package alluxio.master.block;
import alluxio.Constants;
import alluxio.MasterStorageTierAssoc;
import alluxio.Server;
import alluxio.StorageTierAssoc;
import alluxio.client.block.options.GetWorkerReportOptions;
import alluxio.client.block.options.GetWorkerReportOptions.WorkerRange;
import alluxio.clock.SystemClock;
import alluxio.collections.ConcurrentHashSet;
import alluxio.collections.IndexDefinition;
import alluxio.collections.IndexedSet;
import alluxio.conf.PropertyKey;
import alluxio.conf.ServerConfiguration;
import alluxio.exception.BlockInfoException;
import alluxio.exception.ExceptionMessage;
import alluxio.exception.status.InvalidArgumentException;
import alluxio.exception.status.NotFoundException;
import alluxio.exception.status.UnavailableException;
import alluxio.grpc.Command;
import alluxio.grpc.CommandType;
import alluxio.grpc.ConfigProperty;
import alluxio.grpc.GrpcService;
import alluxio.grpc.GrpcUtils;
import alluxio.grpc.RegisterWorkerPOptions;
import alluxio.grpc.ServiceType;
import alluxio.grpc.StorageList;
import alluxio.grpc.WorkerLostStorageInfo;
import alluxio.heartbeat.HeartbeatContext;
import alluxio.heartbeat.HeartbeatExecutor;
import alluxio.heartbeat.HeartbeatThread;
import alluxio.master.CoreMaster;
import alluxio.master.CoreMasterContext;
import alluxio.master.block.meta.MasterWorkerInfo;
import alluxio.master.journal.JournalContext;
import alluxio.master.journal.checkpoint.CheckpointName;
import alluxio.master.metastore.BlockStore;
import alluxio.master.metastore.BlockStore.Block;
import alluxio.master.metrics.MetricsMaster;
import alluxio.metrics.Metric;
import alluxio.metrics.MetricsSystem;
import alluxio.proto.journal.Block.BlockContainerIdGeneratorEntry;
import alluxio.proto.journal.Block.BlockInfoEntry;
import alluxio.proto.journal.Block.DeleteBlockEntry;
import alluxio.proto.journal.Journal.JournalEntry;
import alluxio.proto.meta.Block.BlockLocation;
import alluxio.proto.meta.Block.BlockMeta;
import alluxio.resource.LockResource;
import alluxio.util.CommonUtils;
import alluxio.util.IdUtils;
import alluxio.util.executor.ExecutorServiceFactories;
import alluxio.util.executor.ExecutorServiceFactory;
import alluxio.util.network.NetworkAddressUtils;
import alluxio.wire.Address;
import alluxio.wire.BlockInfo;
import alluxio.wire.WorkerInfo;
import alluxio.wire.WorkerNetAddress;
import com.codahale.metrics.Gauge;
import com.google.common.annotations.VisibleForTesting;
import com.google.common.base.Preconditions;
import com.google.common.collect.ImmutableSet;
import com.google.common.collect.Iterators;
import com.google.common.util.concurrent.Striped;
import edu.umd.cs.findbugs.annotations.SuppressFBWarnings;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import java.io.IOException;
import java.net.UnknownHostException;
import java.time.Clock;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collection;
import java.util.Collections;
import java.util.Comparator;
import java.util.HashMap;
import java.util.HashSet;
import java.util.Iterator;
import java.util.List;
import java.util.Map;
import java.util.NoSuchElementException;
import java.util.Optional;
import java.util.Set;
import java.util.concurrent.Future;
import java.util.concurrent.locks.Lock;
import java.util.function.BiConsumer;
import java.util.function.Consumer;
import java.util.function.Function;
import java.util.stream.Collectors;
import javax.annotation.Nullable;
import javax.annotation.concurrent.GuardedBy;
import javax.annotation.concurrent.NotThreadSafe;
/**
* This block master manages the metadata for all the blocks and block workers in Alluxio.
*/
@NotThreadSafe // TODO(jiri): make thread-safe (c.f. ALLUXIO-1664)
public final class DefaultBlockMaster extends CoreMaster implements BlockMaster {
/** Logger for this class. */
private static final Logger LOG = LoggerFactory.getLogger(DefaultBlockMaster.class);
/**
* Immutable set holding {@link MetricsMaster}; presumably the masters this master depends on.
* NOTE(review): the type arguments on this declaration look truncated ({@code Set>}) - confirm
* against the upstream source.
*/
private static final Set> DEPS =
ImmutableSet.>of(MetricsMaster.class);
/**
* The number of container ids to 'reserve' before having to journal container id state. This
* allows the master to return container ids within the reservation, without having to write to
* the journal.
*/
private static final long CONTAINER_ID_RESERVATION_SIZE = 1000;
// Worker metadata management.
/**
* Index over workers keyed by {@link MasterWorkerInfo#getId()}.
* NOTE(review): the {@code true} constructor argument presumably marks the index as unique -
* confirm against IndexDefinition.
*/
private static final IndexDefinition ID_INDEX =
new IndexDefinition(true) {
@Override
public Long getFieldValue(MasterWorkerInfo o) {
return o.getId();
}
};
/**
* Index over workers keyed by {@link MasterWorkerInfo#getWorkerAddress()}.
* NOTE(review): the {@code true} constructor argument presumably marks the index as unique -
* confirm against IndexDefinition.
*/
private static final IndexDefinition ADDRESS_INDEX =
new IndexDefinition(true) {
@Override
public WorkerNetAddress getFieldValue(MasterWorkerInfo o) {
return o.getWorkerAddress();
}
};
/**
* Concurrency and locking in the BlockMaster
*
* The block master uses concurrent data structures to allow non-conflicting concurrent access.
* This means each piece of metadata should be locked individually. There are two types of
* metadata in the {@link DefaultBlockMaster}; block metadata and worker metadata.
*
* To modify or read a modifiable piece of worker metadata, the {@link MasterWorkerInfo} for the
* worker must be locked. For block metadata, the id of the block must be locked. This will
* protect the internal integrity of the block and worker metadata.
*
* A worker's lock must be held to
* - Write mutable state in the worker's MasterWorkerInfo
* - Modify a block location on the worker
*
* A block's lock must be held to
* - Perform any BlockStore operations on the block
* - Add or remove the block from mLostBlocks
*
* Lock ordering must be preserved in order to prevent deadlock. If both worker and block
* metadata must be locked at the same time, the worker metadata must be locked before the block
* metadata.
*
* It should not be the case that multiple worker metadata must be locked at the same time, or
* multiple block metadata must be locked at the same time. Operations involving different workers
* or different blocks should be able to be performed independently.
*/
/**
* 10k locks balances between keeping a small memory footprint and avoiding unnecessary lock
* contention. Each stripe is around 100 bytes, so this takes about 1MB. Block locking critical
* sections are short, so it is acceptable to occasionally have conflicts where two different
* blocks want to lock the same stripe.
*/
private final Striped mBlockLocks = Striped.lock(10_000);
/** Manages block metadata and block locations. */
private final BlockStore mBlockStore;
/** Keeps track of blocks which are no longer in Alluxio storage. */
private final ConcurrentHashSet mLostBlocks = new ConcurrentHashSet<>(64, 0.90f, 64);
/**
* Generator of block container ids. This state must be journaled. The generator object is also
* the monitor that guards {@code mJournaledNextContainerId}.
*/
@GuardedBy("itself")
private final BlockContainerIdGenerator mBlockContainerIdGenerator =
new BlockContainerIdGenerator();
/**
* Mapping between all possible storage level aliases and their ordinal position. This mapping
* forms a total ordering on all storage level aliases in the system, and must be consistent
* across masters.
*/
private final StorageTierAssoc mGlobalStorageTierAssoc;
/** Keeps track of workers which are in communication with the master. */
private final IndexedSet mWorkers =
new IndexedSet<>(ID_INDEX, ADDRESS_INDEX);
/** Keeps track of workers which are no longer in communication with the master. */
private final IndexedSet mLostWorkers =
new IndexedSet<>(ID_INDEX, ADDRESS_INDEX);
/**
* Workers that have been handed an id by {@code getWorkerId} but have not finished registering.
* A worker is not visible in {@code mWorkers} until registration completes.
*/
private final IndexedSet mTempWorkers =
new IndexedSet<>(ID_INDEX, ADDRESS_INDEX);
/** Listeners to call when lost workers are found. */
private final List> mLostWorkerFoundListeners
= new ArrayList<>();
/** Listeners to call when workers are lost. */
private final List> mWorkerLostListeners = new ArrayList<>();
/** Listeners to call when a new worker registers. */
private final List>> mWorkerRegisteredListeners
= new ArrayList<>();
/** Handle to the metrics master. */
private final MetricsMaster mMetricsMaster;
/**
* The service that detects lost worker nodes, and tries to restart the failed workers.
* We store it here so that it can be accessed from tests. It is assigned in
* {@code start(Boolean)} only when this master is the leader.
*/
@SuppressFBWarnings("URF_UNREAD_FIELD")
private Future> mLostWorkerDetectionService;
/**
* The value of the 'next container id' last journaled. Container ids below this value can be
* handed out without writing to the journal.
*/
@GuardedBy("mBlockContainerIdGenerator")
private long mJournaledNextContainerId = 0;
/**
* Creates a new instance of {@link DefaultBlockMaster}.
*
* Delegates to the four-argument constructor with a {@link SystemClock} and a cached thread
* pool factory named after {@link Constants#BLOCK_MASTER_NAME}.
*
* @param metricsMaster the metrics master
* @param masterContext the context for Alluxio master
*/
DefaultBlockMaster(MetricsMaster metricsMaster, CoreMasterContext masterContext) {
this(metricsMaster, masterContext, new SystemClock(),
ExecutorServiceFactories.cachedThreadPool(Constants.BLOCK_MASTER_NAME));
}
/**
 * Creates a new instance of {@link DefaultBlockMaster} with an explicit clock and executor
 * service factory.
 *
 * @param metricsMaster the metrics master; must not be null
 * @param masterContext the context for Alluxio master, also used to obtain the block store
 * @param clock the clock to use for determining the time
 * @param executorServiceFactory a factory for creating the executor service to use for running
 *        maintenance threads
 */
DefaultBlockMaster(MetricsMaster metricsMaster, CoreMasterContext masterContext, Clock clock,
    ExecutorServiceFactory executorServiceFactory) {
  super(masterContext, clock, executorServiceFactory);
  // Validate and capture the metrics master before any other state is created.
  mMetricsMaster = Preconditions.checkNotNull(metricsMaster, "metricsMaster");
  mBlockStore = masterContext.getBlockStoreFactory().get();
  mGlobalStorageTierAssoc = new MasterStorageTierAssoc();
  // Gauges read from this instance, so register them only once all fields are set.
  Metrics.registerGauges(this);
}
/**
* @return the name of this master, {@link Constants#BLOCK_MASTER_NAME}
*/
@Override
public String getName() {
return Constants.BLOCK_MASTER_NAME;
}
/**
* @return a map from service type to gRPC service, containing the block master client service
*         and the block master worker service, each backed by a handler wrapping this master
*/
@Override
public Map getServices() {
Map services = new HashMap<>();
services.put(ServiceType.BLOCK_MASTER_CLIENT_SERVICE,
new GrpcService(new BlockMasterClientServiceHandler(this)));
services.put(ServiceType.BLOCK_MASTER_WORKER_SERVICE,
new GrpcService(new BlockMasterWorkerServiceHandler(this)));
return services;
}
/**
 * Applies a single journal entry to the in-memory block master state.
 *
 * Three entry kinds are handled: container id generator state, block deletion, and block info.
 * A block info entry never overwrites a block whose length is already known.
 *
 * @param entry the journal entry to apply
 * @return true if the entry was one of the kinds handled by this master, false otherwise
 */
@Override
public boolean processJournalEntry(JournalEntry entry) {
  // TODO(gene): A better way to process entries besides a huge switch?
  if (entry.hasBlockContainerIdGenerator()) {
    mJournaledNextContainerId = entry.getBlockContainerIdGenerator().getNextContainerId();
    mBlockContainerIdGenerator.setNextContainerId(mJournaledNextContainerId);
    return true;
  }
  if (entry.hasDeleteBlock()) {
    mBlockStore.removeBlock(entry.getDeleteBlock().getBlockId());
    return true;
  }
  if (entry.hasBlockInfo()) {
    BlockInfoEntry blockInfoEntry = entry.getBlockInfo();
    long blockId = blockInfoEntry.getBlockId();
    long length = blockInfoEntry.getLength();
    Optional<BlockMeta> block = mBlockStore.getBlock(blockId);
    if (block.isPresent()) {
      long oldLen = block.get().getLength();
      if (oldLen != Constants.UNKNOWN_SIZE) {
        // The stored length is authoritative. Only warn when the entry really disagrees with
        // it; replaying an entry that carries the same length is harmless and not worth a
        // warning.
        if (oldLen != length) {
          LOG.warn("Attempting to update block length ({}) to a different length ({}).", oldLen,
              length);
        }
        return true;
      }
    }
    mBlockStore.putBlock(blockId, BlockMeta.newBuilder().setLength(length).build());
    return true;
  }
  return false;
}
/**
* Resets the journaled state of this master: clears the block store and sets both the last
* journaled container id and the container id generator back to 0.
*/
@Override
public void resetState() {
mBlockStore.clear();
mJournaledNextContainerId = 0;
mBlockContainerIdGenerator.setNextContainerId(0);
}
/**
* @return the checkpoint name for this master, {@link CheckpointName#BLOCK_MASTER}
*/
@Override
public CheckpointName getCheckpointName() {
return CheckpointName.BLOCK_MASTER;
}
/**
* Builds an iterator over journal entries describing the current state of this master.
*
* @return an iterator that yields the container id generator entry first, followed by one block
*         info entry (block id and length) for each block in the block store
*/
@Override
public Iterator getJournalEntryIterator() {
Iterator it = mBlockStore.iterator();
// Converts each stored block into a journal entry lazily, as the caller iterates.
Iterator blockIterator = new Iterator() {
@Override
public boolean hasNext() {
return it.hasNext();
}
@Override
public JournalEntry next() {
if (!hasNext()) {
throw new NoSuchElementException();
}
Block block = it.next();
BlockInfoEntry blockInfoEntry =
BlockInfoEntry.newBuilder().setBlockId(block.getId())
.setLength(block.getMeta().getLength()).build();
return JournalEntry.newBuilder().setBlockInfo(blockInfoEntry).build();
}
@Override
public void remove() {
throw new UnsupportedOperationException("BlockMaster#Iterator#remove is not supported.");
}
};
return Iterators
.concat(CommonUtils.singleElementIterator(getContainerIdJournalEntry()), blockIterator);
}
/**
 * Starts this master. When running as the leader, also submits the lost worker detection
 * heartbeat to the executor service and keeps the resulting future.
 *
 * @param isLeader whether this master is the leader
 * @throws IOException if the superclass fails to start
 */
@Override
public void start(Boolean isLeader) throws IOException {
  super.start(isLeader);
  if (!isLeader) {
    // Only the leader runs lost worker detection.
    return;
  }
  int heartbeatIntervalMs =
      (int) ServerConfiguration.getMs(PropertyKey.MASTER_WORKER_HEARTBEAT_INTERVAL);
  HeartbeatThread lostWorkerDetector = new HeartbeatThread(
      HeartbeatContext.MASTER_LOST_WORKER_DETECTION, new LostWorkerDetectionHeartbeatExecutor(),
      heartbeatIntervalMs, ServerConfiguration.global(), mMasterContext.getUserState());
  mLostWorkerDetectionService = getExecutorService().submit(lostWorkerDetector);
}
/**
* Stops this master by delegating to the superclass; no block-master-specific work is done here.
*
* @throws IOException if the superclass fails to stop
*/
@Override
public void stop() throws IOException {
super.stop();
}
/**
 * Closes this master and then the underlying block store.
 *
 * The block store is closed even when closing the superclass fails, so that the store's
 * resources are not leaked on an error path.
 *
 * @throws IOException if the superclass fails to close
 */
@Override
public void close() throws IOException {
  try {
    super.close();
  } finally {
    mBlockStore.close();
  }
}
/**
* @return the number of workers currently in {@code mWorkers}
*/
@Override
public int getWorkerCount() {
return mWorkers.size();
}
/**
* @return the number of workers currently in {@code mLostWorkers}
*/
@Override
public int getLostWorkerCount() {
return mLostWorkers.size();
}
/**
 * @return the sum of the capacity bytes reported by every worker in {@code mWorkers}; each
 *         worker is read while holding its own monitor
 */
@Override
public long getCapacityBytes() {
  return mWorkers.stream().mapToLong(worker -> {
    synchronized (worker) {
      return worker.getCapacityBytes();
    }
  }).sum();
}
/**
* @return the global storage tier association held by this master
*/
@Override
public StorageTierAssoc getGlobalStorageTierAssoc() {
return mGlobalStorageTierAssoc;
}
/**
 * @return the sum of the used bytes reported by every worker in {@code mWorkers}; each worker
 *         is read while holding its own monitor
 */
@Override
public long getUsedBytes() {
  return mWorkers.stream().mapToLong(worker -> {
    synchronized (worker) {
      return worker.getUsedBytes();
    }
  }).sum();
}
/**
* Lists the workers currently in {@code mWorkers}.
*
* @return one worker info per worker, each generated while holding that worker's monitor
* @throws UnavailableException if the master is in safe mode
*/
@Override
public List getWorkerInfoList() throws UnavailableException {
if (mSafeModeManager.isInSafeMode()) {
throw new UnavailableException(ExceptionMessage.MASTER_IN_SAFEMODE.getMessage());
}
List workerInfoList = new ArrayList<>(mWorkers.size());
for (MasterWorkerInfo worker : mWorkers) {
synchronized (worker) {
// NOTE(review): null presumably selects all fields and true marks the worker as live -
// confirm against MasterWorkerInfo#generateWorkerInfo.
workerInfoList.add(worker.generateWorkerInfo(null, true));
}
}
return workerInfoList;
}
/**
* Lists the workers currently in {@code mLostWorkers}.
*
* @return one worker info per lost worker, sorted with
*         {@link WorkerInfo.LastContactSecComparator}
* @throws UnavailableException if the master is in safe mode
*/
@Override
public List getLostWorkersInfoList() throws UnavailableException {
if (mSafeModeManager.isInSafeMode()) {
throw new UnavailableException(ExceptionMessage.MASTER_IN_SAFEMODE.getMessage());
}
List workerInfoList = new ArrayList<>(mLostWorkers.size());
for (MasterWorkerInfo worker : mLostWorkers) {
synchronized (worker) {
// NOTE(review): null presumably selects all fields and false marks the worker as not live -
// confirm against MasterWorkerInfo#generateWorkerInfo.
workerInfoList.add(worker.generateWorkerInfo(null, false));
}
}
Collections.sort(workerInfoList, new WorkerInfo.LastContactSecComparator());
return workerInfoList;
}
/**
* Builds a worker report for the workers selected by the given options.
*
* @param options selects which workers (all, live, lost, or specified addresses) and which
*        fields to report
* @return worker infos for the selected live workers followed by the selected lost workers
* @throws UnavailableException if the master is in safe mode
* @throws InvalidArgumentException if the worker range is unrecognized, or if in the specified
*         range some addresses remain unmatched after selection
*/
@Override
public List getWorkerReport(GetWorkerReportOptions options)
throws UnavailableException, InvalidArgumentException {
if (mSafeModeManager.isInSafeMode()) {
throw new UnavailableException(ExceptionMessage.MASTER_IN_SAFEMODE.getMessage());
}
Set selectedLiveWorkers = new HashSet<>();
Set selectedLostWorkers = new HashSet<>();
WorkerRange workerRange = options.getWorkerRange();
switch (workerRange) {
case ALL:
selectedLiveWorkers.addAll(mWorkers);
selectedLostWorkers.addAll(mLostWorkers);
break;
case LIVE:
selectedLiveWorkers.addAll(mWorkers);
break;
case LOST:
selectedLostWorkers.addAll(mLostWorkers);
break;
case SPECIFIED:
Set addresses = options.getAddresses();
Set workerNames = new HashSet<>();
// NOTE(review): selectInfoByAddress is defined outside this view; it presumably removes each
// matched address from 'addresses' and records known worker names in 'workerNames', so that
// whatever remains in 'addresses' afterwards is unrecognized - confirm.
selectedLiveWorkers = selectInfoByAddress(addresses, mWorkers, workerNames);
selectedLostWorkers = selectInfoByAddress(addresses, mLostWorkers, workerNames);
if (!addresses.isEmpty()) {
String info = String.format("Unrecognized worker names: %s%n"
+ "Supported worker names: %s%n",
addresses.toString(), workerNames.toString());
throw new InvalidArgumentException(info);
}
break;
default:
throw new InvalidArgumentException("Unrecognized worker range: " + workerRange);
}
List workerInfoList = new ArrayList<>();
// Each worker info is generated while holding that worker's monitor.
for (MasterWorkerInfo worker : selectedLiveWorkers) {
synchronized (worker) {
workerInfoList.add(worker.generateWorkerInfo(options.getFieldRange(), true));
}
}
for (MasterWorkerInfo worker : selectedLostWorkers) {
synchronized (worker) {
workerInfoList.add(worker.generateWorkerInfo(options.getFieldRange(), false));
}
}
return workerInfoList;
}
/**
* Collects the lost storage reported by workers in {@code mWorkers}.
*
* @return one entry per worker that has lost storage, carrying the worker address and its lost
*         storage converted to {@link StorageList} values; workers without lost storage are
*         omitted
*/
@Override
public List getWorkerLostStorage() {
List workerLostStorageList = new ArrayList<>();
for (MasterWorkerInfo worker : mWorkers) {
synchronized (worker) {
if (worker.hasLostStorage()) {
// Wrap each map value from the worker into a StorageList proto, keeping the same keys.
Map lostStorage = worker.getLostStorage().entrySet()
.stream().collect(Collectors.toMap(Map.Entry::getKey,
e -> StorageList.newBuilder().addAllStorage(e.getValue()).build()));
workerLostStorageList.add(WorkerLostStorageInfo.newBuilder()
.setAddress(GrpcUtils.toProto(worker.getWorkerAddress()))
.putAllLostStorage(lostStorage).build());
}
}
}
return workerLostStorageList;
}
/**
* Marks the given blocks for removal on every worker that holds them, and optionally deletes
* their metadata.
*
* Blocks without metadata in the block store are skipped.
*
* @param blockIds the ids of the blocks to remove
* @param delete if true, the block metadata is removed from the block store and from
*        {@code mLostBlocks}, and a delete entry is journaled; if false, the metadata is kept
* @throws UnavailableException declared for the journal context used by this method
*/
@Override
public void removeBlocks(List blockIds, boolean delete) throws UnavailableException {
try (JournalContext journalContext = createJournalContext()) {
for (long blockId : blockIds) {
HashSet workerIds = new HashSet<>();
try (LockResource lr = lockBlock(blockId)) {
Optional block = mBlockStore.getBlock(blockId);
if (!block.isPresent()) {
continue;
}
// Remember which workers hold the block before any metadata is removed.
for (BlockLocation loc : mBlockStore.getLocations(blockId)) {
workerIds.add(loc.getWorkerId());
}
// Two cases here:
// 1) For delete: delete the block metadata.
// 2) For free: keep the block metadata. mLostBlocks will be changed in
// processWorkerRemovedBlocks
if (delete) {
// Make sure blockId is removed from mLostBlocks when the block metadata is deleted.
// Otherwise blockId in mLostBlock can be dangling index if the metadata is gone.
mLostBlocks.remove(blockId);
mBlockStore.removeBlock(blockId);
JournalEntry entry = JournalEntry.newBuilder()
.setDeleteBlock(DeleteBlockEntry.newBuilder().setBlockId(blockId)).build();
journalContext.append(entry);
}
}
// Outside of locking the block. This does not have to be synchronized with the block
// metadata, since it is essentially an asynchronous signal to the worker to remove the
// block.
for (long workerId : workerIds) {
MasterWorkerInfo worker = mWorkers.getFirstByField(ID_INDEX, workerId);
if (worker != null) {
synchronized (worker) {
worker.updateToRemovedBlock(true, blockId);
}
}
}
}
}
}
/**
* Checks every block in the block store with the given validator and reports, and optionally
* deletes, the blocks it rejects.
*
* @param validator applied to each block id; a false result marks the block as invalid
* @param repair if true, invalid blocks are deleted via {@code removeBlocks(..., true)};
*        otherwise only a hint on how to repair is logged
* @throws UnavailableException if removing the invalid blocks fails with this exception
*/
@Override
public void validateBlocks(Function validator, boolean repair)
throws UnavailableException {
List invalidBlocks = new ArrayList<>();
for (Iterator iter = mBlockStore.iterator(); iter.hasNext(); ) {
long id = iter.next().getId();
if (!validator.apply(id)) {
invalidBlocks.add(id);
}
}
if (!invalidBlocks.isEmpty()) {
// Cap how many block ids are written to the log.
long limit = 100;
List loggedBlocks = invalidBlocks.stream().limit(limit).collect(Collectors.toList());
LOG.warn("Found {} orphan blocks without corresponding file metadata.", invalidBlocks.size());
if (invalidBlocks.size() > limit) {
LOG.warn("The first {} orphan blocks include {}.", limit, loggedBlocks);
} else {
LOG.warn("The orphan blocks include {}.", loggedBlocks);
}
if (repair) {
LOG.warn("Deleting {} orphan blocks.", invalidBlocks.size());
removeBlocks(invalidBlocks, true);
} else {
LOG.warn("Restart Alluxio master with {}=true to delete the blocks and repair the system.",
PropertyKey.Name.MASTER_STARTUP_BLOCK_INTEGRITY_CHECK_ENABLED);
}
}
}
/**
 * Allocates a new block container id.
 *
 * Ids below the last journaled 'next container id' are returned without touching the journal.
 * Otherwise a new reservation of {@link #CONTAINER_ID_RESERVATION_SIZE} ids is journaled before
 * the id is returned. If journaling the reservation fails, the in-memory reservation is rolled
 * back so that later calls cannot hand out ids from a reservation that was never journaled.
 *
 * @return a new block container id
 * @throws UnavailableException propagated from the journal context when the new reservation
 *         cannot be journaled
 */
@Override
public long getNewContainerId() throws UnavailableException {
  synchronized (mBlockContainerIdGenerator) {
    long containerId = mBlockContainerIdGenerator.getNewContainerId();
    if (containerId < mJournaledNextContainerId) {
      // This container id is within the reserved container ids, so it is safe to return the id
      // without having to write anything to the journal.
      return containerId;
    }
    // This container id is not safe with respect to the last journaled container id.
    // Therefore, journal the new state of the container id. This implies that when a master
    // crashes, the container ids within the reservation which have not been used yet will
    // never be used. This is a tradeoff between fully utilizing the container id space, vs.
    // improving master scalability.
    // TODO(gpang): investigate if dynamic reservation sizes could be effective

    // Set the next id to journal with a reservation of container ids, to avoid having to write
    // to the journal for ids within the reservation.
    long previousJournaledNextContainerId = mJournaledNextContainerId;
    mJournaledNextContainerId = containerId + CONTAINER_ID_RESERVATION_SIZE;
    boolean journaled = false;
    try {
      try (JournalContext journalContext = createJournalContext()) {
        // This must be flushed while holding the lock on mBlockContainerIdGenerator, in order to
        // prevent subsequent calls to return ids that have not been journaled and flushed.
        journalContext.append(getContainerIdJournalEntry());
      }
      // Only reached once the journal context has been closed without throwing.
      journaled = true;
    } finally {
      if (!journaled) {
        // The reservation never made it to the journal; forget it so the next call journals
        // again instead of treating the ids as already reserved.
        mJournaledNextContainerId = previousJournaledNextContainerId;
      }
    }
    return containerId;
  }
}
/**
 * Wraps the last journaled 'next container id' into a journal entry.
 *
 * @return a {@link JournalEntry} representing the state of the container id generator
 */
private JournalEntry getContainerIdJournalEntry() {
  return JournalEntry.newBuilder()
      .setBlockContainerIdGenerator(
          BlockContainerIdGeneratorEntry.newBuilder()
              .setNextContainerId(mJournaledNextContainerId))
      .build();
}
// TODO(binfan): check the logic is correct or not when commitBlock is a retry
/**
 * Records that a worker has committed a block.
 *
 * Block metadata is created and journaled when the block is unknown or its stored length is
 * {@link Constants#UNKNOWN_SIZE}; an attempt to change an already known length is logged and
 * ignored. In every case the worker is added as a location of the block, the block is removed
 * from the lost blocks, and the worker's block set, used bytes and last updated time are
 * refreshed.
 *
 * @param workerId the id of the committing worker
 * @param usedBytesOnTier the bytes now used on the tier the block was committed to
 * @param tierAlias the alias of the storage tier holding the block
 * @param mediumType the medium type holding the block
 * @param blockId the id of the committed block
 * @param length the length of the committed block
 * @throws NotFoundException if no worker with the given id is in {@code mWorkers}
 * @throws UnavailableException declared for the journal context used by this method
 */
@Override
public void commitBlock(long workerId, long usedBytesOnTier, String tierAlias,
    String mediumType, long blockId, long length)
    throws NotFoundException, UnavailableException {
  LOG.debug("Commit block from workerId: {}, usedBytesOnTier: {}, blockId: {}, length: {}",
      workerId, usedBytesOnTier, blockId, length);
  MasterWorkerInfo worker = mWorkers.getFirstByField(ID_INDEX, workerId);
  // TODO(peis): Check lost workers as well.
  if (worker == null) {
    throw new NotFoundException(ExceptionMessage.NO_WORKER_FOUND.getMessage(workerId));
  }
  try (JournalContext journalContext = createJournalContext()) {
    // Lock order: worker metadata first, then the block.
    synchronized (worker) {
      try (LockResource lr = lockBlock(blockId)) {
        Optional<BlockMeta> existing = mBlockStore.getBlock(blockId);
        boolean sameLength = existing.isPresent() && existing.get().getLength() == length;
        if (!sameLength) {
          boolean lengthAlreadyKnown =
              existing.isPresent() && existing.get().getLength() != Constants.UNKNOWN_SIZE;
          if (lengthAlreadyKnown) {
            LOG.warn("Rejecting attempt to change block length from {} to {}",
                existing.get().getLength(), length);
          } else {
            mBlockStore.putBlock(blockId, BlockMeta.newBuilder().setLength(length).build());
            BlockInfoEntry committedInfo =
                BlockInfoEntry.newBuilder().setBlockId(blockId).setLength(length).build();
            journalContext.append(JournalEntry.newBuilder().setBlockInfo(committedInfo).build());
          }
        }
        // Update the block metadata with the new worker location.
        BlockLocation newLocation = BlockLocation.newBuilder()
            .setWorkerId(workerId)
            .setTier(tierAlias)
            .setMediumType(mediumType)
            .build();
        mBlockStore.addLocation(blockId, newLocation);
        // This worker has this block, so it is no longer lost.
        mLostBlocks.remove(blockId);
        // Update the worker information for this new block.
        // TODO(binfan): when retry commitBlock on master is expected, make sure metrics are not
        // double counted.
        worker.addBlock(blockId);
        worker.updateUsedBytes(tierAlias, usedBytesOnTier);
        worker.updateLastUpdatedTimeMs();
      }
    }
  }
}
/**
 * Records metadata for a block that lives in the under file system.
 *
 * Nothing is written when metadata for the block already exists; otherwise the block is stored
 * with the given length and a block info entry is journaled.
 *
 * @param blockId the id of the block
 * @param length the length of the block
 * @throws UnavailableException declared for the journal context used by this method
 */
@Override
public void commitBlockInUFS(long blockId, long length) throws UnavailableException {
  LOG.debug("Commit block in ufs. blockId: {}, length: {}", blockId, length);
  try (JournalContext journalContext = createJournalContext();
      LockResource lr = lockBlock(blockId)) {
    boolean alreadyKnown = mBlockStore.getBlock(blockId).isPresent();
    if (!alreadyKnown) {
      mBlockStore.putBlock(blockId, BlockMeta.newBuilder().setLength(length).build());
      BlockInfoEntry ufsBlockInfo =
          BlockInfoEntry.newBuilder().setBlockId(blockId).setLength(length).build();
      journalContext.append(JournalEntry.newBuilder().setBlockInfo(ufsBlockInfo).build());
    }
  }
}
/**
 * Looks up the block info, including worker locations, for a block id.
 *
 * @param blockId the id of the block
 * @return the block info
 * @throws BlockInfoException if no metadata exists for the block
 * @throws UnavailableException propagated from {@code generateBlockInfo}
 */
@Override
public BlockInfo getBlockInfo(long blockId) throws BlockInfoException, UnavailableException {
  Optional<BlockInfo> blockInfo = generateBlockInfo(blockId);
  if (!blockInfo.isPresent()) {
    throw new BlockInfoException(ExceptionMessage.BLOCK_META_NOT_FOUND, blockId);
  }
  return blockInfo.get();
}
/**
* Looks up block info for several block ids.
*
* @param blockIds the ids of the blocks to look up
* @return the block infos that could be generated, in the order of the given ids; ids without
*         block metadata are silently omitted
* @throws UnavailableException propagated from {@code generateBlockInfo}
*/
@Override
public List getBlockInfoList(List blockIds) throws UnavailableException {
List ret = new ArrayList<>(blockIds.size());
for (long blockId : blockIds) {
generateBlockInfo(blockId).ifPresent(info -> ret.add(info));
}
return ret;
}
/**
* @return a map keyed like each worker's total-bytes-on-tiers map, where each value is the sum
*         of that key's value over all workers in {@code mWorkers}
*/
@Override
public Map getTotalBytesOnTiers() {
Map ret = new HashMap<>();
for (MasterWorkerInfo worker : mWorkers) {
synchronized (worker) {
for (Map.Entry entry : worker.getTotalBytesOnTiers().entrySet()) {
// A key seen for the first time starts from 0.
Long total = ret.get(entry.getKey());
ret.put(entry.getKey(), (total == null ? 0L : total) + entry.getValue());
}
}
}
return ret;
}
/**
* @return a map keyed like each worker's used-bytes-on-tiers map, where each value is the sum
*         of that key's value over all workers in {@code mWorkers}
*/
@Override
public Map getUsedBytesOnTiers() {
Map ret = new HashMap<>();
for (MasterWorkerInfo worker : mWorkers) {
synchronized (worker) {
for (Map.Entry entry : worker.getUsedBytesOnTiers().entrySet()) {
// A key seen for the first time starts from 0.
Long used = ret.get(entry.getKey());
ret.put(entry.getKey(), (used == null ? 0L : used) + entry.getValue());
}
}
}
return ret;
}
/**
 * Finds, by address, a worker that is known to the master but not currently registered: one
 * that has only been handed an id, or one that is considered lost. Workers that have only been
 * handed an id are checked first.
 *
 * @param workerNetAddress the address used to find a worker
 * @return the matching {@link MasterWorkerInfo}, or null if no worker is found
 */
@Nullable
private MasterWorkerInfo findUnregisteredWorker(WorkerNetAddress workerNetAddress) {
  MasterWorkerInfo pending = mTempWorkers.getFirstByField(ADDRESS_INDEX, workerNetAddress);
  if (pending != null) {
    return pending;
  }
  return mLostWorkers.getFirstByField(ADDRESS_INDEX, workerNetAddress);
}
/**
 * Finds, by id, a worker that is known to the master but not currently registered: one that
 * has only been handed an id, or one that is considered lost. Workers that have only been
 * handed an id are checked first.
 *
 * @param workerId the id used to find a worker
 * @return the matching {@link MasterWorkerInfo}, or null if no worker is found
 */
@Nullable
private MasterWorkerInfo findUnregisteredWorker(long workerId) {
  MasterWorkerInfo pending = mTempWorkers.getFirstByField(ID_INDEX, workerId);
  if (pending != null) {
    return pending;
  }
  return mLostWorkers.getFirstByField(ID_INDEX, workerId);
}
/**
 * Re-registers a lost worker or completes registration after getting a worker id.
 *
 * The worker is looked up first among the workers that only hold an id and then among the lost
 * workers. When found, it is moved into {@code mWorkers}; if it came from the lost workers, the
 * lost-worker-found listeners are notified with its host and RPC port.
 *
 * @param workerId the worker id to register
 * @return the worker that was moved into {@code mWorkers}, or null if the id was found in
 *         neither set
 */
@Nullable
private MasterWorkerInfo registerWorkerInternal(long workerId) {
  for (IndexedSet<MasterWorkerInfo> source : Arrays.asList(mTempWorkers, mLostWorkers)) {
    MasterWorkerInfo worker = source.getFirstByField(ID_INDEX, workerId);
    if (worker != null) {
      synchronized (worker) {
        worker.updateLastUpdatedTimeMs();
        mWorkers.add(worker);
        source.remove(worker);
        boolean wasLost = source == mLostWorkers;
        if (wasLost) {
          // Each listener receives its own Address instance.
          mLostWorkerFoundListeners.forEach(listener -> listener.accept(
              new Address(worker.getWorkerAddress().getHost(),
                  worker.getWorkerAddress().getRpcPort())));
          LOG.warn("A lost worker {} has requested its old id {}.",
              worker.getWorkerAddress(), worker.getId());
        }
      }
      return worker;
    }
  }
  return null;
}
/**
 * Returns the id for the worker at the given address, generating one if the address is unknown.
 *
 * An address already present among the registered, pending or lost workers keeps its existing
 * id. Otherwise random non-negative ids are tried until one can be added to the pending
 * workers.
 *
 * @param workerNetAddress the address of the worker
 * @return the worker id for the address
 */
@Override
public long getWorkerId(WorkerNetAddress workerNetAddress) {
  MasterWorkerInfo registered = mWorkers.getFirstByField(ADDRESS_INDEX, workerNetAddress);
  if (registered != null) {
    // This worker address is already mapped to a worker id.
    long oldWorkerId = registered.getId();
    LOG.warn("The worker {} already exists as id {}.", workerNetAddress, oldWorkerId);
    return oldWorkerId;
  }

  MasterWorkerInfo unregistered = findUnregisteredWorker(workerNetAddress);
  if (unregistered != null) {
    return unregistered.getId();
  }

  // Generate a new worker id, retrying while the pending set rejects the candidate.
  long workerId;
  do {
    workerId = IdUtils.getRandomNonNegativeLong();
  } while (!mTempWorkers.add(new MasterWorkerInfo(workerId, workerNetAddress)));

  LOG.info("getWorkerId(): WorkerNetAddress: {} id: {}", workerNetAddress, workerId);
  return workerId;
}
/**
* Registers a worker with the master, reconciling the blocks and storage it reports.
*
* The worker is looked up among the registered workers first and then among the unregistered
* (pending or lost) ones. After its metadata has been updated, registration is completed via
* {@code registerWorkerInternal}.
*
* @param workerId the id of the worker, as previously returned by {@code getWorkerId}
* @param storageTiers the storage tier aliases of the worker
* @param totalBytesOnTiers the total bytes on each tier of the worker
* @param usedBytesOnTiers the used bytes on each tier of the worker
* @param currentBlocksOnLocation the block ids currently on the worker, grouped by location
* @param lostStorage the lost storage reported by the worker
* @param options the registration options; worker-registered listeners are only invoked when
*        the options carry at least one config
* @throws NotFoundException if no worker with the given id is known to the master
*/
@Override
public void workerRegister(long workerId, List storageTiers,
Map totalBytesOnTiers, Map usedBytesOnTiers,
Map> currentBlocksOnLocation,
Map lostStorage, RegisterWorkerPOptions options)
throws NotFoundException {
MasterWorkerInfo worker = mWorkers.getFirstByField(ID_INDEX, workerId);
if (worker == null) {
worker = findUnregisteredWorker(workerId);
}
if (worker == null) {
throw new NotFoundException(ExceptionMessage.NO_WORKER_FOUND.getMessage(workerId));
}
// Gather all blocks on this worker.
HashSet blocks = new HashSet<>();
for (List blockIds : currentBlocksOnLocation.values()) {
blocks.addAll(blockIds);
}
synchronized (worker) {
worker.updateLastUpdatedTimeMs();
// Detect any lost blocks on this worker.
Set removedBlocks = worker.register(mGlobalStorageTierAssoc, storageTiers,
totalBytesOnTiers, usedBytesOnTiers, blocks);
processWorkerRemovedBlocks(worker, removedBlocks);
processWorkerAddedBlocks(worker, currentBlocksOnLocation);
processWorkerOrphanedBlocks(worker);
worker.addLostStorage(lostStorage);
}
// Listeners are called outside of the worker's monitor.
if (options.getConfigsCount() > 0) {
for (BiConsumer> function : mWorkerRegisteredListeners) {
WorkerNetAddress workerAddress = worker.getWorkerAddress();
function.accept(new Address(workerAddress.getHost(), workerAddress.getRpcPort()),
options.getConfigsList());
}
}
registerWorkerInternal(workerId);
LOG.info("registerWorker(): {}", worker);
}
/**
* Processes a heartbeat from a worker and tells the worker what to do next.
*
* @param workerId the id of the worker
* @param capacityBytesOnTiers the capacity on each tier; may be null, in which case the
*        worker's capacity is left unchanged
* @param usedBytesOnTiers the used bytes on each tier
* @param removedBlockIds the ids of blocks removed from the worker
* @param addedBlocks the block ids added to the worker, grouped by location
* @param lostStorage the lost storage reported by the worker
* @param metrics the metrics reported by the worker
* @return a Register command if the worker id is not among the registered workers; otherwise a
*         Free command carrying the worker's blocks to remove, or a Nothing command when there
*         are none
*/
@Override
public Command workerHeartbeat(long workerId, Map capacityBytesOnTiers,
Map usedBytesOnTiers, List removedBlockIds,
Map> addedBlocks,
Map lostStorage,
List metrics) {
MasterWorkerInfo worker = mWorkers.getFirstByField(ID_INDEX, workerId);
if (worker == null) {
LOG.warn("Could not find worker id: {} for heartbeat.", workerId);
return Command.newBuilder().setCommandType(CommandType.Register).build();
}
synchronized (worker) {
// Technically, 'worker' should be confirmed to still be in the data structure. Lost worker
// detection can remove it. However, we are intentionally ignoring this race, since the worker
// will just re-register regardless.
processWorkerRemovedBlocks(worker, removedBlockIds);
processWorkerAddedBlocks(worker, addedBlocks);
processWorkerMetrics(worker.getWorkerAddress().getHost(), metrics);
worker.addLostStorage(lostStorage);
if (capacityBytesOnTiers != null) {
worker.updateCapacityBytes(capacityBytesOnTiers);
}
worker.updateUsedBytes(usedBytesOnTiers);
worker.updateLastUpdatedTimeMs();
List toRemoveBlocks = worker.getToRemoveBlocks();
if (toRemoveBlocks.isEmpty()) {
return Command.newBuilder().setCommandType(CommandType.Nothing).build();
}
return Command.newBuilder().setCommandType(CommandType.Free).addAllData(toRemoveBlocks)
.build();
}
}
/**
* Forwards metrics reported by a worker to the metrics master; does nothing for an empty list.
*
* @param hostname the hostname of the reporting worker
* @param metrics the metrics reported by the worker
*/
private void processWorkerMetrics(String hostname, List metrics) {
if (metrics.isEmpty()) {
return;
}
mMetricsMaster.workerHeartbeat(hostname, metrics);
}
/**
* Updates the worker and block metadata for blocks removed from a worker.
*
* For each block that still has metadata, the worker is removed from the block's locations and
* the block is added to the lost blocks if no location remains. The block is removed from the
* worker's own metadata in every case.
*
* @param workerInfo The worker metadata object
* @param removedBlockIds A list of block ids removed from the worker
*/
@GuardedBy("workerInfo")
private void processWorkerRemovedBlocks(MasterWorkerInfo workerInfo,
Collection removedBlockIds) {
for (long removedBlockId : removedBlockIds) {
try (LockResource lr = lockBlock(removedBlockId)) {
Optional block = mBlockStore.getBlock(removedBlockId);
if (block.isPresent()) {
LOG.debug("Block {} is removed on worker {}.", removedBlockId, workerInfo.getId());
mBlockStore.removeLocation(removedBlockId, workerInfo.getId());
// A block with no remaining location is considered lost.
if (mBlockStore.getLocations(removedBlockId).size() == 0) {
mLostBlocks.add(removedBlockId);
}
}
// Remove the block even if its metadata has been deleted already.
workerInfo.removeBlock(removedBlockId);
}
}
}
/**
* Updates the worker and block metadata for blocks added to a worker.
*
* Blocks that have metadata are added to the worker, gain the worker as a location (with the
* tier and medium type taken from the map key), and are removed from the lost blocks. Blocks
* without metadata are only logged.
*
* @param workerInfo The worker metadata object
* @param addedBlockIds A mapping from block location (tier and medium type) to a list of block
*        ids added
*/
@GuardedBy("workerInfo")
private void processWorkerAddedBlocks(MasterWorkerInfo workerInfo,
Map> addedBlockIds) {
for (Map.Entry> entry : addedBlockIds.entrySet()) {
for (long blockId : entry.getValue()) {
try (LockResource lr = lockBlock(blockId)) {
Optional block = mBlockStore.getBlock(blockId);
if (block.isPresent()) {
workerInfo.addBlock(blockId);
BlockLocation blockLocation = BlockLocation.newBuilder()
.setWorkerId(workerInfo.getId())
.setTier(entry.getKey().getTier())
.setMediumType(entry.getKey().getMediumType())
.build();
mBlockStore.addLocation(blockId, blockLocation);
mLostBlocks.remove(blockId);
} else {
LOG.warn("Invalid block: {} from worker {}.", blockId,
workerInfo.getWorkerAddress().getHost());
}
}
}
}
}
/**
 * Asks a worker to delete every block it holds for which the master has no metadata.
 *
 * @param workerInfo The worker metadata object
 */
@GuardedBy("workerInfo")
private void processWorkerOrphanedBlocks(MasterWorkerInfo workerInfo) {
  for (long blockId : workerInfo.getBlocks()) {
    boolean hasMetadata = mBlockStore.getBlock(blockId).isPresent();
    if (hasMetadata) {
      continue;
    }
    LOG.info("Requesting delete for orphaned block: {} from worker {}.", blockId,
        workerInfo.getWorkerAddress().getHost());
    workerInfo.updateToRemovedBlock(true, blockId);
  }
}
/**
 * Returns the ids of the blocks currently recorded as lost.
 *
 * @return an immutable copy of the lost block set, taken at call time
 */
@Override
public Set getLostBlocks() {
return ImmutableSet.copyOf(mLostBlocks);
}
/**
 * Generates block info, including worker locations, for a block id.
 *
 * <p>Locations are ordered by the ordinal of their tier in the master's global storage tier
 * mapping. A location whose worker id is not found in {@code mWorkers} is left out of the
 * result.
 *
 * @param blockId a block id
 * @return optional block info, empty if the block does not exist
 * @throws UnavailableException if the master is in safe mode
 */
@GuardedBy("masterBlockInfo")
private Optional<BlockInfo> generateBlockInfo(long blockId) throws UnavailableException {
  if (mSafeModeManager.isInSafeMode()) {
    throw new UnavailableException(ExceptionMessage.MASTER_IN_SAFEMODE.getMessage());
  }

  BlockMeta block;
  List<BlockLocation> blockLocations;
  // Only the reads from the block store happen under the block lock; the locations are copied
  // so they can be sorted and resolved after the lock is released.
  try (LockResource lr = lockBlock(blockId)) {
    Optional<BlockMeta> blockOpt = mBlockStore.getBlock(blockId);
    if (!blockOpt.isPresent()) {
      return Optional.empty();
    }
    block = blockOpt.get();
    blockLocations = new ArrayList<>(mBlockStore.getLocations(blockId));
  }

  // Sort the block locations by their alias ordinal in the master storage tier mapping
  blockLocations.sort(
      Comparator.comparingInt(o -> mGlobalStorageTierAssoc.getOrdinal(o.getTier())));

  List<alluxio.wire.BlockLocation> locations = new ArrayList<>();
  for (BlockLocation location : blockLocations) {
    MasterWorkerInfo workerInfo =
        mWorkers.getFirstByField(ID_INDEX, location.getWorkerId());
    if (workerInfo != null) {
      // worker metadata is intentionally not locked here because:
      // - it would be an incorrect order (correct order is lock worker first, then block)
      // - only uses getters of final variables
      locations.add(new alluxio.wire.BlockLocation().setWorkerId(location.getWorkerId())
          .setWorkerAddress(workerInfo.getWorkerAddress())
          .setTierAlias(location.getTier()).setMediumType(location.getMediumType()));
    }
  }
  return Optional.of(
      new BlockInfo().setBlockId(blockId).setLength(block.getLength()).setLocations(locations));
}
/**
 * Records the given block ids as lost by adding all of them to the lost block set.
 *
 * @param blockIds the ids of the blocks to record as lost
 */
@Override
public void reportLostBlocks(List blockIds) {
mLostBlocks.addAll(blockIds);
}
/**
 * Returns the dependency set held in the {@code DEPS} constant of this class.
 *
 * @return the value of {@code DEPS}
 */
// NOTE(review): the return type reads "Set>" in this copy of the file, so its type arguments
// appear to have been lost; restore them from the declaration this method overrides.
@Override
public Set> getDependencies() {
return DEPS;
}
/**
* Lost worker periodic check.
*/
private final class LostWorkerDetectionHeartbeatExecutor implements HeartbeatExecutor {
/**
* Constructs a new {@link LostWorkerDetectionHeartbeatExecutor}.
*/
public LostWorkerDetectionHeartbeatExecutor() {}
@Override
public void heartbeat() {
long masterWorkerTimeoutMs = ServerConfiguration.getMs(PropertyKey.MASTER_WORKER_TIMEOUT_MS);
for (MasterWorkerInfo worker : mWorkers) {
synchronized (worker) {
final long lastUpdate = mClock.millis() - worker.getLastUpdatedTimeMs();
if (lastUpdate > masterWorkerTimeoutMs) {
LOG.error("The worker {}({}) timed out after {}ms without a heartbeat!", worker.getId(),
worker.getWorkerAddress(), lastUpdate);
mLostWorkers.add(worker);
mWorkers.remove(worker);
WorkerNetAddress workerAddress = worker.getWorkerAddress();
for (Consumer function : mWorkerLostListeners) {
function.accept(new Address(workerAddress.getHost(), workerAddress.getRpcPort()));
}
processWorkerRemovedBlocks(worker, worker.getBlocks());
}
}
}
}
@Override
public void close() {
// Nothing to clean up
}
}
/**
 * Wraps the lock that {@code mBlockLocks} hands out for a block id in a {@link LockResource}.
 * Callers in this class use the result in a try-with-resources statement; presumably the
 * resource holds the lock from construction until it is closed (LockResource is defined
 * elsewhere - confirm).
 *
 * @param blockId the id of the block to lock
 * @return the lock resource for the block
 */
private LockResource lockBlock(long blockId) {
return new LockResource(mBlockLocks.get(blockId));
}
/**
 * Selects the MasterWorkerInfo from workerInfoSet whose host or related IP address
 * exists in addresses.
 *
 * <p>This method mutates two of its arguments: every matched entry is removed from
 * {@code addresses}, and the host plus, when it resolves, the IP address of every inspected
 * worker is added to {@code workerNames}.
 *
 * @param addresses the address set that user passed in
 * @param workerInfoSet the MasterWorkerInfo set to select info from
 * @param workerNames the supported worker names
 * @return the workers whose host or resolved IP address was found in addresses
 */
private Set<MasterWorkerInfo> selectInfoByAddress(Set<String> addresses,
    Set<MasterWorkerInfo> workerInfoSet, Set<String> workerNames) {
  return workerInfoSet.stream().filter(info -> {
    String host = info.getWorkerAddress().getHost();
    workerNames.add(host);

    String ip = null;
    try {
      ip = NetworkAddressUtils.resolveIpAddress(host);
      workerNames.add(ip);
    } catch (UnknownHostException e) {
      // The host may already be an IP address
    }

    // Set.remove reports whether the element was present, so one call both tests for and
    // consumes the match. The host is tried first; the IP only if the host did not match.
    if (addresses.remove(host)) {
      return true;
    }
    return ip != null && addresses.remove(ip);
  }).collect(Collectors.toSet());
}
/**
 * Adds a listener to the lost-worker-found listener list.
 * NOTE(review): the code that invokes these listeners is outside this section; presumably they
 * fire when a worker previously recorded as lost is seen again - confirm.
 *
 * @param function the listener to register
 */
@Override
public void registerLostWorkerFoundListener(Consumer function) {
mLostWorkerFoundListeners.add(function);
}
/**
 * Adds a listener to the worker-lost listener list. The lost worker detection heartbeat calls
 * every listener in that list with an Address built from the host and RPC port of each worker
 * that timed out.
 *
 * @param function the listener to register
 */
@Override
public void registerWorkerLostListener(Consumer function) {
mWorkerLostListeners.add(function);
}
/**
 * Adds a listener to the worker-registered listener list.
 * NOTE(review): the code that invokes these listeners is outside this section; the method name
 * suggests they receive the configuration of a newly registered worker - confirm.
 *
 * @param function the listener to register
 */
// NOTE(review): the parameter type reads "BiConsumer>" in this copy of the file, so its type
// arguments appear to have been lost; restore them from the declaration this method overrides.
@Override
public void registerNewWorkerConfListener(BiConsumer> function) {
mWorkerRegisteredListeners.add(function);
}
/**
 * Class that contains metrics related to BlockMaster.
 */
public static final class Metrics {
  public static final String CAPACITY_TOTAL = "CapacityTotal";
  public static final String CAPACITY_USED = "CapacityUsed";
  public static final String CAPACITY_FREE = "CapacityFree";
  public static final String WORKERS = "Workers";
  public static final String TIER = "Tier";

  /**
   * Registers metric gauges.
   *
   * <p>Three cluster-wide capacity gauges (total, used, free) are registered first, then the
   * same three per storage tier alias of the master's global tier mapping, and finally a gauge
   * for the worker count. Every gauge is registered only if no gauge of that name exists yet.
   *
   * @param master the block master handle
   */
  @VisibleForTesting
  public static void registerGauges(final BlockMaster master) {
    // Cluster-wide capacity.
    MetricsSystem.registerGaugeIfAbsent(MetricsSystem.getMetricName(CAPACITY_TOTAL),
        master::getCapacityBytes);
    MetricsSystem.registerGaugeIfAbsent(MetricsSystem.getMetricName(CAPACITY_USED),
        master::getUsedBytes);
    MetricsSystem.registerGaugeIfAbsent(MetricsSystem.getMetricName(CAPACITY_FREE),
        () -> master.getCapacityBytes() - master.getUsedBytes());

    // Per-tier capacity; a tier missing from the reported maps counts as 0 bytes.
    for (int ordinal = 0; ordinal < master.getGlobalStorageTierAssoc().size(); ordinal++) {
      String alias = master.getGlobalStorageTierAssoc().getAlias(ordinal);

      Gauge<Long> tierTotal = () -> master.getTotalBytesOnTiers().getOrDefault(alias, 0L);
      MetricsSystem.registerGaugeIfAbsent(
          MetricsSystem.getMetricName(CAPACITY_TOTAL + TIER + alias), tierTotal);

      Gauge<Long> tierUsed = () -> master.getUsedBytesOnTiers().getOrDefault(alias, 0L);
      MetricsSystem.registerGaugeIfAbsent(
          MetricsSystem.getMetricName(CAPACITY_USED + TIER + alias), tierUsed);

      Gauge<Long> tierFree = () -> master.getTotalBytesOnTiers().getOrDefault(alias, 0L)
          - master.getUsedBytesOnTiers().getOrDefault(alias, 0L);
      MetricsSystem.registerGaugeIfAbsent(
          MetricsSystem.getMetricName(CAPACITY_FREE + TIER + alias), tierFree);
    }

    // Number of workers.
    Gauge<Integer> workerCount = () -> master.getWorkerCount();
    MetricsSystem.registerGaugeIfAbsent(MetricsSystem.getMetricName(WORKERS), workerCount);
  }

  private Metrics() {} // prevent instantiation
}
}
// © 2015 - 2025 Weber Informatics LLC | Privacy Policy