
org.apache.hadoop.hbase.master.SplitLogManager Maven / Gradle / Ivy

/**
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.apache.hadoop.hbase.master;

import static org.apache.hadoop.hbase.master.SplitLogManager.ResubmitDirective.CHECK;
import static org.apache.hadoop.hbase.master.SplitLogManager.ResubmitDirective.FORCE;
import static org.apache.hadoop.hbase.master.SplitLogManager.TerminationStatus.DELETED;
import static org.apache.hadoop.hbase.master.SplitLogManager.TerminationStatus.FAILURE;
import static org.apache.hadoop.hbase.master.SplitLogManager.TerminationStatus.IN_PROGRESS;
import static org.apache.hadoop.hbase.master.SplitLogManager.TerminationStatus.SUCCESS;

import java.io.IOException;
import java.io.InterruptedIOException;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collections;
import java.util.HashSet;
import java.util.List;
import java.util.Map;
import java.util.Set;
import java.util.concurrent.ConcurrentHashMap;
import java.util.concurrent.ConcurrentMap;
import java.util.concurrent.atomic.AtomicInteger;
import java.util.concurrent.locks.ReentrantLock;

import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.fs.PathFilter;
import org.apache.hadoop.hbase.ChoreService;
import org.apache.hadoop.hbase.HRegionInfo;
import org.apache.hadoop.hbase.ScheduledChore;
import org.apache.hadoop.hbase.Server;
import org.apache.hadoop.hbase.ServerName;
import org.apache.hadoop.hbase.SplitLogCounters;
import org.apache.hadoop.hbase.Stoppable;
import org.apache.hadoop.hbase.classification.InterfaceAudience;
import org.apache.hadoop.hbase.coordination.BaseCoordinatedStateManager;
import org.apache.hadoop.hbase.coordination.SplitLogManagerCoordination;
import org.apache.hadoop.hbase.coordination.SplitLogManagerCoordination.SplitLogManagerDetails;
import org.apache.hadoop.hbase.monitoring.MonitoredTask;
import org.apache.hadoop.hbase.monitoring.TaskMonitor;
import org.apache.hadoop.hbase.protobuf.generated.ZooKeeperProtos.SplitLogTask.RecoveryMode;
import org.apache.hadoop.hbase.util.EnvironmentEdgeManager;
import org.apache.hadoop.hbase.util.FSUtils;
import org.apache.hadoop.hbase.util.Pair;
import org.apache.hadoop.hbase.wal.DefaultWALProvider;
import org.apache.hadoop.hbase.wal.WALFactory;

import com.google.common.annotations.VisibleForTesting;

/**
 * Distributes the task of log splitting to the available region servers.
 * Coordination happens via coordination engine. For every log file that has to be split a
 * task is created. SplitLogWorkers race to grab a task.
 *
 * <p>SplitLogManager monitors the tasks that it creates using the
 * timeoutMonitor thread. If a task's progress is slow then
 * {@link SplitLogManagerCoordination#checkTasks} will take away the
 * task from the owner {@link org.apache.hadoop.hbase.regionserver.SplitLogWorker}
 * and the task will be up for grabs again. When the task is done then it is deleted
 * by SplitLogManager.
 *
 * <p>Clients call {@link #splitLogDistributed(Path)} to split a region server's
 * log files. The caller thread waits in this method until all the log files
 * have been split.
 *
 * <p>All the coordination calls made by this class are asynchronous. This is mainly
 * to help reduce response time seen by the callers.
 *
 * <p>There is a race in this design between the SplitLogManager and the
 * SplitLogWorker. SplitLogManager might re-queue a task that has in reality
 * already been completed by a SplitLogWorker. We rely on the idempotency of
 * the log splitting task for correctness.
 *
 * <p>It is also assumed that every log splitting task is unique and once
 * completed (either with success or with error) it will not be submitted
 * again. If a task is resubmitted then there is a risk that old "delete task"
 * can delete the re-submission.
 */
@InterfaceAudience.Private
public class SplitLogManager {
  private static final Log LOG = LogFactory.getLog(SplitLogManager.class);

  private Server server;

  private final Stoppable stopper;
  private final Configuration conf;
  private final ChoreService choreService;

  public static final int DEFAULT_UNASSIGNED_TIMEOUT = (3 * 60 * 1000); // 3 min

  private long unassignedTimeout;
  private long lastTaskCreateTime = Long.MAX_VALUE;
  private long checkRecoveringTimeThreshold = 15000; // 15 seconds
  private final List<Pair<Set<ServerName>, Boolean>> failedRecoveringRegionDeletions =
      Collections.synchronizedList(new ArrayList<Pair<Set<ServerName>, Boolean>>());

  /**
   * In distributedLogReplay mode, we need to touch both splitlog and recovering-regions znodes in
   * one operation. So the lock is used to guard such cases.
   */
  protected final ReentrantLock recoveringRegionLock = new ReentrantLock();

  private final ConcurrentMap<String, Task> tasks = new ConcurrentHashMap<String, Task>();
  private TimeoutMonitor timeoutMonitor;

  private volatile Set<ServerName> deadWorkers = null;
  private final Object deadWorkersLock = new Object();

  /**
   * It's OK to construct this object even when region-servers are not online. It does lookup
   * orphan tasks in the coordination engine but it doesn't block waiting for them to be done.
   * @param server the server instance
   * @param conf the HBase configuration
   * @param stopper the stoppable in case anything is wrong
   * @param master the master services
   * @param serverName the master server name
   * @throws IOException
   */
  public SplitLogManager(Server server, Configuration conf, Stoppable stopper,
      MasterServices master, ServerName serverName) throws IOException {
    this.server = server;
    this.conf = conf;
    this.stopper = stopper;
    this.choreService = new ChoreService(serverName.toString() + "_splitLogManager_");
    if (server.getCoordinatedStateManager() != null) {
      SplitLogManagerCoordination coordination =
          ((BaseCoordinatedStateManager) server.getCoordinatedStateManager())
              .getSplitLogManagerCoordination();
      Set<String> failedDeletions = Collections.synchronizedSet(new HashSet<String>());
      SplitLogManagerDetails details =
          new SplitLogManagerDetails(tasks, master, failedDeletions, serverName);
      coordination.setDetails(details);
      coordination.init();
      // Determine recovery mode
    }
    this.unassignedTimeout =
        conf.getInt("hbase.splitlog.manager.unassigned.timeout", DEFAULT_UNASSIGNED_TIMEOUT);
    this.timeoutMonitor =
        new TimeoutMonitor(conf.getInt("hbase.splitlog.manager.timeoutmonitor.period", 1000),
            stopper);
    choreService.scheduleChore(timeoutMonitor);
  }
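
  // -------------------------------------------------------------------------------------------
  // Illustrative usage sketch (not part of the original class): a master-side caller typically
  // constructs one SplitLogManager and then hands it the WAL directory of a failed region
  // server. The names masterServices and walDirOfDeadServer below are hypothetical.
  //
  //   SplitLogManager slm =
  //       new SplitLogManager(server, conf, stopper, masterServices, serverName);
  //   long splitBytes = slm.splitLogDistributed(walDirOfDeadServer); // blocks until all done
  // -------------------------------------------------------------------------------------------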

  private FileStatus[] getFileList(List<Path> logDirs, PathFilter filter) throws IOException {
    return getFileList(conf, logDirs, filter);
  }

  /**
   * Get a list of paths that need to be split given a set of server-specific directories and
   * optionally a filter.
   *
   * See {@link DefaultWALProvider#getServerNameFromWALDirectoryName} for more info on directory
   * layout.
   *
   * Should be package-private, but is needed by
   * {@link org.apache.hadoop.hbase.wal.WALSplitter#split(Path, Path, Path, FileSystem,
   * Configuration, WALFactory)} for tests.
   */
  @VisibleForTesting
  public static FileStatus[] getFileList(final Configuration conf, final List<Path> logDirs,
      final PathFilter filter) throws IOException {
    List<FileStatus> fileStatus = new ArrayList<FileStatus>();
    for (Path logDir : logDirs) {
      final FileSystem fs = logDir.getFileSystem(conf);
      if (!fs.exists(logDir)) {
        LOG.warn(logDir + " doesn't exist. Nothing to do!");
        continue;
      }
      FileStatus[] logfiles = FSUtils.listStatus(fs, logDir, filter);
      if (logfiles == null || logfiles.length == 0) {
        LOG.info(logDir + " is empty dir, no logs to split");
      } else {
        Collections.addAll(fileStatus, logfiles);
      }
    }
    FileStatus[] a = new FileStatus[fileStatus.size()];
    return fileStatus.toArray(a);
  }

  /**
   * @param logDir one region server WAL dir path in .logs
   * @throws IOException if there was an error while splitting any log file
   * @return cumulative size of the logfiles split
   */
  public long splitLogDistributed(final Path logDir) throws IOException {
    List<Path> logDirs = new ArrayList<Path>();
    logDirs.add(logDir);
    return splitLogDistributed(logDirs);
  }

  /**
   * The caller will block until all the log files of the given region server have been processed
   * - successfully split or an error is encountered - by an available worker region server. This
   * method must only be called after the region servers have been brought online.
   * @param logDirs List of log dirs to split
   * @throws IOException If there was an error while splitting any log file
   * @return cumulative size of the logfiles split
   */
  public long splitLogDistributed(final List<Path> logDirs) throws IOException {
    if (logDirs.isEmpty()) {
      return 0;
    }
    Set<ServerName> serverNames = new HashSet<ServerName>();
    for (Path logDir : logDirs) {
      try {
        ServerName serverName = DefaultWALProvider.getServerNameFromWALDirectoryName(logDir);
        if (serverName != null) {
          serverNames.add(serverName);
        }
      } catch (IllegalArgumentException e) {
        // ignore invalid format error.
        LOG.warn("Cannot parse server name from " + logDir);
      }
    }
    return splitLogDistributed(serverNames, logDirs, null);
  }
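
  // Note (descriptive, added for readability): the PathFilter parameter of the overload below is
  // how hbase:meta WALs and user-region WALs get split in separate passes. Passing
  // MasterFileSystem.META_FILTER marks the batch as an hbase:meta recovery (see isMetaRecovery
  // below); passing null means no files are filtered out.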

  /**
   * The caller will block until all the hbase:meta log files of the given region server have
   * been processed - successfully split or an error is encountered - by an available worker
   * region server. This method must only be called after the region servers have been brought
   * online.
   * @param logDirs List of log dirs to split
   * @param filter the Path filter to select specific files for considering
   * @throws IOException If there was an error while splitting any log file
   * @return cumulative size of the logfiles split
   */
  public long splitLogDistributed(final Set<ServerName> serverNames, final List<Path> logDirs,
      PathFilter filter) throws IOException {
    MonitoredTask status = TaskMonitor.get().createStatus("Doing distributed log split in "
        + logDirs + " for serverName=" + serverNames);
    FileStatus[] logfiles = getFileList(logDirs, filter);
    status.setStatus("Checking directory contents...");
    SplitLogCounters.tot_mgr_log_split_batch_start.incrementAndGet();
    LOG.info("Started splitting " + logfiles.length + " logs in " + logDirs
        + " for " + serverNames);
    long t = EnvironmentEdgeManager.currentTime();
    long totalSize = 0;
    TaskBatch batch = new TaskBatch();
    Boolean isMetaRecovery = (filter == null) ? null : false;
    for (FileStatus lf : logfiles) {
      // TODO If the log file is still being written to - which is most likely
      // the case for the last log file - then its length will show up here
      // as zero. The size of such a file can only be retrieved after
      // recover-lease is done. totalSize will be under in most cases and the
      // metrics that it drives will also be under-reported.
      totalSize += lf.getLen();
      String pathToLog = FSUtils.removeWALRootPath(lf.getPath(), conf);
      if (!enqueueSplitTask(pathToLog, batch)) {
        throw new IOException("duplicate log split scheduled for " + lf.getPath());
      }
    }
    waitForSplittingCompletion(batch, status);
    // remove recovering regions
    if (filter == MasterFileSystem.META_FILTER /* reference comparison */) {
      // we split meta regions and user regions separately therefore logfiles are either all for
      // meta or user regions but won't be for both (we could have mixed situations in tests)
      isMetaRecovery = true;
    }
    removeRecoveringRegions(serverNames, isMetaRecovery);

    if (batch.done != batch.installed) {
      batch.isDead = true;
      SplitLogCounters.tot_mgr_log_split_batch_err.incrementAndGet();
      LOG.warn("error while splitting logs in " + logDirs + " installed = " + batch.installed
          + " but only " + batch.done + " done");
      String msg = "error or interrupted while splitting logs in " + logDirs + " Task = " + batch;
      status.abort(msg);
      throw new IOException(msg);
    }
    for (Path logDir : logDirs) {
      status.setStatus("Cleaning up log directory...");
      final FileSystem fs = logDir.getFileSystem(conf);
      try {
        if (fs.exists(logDir) && !fs.delete(logDir, false)) {
          LOG.warn("Unable to delete log src dir. Ignoring. " + logDir);
        }
      } catch (IOException ioe) {
        FileStatus[] files = fs.listStatus(logDir);
        if (files != null && files.length > 0) {
          LOG.warn("Returning success without actually splitting and "
              + "deleting all the log files in path " + logDir + ": "
              + Arrays.toString(files), ioe);
        } else {
          LOG.warn("Unable to delete log src dir. Ignoring. " + logDir, ioe);
        }
      }
      SplitLogCounters.tot_mgr_log_split_batch_success.incrementAndGet();
    }
    String msg = "finished splitting (more than or equal to) " + totalSize + " bytes in "
        + batch.installed + " log files in " + logDirs + " in "
        + (EnvironmentEdgeManager.currentTime() - t) + "ms";
    status.markComplete(msg);
    LOG.info(msg);
    return totalSize;
  }

  /**
   * Add a task entry to coordination if it is not already there.
   * @param taskname the path of the log to be split
   * @param batch the batch this task belongs to
   * @return true if a new entry is created, false if it is already there.
   */
  boolean enqueueSplitTask(String taskname, TaskBatch batch) {
    lastTaskCreateTime = EnvironmentEdgeManager.currentTime();
    String task = ((BaseCoordinatedStateManager) server.getCoordinatedStateManager())
        .getSplitLogManagerCoordination().prepareTask(taskname);
    Task oldtask = createTaskIfAbsent(task, batch);
    if (oldtask == null) {
      // publish the task in the coordination engine
      ((BaseCoordinatedStateManager) server.getCoordinatedStateManager())
          .getSplitLogManagerCoordination().submitTask(task);
      return true;
    }
    return false;
  }

  private void waitForSplittingCompletion(TaskBatch batch, MonitoredTask status) {
    synchronized (batch) {
      while ((batch.done + batch.error) != batch.installed) {
        try {
          status.setStatus("Waiting for distributed tasks to finish. " + " scheduled="
              + batch.installed + " done=" + batch.done + " error=" + batch.error);
          int remaining = batch.installed - (batch.done + batch.error);
          int actual = activeTasks(batch);
          if (remaining != actual) {
            LOG.warn("Expected " + remaining + " active tasks, but actually there are " + actual);
          }
          int remainingTasks = ((BaseCoordinatedStateManager) server.getCoordinatedStateManager())
              .getSplitLogManagerCoordination().remainingTasksInCoordination();
          if (remainingTasks >= 0 && actual > remainingTasks) {
            LOG.warn("Expected at least " + actual + " tasks remaining, but actually there are "
                + remainingTasks);
          }
          if (remainingTasks == 0 || actual == 0) {
            LOG.warn("No more tasks remaining, splitting should have completed. Remaining tasks"
                + " is " + remainingTasks + ", active tasks in map " + actual);
            if (remainingTasks == 0 && actual == 0) {
              return;
            }
          }
          batch.wait(100);
          if (stopper.isStopped()) {
            LOG.warn("Stopped while waiting for log splits to be completed");
            return;
          }
        } catch (InterruptedException e) {
          LOG.warn("Interrupted while waiting for log splits to be completed");
          Thread.currentThread().interrupt();
          return;
        }
      }
    }
  }

  @VisibleForTesting
  ConcurrentMap<String, Task> getTasks() {
    return tasks;
  }

  private int activeTasks(final TaskBatch batch) {
    int count = 0;
    for (Task t : tasks.values()) {
      if (t.batch == batch && t.status == TerminationStatus.IN_PROGRESS) {
        count++;
      }
    }
    return count;
  }

  /**
   * It removes recovering regions under /hbase/recovering-regions/[encoded region name] so that
   * the region server hosting the region can allow reads to the recovered region
   * @param serverNames servers which are just recovered
   * @param isMetaRecovery whether current recovery is for the meta region on
   *          serverNames
   */
  private void removeRecoveringRegions(final Set<ServerName> serverNames, Boolean isMetaRecovery) {
    if (!isLogReplaying()) {
      // the function is only used in WALEdit direct replay mode
      return;
    }
    if (serverNames == null || serverNames.isEmpty()) return;

    Set<String> recoveredServerNameSet = new HashSet<String>();
    for (ServerName tmpServerName : serverNames) {
      recoveredServerNameSet.add(tmpServerName.getServerName());
    }

    this.recoveringRegionLock.lock();
    try {
      ((BaseCoordinatedStateManager) server.getCoordinatedStateManager())
          .getSplitLogManagerCoordination().removeRecoveringRegions(recoveredServerNameSet,
            isMetaRecovery);
    } catch (IOException e) {
      LOG.warn("removeRecoveringRegions got exception. Will retry", e);
      if (serverNames != null && !serverNames.isEmpty()) {
        // park the failed removal; the TimeoutMonitor chore retries these later
        this.failedRecoveringRegionDeletions.add(new Pair<Set<ServerName>, Boolean>(serverNames,
            isMetaRecovery));
      }
    } finally {
      this.recoveringRegionLock.unlock();
    }
  }

  /**
   * It removes stale recovering regions under /hbase/recovering-regions/[encoded region name]
   * during master initialization phase.
   * @param failedServers A set of known failed servers
   * @throws IOException
   */
  void removeStaleRecoveringRegions(final Set<ServerName> failedServers) throws IOException,
      InterruptedIOException {
    Set<String> knownFailedServers = new HashSet<String>();
    if (failedServers != null) {
      for (ServerName tmpServerName : failedServers) {
        knownFailedServers.add(tmpServerName.getServerName());
      }
    }
    this.recoveringRegionLock.lock();
    try {
      ((BaseCoordinatedStateManager) server.getCoordinatedStateManager())
          .getSplitLogManagerCoordination().removeStaleRecoveringRegions(knownFailedServers);
    } finally {
      this.recoveringRegionLock.unlock();
    }
  }

  /**
   * @param path
   * @param batch
   * @return null on success, existing task on error
   */
  private Task createTaskIfAbsent(String path, TaskBatch batch) {
    Task oldtask;
    // batch.installed is only changed via this function and
    // a single thread touches batch.installed.
    Task newtask = new Task();
    newtask.batch = batch;
    oldtask = tasks.putIfAbsent(path, newtask);
    if (oldtask == null) {
      batch.installed++;
      return null;
    }
    // new task was not used.
    synchronized (oldtask) {
      if (oldtask.isOrphan()) {
        if (oldtask.status == SUCCESS) {
          // The task is already done. Do not install the batch for this
          // task because it might be too late for setDone() to update
          // batch.done. There is no need for the batch creator to wait for
          // this task to complete.
          return (null);
        }
        if (oldtask.status == IN_PROGRESS) {
          oldtask.batch = batch;
          batch.installed++;
          LOG.debug("Previously orphan task " + path + " is now being waited upon");
          return null;
        }
        while (oldtask.status == FAILURE) {
          LOG.debug("wait for status of task " + path + " to change to DELETED");
          SplitLogCounters.tot_mgr_wait_for_zk_delete.incrementAndGet();
          try {
            oldtask.wait();
          } catch (InterruptedException e) {
            Thread.currentThread().interrupt();
            LOG.warn("Interrupted when waiting for znode delete callback");
            // fall through to return failure
            break;
          }
        }
        if (oldtask.status != DELETED) {
          LOG.warn("Failure because previously failed task"
              + " state still present. Waiting for znode delete callback"
              + " path=" + path);
          return oldtask;
        }
        // reinsert the newTask and it must succeed this time
        Task t = tasks.putIfAbsent(path, newtask);
        if (t == null) {
          batch.installed++;
          return null;
        }
        LOG.fatal("Logic error. Deleted task still present in tasks map");
        assert false : "Deleted task still present in tasks map";
        return t;
      }
      LOG.warn("Failure because two threads can't wait for the same task; path=" + path);
      return oldtask;
    }
  }

  Task findOrCreateOrphanTask(String path) {
    Task orphanTask = new Task();
    Task task;
    task = tasks.putIfAbsent(path, orphanTask);
    if (task == null) {
      LOG.info("creating orphan task " + path);
      SplitLogCounters.tot_mgr_orphan_task_acquired.incrementAndGet();
      task = orphanTask;
    }
    return task;
  }

  public void stop() {
    if (choreService != null) {
      choreService.shutdown();
    }
    if (timeoutMonitor != null) {
      timeoutMonitor.cancel(true);
    }
  }

  void handleDeadWorker(ServerName workerName) {
    // resubmit the tasks on the TimeoutMonitor thread. Makes it easier
    // to reason about concurrency. Makes it easier to retry.
    synchronized (deadWorkersLock) {
      if (deadWorkers == null) {
        deadWorkers = new HashSet<ServerName>(100);
      }
      deadWorkers.add(workerName);
    }
    LOG.info("dead splitlog worker " + workerName);
  }

  void handleDeadWorkers(Set<ServerName> serverNames) {
    synchronized (deadWorkersLock) {
      if (deadWorkers == null) {
        deadWorkers = new HashSet<ServerName>(100);
      }
      deadWorkers.addAll(serverNames);
    }
    LOG.info("dead splitlog workers " + serverNames);
  }

  /**
   * Sets the recovery mode, either from outstanding split log tasks from before or from the
   * current configuration setting.
   * @param isForInitialization
   * @throws IOException throws if it's impossible to set recovery mode
   */
  public void setRecoveryMode(boolean isForInitialization) throws IOException {
    ((BaseCoordinatedStateManager) server.getCoordinatedStateManager())
        .getSplitLogManagerCoordination().setRecoveryMode(isForInitialization);
  }

  public void markRegionsRecovering(ServerName server, Set<HRegionInfo> userRegions)
      throws InterruptedIOException, IOException {
    if (userRegions == null || (!isLogReplaying())) {
      return;
    }
    try {
      this.recoveringRegionLock.lock();
      // mark that we're creating recovering regions
      ((BaseCoordinatedStateManager) this.server.getCoordinatedStateManager())
          .getSplitLogManagerCoordination().markRegionsRecovering(server, userRegions);
    } finally {
      this.recoveringRegionLock.unlock();
    }
  }

  /**
   * @return whether log is replaying
   */
  public boolean isLogReplaying() {
    if (server.getCoordinatedStateManager() == null) return false;
    return ((BaseCoordinatedStateManager) server.getCoordinatedStateManager())
        .getSplitLogManagerCoordination().isReplaying();
  }

  /**
   * @return whether log is splitting
   */
  public boolean isLogSplitting() {
    if (server.getCoordinatedStateManager() == null) return false;
    return ((BaseCoordinatedStateManager) server.getCoordinatedStateManager())
        .getSplitLogManagerCoordination().isSplitting();
  }

  /**
   * @return the current log recovery mode
   */
  public RecoveryMode getRecoveryMode() {
    return ((BaseCoordinatedStateManager) server.getCoordinatedStateManager())
        .getSplitLogManagerCoordination().getRecoveryMode();
  }

  /**
   * Keeps track of the batch of tasks submitted together by a caller in splitLogDistributed().
   * Client threads use this object to wait for all their tasks to be done.
   * <p>
   * All access is synchronized.
   */
  @InterfaceAudience.Private
  public static class TaskBatch {
    public int installed = 0;
    public int done = 0;
    public int error = 0;
    public volatile boolean isDead = false;

    @Override
    public String toString() {
      return ("installed = " + installed + " done = " + done + " error = " + error);
    }
  }

  /**
   * In-memory state of an active task.
   */
  @InterfaceAudience.Private
  public static class Task {
    public volatile long last_update;
    public volatile int last_version;
    public volatile ServerName cur_worker_name;
    public volatile TaskBatch batch;
    public volatile TerminationStatus status;
    public volatile AtomicInteger incarnation = new AtomicInteger(0);
    public final AtomicInteger unforcedResubmits = new AtomicInteger();
    public volatile boolean resubmitThresholdReached;

    @Override
    public String toString() {
      return ("last_update = " + last_update + " last_version = " + last_version
          + " cur_worker_name = " + cur_worker_name + " status = " + status
          + " incarnation = " + incarnation + " resubmits = " + unforcedResubmits.get()
          + " batch = " + batch);
    }

    public Task() {
      last_version = -1;
      status = IN_PROGRESS;
      setUnassigned();
    }

    public boolean isOrphan() {
      return (batch == null || batch.isDead);
    }

    public boolean isUnassigned() {
      return (cur_worker_name == null);
    }

    public void heartbeatNoDetails(long time) {
      last_update = time;
    }

    public void heartbeat(long time, int version, ServerName worker) {
      last_version = version;
      last_update = time;
      cur_worker_name = worker;
    }

    public void setUnassigned() {
      cur_worker_name = null;
      last_update = -1;
    }
  }
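
  // Descriptive note (added for readability, summarising the code above): a Task starts
  // IN_PROGRESS and unassigned, heartbeat() records the worker and node version once a
  // SplitLogWorker claims it, and it ends in SUCCESS or FAILURE. A FAILURE task is only replaced
  // after its node has been DELETED, which is what createTaskIfAbsent() waits for.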

  /**
   * Periodically checks all active tasks and resubmits the ones that have timed out
   */
  private class TimeoutMonitor extends ScheduledChore {
    private long lastLog = 0;

    public TimeoutMonitor(final int period, Stoppable stopper) {
      super("SplitLogManager Timeout Monitor", stopper, period);
    }

    @Override
    protected void chore() {
      int resubmitted = 0;
      int unassigned = 0;
      int tot = 0;
      boolean found_assigned_task = false;
      Set<ServerName> localDeadWorkers;

      synchronized (deadWorkersLock) {
        localDeadWorkers = deadWorkers;
        deadWorkers = null;
      }

      for (Map.Entry<String, Task> e : tasks.entrySet()) {
        String path = e.getKey();
        Task task = e.getValue();
        ServerName cur_worker = task.cur_worker_name;
        tot++;
        // don't easily resubmit a task which hasn't been picked up yet. It
        // might be a long while before a SplitLogWorker is free to pick up a
        // task. This is because a SplitLogWorker picks up a task one at a
        // time. If we want progress when there are no region servers then we
        // will have to run a SplitLogWorker thread in the Master.
        if (task.isUnassigned()) {
          unassigned++;
          continue;
        }
        found_assigned_task = true;
        if (localDeadWorkers != null && localDeadWorkers.contains(cur_worker)) {
          SplitLogCounters.tot_mgr_resubmit_dead_server_task.incrementAndGet();
          if (((BaseCoordinatedStateManager) server.getCoordinatedStateManager())
              .getSplitLogManagerCoordination().resubmitTask(path, task, FORCE)) {
            resubmitted++;
          } else {
            handleDeadWorker(cur_worker);
            LOG.warn("Failed to resubmit task " + path + " owned by dead " + cur_worker
                + ", will retry.");
          }
        } else if (((BaseCoordinatedStateManager) server.getCoordinatedStateManager())
            .getSplitLogManagerCoordination().resubmitTask(path, task, CHECK)) {
          resubmitted++;
        }
      }
      if (tot > 0) {
        long now = EnvironmentEdgeManager.currentTime();
        if (now > lastLog + 5000) {
          lastLog = now;
          LOG.info("total tasks = " + tot + " unassigned = " + unassigned + " tasks=" + tasks);
        }
      }
      if (resubmitted > 0) {
        LOG.info("resubmitted " + resubmitted + " out of " + tot + " tasks");
      }
      // If there are pending tasks and all of them have been unassigned for
      // some time then put up a RESCAN node to ping the workers.
      // ZKSplitlog.DEFAULT_UNASSIGNED_TIMEOUT is of the order of minutes
      // because a. it is very unlikely that every worker had a
      // transient error when trying to grab the task b. if there are no
      // workers then all tasks will stay unassigned indefinitely and the
      // manager will be indefinitely creating RESCAN nodes. TODO maybe the
      // master should spawn both a manager and a worker thread to guarantee
      // that there is always one worker in the system
      if (tot > 0 && !found_assigned_task
          && ((EnvironmentEdgeManager.currentTime() - lastTaskCreateTime) > unassignedTimeout)) {
        for (Map.Entry<String, Task> e : tasks.entrySet()) {
          String key = e.getKey();
          Task task = e.getValue();
          // we have to do task.isUnassigned() check again because tasks might
          // have been asynchronously assigned. There is no locking required
          // for these checks ...
          // it is OK even if tryGetDataSetWatch() is
          // called unnecessarily for a taskpath
          if (task.isUnassigned() && (task.status != FAILURE)) {
            // We just touch the znode to make sure it's still there
            ((BaseCoordinatedStateManager) server.getCoordinatedStateManager())
                .getSplitLogManagerCoordination().checkTaskStillAvailable(key);
          }
        }
        ((BaseCoordinatedStateManager) server.getCoordinatedStateManager())
            .getSplitLogManagerCoordination().checkTasks();
        SplitLogCounters.tot_mgr_resubmit_unassigned.incrementAndGet();
        LOG.debug("resubmitting unassigned task(s) after timeout");
      }
      Set<String> failedDeletions =
          ((BaseCoordinatedStateManager) server.getCoordinatedStateManager())
              .getSplitLogManagerCoordination().getDetails().getFailedDeletions();
      // Retry previously failed deletes
      if (failedDeletions.size() > 0) {
        List<String> tmpPaths = new ArrayList<String>(failedDeletions);
        for (String tmpPath : tmpPaths) {
          // deleteNode is an async call
          ((BaseCoordinatedStateManager) server.getCoordinatedStateManager())
              .getSplitLogManagerCoordination().deleteTask(tmpPath);
        }
        failedDeletions.removeAll(tmpPaths);
      }

      // Garbage collect left-over
      long timeInterval =
          EnvironmentEdgeManager.currentTime()
              - ((BaseCoordinatedStateManager) server.getCoordinatedStateManager())
                  .getSplitLogManagerCoordination().getLastRecoveryTime();
      if (!failedRecoveringRegionDeletions.isEmpty()
          || (tot == 0 && tasks.size() == 0 && (timeInterval > checkRecoveringTimeThreshold))) {
        // inside the function there are more checks before GC-ing anything
        if (!failedRecoveringRegionDeletions.isEmpty()) {
          List<Pair<Set<ServerName>, Boolean>> previouslyFailedDeletions =
              new ArrayList<Pair<Set<ServerName>, Boolean>>(failedRecoveringRegionDeletions);
          failedRecoveringRegionDeletions.removeAll(previouslyFailedDeletions);
          for (Pair<Set<ServerName>, Boolean> failedDeletion : previouslyFailedDeletions) {
            removeRecoveringRegions(failedDeletion.getFirst(), failedDeletion.getSecond());
          }
        } else {
          removeRecoveringRegions(null, null);
        }
      }
    }
  }

  public enum ResubmitDirective {
    CHECK(),
    FORCE();
  }

  public enum TerminationStatus {
    IN_PROGRESS("in_progress"), SUCCESS("success"), FAILURE("failure"), DELETED("deleted");

    String statusMsg;

    TerminationStatus(String msg) {
      statusMsg = msg;
    }

    @Override
    public String toString() {
      return statusMsg;
    }
  }
}