// org.apache.hadoop.hbase.master.SplitLogManager Maven / Gradle / Ivy
/**
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.hadoop.hbase.master;
import static org.apache.hadoop.hbase.master.SplitLogManager.ResubmitDirective.CHECK;
import static org.apache.hadoop.hbase.master.SplitLogManager.ResubmitDirective.FORCE;
import static org.apache.hadoop.hbase.master.SplitLogManager.TerminationStatus.DELETED;
import static org.apache.hadoop.hbase.master.SplitLogManager.TerminationStatus.FAILURE;
import static org.apache.hadoop.hbase.master.SplitLogManager.TerminationStatus.IN_PROGRESS;
import static org.apache.hadoop.hbase.master.SplitLogManager.TerminationStatus.SUCCESS;
import java.io.IOException;
import java.io.InterruptedIOException;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collections;
import java.util.HashSet;
import java.util.List;
import java.util.Map;
import java.util.Set;
import java.util.concurrent.ConcurrentHashMap;
import java.util.concurrent.ConcurrentMap;
import java.util.concurrent.atomic.AtomicInteger;
import java.util.concurrent.locks.ReentrantLock;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.fs.PathFilter;
import org.apache.hadoop.hbase.ChoreService;
import org.apache.hadoop.hbase.HRegionInfo;
import org.apache.hadoop.hbase.ScheduledChore;
import org.apache.hadoop.hbase.Server;
import org.apache.hadoop.hbase.ServerName;
import org.apache.hadoop.hbase.SplitLogCounters;
import org.apache.hadoop.hbase.Stoppable;
import org.apache.hadoop.hbase.classification.InterfaceAudience;
import org.apache.hadoop.hbase.coordination.BaseCoordinatedStateManager;
import org.apache.hadoop.hbase.coordination.SplitLogManagerCoordination;
import org.apache.hadoop.hbase.coordination.SplitLogManagerCoordination.SplitLogManagerDetails;
import org.apache.hadoop.hbase.monitoring.MonitoredTask;
import org.apache.hadoop.hbase.monitoring.TaskMonitor;
import org.apache.hadoop.hbase.protobuf.generated.ZooKeeperProtos.SplitLogTask.RecoveryMode;
import org.apache.hadoop.hbase.util.EnvironmentEdgeManager;
import org.apache.hadoop.hbase.util.FSUtils;
import org.apache.hadoop.hbase.util.Pair;
import org.apache.hadoop.hbase.wal.DefaultWALProvider;
import org.apache.hadoop.hbase.wal.WALFactory;
import com.google.common.annotations.VisibleForTesting;
/**
* Distributes the task of log splitting to the available region servers.
* Coordination happens via coordination engine. For every log file that has to be split a
* task is created. SplitLogWorkers race to grab a task.
*
* SplitLogManager monitors the tasks that it creates using the
* timeoutMonitor thread. If a task's progress is slow then
* {@link SplitLogManagerCoordination#checkTasks} will take away the
* task from the owner {@link org.apache.hadoop.hbase.regionserver.SplitLogWorker}
* and the task will be up for grabs again. When the task is done then it is deleted
* by SplitLogManager.
*
* <p>
* Clients call {@link #splitLogDistributed(Path)} to split a region server's
* log files. The caller thread waits in this method until all the log files
* have been split.
*
* <p>
* All the coordination calls made by this class are asynchronous. This is mainly
* to help reduce response time seen by the callers.
*
* <p>
* There is a race in this design between the SplitLogManager and the
* SplitLogWorker. SplitLogManager might re-queue a task that has in reality
* already been completed by a SplitLogWorker. We rely on the idempotency of
* the log splitting task for correctness.
*
* <p>
* It is also assumed that every log splitting task is unique and once
* completed (either with success or with error) it will not be submitted
* again. If a task is resubmitted then there is a risk that old "delete task"
* can delete the re-submission.
*/
@InterfaceAudience.Private
public class SplitLogManager {
private static final Log LOG = LogFactory.getLog(SplitLogManager.class);
private Server server;
private final Stoppable stopper;
private final Configuration conf;
private final ChoreService choreService;
public static final int DEFAULT_UNASSIGNED_TIMEOUT = (3 * 60 * 1000); // 3 min
private long unassignedTimeout;
private long lastTaskCreateTime = Long.MAX_VALUE;
private long checkRecoveringTimeThreshold = 15000; // 15 seconds
private final List, Boolean>> failedRecoveringRegionDeletions = Collections
.synchronizedList(new ArrayList, Boolean>>());
/**
* In distributedLogReplay mode, we need touch both splitlog and recovering-regions znodes in one
* operation. So the lock is used to guard such cases.
*/
protected final ReentrantLock recoveringRegionLock = new ReentrantLock();
private final ConcurrentMap tasks = new ConcurrentHashMap();
private TimeoutMonitor timeoutMonitor;
private volatile Set deadWorkers = null;
private final Object deadWorkersLock = new Object();
/**
 * Creates the manager, hands its shared state to the split-log coordination (when a coordinated
 * state manager is available) and schedules the timeout monitor chore.
 * <p>
 * Its OK to construct this object even when region-servers are not online. It does lookup the
 * orphan tasks in coordination engine but it doesn't block waiting for them to be done.
 * @param server the server instance
 * @param conf the HBase configuration
 * @param stopper the stoppable in case anything is wrong
 * @param master the master services
 * @param serverName the master server name
 * @throws IOException if the coordination could not be set up
 */
public SplitLogManager(Server server, Configuration conf, Stoppable stopper,
    MasterServices master, ServerName serverName) throws IOException {
  this.server = server;
  this.conf = conf;
  this.stopper = stopper;
  this.choreService = new ChoreService(serverName.toString() + "_splitLogManager_");
  if (server.getCoordinatedStateManager() != null) {
    SplitLogManagerCoordination coordination =
        ((BaseCoordinatedStateManager) server.getCoordinatedStateManager())
            .getSplitLogManagerCoordination();
    // Task paths whose deletion failed; the TimeoutMonitor retries them via getFailedDeletions().
    Set<String> failedDeletions = Collections.synchronizedSet(new HashSet<String>());
    SplitLogManagerDetails details =
        new SplitLogManagerDetails(tasks, master, failedDeletions, serverName);
    coordination.setDetails(details);
    coordination.init();
  }
  this.unassignedTimeout =
      conf.getInt("hbase.splitlog.manager.unassigned.timeout", DEFAULT_UNASSIGNED_TIMEOUT);
  this.timeoutMonitor =
      new TimeoutMonitor(conf.getInt("hbase.splitlog.manager.timeoutmonitor.period", 1000),
          stopper);
  choreService.scheduleChore(timeoutMonitor);
}
/**
 * Lists the log files under the given directories using this manager's configuration.
 * @param logDirs directories to scan
 * @param filter optional filter selecting the files to return, may be null
 * @return the matching files, possibly empty
 * @throws IOException if the file system cannot be accessed
 */
private FileStatus[] getFileList(List<Path> logDirs, PathFilter filter) throws IOException {
  return getFileList(conf, logDirs, filter);
}
/**
 * Get a list of paths that need to be split given a set of server-specific directories and
 * optionally a filter.
 *
 * See {@link DefaultWALProvider#getServerNameFromWALDirectoryName} for more info on directory
 * layout.
 *
 * Should be package-private, but is needed by
 * {@link org.apache.hadoop.hbase.wal.WALSplitter#split(Path, Path, Path, FileSystem,
 *     Configuration, WALFactory)} for tests.
 * @param conf configuration used to resolve the file system of each directory
 * @param logDirs directories to scan; directories that do not exist are skipped with a warning
 * @param filter optional filter selecting the files to return, may be null
 * @return the files found in all directories, never null
 * @throws IOException if a file system cannot be accessed
 */
@VisibleForTesting
public static FileStatus[] getFileList(final Configuration conf, final List<Path> logDirs,
    final PathFilter filter)
    throws IOException {
  List<FileStatus> fileStatus = new ArrayList<FileStatus>();
  for (Path logDir : logDirs) {
    final FileSystem fs = logDir.getFileSystem(conf);
    if (!fs.exists(logDir)) {
      LOG.warn(logDir + " doesn't exist. Nothing to do!");
      continue;
    }
    FileStatus[] logfiles = FSUtils.listStatus(fs, logDir, filter);
    if (logfiles == null || logfiles.length == 0) {
      LOG.info(logDir + " is empty dir, no logs to split");
    } else {
      Collections.addAll(fileStatus, logfiles);
    }
  }
  return fileStatus.toArray(new FileStatus[fileStatus.size()]);
}
/**
 * Splits all the log files found in a single region server WAL directory. Convenience overload
 * that wraps the directory in a list and delegates to {@link #splitLogDistributed(List)}.
 * @param logDir one region server wal dir path in .logs
 * @return cumulative size of the logfiles split
 * @throws IOException if there was an error while splitting any log file
 */
public long splitLogDistributed(final Path logDir) throws IOException {
  List<Path> logDirs = new ArrayList<Path>();
  logDirs.add(logDir);
  return splitLogDistributed(logDirs);
}
/**
 * The caller will block until all the log files of the given region server have been processed -
 * successfully split or an error is encountered - by an available worker region server. This
 * method must only be called after the region servers have been brought online.
 * <p>
 * The server names are derived from the directory names; directories whose name cannot be
 * parsed are logged and contribute no server name, but their files are still split.
 * @param logDirs List of log dirs to split
 * @throws IOException If there was an error while splitting any log file
 * @return cumulative size of the logfiles split, 0 when {@code logDirs} is empty
 */
public long splitLogDistributed(final List<Path> logDirs) throws IOException {
  if (logDirs.isEmpty()) {
    return 0;
  }
  Set<ServerName> serverNames = new HashSet<ServerName>();
  for (Path logDir : logDirs) {
    try {
      ServerName serverName = DefaultWALProvider.getServerNameFromWALDirectoryName(logDir);
      if (serverName != null) {
        serverNames.add(serverName);
      }
    } catch (IllegalArgumentException e) {
      // ignore invalid format error.
      LOG.warn("Cannot parse server name from " + logDir);
    }
  }
  return splitLogDistributed(serverNames, logDirs, null);
}
/**
 * The caller will block until all the selected log files in the given directories have been
 * processed - successfully split or an error is encountered - by an available worker region
 * server. This method must only be called after the region servers have been brought online.
 * <p>
 * After the wait, the recovering-region markers of {@code serverNames} are removed, and on
 * success each log directory is deleted (failures to delete are only logged).
 * @param serverNames the servers whose logs are being split
 * @param logDirs List of log dirs to split
 * @param filter the Path filter to select specific files for considering, may be null
 * @throws IOException If there was an error while splitting any log file
 * @return cumulative size of the logfiles split
 */
public long splitLogDistributed(final Set<ServerName> serverNames, final List<Path> logDirs,
    PathFilter filter) throws IOException {
  MonitoredTask status = TaskMonitor.get().createStatus("Doing distributed log split in " +
      logDirs + " for serverName=" + serverNames);
  FileStatus[] logfiles = getFileList(logDirs, filter);
  status.setStatus("Checking directory contents...");
  SplitLogCounters.tot_mgr_log_split_batch_start.incrementAndGet();
  LOG.info("Started splitting " + logfiles.length + " logs in " + logDirs +
      " for " + serverNames);
  long t = EnvironmentEdgeManager.currentTime();
  long totalSize = 0;
  TaskBatch batch = new TaskBatch();
  // null means "unknown" (no filter given); it is narrowed to true/false below.
  Boolean isMetaRecovery = (filter == null) ? null : Boolean.FALSE;
  for (FileStatus lf : logfiles) {
    // TODO If the log file is still being written to - which is most likely
    // the case for the last log file - then its length will show up here
    // as zero. The size of such a file can only be retrieved after
    // recover-lease is done. totalSize will be under in most cases and the
    // metrics that it drives will also be under-reported.
    totalSize += lf.getLen();
    String pathToLog = FSUtils.removeRootPath(lf.getPath(), conf);
    if (!enqueueSplitTask(pathToLog, batch)) {
      String msg = "duplicate log split scheduled for " + lf.getPath();
      // Abort the monitored task here too, as the other failure path below does, so it is
      // not left looking like a running operation.
      status.abort(msg);
      throw new IOException(msg);
    }
  }
  waitForSplittingCompletion(batch, status);
  // remove recovering regions
  if (filter == MasterFileSystem.META_FILTER /* reference comparison */) {
    // we split meta regions and user regions separately therefore logfiles are either all for
    // meta or user regions but won't for both( we could have mixed situations in tests)
    isMetaRecovery = true;
  }
  removeRecoveringRegions(serverNames, isMetaRecovery);

  if (batch.done != batch.installed) {
    // Mark the batch dead so that its unfinished tasks are treated as orphans from now on.
    batch.isDead = true;
    SplitLogCounters.tot_mgr_log_split_batch_err.incrementAndGet();
    LOG.warn("error while splitting logs in " + logDirs + " installed = " + batch.installed
        + " but only " + batch.done + " done");
    String msg = "error or interrupted while splitting logs in " + logDirs + " Task = " + batch;
    status.abort(msg);
    throw new IOException(msg);
  }
  for (Path logDir : logDirs) {
    status.setStatus("Cleaning up log directory...");
    final FileSystem fs = logDir.getFileSystem(conf);
    try {
      // Non-recursive delete: it fails if any file is still left in the directory.
      if (fs.exists(logDir) && !fs.delete(logDir, false)) {
        LOG.warn("Unable to delete log src dir. Ignoring. " + logDir);
      }
    } catch (IOException ioe) {
      FileStatus[] files = fs.listStatus(logDir);
      if (files != null && files.length > 0) {
        LOG.warn("Returning success without actually splitting and "
            + "deleting all the log files in path " + logDir + ": "
            + Arrays.toString(files), ioe);
      } else {
        LOG.warn("Unable to delete log src dir. Ignoring. " + logDir, ioe);
      }
    }
    SplitLogCounters.tot_mgr_log_split_batch_success.incrementAndGet();
  }
  String msg =
      "finished splitting (more than or equal to) " + totalSize + " bytes in " + batch.installed
          + " log files in " + logDirs + " in "
          + (EnvironmentEdgeManager.currentTime() - t) + "ms";
  status.markComplete(msg);
  LOG.info(msg);
  return totalSize;
}
/**
 * Registers a split task for the given log and, when it is new, publishes it through the
 * coordination engine.
 * @param taskname the path of the log to be split
 * @param batch the batch this task belongs to
 * @return true if a new entry is created, false if it is already there.
 */
boolean enqueueSplitTask(String taskname, TaskBatch batch) {
  lastTaskCreateTime = EnvironmentEdgeManager.currentTime();
  final SplitLogManagerCoordination coordination =
      ((BaseCoordinatedStateManager) server.getCoordinatedStateManager())
          .getSplitLogManagerCoordination();
  final String preparedTask = coordination.prepareTask(taskname);
  if (createTaskIfAbsent(preparedTask, batch) != null) {
    // an entry for this task already exists and could not be taken over
    return false;
  }
  // publish the task in the coordination engine
  coordination.submitTask(preparedTask);
  return true;
}
/**
 * Blocks the caller until every task installed in {@code batch} is accounted for as done or
 * failed. Returns early when both the coordination and the in-memory task map report no
 * remaining work, when the stopper is stopped, or when the thread is interrupted (the interrupt
 * flag is restored in that case).
 * @param batch the batch to wait on; its monitor is held while its counters are inspected
 * @param status monitored task that is updated with the current progress
 */
private void waitForSplittingCompletion(TaskBatch batch, MonitoredTask status) {
  synchronized (batch) {
    while ((batch.done + batch.error) != batch.installed) {
      try {
        status.setStatus("Waiting for distributed tasks to finish. " + " scheduled="
            + batch.installed + " done=" + batch.done + " error=" + batch.error);
        int remaining = batch.installed - (batch.done + batch.error);
        int actual = activeTasks(batch);
        if (remaining != actual) {
          LOG.warn("Expected " + remaining + " active tasks, but actually there are " + actual);
        }
        int remainingTasks =
            ((BaseCoordinatedStateManager) server.getCoordinatedStateManager())
                .getSplitLogManagerCoordination().remainingTasksInCoordination();
        if (remainingTasks >= 0 && actual > remainingTasks) {
          LOG.warn("Expected at least " + actual + " tasks remaining, but actually there are "
              + remainingTasks);
        }
        if (remainingTasks == 0 || actual == 0) {
          LOG.warn("No more task remaining, splitting "
              + "should have completed. Remaining tasks is " + remainingTasks
              + ", active tasks in map " + actual);
          if (remainingTasks == 0 && actual == 0) {
            // Both views agree that nothing is left, so stop waiting on the counters.
            return;
          }
        }
        // Re-check at least every 100 ms even if nobody notifies the batch monitor.
        batch.wait(100);
        if (stopper.isStopped()) {
          LOG.warn("Stopped while waiting for log splits to be completed");
          return;
        }
      } catch (InterruptedException e) {
        LOG.warn("Interrupted while waiting for log splits to be completed");
        Thread.currentThread().interrupt();
        return;
      }
    }
  }
}
/**
 * Exposes the live in-memory task map for tests.
 * @return the task map itself (not a copy)
 */
@VisibleForTesting
ConcurrentMap<String, Task> getTasks() {
  return tasks;
}
/**
 * Counts the tasks in the task map that belong to the given batch and are still in progress.
 * @param batch the batch whose tasks are counted (compared by reference)
 * @return number of in-progress tasks of that batch
 */
private int activeTasks(final TaskBatch batch) {
  int active = 0;
  for (Task candidate : tasks.values()) {
    if (candidate.batch != batch) {
      continue;
    }
    if (candidate.status != TerminationStatus.IN_PROGRESS) {
      continue;
    }
    active++;
  }
  return active;
}
/**
* It removes recovering regions under /hbase/recovering-regions/[encoded region name] so that the
* region server hosting the region can allow reads to the recovered region
* @param serverNames servers which are just recovered
* @param isMetaRecovery whether current recovery is for the meta region on
* serverNames
*/
private void removeRecoveringRegions(final Set serverNames, Boolean isMetaRecovery) {
if (!isLogReplaying()) {
// the function is only used in WALEdit direct replay mode
return;
}
if (serverNames == null || serverNames.isEmpty()) return;
Set recoveredServerNameSet = new HashSet();
for (ServerName tmpServerName : serverNames) {
recoveredServerNameSet.add(tmpServerName.getServerName());
}
this.recoveringRegionLock.lock();
try {
((BaseCoordinatedStateManager) server.getCoordinatedStateManager())
.getSplitLogManagerCoordination().removeRecoveringRegions(recoveredServerNameSet,
isMetaRecovery);
} catch (IOException e) {
LOG.warn("removeRecoveringRegions got exception. Will retry", e);
if (serverNames != null && !serverNames.isEmpty()) {
this.failedRecoveringRegionDeletions.add(new Pair, Boolean>(serverNames,
isMetaRecovery));
}
} finally {
this.recoveringRegionLock.unlock();
}
}
/**
 * It removes stale recovering regions under /hbase/recovering-regions/[encoded region name]
 * during master initialization phase.
 * @param failedServers A set of known failed servers; null is treated as an empty set
 * @throws IOException if the coordination call fails
 */
void removeStaleRecoveringRegions(final Set<ServerName> failedServers) throws IOException,
    InterruptedIOException {
  // The coordination works on the string form of the server names.
  Set<String> knownFailedServers = new HashSet<String>();
  if (failedServers != null) {
    for (ServerName tmpServerName : failedServers) {
      knownFailedServers.add(tmpServerName.getServerName());
    }
  }

  this.recoveringRegionLock.lock();
  try {
    ((BaseCoordinatedStateManager) server.getCoordinatedStateManager())
        .getSplitLogManagerCoordination().removeStaleRecoveringRegions(knownFailedServers);
  } finally {
    this.recoveringRegionLock.unlock();
  }
}
/**
 * Registers a task for {@code path} in the in-memory task map, or takes over an existing orphan
 * entry (one with no batch or a dead batch) for the same path.
 * <p>
 * A null return does not always mean the batch will wait on the task: for an orphan task that
 * already succeeded, null is returned without incrementing {@code batch.installed}.
 * @param path key of the task in the task map
 * @param batch the batch that will wait for the task
 * @return null on success, existing task on error
 */
private Task createTaskIfAbsent(String path, TaskBatch batch) {
Task oldtask;
// batch.installed is only changed via this function and
// a single thread touches batch.installed.
Task newtask = new Task();
newtask.batch = batch;
oldtask = tasks.putIfAbsent(path, newtask);
if (oldtask == null) {
// No entry existed: the new task is now tracked and counted against the batch.
batch.installed++;
return null;
}
// new task was not used.
// Inspect and possibly adopt the existing entry while holding its monitor.
synchronized (oldtask) {
if (oldtask.isOrphan()) {
if (oldtask.status == SUCCESS) {
// The task is already done. Do not install the batch for this
// task because it might be too late for setDone() to update
// batch.done. There is no need for the batch creator to wait for
// this task to complete.
return (null);
}
if (oldtask.status == IN_PROGRESS) {
// Adopt the running orphan: this batch now waits for it.
oldtask.batch = batch;
batch.installed++;
LOG.debug("Previously orphan task " + path + " is now being waited upon");
return null;
}
// A failed task has to be gone before the path can be resubmitted; wait on the task's
// monitor until its status leaves FAILURE.
// NOTE(review): the notifier is not visible in this file, presumably the coordination's
// delete callback; confirm.
while (oldtask.status == FAILURE) {
LOG.debug("wait for status of task " + path + " to change to DELETED");
SplitLogCounters.tot_mgr_wait_for_zk_delete.incrementAndGet();
try {
oldtask.wait();
} catch (InterruptedException e) {
Thread.currentThread().interrupt();
LOG.warn("Interrupted when waiting for znode delete callback");
// fall through to return failure
break;
}
}
if (oldtask.status != DELETED) {
LOG.warn("Failure because previously failed task"
+ " state still present. Waiting for znode delete callback" + " path=" + path);
return oldtask;
}
// reinsert the newTask and it must succeed this time
Task t = tasks.putIfAbsent(path, newtask);
if (t == null) {
batch.installed++;
return null;
}
LOG.fatal("Logic error. Deleted task still present in tasks map");
assert false : "Deleted task still present in tasks map";
return t;
}
// The existing task belongs to a live batch, i.e. another caller is already waiting on it.
LOG.warn("Failure because two threads can't wait for the same task; path=" + path);
return oldtask;
}
}
/**
 * Returns the task registered under {@code path}, registering a fresh batch-less (orphan) task
 * first when none exists.
 * @param path key of the task in the task map
 * @return the already registered task, or the newly created orphan task
 */
Task findOrCreateOrphanTask(String path) {
  final Task candidate = new Task();
  final Task existing = tasks.putIfAbsent(path, candidate);
  if (existing != null) {
    return existing;
  }
  LOG.info("creating orphan task " + path);
  SplitLogCounters.tot_mgr_orphan_task_acquired.incrementAndGet();
  return candidate;
}
/**
 * Shuts down the chore service and cancels the timeout monitor, skipping whichever of the two
 * is not set.
 */
public void stop() {
  final ChoreService service = choreService;
  if (null != service) {
    service.shutdown();
  }
  final TimeoutMonitor monitor = timeoutMonitor;
  if (null != monitor) {
    monitor.cancel(true);
  }
}
/**
 * Records a dead split-log worker. The worker's tasks are not resubmitted here; the
 * TimeoutMonitor picks the recorded workers up on its next run.
 * @param workerName the worker that died
 */
void handleDeadWorker(ServerName workerName) {
  // resubmit the tasks on the TimeoutMonitor thread. Makes it easier
  // to reason about concurrency. Makes it easier to retry.
  synchronized (deadWorkersLock) {
    if (deadWorkers == null) {
      deadWorkers = new HashSet<ServerName>(100);
    }
    deadWorkers.add(workerName);
  }
  LOG.info("dead splitlog worker " + workerName);
}
/**
 * Records several dead split-log workers at once; the TimeoutMonitor consumes the recorded set
 * on its next run.
 * @param serverNames the workers that died
 */
void handleDeadWorkers(Set<ServerName> serverNames) {
  synchronized (deadWorkersLock) {
    if (deadWorkers == null) {
      deadWorkers = new HashSet<ServerName>(100);
    }
    deadWorkers.addAll(serverNames);
  }
  LOG.info("dead splitlog workers " + serverNames);
}
/**
 * Asks the split-log coordination to set the recovery mode, either from outstanding split log
 * tasks from before or from the current configuration setting.
 * @param isForInitialization passed through to the coordination unchanged
 * @throws IOException throws if it's impossible to set recovery mode
 */
public void setRecoveryMode(boolean isForInitialization) throws IOException {
  final BaseCoordinatedStateManager stateManager =
      (BaseCoordinatedStateManager) server.getCoordinatedStateManager();
  final SplitLogManagerCoordination coordination = stateManager.getSplitLogManagerCoordination();
  coordination.setRecoveryMode(isForInitialization);
}
/**
 * Marks the given regions of a server as recovering through the coordination engine. Does
 * nothing when {@code userRegions} is null or log replay is not active.
 * @param server the server whose regions are being recovered
 * @param userRegions the regions to mark, may be null
 * @throws InterruptedIOException declared for callers; thrown by the coordination call
 * @throws IOException if the coordination call fails
 */
public void markRegionsRecovering(ServerName server, Set<HRegionInfo> userRegions)
    throws InterruptedIOException, IOException {
  if (userRegions == null || (!isLogReplaying())) {
    return;
  }
  // Take the lock before entering the try block: if lock() itself failed inside the try, the
  // finally clause would call unlock() on a lock this thread does not hold.
  this.recoveringRegionLock.lock();
  try {
    // mark that we're creating recovering regions
    ((BaseCoordinatedStateManager) this.server.getCoordinatedStateManager())
        .getSplitLogManagerCoordination().markRegionsRecovering(server, userRegions);
  } finally {
    this.recoveringRegionLock.unlock();
  }
}
/**
 * Reports the coordination's replaying flag.
 * @return whether log is replaying; false when the server has no coordinated state manager
 */
public boolean isLogReplaying() {
  final BaseCoordinatedStateManager stateManager =
      (BaseCoordinatedStateManager) server.getCoordinatedStateManager();
  if (stateManager == null) {
    return false;
  }
  return stateManager.getSplitLogManagerCoordination().isReplaying();
}
/**
 * Reports the coordination's splitting flag.
 * @return whether log is splitting; false when the server has no coordinated state manager
 */
public boolean isLogSplitting() {
  final BaseCoordinatedStateManager stateManager =
      (BaseCoordinatedStateManager) server.getCoordinatedStateManager();
  if (stateManager == null) {
    return false;
  }
  return stateManager.getSplitLogManagerCoordination().isSplitting();
}
/**
 * Reads the recovery mode from the split-log coordination. Unlike {@link #isLogReplaying()},
 * this method does not guard against a missing coordinated state manager.
 * @return the current log recovery mode
 */
public RecoveryMode getRecoveryMode() {
  final BaseCoordinatedStateManager stateManager =
      (BaseCoordinatedStateManager) server.getCoordinatedStateManager();
  final SplitLogManagerCoordination coordination = stateManager.getSplitLogManagerCoordination();
  return coordination.getRecoveryMode();
}
/**
 * Counters for the group of tasks that one splitLogDistributed() call submitted together.
 * The calling thread waits on this object until all of its tasks are accounted for.
 * <p>
 * All access is synchronized.
 */
@InterfaceAudience.Private
public static class TaskBatch {
  /** Number of tasks registered for this batch. */
  public int installed = 0;
  /** Number of tasks of this batch counted as done. */
  public int done = 0;
  /** Number of tasks of this batch counted as failed. */
  public int error = 0;
  /** Set once the batch has been given up on; its tasks are then treated as orphans. */
  public volatile boolean isDead = false;

  @Override
  public String toString() {
    final StringBuilder text = new StringBuilder();
    text.append("installed = ").append(installed);
    text.append(" done = ").append(done);
    text.append(" error = ").append(error);
    return text.toString();
  }
}
/**
 * In-memory bookkeeping for a single split task.
 */
@InterfaceAudience.Private
public static class Task {
  public volatile long last_update;
  public volatile int last_version;
  public volatile ServerName cur_worker_name;
  public volatile TaskBatch batch;
  public volatile TerminationStatus status;
  public volatile AtomicInteger incarnation = new AtomicInteger(0);
  public final AtomicInteger unforcedResubmits = new AtomicInteger();
  public volatile boolean resubmitThresholdReached;

  /** Creates an unassigned task that is in progress and has no known version. */
  public Task() {
    last_version = -1;
    status = IN_PROGRESS;
    setUnassigned();
  }

  /** @return true when no batch waits on this task, or the waiting batch is dead */
  public boolean isOrphan() {
    if (batch == null) {
      return true;
    }
    return batch.isDead;
  }

  /** @return true when no worker is recorded for this task */
  public boolean isUnassigned() {
    return null == cur_worker_name;
  }

  /** Records a sign of life without touching version or worker. */
  public void heartbeatNoDetails(long time) {
    last_update = time;
  }

  /** Records a sign of life together with the observed version and the owning worker. */
  public void heartbeat(long time, int version, ServerName worker) {
    last_version = version;
    last_update = time;
    cur_worker_name = worker;
  }

  /** Clears the worker and resets the last update time to -1. */
  public void setUnassigned() {
    cur_worker_name = null;
    last_update = -1;
  }

  @Override
  public String toString() {
    final StringBuilder text = new StringBuilder();
    text.append("last_update = ").append(last_update);
    text.append(" last_version = ").append(last_version);
    text.append(" cur_worker_name = ").append(cur_worker_name);
    text.append(" status = ").append(status);
    text.append(" incarnation = ").append(incarnation);
    text.append(" resubmits = ").append(unforcedResubmits.get());
    text.append(" batch = ").append(batch);
    return text.toString();
  }
}
/**
* Periodically checks all active tasks and resubmits the ones that have timed out
*/
private class TimeoutMonitor extends ScheduledChore {
private long lastLog = 0;
public TimeoutMonitor(final int period, Stoppable stopper) {
super("SplitLogManager Timeout Monitor", stopper, period);
}
@Override
protected void chore() {
int resubmitted = 0;
int unassigned = 0;
int tot = 0;
boolean found_assigned_task = false;
Set localDeadWorkers;
synchronized (deadWorkersLock) {
localDeadWorkers = deadWorkers;
deadWorkers = null;
}
for (Map.Entry e : tasks.entrySet()) {
String path = e.getKey();
Task task = e.getValue();
ServerName cur_worker = task.cur_worker_name;
tot++;
// don't easily resubmit a task which hasn't been picked up yet. It
// might be a long while before a SplitLogWorker is free to pick up a
// task. This is because a SplitLogWorker picks up a task one at a
// time. If we want progress when there are no region servers then we
// will have to run a SplitLogWorker thread in the Master.
if (task.isUnassigned()) {
unassigned++;
continue;
}
found_assigned_task = true;
if (localDeadWorkers != null && localDeadWorkers.contains(cur_worker)) {
SplitLogCounters.tot_mgr_resubmit_dead_server_task.incrementAndGet();
if (((BaseCoordinatedStateManager) server.getCoordinatedStateManager())
.getSplitLogManagerCoordination().resubmitTask(path, task, FORCE)) {
resubmitted++;
} else {
handleDeadWorker(cur_worker);
LOG.warn("Failed to resubmit task " + path + " owned by dead " + cur_worker
+ ", will retry.");
}
} else if (((BaseCoordinatedStateManager) server.getCoordinatedStateManager())
.getSplitLogManagerCoordination().resubmitTask(path, task, CHECK)) {
resubmitted++;
}
}
if (tot > 0) {
long now = EnvironmentEdgeManager.currentTime();
if (now > lastLog + 5000) {
lastLog = now;
LOG.info("total tasks = " + tot + " unassigned = " + unassigned + " tasks=" + tasks);
}
}
if (resubmitted > 0) {
LOG.info("resubmitted " + resubmitted + " out of " + tot + " tasks");
}
// If there are pending tasks and all of them have been unassigned for
// some time then put up a RESCAN node to ping the workers.
// ZKSplitlog.DEFAULT_UNASSIGNED_TIMEOUT is of the order of minutes
// because a. it is very unlikely that every worker had a
// transient error when trying to grab the task b. if there are no
// workers then all tasks wills stay unassigned indefinitely and the
// manager will be indefinitely creating RESCAN nodes. TODO may be the
// master should spawn both a manager and a worker thread to guarantee
// that there is always one worker in the system
if (tot > 0
&& !found_assigned_task
&& ((EnvironmentEdgeManager.currentTime() - lastTaskCreateTime) > unassignedTimeout)) {
for (Map.Entry e : tasks.entrySet()) {
String key = e.getKey();
Task task = e.getValue();
// we have to do task.isUnassigned() check again because tasks might
// have been asynchronously assigned. There is no locking required
// for these checks ... it is OK even if tryGetDataSetWatch() is
// called unnecessarily for a taskpath
if (task.isUnassigned() && (task.status != FAILURE)) {
// We just touch the znode to make sure its still there
((BaseCoordinatedStateManager) server.getCoordinatedStateManager())
.getSplitLogManagerCoordination().checkTaskStillAvailable(key);
}
}
((BaseCoordinatedStateManager) server.getCoordinatedStateManager())
.getSplitLogManagerCoordination().checkTasks();
SplitLogCounters.tot_mgr_resubmit_unassigned.incrementAndGet();
LOG.debug("resubmitting unassigned task(s) after timeout");
}
Set failedDeletions =
((BaseCoordinatedStateManager) server.getCoordinatedStateManager())
.getSplitLogManagerCoordination().getDetails().getFailedDeletions();
// Retry previously failed deletes
if (failedDeletions.size() > 0) {
List tmpPaths = new ArrayList(failedDeletions);
for (String tmpPath : tmpPaths) {
// deleteNode is an async call
((BaseCoordinatedStateManager) server.getCoordinatedStateManager())
.getSplitLogManagerCoordination().deleteTask(tmpPath);
}
failedDeletions.removeAll(tmpPaths);
}
// Garbage collect left-over
long timeInterval =
EnvironmentEdgeManager.currentTime()
- ((BaseCoordinatedStateManager) server.getCoordinatedStateManager())
.getSplitLogManagerCoordination().getLastRecoveryTime();
if (!failedRecoveringRegionDeletions.isEmpty()
|| (tot == 0 && tasks.size() == 0 && (timeInterval > checkRecoveringTimeThreshold))) {
// inside the function there have more checks before GC anything
if (!failedRecoveringRegionDeletions.isEmpty()) {
List, Boolean>> previouslyFailedDeletions =
new ArrayList, Boolean>>(failedRecoveringRegionDeletions);
failedRecoveringRegionDeletions.removeAll(previouslyFailedDeletions);
for (Pair, Boolean> failedDeletion : previouslyFailedDeletions) {
removeRecoveringRegions(failedDeletion.getFirst(), failedDeletion.getSecond());
}
} else {
removeRecoveringRegions(null, null);
}
}
}
}
/**
 * Mode passed to the coordination when asking it to resubmit a task. In this file, FORCE is
 * used for tasks owned by a dead worker and CHECK for all other assigned tasks.
 */
public enum ResubmitDirective {
  CHECK,
  FORCE
}
/**
 * Life-cycle state of a split task. {@link #toString()} yields the lower-case status message
 * rather than the constant name.
 */
public enum TerminationStatus {
  IN_PROGRESS("in_progress"),
  SUCCESS("success"),
  FAILURE("failure"),
  DELETED("deleted");

  // Final: enum constants are shared by every thread and must not carry mutable state.
  final String statusMsg;

  TerminationStatus(String msg) {
    statusMsg = msg;
  }

  @Override
  public String toString() {
    return statusMsg;
  }
}
}