/*
 *
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.apache.hadoop.hbase.replication.regionserver;

import java.io.IOException;
import java.util.List;
import java.util.Map;
import java.util.concurrent.PriorityBlockingQueue;
import java.util.concurrent.TimeUnit;

import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hbase.Cell;
import org.apache.hadoop.hbase.CellUtil;
import org.apache.hadoop.hbase.MetaTableAccessor;
import org.apache.yetus.audience.InterfaceAudience;
import org.apache.hadoop.hbase.wal.WALEdit;
import org.apache.hadoop.hbase.replication.ReplicationEndpoint;
import org.apache.hadoop.hbase.replication.regionserver.ReplicationSourceWALReader.WALEntryBatch;
import org.apache.hadoop.hbase.shaded.protobuf.generated.WALProtos.BulkLoadDescriptor;
import org.apache.hadoop.hbase.shaded.protobuf.generated.WALProtos.StoreDescriptor;
import org.apache.hadoop.hbase.util.Bytes;
import org.apache.hadoop.hbase.util.EnvironmentEdgeManager;
import org.apache.hadoop.hbase.util.Threads;
import org.apache.hadoop.hbase.wal.WAL.Entry;

import org.apache.hadoop.hbase.shaded.com.google.common.cache.CacheBuilder;
import org.apache.hadoop.hbase.shaded.com.google.common.cache.CacheLoader;
import org.apache.hadoop.hbase.shaded.com.google.common.cache.LoadingCache;

/**
 * This thread reads entries from a queue and ships them. Entries are placed onto the queue by
 * {@link ReplicationSourceWALReader}.
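 *
 * <p>A minimal wiring sketch (names are illustrative; in practice ReplicationSource creates
 * and wires both the reader and the shipper):
 * <pre>{@code
 * ReplicationSourceShipper shipper =
 *     new ReplicationSourceShipper(conf, walGroupId, logQueue, source);
 * shipper.setWALReader(walReader); // the reader fills the batch queue this shipper drains
 * shipper.startup(handler);        // runs the shipping loop on a daemon thread
 * }</pre>
 */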
@InterfaceAudience.Private
public class ReplicationSourceShipper extends Thread {
  private static final Log LOG = LogFactory.getLog(ReplicationSourceShipper.class);

  // Hold the state of a replication worker thread
  public enum WorkerState {
    RUNNING,
    STOPPED,
    FINISHED,  // The worker is done processing a recovered queue
  }

  protected final Configuration conf;
  protected final String walGroupId;
  protected final PriorityBlockingQueue<Path> queue;
  protected final ReplicationSourceInterface source;

  // Last position in the log that we sent to ZooKeeper
  protected long lastLoggedPosition = -1;
  // Path of the current log
  protected volatile Path currentPath;
  // Current state of the worker thread
  private WorkerState state;
  protected ReplicationSourceWALReader entryReader;

  // How long should we sleep for each retry
  protected final long sleepForRetries;
  // Maximum number of retries before taking bold actions
  protected final int maxRetriesMultiplier;

  // Guava cache with a one-day TTL per key; tracks, per region, whether the
  // serial-replication wait can be skipped. Missing keys load as false.
  private final LoadingCache<String, Boolean> canSkipWaitingSet = CacheBuilder.newBuilder()
      .expireAfterAccess(1, TimeUnit.DAYS).build(
      new CacheLoader<String, Boolean>() {
        @Override
        public Boolean load(String key) throws Exception {
          return false;
        }
      }
  );

  public ReplicationSourceShipper(Configuration conf, String walGroupId,
      PriorityBlockingQueue<Path> queue, ReplicationSourceInterface source) {
    this.conf = conf;
    this.walGroupId = walGroupId;
    this.queue = queue;
    this.source = source;
    this.sleepForRetries =
        this.conf.getLong("replication.source.sleepforretries", 1000);    // 1 second
    this.maxRetriesMultiplier =
        this.conf.getInt("replication.source.maxretriesmultiplier", 300); // 5 minutes @ 1 sec per
  }

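  /**
   * Main loop: wait until the peer is enabled and the WAL reader has been wired in, then
   * repeatedly take a batch from the reader, wait until serial replication allows each region
   * in the batch to be pushed, and ship the batch.
   */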
  @Override
  public void run() {
    setWorkerState(WorkerState.RUNNING);
    // Loop until we close down
    while (isActive()) {
      int sleepMultiplier = 1;
      // Sleep until replication is enabled again
      if (!source.isPeerEnabled()) {
        if (sleepForRetries("Replication is disabled", sleepMultiplier)) {
          sleepMultiplier++;
        }
        continue;
      }

      while (entryReader == null) {
        if (sleepForRetries("Replication WAL entry reader thread not initialized",
          sleepMultiplier)) {
          sleepMultiplier++;
        }
      }

      try {
        WALEntryBatch entryBatch = entryReader.take();
        for (Map.Entry<String, Long> entry : entryBatch.getLastSeqIds().entrySet()) {
          waitingUntilCanPush(entry);
        }
        shipEdits(entryBatch);
      } catch (InterruptedException e) {
        LOG.trace("Interrupted while waiting for next replication entry batch", e);
        Thread.currentThread().interrupt();
      }
    }
    // If the worker exits the run loop without finishing its task, mark it as stopped.
    if (state != WorkerState.FINISHED) {
      setWorkerState(WorkerState.STOPPED);
    }
  }

  /**
   * Ship one batch of WAL entries to the replication endpoint, retrying until the endpoint
   * acknowledges it, then persist the new log position.
   */
  protected void shipEdits(WALEntryBatch entryBatch) {
    List<Entry> entries = entryBatch.getWalEntries();
    long lastReadPosition = entryBatch.getLastWalPosition();
    currentPath = entryBatch.getLastWalPath();
    int sleepMultiplier = 0;
    if (entries.isEmpty()) {
      if (lastLoggedPosition != lastReadPosition) {
        // Save positions to meta table before zk.
        updateSerialRepPositions(entryBatch.getLastSeqIds());
        updateLogPosition(lastReadPosition);
        // if there was nothing to ship and it's not an error
        // set "ageOfLastShippedOp" to <now> to indicate that we're current
        source.getSourceMetrics().setAgeOfLastShippedOp(EnvironmentEdgeManager.currentTime(),
          walGroupId);
      }
      return;
    }
    int currentSize = (int) entryBatch.getHeapSize();
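    // Retry loop: keep attempting to ship this batch until the endpoint acknowledges it or
    // the worker becomes inactive; only then is the log position advanced.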
    while (isActive()) {
      try {
        try {
          source.tryThrottle(currentSize);
        } catch (InterruptedException e) {
          LOG.debug("Interrupted while sleeping for throttling control");
          Thread.currentThread().interrupt();
          // the current thread may have been interrupted in order to terminate;
          // loop back to the while() condition to confirm this
          continue;
        }

        // create replicateContext here, so the entries can be GC'd upon return from this call
        // stack
        ReplicationEndpoint.ReplicateContext replicateContext =
            new ReplicationEndpoint.ReplicateContext();
        replicateContext.setEntries(entries).setSize(currentSize);
        replicateContext.setWalGroupId(walGroupId);

        long startTimeNs = System.nanoTime();
        // send the edits to the endpoint. Will block until the edits are shipped and acknowledged
        boolean replicated = source.getReplicationEndpoint().replicate(replicateContext);
        long endTimeNs = System.nanoTime();

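        // A false return means the endpoint could not replicate the batch; retry the same
        // batch without advancing the log position.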
        if (!replicated) {
          continue;
        } else {
          sleepMultiplier = Math.max(sleepMultiplier - 1, 0);
        }

        if (this.lastLoggedPosition != lastReadPosition) {
          //Clean up hfile references
          int size = entries.size();
          for (int i = 0; i < size; i++) {
            cleanUpHFileRefs(entries.get(i).getEdit());
          }

          // Save positions to meta table before zk.
          updateSerialRepPositions(entryBatch.getLastSeqIds());
          //Log and clean up WAL logs
          updateLogPosition(lastReadPosition);
        }

        source.postShipEdits(entries, currentSize);
        // FIXME check relationship between wal group and overall
        source.getSourceMetrics().shipBatch(entryBatch.getNbOperations(), currentSize,
          entryBatch.getNbHFiles());
        source.getSourceMetrics().setAgeOfLastShippedOp(
          entries.get(entries.size() - 1).getKey().getWriteTime(), walGroupId);
        if (LOG.isTraceEnabled()) {
          LOG.trace("Replicated " + entries.size() + " entries or " + entryBatch.getNbOperations()
              + " operations in " + ((endTimeNs - startTimeNs) / 1000000) + " ms");
        }
        break;
      } catch (Exception ex) {
        LOG.warn(source.getReplicationEndpoint().getClass().getName() + " threw unknown exception:"
            + org.apache.hadoop.util.StringUtils.stringifyException(ex));
        if (sleepForRetries("ReplicationEndpoint threw exception", sleepMultiplier)) {
          sleepMultiplier++;
        }
      }
    }
  }

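  /**
   * For serial replication: block until it is this source's turn to push the given region's
   * entries, then remember that fact so later batches can skip the wait. A non-positive
   * sequence id marks a region close, after which the cached skip flag is dropped.
   */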
  private void waitingUntilCanPush(Map.Entry<String, Long> entry) {
    String key = entry.getKey();
    long seq = entry.getValue();
    boolean deleteKey = false;
    if (seq <= 0) {
      // There is a REGION_CLOSE marker; we cannot continue skipping after this entry.
      deleteKey = true;
      seq = -seq;
    }

    if (!canSkipWaitingSet.getUnchecked(key)) {
      try {
        source.getSourceManager().waitUntilCanBePushed(Bytes.toBytes(key), seq, source.getPeerId());
      } catch (IOException e) {
        LOG.error("waitUntilCanBePushed fail", e);
        throw new RuntimeException("waitUntilCanBePushed fail");
      } catch (InterruptedException e) {
        LOG.warn("waitUntilCanBePushed interrupted", e);
        Thread.currentThread().interrupt();
      }
      canSkipWaitingSet.put(key, true);
    }
    if (deleteKey) {
      canSkipWaitingSet.invalidate(key);
    }
  }

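  /**
   * Drop the bulk-load HFile references carried by a shipped edit so the underlying files are
   * no longer pinned for replication.
   */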
  private void cleanUpHFileRefs(WALEdit edit) throws IOException {
    String peerId = source.getPeerId();
    if (peerId.contains("-")) {
      // peerClusterZnode will be in the form peerId + "-" + rsZNode.
      // A peerId will not have "-" in its name, see HBASE-11394
      peerId = peerId.split("-")[0];
    }
    List<Cell> cells = edit.getCells();
    int totalCells = cells.size();
    for (int i = 0; i < totalCells; i++) {
      Cell cell = cells.get(i);
      if (CellUtil.matchingQualifier(cell, WALEdit.BULK_LOAD)) {
        BulkLoadDescriptor bld = WALEdit.getBulkLoadDescriptor(cell);
        List<StoreDescriptor> stores = bld.getStoresList();
        int totalStores = stores.size();
        for (int j = 0; j < totalStores; j++) {
          List<String> storeFileList = stores.get(j).getStoreFileList();
          source.getSourceManager().cleanUpHFileRefs(peerId, storeFileList);
          source.getSourceMetrics().decrSizeOfHFileRefsQueue(storeFileList.size());
        }
      }
    }
  }

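  /**
   * Record the shipped position for this source and let the manager clean up WALs that are no
   * longer needed.
   */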
  protected void updateLogPosition(long lastReadPosition) {
    source.getSourceManager().logPositionAndCleanOldLogs(currentPath, source.getPeerClusterZnode(),
      lastReadPosition, false, false);
    lastLoggedPosition = lastReadPosition;
  }

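  /**
   * Persist the last pushed sequence ids for serial replication to the meta table. This runs
   * before the ZooKeeper position update (see the callers above) so the positions are not lost
   * on a crash.
   */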
  private void updateSerialRepPositions(Map<String, Long> lastPositionsForSerialScope) {
    try {
      MetaTableAccessor.updateReplicationPositions(source.getSourceManager().getConnection(),
        source.getPeerId(), lastPositionsForSerialScope);
    } catch (IOException e) {
      LOG.error("updateReplicationPositions fail", e);
      throw new RuntimeException("updateReplicationPositions fail");
    }
  }

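  /**
   * Start this shipper as a daemon thread, named after the caller's thread, the WAL group and
   * the peer cluster znode.
   */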
  public void startup(UncaughtExceptionHandler handler) {
    String name = Thread.currentThread().getName();
    Threads.setDaemonThreadRunning(this, name + ".replicationSource." + walGroupId + ","
        + source.getPeerClusterZnode(), handler);
  }

  public PriorityBlockingQueue<Path> getLogQueue() {
    return this.queue;
  }

  public Path getCurrentPath() {
    return this.entryReader.getCurrentPath();
  }

  public long getCurrentPosition() {
    return this.lastLoggedPosition;
  }

  public void setWALReader(ReplicationSourceWALReader entryReader) {
    this.entryReader = entryReader;
  }

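  /**
   * Position to start reading the WAL from; always 0 here, overridden by shippers that resume
   * a recovered queue.
   */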
  public long getStartPosition() {
    return 0;
  }

  protected boolean isActive() {
    return source.isSourceActive() && state == WorkerState.RUNNING && !isInterrupted();
  }

  public void setWorkerState(WorkerState state) {
    this.state = state;
  }

  public WorkerState getWorkerState() {
    return state;
  }

  public void stopWorker() {
    setWorkerState(WorkerState.STOPPED);
  }

  public boolean isFinished() {
    return state == WorkerState.FINISHED;
  }

  /**
   * Sleep between retries, scaling the base interval by the multiplier.
   * @param msg Why we sleep
   * @param sleepMultiplier by how many times the default sleeping time is augmented
   * @return True if {@code sleepMultiplier} is &lt; {@code maxRetriesMultiplier}
   */
  public boolean sleepForRetries(String msg, int sleepMultiplier) {
    try {
      if (LOG.isTraceEnabled()) {
        LOG.trace(msg + ", sleeping " + sleepForRetries + " times " + sleepMultiplier);
      }
      Thread.sleep(this.sleepForRetries * sleepMultiplier);
    } catch (InterruptedException e) {
      LOG.debug("Interrupted while sleeping between retries");
      Thread.currentThread().interrupt();
    }
    return sleepMultiplier < maxRetriesMultiplier;
  }
}