org.apache.hadoop.hbase.master.assignment.SplitTableRegionProcedure Maven / Gradle / Ivy

Go to download
Show more of this group Show more artifacts with this name
Show all versions of hbase-server Show documentation
Server functionality for HBase
There is a newer version: 3.0.0-beta-1
/**
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package org.apache.hadoop.hbase.master.assignment;

import java.io.IOException;
import java.io.InterruptedIOException;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collection;
import java.util.Collections;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.concurrent.Callable;
import java.util.concurrent.ExecutionException;
import java.util.concurrent.ExecutorService;
import java.util.concurrent.Executors;
import java.util.concurrent.Future;
import java.util.concurrent.TimeUnit;
import java.util.stream.Collectors;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hbase.DoNotRetryIOException;
import org.apache.hadoop.hbase.HConstants;
import org.apache.hadoop.hbase.ServerName;
import org.apache.hadoop.hbase.TableName;
import org.apache.hadoop.hbase.UnknownRegionException;
import org.apache.hadoop.hbase.client.ColumnFamilyDescriptor;
import org.apache.hadoop.hbase.client.MasterSwitchType;
import org.apache.hadoop.hbase.client.Mutation;
import org.apache.hadoop.hbase.client.RegionInfo;
import org.apache.hadoop.hbase.client.RegionInfoBuilder;
import org.apache.hadoop.hbase.client.RegionReplicaUtil;
import org.apache.hadoop.hbase.client.TableDescriptor;
import org.apache.hadoop.hbase.io.hfile.CacheConfig;
import org.apache.hadoop.hbase.master.MasterCoprocessorHost;
import org.apache.hadoop.hbase.master.MasterFileSystem;
import org.apache.hadoop.hbase.master.RegionState.State;
import org.apache.hadoop.hbase.master.assignment.RegionStates.RegionStateNode;
import org.apache.hadoop.hbase.master.normalizer.NormalizationPlan;
import org.apache.hadoop.hbase.master.procedure.AbstractStateMachineRegionProcedure;
import org.apache.hadoop.hbase.master.procedure.AbstractStateMachineTableProcedure;
import org.apache.hadoop.hbase.master.procedure.MasterProcedureEnv;
import org.apache.hadoop.hbase.master.procedure.MasterProcedureUtil;
import org.apache.hadoop.hbase.master.procedure.TableQueue;
import org.apache.hadoop.hbase.procedure2.ProcedureMetrics;
import org.apache.hadoop.hbase.procedure2.ProcedureStateSerializer;
import org.apache.hadoop.hbase.quotas.QuotaExceededException;
import org.apache.hadoop.hbase.regionserver.HRegionFileSystem;
import org.apache.hadoop.hbase.regionserver.HStore;
import org.apache.hadoop.hbase.regionserver.HStoreFile;
import org.apache.hadoop.hbase.regionserver.RegionSplitPolicy;
import org.apache.hadoop.hbase.regionserver.StoreFileInfo;
import org.apache.hadoop.hbase.util.Bytes;
import org.apache.hadoop.hbase.util.EnvironmentEdgeManager;
import org.apache.hadoop.hbase.util.FSUtils;
import org.apache.hadoop.hbase.util.Pair;
import org.apache.hadoop.hbase.util.Threads;
import org.apache.hadoop.hbase.wal.WALSplitter;
import org.apache.hadoop.util.ReflectionUtils;
import org.apache.yetus.audience.InterfaceAudience;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import org.apache.hbase.thirdparty.com.google.common.annotations.VisibleForTesting;

import org.apache.hadoop.hbase.shaded.protobuf.ProtobufUtil;
import org.apache.hadoop.hbase.shaded.protobuf.generated.AdminProtos.GetRegionInfoResponse;
import org.apache.hadoop.hbase.shaded.protobuf.generated.MasterProcedureProtos;
import org.apache.hadoop.hbase.shaded.protobuf.generated.MasterProcedureProtos.SplitTableRegionState;

/**
 * The procedure to split a region in a table.
 * Takes lock on the parent region.
 * It holds the lock for the life of the procedure.
 * Throws exception on construction if determines context hostile to spllt (cluster going
 * down or master is shutting down or table is disabled).
 */
@InterfaceAudience.Private
public class SplitTableRegionProcedure
    extends AbstractStateMachineRegionProcedure {
  private static final Logger LOG = LoggerFactory.getLogger(SplitTableRegionProcedure.class);
  private Boolean traceEnabled = null;
  private RegionInfo daughter_1_RI;
  private RegionInfo daughter_2_RI;
  private byte[] bestSplitRow;
  private RegionSplitPolicy splitPolicy;

  public SplitTableRegionProcedure() {
    // Required by the Procedure framework to create the procedure on replay
  }

  public SplitTableRegionProcedure(final MasterProcedureEnv env,
      final RegionInfo regionToSplit, final byte[] splitRow) throws IOException {
    super(env, regionToSplit);
    preflightChecks(env, true);
    // When procedure goes to run in its prepare step, it also does these checkOnline checks. Here
    // we fail-fast on construction. There it skips the split with just a warning.
    checkOnline(env, regionToSplit);
    this.bestSplitRow = splitRow;
    checkSplittable(env, regionToSplit, bestSplitRow);
    final TableName table = regionToSplit.getTable();
    final long rid = getDaughterRegionIdTimestamp(regionToSplit);
    this.daughter_1_RI = RegionInfoBuilder.newBuilder(table)
        .setStartKey(regionToSplit.getStartKey())
        .setEndKey(bestSplitRow)
        .setSplit(false)
        .setRegionId(rid)
        .build();
    this.daughter_2_RI = RegionInfoBuilder.newBuilder(table)
        .setStartKey(bestSplitRow)
        .setEndKey(regionToSplit.getEndKey())
        .setSplit(false)
        .setRegionId(rid)
        .build();
    TableDescriptor htd = env.getMasterServices().getTableDescriptors().get(getTableName());
    if(htd.getRegionSplitPolicyClassName() != null) {
      // Since we don't have region reference here, creating the split policy instance without it.
      // This can be used to invoke methods which don't require Region reference. This instantiation
      // of a class on Master-side though it only makes sense on the RegionServer-side is
      // for Phoenix Local Indexing. Refer HBASE-12583 for more information.
      Class clazz =
          RegionSplitPolicy.getSplitPolicyClass(htd, env.getMasterConfiguration());
      this.splitPolicy = ReflectionUtils.newInstance(clazz, env.getMasterConfiguration());
    }
  }

  /**
   * Check whether there are recovered.edits in the parent closed region.
   * @param env master env
   * @throws IOException IOException
   */
  static boolean hasRecoveredEdits(MasterProcedureEnv env, RegionInfo ri) throws IOException {
    return WALSplitter.hasRecoveredEdits(env.getMasterConfiguration(), ri);
  }

  /**
   * Check whether the region is splittable
   * @param env MasterProcedureEnv
   * @param regionToSplit parent Region to be split
   * @param splitRow if splitRow is not specified, will first try to get bestSplitRow from RS
   * @throws IOException
   */
  private void checkSplittable(final MasterProcedureEnv env,
      final RegionInfo regionToSplit, final byte[] splitRow) throws IOException {
    // Ask the remote RS if this region is splittable.
    // If we get an IOE, report it along w/ the failure so can see why we are not splittable at this time.
    if(regionToSplit.getReplicaId() != RegionInfo.DEFAULT_REPLICA_ID) {
      throw new IllegalArgumentException ("Can't invoke split on non-default regions directly");
    }
    RegionStateNode node =
        env.getAssignmentManager().getRegionStates().getRegionStateNode(getParentRegion());
    IOException splittableCheckIOE = null;
    boolean splittable = false;
    if (node != null) {
      try {
        if (bestSplitRow == null || bestSplitRow.length == 0) {
          LOG.info("splitKey isn't explicitly specified, " + " will try to find a best split key from RS");
        }
        // Always set bestSplitRow request as true here,
        // need to call Region#checkSplit to check it splittable or not
        GetRegionInfoResponse response =
            Util.getRegionInfoResponse(env, node.getRegionLocation(), node.getRegionInfo(), true);
        if(bestSplitRow == null || bestSplitRow.length == 0) {
          bestSplitRow = response.hasBestSplitRow() ? response.getBestSplitRow().toByteArray() : null;
        }
        splittable = response.hasSplittable() && response.getSplittable();

        if (LOG.isDebugEnabled()) {
          LOG.debug("Splittable=" + splittable + " " + node.toShortString());
        }
      } catch (IOException e) {
        splittableCheckIOE = e;
      }
    }

    if (!splittable) {
      IOException e = new DoNotRetryIOException(regionToSplit.getShortNameToLog() + " NOT splittable");
      if (splittableCheckIOE != null) e.initCause(splittableCheckIOE);
      throw e;
    }

    if(bestSplitRow == null || bestSplitRow.length == 0) {
      throw new DoNotRetryIOException("Region not splittable because bestSplitPoint = null, "
          + "maybe table is too small for auto split. For force split, try specifying split row");
    }

    if (Bytes.equals(regionToSplit.getStartKey(), bestSplitRow)) {
      throw new DoNotRetryIOException(
        "Split row is equal to startkey: " + Bytes.toStringBinary(splitRow));
    }

    if (!regionToSplit.containsRow(bestSplitRow)) {
      throw new DoNotRetryIOException(
        "Split row is not inside region key range splitKey:" + Bytes.toStringBinary(splitRow) +
        " region: " + regionToSplit);
    }
  }

  /**
   * Calculate daughter regionid to use.
   * @param hri Parent {@link RegionInfo}
   * @return Daughter region id (timestamp) to use.
   */
  private static long getDaughterRegionIdTimestamp(final RegionInfo hri) {
    long rid = EnvironmentEdgeManager.currentTime();
    // Regionid is timestamp.  Can't be less than that of parent else will insert
    // at wrong location in hbase:meta (See HBASE-710).
    if (rid < hri.getRegionId()) {
      LOG.warn("Clock skew; parent regions id is " + hri.getRegionId() +
        " but current time here is " + rid);
      rid = hri.getRegionId() + 1;
    }
    return rid;
  }

  @Override
  protected Flow executeFromState(final MasterProcedureEnv env, final SplitTableRegionState state)
      throws InterruptedException {
    LOG.trace("{} execute state={}", this, state);

    try {
      switch (state) {
        case SPLIT_TABLE_REGION_PREPARE:
          if (prepareSplitRegion(env)) {
            setNextState(SplitTableRegionState.SPLIT_TABLE_REGION_PRE_OPERATION);
            break;
          } else {
            return Flow.NO_MORE_STATE;
          }
        case SPLIT_TABLE_REGION_PRE_OPERATION:
          preSplitRegion(env);
          setNextState(SplitTableRegionState.SPLIT_TABLE_REGION_CLOSE_PARENT_REGION);
          break;
        case SPLIT_TABLE_REGION_CLOSE_PARENT_REGION:
          addChildProcedure(createUnassignProcedures(env, getRegionReplication(env)));
          setNextState(SplitTableRegionState.SPLIT_TABLE_REGIONS_CHECK_CLOSED_REGIONS);
          break;
        case SPLIT_TABLE_REGIONS_CHECK_CLOSED_REGIONS:
          if (hasRecoveredEdits(env, getRegion())) {
            // If recovered edits, reopen parent region and then re-run the close by going back to
            // SPLIT_TABLE_REGION_CLOSE_PARENT_REGION. We might have to cycle here a few times
            // (TODO: Add being able to open a region in read-only mode). Open the primary replica
            // in this case only where we just want to pickup the left-out replicated.edits.
            LOG.info("Found recovered.edits under {}, reopen so we pickup these missed edits!",
                getRegion().getEncodedName());
            addChildProcedure(env.getAssignmentManager().createAssignProcedure(getParentRegion()));
            setNextState(SplitTableRegionState.SPLIT_TABLE_REGION_CLOSE_PARENT_REGION);
          } else {
            setNextState(SplitTableRegionState.SPLIT_TABLE_REGION_CREATE_DAUGHTER_REGIONS);
          }
          break;
        case SPLIT_TABLE_REGION_CREATE_DAUGHTER_REGIONS:
          createDaughterRegions(env);
          setNextState(SplitTableRegionState.SPLIT_TABLE_REGION_WRITE_MAX_SEQUENCE_ID_FILE);
          break;
        case SPLIT_TABLE_REGION_WRITE_MAX_SEQUENCE_ID_FILE:
          writeMaxSequenceIdFile(env);
          setNextState(SplitTableRegionState.SPLIT_TABLE_REGION_PRE_OPERATION_BEFORE_META);
          break;
        case SPLIT_TABLE_REGION_PRE_OPERATION_BEFORE_META:
          preSplitRegionBeforeMETA(env);
          setNextState(SplitTableRegionState.SPLIT_TABLE_REGION_UPDATE_META);
          break;
        case SPLIT_TABLE_REGION_UPDATE_META:
          updateMeta(env);
          setNextState(SplitTableRegionState.SPLIT_TABLE_REGION_PRE_OPERATION_AFTER_META);
          break;
        case SPLIT_TABLE_REGION_PRE_OPERATION_AFTER_META:
          preSplitRegionAfterMETA(env);
          setNextState(SplitTableRegionState.SPLIT_TABLE_REGION_OPEN_CHILD_REGIONS);
          break;
        case SPLIT_TABLE_REGION_OPEN_CHILD_REGIONS:
          addChildProcedure(createAssignProcedures(env, getRegionReplication(env)));
          setNextState(SplitTableRegionState.SPLIT_TABLE_REGION_POST_OPERATION);
          break;
        case SPLIT_TABLE_REGION_POST_OPERATION:
          postSplitRegion(env);
          return Flow.NO_MORE_STATE;
        default:
          throw new UnsupportedOperationException(this + " unhandled state=" + state);
      }
    } catch (IOException e) {
      String msg = "Splitting " + getParentRegion().getEncodedName() + ", " + this;
      if (!isRollbackSupported(state)) {
        // We reach a state that cannot be rolled back. We just need to keep retrying.
        LOG.warn(msg, e);
      } else {
        LOG.error(msg, e);
        setFailure("master-split-regions", e);
      }
    }
    // if split fails,  need to call ((HRegion)parent).clearSplit() when it is a force split
    return Flow.HAS_MORE_STATE;
  }

  /**
   * To rollback {@link SplitTableRegionProcedure}, an AssignProcedure is asynchronously
   * submitted for parent region to be split (rollback doesn't wait on the completion of the
   * AssignProcedure) . This can be improved by changing rollback() to support sub-procedures.
   * See HBASE-19851 for details.
   */
  @Override
  protected void rollbackState(final MasterProcedureEnv env, final SplitTableRegionState state)
      throws IOException, InterruptedException {
    if (isTraceEnabled()) {
      LOG.trace(this + " rollback state=" + state);
    }

    try {
      switch (state) {
        case SPLIT_TABLE_REGION_POST_OPERATION:
        case SPLIT_TABLE_REGION_OPEN_CHILD_REGIONS:
        case SPLIT_TABLE_REGION_PRE_OPERATION_AFTER_META:
        case SPLIT_TABLE_REGION_UPDATE_META:
          // PONR
          throw new UnsupportedOperationException(this + " unhandled state=" + state);
        case SPLIT_TABLE_REGION_PRE_OPERATION_BEFORE_META:
          break;
        case SPLIT_TABLE_REGION_CREATE_DAUGHTER_REGIONS:
        case SPLIT_TABLE_REGION_WRITE_MAX_SEQUENCE_ID_FILE:
          // Doing nothing, as re-open parent region would clean up daughter region directories.
          break;
        case SPLIT_TABLE_REGIONS_CHECK_CLOSED_REGIONS:
          // Doing nothing, in SPLIT_TABLE_REGION_CLOSE_PARENT_REGION,
          // we will bring parent region online
          break;
        case SPLIT_TABLE_REGION_CLOSE_PARENT_REGION:
          openParentRegion(env);
          break;
        case SPLIT_TABLE_REGION_PRE_OPERATION:
          postRollBackSplitRegion(env);
          break;
        case SPLIT_TABLE_REGION_PREPARE:
          break; // nothing to do
        default:
          throw new UnsupportedOperationException(this + " unhandled state=" + state);
      }
    } catch (IOException e) {
      // This will be retried. Unless there is a bug in the code,
      // this should be just a "temporary error" (e.g. network down)
      LOG.warn("pid=" + getProcId() + " failed rollback attempt step " + state +
          " for splitting the region "
        + getParentRegion().getEncodedName() + " in table " + getTableName(), e);
      throw e;
    }
  }

  /*
   * Check whether we are in the state that can be rollback
   */
  @Override
  protected boolean isRollbackSupported(final SplitTableRegionState state) {
    switch (state) {
      case SPLIT_TABLE_REGION_POST_OPERATION:
      case SPLIT_TABLE_REGION_OPEN_CHILD_REGIONS:
      case SPLIT_TABLE_REGION_PRE_OPERATION_AFTER_META:
      case SPLIT_TABLE_REGION_UPDATE_META:
        // It is not safe to rollback if we reach to these states.
        return false;
      default:
        break;
    }
    return true;
  }

  @Override
  protected SplitTableRegionState getState(final int stateId) {
    return SplitTableRegionState.forNumber(stateId);
  }

  @Override
  protected int getStateId(final SplitTableRegionState state) {
    return state.getNumber();
  }

  @Override
  protected SplitTableRegionState getInitialState() {
    return SplitTableRegionState.SPLIT_TABLE_REGION_PREPARE;
  }

  @Override
  protected void serializeStateData(ProcedureStateSerializer serializer)
      throws IOException {
    super.serializeStateData(serializer);

    final MasterProcedureProtos.SplitTableRegionStateData.Builder splitTableRegionMsg =
        MasterProcedureProtos.SplitTableRegionStateData.newBuilder()
        .setUserInfo(MasterProcedureUtil.toProtoUserInfo(getUser()))
        .setParentRegionInfo(ProtobufUtil.toRegionInfo(getRegion()))
        .addChildRegionInfo(ProtobufUtil.toRegionInfo(daughter_1_RI))
        .addChildRegionInfo(ProtobufUtil.toRegionInfo(daughter_2_RI));
    serializer.serialize(splitTableRegionMsg.build());
  }

  @Override
  protected void deserializeStateData(ProcedureStateSerializer serializer)
      throws IOException {
    super.deserializeStateData(serializer);

    final MasterProcedureProtos.SplitTableRegionStateData splitTableRegionsMsg =
        serializer.deserialize(MasterProcedureProtos.SplitTableRegionStateData.class);
    setUser(MasterProcedureUtil.toUserInfo(splitTableRegionsMsg.getUserInfo()));
    setRegion(ProtobufUtil.toRegionInfo(splitTableRegionsMsg.getParentRegionInfo()));
    assert(splitTableRegionsMsg.getChildRegionInfoCount() == 2);
    daughter_1_RI = ProtobufUtil.toRegionInfo(splitTableRegionsMsg.getChildRegionInfo(0));
    daughter_2_RI = ProtobufUtil.toRegionInfo(splitTableRegionsMsg.getChildRegionInfo(1));
  }

  @Override
  public void toStringClassDetails(StringBuilder sb) {
    sb.append(getClass().getSimpleName());
    sb.append(" table=");
    sb.append(getTableName());
    sb.append(", parent=");
    sb.append(getParentRegion().getShortNameToLog());
    sb.append(", daughterA=");
    sb.append(daughter_1_RI.getShortNameToLog());
    sb.append(", daughterB=");
    sb.append(daughter_2_RI.getShortNameToLog());
  }

  private RegionInfo getParentRegion() {
    return getRegion();
  }

  @Override
  public TableOperationType getTableOperationType() {
    return TableOperationType.REGION_SPLIT;
  }

  @Override
  protected ProcedureMetrics getProcedureMetrics(MasterProcedureEnv env) {
    return env.getAssignmentManager().getAssignmentManagerMetrics().getSplitProcMetrics();
  }

  private byte[] getSplitRow() {
    return daughter_2_RI.getStartKey();
  }

  private static final State[] EXPECTED_SPLIT_STATES = new State[] { State.OPEN, State.CLOSED };

  /**
   * Prepare to Split region.
   * @param env MasterProcedureEnv
   */
  @VisibleForTesting
  public boolean prepareSplitRegion(final MasterProcedureEnv env) throws IOException {
    // Fail if we are taking snapshot for the given table
    if (env.getMasterServices().getSnapshotManager()
      .isTakingSnapshot(getParentRegion().getTable())) {
      setFailure(new IOException("Skip splitting region " + getParentRegion().getShortNameToLog() +
        ", because we are taking snapshot for the table " + getParentRegion().getTable()));
      return false;
    }
    // Check whether the region is splittable
    RegionStateNode node =
        env.getAssignmentManager().getRegionStates().getRegionStateNode(getParentRegion());

    if (node == null) {
      throw new UnknownRegionException(getParentRegion().getRegionNameAsString());
    }

    RegionInfo parentHRI = node.getRegionInfo();
    if (parentHRI == null) {
      LOG.info("Unsplittable; parent region is null; node={}", node);
      return false;
    }
    // Lookup the parent HRI state from the AM, which has the latest updated info.
    // Protect against the case where concurrent SPLIT requests came in and succeeded
    // just before us.
    if (node.isInState(State.SPLIT)) {
      LOG.info("Split of " + parentHRI + " skipped; state is already SPLIT");
      return false;
    }
    if (parentHRI.isSplit() || parentHRI.isOffline()) {
      LOG.info("Split of " + parentHRI + " skipped because offline/split.");
      return false;
    }

    // expected parent to be online or closed
    if (!node.isInState(EXPECTED_SPLIT_STATES)) {
      // We may have SPLIT already?
      setFailure(new IOException("Split " + parentHRI.getRegionNameAsString() +
          " FAILED because state=" + node.getState() + "; expected " +
          Arrays.toString(EXPECTED_SPLIT_STATES)));
      return false;
    }

    // Since we have the lock and the master is coordinating the operation
    // we are always able to split the region
    if (!env.getMasterServices().isSplitOrMergeEnabled(MasterSwitchType.SPLIT)) {
      LOG.warn("pid=" + getProcId() + " split switch is off! skip split of " + parentHRI);
      setFailure(new IOException("Split region " + parentHRI.getRegionNameAsString() +
          " failed due to split switch off"));
      return false;
    }

    // See HBASE-21395, for 2.0.x and 2.1.x only.
    // A safe fence here, if there is a table procedure going on, abort the split.
    // There some cases that may lead to table procedure roll back (more serious
    // than roll back the split procedure here), or the split parent was brought online
    // by the table procedure because of the race between split procedure and table procedure
    List tableProcedures = env
        .getMasterServices().getProcedures().stream()
        .filter(p -> p instanceof AbstractStateMachineTableProcedure)
        .map(p -> (AbstractStateMachineTableProcedure) p)
        .filter(p -> p.getTableName().equals(getParentRegion().getTable()) &&
            !p.isFinished() && TableQueue.requireTableExclusiveLock(p))
        .collect(Collectors.toList());
    if (tableProcedures != null && tableProcedures.size() > 0) {
      throw new DoNotRetryIOException(tableProcedures.get(0).toString()
          + " is going on against the same table, abort the split of " + this
          .toString());
    }

    // set node state as SPLITTING
    node.setState(State.SPLITTING);

    return true;
  }

  /**
   * Action before splitting region in a table.
   * @param env MasterProcedureEnv
   */
  private void preSplitRegion(final MasterProcedureEnv env)
      throws IOException, InterruptedException {
    final MasterCoprocessorHost cpHost = env.getMasterCoprocessorHost();
    if (cpHost != null) {
      cpHost.preSplitRegionAction(getTableName(), getSplitRow(), getUser());
    }

    // TODO: Clean up split and merge. Currently all over the place.
    // Notify QuotaManager and RegionNormalizer
    try {
      env.getMasterServices().getMasterQuotaManager().onRegionSplit(this.getParentRegion());
    } catch (QuotaExceededException e) {
      env.getMasterServices().getRegionNormalizer().planSkipped(this.getParentRegion(),
          NormalizationPlan.PlanType.SPLIT);
      throw e;
    }
  }

  /**
   * Action after rollback a split table region action.
   * @param env MasterProcedureEnv
   */
  private void postRollBackSplitRegion(final MasterProcedureEnv env) throws IOException {
    final MasterCoprocessorHost cpHost = env.getMasterCoprocessorHost();
    if (cpHost != null) {
      cpHost.postRollBackSplitRegionAction(getUser());
    }
  }

  /**
   * Rollback close parent region
   * @param env MasterProcedureEnv
   */
  private void openParentRegion(final MasterProcedureEnv env) throws IOException {
    // Check whether the region is closed; if so, open it in the same server
    final int regionReplication = getRegionReplication(env);
    final ServerName serverName = getParentRegionServerName(env);

    final AssignProcedure[] procs = createAssignProcedures(regionReplication, env,
      Collections.singletonList(getParentRegion()), serverName);
    env.getMasterServices().getMasterProcedureExecutor().submitProcedures(procs);
  }

  /**
   * Create daughter regions
   * @param env MasterProcedureEnv
   */
  @VisibleForTesting
  public void createDaughterRegions(final MasterProcedureEnv env) throws IOException {
    final MasterFileSystem mfs = env.getMasterServices().getMasterFileSystem();
    final Path tabledir = FSUtils.getTableDir(mfs.getRootDir(), getTableName());
    final FileSystem fs = mfs.getFileSystem();
    HRegionFileSystem regionFs = HRegionFileSystem.openRegionFromFileSystem(
      env.getMasterConfiguration(), fs, tabledir, getParentRegion(), false);
    regionFs.createSplitsDir(daughter_1_RI, daughter_2_RI);

    Pair expectedReferences = splitStoreFiles(env, regionFs);

    assertReferenceFileCount(fs, expectedReferences.getFirst(),
      regionFs.getSplitsDir(daughter_1_RI));
    //Move the files from the temporary .splits to the final /table/region directory
    regionFs.commitDaughterRegion(daughter_1_RI);
    assertReferenceFileCount(fs, expectedReferences.getFirst(),
      new Path(tabledir, daughter_1_RI.getEncodedName()));

    assertReferenceFileCount(fs, expectedReferences.getSecond(),
      regionFs.getSplitsDir(daughter_2_RI));
    regionFs.commitDaughterRegion(daughter_2_RI);
    assertReferenceFileCount(fs, expectedReferences.getSecond(),
      new Path(tabledir, daughter_2_RI.getEncodedName()));
  }

  /**
   * Create Split directory
   * @param env MasterProcedureEnv
   */
  private Pair splitStoreFiles(final MasterProcedureEnv env,
      final HRegionFileSystem regionFs) throws IOException {
    final MasterFileSystem mfs = env.getMasterServices().getMasterFileSystem();
    final Configuration conf = env.getMasterConfiguration();
    TableDescriptor htd = env.getMasterServices().getTableDescriptors().get(getTableName());
    // The following code sets up a thread pool executor with as many slots as
    // there's files to split. It then fires up everything, waits for
    // completion and finally checks for any exception
    //
    // Note: splitStoreFiles creates daughter region dirs under the parent splits dir
    // Nothing to unroll here if failure -- re-run createSplitsDir will
    // clean this up.
    int nbFiles = 0;
    final Map> files =
        new HashMap>(htd.getColumnFamilyCount());
    for (ColumnFamilyDescriptor cfd : htd.getColumnFamilies()) {
      String family = cfd.getNameAsString();
      Collection sfis = regionFs.getStoreFiles(family);
      if (sfis == null) {
        continue;
      }
      Collection filteredSfis = null;
      for (StoreFileInfo sfi : sfis) {
        // Filter. There is a lag cleaning up compacted reference files. They get cleared
        // after a delay in case outstanding Scanners still have references. Because of this,
        // the listing of the Store content may have straggler reference files. Skip these.
        // It should be safe to skip references at this point because we checked above with
        // the region if it thinks it is splittable and if we are here, it thinks it is
        // splitable.
        if (sfi.isReference()) {
          LOG.info("Skipping split of " + sfi + "; presuming ready for archiving.");
          continue;
        }
        if (filteredSfis == null) {
          filteredSfis = new ArrayList(sfis.size());
          files.put(family, filteredSfis);
        }
        filteredSfis.add(sfi);
        nbFiles++;
      }
    }
    if (nbFiles == 0) {
      // no file needs to be splitted.
      return new Pair(0, 0);
    }
    // Max #threads is the smaller of the number of storefiles or the default max determined above.
    int maxThreads = Math.min(
      conf.getInt(HConstants.REGION_SPLIT_THREADS_MAX,
        conf.getInt(HStore.BLOCKING_STOREFILES_KEY, HStore.DEFAULT_BLOCKING_STOREFILE_COUNT)),
      nbFiles);
    LOG.info("pid=" + getProcId() + " splitting " + nbFiles + " storefiles, region=" +
        getParentRegion().getShortNameToLog() + ", threads=" + maxThreads);
    final ExecutorService threadPool = Executors.newFixedThreadPool(maxThreads,
      Threads.getNamedThreadFactory("StoreFileSplitter-%1$d"));
    final List>> futures = new ArrayList>>(nbFiles);

    // Split each store file.
    for (Map.Entry>e: files.entrySet()) {
      byte [] familyName = Bytes.toBytes(e.getKey());
      final ColumnFamilyDescriptor hcd = htd.getColumnFamily(familyName);
      final Collection storeFiles = e.getValue();
      if (storeFiles != null && storeFiles.size() > 0) {
        final CacheConfig cacheConf = new CacheConfig(conf, hcd);
        for (StoreFileInfo storeFileInfo: storeFiles) {
          StoreFileSplitter sfs =
              new StoreFileSplitter(regionFs, familyName, new HStoreFile(mfs.getFileSystem(),
                  storeFileInfo, conf, cacheConf, hcd.getBloomFilterType(), true));
          futures.add(threadPool.submit(sfs));
        }
      }
    }
    // Shutdown the pool
    threadPool.shutdown();

    // Wait for all the tasks to finish.
    // When splits ran on the RegionServer, how-long-to-wait-configuration was named
    // hbase.regionserver.fileSplitTimeout. If set, use its value.
    long fileSplitTimeout = conf.getLong("hbase.master.fileSplitTimeout",
      conf.getLong("hbase.regionserver.fileSplitTimeout", 600000));
    try {
      boolean stillRunning = !threadPool.awaitTermination(fileSplitTimeout, TimeUnit.MILLISECONDS);
      if (stillRunning) {
        threadPool.shutdownNow();
        // wait for the thread to shutdown completely.
        while (!threadPool.isTerminated()) {
          Thread.sleep(50);
        }
        throw new IOException(
            "Took too long to split the" + " files and create the references, aborting split");
      }
    } catch (InterruptedException e) {
      throw (InterruptedIOException) new InterruptedIOException().initCause(e);
    }

    int daughterA = 0;
    int daughterB = 0;
    // Look for any exception
    for (Future> future : futures) {
      try {
        Pair p = future.get();
        daughterA += p.getFirst() != null ? 1 : 0;
        daughterB += p.getSecond() != null ? 1 : 0;
      } catch (InterruptedException e) {
        throw (InterruptedIOException) new InterruptedIOException().initCause(e);
      } catch (ExecutionException e) {
        throw new IOException(e);
      }
    }

    if (LOG.isDebugEnabled()) {
      LOG.debug("pid=" + getProcId() + " split storefiles for region " +
          getParentRegion().getShortNameToLog() + " Daughter A: " + daughterA +
          " storefiles, Daughter B: " + daughterB + " storefiles.");
    }
    return new Pair(daughterA, daughterB);
  }

  private void assertReferenceFileCount(final FileSystem fs, final int expectedReferenceFileCount,
      final Path dir) throws IOException {
    if (expectedReferenceFileCount != 0 &&
        expectedReferenceFileCount != FSUtils.getRegionReferenceFileCount(fs, dir)) {
      throw new IOException("Failing split. Expected reference file count isn't equal.");
    }
  }

  private Pair splitStoreFile(HRegionFileSystem regionFs, byte[] family, HStoreFile sf)
    throws IOException {
    if (LOG.isDebugEnabled()) {
      LOG.debug("pid=" + getProcId() + " splitting started for store file: " +
          sf.getPath() + " for region: " + getParentRegion().getShortNameToLog());
    }

    final byte[] splitRow = getSplitRow();
    final String familyName = Bytes.toString(family);
    final Path path_first = regionFs.splitStoreFile(this.daughter_1_RI, familyName, sf, splitRow,
        false, splitPolicy);
    final Path path_second = regionFs.splitStoreFile(this.daughter_2_RI, familyName, sf, splitRow,
       true, splitPolicy);
    if (LOG.isDebugEnabled()) {
      LOG.debug("pid=" + getProcId() + " splitting complete for store file: " +
          sf.getPath() + " for region: " + getParentRegion().getShortNameToLog());
    }
    return new Pair(path_first, path_second);
  }

  /**
   * Utility class used to do the file splitting / reference writing
   * in parallel instead of sequentially.
   */
  private class StoreFileSplitter implements Callable> {
    private final HRegionFileSystem regionFs;
    private final byte[] family;
    private final HStoreFile sf;

    /**
     * Constructor that takes what it needs to split
     * @param regionFs the file system
     * @param family Family that contains the store file
     * @param sf which file
     */
    public StoreFileSplitter(HRegionFileSystem regionFs, byte[] family, HStoreFile sf) {
      this.regionFs = regionFs;
      this.sf = sf;
      this.family = family;
    }

    @Override
    public Pair call() throws IOException {
      return splitStoreFile(regionFs, family, sf);
    }
  }

  /**
   * Post split region actions before the Point-of-No-Return step
   * @param env MasterProcedureEnv
   **/
  private void preSplitRegionBeforeMETA(final MasterProcedureEnv env)
      throws IOException, InterruptedException {
    final List metaEntries = new ArrayList();
    final MasterCoprocessorHost cpHost = env.getMasterCoprocessorHost();
    if (cpHost != null) {
      cpHost.preSplitBeforeMETAAction(getSplitRow(), metaEntries, getUser());
      try {
        for (Mutation p : metaEntries) {
          RegionInfo.parseRegionName(p.getRow());
        }
      } catch (IOException e) {
        LOG.error("pid=" + getProcId() + " row key of mutation from coprocessor not parsable as "
            + "region name."
            + "Mutations from coprocessor should only for hbase:meta table.");
        throw e;
      }
    }
  }

  /**
   * Add daughter regions to META
   * @param env MasterProcedureEnv
   */
  private void updateMeta(final MasterProcedureEnv env) throws IOException {
    env.getAssignmentManager().markRegionAsSplit(getParentRegion(), getParentRegionServerName(env),
      daughter_1_RI, daughter_2_RI);
  }

  /**
   * Pre split region actions after the Point-of-No-Return step
   * @param env MasterProcedureEnv
   **/
  private void preSplitRegionAfterMETA(final MasterProcedureEnv env)
      throws IOException, InterruptedException {
    final MasterCoprocessorHost cpHost = env.getMasterCoprocessorHost();
    if (cpHost != null) {
      cpHost.preSplitAfterMETAAction(getUser());
    }
  }

  /**
   * Post split region actions
   * @param env MasterProcedureEnv
   **/
  private void postSplitRegion(final MasterProcedureEnv env) throws IOException {
    final MasterCoprocessorHost cpHost = env.getMasterCoprocessorHost();
    if (cpHost != null) {
      cpHost.postCompletedSplitRegionAction(daughter_1_RI, daughter_2_RI, getUser());
    }
  }

  private ServerName getParentRegionServerName(final MasterProcedureEnv env) {
    return env.getMasterServices().getAssignmentManager()
      .getRegionStates().getRegionServerOfRegion(getParentRegion());
  }

  private UnassignProcedure[] createUnassignProcedures(final MasterProcedureEnv env,
      final int regionReplication) {
    final UnassignProcedure[] procs = new UnassignProcedure[regionReplication];
    for (int i = 0; i < procs.length; ++i) {
      final RegionInfo hri = RegionReplicaUtil.getRegionInfoForReplica(getParentRegion(), i);
      procs[i] = env.getAssignmentManager().
          createUnassignProcedure(hri, null, true, !RegionReplicaUtil.isDefaultReplica(hri));
    }
    return procs;
  }

  private AssignProcedure[] createAssignProcedures(final MasterProcedureEnv env,
      final int regionReplication) {
    final ServerName targetServer = getParentRegionServerName(env);
    List daughterRegions = new ArrayList(2);
    daughterRegions.add(daughter_1_RI);
    daughterRegions.add(daughter_2_RI);
    return createAssignProcedures(regionReplication, env, daughterRegions, targetServer);
  }

  private AssignProcedure[] createAssignProcedures(final int regionReplication,
      final MasterProcedureEnv env, final List hris, final ServerName serverName) {
    final AssignProcedure[] procs = new AssignProcedure[hris.size() * regionReplication];
    int procsIdx = 0;
    for (int i = 0; i < hris.size(); ++i) {
      // create procs for the primary region with the target server.
      final RegionInfo hri = RegionReplicaUtil.getRegionInfoForReplica(hris.get(i), 0);
      procs[procsIdx++] = env.getAssignmentManager().createAssignProcedure(hri, serverName);
    }
    if (regionReplication > 1) {
      List regionReplicas =
          new ArrayList(hris.size() * (regionReplication - 1));
      for (int i = 0; i < hris.size(); ++i) {
        // We don't include primary replica here
        for (int j = 1; j < regionReplication; ++j) {
          regionReplicas.add(RegionReplicaUtil.getRegionInfoForReplica(hris.get(i), j));
        }
      }
      // for the replica regions exclude the primary region's server and call LB's roundRobin
      // assignment
      AssignProcedure[] replicaAssignProcs = env.getAssignmentManager()
          .createRoundRobinAssignProcedures(regionReplicas, Collections.singletonList(serverName));
      for (AssignProcedure proc : replicaAssignProcs) {
        procs[procsIdx++] = proc;
      }
    }
    return procs;
  }

  private int getRegionReplication(final MasterProcedureEnv env) throws IOException {
    final TableDescriptor htd = env.getMasterServices().getTableDescriptors().get(getTableName());
    return htd.getRegionReplication();
  }

  private void writeMaxSequenceIdFile(MasterProcedureEnv env) throws IOException {
    MasterFileSystem fs = env.getMasterFileSystem();
    long maxSequenceId = WALSplitter.getMaxRegionSequenceId(env.getMasterConfiguration(),
      getParentRegion(), fs::getFileSystem, fs::getWALFileSystem);
    if (maxSequenceId > 0) {
      WALSplitter.writeRegionSequenceIdFile(fs.getWALFileSystem(),
        getWALRegionDir(env, daughter_1_RI), maxSequenceId);
      WALSplitter.writeRegionSequenceIdFile(fs.getWALFileSystem(),
        getWALRegionDir(env, daughter_2_RI), maxSequenceId);
    }
  }

  /**
   * The procedure could be restarted from a different machine. If the variable is null, we need to
   * retrieve it.
   * @return traceEnabled
   */
  private boolean isTraceEnabled() {
    if (traceEnabled == null) {
      traceEnabled = LOG.isTraceEnabled();
    }
    return traceEnabled;
  }

  @Override
  protected boolean abort(MasterProcedureEnv env) {
    // Abort means rollback. We can't rollback all steps. HBASE-18018 added abort to all
    // Procedures. Here is a Procedure that has a PONR and cannot be aborted wants it enters this
    // range of steps; what do we do for these should an operator want to cancel them? HBASE-20022.
    return isRollbackSupported(getCurrentState())? super.abort(env): false;
  }
}