org.apache.phoenix.hbase.index.IndexRegionObserver Maven / Gradle / Ivy

Go to download
Show more of this group Show more artifacts with this name
Show all versions of phoenix-server-hbase-2.6
Phoenix HBase Server Side JAR
The newest version!
/*
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.apache.phoenix.hbase.index;


import static org.apache.hadoop.hbase.HConstants.OperationStatusCode.SUCCESS;
import static org.apache.phoenix.coprocessorclient.BaseScannerRegionObserverConstants.UPSERT_CF;
import static org.apache.phoenix.coprocessorclient.BaseScannerRegionObserverConstants.UPSERT_STATUS_CQ;
import static org.apache.phoenix.hbase.index.util.IndexManagementUtil.rethrowIndexingException;

import java.io.ByteArrayInputStream;
import java.io.DataInputStream;
import java.io.EOFException;
import java.io.IOException;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collection;
import java.util.Collections;
import java.util.HashMap;
import java.util.HashSet;
import java.util.Iterator;
import java.util.List;
import java.util.Map;
import java.util.Optional;
import java.util.TreeSet;
import java.util.concurrent.ConcurrentHashMap;

import org.apache.hadoop.hbase.client.TableDescriptor;
import org.apache.hadoop.hbase.regionserver.BloomType;
import org.apache.phoenix.execute.MutationState;
import org.apache.phoenix.expression.CaseExpression;
import org.apache.phoenix.index.PhoenixIndexBuilderHelper;
import org.apache.phoenix.schema.types.PInteger;
import org.apache.phoenix.thirdparty.com.google.common.base.Preconditions;
import org.apache.phoenix.thirdparty.com.google.common.collect.ArrayListMultimap;
import org.apache.phoenix.thirdparty.com.google.common.collect.ListMultimap;
import org.apache.phoenix.thirdparty.com.google.common.collect.Lists;
import org.apache.phoenix.thirdparty.com.google.common.collect.Maps;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hbase.Cell;
import org.apache.hadoop.hbase.CellComparator;
import org.apache.hadoop.hbase.CellUtil;
import org.apache.hadoop.hbase.CoprocessorEnvironment;
import org.apache.hadoop.hbase.DoNotRetryIOException;
import org.apache.hadoop.hbase.HConstants;
import org.apache.hadoop.hbase.TableName;
import org.apache.hadoop.hbase.client.Delete;
import org.apache.hadoop.hbase.client.Durability;
import org.apache.hadoop.hbase.client.Increment;
import org.apache.hadoop.hbase.client.Mutation;
import org.apache.hadoop.hbase.client.Put;
import org.apache.hadoop.hbase.client.Result;
import org.apache.hadoop.hbase.client.Scan;
import org.apache.hadoop.hbase.coprocessor.ObserverContext;
import org.apache.hadoop.hbase.coprocessor.RegionCoprocessor;
import org.apache.hadoop.hbase.coprocessor.RegionCoprocessorEnvironment;
import org.apache.hadoop.hbase.coprocessor.RegionObserver;
import org.apache.hadoop.hbase.io.ImmutableBytesWritable;
import org.apache.hadoop.hbase.regionserver.MiniBatchOperationInProgress;
import org.apache.hadoop.hbase.regionserver.OperationStatus;
import org.apache.hadoop.hbase.regionserver.Region;
import org.apache.hadoop.hbase.regionserver.RegionScanner;
import org.apache.hadoop.hbase.util.Bytes;
import org.apache.hadoop.hbase.util.Pair;
import org.apache.hadoop.hbase.wal.WALEdit;
import org.apache.hadoop.hbase.wal.WALKey;
import org.apache.hadoop.io.WritableUtils;
import org.apache.htrace.Span;
import org.apache.htrace.Trace;
import org.apache.htrace.TraceScope;
import org.apache.phoenix.compile.ScanRanges;
import org.apache.phoenix.coprocessor.DelegateRegionCoprocessorEnvironment;
import org.apache.phoenix.coprocessor.generated.PTableProtos;
import org.apache.phoenix.exception.DataExceedsCapacityException;
import org.apache.phoenix.expression.Expression;
import org.apache.phoenix.expression.ExpressionType;
import org.apache.phoenix.expression.KeyValueColumnExpression;
import org.apache.phoenix.expression.visitor.ExpressionVisitor;
import org.apache.phoenix.expression.visitor.StatelessTraverseAllExpressionVisitor;
import org.apache.phoenix.filter.SkipScanFilter;
import org.apache.phoenix.hbase.index.LockManager.RowLock;
import org.apache.phoenix.hbase.index.builder.FatalIndexBuildingFailureException;
import org.apache.phoenix.hbase.index.builder.IndexBuildManager;
import org.apache.phoenix.hbase.index.builder.IndexBuilder;
import org.apache.phoenix.hbase.index.covered.IndexMetaData;
import org.apache.phoenix.hbase.index.covered.update.ColumnReference;
import org.apache.phoenix.hbase.index.metrics.MetricsIndexerSource;
import org.apache.phoenix.hbase.index.metrics.MetricsIndexerSourceFactory;
import org.apache.phoenix.hbase.index.table.HTableInterfaceReference;
import org.apache.phoenix.hbase.index.util.GenericKeyValueBuilder;
import org.apache.phoenix.hbase.index.util.ImmutableBytesPtr;
import org.apache.phoenix.hbase.index.write.IndexWriter;
import org.apache.phoenix.hbase.index.write.LazyParallelWriterIndexCommitter;
import org.apache.phoenix.index.IndexMaintainer;
import org.apache.phoenix.index.PhoenixIndexMetaData;
import org.apache.phoenix.query.KeyRange;
import org.apache.phoenix.query.QueryConstants;
import org.apache.phoenix.query.QueryServicesOptions;
import org.apache.phoenix.schema.PColumn;
import org.apache.phoenix.schema.PRow;
import org.apache.phoenix.schema.PTable;
import org.apache.phoenix.schema.PTableImpl;
import org.apache.phoenix.schema.PTableType;
import org.apache.phoenix.schema.SortOrder;
import org.apache.phoenix.schema.tuple.MultiKeyValueTuple;
import org.apache.phoenix.schema.transform.TransformMaintainer;
import org.apache.phoenix.schema.types.PVarbinary;
import org.apache.phoenix.trace.TracingUtils;
import org.apache.phoenix.trace.util.NullSpan;
import org.apache.phoenix.util.ByteUtil;
import org.apache.phoenix.util.ClientUtil;
import org.apache.phoenix.util.EncodedColumnsUtil;
import org.apache.phoenix.util.EnvironmentEdgeManager;
import org.apache.phoenix.util.IndexUtil;
import org.apache.phoenix.util.PhoenixKeyValueUtil;
import org.apache.phoenix.util.SchemaUtil;
import org.apache.phoenix.util.ServerIndexUtil;
import org.apache.phoenix.util.ServerUtil.ConnectionType;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import java.util.Set;
import java.util.concurrent.CountDownLatch;
import java.util.concurrent.TimeUnit;

import static org.apache.phoenix.coprocessor.IndexRebuildRegionScanner.applyNew;
import static org.apache.phoenix.coprocessor.IndexRebuildRegionScanner.removeColumn;
import static org.apache.phoenix.index.PhoenixIndexBuilderHelper.ATOMIC_OP_ATTRIB;
import static org.apache.phoenix.util.ByteUtil.EMPTY_BYTE_ARRAY;

/**
 * Do all the work of managing index updates from a single coprocessor. All Puts/Delets are passed
 * to an {@link IndexBuilder} to determine the actual updates to make.
 * We don't need to implement {@link #postPut(ObserverContext, Put, WALEdit, Durability)} and
 * {@link #postDelete(ObserverContext, Delete, WALEdit, Durability)} hooks because
 * Phoenix always does batch mutations.
 * 
 */
public class IndexRegionObserver implements RegionCoprocessor, RegionObserver {

    private static final Logger LOG = LoggerFactory.getLogger(IndexRegionObserver.class);
    private static final OperationStatus IGNORE = new OperationStatus(SUCCESS);
    private static final OperationStatus NOWRITE = new OperationStatus(SUCCESS);
    public static final String PHOENIX_APPEND_METADATA_TO_WAL = "phoenix.append.metadata.to.wal";
    public static final boolean DEFAULT_PHOENIX_APPEND_METADATA_TO_WAL = false;
    /**
     * Class to represent pending data table rows
     * */
    private class PendingRow {
        private int count;
        private boolean usable;
        private ImmutableBytesPtr rowKey;
        private BatchMutateContext lastContext;

        PendingRow(ImmutableBytesPtr rowKey, BatchMutateContext context) {
            count = 1;
            usable = true;
            lastContext = context;
            this.rowKey = rowKey;
        }

        public boolean add(BatchMutateContext context) {
            synchronized (this) {
                if (usable) {
                    count++;
                    lastContext = context;
                    return true;
                }
            }
            return false;
        }

        public void remove() {
            synchronized (this) {
                count--;
                if (count == 0) {
                    pendingRows.remove(rowKey);
                    usable = false;
                }
            }
        }

        public int getCount() {
          return count;
      }

        public BatchMutateContext getLastContext() {
          return lastContext;
      }
    }

  private static boolean ignoreIndexRebuildForTesting  = false;
  private static boolean failPreIndexUpdatesForTesting = false;
  private static boolean failPostIndexUpdatesForTesting = false;
  private static boolean failDataTableUpdatesForTesting = false;

  public static void setIgnoreIndexRebuildForTesting(boolean ignore) { ignoreIndexRebuildForTesting = ignore; }

  public static void setFailPreIndexUpdatesForTesting(boolean fail) { failPreIndexUpdatesForTesting = fail; }

  public static void setFailPostIndexUpdatesForTesting(boolean fail) { failPostIndexUpdatesForTesting = fail; }

  public static void setFailDataTableUpdatesForTesting(boolean fail) {
      failDataTableUpdatesForTesting = fail;
  }

  public enum BatchMutatePhase {
      PRE, POST, FAILED
  }

  // Hack to get around not being able to save any state between
  // coprocessor calls. TODO: remove after HBASE-18127 when available

  /**
   * The concurrent batch of mutations is a set such that every pair of batches in this set has at least one common row.
   * Since a BatchMutateContext object of a batch is modified only after the row locks for all the rows that are mutated
   * by this batch are acquired, there can be only one thread can acquire the locks for its batch and safely access
   * all the batch contexts in the set of concurrent batches. Because of this, we do not read atomic variables or
   * additional locks to serialize the access to the BatchMutateContext objects.
   */

  public static class BatchMutateContext {
      private volatile BatchMutatePhase currentPhase = BatchMutatePhase.PRE;
      // The max of reference counts on the pending rows of this batch at the time this batch arrives
      private int maxPendingRowCount = 0;
      private final int clientVersion;
      // The collection of index mutations that will be applied before the data table mutations. The empty column (i.e.,
      // the verified column) will have the value false ("unverified") on these mutations
      private ListMultimap preIndexUpdates;
      // The collection of index mutations that will be applied after the data table mutations. The empty column (i.e.,
      // the verified column) will have the value true ("verified") on the put mutations
      private ListMultimap postIndexUpdates;
      // The collection of candidate index mutations that will be applied after the data table mutations
      private ListMultimap> indexUpdates;
      private List rowLocks = Lists.newArrayListWithExpectedSize(QueryServicesOptions.DEFAULT_MUTATE_BATCH_SIZE);
      //  TreeSet to improve locking efficiency and avoid deadlock (PHOENIX-6871 and HBASE-17924) 
      private Set rowsToLock = new TreeSet<>();
      // The current and next states of the data rows corresponding to the pending mutations
      private HashMap> dataRowStates;
      // The previous concurrent batch contexts
      private HashMap lastConcurrentBatchContext = null;
      // The latches of the threads waiting for this batch to complete
      private List waitList = null;
      private Map multiMutationMap;

      //list containing the original mutations from the MiniBatchOperationInProgress. Contains
      // any annotations we were sent by the client, and can be used in hooks that don't get
      // passed MiniBatchOperationInProgress, like preWALAppend()
      private List originalMutations;
      private boolean hasAtomic;
      private boolean hasDelete;
      private boolean hasUncoveredIndex;
      private boolean hasGlobalIndex;
      private boolean hasLocalIndex;
      private boolean hasTransform;
      public BatchMutateContext() {
          this.clientVersion = 0;
      }
      public BatchMutateContext(int clientVersion) {
          this.clientVersion = clientVersion;
      }

      public void populateOriginalMutations(MiniBatchOperationInProgress miniBatchOp) {
          originalMutations = new ArrayList(miniBatchOp.size());
          for (int k = 0; k < miniBatchOp.size(); k++) {
              originalMutations.add(miniBatchOp.getOperation(k));
          }
      }
      public List getOriginalMutations() {
          return originalMutations;
      }

      public BatchMutatePhase getCurrentPhase() {
          return currentPhase;
      }

      public Put getNextDataRowState(ImmutableBytesPtr rowKeyPtr) {
          Pair rowState = dataRowStates.get(rowKeyPtr);
          if (rowState != null) {
              return rowState.getSecond();
          }
          return null;
      }

      public CountDownLatch getCountDownLatch() {
          synchronized (this) {
              if (currentPhase != BatchMutatePhase.PRE) {
                  return null;
              }
              if (waitList == null) {
                  waitList = new ArrayList<>();
              }
              CountDownLatch countDownLatch = new CountDownLatch(1);
              waitList.add(countDownLatch);
              return countDownLatch;
          }
      }

      public void countDownAllLatches() {
          synchronized (this) {
              if (waitList != null) {
                  for (CountDownLatch countDownLatch : waitList) {
                      countDownLatch.countDown();
                  }
              }
          }
      }

      public int getMaxPendingRowCount() {
          return maxPendingRowCount;
      }
  }

  private ThreadLocal batchMutateContext =
          new ThreadLocal();

  /**
   * Configuration key for if the indexer should check the version of HBase is running. Generally,
   * you only want to ignore this for testing or for custom versions of HBase.
   */
  public static final String CHECK_VERSION_CONF_KEY = "com.saleforce.hbase.index.checkversion";

  public static final String INDEX_LAZY_POST_BATCH_WRITE = "org.apache.hadoop.hbase.index.lazy.post_batch.write";
  private static final boolean INDEX_LAZY_POST_BATCH_WRITE_DEFAULT = false;

  private static final String INDEXER_INDEX_WRITE_SLOW_THRESHOLD_KEY = "phoenix.indexer.slow.post.batch.mutate.threshold";
  private static final long INDEXER_INDEX_WRITE_SLOW_THRESHOLD_DEFAULT = 3_000;
  private static final String INDEXER_PRE_INCREMENT_SLOW_THRESHOLD_KEY = "phoenix.indexer.slow.pre.increment";
  private static final long INDEXER_PRE_INCREMENT_SLOW_THRESHOLD_DEFAULT = 3_000;

  // Index writers get invoked before and after data table updates
  protected IndexWriter preWriter;
  protected IndexWriter postWriter;

  protected IndexBuildManager builder;
  private LockManager lockManager;

  // The collection of pending data table rows
  private Map pendingRows = new ConcurrentHashMap<>();

  private MetricsIndexerSource metricSource;

  private boolean stopped;
  private boolean disabled;
  private long slowIndexPrepareThreshold;
  private long slowPreIncrementThreshold;
  private int rowLockWaitDuration;
  private int concurrentMutationWaitDuration;
  private String dataTableName;
  private boolean shouldWALAppend = DEFAULT_PHOENIX_APPEND_METADATA_TO_WAL;
  private boolean isNamespaceEnabled = false;
  private boolean useBloomFilter = false;
  private long lastTimestamp = 0;
  private List> batchesWithLastTimestamp = new ArrayList<>();
  private static final int DEFAULT_ROWLOCK_WAIT_DURATION = 30000;
  private static final int DEFAULT_CONCURRENT_MUTATION_WAIT_DURATION_IN_MS = 100;

  @Override
  public Optional getRegionObserver() {
    return Optional.of(this);
  }

  @Override
  public void start(CoprocessorEnvironment e) throws IOException {
      try {
        final RegionCoprocessorEnvironment env = (RegionCoprocessorEnvironment) e;
        String serverName = env.getServerName().getServerName();
        if (env.getConfiguration().getBoolean(CHECK_VERSION_CONF_KEY, true)) {
          // make sure the right version <-> combinations are allowed.
          String errormsg = Indexer.validateVersion(env.getHBaseVersion(), env.getConfiguration());
          if (errormsg != null) {
              throw new FatalIndexBuildingFailureException(errormsg);
          }
        }

        this.builder = new IndexBuildManager(env);
        // Clone the config since it is shared
        DelegateRegionCoprocessorEnvironment indexWriterEnv = new DelegateRegionCoprocessorEnvironment(env, ConnectionType.INDEX_WRITER_CONNECTION);
        // setup the actual index preWriter
        this.preWriter = new IndexWriter(indexWriterEnv, serverName + "-index-preWriter", false);
        if (env.getConfiguration().getBoolean(INDEX_LAZY_POST_BATCH_WRITE, INDEX_LAZY_POST_BATCH_WRITE_DEFAULT)) {
            this.postWriter = new IndexWriter(indexWriterEnv, new LazyParallelWriterIndexCommitter(), serverName + "-index-postWriter", false);
        }
        else {
            this.postWriter = this.preWriter;
        }

        this.rowLockWaitDuration = env.getConfiguration().getInt("hbase.rowlock.wait.duration",
                DEFAULT_ROWLOCK_WAIT_DURATION);
          this.lockManager = new LockManager();
          this.concurrentMutationWaitDuration = env.getConfiguration().getInt("phoenix.index.concurrent.wait.duration.ms",
                  DEFAULT_CONCURRENT_MUTATION_WAIT_DURATION_IN_MS);
          // Metrics impl for the Indexer -- avoiding unnecessary indirection for hadoop-1/2 compat
          this.metricSource = MetricsIndexerSourceFactory.getInstance().getIndexerSource();
          setSlowThresholds(e.getConfiguration());
          this.dataTableName = env.getRegionInfo().getTable().getNameAsString();
          this.shouldWALAppend = env.getConfiguration().getBoolean(PHOENIX_APPEND_METADATA_TO_WAL,
              DEFAULT_PHOENIX_APPEND_METADATA_TO_WAL);
          this.isNamespaceEnabled = SchemaUtil.isNamespaceMappingEnabled(PTableType.INDEX,
              env.getConfiguration());
          TableDescriptor tableDescriptor = env.getRegion().getTableDescriptor();
          BloomType bloomFilterType = tableDescriptor.getColumnFamilies()[0].getBloomFilterType();
          // when the table descriptor changes, the coproc is reloaded
          this.useBloomFilter = bloomFilterType == BloomType.ROW;
      } catch (NoSuchMethodError ex) {
          disabled = true;
          LOG.error("Must be too early a version of HBase. Disabled coprocessor ", ex);
      }
  }

  /**
   * Extracts the slow call threshold values from the configuration.
   */
  private void setSlowThresholds(Configuration c) {
      slowIndexPrepareThreshold = c.getLong(INDEXER_INDEX_WRITE_SLOW_THRESHOLD_KEY,
          INDEXER_INDEX_WRITE_SLOW_THRESHOLD_DEFAULT);
      slowPreIncrementThreshold = c.getLong(INDEXER_PRE_INCREMENT_SLOW_THRESHOLD_KEY,
          INDEXER_PRE_INCREMENT_SLOW_THRESHOLD_DEFAULT);
  }

  private String getCallTooSlowMessage(String callName, long duration, long threshold) {
      StringBuilder sb = new StringBuilder(64);
      sb.append("(callTooSlow) ").append(callName).append(" duration=").append(duration);
      sb.append("ms, threshold=").append(threshold).append("ms");
      return sb.toString();
  }

  @Override
  public void stop(CoprocessorEnvironment e) throws IOException {
    if (this.stopped) {
      return;
    }
    if (this.disabled) {
        return;
      }
    this.stopped = true;
    String msg = "Indexer is being stopped";
    this.builder.stop(msg);
    this.preWriter.stop(msg);
    this.postWriter.stop(msg);
  }

  /**
   * We use an Increment to serialize the ON DUPLICATE KEY clause so that the HBase plumbing
   * sets up the necessary locks and mvcc to allow an atomic update. The Increment is not a
   * real increment, though, it's really more of a Put. We translate the Increment into a
   * list of mutations, at most a single Put and Delete that are the changes upon executing
   * the list of ON DUPLICATE KEY clauses for this row.
   */
  @Override
  public Result preIncrementAfterRowLock(final ObserverContext e,
          final Increment inc) throws IOException {
      long start = EnvironmentEdgeManager.currentTimeMillis();
      try {
          List mutations = this.builder.executeAtomicOp(inc);
          if (mutations == null) {
              return null;
          }

          // Causes the Increment to be ignored as we're committing the mutations
          // ourselves below.
          e.bypass();
          // ON DUPLICATE KEY IGNORE will return empty list if row already exists
          // as no action is required in that case.
          if (!mutations.isEmpty()) {
              Region region = e.getEnvironment().getRegion();
              // Otherwise, submit the mutations directly here
                region.batchMutate(mutations.toArray(new Mutation[0]));
          }
          return Result.EMPTY_RESULT;
      } catch (Throwable t) {
          throw ClientUtil.createIOException(
                  "Unable to process ON DUPLICATE IGNORE for " +
                  e.getEnvironment().getRegion().getRegionInfo().getTable().getNameAsString() +
                  "(" + Bytes.toStringBinary(inc.getRow()) + ")", t);
      } finally {
          long duration = EnvironmentEdgeManager.currentTimeMillis() - start;
          if (duration >= slowIndexPrepareThreshold) {
              if (LOG.isDebugEnabled()) {
                  LOG.debug(getCallTooSlowMessage("preIncrementAfterRowLock", duration, slowPreIncrementThreshold));
              }
              metricSource.incrementSlowDuplicateKeyCheckCalls(dataTableName);
          }
          metricSource.updateDuplicateKeyCheckTime(dataTableName, duration);
      }
  }

  @Override
  public void preBatchMutate(ObserverContext c,
      MiniBatchOperationInProgress miniBatchOp) throws IOException {
      if (this.disabled) {
          return;
      }
      try {
          preBatchMutateWithExceptions(c, miniBatchOp);
          return;
      } catch (Throwable t) {
          rethrowIndexingException(t);
      }
      throw new RuntimeException(
        "Somehow didn't return an index update but also didn't propagate the failure to the client!");
  }

  private void ignoreAtomicOperations (MiniBatchOperationInProgress miniBatchOp) {
      for (int i = 0; i < miniBatchOp.size(); i++) {
          Mutation m = miniBatchOp.getOperation(i);
          if (this.builder.isAtomicOp(m)) {
              miniBatchOp.setOperationStatus(i, IGNORE);
          }
      }
  }

  private void populateRowsToLock(MiniBatchOperationInProgress miniBatchOp,
          BatchMutateContext context) {
      for (int i = 0; i < miniBatchOp.size(); i++) {
          Mutation m = miniBatchOp.getOperation(i);
          if (this.builder.isAtomicOp(m) || this.builder.isEnabled(m)) {
              ImmutableBytesPtr row = new ImmutableBytesPtr(m.getRow());
              context.rowsToLock.add(row);

          }
      }
  }

    /**
     * Add the mutations generated by the ON DUPLICATE KEY UPDATE to the current batch.
     * MiniBatchOperationInProgress#addOperationsFromCP() allows coprocessors to attach additional mutations
     * to the incoming mutation. These additional mutations are only executed if the status of the original
     * mutation is set to NOT_RUN. For atomic mutations, we want HBase to ignore the incoming mutation and
     * instead execute the mutations generated by the server for that atomic mutation. But we can’t achieve
     * this behavior just by setting the status of the original mutation to IGNORE because that will also
     * ignore the additional mutations added by the coprocessors. To get around this, we need to do a fixup
     * of the original mutation in the batch. Since we always generate one Put mutation from the incoming atomic
     * Put mutation, we can transfer the cells from the generated Put mutation to the original atomic Put mutation in the batch.
     * The additional mutations (Delete) can then be added to the operationsFromCoprocessors array.
     */
  private void addOnDupMutationsToBatch(MiniBatchOperationInProgress miniBatchOp,
                                          int index, List mutations) {
      List deleteMutations = Lists.newArrayListWithExpectedSize(mutations.size());
      for (Mutation m : mutations) {
          if (m instanceof Put) {
              // fix the incoming atomic mutation
              Mutation original = miniBatchOp.getOperation(index);
              original.getFamilyCellMap().putAll(m.getFamilyCellMap());
          } else if (m instanceof Delete) {
              deleteMutations.add((Delete)m);
          }
      }

      if (!deleteMutations.isEmpty()) {
          miniBatchOp.addOperationsFromCP(index,
              deleteMutations.toArray(new Mutation[deleteMutations.size()]));
      }
  }

    private void addOnDupMutationsToBatch(MiniBatchOperationInProgress miniBatchOp,
                                        BatchMutateContext context) throws IOException {
      for (int i = 0; i < miniBatchOp.size(); i++) {
          Mutation m = miniBatchOp.getOperation(i);
          if (this.builder.isAtomicOp(m) && m instanceof Put) {
              List mutations = generateOnDupMutations(context, (Put)m);
              if (!mutations.isEmpty()) {
                  addOnDupMutationsToBatch(miniBatchOp, i, mutations);
              } else {
                  // empty list of generated mutations implies
                  // 1) ON DUPLICATE KEY IGNORE if row already exists, OR
                  // 2) ON DUPLICATE KEY UPDATE if CASE expression is specified and in each of
                  // them the new value is the same as the old value in the ELSE-clause (empty
                  // cell timestamp will NOT be updated)
                  byte[] retVal = PInteger.INSTANCE.toBytes(0);
                  Cell cell = PhoenixKeyValueUtil.newKeyValue(m.getRow(), Bytes.toBytes(UPSERT_CF),
                          Bytes.toBytes(UPSERT_STATUS_CQ), 0, retVal, 0, retVal.length);
                  // put Result in OperationStatus for returning update status from conditional
                  // upserts, where 0 represents the row is not updated
                  Result result = Result.create(new ArrayList<>(Arrays.asList(cell)));
                  miniBatchOp.setOperationStatus(i,
                          new OperationStatus(SUCCESS, result));
              }
          }
      }
  }

  private void lockRows(BatchMutateContext context) throws IOException {
      for (ImmutableBytesPtr rowKey : context.rowsToLock) {
          context.rowLocks.add(lockManager.lockRow(rowKey, rowLockWaitDuration));
      }
  }

    private void unlockRows(BatchMutateContext context) throws IOException {
        for (RowLock rowLock : context.rowLocks) {
            rowLock.release();
        }
        context.rowLocks.clear();
    }

    private Collection groupMutations(MiniBatchOperationInProgress miniBatchOp,
                                                          BatchMutateContext context) throws IOException {
        context.multiMutationMap = new HashMap<>();
        for (int i = 0; i < miniBatchOp.size(); i++) {
            Mutation m = miniBatchOp.getOperation(i);
            // skip this mutation if we aren't enabling indexing
            // unfortunately, we really should ask if the raw mutation (rather than the combined mutation)
            // should be indexed, which means we need to expose another method on the builder. Such is the
            // way optimization go though.
            if (!isAtomicOperationComplete(miniBatchOp.getOperationStatus(i)) && this.builder.isEnabled(m)) {
                ImmutableBytesPtr row = new ImmutableBytesPtr(m.getRow());
                MultiMutation stored = context.multiMutationMap.get(row);
                if (stored == null) {
                    // we haven't seen this row before, so add it
                    stored = new MultiMutation(row);
                    context.multiMutationMap.put(row, stored);
                }
                stored.addAll(m);
                Mutation[] mutationsAddedByCP = miniBatchOp.getOperationsFromCoprocessors(i);
                if (mutationsAddedByCP != null) {
                    for (Mutation addedMutation : mutationsAddedByCP) {
                        stored.addAll(addedMutation);
                    }
                }
            }
        }
        return context.multiMutationMap.values();
    }

    public static void setTimestamps(MiniBatchOperationInProgress miniBatchOp,
                                     IndexBuildManager builder, long ts) throws IOException {
        for (Integer i = 0; i < miniBatchOp.size(); i++) {
            if (isAtomicOperationComplete(miniBatchOp.getOperationStatus(i))) {
                continue;
            }
            Mutation m = miniBatchOp.getOperation(i);
            // skip this mutation if we aren't enabling indexing or not an atomic op
            // or if it is an atomic op and its timestamp is already set(not LATEST)
            if (!builder.isEnabled(m) &&
                !(builder.isAtomicOp(m) && IndexUtil.getMaxTimestamp(m) == HConstants.LATEST_TIMESTAMP)) {
                continue;
            }
            setTimestampOnMutation(m, ts);

            // set the timestamps on any additional mutations added
            Mutation[] mutationsAddedByCP = miniBatchOp.getOperationsFromCoprocessors(i);
            if (mutationsAddedByCP != null) {
                for (Mutation addedMutation : mutationsAddedByCP) {
                    setTimestampOnMutation(addedMutation, ts);
                }
            }
        }
    }

    private static void setTimestampOnMutation(Mutation m, long ts) throws IOException {
        for (List cells : m.getFamilyCellMap().values()) {
            for (Cell cell : cells) {
                CellUtil.setTimestamp(cell, ts);
            }
        }
    }

    /**
     * This method applies pending delete mutations on the next row states
     */
    private void applyPendingDeleteMutations(MiniBatchOperationInProgress miniBatchOp,
                                             BatchMutateContext context) throws IOException {
        for (int i = 0; i < miniBatchOp.size(); i++) {
            if (miniBatchOp.getOperationStatus(i) == IGNORE) {
                continue;
            }
            Mutation m = miniBatchOp.getOperation(i);
            if (!this.builder.isEnabled(m)) {
                continue;
            }
            if (!(m instanceof Delete)) {
                continue;
            }

            if (!applyOnePendingDeleteMutation(context, (Delete) m)) {
                miniBatchOp.setOperationStatus(i, NOWRITE);
            }
        }
    }

    /**
     * This method returns true if the pending delete mutation needs to be applied
     * and false f the delete mutation can be ignored for example in the case of
     * delete on non-existing row.
     */
    private boolean applyOnePendingDeleteMutation(BatchMutateContext context, Delete delete) {
        ImmutableBytesPtr rowKeyPtr = new ImmutableBytesPtr(delete.getRow());
        Pair dataRowState = context.dataRowStates.get(rowKeyPtr);
        if (dataRowState == null) {
            dataRowState = new Pair(null, null);
            context.dataRowStates.put(rowKeyPtr, dataRowState);
        }
        Put nextDataRowState = dataRowState.getSecond();
        if (nextDataRowState == null) {
            if (dataRowState.getFirst() == null) {
                // This is a delete row mutation on a non-existing row. There is no need to apply this mutation
                // on the data table
                return false;
            }
        }

        for (List cells : delete.getFamilyCellMap().values()) {
            for (Cell cell : cells) {
                switch (cell.getType()) {
                    case DeleteFamily:
                    case DeleteFamilyVersion:
                        nextDataRowState.getFamilyCellMap().remove(CellUtil.cloneFamily(cell));
                        break;
                    case DeleteColumn:
                    case Delete:
                        removeColumn(nextDataRowState, cell);
                }
            }
        }
        if (nextDataRowState != null && nextDataRowState.getFamilyCellMap().size() == 0) {
            dataRowState.setSecond(null);
        }
        return true;
    }

    /**
     * This method applies the pending put mutations on the the next row states.
     * Before this method is called, the next row states is set to current row states.
     */
    private void applyPendingPutMutations(MiniBatchOperationInProgress miniBatchOp,
                                          BatchMutateContext context, long now) throws IOException {
        for (Integer i = 0; i < miniBatchOp.size(); i++) {
            if (isAtomicOperationComplete(miniBatchOp.getOperationStatus(i))) {
                continue;
            }
            Mutation m = miniBatchOp.getOperation(i);
            // skip this mutation if we aren't enabling indexing
            if (!this.builder.isEnabled(m)) {
                continue;
            }

            if (!(m instanceof Put)) {
                continue;
            }

            ImmutableBytesPtr rowKeyPtr = new ImmutableBytesPtr(m.getRow());
            Pair dataRowState = context.dataRowStates.get(rowKeyPtr);
            if (dataRowState == null) {
                dataRowState = new Pair(null, null);
                context.dataRowStates.put(rowKeyPtr, dataRowState);
            }
            Put nextDataRowState = dataRowState.getSecond();
            dataRowState.setSecond((nextDataRowState != null) ? applyNew((Put) m, nextDataRowState) : new Put((Put) m));

            Mutation[] mutationsAddedByCP = miniBatchOp.getOperationsFromCoprocessors(i);
            if (mutationsAddedByCP != null) {
                // all added mutations are of type delete corresponding to set nulls
                for (Mutation addedMutation : mutationsAddedByCP) {
                    applyOnePendingDeleteMutation(context, (Delete)addedMutation);
                }
            }
        }
    }

    /**
     * * Prepares next data row state
     */
    private void prepareDataRowStates(ObserverContext c,
                                      MiniBatchOperationInProgress miniBatchOp,
                                      BatchMutateContext context,
                                      long now) throws IOException {
        if (context.rowsToLock.size() == 0) {
            return;
        }
        applyPendingPutMutations(miniBatchOp, context, now);
        applyPendingDeleteMutations(miniBatchOp, context);
    }

    /**
     * The index update generation for local indexes uses the existing index update generation code (i.e.,
     * the {@link IndexBuilder} implementation).
     */
    private void handleLocalIndexUpdates(TableName table,
                                         MiniBatchOperationInProgress miniBatchOp,
                                         Collection pendingMutations,
                                         PhoenixIndexMetaData indexMetaData) throws Throwable {
        ListMultimap> indexUpdates = ArrayListMultimap.>create();
        this.builder.getIndexUpdates(indexUpdates, miniBatchOp, pendingMutations, indexMetaData);
        byte[] tableName = table.getName();
        HTableInterfaceReference hTableInterfaceReference =
                new HTableInterfaceReference(new ImmutableBytesPtr(tableName));
        List> localIndexUpdates = indexUpdates.removeAll(hTableInterfaceReference);
        if (localIndexUpdates == null || localIndexUpdates.isEmpty()) {
            return;
        }
        List localUpdates = new ArrayList();
        Iterator> indexUpdatesItr = localIndexUpdates.iterator();
        while (indexUpdatesItr.hasNext()) {
            Pair next = indexUpdatesItr.next();
            localUpdates.add(next.getFirst());
        }
        if (!localUpdates.isEmpty()) {
            Mutation[] mutationsAddedByCP = miniBatchOp.getOperationsFromCoprocessors(0);
            if (mutationsAddedByCP != null) {
                localUpdates.addAll(Arrays.asList(mutationsAddedByCP));
            }
            miniBatchOp.addOperationsFromCP(0, localUpdates.toArray(new Mutation[localUpdates.size()]));
        }
    }

    /**
     * Determines if any of the data table mutations in the given batch does not include all
     * the indexed columns or the where clause columns for partial uncovered indexes.
     */
    private boolean isPartialUncoveredIndexMutation(PhoenixIndexMetaData indexMetaData,
            MiniBatchOperationInProgress miniBatchOp) {
        int indexedColumnCount = 0;
        for (IndexMaintainer indexMaintainer : indexMetaData.getIndexMaintainers()) {
            indexedColumnCount += indexMaintainer.getIndexedColumns().size();
            if (indexMaintainer.getIndexWhereColumns() != null) {
                indexedColumnCount += indexMaintainer.getIndexWhereColumns().size();
            }
        }
        Set columns = new HashSet(indexedColumnCount);
        for (IndexMaintainer indexMaintainer : indexMetaData.getIndexMaintainers()) {
            columns.addAll(indexMaintainer.getIndexedColumns());
            if (indexMaintainer.getIndexWhereColumns() != null) {
                columns.addAll(indexMaintainer.getIndexWhereColumns());
            }
        }
        for (int i = 0; i < miniBatchOp.size(); i++) {
            if (isAtomicOperationComplete(miniBatchOp.getOperationStatus(i))) {
                continue;
            }
            Mutation m = miniBatchOp.getOperation(i);
            if (!this.builder.isEnabled(m)) {
                continue;
            }
            for (ColumnReference column : columns) {
                if (m.get(column.getFamily(), column.getQualifier()).isEmpty()) {
                    // The returned list is empty, which means the indexed column is not
                    // included. This mutation would result in partial index update (and thus
                    // index column values should be retrieved from the existing data table row)
                    return true;
                }
            }
        }
        return false;
    }
    /**
     * Retrieve the data row state either from memory or disk. The rows are locked by the caller.
     */
    private void getCurrentRowStates(ObserverContext c,
                                     BatchMutateContext context) throws IOException {
        Set keys = new HashSet(context.rowsToLock.size());
        for (ImmutableBytesPtr rowKeyPtr : context.rowsToLock) {
            PendingRow pendingRow = new PendingRow(rowKeyPtr, context);
            // Add the data table rows in the mini batch to the per region collection of pending
            // rows. This will be used to detect concurrent updates
            PendingRow existingPendingRow = pendingRows.putIfAbsent(rowKeyPtr, pendingRow);
            if (existingPendingRow == null) {
                // There was no pending row for this row key. We need to retrieve this row from disk
                keys.add(PVarbinary.INSTANCE.getKeyRange(rowKeyPtr.get(), SortOrder.ASC));
            } else {
                // There is a pending row for this row key. We need to retrieve the row from memory
                BatchMutateContext lastContext = existingPendingRow.getLastContext();
                if (existingPendingRow.add(context)) {
                    BatchMutatePhase phase = lastContext.getCurrentPhase();
                    Preconditions.checkArgument(phase != BatchMutatePhase.POST,
                            "the phase of the last batch cannot be POST");
                    if (phase == BatchMutatePhase.PRE) {
                        if (context.lastConcurrentBatchContext == null) {
                            context.lastConcurrentBatchContext = new HashMap<>();
                        }
                        context.lastConcurrentBatchContext.put(rowKeyPtr, lastContext);
                        if (context.maxPendingRowCount < existingPendingRow.getCount()) {
                            context.maxPendingRowCount = existingPendingRow.getCount();
                        }
                        Put put = lastContext.getNextDataRowState(rowKeyPtr);
                        if (put != null) {
                            context.dataRowStates.put(rowKeyPtr, new Pair<>(put, new Put(put)));
                        }
                    } else {
                        // The last batch for this row key failed. We cannot use the memory state.
                        // So we need to retrieve this row from disk
                        keys.add(PVarbinary.INSTANCE.getKeyRange(rowKeyPtr.get(), SortOrder.ASC));
                    }
                } else {
                    // The existing pending row is removed from the map. That means there is no
                    // pending row for this row key anymore. We need to add the new one to the map
                    pendingRows.put(rowKeyPtr, pendingRow);
                    keys.add(PVarbinary.INSTANCE.getKeyRange(rowKeyPtr.get(), SortOrder.ASC));
                }
            }
        }
        if (keys.isEmpty()) {
            return;
        }

        if (this.useBloomFilter) {
            for (KeyRange key : keys) {
                // Scan.java usage alters scan instances, safer to create scan instance per usage
                Scan scan = new Scan();
                // create a scan with same start/stop row key scan#isGetScan()
                // for bloom filters scan should be a get
                scan.withStartRow(key.getLowerRange(), true);
                scan.withStopRow(key.getLowerRange(), true);
                readDataTableRows(c, context, scan);
            }
        }
        else {
            Scan scan = new Scan();
            ScanRanges scanRanges = ScanRanges.createPointLookup(new ArrayList(keys));
            scanRanges.initializeScan(scan);
            SkipScanFilter skipScanFilter = scanRanges.getSkipScanFilter();
            scan.setFilter(skipScanFilter);
            readDataTableRows(c, context, scan);
        }
    }

    private void readDataTableRows(ObserverContext c,
                                   BatchMutateContext context, Scan scan) throws IOException {
        try (RegionScanner scanner = c.getEnvironment().getRegion().getScanner(scan)) {
            boolean more = true;
            while (more) {
                List cells = new ArrayList();
                more = scanner.next(cells);
                if (cells.isEmpty()) {
                    continue;
                }
                byte[] rowKey = CellUtil.cloneRow(cells.get(0));
                Put put = new Put(rowKey);
                for (Cell cell : cells) {
                    put.add(cell);
                }
                context.dataRowStates.put(new ImmutableBytesPtr(rowKey), new Pair(put, new Put(put)));
            }
        }
    }

    /**
     * Generate the index update for a data row from the mutation that are obtained by merging the previous data row
     * state with the pending row mutation.
     */
    private void prepareIndexMutations(BatchMutateContext context, List maintainers, long ts)
            throws IOException {
        List> indexTables = new ArrayList<>(maintainers.size());
        for (IndexMaintainer indexMaintainer : maintainers) {
            if (indexMaintainer.isLocalIndex()) {
                continue;
            }
            HTableInterfaceReference hTableInterfaceReference =
                    new HTableInterfaceReference(new ImmutableBytesPtr(indexMaintainer.getIndexTableName()));
            indexTables.add(new Pair<>(indexMaintainer, hTableInterfaceReference));
        }
        for (Map.Entry> entry : context.dataRowStates.entrySet()) {
            ImmutableBytesPtr rowKeyPtr = entry.getKey();
            Pair dataRowState =  entry.getValue();
            Put currentDataRowState = dataRowState.getFirst();
            Put nextDataRowState = dataRowState.getSecond();
            if (currentDataRowState == null && nextDataRowState == null) {
                continue;
            }
            for (Pair pair : indexTables) {
                IndexMaintainer indexMaintainer = pair.getFirst();
                HTableInterfaceReference hTableInterfaceReference = pair.getSecond();
                if (nextDataRowState != null
                        && indexMaintainer.shouldPrepareIndexMutations(nextDataRowState)) {
                    ValueGetter nextDataRowVG = new IndexUtil.SimpleValueGetter(nextDataRowState);
                    Put indexPut = indexMaintainer.buildUpdateMutation(GenericKeyValueBuilder.INSTANCE,
                            nextDataRowVG, rowKeyPtr, ts, null, null, false);
                    if (indexPut == null) {
                        // No covered column. Just prepare an index row with the empty column
                        byte[] indexRowKey = indexMaintainer.buildRowKey(nextDataRowVG, rowKeyPtr,
                                null, null, ts);
                        indexPut = new Put(indexRowKey);
                    } else {
                        IndexUtil.removeEmptyColumn(indexPut, indexMaintainer.getEmptyKeyValueFamily().copyBytesIfNecessary(),
                                indexMaintainer.getEmptyKeyValueQualifier());
                    }
                    indexPut.addColumn(
                            indexMaintainer.getEmptyKeyValueFamily().copyBytesIfNecessary(),
                            indexMaintainer.getEmptyKeyValueQualifier(), ts,
                            QueryConstants.UNVERIFIED_BYTES);
                    context.indexUpdates.put(hTableInterfaceReference,
                            new Pair(indexPut, rowKeyPtr.get()));
                    // Delete the current index row if the new index key is different than the current one
                    if (currentDataRowState != null) {
                        ValueGetter currentDataRowVG = new IndexUtil.SimpleValueGetter(currentDataRowState);
                        byte[] indexRowKeyForCurrentDataRow = indexMaintainer.buildRowKey(currentDataRowVG, rowKeyPtr,
                                null, null, ts);
                        if (Bytes.compareTo(indexPut.getRow(), indexRowKeyForCurrentDataRow) != 0) {
                            Mutation del = indexMaintainer.buildRowDeleteMutation(indexRowKeyForCurrentDataRow,
                                    IndexMaintainer.DeleteType.ALL_VERSIONS, ts);
                            context.indexUpdates.put(hTableInterfaceReference,
                                    new Pair(del, rowKeyPtr.get()));
                        }
                    }
                } else if (currentDataRowState != null
                        && indexMaintainer.shouldPrepareIndexMutations(currentDataRowState)) {
                    ValueGetter currentDataRowVG = new IndexUtil.SimpleValueGetter(currentDataRowState);
                    byte[] indexRowKeyForCurrentDataRow = indexMaintainer.buildRowKey(currentDataRowVG, rowKeyPtr,
                            null, null, ts);
                    Mutation del = indexMaintainer.buildRowDeleteMutation(indexRowKeyForCurrentDataRow,
                            IndexMaintainer.DeleteType.ALL_VERSIONS, ts);
                    context.indexUpdates.put(hTableInterfaceReference,
                            new Pair(del, rowKeyPtr.get()));
                }
            }
        }
    }

    /**
     * This method prepares unverified index mutations which are applied to index tables before the data table is
     * updated. In the three-phase update approach, in phase 1, the status of existing index rows is set to "unverified"
     * (these rows will be deleted from the index table in phase 3), and/or new put mutations are added with the
     * unverified status. In phase 2, data table mutations are applied. In phase 3, the status for an index table row is
     * either set to "verified" or the row is deleted.
     */
    private void preparePreIndexMutations(BatchMutateContext context,
                                          long batchTimestamp,
                                          PhoenixIndexMetaData indexMetaData) throws Throwable {
        List maintainers = indexMetaData.getIndexMaintainers();
        // get the current span, or just use a null-span to avoid a bunch of if statements
        try (TraceScope scope = Trace.startSpan("Starting to build index updates")) {
            Span current = scope.getSpan();
            if (current == null) {
                current = NullSpan.INSTANCE;
            }
            current.addTimelineAnnotation("Built index updates, doing preStep");
            // The rest of this method is for handling global index updates
            context.indexUpdates = ArrayListMultimap.>create();
            prepareIndexMutations(context, maintainers, batchTimestamp);

            context.preIndexUpdates = ArrayListMultimap.create();
            int updateCount = 0;
            for (IndexMaintainer indexMaintainer : maintainers) {
                updateCount++;
                byte[] emptyCF = indexMaintainer.getEmptyKeyValueFamily().copyBytesIfNecessary();
                byte[] emptyCQ = indexMaintainer.getEmptyKeyValueQualifier();
                HTableInterfaceReference hTableInterfaceReference =
                        new HTableInterfaceReference(new ImmutableBytesPtr(indexMaintainer.getIndexTableName()));
                List > updates = context.indexUpdates.get(hTableInterfaceReference);
                for (Pair update : updates) {
                    Mutation m = update.getFirst();
                    if (m instanceof Put) {
                        // This will be done before the data table row is updated (i.e., in the first write phase)
                        context.preIndexUpdates.put(hTableInterfaceReference, m);
                    } else {
                        // Set the status of the index row to "unverified"
                        Put unverifiedPut = new Put(m.getRow());
                        unverifiedPut.addColumn(
                            emptyCF, emptyCQ, batchTimestamp, QueryConstants.UNVERIFIED_BYTES);
                        // This will be done before the data table row is updated (i.e., in the first write phase)
                        context.preIndexUpdates.put(hTableInterfaceReference, unverifiedPut);
                    }
                }
            }
            TracingUtils.addAnnotation(current, "index update count", updateCount);
        }
    }

    protected PhoenixIndexMetaData getPhoenixIndexMetaData(ObserverContext observerContext,
                                                           MiniBatchOperationInProgress miniBatchOp)
            throws IOException {
        IndexMetaData indexMetaData = this.builder.getIndexMetaData(miniBatchOp);
        if (!(indexMetaData instanceof PhoenixIndexMetaData)) {
            throw new DoNotRetryIOException(
                    "preBatchMutateWithExceptions: indexMetaData is not an instance of "+PhoenixIndexMetaData.class.getName() +
                            ", current table is:" +
                            observerContext.getEnvironment().getRegion().getRegionInfo().getTable().getNameAsString());
        }
        return (PhoenixIndexMetaData)indexMetaData;
    }

    private void preparePostIndexMutations(BatchMutateContext context,
                                           long batchTimestamp,
                                           PhoenixIndexMetaData indexMetaData) {
        context.postIndexUpdates = ArrayListMultimap.create();
        List maintainers = indexMetaData.getIndexMaintainers();
        for (IndexMaintainer indexMaintainer : maintainers) {
            byte[] emptyCF = indexMaintainer.getEmptyKeyValueFamily().copyBytesIfNecessary();
            byte[] emptyCQ = indexMaintainer.getEmptyKeyValueQualifier();
            HTableInterfaceReference hTableInterfaceReference =
                    new HTableInterfaceReference(new ImmutableBytesPtr(indexMaintainer.getIndexTableName()));
            List> updates = context.indexUpdates.get(hTableInterfaceReference);
            for (Pair update : updates) {
                Mutation m = update.getFirst();
                if (m instanceof Put) {
                    if (!indexMaintainer.isUncovered()) {
                        Put verifiedPut = new Put(m.getRow());
                        // Set the status of the index row to "verified"
                        verifiedPut.addColumn(emptyCF, emptyCQ, batchTimestamp,
                                QueryConstants.VERIFIED_BYTES);
                        context.postIndexUpdates.put(hTableInterfaceReference, verifiedPut);
                    }
                } else {
                    context.postIndexUpdates.put(hTableInterfaceReference, m);
                }
            }
        }
        // all cleanup will be done in postBatchMutateIndispensably()
    }

    private static void identifyIndexMaintainerTypes(PhoenixIndexMetaData indexMetaData, BatchMutateContext context) {
        for (IndexMaintainer indexMaintainer : indexMetaData.getIndexMaintainers()) {
            if (indexMaintainer instanceof TransformMaintainer) {
                context.hasTransform = true;
            } else if (indexMaintainer.isLocalIndex()) {
                context.hasLocalIndex = true;
            } else if (indexMaintainer.isUncovered()) {
                context.hasUncoveredIndex = true;
            } else {
                context.hasGlobalIndex = true;
            }
        }
    }

    private void identifyMutationTypes(MiniBatchOperationInProgress miniBatchOp,
                                              BatchMutateContext context) {
        for (int i = 0; i < miniBatchOp.size(); i++) {
            Mutation m = miniBatchOp.getOperation(i);
            if (this.builder.isAtomicOp(m)) {
                context.hasAtomic = true;
                if (context.hasDelete) {
                    return;
                }
            } else if (m instanceof Delete)
                context.hasDelete = true;
            if (context.hasAtomic) {
                return;
            }
        }
    }

    /**
     * Wait for the previous batches to complete. If any of the previous batch fails then this
     * batch will fail too and needs to be retried. The rows are locked by the caller.
     * @param table
     * @param context
     * @throws Throwable
     */
    private void waitForPreviousConcurrentBatch(TableName table, BatchMutateContext context)
            throws Throwable {
        for (BatchMutateContext lastContext : context.lastConcurrentBatchContext.values()) {
            BatchMutatePhase phase = lastContext.getCurrentPhase();
            if (phase == BatchMutatePhase.FAILED) {
                context.currentPhase = BatchMutatePhase.FAILED;
                break;
            } else if (phase == BatchMutatePhase.PRE) {
                CountDownLatch countDownLatch = lastContext.getCountDownLatch();
                if (countDownLatch == null) {
                    // phase changed from PRE to either FAILED or POST
                    if (phase == BatchMutatePhase.FAILED) {
                        context.currentPhase = BatchMutatePhase.FAILED;
                        break;
                    }
                    continue;
                }
                // Release the locks so that the previous concurrent mutation can go into the post phase
                unlockRows(context);
                // Wait for at most one concurrentMutationWaitDuration for each level in the dependency tree of batches.
                // lastContext.getMaxPendingRowCount() is the depth of the subtree rooted at the batch pointed by lastContext
                if (!countDownLatch.await((lastContext.getMaxPendingRowCount() + 1) * concurrentMutationWaitDuration,
                        TimeUnit.MILLISECONDS)) {
                    context.currentPhase = BatchMutatePhase.FAILED;
                    LOG.debug(String.format("latch timeout context %s last %s", context, lastContext));
                    break;
                }
                if (lastContext.getCurrentPhase() == BatchMutatePhase.FAILED) {
                    context.currentPhase = BatchMutatePhase.FAILED;
                    break;
                }
                // Acquire the locks again before letting the region proceed with data table updates
                lockRows(context);
                LOG.debug(String.format("context %s last %s exit phase %s", context, lastContext,
                        lastContext.getCurrentPhase()));
            }
        }
        if (context.currentPhase == BatchMutatePhase.FAILED) {
            // This batch needs to be retried since one of the previous concurrent batches has not completed yet.
            // Throwing an IOException will result in retries of this batch. Removal of reference counts and
            // locks for the rows of this batch will be done in postBatchMutateIndispensably()
            throw new IOException("One of the previous concurrent mutations has not completed. " +
                    "The batch needs to be retried " + table.getNameAsString());
        }
    }

    private boolean shouldSleep(BatchMutateContext context) {
        for (ImmutableBytesPtr ptr : context.rowsToLock) {
            for (Set set : batchesWithLastTimestamp) {
                if (set.contains(ptr)) {
                    return true;
                }
            }
        }
        return false;
    }
    private long getBatchTimestamp(BatchMutateContext context, TableName table)
            throws InterruptedException {
        synchronized (this) {
            long ts = EnvironmentEdgeManager.currentTimeMillis();
            if (ts != lastTimestamp) {
                // The timestamp for this batch will be different from the last batch processed.
                lastTimestamp = ts;
                batchesWithLastTimestamp.clear();
                batchesWithLastTimestamp.add(context.rowsToLock);
                return ts;
            } else {
                if (!shouldSleep(context)) {
                    // There is no need to sleep as the last batches with the same timestamp
                    // do not have a common row this batch
                    batchesWithLastTimestamp.add(context.rowsToLock);
                    return ts;
                }
            }
        }
        // Sleep for one millisecond. The sleep is necessary to get different timestamps
        // for concurrent batches that share common rows.
        Thread.sleep(1);
        LOG.debug("slept 1ms for " + table.getNameAsString());
        synchronized (this) {
            long ts = EnvironmentEdgeManager.currentTimeMillis();
            if (ts != lastTimestamp) {
                // The timestamp for this batch will be different from the last batch processed.
                lastTimestamp = ts;
                batchesWithLastTimestamp.clear();
            }
            // We do not have to check again if we need to sleep again since we got the next
            // timestamp while holding the row locks. This mean there cannot be a new
            // mutation with the same row attempting get the same timestamp
            batchesWithLastTimestamp.add(context.rowsToLock);
            return ts;
        }
    }
    public void preBatchMutateWithExceptions(ObserverContext c,
                                             MiniBatchOperationInProgress miniBatchOp) throws Throwable {
        PhoenixIndexMetaData indexMetaData = getPhoenixIndexMetaData(c, miniBatchOp);
        BatchMutateContext context = new BatchMutateContext(indexMetaData.getClientVersion());
        setBatchMutateContext(c, context);
        identifyIndexMaintainerTypes(indexMetaData, context);
        identifyMutationTypes(miniBatchOp, context);
        context.populateOriginalMutations(miniBatchOp);

        if (context.hasDelete) {
            // Need to add cell tags to Delete Marker before we do any index processing
            // since we add tags to tables which doesn't have indexes also.
            ServerIndexUtil.setDeleteAttributes(miniBatchOp);
        }

        // Exclusively lock all rows to do consistent writes over multiple tables
        // (i.e., the data and its index tables)
        populateRowsToLock(miniBatchOp, context);
        // early exit if it turns out we don't have any update for indexes
        if (context.rowsToLock.isEmpty()) {
            return;
        }
        lockRows(context);
        long onDupCheckTime = 0;

        if (context.hasAtomic || context.hasGlobalIndex || context.hasUncoveredIndex || context.hasTransform) {
            // Retrieve the current row states from the data table while holding the lock.
            // This is needed for both atomic mutations and global indexes
            long start = EnvironmentEdgeManager.currentTimeMillis();
            context.dataRowStates = new HashMap>(context.rowsToLock.size());
            if (context.hasGlobalIndex || context.hasTransform || context.hasAtomic ||
                    context.hasDelete ||  (context.hasUncoveredIndex &&
                    isPartialUncoveredIndexMutation(indexMetaData, miniBatchOp))) {
                getCurrentRowStates(c, context);
            }
            onDupCheckTime += (EnvironmentEdgeManager.currentTimeMillis() - start);
        }

        if (context.hasAtomic) {
            long start = EnvironmentEdgeManager.currentTimeMillis();
            // add the mutations for conditional updates to the mini batch
            addOnDupMutationsToBatch(miniBatchOp, context);

            // release locks for ON DUPLICATE KEY IGNORE since we won't be changing those rows
            // this is needed so that we can exit early
            releaseLocksForOnDupIgnoreMutations(miniBatchOp, context);
            onDupCheckTime += (EnvironmentEdgeManager.currentTimeMillis() - start);
            metricSource.updateDuplicateKeyCheckTime(dataTableName, onDupCheckTime);

            // early exit if we are not changing any rows
            if (context.rowsToLock.isEmpty()) {
                return;
            }
        }

        TableName table = c.getEnvironment().getRegion().getRegionInfo().getTable();
        long batchTimestamp = getBatchTimestamp(context, table);
        // Update the timestamps of the data table mutations to prevent overlapping timestamps
        // (which prevents index inconsistencies as this case is not handled).
        setTimestamps(miniBatchOp, builder, batchTimestamp);
        if (context.hasGlobalIndex || context.hasUncoveredIndex || context.hasTransform) {
            // Prepare next data rows states for pending mutations (for global indexes)
            prepareDataRowStates(c, miniBatchOp, context, batchTimestamp);
            // early exit if it turns out we don't have any edits
            long start = EnvironmentEdgeManager.currentTimeMillis();
            preparePreIndexMutations(context, batchTimestamp, indexMetaData);
            metricSource.updateIndexPrepareTime(dataTableName,
                EnvironmentEdgeManager.currentTimeMillis() - start);
            // Release the locks before making RPC calls for index updates
            unlockRows(context);
            // Do the first phase index updates
            doPre(context);
            // Acquire the locks again before letting the region proceed with data table updates
            lockRows(context);
            if (context.lastConcurrentBatchContext != null) {
                waitForPreviousConcurrentBatch(table, context);
            }
            preparePostIndexMutations(context, batchTimestamp, indexMetaData);
        }
        if (context.hasLocalIndex) {
            // Group all the updates for a single row into a single update to be processed (for local indexes)
            Collection mutations = groupMutations(miniBatchOp, context);
            handleLocalIndexUpdates(table, miniBatchOp, mutations, indexMetaData);
        }
        if (failDataTableUpdatesForTesting) {
            throw new DoNotRetryIOException("Simulating the data table write failure");
        }
    }

    /**
     * In case of ON DUPLICATE KEY IGNORE, if the row already exists no mutations will be
     * generated so release the row lock.
     */
    private void releaseLocksForOnDupIgnoreMutations(MiniBatchOperationInProgress miniBatchOp,
                                                     BatchMutateContext context) {
        for (int i = 0; i < miniBatchOp.size(); i++) {
            if (!isAtomicOperationComplete(miniBatchOp.getOperationStatus(i))) {
                continue;
            }
            Mutation m = miniBatchOp.getOperation(i);
            if (!this.builder.isAtomicOp(m)) {
                continue;
            }
            ImmutableBytesPtr row = new ImmutableBytesPtr(m.getRow());
            Iterator rowLockIterator = context.rowLocks.iterator();
            while(rowLockIterator.hasNext()){
                RowLock rowLock = rowLockIterator.next();
                ImmutableBytesPtr rowKey = rowLock.getRowKey();
                if (row.equals(rowKey)) {
                    PendingRow pendingRow = pendingRows.get(rowKey);
                    if (pendingRow != null) {
                        pendingRow.remove();
                    }
                    rowLock.release();
                    rowLockIterator.remove();
                    context.rowsToLock.remove(row);
                    break;
                }
            }
        }
    }

    private void setBatchMutateContext(ObserverContext c, BatchMutateContext context) {
      this.batchMutateContext.set(context);
    }

    private BatchMutateContext getBatchMutateContext(ObserverContext c) {
      return this.batchMutateContext.get();
    }

    private void removeBatchMutateContext(ObserverContext c) {
      this.batchMutateContext.remove();
    }

    @Override
    public void preWALAppend(ObserverContext c, WALKey key,
                             WALEdit edit) {
        if (shouldWALAppend) {
            BatchMutateContext context = getBatchMutateContext(c);
            appendMutationAttributesToWALKey(key, context);
        }
    }

    public void appendMutationAttributesToWALKey(WALKey key,
                                                        IndexRegionObserver.BatchMutateContext context) {
        if (context != null && context.getOriginalMutations().size() > 0) {
            Mutation firstMutation = context.getOriginalMutations().get(0);
            Map attrMap = firstMutation.getAttributesMap();
            for (MutationState.MutationMetadataType metadataType :
                    MutationState.MutationMetadataType.values()) {
                String metadataTypeKey = metadataType.toString();
                if (attrMap.containsKey(metadataTypeKey)) {
                    IndexRegionObserver.appendToWALKey(key, metadataTypeKey,
                            attrMap.get(metadataTypeKey));
                }
            }
        }
    }

    /**
     * When this hook is called, all the rows in the batch context are locked if the batch of
     * mutations is successful. Because the rows are locked, we can safely make updates to
     * pending row states in memory and perform the necessary cleanup in that case.
     *
     * However, when the batch fails, then some of the rows may not be locked. In that case,
     * we remove the pending row states from the concurrent hash map without updating them since
     * pending rows states become invalid when a batch fails.
     */
  @Override
  public void postBatchMutateIndispensably(ObserverContext c,
      MiniBatchOperationInProgress miniBatchOp, final boolean success) throws IOException {
      if (this.disabled) {
          return;
      }
      BatchMutateContext context = getBatchMutateContext(c);
      if (context == null) {
          return;
      }
      try {
          if (success) {
              context.currentPhase = BatchMutatePhase.POST;
              if(context.hasAtomic && miniBatchOp.size() == 1) {
                  if (!isAtomicOperationComplete(miniBatchOp.getOperationStatus(0))) {
                      byte[] retVal = PInteger.INSTANCE.toBytes(1);
                      Cell cell = PhoenixKeyValueUtil.newKeyValue(
                              miniBatchOp.getOperation(0).getRow(), Bytes.toBytes(UPSERT_CF),
                              Bytes.toBytes(UPSERT_STATUS_CQ), 0, retVal, 0, retVal.length);
                      Result result = Result.create(new ArrayList<>(Arrays.asList(cell)));
                      miniBatchOp.setOperationStatus(0,
                              new OperationStatus(SUCCESS, result));
                  }
              }
          } else {
              context.currentPhase = BatchMutatePhase.FAILED;
          }
          context.countDownAllLatches();
          removePendingRows(context);
          if (context.indexUpdates != null) {
              context.indexUpdates.clear();
          }
          unlockRows(context);
          this.builder.batchCompleted(miniBatchOp);

          if (success) { // The pre-index and data table updates are successful, and now, do post index updates
              doPost(c, context);
          }
       } finally {
           removeBatchMutateContext(c);
       }
  }

  private void doPost(ObserverContext c, BatchMutateContext context) throws IOException {
      long start = EnvironmentEdgeManager.currentTimeMillis();

      try {
          if (failPostIndexUpdatesForTesting) {
              throw new DoNotRetryIOException("Simulating the last (i.e., post) index table write failure");
          }
          doIndexWritesWithExceptions(context, true);
          metricSource.updatePostIndexUpdateTime(dataTableName,
              EnvironmentEdgeManager.currentTimeMillis() - start);
      } catch (Throwable e) {
          metricSource.updatePostIndexUpdateFailureTime(dataTableName,
              EnvironmentEdgeManager.currentTimeMillis() - start);
          metricSource.incrementPostIndexUpdateFailures(dataTableName);
          // Ignore the failures in the third write phase
      }
  }

  private void doIndexWritesWithExceptions(BatchMutateContext context, boolean post)
            throws IOException {
      ListMultimap indexUpdates = post ? context.postIndexUpdates : context.preIndexUpdates;
      //short circuit, if we don't need to do any work

      if (context == null || indexUpdates == null || indexUpdates.isEmpty()) {
          return;
      }

      // get the current span, or just use a null-span to avoid a bunch of if statements
      try (TraceScope scope = Trace.startSpan("Completing " + (post ? "post" : "pre") + " index writes")) {
          Span current = scope.getSpan();
          if (current == null) {
              current = NullSpan.INSTANCE;
          }
          current.addTimelineAnnotation("Actually doing " + (post ? "post" : "pre") + " index update for first time");
          if (post) {
              postWriter.write(indexUpdates, false, context.clientVersion);
          } else {
              preWriter.write(indexUpdates, false, context.clientVersion);
          }
      }
  }

  private void removePendingRows(BatchMutateContext context) {
      for (ImmutableBytesPtr rowKey : context.rowsToLock) {
          PendingRow pendingRow = pendingRows.get(rowKey);
          if (pendingRow != null) {
              pendingRow.remove();
          }
      }
  }

  private void doPre(BatchMutateContext context) throws IOException {
      long start = 0;
      try {
          start = EnvironmentEdgeManager.currentTimeMillis();
          if (failPreIndexUpdatesForTesting) {
              throw new DoNotRetryIOException("Simulating the first (i.e., pre) index table write failure");
          }
          doIndexWritesWithExceptions(context, false);
          metricSource.updatePreIndexUpdateTime(dataTableName,
              EnvironmentEdgeManager.currentTimeMillis() - start);
      } catch (Throwable e) {
          metricSource.updatePreIndexUpdateFailureTime(dataTableName,
              EnvironmentEdgeManager.currentTimeMillis() - start);
          metricSource.incrementPreIndexUpdateFailures(dataTableName);
          // Re-acquire all locks since we released them before making index updates
          // Removal of reference counts and locks for the rows of this batch will be
          // done in postBatchMutateIndispensably()
          lockRows(context);
          rethrowIndexingException(e);
      }
  }

  private void extractExpressionsAndColumns(DataInputStream input,
                              List>> operations,
                              final Set colsReadInExpr) throws IOException {
      while (true) {
          ExpressionVisitor visitor = new StatelessTraverseAllExpressionVisitor() {
              @Override
              public Void visit(KeyValueColumnExpression expression) {
                  colsReadInExpr.add(new ColumnReference(expression.getColumnFamily(), expression.getColumnQualifier()));
                  return null;
              }
          };
          try {
              int nExpressions = WritableUtils.readVInt(input);
              List expressions = Lists.newArrayListWithExpectedSize(nExpressions);
              for (int i = 0; i < nExpressions; i++) {
                  Expression expression = ExpressionType.values()[WritableUtils.readVInt(input)].newInstance();
                  expression.readFields(input);
                  expressions.add(expression);
                  expression.accept(visitor);
              }
              PTableProtos.PTable tableProto = PTableProtos.PTable.parseDelimitedFrom(input);
              PTable table = PTableImpl.createFromProto(tableProto);
              operations.add(new Pair<>(table, expressions));
          } catch (EOFException e) {
              break;
          }
      }
  }

    /**
     * This function has been adapted from PhoenixIndexBuilder#executeAtomicOp().
     * The critical difference being that the code in PhoenixIndexBuilder#executeAtomicOp()
     * generates the mutations by reading the latest data table row from HBase but in order
     * to correctly support concurrent index mutations we need to always read the latest
     * data table row from memory.
     * It takes in an atomic Put mutation and generates a list of Put and Delete mutations.
     * The mutation list will be empty in two cases:
     * 1) ON DUPLICATE KEY IGNORE and the row already exists;
     * 2) ON DUPLICATE KEY UPDATE if CASE expression is specified and in each of them the new
     * value is the same as the old value in the ELSE-clause.
     * Otherwise, we will generate one Put mutation and optionally one Delete mutation (with
     * DeleteColumn type cells for all columns set to null).
     */
  private List generateOnDupMutations(BatchMutateContext context, Put atomicPut) throws IOException {
      List mutations = Lists.newArrayListWithExpectedSize(2);
      byte[] opBytes = atomicPut.getAttribute(ATOMIC_OP_ATTRIB);
      if (opBytes == null) { // Unexpected
          return null;
      }
      Put put = null;
      Delete delete = null;

      // mutations returned by this function will have the LATEST timestamp
      // later these timestamps will be updated by the IndexRegionObserver#setTimestamps() function
      long ts = HConstants.LATEST_TIMESTAMP;

      byte[] rowKey = atomicPut.getRow();
      ImmutableBytesPtr rowKeyPtr = new ImmutableBytesPtr(rowKey);
      // Get the latest data row state
      Pair dataRowState = context.dataRowStates.get(rowKeyPtr);
      Put currentDataRowState = dataRowState != null ? dataRowState.getFirst() : null;

      if (PhoenixIndexBuilderHelper.isDupKeyIgnore(opBytes)) {
          if (currentDataRowState == null) {
              // new row
              mutations.add(atomicPut);
          }
          return mutations;
      }

      ByteArrayInputStream stream = new ByteArrayInputStream(opBytes);
      DataInputStream input = new DataInputStream(stream);
      boolean skipFirstOp = input.readBoolean();
      short repeat = input.readShort();

      List>> operations = Lists.newArrayListWithExpectedSize(3);
      final Set colsReadInExpr = new HashSet<>();
      // deserialize the conditional update expressions and
      // extract the columns that are read in the conditional expressions
      extractExpressionsAndColumns(input, operations, colsReadInExpr);
      int estimatedSize = colsReadInExpr.size();

      // initialized to either the incoming new row or the current row
      // stores the intermediate values as we apply conditional update expressions
      List flattenedCells;
      // read the column values requested in the get from the current data row
      List cells = IndexUtil.readColumnsFromRow(currentDataRowState, colsReadInExpr);

      // store current cells into a map where the key is ColumnReference of the column family and
      // column qualifier, and value is a pair of cell and a boolean. The value of the boolean
      // will be true if the expression is CaseExpression and Else-clause is evaluated to be
      // true, will be null if there is no expression on this column, otherwise false
      Map> currColumnCellExprMap = new HashMap<>();

      if (currentDataRowState == null) { // row doesn't exist
          if (skipFirstOp) {
              if (operations.size() <= 1 && repeat <= 1) {
                  // early exit since there is only one ON DUPLICATE KEY UPDATE
                  // clause which is ignored because the row doesn't exist so
                  // simply use the values in UPSERT VALUES
                  mutations.add(atomicPut);
                  return mutations;
              }
              // If there are multiple ON DUPLICATE KEY UPDATE on a new row,
              // the first one is skipped
              repeat--;
          }
          // Base current state off of new row
          flattenedCells = flattenCells(atomicPut);
      } else {
          // Base current state off of existing row
          flattenedCells = cells;
          // store all current cells from currentDataRowState
          for (Map.Entry> entry :
                  currentDataRowState.getFamilyCellMap().entrySet()) {
              for (Cell cell : new ArrayList<>(entry.getValue())) {
                  byte[] family = CellUtil.cloneFamily(cell);
                  byte[] qualifier = CellUtil.cloneQualifier(cell);
                  ColumnReference colRef = new ColumnReference(family, qualifier);
                  currColumnCellExprMap.put(colRef, new Pair<>(cell, null));
              }
          }
      }

      MultiKeyValueTuple tuple = new MultiKeyValueTuple(flattenedCells);
      ImmutableBytesWritable ptr = new ImmutableBytesWritable();

      // for each conditional upsert in the batch
      for (int opIndex = 0; opIndex < operations.size(); opIndex++) {
          Pair> operation = operations.get(opIndex);
          PTable table = operation.getFirst();
          List expressions = operation.getSecond();
          for (int j = 0; j < repeat; j++) { // repeater loop
              ptr.set(rowKey);
              // Sort the list of cells (if they've been flattened in which case they're
              // not necessarily ordered correctly).
              if (flattenedCells != null) {
                  Collections.sort(flattenedCells, CellComparator.getInstance());
              }
              PRow row = table.newRow(GenericKeyValueBuilder.INSTANCE, ts, ptr, false);
              int adjust = table.getBucketNum() == null ? 1 : 2;
              for (int i = 0; i < expressions.size(); i++) {
                  Expression expression = expressions.get(i);
                  ptr.set(EMPTY_BYTE_ARRAY);
                  expression.evaluate(tuple, ptr);
                  PColumn column = table.getColumns().get(i + adjust);
                  Object value = expression.getDataType().toObject(ptr, column.getSortOrder());
                  // We are guaranteed that the two column will have the same type
                  if (!column.getDataType().isSizeCompatible(ptr, value, column.getDataType(),
                      expression.getSortOrder(), expression.getMaxLength(), expression.getScale(),
                      column.getMaxLength(), column.getScale())) {
                      throw new DataExceedsCapacityException(column.getDataType(), column.getMaxLength(),
                          column.getScale(), column.getName().getString());
                  }
                  column.getDataType().coerceBytes(ptr, value, expression.getDataType(), expression.getMaxLength(),
                      expression.getScale(), expression.getSortOrder(), column.getMaxLength(), column.getScale(),
                      column.getSortOrder(), table.rowKeyOrderOptimizable());
                  byte[] bytes = ByteUtil.copyKeyBytesIfNecessary(ptr);
                  row.setValue(column, bytes);

                  // If the column exist in currColumnCellExprMap, set the boolean value in the
                  // map to be true if the expression is CaseExpression and the Else-clause is
                  // evaluated to be true
                  ColumnReference colRef = new ColumnReference(column.getFamilyName().getBytes(),
                          column.getColumnQualifierBytes());
                  if (currColumnCellExprMap.containsKey(colRef)) {
                      Pair valuePair = currColumnCellExprMap.get(colRef);
                      if (expression instanceof CaseExpression
                              && ((CaseExpression) expression).evaluateIndexOf(tuple, ptr)
                              == expression.getChildren().size() - 1) {
                          valuePair.setSecond(true);
                      } else {
                          valuePair.setSecond(false);
                      }
                  }
              }
              List updatedCells = Lists.newArrayListWithExpectedSize(estimatedSize);
              List newMutations = row.toRowMutations();
              for (Mutation source : newMutations) {
                  flattenCells(source, updatedCells);
              }
              // update the cells to the latest values calculated above
              flattenedCells = mergeCells(flattenedCells, updatedCells);
              // we need to retrieve empty cell later on which relies on binary search
              flattenedCells.sort(CellComparator.getInstance());
              tuple.setKeyValues(flattenedCells);
          }
          // Repeat only applies to first statement
          repeat = 1;
      }

      put = new Put(rowKey);
      delete = new Delete(rowKey);
      transferAttributes(atomicPut, put);
      transferAttributes(atomicPut, delete);
      for (int i = 0; i < tuple.size(); i++) {
          Cell cell = tuple.getValue(i);
          if (cell.getType() == Cell.Type.Put) {
              if (checkCellNeedUpdate(cell, currColumnCellExprMap)) {
                  put.add(cell);
              }
          } else {
              delete.add(cell);
          }
      }

      if (!put.isEmpty() || !delete.isEmpty()) {
          PTable table = operations.get(0).getFirst();
          addEmptyKVCellToPut(put, tuple, table);
      }

      if (!put.isEmpty()) {
          mutations.add(put);
      }
      if (!delete.isEmpty()) {
          mutations.add(delete);
      }

      return mutations;
  }

    private void addEmptyKVCellToPut(Put put, MultiKeyValueTuple tuple, PTable table) throws IOException {
        byte[] emptyCF = SchemaUtil.getEmptyColumnFamily(table);
        byte[] emptyCQ = EncodedColumnsUtil.getEmptyKeyValueInfo(table).getFirst();
        Cell emptyKVCell = tuple.getValue(emptyCF, emptyCQ);
        if (emptyKVCell != null) {
            put.add(emptyKVCell);
        }
    }

    private static List flattenCells(Mutation m) {
        List flattenedCells = new ArrayList<>();
        flattenCells(m, flattenedCells);
        return flattenedCells;
    }

    private static void flattenCells(Mutation m, List flattenedCells) {
        for (List cells : m.getFamilyCellMap().values()) {
            flattenedCells.addAll(cells);
        }
    }

    /**
     * This function is to check if a cell need to be updated, based on the current cells' values.
     * The cell will not be updated only if the column exist in the expression in which CASE is
     * specified and the new value is the same as the old value in the ELSE-clause, otherwise it
     * should be updated.
     *
     * @param cell the cell with new value to be checked
     * @param colCellExprMap the column reference map with cell current value
     * @return true if the cell need update, false otherwise
     */
    private boolean checkCellNeedUpdate(Cell cell,
                                        Map> colCellExprMap) {
        byte[] family = CellUtil.cloneFamily(cell);
        byte[] qualifier = CellUtil.cloneQualifier(cell);
        ColumnReference colRef = new ColumnReference(family, qualifier);

        // if cell not exist in the map, meaning that they are new and need update
        if (colCellExprMap.isEmpty() || !colCellExprMap.containsKey(colRef)) {
            return true;
        }

        Pair valuePair = colCellExprMap.get(colRef);
        Boolean isInCaseExpressionElseClause = valuePair.getSecond();
        if (isInCaseExpressionElseClause == null) {
            return false;
        }
        if (!isInCaseExpressionElseClause) {
            return true;
        }
        Cell oldCell = valuePair.getFirst();
        ImmutableBytesPtr newValuePtr = new ImmutableBytesPtr(cell.getValueArray(),
                cell.getValueOffset(), cell.getValueLength());
        ImmutableBytesPtr oldValuePtr = new ImmutableBytesPtr(oldCell.getValueArray(),
                oldCell.getValueOffset(), oldCell.getValueLength());
        return !Bytes.equals(oldValuePtr.get(), oldValuePtr.getOffset(), oldValuePtr.getLength(),
                newValuePtr.get(), newValuePtr.getOffset(), newValuePtr.getLength());
    }

    /**
     * ensure that the generated mutations have all the attributes like schema
     */
    private static void transferAttributes(Mutation source, Mutation target) {
        for (Map.Entry entry : source.getAttributesMap().entrySet()) {
            target.setAttribute(entry.getKey(), entry.getValue());
        }
    }

    /**
     * First take all the cells that are present in the latest. Then look at current
     * and any cell not present in latest is taken.
     */
    private static List mergeCells(List current, List latest) {
        Map latestColVals = Maps.newHashMapWithExpectedSize(latest.size() + current.size());

        // first take everything present in latest
        for (Cell cell : latest) {
            byte[] family = CellUtil.cloneFamily(cell);
            byte[] qualifier = CellUtil.cloneQualifier(cell);
            ColumnReference colInfo = new ColumnReference(family, qualifier);
            latestColVals.put(colInfo, cell);
        }

        // check for any leftovers in current
        for (Cell cell : current) {
            byte[] family = CellUtil.cloneFamily(cell);
            byte[] qualifier = CellUtil.cloneQualifier(cell);
            ColumnReference colInfo = new ColumnReference(family, qualifier);
            if (!latestColVals.containsKey(colInfo)) {
                latestColVals.put(colInfo, cell);
            }
        }
        return Lists.newArrayList(latestColVals.values());
    }

    public static void appendToWALKey(WALKey key, String attrKey, byte[] attrValue) {
        key.addExtendedAttribute(attrKey, attrValue);
    }

    public static byte[] getAttributeValueFromWALKey(WALKey key, String attrKey) {
        return key.getExtendedAttribute(attrKey);
    }

    public static Map getAttributeValuesFromWALKey(WALKey key) {
        return new HashMap(key.getExtendedAttributes());
    }

    /**
     * Determines whether the atomic operation is complete based on the operation status.
     * HBase returns null Result by default for successful Put and Delete mutations, only for
     * Increment and Append mutations, non-null Result is returned by default.
     * @param status the operation status.
     * @return true if the atomic operation is completed, false otherwise.
     */
    public static boolean isAtomicOperationComplete(OperationStatus status) {
        return status.getOperationStatusCode() == SUCCESS && status.getResult() != null;
    }
}