// org.apache.hadoop.hbase.regionserver.Region (Maven / Gradle / Ivy)

/*
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.apache.hadoop.hbase.regionserver;

import java.io.IOException;
import java.util.Collection;
import java.util.List;
import java.util.Map;

import org.apache.hadoop.hbase.Cell;
import org.apache.hadoop.hbase.HBaseInterfaceAudience;
import org.apache.hadoop.hbase.HDFSBlocksDistribution;
import org.apache.hadoop.hbase.HRegionInfo;
import org.apache.hadoop.hbase.HTableDescriptor;
import org.apache.hadoop.hbase.classification.InterfaceAudience;
import org.apache.hadoop.hbase.classification.InterfaceStability;
import org.apache.hadoop.hbase.client.Append;
import org.apache.hadoop.hbase.client.Delete;
import org.apache.hadoop.hbase.client.Get;
import org.apache.hadoop.hbase.client.Increment;
import org.apache.hadoop.hbase.client.IsolationLevel;
import org.apache.hadoop.hbase.client.Mutation;
import org.apache.hadoop.hbase.client.Put;
import org.apache.hadoop.hbase.client.Result;
import org.apache.hadoop.hbase.client.RowMutations;
import org.apache.hadoop.hbase.client.Scan;
import org.apache.hadoop.hbase.conf.ConfigurationObserver;
import org.apache.hadoop.hbase.exceptions.FailedSanityCheckException;
import org.apache.hadoop.hbase.filter.ByteArrayComparable;
import org.apache.hadoop.hbase.filter.CompareFilter.CompareOp;
import org.apache.hadoop.hbase.protobuf.generated.AdminProtos.GetRegionInfoResponse.CompactionState;
import org.apache.hadoop.hbase.protobuf.generated.ClientProtos.CoprocessorServiceCall;
import org.apache.hadoop.hbase.util.Pair;
import org.apache.hadoop.hbase.wal.WALSplitter.MutationReplay;

import com.google.common.annotations.VisibleForTesting;
import com.google.protobuf.Message;
import com.google.protobuf.RpcController;
import com.google.protobuf.Service;

/**
 * Regions store data for a certain region of a table.  It stores all columns
 * for each row. A given table consists of one or more Regions.
 *
 * An Region is defined by its table and its key extent.
 *
 * 
Locking at the Region level serves only one purpose: preventing the
 * region from being closed (and consequently split) while other operations
 * are ongoing. Each row level operation obtains both a row lock and a region
 * read lock for the duration of the operation. While a scanner is being
 * constructed, getScanner holds a read lock. If the scanner is successfully
 * constructed, it holds a read lock until it is closed. A close takes out a
 * write lock and consequently will block for ongoing operations and will block
 * new operations from starting while the close is in progress.
 */
@InterfaceAudience.LimitedPrivate(HBaseInterfaceAudience.COPROC)
@InterfaceStability.Evolving
public interface Region extends ConfigurationObserver {

  ///////////////////////////////////////////////////////////////////////////
  // Region state

  /** @return region information for this region */
  HRegionInfo getRegionInfo();

  /** @return table descriptor for this region */
  HTableDescriptor getTableDesc();

  /** @return true if region is available (not closed and not closing) */
  boolean isAvailable();

  /** @return true if region is closed */
  boolean isClosed();

  /** @return True if closing process has started */
  boolean isClosing();

  /** @return True if region is in recovering state */
  boolean isRecovering();

  /** @return True if region is read only */
  boolean isReadOnly();

  /**
   * Return the list of Stores managed by this region
   * 
Use with caution.  Exposed for use of fixup utilities.
   * @return a list of the Stores managed by this region
   */
  List getStores();

  /**
   * Return the Store for the given family
   * 
Use with caution.  Exposed for use of fixup utilities.
   * @return the Store for the given family
   */
  Store getStore(byte[] family);

  /** @return list of store file names for the given families */
  List getStoreFileList(byte [][] columns);

  /**
   * Check the region's underlying store files, open the files that have not
   * been opened yet, and remove the store file readers for store files no
   * longer available.
   * @throws IOException
   */
  boolean refreshStoreFiles() throws IOException;

  /** @return the latest sequence number that was read from storage when this region was opened */
  long getOpenSeqNum();

  /** @return the max sequence id of flushed data on this region; no edit in memory will have
   * a sequence id that is less that what is returned here.
   */
  long getMaxFlushedSeqId();

  /** @return the oldest flushed sequence id for the given family; can be beyond
   * {@link #getMaxFlushedSeqId()} in case where we've flushed a subset of a regions column
   * families
   * @deprecated Since version 1.2.0. Exposes too much about our internals; shutting it down.
   * Do not use.
   */
  @VisibleForTesting
  @Deprecated
  public long getOldestSeqIdOfStore(byte[] familyName);

  /**
   * This can be used to determine the last time all files of this region were major compacted.
   * @param majorCompactionOnly Only consider HFile that are the result of major compaction
   * @return the timestamp of the oldest HFile for all stores of this region
   */
  long getOldestHfileTs(boolean majorCompactionOnly) throws IOException;

  /**
   * @return map of column family names to max sequence id that was read from storage when this
   * region was opened
   */
  public Map getMaxStoreSeqId();

  /** @return true if loading column families on demand by default */
  boolean isLoadingCfsOnDemandDefault();

  /** @return readpoint considering given IsolationLevel */
  long getReadpoint(IsolationLevel isolationLevel);

  /**
   * @return The earliest time a store in the region was flushed. All
   *         other stores in the region would have been flushed either at, or
   *         after this time.
   */
  long getEarliestFlushTimeForAllStores();

  ///////////////////////////////////////////////////////////////////////////
  // Metrics

  /** @return read requests count for this region */
  long getReadRequestsCount();

  /**
   * Update the read request count for this region
   * @param i increment
   */
  void updateReadRequestsCount(long i);

  /** @return write request count for this region */
  long getWriteRequestsCount();

  /**
   * Update the write request count for this region
   * @param i increment
   */
  void updateWriteRequestsCount(long i);

  /** @return memstore size for this region, in bytes */
  long getMemstoreSize();

  /** @return the number of mutations processed bypassing the WAL */
  long getNumMutationsWithoutWAL();

  /** @return the size of data processed bypassing the WAL, in bytes */
  long getDataInMemoryWithoutWAL();

  /** @return the number of blocked requests */
  long getBlockedRequestsCount();

  /** @return the number of checkAndMutate guards that passed */
  long getCheckAndMutateChecksPassed();

  /** @return the number of failed checkAndMutate guards */
  long getCheckAndMutateChecksFailed();

  /** @return the MetricsRegion for this region */
  MetricsRegion getMetrics();

  /** @return the block distribution for all Stores managed by this region */
  HDFSBlocksDistribution getHDFSBlocksDistribution();

  ///////////////////////////////////////////////////////////////////////////
  // Locking

  // Region read locks

  /**
   * Operation enum is used in {@link Region#startRegionOperation} to provide context for
   * various checks before any region operation begins.
   */
  enum Operation {
    ANY, GET, PUT, DELETE, SCAN, APPEND, INCREMENT, SPLIT_REGION, MERGE_REGION, BATCH_MUTATE,
    REPLAY_BATCH_MUTATE, COMPACT_REGION, REPLAY_EVENT, SNAPSHOT
  }

  /**
   * This method needs to be called before any public call that reads or
   * modifies data.
   * Acquires a read lock and checks if the region is closing or closed.
   * 
{@link #closeRegionOperation} MUST then always be called after
   * the operation has completed, whether it succeeded or failed.
   * @throws IOException
   */
  void startRegionOperation() throws IOException;

  /**
   * This method needs to be called before any public call that reads or
   * modifies data.
   * Acquires a read lock and checks if the region is closing or closed.
   * 
{@link #closeRegionOperation} MUST then always be called after
   * the operation has completed, whether it succeeded or failed.
   * @param op The operation is about to be taken on the region
   * @throws IOException
   */
  void startRegionOperation(Operation op) throws IOException;

  /**
   * Closes the region operation lock.
   * @throws IOException
   */
  void closeRegionOperation() throws IOException;

  /**
   * Closes the region operation lock. This needs to be called in the finally block corresponding
   * to the try block of {@link #startRegionOperation(Operation)}
   * @throws IOException
   */
  void closeRegionOperation(Operation op) throws IOException;

  // Row write locks

  /**
   * Row lock held by a given thread.
   * One thread may acquire multiple locks on the same row simultaneously.
   * The locks must be released by calling release() from the same thread.
   */
  public interface RowLock {
    /**
     * Release the given lock.  If there are no remaining locks held by the current thread
     * then unlock the row and allow other threads to acquire the lock.
     * @throws IllegalArgumentException if called by a different thread than the lock owning
     *     thread
     */
    void release();
  }

  /**
   *
   * Get a row lock for the specified row. All locks are reentrant.
   *
   * Before calling this function make sure that a region operation has already been
   * started (the calling thread has already acquired the region-close-guard lock).
   * 
   * NOTE: the boolean passed here has changed. It used to be a boolean that
   * stated whether or not to wait on the lock. Now it is whether it an exclusive
   * lock is requested.
   * 
   * @param row The row actions will be performed against
   * @param readLock is the lock reader or writer. True indicates that a non-exclusive
   * lock is requested
   * @see #startRegionOperation()
   * @see #startRegionOperation(Operation)
   */
  RowLock getRowLock(byte[] row, boolean readLock) throws IOException;

  /**
   * If the given list of row locks is not null, releases all locks.
   */
  void releaseRowLocks(List rowLocks);

  ///////////////////////////////////////////////////////////////////////////
  // Region operations

  /**
   * Perform one or more append operations on a row.
   * @param append
   * @param nonceGroup
   * @param nonce
   * @return result of the operation
   * @throws IOException
   */
  Result append(Append append, long nonceGroup, long nonce) throws IOException;

  /**
   * Perform a batch of mutations.
   * 

   * Note this supports only Put and Delete mutations and will ignore other types passed.
   * @param mutations the list of mutations
   * @param nonceGroup
   * @param nonce
   * @return an array of OperationStatus which internally contains the
   *         OperationStatusCode and the exceptionMessage if any.
   * @throws IOException
   */
  OperationStatus[] batchMutate(Mutation[] mutations, long nonceGroup, long nonce)
      throws IOException;

  /**
   * Replay a batch of mutations.
   * @param mutations mutations to replay.
   * @param replaySeqId
   * @return an array of OperationStatus which internally contains the
   *         OperationStatusCode and the exceptionMessage if any.
   * @throws IOException
   */
   OperationStatus[] batchReplay(MutationReplay[] mutations, long replaySeqId) throws IOException;

  /**
   * Atomically checks if a row/family/qualifier value matches the expected val
   * If it does, it performs the row mutations.  If the passed value is null, t
   * is for the lack of column (ie: non-existence)
   * @param row to check
   * @param family column family to check
   * @param qualifier column qualifier to check
   * @param compareOp the comparison operator
   * @param comparator
   * @param mutation
   * @param writeToWAL
   * @return true if mutation was applied, false otherwise
   * @throws IOException
   */
  boolean checkAndMutate(byte [] row, byte [] family, byte [] qualifier, CompareOp compareOp,
      ByteArrayComparable comparator, Mutation mutation, boolean writeToWAL) throws IOException;

  /**
   * Atomically checks if a row/family/qualifier value matches the expected val
   * If it does, it performs the row mutations.  If the passed value is null, t
   * is for the lack of column (ie: non-existence)
   * @param row to check
   * @param family column family to check
   * @param qualifier column qualifier to check
   * @param compareOp the comparison operator
   * @param comparator
   * @param mutations
   * @param writeToWAL
   * @return true if mutation was applied, false otherwise
   * @throws IOException
   */
  boolean checkAndRowMutate(byte [] row, byte [] family, byte [] qualifier, CompareOp compareOp,
      ByteArrayComparable comparator, RowMutations mutations, boolean writeToWAL)
      throws IOException;

  /**
   * Deletes the specified cells/row.
   * @param delete
   * @throws IOException
   */
  void delete(Delete delete) throws IOException;

  /**
   * Do a get based on the get parameter.
   * @param get query parameters
   * @return result of the operation
   */
  Result get(Get get) throws IOException;

  /**
   * Do a get based on the get parameter.
   * @param get query parameters
   * @param withCoprocessor invoke coprocessor or not. We don't want to
   * always invoke cp.
   * @return list of cells resulting from the operation
   */
  List get(Get get, boolean withCoprocessor) throws IOException;

  /**
   * Do a get for duplicate non-idempotent operation.
   * @param get query parameters.
   * @param withCoprocessor
   * @param nonceGroup Nonce group.
   * @param nonce Nonce.
   * @return list of cells resulting from the operation
   * @throws IOException
   */
  List get(Get get, boolean withCoprocessor, long nonceGroup, long nonce) throws IOException;

  /**
   * Return all the data for the row that matches row exactly,
   * or the one that immediately preceeds it, at or immediately before
   * ts.
   * @param row
   * @param family
   * @return result of the operation
   * @throws IOException
   */
  Result getClosestRowBefore(byte[] row, byte[] family) throws IOException;

  /**
   * Return an iterator that scans over the HRegion, returning the indicated
   * columns and rows specified by the {@link Scan}.
   * 

   * This Iterator must be closed by the caller.
   *
   * @param scan configured {@link Scan}
   * @return RegionScanner
   * @throws IOException read exceptions
   */
  RegionScanner getScanner(Scan scan) throws IOException;

  /**
   * Return an iterator that scans over the HRegion, returning the indicated columns and rows
   * specified by the {@link Scan}. The scanner will also include the additional scanners passed
   * along with the scanners for the specified Scan instance. Should be careful with the usage to
   * pass additional scanners only within this Region
   * 

   * This Iterator must be closed by the caller.
   *
   * @param scan configured {@link Scan}
   * @param additionalScanners Any additional scanners to be used
   * @return RegionScanner
   * @throws IOException read exceptions
   */
  RegionScanner getScanner(Scan scan, List additionalScanners) throws IOException;

  /**
   * Perform one or more increment operations on a row.
   * @param increment
   * @param nonceGroup
   * @param nonce
   * @return result of the operation
   * @throws IOException
   */
  Result increment(Increment increment, long nonceGroup, long nonce) throws IOException;

  /**
   * Performs multiple mutations atomically on a single row. Currently
   * {@link Put} and {@link Delete} are supported.
   *
   * @param mutations object that specifies the set of mutations to perform atomically
   * @throws IOException
   */
  void mutateRow(RowMutations mutations) throws IOException;

  /**
   * Perform atomic mutations within the region.
   *
   * @param mutations The list of mutations to perform.
   * mutations can contain operations for multiple rows.
   * Caller has to ensure that all rows are contained in this region.
   * @param rowsToLock Rows to lock
   * @param nonceGroup Optional nonce group of the operation (client Id)
   * @param nonce Optional nonce of the operation (unique random id to ensure "more idempotence")
   * If multiple rows are locked care should be taken that
   * rowsToLock is sorted in order to avoid deadlocks.
   * @throws IOException
   */
  void mutateRowsWithLocks(Collection mutations, Collection rowsToLock,
      long nonceGroup, long nonce) throws IOException;

  /**
   * Performs atomic multiple reads and writes on a given row.
   *
   * @param processor The object defines the reads and writes to a row.
   */
  void processRowsWithLocks(RowProcessor processor) throws IOException;

  /**
   * Performs atomic multiple reads and writes on a given row.
   *
   * @param processor The object defines the reads and writes to a row.
   * @param nonceGroup Optional nonce group of the operation (client Id)
   * @param nonce Optional nonce of the operation (unique random id to ensure "more idempotence")
   */
  void processRowsWithLocks(RowProcessor processor, long nonceGroup, long nonce)
      throws IOException;

  /**
   * Performs atomic multiple reads and writes on a given row.
   *
   * @param processor The object defines the reads and writes to a row.
   * @param timeout The timeout of the processor.process() execution
   *                Use a negative number to switch off the time bound
   * @param nonceGroup Optional nonce group of the operation (client Id)
   * @param nonce Optional nonce of the operation (unique random id to ensure "more idempotence")
   */
  void processRowsWithLocks(RowProcessor processor, long timeout, long nonceGroup, long nonce)
      throws IOException;

  /**
   * Puts some data in the table.
   * @param put
   * @throws IOException
   */
  void put(Put put) throws IOException;

  /**
   * Listener class to enable callers of
   * bulkLoadHFile() to perform any necessary
   * pre/post processing of a given bulkload call
   */
  interface BulkLoadListener {

    /**
     * Called before an HFile is actually loaded
     * @param family family being loaded to
     * @param srcPath path of HFile
     * @return final path to be used for actual loading
     * @throws IOException
     */
    String prepareBulkLoad(byte[] family, String srcPath) throws IOException;

    /**
     * Called after a successful HFile load
     * @param family family being loaded to
     * @param srcPath path of HFile
     * @throws IOException
     */
    void doneBulkLoad(byte[] family, String srcPath) throws IOException;

    /**
     * Called after a failed HFile load
     * @param family family being loaded to
     * @param srcPath path of HFile
     * @throws IOException
     */
    void failedBulkLoad(byte[] family, String srcPath) throws IOException;
  }

  /**
   * Attempts to atomically load a group of hfiles.  This is critical for loading
   * rows with multiple column families atomically.
   *
   * @param familyPaths List of Pair<byte[] column family, String hfilePath>
   * @param bulkLoadListener Internal hooks enabling massaging/preparation of a
   * file about to be bulk loaded
   * @param assignSeqId
   * @return true if successful, false if failed recoverably
   * @throws IOException if failed unrecoverably.
   */
  boolean bulkLoadHFiles(Collection> familyPaths, boolean assignSeqId,
      BulkLoadListener bulkLoadListener) throws IOException;

  ///////////////////////////////////////////////////////////////////////////
  // Coprocessors

  /** @return the coprocessor host */
  RegionCoprocessorHost getCoprocessorHost();

  /**
   * Executes a single protocol buffer coprocessor endpoint {@link Service} method using
   * the registered protocol handlers.  {@link Service} implementations must be registered via the
   * {@link Region#registerService(com.google.protobuf.Service)}
   * method before they are available.
   *
   * @param controller an {@code RpcContoller} implementation to pass to the invoked service
   * @param call a {@code CoprocessorServiceCall} instance identifying the service, method,
   *     and parameters for the method invocation
   * @return a protocol buffer {@code Message} instance containing the method's result
   * @throws IOException if no registered service handler is found or an error
   *     occurs during the invocation
   * @see org.apache.hadoop.hbase.regionserver.Region#registerService(com.google.protobuf.Service)
   */
  Message execService(RpcController controller, CoprocessorServiceCall call) throws IOException;

  /**
   * Registers a new protocol buffer {@link Service} subclass as a coprocessor endpoint to
   * be available for handling
   * {@link Region#execService(com.google.protobuf.RpcController,
   *    org.apache.hadoop.hbase.protobuf.generated.ClientProtos.CoprocessorServiceCall)}} calls.
   *
   * 

   * Only a single instance may be registered per region for a given {@link Service} subclass (the
   * instances are keyed on {@link com.google.protobuf.Descriptors.ServiceDescriptor#getFullName()}.
   * After the first registration, subsequent calls with the same service name will fail with
   * a return value of {@code false}.
   * 
   * @param instance the {@code Service} subclass instance to expose as a coprocessor endpoint
   * @return {@code true} if the registration was successful, {@code false}
   * otherwise
   */
  boolean registerService(Service instance);

  ///////////////////////////////////////////////////////////////////////////
  // RowMutation processor support

  /**
   * Check the collection of families for validity.
   * @param families
   * @throws NoSuchColumnFamilyException
   */
  void checkFamilies(Collection families) throws NoSuchColumnFamilyException;

  /**
   * Check the collection of families for valid timestamps
   * @param familyMap
   * @param now current timestamp
   * @throws FailedSanityCheckException
   */
  void checkTimestamps(Map> familyMap, long now)
      throws FailedSanityCheckException;

  /**
   * Prepare a delete for a row mutation processor
   * @param delete The passed delete is modified by this method. WARNING!
   * @throws IOException
   */
  void prepareDelete(Delete delete) throws IOException;

  /**
   * Set up correct timestamps in the KVs in Delete object.
   * Caller should have the row and region locks.
   * @param mutation
   * @param familyCellMap
   * @param now
   * @throws IOException
   */
  void prepareDeleteTimestamps(Mutation mutation, Map> familyCellMap,
      byte[] now) throws IOException;

  /**
   * Replace any cell timestamps set to HConstants#LATEST_TIMESTAMP with the
   * provided current timestamp.
   * @param values
   * @param now
   */
  void updateCellTimestamps(final Iterable> values, final byte[] now)
      throws IOException;

  ///////////////////////////////////////////////////////////////////////////
  // Flushes, compactions, splits, etc.
  // Wizards only, please

  interface FlushResult {
    enum Result {
      FLUSHED_NO_COMPACTION_NEEDED,
      FLUSHED_COMPACTION_NEEDED,
      // Special case where a flush didn't run because there's nothing in the memstores. Used when
      // bulk loading to know when we can still load even if a flush didn't happen.
      CANNOT_FLUSH_MEMSTORE_EMPTY,
      CANNOT_FLUSH
    }

    /** @return the detailed result code */
    Result getResult();

    /** @return true if the memstores were flushed, else false */
    boolean isFlushSucceeded();

    /** @return True if the flush requested a compaction, else false */
    boolean isCompactionNeeded();
  }

  /**
   * Flush the cache.
   *
   * 
When this method is called the cache will be flushed unless:
   * 

   *   the cache is empty
   *   the region is closed.
   *   a flush is already in progress
   *   writes are disabled
   * 
   *
   * This method may block for some time, so it should not be called from a
   * time-sensitive thread.
   * @param force whether we want to force a flush of all stores
   * @return FlushResult indicating whether the flush was successful or not and if
   * the region needs compacting
   *
   * @throws IOException general io exceptions
   * because a snapshot was not properly persisted.
   */
  FlushResult flush(boolean force) throws IOException;

  /**
   * Synchronously compact all stores in the region.
   * 
This operation could block for a long time, so don't call it from a
   * time-sensitive thread.
   * 
Note that no locks are taken to prevent possible conflicts between
   * compaction and splitting activities. The regionserver does not normally compact
   * and split in parallel. However by calling this method you may introduce
   * unexpected and unhandled concurrency. Don't do this unless you know what
   * you are doing.
   *
   * @param majorCompaction True to force a major compaction regardless of thresholds
   * @throws IOException
   */
  void compact(final boolean majorCompaction) throws IOException;

  /**
   * Trigger major compaction on all stores in the region.
   * 
   * Compaction will be performed asynchronously to this call by the RegionServer's
   * CompactSplitThread. See also {@link Store#triggerMajorCompaction()}
   * @throws IOException
   */
  void triggerMajorCompaction() throws IOException;

  /**
   * @return if a given region is in compaction now.
   */
  CompactionState getCompactionState();

  /** Wait for all current flushes and compactions of the region to complete */
  void waitForFlushesAndCompactions();

  /** Wait for all current flushes of the region to complete
   */
  void waitForFlushes();
}