// org.apache.hadoop.hbase.regionserver.Region (artifact: hbase-server)
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.hadoop.hbase.regionserver;
import java.io.IOException;
import java.util.Collection;
import java.util.List;
import java.util.Map;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hbase.Cell;
import org.apache.hadoop.hbase.CellComparator;
import org.apache.hadoop.hbase.CompareOperator;
import org.apache.hadoop.hbase.HBaseInterfaceAudience;
import org.apache.hadoop.hbase.HDFSBlocksDistribution;
import org.apache.hadoop.hbase.HRegionInfo;
import org.apache.hadoop.hbase.client.Append;
import org.apache.hadoop.hbase.client.Delete;
import org.apache.hadoop.hbase.client.Get;
import org.apache.hadoop.hbase.client.Increment;
import org.apache.hadoop.hbase.client.IsolationLevel;
import org.apache.hadoop.hbase.client.Mutation;
import org.apache.hadoop.hbase.client.Put;
import org.apache.hadoop.hbase.client.Result;
import org.apache.hadoop.hbase.client.RowMutations;
import org.apache.hadoop.hbase.client.Scan;
import org.apache.hadoop.hbase.client.TableDescriptor;
import org.apache.hadoop.hbase.conf.ConfigurationObserver;
import org.apache.hadoop.hbase.exceptions.FailedSanityCheckException;
import org.apache.hadoop.hbase.filter.ByteArrayComparable;
import org.apache.hadoop.hbase.regionserver.compactions.CompactionLifeCycleTracker;
import org.apache.hadoop.hbase.security.User;
import org.apache.hadoop.hbase.util.Pair;
import org.apache.hadoop.hbase.wal.WALSplitter.MutationReplay;
import org.apache.yetus.audience.InterfaceAudience;
import org.apache.yetus.audience.InterfaceStability;
import org.apache.hadoop.hbase.shaded.com.google.common.annotations.VisibleForTesting;
import org.apache.hadoop.hbase.shaded.com.google.protobuf.Service;
import org.apache.hadoop.hbase.shaded.protobuf.generated.AdminProtos.GetRegionInfoResponse.CompactionState;
import org.apache.hadoop.hbase.shaded.protobuf.generated.ClientProtos.CoprocessorServiceCall;
/**
* Regions store data for a certain region of a table. It stores all columns
* for each row. A given table consists of one or more Regions.
*
* An Region is defined by its table and its key extent.
*
*
Locking at the Region level serves only one purpose: preventing the
* region from being closed (and consequently split) while other operations
* are ongoing. Each row level operation obtains both a row lock and a region
* read lock for the duration of the operation. While a scanner is being
* constructed, getScanner holds a read lock. If the scanner is successfully
* constructed, it holds a read lock until it is closed. A close takes out a
* write lock and consequently will block for ongoing operations and will block
* new operations from starting while the close is in progress.
*/
@InterfaceAudience.LimitedPrivate(HBaseInterfaceAudience.COPROC)
@InterfaceStability.Evolving
public interface Region extends ConfigurationObserver {
///////////////////////////////////////////////////////////////////////////
// Region state
/** @return region information for this region */
HRegionInfo getRegionInfo();
/** @return table descriptor for this region */
TableDescriptor getTableDescriptor();
/** @return true if region is available (not closed and not closing) */
boolean isAvailable();
/** @return true if region is closed */
boolean isClosed();
/** @return True if closing process has started */
boolean isClosing();
/** @return True if region is in recovering state */
boolean isRecovering();
/** @return True if region is read only */
boolean isReadOnly();
/** @return true if region is splittable */
boolean isSplittable();
/**
* @return true if region is mergeable
*/
boolean isMergeable();
/**
* Return the list of Stores managed by this region
*
Use with caution. Exposed for use of fixup utilities.
* @return a list of the Stores managed by this region
*/
List extends Store> getStores();
/**
* Return the Store for the given family
*
Use with caution. Exposed for use of fixup utilities.
* @return the Store for the given family
*/
Store getStore(byte[] family);
/** @return list of store file names for the given families */
List getStoreFileList(byte[][] columns);
/**
* Check the region's underlying store files, open the files that have not
* been opened yet, and remove the store file readers for store files no
* longer available.
* @throws IOException
*/
boolean refreshStoreFiles() throws IOException;
/** @return the latest sequence number that was read from storage when this region was opened */
long getOpenSeqNum();
/** @return the max sequence id of flushed data on this region; no edit in memory will have
* a sequence id that is less that what is returned here.
*/
long getMaxFlushedSeqId();
/** @return the oldest flushed sequence id for the given family; can be beyond
* {@link #getMaxFlushedSeqId()} in case where we've flushed a subset of a regions column
* families
* @deprecated Since version 1.2.0. Exposes too much about our internals; shutting it down.
* Do not use.
*/
@VisibleForTesting
@Deprecated
public long getOldestSeqIdOfStore(byte[] familyName);
/**
* This can be used to determine the last time all files of this region were major compacted.
* @param majorCompactionOnly Only consider HFile that are the result of major compaction
* @return the timestamp of the oldest HFile for all stores of this region
*/
long getOldestHfileTs(boolean majorCompactionOnly) throws IOException;
/**
* @return map of column family names to max sequence id that was read from storage when this
* region was opened
*/
public Map getMaxStoreSeqId();
/** @return true if loading column families on demand by default */
boolean isLoadingCfsOnDemandDefault();
/** @return readpoint considering given IsolationLevel; pass null for default*/
long getReadPoint(IsolationLevel isolationLevel);
/**
* @return readpoint considering given IsolationLevel
* @deprecated Since 1.2.0. Use {@link #getReadPoint(IsolationLevel)} instead.
*/
@Deprecated
long getReadpoint(IsolationLevel isolationLevel);
/**
* @return The earliest time a store in the region was flushed. All
* other stores in the region would have been flushed either at, or
* after this time.
*/
long getEarliestFlushTimeForAllStores();
///////////////////////////////////////////////////////////////////////////
// Metrics
/** @return read requests count for this region */
long getReadRequestsCount();
/**
* Update the read request count for this region
* @param i increment
*/
void updateReadRequestsCount(long i);
/** @return filtered read requests count for this region */
long getFilteredReadRequestsCount();
/** @return write request count for this region */
long getWriteRequestsCount();
/**
* Update the write request count for this region
* @param i increment
*/
void updateWriteRequestsCount(long i);
/**
* @return memstore size for this region, in bytes. It just accounts data size of cells added to
* the memstores of this Region. Means size in bytes for key, value and tags within Cells.
* It wont consider any java heap overhead for the cell objects or any other.
*/
long getMemstoreSize();
/** @return store services for this region, to access services required by store level needs */
RegionServicesForStores getRegionServicesForStores();
/** @return the number of mutations processed bypassing the WAL */
long getNumMutationsWithoutWAL();
/** @return the size of data processed bypassing the WAL, in bytes */
long getDataInMemoryWithoutWAL();
/** @return the number of blocked requests */
long getBlockedRequestsCount();
/** @return the number of checkAndMutate guards that passed */
long getCheckAndMutateChecksPassed();
/** @return the number of failed checkAndMutate guards */
long getCheckAndMutateChecksFailed();
/** @return the MetricsRegion for this region */
MetricsRegion getMetrics();
/** @return the block distribution for all Stores managed by this region */
HDFSBlocksDistribution getHDFSBlocksDistribution();
///////////////////////////////////////////////////////////////////////////
// Locking
// Region read locks
/**
* Operation enum is used in {@link Region#startRegionOperation} and elsewhere to provide
* context for various checks.
*/
enum Operation {
ANY, GET, PUT, DELETE, SCAN, APPEND, INCREMENT, SPLIT_REGION, MERGE_REGION, BATCH_MUTATE,
REPLAY_BATCH_MUTATE, COMPACT_REGION, REPLAY_EVENT, SNAPSHOT
}
/**
* This method needs to be called before any public call that reads or
* modifies data.
* Acquires a read lock and checks if the region is closing or closed.
* {@link #closeRegionOperation} MUST then always be called after
* the operation has completed, whether it succeeded or failed.
* @throws IOException
*/
void startRegionOperation() throws IOException;
/**
* This method needs to be called before any public call that reads or
* modifies data.
* Acquires a read lock and checks if the region is closing or closed.
*
{@link #closeRegionOperation} MUST then always be called after
* the operation has completed, whether it succeeded or failed.
* @param op The operation is about to be taken on the region
* @throws IOException
*/
void startRegionOperation(Operation op) throws IOException;
/**
* Closes the region operation lock.
* @throws IOException
*/
void closeRegionOperation() throws IOException;
/**
* Closes the region operation lock. This needs to be called in the finally block corresponding
* to the try block of {@link #startRegionOperation(Operation)}
* @throws IOException
*/
void closeRegionOperation(Operation op) throws IOException;
// Row write locks
/**
* Row lock held by a given thread.
* One thread may acquire multiple locks on the same row simultaneously.
* The locks must be released by calling release() from the same thread.
*/
public interface RowLock {
/**
* Release the given lock. If there are no remaining locks held by the current thread
* then unlock the row and allow other threads to acquire the lock.
* @throws IllegalArgumentException if called by a different thread than the lock owning
* thread
*/
void release();
}
/**
*
* Get a row lock for the specified row. All locks are reentrant.
*
* Before calling this function make sure that a region operation has already been
* started (the calling thread has already acquired the region-close-guard lock).
*
* NOTE: the boolean passed here has changed. It used to be a boolean that
* stated whether or not to wait on the lock. Now it is whether it an exclusive
* lock is requested.
*
* @param row The row actions will be performed against
* @param readLock is the lock reader or writer. True indicates that a non-exclusive
* lock is requested
* @see #startRegionOperation()
* @see #startRegionOperation(Operation)
*/
RowLock getRowLock(byte[] row, boolean readLock) throws IOException;
/**
* If the given list of row locks is not null, releases all locks.
*/
void releaseRowLocks(List rowLocks);
///////////////////////////////////////////////////////////////////////////
// Region operations
/**
* Perform one or more append operations on a row.
* @param append
* @param nonceGroup
* @param nonce
* @return result of the operation
* @throws IOException
*/
Result append(Append append, long nonceGroup, long nonce) throws IOException;
/**
* Perform a batch of mutations.
*
* Note this supports only Put and Delete mutations and will ignore other types passed.
* @param mutations the list of mutations
* @param nonceGroup
* @param nonce
* @return an array of OperationStatus which internally contains the
* OperationStatusCode and the exceptionMessage if any.
* @throws IOException
*/
OperationStatus[] batchMutate(Mutation[] mutations, long nonceGroup, long nonce)
throws IOException;
/**
* Replay a batch of mutations.
* @param mutations mutations to replay.
* @param replaySeqId
* @return an array of OperationStatus which internally contains the
* OperationStatusCode and the exceptionMessage if any.
* @throws IOException
*/
OperationStatus[] batchReplay(MutationReplay[] mutations, long replaySeqId) throws IOException;
/**
* Atomically checks if a row/family/qualifier value matches the expected value and if it does,
* it performs the mutation. If the passed value is null, the lack of column value
* (ie: non-existence) is used. See checkAndRowMutate to do many checkAndPuts at a time on a
* single row.
* @param row to check
* @param family column family to check
* @param qualifier column qualifier to check
* @param op the comparison operator
* @param comparator
* @param mutation
* @param writeToWAL
* @return true if mutation was applied, false otherwise
* @throws IOException
*/
boolean checkAndMutate(byte [] row, byte [] family, byte [] qualifier, CompareOperator op,
ByteArrayComparable comparator, Mutation mutation, boolean writeToWAL) throws IOException;
/**
* Atomically checks if a row/family/qualifier value matches the expected values and if it does,
* it performs the row mutations. If the passed value is null, the lack of column value
* (ie: non-existence) is used. Use to do many mutations on a single row. Use checkAndMutate
* to do one checkAndMutate at a time.
* @param row to check
* @param family column family to check
* @param qualifier column qualifier to check
* @param op the comparison operator
* @param comparator
* @param mutations
* @param writeToWAL
* @return true if mutations were applied, false otherwise
* @throws IOException
*/
boolean checkAndRowMutate(byte [] row, byte [] family, byte [] qualifier, CompareOperator op,
ByteArrayComparable comparator, RowMutations mutations, boolean writeToWAL)
throws IOException;
/**
* Deletes the specified cells/row.
* @param delete
* @throws IOException
*/
void delete(Delete delete) throws IOException;
/**
* Do a get based on the get parameter.
* @param get query parameters
* @return result of the operation
*/
Result get(Get get) throws IOException;
/**
* Do a get based on the get parameter.
* @param get query parameters
* @param withCoprocessor invoke coprocessor or not. We don't want to
* always invoke cp.
* @return list of cells resulting from the operation
*/
List get(Get get, boolean withCoprocessor) throws IOException;
/**
* Do a get for duplicate non-idempotent operation.
* @param get query parameters.
* @param withCoprocessor
* @param nonceGroup Nonce group.
* @param nonce Nonce.
* @return list of cells resulting from the operation
* @throws IOException
*/
List get(Get get, boolean withCoprocessor, long nonceGroup, long nonce) throws IOException;
/**
* Return an iterator that scans over the HRegion, returning the indicated
* columns and rows specified by the {@link Scan}.
*
* This Iterator must be closed by the caller.
*
* @param scan configured {@link Scan}
* @return RegionScanner
* @throws IOException read exceptions
*/
RegionScanner getScanner(Scan scan) throws IOException;
/**
* Return an iterator that scans over the HRegion, returning the indicated columns and rows
* specified by the {@link Scan}. The scanner will also include the additional scanners passed
* along with the scanners for the specified Scan instance. Should be careful with the usage to
* pass additional scanners only within this Region
*
* This Iterator must be closed by the caller.
*
* @param scan configured {@link Scan}
* @param additionalScanners Any additional scanners to be used
* @return RegionScanner
* @throws IOException read exceptions
*/
RegionScanner getScanner(Scan scan, List additionalScanners) throws IOException;
/** The comparator to be used with the region */
CellComparator getCellComparator();
/**
* Perform one or more increment operations on a row.
* @param increment
* @param nonceGroup
* @param nonce
* @return result of the operation
* @throws IOException
*/
Result increment(Increment increment, long nonceGroup, long nonce) throws IOException;
/**
* Performs multiple mutations atomically on a single row. Currently
* {@link Put} and {@link Delete} are supported.
*
* @param mutations object that specifies the set of mutations to perform atomically
* @throws IOException
*/
void mutateRow(RowMutations mutations) throws IOException;
/**
* Perform atomic mutations within the region.
*
* @param mutations The list of mutations to perform.
* mutations can contain operations for multiple rows.
* Caller has to ensure that all rows are contained in this region.
* @param rowsToLock Rows to lock
* @param nonceGroup Optional nonce group of the operation (client Id)
* @param nonce Optional nonce of the operation (unique random id to ensure "more idempotence")
* If multiple rows are locked care should be taken that
* rowsToLock is sorted in order to avoid deadlocks.
* @throws IOException
*/
void mutateRowsWithLocks(Collection mutations, Collection rowsToLock,
long nonceGroup, long nonce) throws IOException;
/**
* Performs atomic multiple reads and writes on a given row.
*
* @param processor The object defines the reads and writes to a row.
*/
void processRowsWithLocks(RowProcessor,?> processor) throws IOException;
/**
* Performs atomic multiple reads and writes on a given row.
*
* @param processor The object defines the reads and writes to a row.
* @param nonceGroup Optional nonce group of the operation (client Id)
* @param nonce Optional nonce of the operation (unique random id to ensure "more idempotence")
*/
void processRowsWithLocks(RowProcessor,?> processor, long nonceGroup, long nonce)
throws IOException;
/**
* Performs atomic multiple reads and writes on a given row.
*
* @param processor The object defines the reads and writes to a row.
* @param timeout The timeout of the processor.process() execution
* Use a negative number to switch off the time bound
* @param nonceGroup Optional nonce group of the operation (client Id)
* @param nonce Optional nonce of the operation (unique random id to ensure "more idempotence")
*/
void processRowsWithLocks(RowProcessor,?> processor, long timeout, long nonceGroup, long nonce)
throws IOException;
/**
* Puts some data in the table.
* @param put
* @throws IOException
*/
void put(Put put) throws IOException;
/**
* Listener class to enable callers of
* bulkLoadHFile() to perform any necessary
* pre/post processing of a given bulkload call
*/
interface BulkLoadListener {
/**
* Called before an HFile is actually loaded
* @param family family being loaded to
* @param srcPath path of HFile
* @return final path to be used for actual loading
* @throws IOException
*/
String prepareBulkLoad(byte[] family, String srcPath, boolean copyFile)
throws IOException;
/**
* Called after a successful HFile load
* @param family family being loaded to
* @param srcPath path of HFile
* @throws IOException
*/
void doneBulkLoad(byte[] family, String srcPath) throws IOException;
/**
* Called after a failed HFile load
* @param family family being loaded to
* @param srcPath path of HFile
* @throws IOException
*/
void failedBulkLoad(byte[] family, String srcPath) throws IOException;
}
/**
* Attempts to atomically load a group of hfiles. This is critical for loading
* rows with multiple column families atomically.
*
* @param familyPaths List of Pair<byte[] column family, String hfilePath>
* @param bulkLoadListener Internal hooks enabling massaging/preparation of a
* file about to be bulk loaded
* @param assignSeqId
* @return Map from family to List of store file paths if successful, null if failed recoverably
* @throws IOException if failed unrecoverably.
*/
Map> bulkLoadHFiles(Collection> familyPaths,
boolean assignSeqId, BulkLoadListener bulkLoadListener) throws IOException;
/**
* Attempts to atomically load a group of hfiles. This is critical for loading
* rows with multiple column families atomically.
*
* @param familyPaths List of Pair<byte[] column family, String hfilePath>
* @param assignSeqId
* @param bulkLoadListener Internal hooks enabling massaging/preparation of a
* file about to be bulk loaded
* @param copyFile always copy hfiles if true
* @return Map from family to List of store file paths if successful, null if failed recoverably
* @throws IOException if failed unrecoverably.
*/
Map> bulkLoadHFiles(Collection> familyPaths,
boolean assignSeqId, BulkLoadListener bulkLoadListener, boolean copyFile) throws IOException;
///////////////////////////////////////////////////////////////////////////
// Coprocessors
/** @return the coprocessor host */
RegionCoprocessorHost getCoprocessorHost();
/**
* Executes a single protocol buffer coprocessor endpoint {@link Service} method using
* the registered protocol handlers. {@link Service} implementations must be registered via the
* {@link Region#registerService(com.google.protobuf.Service)}
* method before they are available.
*
* @param controller an {@code RpcContoller} implementation to pass to the invoked service
* @param call a {@code CoprocessorServiceCall} instance identifying the service, method,
* and parameters for the method invocation
* @return a protocol buffer {@code Message} instance containing the method's result
* @throws IOException if no registered service handler is found or an error
* occurs during the invocation
* @see org.apache.hadoop.hbase.regionserver.Region#registerService(com.google.protobuf.Service)
*/
com.google.protobuf.Message execService(com.google.protobuf.RpcController controller,
CoprocessorServiceCall call)
throws IOException;
/**
* Registers a new protocol buffer {@link Service} subclass as a coprocessor endpoint to
* be available for handling Region#execService(com.google.protobuf.RpcController,
* org.apache.hadoop.hbase.protobuf.generated.ClientProtos.CoprocessorServiceCall) calls.
*
*
* Only a single instance may be registered per region for a given {@link Service} subclass (the
* instances are keyed on {@link com.google.protobuf.Descriptors.ServiceDescriptor#getFullName()}.
* After the first registration, subsequent calls with the same service name will fail with
* a return value of {@code false}.
*
* @param instance the {@code Service} subclass instance to expose as a coprocessor endpoint
* @return {@code true} if the registration was successful, {@code false}
* otherwise
*/
boolean registerService(com.google.protobuf.Service instance);
///////////////////////////////////////////////////////////////////////////
// RowMutation processor support
/**
* Check the collection of families for validity.
* @param families
* @throws NoSuchColumnFamilyException
*/
void checkFamilies(Collection families) throws NoSuchColumnFamilyException;
/**
* Check the collection of families for valid timestamps
* @param familyMap
* @param now current timestamp
* @throws FailedSanityCheckException
*/
void checkTimestamps(Map> familyMap, long now)
throws FailedSanityCheckException;
/**
* Prepare a delete for a row mutation processor
* @param delete The passed delete is modified by this method. WARNING!
* @throws IOException
*/
void prepareDelete(Delete delete) throws IOException;
/**
* Set up correct timestamps in the KVs in Delete object.
* Caller should have the row and region locks.
* @param mutation
* @param familyCellMap
* @param now
* @throws IOException
*/
void prepareDeleteTimestamps(Mutation mutation, Map> familyCellMap,
byte[] now) throws IOException;
/**
* Replace any cell timestamps set to {@link org.apache.hadoop.hbase.HConstants#LATEST_TIMESTAMP}
* provided current timestamp.
* @param values
* @param now
*/
void updateCellTimestamps(final Iterable> values, final byte[] now)
throws IOException;
///////////////////////////////////////////////////////////////////////////
// Flushes, compactions, splits, etc.
// Wizards only, please
interface FlushResult {
enum Result {
FLUSHED_NO_COMPACTION_NEEDED,
FLUSHED_COMPACTION_NEEDED,
// Special case where a flush didn't run because there's nothing in the memstores. Used when
// bulk loading to know when we can still load even if a flush didn't happen.
CANNOT_FLUSH_MEMSTORE_EMPTY,
CANNOT_FLUSH
}
/** @return the detailed result code */
Result getResult();
/** @return true if the memstores were flushed, else false */
boolean isFlushSucceeded();
/** @return True if the flush requested a compaction, else false */
boolean isCompactionNeeded();
}
/**
* Flush the cache.
*
* When this method is called the cache will be flushed unless:
*
* - the cache is empty
* - the region is closed.
* - a flush is already in progress
* - writes are disabled
*
*
* This method may block for some time, so it should not be called from a
* time-sensitive thread.
* @param force whether we want to force a flush of all stores
* @return FlushResult indicating whether the flush was successful or not and if
* the region needs compacting
*
* @throws IOException general io exceptions
* because a snapshot was not properly persisted.
*/
FlushResult flush(boolean force) throws IOException;
/**
* Synchronously compact all stores in the region.
* This operation could block for a long time, so don't call it from a
* time-sensitive thread.
* Note that no locks are taken to prevent possible conflicts between
* compaction and splitting activities. The regionserver does not normally compact
* and split in parallel. However by calling this method you may introduce
* unexpected and unhandled concurrency. Don't do this unless you know what
* you are doing.
*
* @param majorCompaction True to force a major compaction regardless of thresholds
* @throws IOException
*/
void compact(final boolean majorCompaction) throws IOException;
/**
* Trigger major compaction on all stores in the region.
*
* Compaction will be performed asynchronously to this call by the RegionServer's
* CompactSplitThread. See also {@link Store#triggerMajorCompaction()}
* @throws IOException
*/
void triggerMajorCompaction() throws IOException;
/**
* @return if a given region is in compaction now.
*/
CompactionState getCompactionState();
/**
* Request compaction on this region.
*/
void requestCompaction(String why, int priority, CompactionLifeCycleTracker tracker, User user)
throws IOException;
/**
* Request compaction for the given family
*/
void requestCompaction(byte[] family, String why, int priority,
CompactionLifeCycleTracker tracker, User user) throws IOException;
/** Wait for all current flushes and compactions of the region to complete */
void waitForFlushesAndCompactions();
/** Wait for all current flushes of the region to complete
*/
void waitForFlushes();
}