// Source listing of org.apache.hadoop.hbase.regionserver.MemStoreLABImpl (hbase-server artifact).
/**
*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.hadoop.hbase.regionserver;
import java.nio.ByteBuffer;
import java.util.Set;
import java.util.concurrent.BlockingQueue;
import java.util.concurrent.ConcurrentSkipListSet;
import java.util.concurrent.LinkedBlockingQueue;
import java.util.concurrent.atomic.AtomicBoolean;
import java.util.concurrent.atomic.AtomicInteger;
import java.util.concurrent.atomic.AtomicReference;
import java.util.concurrent.locks.ReentrantLock;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hbase.Cell;
import org.apache.hadoop.hbase.ExtendedCell;
import org.apache.hadoop.hbase.KeyValueUtil;
import org.apache.hadoop.hbase.classification.InterfaceAudience;
import com.google.common.annotations.VisibleForTesting;
import com.google.common.base.Preconditions;
/**
 * A memstore-local allocation buffer.
 * <p>
 * The MemStoreLAB is basically a bump-the-pointer allocator that obtains
 * big (2MB) chunks and then doles out slices of them to the threads that
 * request an allocation.
 * <p>
 * The purpose of this class is to combat heap fragmentation in the
 * regionserver. By ensuring that all Cells in a given memstore refer
 * only to large chunks of contiguous memory, we ensure that large blocks
 * get freed up when the memstore is flushed.
 * <p>
 * Without the MSLAB, the byte arrays allocated during insertion end up
 * interleaved throughout the heap, and the old generation gets progressively
 * more fragmented until a stop-the-world compacting collection occurs.
 * <p>
 * TODO: we should probably benchmark whether word-aligning the allocations
 * would provide a performance improvement - probably would speed up the
 * Bytes.toLong/Bytes.toInt calls in KeyValue, but some of those are cached
 * anyway.
 * <p>
 * The chunks created by this MemStoreLAB can get pooled at {@link ChunkCreator}.
 * When a chunk comes from the pool, it can be either an on heap or an off heap
 * backed chunk. The chunks which this MemStoreLAB creates on its own (when no
 * chunk is available from the pool) will always be on heap backed.
 */
@InterfaceAudience.Private
public class MemStoreLABImpl implements MemStoreLAB {

  static final Log LOG = LogFactory.getLog(MemStoreLABImpl.class);

  // The chunk allocations are currently served from; null while no chunk is installed
  private final AtomicReference<Chunk> curChunk = new AtomicReference<>();
  // Lock to manage multiple handlers requesting for a chunk
  private final ReentrantLock lock = new ReentrantLock();
  // The ids of the chunks contained by this memstore LAB
  @VisibleForTesting
  Set<Integer> chunks = new ConcurrentSkipListSet<>();
  private final int chunkSize;
  private final int maxAlloc;
  private final ChunkCreator chunkCreator;

  // This flag is for closing this instance, it is set when clearing the snapshot of
  // the memstore
  private volatile boolean closed = false;
  // This flag is for reclaiming chunks. It is set when putting chunks back to
  // the pool, so that the chunks are handed back at most once
  private final AtomicBoolean reclaimed = new AtomicBoolean(false);
  // Current count of open scanners which are reading data from this MemStoreLAB
  private final AtomicInteger openScannerCount = new AtomicInteger();

  /**
   * Creates a LAB backed by a default {@link Configuration}. Used in testing.
   */
  public MemStoreLABImpl() {
    this(new Configuration());
  }

  /**
   * Creates a LAB whose chunk size and maximum single allocation size are read from the
   * passed configuration.
   * @param conf configuration supplying {@code CHUNK_SIZE_KEY} and {@code MAX_ALLOC_KEY}
   * @throws IllegalArgumentException if the configured max allocation exceeds the chunk size
   */
  public MemStoreLABImpl(Configuration conf) {
    chunkSize = conf.getInt(CHUNK_SIZE_KEY, CHUNK_SIZE_DEFAULT);
    maxAlloc = conf.getInt(MAX_ALLOC_KEY, MAX_ALLOC_DEFAULT);
    this.chunkCreator = ChunkCreator.getInstance();
    // if we don't exclude allocations >CHUNK_SIZE, we'd infiniteloop on one!
    Preconditions.checkArgument(maxAlloc <= chunkSize,
      MAX_ALLOC_KEY + " must be less than " + CHUNK_SIZE_KEY);
  }

  /**
   * Copies the passed cell into a chunk of this LAB and returns a cell backed by that chunk.
   * @param cell the cell to copy
   * @return the chunk-backed copy, or {@code null} when the cell is larger than the
   *         configured max allocation and therefore was not copied
   * @throws IllegalArgumentException if the computed cell length is negative
   */
  @Override
  public Cell copyCellInto(Cell cell) {
    int size = KeyValueUtil.length(cell);
    Preconditions.checkArgument(size >= 0, "negative size");
    // Callers should satisfy large allocations directly from JVM since they
    // don't cause fragmentation as badly.
    if (size > maxAlloc) {
      return null;
    }
    Chunk c = null;
    int allocOffset = 0;
    while (true) {
      // Try to get the chunk
      c = getOrMakeChunk();
      // We may get null because some other thread succeeded in getting the lock,
      // and so the current thread has to try again to make its chunk or grab the chunk
      // that the other thread created
      if (c != null) {
        // Try to allocate from this chunk
        allocOffset = c.alloc(size);
        if (allocOffset != -1) {
          // We succeeded - this is the common case - small alloc
          // from a big buffer
          break;
        }
        // not enough space!
        // try to retire this chunk
        tryRetireChunk(c);
      }
    }
    return copyToChunkCell(cell, c.getData(), allocOffset, size);
  }

  /**
   * Clone the passed cell by copying its data into the passed buf and create a cell with a
   * chunkid out of it.
   * @param cell the cell whose data is copied
   * @param buf the chunk buffer to write into
   * @param offset the position in {@code buf} at which the cell data starts
   * @param len the serialized length of the cell
   * @return a cell reading its data from {@code buf}, carrying the sequence id of {@code cell}
   */
  private Cell copyToChunkCell(Cell cell, ByteBuffer buf, int offset, int len) {
    int tagsLen = cell.getTagsLength();
    if (cell instanceof ExtendedCell) {
      ((ExtendedCell) cell).write(buf, offset);
    } else {
      // Normally all Cell impls within Server will be of type ExtendedCell. Just considering the
      // other case also. The data fragments within Cell is copied into buf as in KeyValue
      // serialization format only.
      KeyValueUtil.appendTo(cell, buf, offset, true);
    }
    // TODO : write the seqid here. For writing seqId we should create a new cell type so
    // that seqId is not used as the state
    if (tagsLen == 0) {
      // When tagsLen is 0, make a NoTagsByteBufferKeyValue version. This is an optimized class
      // which directly return tagsLen as 0. So we avoid parsing many length components in
      // reading the tagLength stored in the backing buffer. The Memstore addition of every Cell
      // call getTagsLength().
      return new NoTagByteBufferChunkCell(buf, offset, len, cell.getSequenceId());
    } else {
      return new ByteBufferChunkCell(buf, offset, len, cell.getSequenceId());
    }
  }

  /**
   * Close this instance since it won't be used any more, try to put the chunks
   * back to pool
   */
  @Override
  public void close() {
    this.closed = true;
    // We could put back the chunks to pool for reusing only when there is no
    // opening scanner which will read their data
    int count = openScannerCount.get();
    if (count == 0) {
      recycleChunks();
    }
  }

  /**
   * Called when opening a scanner on the data of this MemStoreLAB
   */
  @Override
  public void incScannerCount() {
    this.openScannerCount.incrementAndGet();
  }

  /**
   * Called when closing a scanner on the data of this MemStoreLAB. If this instance is
   * already closed and this was the last open scanner, the chunks are put back to the pool.
   */
  @Override
  public void decScannerCount() {
    int count = this.openScannerCount.decrementAndGet();
    if (this.closed && count == 0) {
      recycleChunks();
    }
  }

  /**
   * Hands the chunks of this LAB back to the {@link ChunkCreator}. The compare-and-set on
   * {@code reclaimed} makes sure this happens at most once, whichever of {@link #close()}
   * and {@link #decScannerCount()} gets here first.
   */
  private void recycleChunks() {
    if (reclaimed.compareAndSet(false, true)) {
      chunkCreator.putbackChunks(chunks);
    }
  }

  /**
   * Try to retire the current chunk if it is still {@code c}. Postcondition is that
   * {@code curChunk.get() != c}. The outcome of the race is not reported to the caller.
   * @param c the chunk to retire
   */
  private void tryRetireChunk(Chunk c) {
    curChunk.compareAndSet(c, null);
    // If the CAS succeeds, that means that we won the race
    // to retire the chunk. We could use this opportunity to
    // update metrics on external fragmentation.
    //
    // If the CAS fails, that means that someone else already
    // retired the chunk for us.
  }

  /**
   * Get the current chunk, or, if there is no current chunk, obtain a new one from the
   * {@link ChunkCreator} and install it as the current chunk.
   * @return the current chunk, or {@code null} if another thread holds the lock or the
   *         creator returned no chunk; the caller is expected to retry
   */
  private Chunk getOrMakeChunk() {
    // Try to get the chunk
    Chunk c = curChunk.get();
    if (c != null) {
      return c;
    }
    // No current chunk, so we want to obtain one. Only the thread that wins the
    // tryLock does so; the others return null straight away instead of blocking.
    if (lock.tryLock()) {
      try {
        // once again check inside the lock
        c = curChunk.get();
        if (c != null) {
          return c;
        }
        c = this.chunkCreator.getChunk();
        if (c != null) {
          // set the curChunk. No need of CAS as only one thread will be here
          curChunk.set(c);
          chunks.add(c.getId());
          return c;
        }
      } finally {
        lock.unlock();
      }
    }
    return null;
  }

  /**
   * @return the chunk allocations are currently served from, may be {@code null}
   */
  @VisibleForTesting
  Chunk getCurrentChunk() {
    return this.curChunk.get();
  }

  /**
   * @return a new queue holding those chunks of this LAB which the {@link ChunkCreator} still
   *         knows by id and which report themselves as coming from the pool
   */
  @VisibleForTesting
  BlockingQueue<Chunk> getPooledChunks() {
    BlockingQueue<Chunk> pooledChunks = new LinkedBlockingQueue<>();
    for (Integer id : this.chunks) {
      Chunk chunk = chunkCreator.getChunk(id);
      if (chunk != null && chunk.isFromPool()) {
        pooledChunks.add(chunk);
      }
    }
    return pooledChunks;
  }
}