All Downloads are FREE. Search and download functionalities are using the official Maven repository.

org.apache.hadoop.hbase.regionserver.MemStoreLABImpl Maven / Gradle / Ivy

There is a newer version: 3.0.0-beta-1
Show newest version
/*
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.apache.hadoop.hbase.regionserver;

import com.google.errorprone.annotations.RestrictedApi;
import java.nio.ByteBuffer;
import java.util.Set;
import java.util.concurrent.BlockingQueue;
import java.util.concurrent.ConcurrentSkipListSet;
import java.util.concurrent.LinkedBlockingQueue;
import java.util.concurrent.atomic.AtomicBoolean;
import java.util.concurrent.atomic.AtomicReference;
import java.util.concurrent.locks.ReentrantLock;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hbase.ByteBufferExtendedCell;
import org.apache.hadoop.hbase.Cell;
import org.apache.hadoop.hbase.ExtendedCell;
import org.apache.hadoop.hbase.KeyValueUtil;
import org.apache.hadoop.hbase.nio.RefCnt;
import org.apache.hadoop.hbase.regionserver.CompactingMemStore.IndexType;
import org.apache.yetus.audience.InterfaceAudience;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import org.apache.hbase.thirdparty.com.google.common.base.Preconditions;

/**
 * A memstore-local allocation buffer.
 * 

* The MemStoreLAB is basically a bump-the-pointer allocator that allocates big (2MB) byte[] chunks * from and then doles it out to threads that request slices into the array. *

* The purpose of this class is to combat heap fragmentation in the regionserver. By ensuring that * all Cells in a given memstore refer only to large chunks of contiguous memory, we ensure that * large blocks get freed up when the memstore is flushed. *

* Without the MSLAB, the byte array allocated during insertion end up interleaved throughout the * heap, and the old generation gets progressively more fragmented until a stop-the-world compacting * collection occurs. *

* TODO: we should probably benchmark whether word-aligning the allocations would provide a * performance improvement - probably would speed up the Bytes.toLong/Bytes.toInt calls in KeyValue, * but some of those are cached anyway. The chunks created by this MemStoreLAB can get pooled at * {@link ChunkCreator}. When the Chunk comes from pool, it can be either an on heap or an off heap * backed chunk. The chunks, which this MemStoreLAB creates on its own (when no chunk available from * pool), those will be always on heap backed. *

* NOTE:if user requested to work with MSLABs (whether on- or off-heap), in * {@link CompactingMemStore} ctor, the {@link CompactingMemStore#indexType} could only be * {@link IndexType#CHUNK_MAP},that is to say the immutable segments using MSLABs are going to use * {@link CellChunkMap} as their index. */ @InterfaceAudience.Private public class MemStoreLABImpl implements MemStoreLAB { static final Logger LOG = LoggerFactory.getLogger(MemStoreLABImpl.class); private AtomicReference currChunk = new AtomicReference<>(); // Lock to manage multiple handlers requesting for a chunk private ReentrantLock lock = new ReentrantLock(); // A set of chunks contained by this memstore LAB Set chunks = new ConcurrentSkipListSet(); private final int dataChunkSize; private final int maxAlloc; private final ChunkCreator chunkCreator; // This flag is for closing this instance, its set when clearing snapshot of // memstore private final AtomicBoolean closed = new AtomicBoolean(false);; // This flag is for reclaiming chunks. Its set when putting chunks back to // pool private final AtomicBoolean reclaimed = new AtomicBoolean(false); /** * Its initial value is 1, so it is one bigger than the current count of open scanners which * reading data from this MemStoreLAB. */ private final RefCnt refCnt; // Used in testing public MemStoreLABImpl() { this(new Configuration()); } public MemStoreLABImpl(Configuration conf) { dataChunkSize = conf.getInt(CHUNK_SIZE_KEY, CHUNK_SIZE_DEFAULT); maxAlloc = conf.getInt(MAX_ALLOC_KEY, MAX_ALLOC_DEFAULT); this.chunkCreator = ChunkCreator.getInstance(); // if we don't exclude allocations >CHUNK_SIZE, we'd infiniteloop on one! Preconditions.checkArgument(maxAlloc <= dataChunkSize, MAX_ALLOC_KEY + " must be less than " + CHUNK_SIZE_KEY); this.refCnt = RefCnt.create(() -> { recycleChunks(); }); } @Override public Cell copyCellInto(Cell cell) { // See head of copyBBECellInto for how it differs from copyCellInto return (cell instanceof ByteBufferExtendedCell) ? copyBBECellInto((ByteBufferExtendedCell) cell, maxAlloc) : copyCellInto(cell, maxAlloc); } /** * When a cell's size is too big (bigger than maxAlloc), copyCellInto does not allocate it on * MSLAB. Since the process of flattening to CellChunkMap assumes that all cells are allocated on * MSLAB, during this process, the big cells are copied into MSLAB using this method. */ @Override public Cell forceCopyOfBigCellInto(Cell cell) { int size = Segment.getCellLength(cell); Preconditions.checkArgument(size >= 0, "negative size"); if (size + ChunkCreator.SIZEOF_CHUNK_HEADER <= dataChunkSize) { // Using copyCellInto for cells which are bigger than the original maxAlloc return copyCellInto(cell, dataChunkSize); } else { Chunk c = getNewExternalChunk(size); int allocOffset = c.alloc(size); return copyToChunkCell(cell, c.getData(), allocOffset, size); } } /** * Mostly a duplicate of {@link #copyCellInto(Cell, int)}} done for perf sake. It presumes * ByteBufferExtendedCell instead of Cell so we deal with a specific type rather than the super * generic Cell. Removes instanceof checks. Shrinkage is enough to make this inline where before * it was too big. Uses less CPU. See HBASE-20875 for evidence. * @see #copyCellInto(Cell, int) */ private Cell copyBBECellInto(ByteBufferExtendedCell cell, int maxAlloc) { int size = cell.getSerializedSize(); Preconditions.checkArgument(size >= 0, "negative size"); // Callers should satisfy large allocations from JVM heap so limit fragmentation. if (size > maxAlloc) { return null; } Chunk c = null; int allocOffset = 0; while (true) { // Try to get the chunk c = getOrMakeChunk(); // We may get null because the some other thread succeeded in getting the lock // and so the current thread has to try again to make its chunk or grab the chunk // that the other thread created // Try to allocate from this chunk if (c != null) { allocOffset = c.alloc(size); if (allocOffset != -1) { // We succeeded - this is the common case - small alloc // from a big buffer break; } // not enough space! // try to retire this chunk tryRetireChunk(c); } } return copyBBECToChunkCell(cell, c.getData(), allocOffset, size); } /** * @see #copyBBECellInto(ByteBufferExtendedCell, int) */ private Cell copyCellInto(Cell cell, int maxAlloc) { int size = Segment.getCellLength(cell); Preconditions.checkArgument(size >= 0, "negative size"); // Callers should satisfy large allocations directly from JVM since they // don't cause fragmentation as badly. if (size > maxAlloc) { return null; } Chunk c = null; int allocOffset = 0; while (true) { // Try to get the chunk c = getOrMakeChunk(); // we may get null because the some other thread succeeded in getting the lock // and so the current thread has to try again to make its chunk or grab the chunk // that the other thread created // Try to allocate from this chunk if (c != null) { allocOffset = c.alloc(size); if (allocOffset != -1) { // We succeeded - this is the common case - small alloc // from a big buffer break; } // not enough space! // try to retire this chunk tryRetireChunk(c); } } return copyToChunkCell(cell, c.getData(), allocOffset, size); } /** * Clone the passed cell by copying its data into the passed buf and create a cell with a chunkid * out of it * @see #copyBBECToChunkCell(ByteBufferExtendedCell, ByteBuffer, int, int) */ private static Cell copyToChunkCell(Cell cell, ByteBuffer buf, int offset, int len) { int tagsLen = cell.getTagsLength(); if (cell instanceof ExtendedCell) { ((ExtendedCell) cell).write(buf, offset); } else { // Normally all Cell impls within Server will be of type ExtendedCell. Just considering the // other case also. The data fragments within Cell is copied into buf as in KeyValue // serialization format only. KeyValueUtil.appendTo(cell, buf, offset, true); } return createChunkCell(buf, offset, len, tagsLen, cell.getSequenceId()); } /** * Clone the passed cell by copying its data into the passed buf and create a cell with a chunkid * out of it * @see #copyToChunkCell(Cell, ByteBuffer, int, int) */ private static Cell copyBBECToChunkCell(ByteBufferExtendedCell cell, ByteBuffer buf, int offset, int len) { int tagsLen = cell.getTagsLength(); cell.write(buf, offset); return createChunkCell(buf, offset, len, tagsLen, cell.getSequenceId()); } private static Cell createChunkCell(ByteBuffer buf, int offset, int len, int tagsLen, long sequenceId) { // TODO : write the seqid here. For writing seqId we should create a new cell type so // that seqId is not used as the state if (tagsLen == 0) { // When tagsLen is 0, make a NoTagsByteBufferKeyValue version. This is an optimized class // which directly return tagsLen as 0. So we avoid parsing many length components in // reading the tagLength stored in the backing buffer. The Memstore addition of every Cell // call getTagsLength(). return new NoTagByteBufferChunkKeyValue(buf, offset, len, sequenceId); } else { return new ByteBufferChunkKeyValue(buf, offset, len, sequenceId); } } /** * Close this instance since it won't be used any more, try to put the chunks back to pool */ @Override public void close() { if (!this.closed.compareAndSet(false, true)) { return; } // We could put back the chunks to pool for reusing only when there is no // opening scanner which will read their data this.refCnt.release(); } @RestrictedApi(explanation = "Should only be called in tests", link = "", allowedOnPath = ".*/src/test/.*") int getRefCntValue() { return this.refCnt.refCnt(); } /** * Called when opening a scanner on the data of this MemStoreLAB */ @Override public void incScannerCount() { this.refCnt.retain(); } /** * Called when closing a scanner on the data of this MemStoreLAB */ @Override public void decScannerCount() { this.refCnt.release(); } private void recycleChunks() { if (reclaimed.compareAndSet(false, true)) { chunkCreator.putbackChunks(chunks); chunks.clear(); } } /** * Try to retire the current chunk if it is still c. Postcondition is that * curChunk.get() != c * @param c the chunk to retire */ private void tryRetireChunk(Chunk c) { currChunk.compareAndSet(c, null); // If the CAS succeeds, that means that we won the race // to retire the chunk. We could use this opportunity to // update metrics on external fragmentation. // // If the CAS fails, that means that someone else already // retired the chunk for us. } /** * Get the current chunk, or, if there is no current chunk, allocate a new one from the JVM. */ private Chunk getOrMakeChunk() { // Try to get the chunk Chunk c = currChunk.get(); if (c != null) { return c; } // No current chunk, so we want to allocate one. We race // against other allocators to CAS in an uninitialized chunk // (which is cheap to allocate) if (lock.tryLock()) { try { // once again check inside the lock c = currChunk.get(); if (c != null) { return c; } c = this.chunkCreator.getChunk(); if (c != null) { // set the curChunk. No need of CAS as only one thread will be here currChunk.set(c); chunks.add(c.getId()); return c; } } finally { lock.unlock(); } } return null; } /* * Returning a new pool chunk, without replacing current chunk, meaning MSLABImpl does not make * the returned chunk as CurChunk. The space on this chunk will be allocated externally. The * interface is only for external callers. */ @Override public Chunk getNewExternalChunk(ChunkCreator.ChunkType chunkType) { switch (chunkType) { case INDEX_CHUNK: case DATA_CHUNK: Chunk c = this.chunkCreator.getChunk(chunkType); chunks.add(c.getId()); return c; case JUMBO_CHUNK: // a jumbo chunk doesn't have a fixed size default: return null; } } /* * Returning a new chunk, without replacing current chunk, meaning MSLABImpl does not make the * returned chunk as CurChunk. The space on this chunk will be allocated externally. The interface * is only for external callers. Chunks from pools are not allocated from here, since they have * fixed sizes */ @Override public Chunk getNewExternalChunk(int size) { int allocSize = size + ChunkCreator.SIZEOF_CHUNK_HEADER; if (allocSize <= ChunkCreator.getInstance().getChunkSize()) { return getNewExternalChunk(ChunkCreator.ChunkType.DATA_CHUNK); } else { Chunk c = this.chunkCreator.getJumboChunk(size); chunks.add(c.getId()); return c; } } @Override public boolean isOnHeap() { return !isOffHeap(); } @Override public boolean isOffHeap() { return this.chunkCreator.isOffheap(); } Chunk getCurrentChunk() { return currChunk.get(); } BlockingQueue getPooledChunks() { BlockingQueue pooledChunks = new LinkedBlockingQueue<>(); for (Integer id : this.chunks) { Chunk chunk = chunkCreator.getChunk(id); if (chunk != null && chunk.isFromPool()) { pooledChunks.add(chunk); } } return pooledChunks; } Integer getNumOfChunksReturnedToPool(Set chunksId) { int i = 0; for (Integer id : chunksId) { if (chunkCreator.isChunkInPool(id)) { i++; } } return i; } boolean isReclaimed() { return reclaimed.get(); } boolean isClosed() { return closed.get(); } }





© 2015 - 2025 Weber Informatics LLC | Privacy Policy