
org.apache.cassandra.utils.memory.BufferPool

The Apache Cassandra Project develops a highly scalable second-generation distributed database, bringing together Dynamo's fully distributed design and Bigtable's ColumnFamily-based data model.

/*
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package org.apache.cassandra.utils.memory;

import java.lang.ref.PhantomReference;
import java.lang.ref.ReferenceQueue;
import java.nio.ByteBuffer;
import java.nio.ByteOrder;
import java.util.ArrayDeque;
import java.util.Collections;
import java.util.Queue;
import java.util.Set;
import java.util.concurrent.*;
import java.util.concurrent.atomic.AtomicLong;
import java.util.concurrent.atomic.AtomicLongFieldUpdater;
import java.util.concurrent.atomic.LongAdder;
import java.util.concurrent.atomic.AtomicReferenceFieldUpdater;
import java.util.function.BiPredicate;
import java.util.function.Consumer;
import java.util.function.Supplier;

import com.google.common.annotations.VisibleForTesting;

import net.nicoulaj.compilecommand.annotations.Inline;
import org.apache.cassandra.concurrent.InfiniteLoopExecutor;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import io.netty.util.concurrent.FastThreadLocal;

import org.apache.cassandra.io.compress.BufferType;
import org.apache.cassandra.io.util.FileUtils;
import org.apache.cassandra.metrics.BufferPoolMetrics;
import org.apache.cassandra.utils.NoSpamLogger;
import org.apache.cassandra.utils.concurrent.Ref;

import static com.google.common.collect.ImmutableList.of;
import static org.apache.cassandra.utils.ExecutorUtils.*;
import static org.apache.cassandra.utils.FBUtilities.prettyPrintMemory;
import static org.apache.cassandra.utils.memory.MemoryUtil.isExactlyDirect;

/**
 * A pool of ByteBuffers that can be recycled to reduce system direct memory fragmentation and improve buffer
 * allocation performance.
 * <p>
 * Each {@link BufferPool} instance has one {@link GlobalPool} which allocates two kinds of chunks:
 * <ul>
 *   <li>Macro Chunk
 *     <ul>
 *       <li>A memory slab that has a size of MACRO_CHUNK_SIZE, which is 64 * NORMAL_CHUNK_SIZE</li>
 *       <li>Used to allocate normal chunks with a size of NORMAL_CHUNK_SIZE</li>
 *     </ul>
 *   </li>
 *   <li>Normal Chunk
 *     <ul>
 *       <li>Used by {@link LocalPool} to serve buffer allocations</li>
 *       <li>Minimum allocation unit is NORMAL_CHUNK_SIZE / 64</li>
 *     </ul>
 *   </li>
 * </ul>
 * <p>
 * {@link GlobalPool} maintains two kinds of freed chunks: fully freed chunks, where all buffers have been released,
 * and partially freed chunks, where some buffers are still held, e.g. by {@link org.apache.cassandra.cache.ChunkCache}.
 * Partially freed chunks are used to improve cache utilization and have lower priority than fully freed chunks.
 * <p>
 * {@link LocalPool} is a thread local pool that serves buffer allocation requests. There are two kinds of local pool:
 * <ul>
 *   <li>Normal Pool:
 *     <ul>
 *       <li>used to serve allocation sizes that are larger than half of NORMAL_ALLOCATION_UNIT but less than NORMAL_CHUNK_SIZE</li>
 *       <li>when there is insufficient space in the local queue, it requests more normal chunks from the global pool</li>
 *       <li>when a normal chunk is recycled, either fully or partially, it is passed to the global pool to be used by other pools</li>
 *     </ul>
 *   </li>
 *   <li>Tiny Pool:
 *     <ul>
 *       <li>used to serve allocation sizes that are less than NORMAL_ALLOCATION_UNIT</li>
 *       <li>when there is insufficient space in the local queue, it requests more tiny chunks from the parent normal pool</li>
 *       <li>when a tiny chunk is fully freed, it is passed back to the parent normal pool and the corresponding buffer in the parent normal chunk is freed</li>
 *     </ul>
 *   </li>
 * </ul>
 * <p>
 * Note: even though partially freed chunks improve cache utilization when the chunk cache holds outstanding buffers
 * for arbitrary periods, fragmentation still occurs in partially freed chunks because of non-uniform allocation sizes.
 * <p>
 * The lifecycle of a normal Chunk:
 * <pre>
 *    new                      acquire                      release                    recycle
 * ────────→ in GlobalPool ──────────────→ in LocalPool ──────────────→ EVICTED  ──────────────────┐
 *           owner = null                  owner = LocalPool            owner = null               │
 *           status = IN_USE               status = IN_USE              status = EVICTED           │
 *              ready                      serves get / free            serves free only           │
 *                ↑                                                                                │
 *                └────────────────────────────────────────────────────────────────────────────────┘
 * </pre>
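 * <p>
 * A minimal usage sketch (illustrative only; the pool name, memory threshold and {@code recyclePartially} flag below
 * are arbitrary example values, not defaults defined by this class):
 * <pre>{@code
 * BufferPool pool = new BufferPool("example", 64 << 20, true);  // hypothetical 64 MiB pool
 * ByteBuffer buffer = pool.get(4096, BufferType.OFF_HEAP);      // served from the caller's thread-local pool
 * try
 * {
 *     // ... read into / write from the buffer ...
 * }
 * finally
 * {
 *     pool.put(buffer);                                         // return the slice so its chunk can be recycled
 * }
 * }</pre>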
*/
public class BufferPool
{
    /** The size of a page aligned buffer, 128KiB */
    public static final int NORMAL_CHUNK_SIZE = 128 << 10;
    public static final int NORMAL_ALLOCATION_UNIT = NORMAL_CHUNK_SIZE / 64;
    public static final int TINY_CHUNK_SIZE = NORMAL_ALLOCATION_UNIT;
    public static final int TINY_ALLOCATION_UNIT = TINY_CHUNK_SIZE / 64;
    public static final int TINY_ALLOCATION_LIMIT = TINY_CHUNK_SIZE / 2;

    private static final Logger logger = LoggerFactory.getLogger(BufferPool.class);
    private static final NoSpamLogger noSpamLogger = NoSpamLogger.getLogger(logger, 15L, TimeUnit.MINUTES);
    private static final ByteBuffer EMPTY_BUFFER = ByteBuffer.allocateDirect(0);

    private volatile Debug debug = Debug.NO_OP;

    protected final String name;
    protected final BufferPoolMetrics metrics;
    private final long memoryUsageThreshold;
    private final String readableMemoryUsageThreshold;

    /** Size of unpooled buffers allocated outside of the buffer pool, in bytes. */
    private final LongAdder overflowMemoryUsage = new LongAdder();

    /** Size of buffers in use, in bytes, including pooled and unpooled buffers. */
    private final LongAdder memoryInUse = new LongAdder();

    /** Size of allocated buffer pool slabs, in bytes. */
    private final AtomicLong memoryAllocated = new AtomicLong();

    /** A global pool of chunks (page aligned buffers) */
    private final GlobalPool globalPool;

    /** Allow partially freed chunks to be recycled for allocation */
    private final boolean recyclePartially;

    /** A thread local pool of chunks, where chunks come from the global pool */
    private final FastThreadLocal<LocalPool> localPool = new FastThreadLocal<LocalPool>()
    {
        @Override
        protected LocalPool initialValue()
        {
            return new LocalPool();
        }

        protected void onRemoval(LocalPool value)
        {
            value.release();
        }
    };

    private final Set<LocalPoolRef> localPoolReferences = Collections.newSetFromMap(new ConcurrentHashMap<>());

    private final ReferenceQueue<Object> localPoolRefQueue = new ReferenceQueue<>();

    private final InfiniteLoopExecutor localPoolCleaner;

    public BufferPool(String name, long memoryUsageThreshold, boolean recyclePartially)
    {
        this.name = name;
        this.memoryUsageThreshold = memoryUsageThreshold;
        this.readableMemoryUsageThreshold = prettyPrintMemory(memoryUsageThreshold);
        this.globalPool = new GlobalPool();
        this.metrics = new BufferPoolMetrics(name, this);
        this.recyclePartially = recyclePartially;
        this.localPoolCleaner = new InfiniteLoopExecutor("LocalPool-Cleaner-" + name, this::cleanupOneReference).start();
    }

    /**
     * @return a local pool instance; the caller is responsible for releasing the pool
     */
    public LocalPool create()
    {
        return new LocalPool();
    }

    public ByteBuffer get(int size, BufferType bufferType)
    {
        if (bufferType == BufferType.ON_HEAP)
            return allocate(size, bufferType);
        else
            return localPool.get().get(size);
    }

    public ByteBuffer getAtLeast(int size, BufferType bufferType)
    {
        if (bufferType == BufferType.ON_HEAP)
            return allocate(size, bufferType);
        else
            return localPool.get().getAtLeast(size);
    }

    /** Unlike the get methods, this will return null if the pool is exhausted */
    public ByteBuffer tryGet(int size)
    {
        return localPool.get().tryGet(size, false);
    }

    public ByteBuffer tryGetAtLeast(int size)
    {
        return localPool.get().tryGet(size, true);
    }

    private ByteBuffer allocate(int size, BufferType bufferType)
    {
        updateOverflowMemoryUsage(size);
        return bufferType == BufferType.ON_HEAP ?
ByteBuffer.allocate(size) : ByteBuffer.allocateDirect(size); } public void put(ByteBuffer buffer) { if (isExactlyDirect(buffer)) localPool.get().put(buffer); else updateOverflowMemoryUsage(-buffer.capacity()); } public void putUnusedPortion(ByteBuffer buffer) { if (isExactlyDirect(buffer)) { LocalPool pool = localPool.get(); if (buffer.limit() > 0) pool.putUnusedPortion(buffer); else pool.put(buffer); } } private void updateOverflowMemoryUsage(int size) { overflowMemoryUsage.add(size); } public void setRecycleWhenFreeForCurrentThread(boolean recycleWhenFree) { localPool.get().recycleWhenFree(recycleWhenFree); } /** * @return buffer size being allocated, including pooled buffers and unpooled buffers */ public long sizeInBytes() { return memoryAllocated.get() + overflowMemoryUsage.longValue(); } /** * @return buffer size being used, including used pooled buffers and unpooled buffers */ public long usedSizeInBytes() { return memoryInUse.longValue() + overflowMemoryUsage.longValue(); } /** * @return unpooled buffer size being allocated outside of buffer pool. */ public long overflowMemoryInBytes() { return overflowMemoryUsage.longValue(); } /** * @return maximum pooled buffer size in bytes */ public long memoryUsageThreshold() { return memoryUsageThreshold; } @VisibleForTesting public GlobalPool globalPool() { return globalPool; } /** * Forces to recycle free local chunks back to the global pool. * This is needed because if buffers were freed by a different thread than the one * that allocated them, recycling might not have happened and the local pool may still own some * fully empty chunks. */ @VisibleForTesting public void releaseLocal() { localPool.get().release(); } interface Debug { public static Debug NO_OP = new Debug() { @Override public void registerNormal(Chunk chunk) {} @Override public void acquire(Chunk chunk) {} @Override public void recycleNormal(Chunk oldVersion, Chunk newVersion) {} @Override public void recyclePartial(Chunk chunk) { } }; void registerNormal(Chunk chunk); void acquire(Chunk chunk); void recycleNormal(Chunk oldVersion, Chunk newVersion); void recyclePartial(Chunk chunk); } public void debug(Debug setDebug) { assert setDebug != null; this.debug = setDebug; } interface Recycler { /** * Recycle a fully freed chunk */ void recycle(Chunk chunk); /** * @return true if chunk can be reused before fully freed. */ boolean canRecyclePartially(); /** * Recycle a partially freed chunk */ void recyclePartially(Chunk chunk); } /** * A queue of page aligned buffers, the chunks, which have been sliced from bigger chunks, * the macro-chunks, also page aligned. Macro-chunks are allocated as long as we have not exceeded the * memory maximum threshold, MEMORY_USAGE_THRESHOLD and are never released. * * This class is shared by multiple thread local pools and must be thread-safe. */ final class GlobalPool implements Supplier, Recycler { /** The size of a bigger chunk, 1 MiB, must be a multiple of NORMAL_CHUNK_SIZE */ static final int MACRO_CHUNK_SIZE = 64 * NORMAL_CHUNK_SIZE; private final String READABLE_MACRO_CHUNK_SIZE = prettyPrintMemory(MACRO_CHUNK_SIZE); private final Queue macroChunks = new ConcurrentLinkedQueue<>(); // TODO (future): it would be preferable to use a CLStack to improve cache occupancy; it would also be preferable to use "CoreLocal" storage // It contains fully free chunks and when it runs out, partially freed chunks will be used. 
private final Queue chunks = new ConcurrentLinkedQueue<>(); // Partially freed chunk which is recirculated whenever chunk has free spaces to // improve buffer utilization when chunk cache is holding a piece of buffer for a long period. // Note: fragmentation still exists, as holes are with different sizes. private final Queue partiallyFreedChunks = new ConcurrentLinkedQueue<>(); /** Used in logging statements to lazily build a human-readable current memory usage. */ private final Object readableMemoryUsage = new Object() { @Override public String toString() { return prettyPrintMemory(sizeInBytes()); } }; public GlobalPool() { assert Integer.bitCount(NORMAL_CHUNK_SIZE) == 1; // must be a power of 2 assert Integer.bitCount(MACRO_CHUNK_SIZE) == 1; // must be a power of 2 assert MACRO_CHUNK_SIZE % NORMAL_CHUNK_SIZE == 0; // must be a multiple } /** Return a chunk, the caller will take owership of the parent chunk. */ public Chunk get() { Chunk chunk = getInternal(); if (chunk != null) debug.acquire(chunk); return chunk; } private Chunk getInternal() { Chunk chunk = chunks.poll(); if (chunk != null) return chunk; chunk = allocateMoreChunks(); if (chunk != null) return chunk; // another thread may have just allocated last macro chunk, so make one final attempt before returning null chunk = chunks.poll(); // try to use partially freed chunk if there is no more fully freed chunk. return chunk == null ? partiallyFreedChunks.poll() : chunk; } /** * This method might be called by multiple threads and that's fine if we add more * than one chunk at the same time as long as we don't exceed the MEMORY_USAGE_THRESHOLD. */ private Chunk allocateMoreChunks() { while (true) { long cur = memoryAllocated.get(); if (cur + MACRO_CHUNK_SIZE > memoryUsageThreshold) { if (memoryUsageThreshold > 0) { noSpamLogger.info("Maximum memory usage reached ({}) for {} buffer pool, cannot allocate chunk of {}", readableMemoryUsageThreshold, name, READABLE_MACRO_CHUNK_SIZE); } return null; } if (memoryAllocated.compareAndSet(cur, cur + MACRO_CHUNK_SIZE)) break; } // allocate a large chunk Chunk chunk; try { chunk = new Chunk(null, allocateDirectAligned(MACRO_CHUNK_SIZE)); } catch (OutOfMemoryError oom) { noSpamLogger.error("{} buffer pool failed to allocate chunk of {}, current size {} ({}). " + "Attempting to continue; buffers will be allocated in on-heap memory which can degrade performance. " + "Make sure direct memory size (-XX:MaxDirectMemorySize) is large enough to accommodate off-heap memtables and caches.", name, READABLE_MACRO_CHUNK_SIZE, readableMemoryUsage, oom.getClass().getName()); return null; } chunk.acquire(null); macroChunks.add(chunk); final Chunk callerChunk = new Chunk(this, chunk.get(NORMAL_CHUNK_SIZE)); debug.registerNormal(callerChunk); for (int i = NORMAL_CHUNK_SIZE; i < MACRO_CHUNK_SIZE; i += NORMAL_CHUNK_SIZE) { Chunk add = new Chunk(this, chunk.get(NORMAL_CHUNK_SIZE)); chunks.add(add); debug.registerNormal(add); } return callerChunk; } @Override public void recycle(Chunk chunk) { Chunk recycleAs = new Chunk(chunk); debug.recycleNormal(chunk, recycleAs); chunks.add(recycleAs); } @Override public void recyclePartially(Chunk chunk) { debug.recyclePartial(chunk); partiallyFreedChunks.add(chunk); } @Override public boolean canRecyclePartially() { return recyclePartially; } /** This is not thread safe and should only be used for unit testing. 
*/ @VisibleForTesting void unsafeFree() { while (!chunks.isEmpty()) chunks.poll().unsafeFree(); while (!partiallyFreedChunks.isEmpty()) partiallyFreedChunks.poll().unsafeFree(); while (!macroChunks.isEmpty()) macroChunks.poll().unsafeFree(); } @VisibleForTesting boolean isPartiallyFreed(Chunk chunk) { return partiallyFreedChunks.contains(chunk); } @VisibleForTesting boolean isFullyFreed(Chunk chunk) { return chunks.contains(chunk); } } private static class MicroQueueOfChunks { // a microqueue of Chunks: // * if any are null, they are at the end; // * new Chunks are added to the last null index // * if no null indexes available, the smallest is swapped with the last index, and this replaced // * this results in a queue that will typically be visited in ascending order of available space, so that // small allocations preferentially slice from the Chunks with the smallest space available to furnish them // WARNING: if we ever change the size of this, we must update removeFromLocalQueue, and addChunk private Chunk chunk0, chunk1, chunk2; private int count; // add a new chunk, if necessary evicting the chunk with the least available memory (returning the evicted chunk) private Chunk add(Chunk chunk) { switch (count) { case 0: chunk0 = chunk; count = 1; break; case 1: chunk1 = chunk; count = 2; break; case 2: chunk2 = chunk; count = 3; break; case 3: { Chunk release; int chunk0Free = chunk0.freeSlotCount(); int chunk1Free = chunk1.freeSlotCount(); int chunk2Free = chunk2.freeSlotCount(); if (chunk0Free < chunk1Free) { if (chunk0Free < chunk2Free) { release = chunk0; chunk0 = chunk; } else { release = chunk2; chunk2 = chunk; } } else { if (chunk1Free < chunk2Free) { release = chunk1; chunk1 = chunk; } else { release = chunk2; chunk2 = chunk; } } return release; } default: throw new IllegalStateException(); } return null; } private void remove(Chunk chunk) { // since we only have three elements in the queue, it is clearer, easier and faster to just hard code the options if (chunk0 == chunk) { // remove first by shifting back second two chunk0 = chunk1; chunk1 = chunk2; } else if (chunk1 == chunk) { // remove second by shifting back last chunk1 = chunk2; } else if (chunk2 != chunk) { return; } // whatever we do, the last element must be null chunk2 = null; --count; } ByteBuffer get(int size, boolean sizeIsLowerBound, ByteBuffer reuse) { ByteBuffer buffer; if (null != chunk0) { if (null != (buffer = chunk0.get(size, sizeIsLowerBound, reuse))) return buffer; if (null != chunk1) { if (null != (buffer = chunk1.get(size, sizeIsLowerBound, reuse))) return buffer; if (null != chunk2 && null != (buffer = chunk2.get(size, sizeIsLowerBound, reuse))) return buffer; } } return null; } private void forEach(Consumer consumer) { forEach(consumer, count, chunk0, chunk1, chunk2); } private void clearForEach(Consumer consumer) { int oldCount = count; Chunk chunk0 = this.chunk0, chunk1 = this.chunk1, chunk2 = this.chunk2; count = 0; this.chunk0 = this.chunk1 = this.chunk2 = null; forEach(consumer, oldCount, chunk0, chunk1, chunk2); } private static void forEach(Consumer consumer, int count, Chunk chunk0, Chunk chunk1, Chunk chunk2) { switch (count) { case 3: consumer.accept(chunk2); case 2: consumer.accept(chunk1); case 1: consumer.accept(chunk0); } } private void removeIf(BiPredicate predicate, T value) { // do not release matching chunks before we move null chunks to the back of the queue; // because, with current buffer release from another thread, "chunk#release()" may eventually come back to // "removeIf" 
causing NPE as null chunks are not at the back of the queue. Chunk toRelease0 = null, toRelease1 = null, toRelease2 = null; try { switch (count) { case 3: if (predicate.test(chunk2, value)) { --count; toRelease2 = chunk2; chunk2 = null; } case 2: if (predicate.test(chunk1, value)) { --count; toRelease1 = chunk1; chunk1 = null; } case 1: if (predicate.test(chunk0, value)) { --count; toRelease0 = chunk0; chunk0 = null; } break; case 0: return; } switch (count) { case 2: // Find the only null item, and shift non-null so that null is at chunk2 if (chunk0 == null) { chunk0 = chunk1; chunk1 = chunk2; chunk2 = null; } else if (chunk1 == null) { chunk1 = chunk2; chunk2 = null; } break; case 1: // Find the only non-null item, and shift it to chunk0 if (chunk1 != null) { chunk0 = chunk1; chunk1 = null; } else if (chunk2 != null) { chunk0 = chunk2; chunk2 = null; } break; } } finally { if (toRelease0 != null) toRelease0.release(); if (toRelease1 != null) toRelease1.release(); if (toRelease2 != null) toRelease2.release(); } } private void release() { clearForEach(Chunk::release); } private void unsafeRecycle() { clearForEach(Chunk::unsafeRecycle); } } /** * A thread local class that grabs chunks from the global pool for this thread allocations. * Only one thread can do the allocations but multiple threads can release the allocations. */ public final class LocalPool implements Recycler { private final Queue reuseObjects; private final Supplier parent; private final LocalPoolRef leakRef; private final MicroQueueOfChunks chunks = new MicroQueueOfChunks(); private final Thread owningThread = Thread.currentThread(); /** * If we are on outer LocalPool, whose chunks are == NORMAL_CHUNK_SIZE, we may service allocation requests * for buffers much smaller than */ private LocalPool tinyPool; private final int tinyLimit; private boolean recycleWhenFree = true; public LocalPool() { this.parent = globalPool; this.tinyLimit = TINY_ALLOCATION_LIMIT; this.reuseObjects = new ArrayDeque<>(); localPoolReferences.add(leakRef = new LocalPoolRef(this, localPoolRefQueue)); } /** * Invoked by an existing LocalPool, to create a child pool */ private LocalPool(LocalPool parent) { this.parent = () -> { ByteBuffer buffer = parent.tryGetInternal(TINY_CHUNK_SIZE, false); return buffer == null ? null : new Chunk(parent, buffer); }; this.tinyLimit = 0; // we only currently permit one layer of nesting (which brings us down to 32 byte allocations, so is plenty) this.reuseObjects = parent.reuseObjects; // we share the same ByteBuffer object reuse pool, as we both have the same exclusive access to it localPoolReferences.add(leakRef = new LocalPoolRef(this, localPoolRefQueue)); } private LocalPool tinyPool() { if (tinyPool == null) tinyPool = new LocalPool(this).recycleWhenFree(recycleWhenFree); return tinyPool; } public void put(ByteBuffer buffer) { Chunk chunk = Chunk.getParentChunk(buffer); int size = buffer.capacity(); if (chunk == null) { FileUtils.clean(buffer); updateOverflowMemoryUsage(-size); } else { put(buffer, chunk); memoryInUse.add(-size); } } private void put(ByteBuffer buffer, Chunk chunk) { LocalPool owner = chunk.owner; if (owner != null && owner == tinyPool) { tinyPool.put(buffer, chunk); return; } long free = chunk.free(buffer); if (free == -1L && owner == this && owningThread == Thread.currentThread() && recycleWhenFree) { // The chunk was fully freed, and we're the owner - let's release the chunk from this pool // and give it back to the parent. 
// // We can remove the chunk from our local queue only if we're the owner of the chunk, // and we're running this code on the thread that owns this local pool // because the local queue is not thread safe. // // Please note that we may end up running `put` on a different thread when we're called // from chunk.tryRecycle() on a child chunk which was previously owned by tinyPool of this pool. // Such tiny chunk will point to this pool with its recycler reference. Thanks to the recycler, a thread // that returns the tiny chunk can end up here in a LocalPool that's not neccessarily local to the // calling thread, as there is no guarantee a child chunk is returned to the pool // by the same thread that originally allocated it. // It is ok we skip recycling in such case, and it does not cause // a leak because those chunks are still referenced by the local pool. remove(chunk); chunk.release(); } else if (chunk.owner == null) { // The chunk has no owner, so we can attempt to recycle it from any thread because we don't need // to remove it from the local pool. // For normal chunk this would recycle the chunk fully or partially if not already recycled. // For tiny chunk, this would recycle the tiny chunk back to the parent chunk, // if this chunk is completely free. chunk.tryRecycle(); } if (owner == this && owningThread == Thread.currentThread()) { MemoryUtil.setAttachment(buffer, null); MemoryUtil.setDirectByteBuffer(buffer, 0, 0); reuseObjects.add(buffer); } } public void putUnusedPortion(ByteBuffer buffer) { Chunk chunk = Chunk.getParentChunk(buffer); int originalCapacity = buffer.capacity(); int size = originalCapacity - buffer.limit(); if (chunk == null) { updateOverflowMemoryUsage(-size); return; } chunk.freeUnusedPortion(buffer); // Calculate the actual freed bytes which may be different from `size` when pooling is involved memoryInUse.add(buffer.capacity() - originalCapacity); } public ByteBuffer get(int size) { return get(size, false); } public ByteBuffer getAtLeast(int size) { return get(size, true); } private ByteBuffer get(int size, boolean sizeIsLowerBound) { ByteBuffer ret = tryGet(size, sizeIsLowerBound); if (ret != null) return ret; if (size > NORMAL_CHUNK_SIZE) { if (logger.isTraceEnabled()) logger.trace("Requested buffer size {} is bigger than {}; allocating directly", prettyPrintMemory(size), prettyPrintMemory(NORMAL_CHUNK_SIZE)); } else { if (logger.isTraceEnabled()) logger.trace("Requested buffer size {} has been allocated directly due to lack of capacity", prettyPrintMemory(size)); } return allocate(size, BufferType.OFF_HEAP); } private ByteBuffer tryGet(int size, boolean sizeIsLowerBound) { LocalPool pool = this; if (size <= tinyLimit) { if (size <= 0) { if (size == 0) return EMPTY_BUFFER; throw new IllegalArgumentException("Size must be non-negative (" + size + ')'); } pool = tinyPool(); } else if (size > NORMAL_CHUNK_SIZE) { metrics.misses.mark(); return null; } ByteBuffer ret = pool.tryGetInternal(size, sizeIsLowerBound); if (ret != null) { metrics.hits.mark(); memoryInUse.add(ret.capacity()); } else { metrics.misses.mark(); } return ret; } @Inline private ByteBuffer tryGetInternal(int size, boolean sizeIsLowerBound) { ByteBuffer reuse = this.reuseObjects.poll(); ByteBuffer buffer = chunks.get(size, sizeIsLowerBound, reuse); if (buffer != null) { return buffer; } // else ask the global pool Chunk chunk = addChunkFromParent(); if (chunk != null) { ByteBuffer result = chunk.get(size, sizeIsLowerBound, reuse); if (result != null) return result; } if (reuse != null) 
this.reuseObjects.add(reuse); return null; } // recycle entire tiny chunk from tiny pool back to local pool @Override public void recycle(Chunk chunk) { ByteBuffer buffer = chunk.slab; Chunk parentChunk = Chunk.getParentChunk(buffer); assert parentChunk != null; // tiny chunk always has a parent chunk put(buffer, parentChunk); } @Override public void recyclePartially(Chunk chunk) { throw new UnsupportedOperationException("Tiny chunk doesn't support partial recycle."); } @Override public boolean canRecyclePartially() { // tiny pool doesn't support partial recycle, as we want to have tiny chunk fully freed and put back to // parent normal chunk. return false; } private void remove(Chunk chunk) { chunks.remove(chunk); if (tinyPool != null) tinyPool.chunks.removeIf((child, parent) -> Chunk.getParentChunk(child.slab) == parent, chunk); } private Chunk addChunkFromParent() { Chunk chunk = parent.get(); if (chunk == null) return null; addChunk(chunk); return chunk; } private void addChunk(Chunk chunk) { chunk.acquire(this); Chunk evict = chunks.add(chunk); if (evict != null) { if (tinyPool != null) // releasing tiny chunks may result in releasing current evicted chunk tinyPool.chunks.removeIf((child, parent) -> Chunk.getParentChunk(child.slab) == parent, evict); evict.release(); } } public void release() { if (tinyPool != null) tinyPool.release(); chunks.release(); reuseObjects.clear(); localPoolReferences.remove(leakRef); leakRef.clear(); } @VisibleForTesting void unsafeRecycle() { chunks.unsafeRecycle(); } @VisibleForTesting public boolean isTinyPool() { return !(parent instanceof GlobalPool); } public LocalPool recycleWhenFree(boolean recycleWhenFree) { this.recycleWhenFree = recycleWhenFree; if (tinyPool != null) tinyPool.recycleWhenFree = recycleWhenFree; return this; } } private static final class LocalPoolRef extends PhantomReference { private final MicroQueueOfChunks chunks; public LocalPoolRef(LocalPool localPool, ReferenceQueue q) { super(localPool, q); chunks = localPool.chunks; } public void release() { chunks.release(); } } private void cleanupOneReference() throws InterruptedException { Object obj = localPoolRefQueue.remove(100); if (obj instanceof LocalPoolRef) { ((LocalPoolRef) obj).release(); localPoolReferences.remove(obj); } } private static ByteBuffer allocateDirectAligned(int capacity) { int align = MemoryUtil.pageSize(); if (Integer.bitCount(align) != 1) throw new IllegalArgumentException("Alignment must be a power of 2"); ByteBuffer buffer = ByteBuffer.allocateDirect(capacity + align); long address = MemoryUtil.getAddress(buffer); long offset = address & (align -1); // (address % align) if (offset == 0) { // already aligned buffer.limit(capacity); } else { // shift by offset int pos = (int)(align - offset); buffer.position(pos); buffer.limit(pos + capacity); } return buffer.slice(); } /** * A memory chunk: it takes a buffer (the slab) and slices it * into smaller buffers when requested. * * It divides the slab into 64 units and keeps a long mask, freeSlots, * indicating if a unit is in use or not. Each bit in freeSlots corresponds * to a unit, if the bit is set then the unit is free (available for allocation) * whilst if it is not set then the unit is in use. * * When we receive a request of a given size we round up the size to the nearest * multiple of allocation units required. Then we search for n consecutive free units, * where n is the number of units required. We also align to page boundaries. 
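 * <p>
 * A rough worked example (illustrative numbers only, assuming a normal chunk whose slab is NORMAL_CHUNK_SIZE = 128KiB,
 * so the unit is 128KiB / 64 = 2KiB and the shift is 11):
 * <pre>{@code
 * // requesting 5000 bytes:
 * //   slotCount    = (5000 - 1 + 2048) >>> 11  = 3      // i.e. rounded up to 3 units
 * //   rounded size = 3 * 2048                  = 6144 bytes
 * // the allocator then searches freeSlots for 3 consecutive set bits at a permitted alignment
 * }</pre>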
* * When we reiceve a release request we work out the position by comparing the buffer * address to our base address and we simply release the units. */ final static class Chunk { enum Status { /** The slab is serving or ready to serve requests */ IN_USE, /** The slab is not serving requests and ready for partial recycle*/ EVICTED; } private final ByteBuffer slab; final long baseAddress; private final int shift; // it may be 0L when all slots are allocated after "get" or when all slots are freed after "free" private volatile long freeSlots; private static final AtomicLongFieldUpdater freeSlotsUpdater = AtomicLongFieldUpdater.newUpdater(Chunk.class, "freeSlots"); // the pool that is _currently allocating_ from this Chunk // if this is set, it means the chunk may not be recycled because we may still allocate from it; // if it has been unset the local pool has finished with it, and it may be recycled private volatile LocalPool owner; private final Recycler recycler; private static final AtomicReferenceFieldUpdater statusUpdater = AtomicReferenceFieldUpdater.newUpdater(Chunk.class, Status.class, "status"); private volatile Status status = Status.IN_USE; @VisibleForTesting Object debugAttachment; Chunk(Chunk recycle) { assert recycle.freeSlots == 0L; this.slab = recycle.slab; this.baseAddress = recycle.baseAddress; this.shift = recycle.shift; this.freeSlots = -1L; this.recycler = recycle.recycler; } Chunk(Recycler recycler, ByteBuffer slab) { assert MemoryUtil.isExactlyDirect(slab); this.recycler = recycler; this.slab = slab; this.baseAddress = MemoryUtil.getAddress(slab); // The number of bits by which we need to shift to obtain a unit // "31 &" is because numberOfTrailingZeros returns 32 when the capacity is zero this.shift = 31 & (Integer.numberOfTrailingZeros(slab.capacity() / 64)); // -1 means all free whilst 0 means all in use this.freeSlots = slab.capacity() == 0 ? 0L : -1L; } /** * Acquire the chunk for future allocations: set the owner */ void acquire(LocalPool owner) { assert this.owner == null; this.owner = owner; } /** * Set the owner to null and return the chunk to the global pool if the chunk is fully free. * This method must be called by the LocalPool when it is certain that * the local pool shall never try to allocate any more buffers from this chunk. */ void release() { this.owner = null; boolean statusUpdated = setEvicted(); assert statusUpdated : "Status of chunk " + this + " was not IN_USE."; tryRecycle(); } /** * If the chunk is free, changes the chunk's status to IN_USE and returns the chunk to the pool * that it was acquired from. * * Can recycle the chunk partially if the recycler supports it. * This method can be called from multiple threads safely. * * Calling this method on a chunk that's currently in use (either owned by a LocalPool or already recycled) * has no effect. */ void tryRecycle() { // Note that this may race with release(), therefore the order of those checks does matter. // The EVICTED check may fail if the chunk was already partially recycled. if (status != Status.EVICTED) return; if (owner != null) return; // We must use consistently either tryRecycleFully or tryRecycleFullyOrPartially, // but we must not mix those for a single chunk, because they use a different mechanism for guarding // that the chunk would be recycled at most once until the next acquire. // // If the recycler cannot recycle blocks partially, we have to make sure freeSlots was zeroed properly. 
// Only one thread can transition freeSlots from -1 to 0 atomically, so this is a good way // of ensuring only one thread recycles the block. In this case the chunk's status is // updated only after freeSlots CAS succeeds. // // If the recycler can recycle blocks partially, we use the status field // to guard at-most-once recycling. We cannot rely on atomically updating freeSlots from -1 to 0, because // in this case we cannot expect freeSlots to be -1 (if it was, it wouldn't be partial). if (recycler.canRecyclePartially()) tryRecycleFullyOrPartially(); else tryRecycleFully(); } /** * Returns this chunk to the pool where it was acquired from, if it wasn't returned already. * The chunk does not have to be totally free, but should have some free bits. * However, if the chunk is fully free, it is released fully, not partially. */ private void tryRecycleFullyOrPartially() { assert recycler.canRecyclePartially(); if (free() > 0 && setInUse()) { assert owner == null; if (!tryRecycleFully()) // prefer to recycle fully, as fully free chunks are returned to a higher priority queue recyclePartially(); } } private boolean tryRecycleFully() { if (!isFree() || !freeSlotsUpdater.compareAndSet(this, -1L, 0L)) return false; recycleFully(); return true; } private void recyclePartially() { assert owner == null; assert status == Status.IN_USE; recycler.recyclePartially(this); } private void recycleFully() { assert owner == null; assert freeSlots == 0L; Status expectedStatus = recycler.canRecyclePartially() ? Status.IN_USE : Status.EVICTED; boolean statusUpdated = setStatus(expectedStatus, Status.IN_USE); // impossible: could only happen if another thread updated the status in the meantime assert statusUpdated : "Status of chunk " + this + " was not " + expectedStatus; recycler.recycle(this); } /** * We stash the chunk in the attachment of a buffer * that was returned by get(), this method simply * retrives the chunk that sliced a buffer, if any. */ static Chunk getParentChunk(ByteBuffer buffer) { Object attachment = MemoryUtil.getAttachment(buffer); if (attachment instanceof Chunk) return (Chunk) attachment; if (attachment instanceof Ref) return ((Ref) attachment).get(); return null; } void setAttachment(ByteBuffer buffer) { if (Ref.DEBUG_ENABLED) MemoryUtil.setAttachment(buffer, new Ref<>(this, null)); else MemoryUtil.setAttachment(buffer, this); } boolean releaseAttachment(ByteBuffer buffer) { Object attachment = MemoryUtil.getAttachment(buffer); if (attachment == null) return false; if (Ref.DEBUG_ENABLED) ((Ref) attachment).release(); return true; } @VisibleForTesting long setFreeSlots(long val) { long ret = freeSlots; freeSlots = val; return ret; } int capacity() { return 64 << shift; } final int unit() { return 1 << shift; } final boolean isFree() { return freeSlots == -1L; } /** The total free size */ int free() { return Long.bitCount(freeSlots) * unit(); } int freeSlotCount() { return Long.bitCount(freeSlots); } ByteBuffer get(int size) { return get(size, false, null); } /** * Return the next available slice of this size. If * we have exceeded the capacity we return null. */ ByteBuffer get(int size, boolean sizeIsLowerBound, ByteBuffer into) { // how many multiples of our units is the size? 
// we add (unit - 1), so that when we divide by unit (>>> shift), we effectively round up int slotCount = (size - 1 + unit()) >>> shift; if (sizeIsLowerBound) size = slotCount << shift; // if we require more than 64 slots, we cannot possibly accommodate the allocation if (slotCount > 64) return null; // convert the slotCount into the bits needed in the bitmap, but at the bottom of the register long slotBits = -1L >>> (64 - slotCount); // in order that we always allocate page aligned results, we require that any allocation is "somewhat" aligned // i.e. any single unit allocation can go anywhere; any 2 unit allocation must begin in one of the first 3 slots // of a page; a 3 unit must go in the first two slots; and any four unit allocation must be fully page-aligned // to achieve this, we construct a searchMask that constrains the bits we find to those we permit starting // a match from. as we find bits, we remove them from the mask to continue our search. // this has an odd property when it comes to concurrent alloc/free, as we can safely skip backwards if // a new slot is freed up, but we always make forward progress (i.e. never check the same bits twice), // so running time is bounded long searchMask = 0x1111111111111111L; searchMask *= 15L >>> ((slotCount - 1) & 3); // i.e. switch (slotCount & 3) // case 1: searchMask = 0xFFFFFFFFFFFFFFFFL // case 2: searchMask = 0x7777777777777777L // case 3: searchMask = 0x3333333333333333L // case 0: searchMask = 0x1111111111111111L // truncate the mask, removing bits that have too few slots proceeding them searchMask &= -1L >>> (slotCount - 1); // this loop is very unroll friendly, and would achieve high ILP, but not clear if the compiler will exploit this. // right now, not worth manually exploiting, but worth noting for future while (true) { long cur = freeSlots; // find the index of the lowest set bit that also occurs in our mask (i.e. is permitted alignment, and not yet searched) // we take the index, rather than finding the lowest bit, since we must obtain it anyway, and shifting is more efficient // than multiplication int index = Long.numberOfTrailingZeros(cur & searchMask); // if no bit was actually found, we cannot serve this request, so return null. // due to truncating the searchMask this immediately terminates any search when we run out of indexes // that could accommodate the allocation, i.e. is equivalent to checking (64 - index) < slotCount if (index == 64) return null; // remove this bit from our searchMask, so we don't return here next round searchMask ^= 1L << index; // if our bits occur starting at the index, remove ourselves from the bitmask and return long candidate = slotBits << index; if ((candidate & cur) == candidate) { // here we are sure we will manage to CAS successfully without changing candidate because // there is only one thread allocating at the moment, the concurrency is with the release // operations only while (true) { // clear the candidate bits (freeSlots &= ~candidate) if (freeSlotsUpdater.compareAndSet(this, cur, cur & ~candidate)) break; cur = freeSlots; // make sure no other thread has cleared the candidate bits assert ((candidate & cur) == candidate); } return set(index << shift, size, into); } } } private ByteBuffer set(int offset, int size, ByteBuffer into) { if (into == null) into = MemoryUtil.getHollowDirectByteBuffer(ByteOrder.BIG_ENDIAN); MemoryUtil.sliceDirectByteBuffer(slab, into, offset, size); setAttachment(into); return into; } /** * Round the size to the next unit multiple. 
*/ int roundUp(int v) { return BufferPool.roundUp(v, unit()); } /** * Release a buffer. Return: * -1L if it is free (and so we should tryRecycle if owner is now null) * some other value otherwise **/ long free(ByteBuffer buffer) { if (!releaseAttachment(buffer)) return 1L; int size = roundUp(buffer.capacity()); long address = MemoryUtil.getAddress(buffer); assert (address >= baseAddress) & (address + size <= baseAddress + capacity()); int position = ((int)(address - baseAddress)) >> shift; int slotCount = size >> shift; long slotBits = 0xffffffffffffffffL >>> (64 - slotCount); long shiftedSlotBits = (slotBits << position); long next; while (true) { long cur = freeSlots; next = cur | shiftedSlotBits; assert next == (cur ^ shiftedSlotBits); // ensure no double free if (freeSlotsUpdater.compareAndSet(this, cur, next)) return next; } } void freeUnusedPortion(ByteBuffer buffer) { int size = roundUp(buffer.limit()); int capacity = roundUp(buffer.capacity()); if (size == capacity) return; long address = MemoryUtil.getAddress(buffer); assert (address >= baseAddress) & (address + size <= baseAddress + capacity()); // free any spare slots above the size we are using int position = ((int)(address + size - baseAddress)) >> shift; int slotCount = (capacity - size) >> shift; long slotBits = 0xffffffffffffffffL >>> (64 - slotCount); long shiftedSlotBits = (slotBits << position); long next; while (true) { long cur = freeSlots; next = cur | shiftedSlotBits; assert next == (cur ^ shiftedSlotBits); // ensure no double free if (freeSlotsUpdater.compareAndSet(this, cur, next)) break; } MemoryUtil.setByteBufferCapacity(buffer, size); } @Override public String toString() { return String.format("[slab %s, slots bitmap %s, capacity %d, free %d]", slab, Long.toBinaryString(freeSlots), capacity(), free()); } @VisibleForTesting public LocalPool owner() { return this.owner; } @VisibleForTesting void unsafeFree() { Chunk parent = getParentChunk(slab); if (parent != null) parent.free(slab); else FileUtils.clean(slab); } static void unsafeRecycle(Chunk chunk) { if (chunk != null) { chunk.owner = null; chunk.freeSlots = 0L; chunk.recycleFully(); } } Status status() { return status; } private boolean setStatus(Status current, Status update) { return statusUpdater.compareAndSet(this, current, update); } private boolean setInUse() { return setStatus(Status.EVICTED, Status.IN_USE); } private boolean setEvicted() { return setStatus(Status.IN_USE, Status.EVICTED); } } @VisibleForTesting public static int roundUp(int size) { if (size <= TINY_ALLOCATION_LIMIT) return roundUp(size, TINY_ALLOCATION_UNIT); return roundUp(size, NORMAL_ALLOCATION_UNIT); } @VisibleForTesting public static int roundUp(int size, int unit) { int mask = unit - 1; return (size + mask) & ~mask; } @VisibleForTesting public void shutdownLocalCleaner(long timeout, TimeUnit unit) throws InterruptedException, TimeoutException { shutdownNow(of(localPoolCleaner)); awaitTermination(timeout, unit, of(localPoolCleaner)); } @VisibleForTesting public BufferPoolMetrics metrics() { return metrics; } /** This is not thread safe and should only be used for unit testing. */ @VisibleForTesting public void unsafeReset() { overflowMemoryUsage.reset(); memoryInUse.reset(); memoryAllocated.set(0); localPool.get().unsafeRecycle(); globalPool.unsafeFree(); } @VisibleForTesting Chunk unsafeCurrentChunk() { return localPool.get().chunks.chunk0; } @VisibleForTesting int unsafeNumChunks() { LocalPool pool = localPool.get(); return (pool.chunks.chunk0 != null ? 
1 : 0) + (pool.chunks.chunk1 != null ? 1 : 0) + (pool.chunks.chunk2 != null ? 1 : 0); } }