All Downloads are FREE. Search and download functionalities are using the official Maven repository.

org.apache.cassandra.db.commitlog.AbstractCommitLogSegmentManager Maven / Gradle / Ivy

There is a newer version: 4.3.1.0
Show newest version
/*
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.apache.cassandra.db.commitlog;

import java.io.File;
import java.io.IOException;
import java.util.*;
import java.util.concurrent.*;
import java.util.concurrent.atomic.AtomicLong;

import com.google.common.annotations.VisibleForTesting;
import com.google.common.collect.Iterables;
import com.google.common.util.concurrent.*;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import org.apache.cassandra.config.DatabaseDescriptor;
import org.apache.cassandra.config.Schema;
import org.apache.cassandra.db.*;
import org.apache.cassandra.utils.*;
import org.apache.cassandra.utils.concurrent.WaitQueue;

import static org.apache.cassandra.db.commitlog.CommitLogSegment.Allocation;

/**
 * Performs eager-creation of commit log segments in a background thread. All the
 * public methods are thread safe.
 */
public abstract class AbstractCommitLogSegmentManager
{
    static final Logger logger = LoggerFactory.getLogger(AbstractCommitLogSegmentManager.class);

    // Queue of work to be done by the manager thread, also used to wake the thread to perform segment allocation.
    private final BlockingQueue segmentManagementTasks = new LinkedBlockingQueue<>();

    /** Segments that are ready to be used. Head of the queue is the one we allocate writes to */
    private final ConcurrentLinkedQueue availableSegments = new ConcurrentLinkedQueue<>();

    /** Active segments, containing unflushed data */
    private final ConcurrentLinkedQueue activeSegments = new ConcurrentLinkedQueue<>();

    /** The segment we are currently allocating commit log records to */
    protected volatile CommitLogSegment allocatingFrom = null;

    private final WaitQueue hasAvailableSegments = new WaitQueue();

    final String storageDirectory;

    /**
     * Tracks commitlog size, in multiples of the segment size.  We need to do this so we can "promise" size
     * adjustments ahead of actually adding/freeing segments on disk, so that the "evict oldest segment" logic
     * can see the effect of recycling segments immediately (even though they're really happening asynchronously
     * on the manager thread, which will take a ms or two).
     */
    private final AtomicLong size = new AtomicLong();

    /**
     * New segment creation is initially disabled because we'll typically get some "free" segments
     * recycled after log replay.
     */
    volatile boolean createReserveSegments = false;

    private Thread managerThread;
    protected volatile boolean run = true;
    protected final CommitLog commitLog;

    private static final SimpleCachedBufferPool bufferPool =
        new SimpleCachedBufferPool(DatabaseDescriptor.getCommitLogMaxCompressionBuffersInPool(), DatabaseDescriptor.getCommitLogSegmentSize());

    AbstractCommitLogSegmentManager(final CommitLog commitLog, String storageDirectory)
    {
        this.commitLog = commitLog;
        this.storageDirectory = storageDirectory;
    }

    void start()
    {
        // The run loop for the manager thread
        Runnable runnable = new WrappedRunnable()
        {
            public void runMayThrow() throws Exception
            {
                while (run)
                {
                    try
                    {
                        Runnable task = segmentManagementTasks.poll();
                        if (task == null)
                        {
                            // if we have no more work to do, check if we should create a new segment
                            if (!atSegmentLimit() &&
                                availableSegments.isEmpty() &&
                                (activeSegments.isEmpty() || createReserveSegments))
                            {
                                logger.trace("No segments in reserve; creating a fresh one");
                                // TODO : some error handling in case we fail to create a new segment
                                availableSegments.add(createSegment());
                                hasAvailableSegments.signalAll();
                            }

                            // flush old Cfs if we're full
                            long unused = unusedCapacity();
                            if (unused < 0)
                            {
                                List segmentsToRecycle = new ArrayList<>();
                                long spaceToReclaim = 0;
                                for (CommitLogSegment segment : activeSegments)
                                {
                                    if (segment == allocatingFrom)
                                        break;
                                    segmentsToRecycle.add(segment);
                                    spaceToReclaim += DatabaseDescriptor.getCommitLogSegmentSize();
                                    if (spaceToReclaim + unused >= 0)
                                        break;
                                }
                                flushDataFrom(segmentsToRecycle, false);
                            }

                            // Since we're operating on a "null" allocation task, block here for the next task on the
                            // queue rather than looping, grabbing another null, and repeating the above work.
                            try
                            {
                                task = segmentManagementTasks.take();
                            }
                            catch (InterruptedException e)
                            {
                                throw new AssertionError();
                            }
                        }
                        task.run();
                    }
                    catch (Throwable t)
                    {
                        JVMStabilityInspector.inspectThrowable(t);
                        if (!CommitLog.handleCommitError("Failed managing commit log segments", t))
                            return;
                        // sleep some arbitrary period to avoid spamming CL
                        Uninterruptibles.sleepUninterruptibly(1, TimeUnit.SECONDS);
                    }
                }
            }

            private boolean atSegmentLimit()
            {
                return CommitLogSegment.usesBufferPool(commitLog) && bufferPool.atLimit();
            }
        };

        run = true;

        managerThread = new Thread(runnable, "COMMIT-LOG-ALLOCATOR");
        managerThread.start();
    }


    /**
     * Shut down the CLSM. Used both during testing and during regular shutdown, so needs to stop everything.
     */
    public abstract void shutdown();

    /**
     * Allocate a segment within this CLSM. Should either succeed or throw.
     */
    public abstract Allocation allocate(Mutation mutation, int size);

    /**
     * The recovery and replay process replays mutations into memtables and flushes them to disk. Individual CLSM
     * decide what to do with those segments on disk after they've been replayed.
     */
    abstract void handleReplayedSegment(final File file);

    /**
     * Hook to allow segment managers to track state surrounding creation of new segments. Onl perform as task submit
     * to segment manager so it's performed on segment management thread.
     */
    abstract CommitLogSegment createSegment();

    /**
     * Indicates that a segment file has been flushed and is no longer needed. Only perform as task submit to segment
     * manager so it's performend on segment management thread, or perform while segment management thread is shutdown
     * during testing resets.
     *
     * @param segment segment to be discarded
     * @param delete  whether or not the segment is safe to be deleted.
     */
    abstract void discard(CommitLogSegment segment, boolean delete);


    /**
     * Grab the current CommitLogSegment we're allocating from. Also serves as a utility method to block while the allocator
     * is working on initial allocation of a CommitLogSegment.
     */
    CommitLogSegment allocatingFrom()
    {
        CommitLogSegment r = allocatingFrom;
        if (r == null)
        {
            advanceAllocatingFrom(null);
            r = allocatingFrom;
        }
        return r;
    }

    /**
     * Fetches a new segment from the queue, signaling the management thread to create a new one if necessary, and "activates" it.
     * Blocks until a new segment is allocated and the thread requesting an advanceAllocatingFrom is signalled.
     *
     * WARNING: Assumes segment management thread always succeeds in allocating a new segment or kills the JVM.
     */
    protected void advanceAllocatingFrom(CommitLogSegment old)
    {
        while (true)
        {
            CommitLogSegment next;
            synchronized (this)
            {
                // do this in a critical section so we can atomically remove from availableSegments and add to allocatingFrom/activeSegments
                // see https://issues.apache.org/jira/browse/CASSANDRA-6557?focusedCommentId=13874432&page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel#comment-13874432
                if (allocatingFrom != old)
                    return;
                next = availableSegments.poll();
                if (next != null)
                {
                    allocatingFrom = next;
                    activeSegments.add(next);
                }
            }

            if (next != null)
            {
                if (old != null)
                {
                    // Now we can run the user defined command just after switching to the new commit log.
                    // (Do this here instead of in the recycle call so we can get a head start on the archive.)
                    commitLog.archiver.maybeArchive(old);

                    // ensure we don't continue to use the old file; not strictly necessary, but cleaner to enforce it
                    old.discardUnusedTail();
                }

                // request that the CL be synced out-of-band, as we've finished a segment
                commitLog.requestExtraSync();
                return;
            }

            // no more segments, so register to receive a signal when not empty
            WaitQueue.Signal signal = hasAvailableSegments.register(commitLog.metrics.waitingOnSegmentAllocation.time());

            // trigger the management thread; this must occur after registering
            // the signal to ensure we are woken by any new segment creation
            wakeManager();

            // check if the queue has already been added to before waiting on the signal, to catch modifications
            // that happened prior to registering the signal; *then* check to see if we've been beaten to making the change
            if (!availableSegments.isEmpty() || allocatingFrom != old)
            {
                signal.cancel();
                // if we've been beaten, just stop immediately
                if (allocatingFrom != old)
                    return;
                // otherwise try again, as there should be an available segment
                continue;
            }

            // can only reach here if the queue hasn't been inserted into
            // before we registered the signal, as we only remove items from the queue
            // after updating allocatingFrom. Can safely block until we are signalled
            // by the allocator that new segments have been published
            signal.awaitUninterruptibly();
        }
    }

    protected void wakeManager()
    {
        // put a NO-OP on the queue, to trigger management thread (and create a new segment if necessary)
        segmentManagementTasks.add(Runnables.doNothing());
    }

    /**
     * Switch to a new segment, regardless of how much is left in the current one.
     *
     * Flushes any dirty CFs for this segment and any older segments, and then recycles
     * the segments
     */
    void forceRecycleAll(Iterable droppedCfs)
    {
        List segmentsToRecycle = new ArrayList<>(activeSegments);
        CommitLogSegment last = segmentsToRecycle.get(segmentsToRecycle.size() - 1);
        advanceAllocatingFrom(last);

        // wait for the commit log modifications
        last.waitForModifications();

        // make sure the writes have materialized inside of the memtables by waiting for all outstanding writes
        // on the relevant keyspaces to complete
        Keyspace.writeOrder.awaitNewBarrier();

        // flush and wait for all CFs that are dirty in segments up-to and including 'last'
        Future future = flushDataFrom(segmentsToRecycle, true);
        try
        {
            future.get();

            for (CommitLogSegment segment : activeSegments)
                for (UUID cfId : droppedCfs)
                    segment.markClean(cfId, CommitLogPosition.NONE, segment.getCurrentCommitLogPosition());

            // now recycle segments that are unused, as we may not have triggered a discardCompletedSegments()
            // if the previous active segment was the only one to recycle (since an active segment isn't
            // necessarily dirty, and we only call dCS after a flush).
            for (CommitLogSegment segment : activeSegments)
            {
                if (segment.isUnused())
                    recycleSegment(segment);
            }

            CommitLogSegment first;
            if ((first = activeSegments.peek()) != null && first.id <= last.id)
                logger.error("Failed to force-recycle all segments; at least one segment is still in use with dirty CFs.");
        }
        catch (Throwable t)
        {
            // for now just log the error
            logger.error("Failed waiting for a forced recycle of in-use commit log segments", t);
        }
    }

    /**
     * Indicates that a segment is no longer in use and that it should be recycled.
     *
     * @param segment segment that is no longer in use
     */
    void recycleSegment(final CommitLogSegment segment)
    {
        boolean archiveSuccess = commitLog.archiver.maybeWaitForArchiving(segment.getName());
        if (activeSegments.remove(segment))
        {
            // if archiving (command) was not successful then leave the file alone. don't delete or recycle.
            discardSegment(segment, archiveSuccess);
        }
        else
        {
            logger.warn("segment {} not found in activeSegments queue", segment);
        }
    }

    /**
     * Indicates that a segment file should be deleted.
     *
     * @param segment segment to be discarded
     */
    private void discardSegment(final CommitLogSegment segment, final boolean deleteFile)
    {
        logger.trace("Segment {} is no longer active and will be deleted {}", segment, deleteFile ? "now" : "by the archive script");
        segmentManagementTasks.add(() -> discard(segment, deleteFile));
    }

    /**
     * Adjust the tracked on-disk size. Called by individual segments to reflect writes, allocations and discards.
     * @param addedSize
     */
    void addSize(long addedSize)
    {
        size.addAndGet(addedSize);
    }

    /**
     * @return the space (in bytes) used by all segment files.
     */
    public long onDiskSize()
    {
        return size.get();
    }

    private long unusedCapacity()
    {
        long total = DatabaseDescriptor.getTotalCommitlogSpaceInMB() * 1024 * 1024;
        long currentSize = size.get();
        logger.trace("Total active commitlog segment space used is {} out of {}", currentSize, total);
        return total - currentSize;
    }

    /**
     * @param name the filename to check
     * @return true if file is managed by this manager.
     */
    public boolean manages(String name)
    {
        for (CommitLogSegment segment : Iterables.concat(activeSegments, availableSegments))
            if (segment.getName().equals(name))
                return true;
        return false;
    }

    /**
     * Throws a flag that enables the behavior of keeping at least one spare segment
     * available at all times.
     */
    void enableReserveSegmentCreation()
    {
        createReserveSegments = true;
        wakeManager();
    }

    /**
     * Force a flush on all CFs that are still dirty in @param segments.
     *
     * @return a Future that will finish when all the flushes are complete.
     */
    private Future flushDataFrom(List segments, boolean force)
    {
        if (segments.isEmpty())
            return Futures.immediateFuture(null);
        final CommitLogPosition maxCommitLogPosition = segments.get(segments.size() - 1).getCurrentCommitLogPosition();

        // a map of CfId -> forceFlush() to ensure we only queue one flush per cf
        final Map> flushes = new LinkedHashMap<>();

        for (CommitLogSegment segment : segments)
        {
            for (UUID dirtyCFId : segment.getDirtyCFIDs())
            {
                Pair pair = Schema.instance.getCF(dirtyCFId);
                if (pair == null)
                {
                    // even though we remove the schema entry before a final flush when dropping a CF,
                    // it's still possible for a writer to race and finish his append after the flush.
                    logger.trace("Marking clean CF {} that doesn't exist anymore", dirtyCFId);
                    segment.markClean(dirtyCFId, CommitLogPosition.NONE, segment.getCurrentCommitLogPosition());
                }
                else if (!flushes.containsKey(dirtyCFId))
                {
                    String keyspace = pair.left;
                    final ColumnFamilyStore cfs = Keyspace.open(keyspace).getColumnFamilyStore(dirtyCFId);
                    // can safely call forceFlush here as we will only ever block (briefly) for other attempts to flush,
                    // no deadlock possibility since switchLock removal
                    flushes.put(dirtyCFId, force ? cfs.forceFlush() : cfs.forceFlush(maxCommitLogPosition));
                }
            }
        }

        return Futures.allAsList(flushes.values());
    }

    /**
     * Stops CL, for testing purposes. DO NOT USE THIS OUTSIDE OF TESTS.
     * Only call this after the AbstractCommitLogService is shut down.
     */
    public void stopUnsafe(boolean deleteSegments)
    {
        logger.trace("CLSM closing and clearing existing commit log segments...");
        createReserveSegments = false;

        awaitManagementTasksCompletion();

        shutdown();
        try
        {
            awaitTermination();
        }
        catch (InterruptedException e)
        {
            throw new RuntimeException(e);
        }

        synchronized (this)
        {
            for (CommitLogSegment segment : activeSegments)
                closeAndDeleteSegmentUnsafe(segment, deleteSegments);
            activeSegments.clear();

            for (CommitLogSegment segment : availableSegments)
                closeAndDeleteSegmentUnsafe(segment, deleteSegments);
            availableSegments.clear();
        }

        allocatingFrom = null;

        segmentManagementTasks.clear();

        size.set(0L);

        logger.trace("CLSM done with closing and clearing existing commit log segments.");
    }

    // Used by tests only.
    void awaitManagementTasksCompletion()
    {
        while (!segmentManagementTasks.isEmpty())
            Thread.yield();
        // The last management task is not yet complete. Wait a while for it.
        Uninterruptibles.sleepUninterruptibly(100, TimeUnit.MILLISECONDS);
        // TODO: If this functionality is required by anything other than tests, signalling must be used to ensure
        // waiting completes correctly.
    }

    /**
     * Explicitly for use only during resets in unit testing.
     */
    private void closeAndDeleteSegmentUnsafe(CommitLogSegment segment, boolean delete)
    {
        try
        {
            discard(segment, delete);
        }
        catch (AssertionError ignored)
        {
            // segment file does not exist
        }
    }

    /**
     * Returns when the management thread terminates.
     */
    public void awaitTermination() throws InterruptedException
    {
        managerThread.join();

        for (CommitLogSegment segment : activeSegments)
            segment.close();

        for (CommitLogSegment segment : availableSegments)
            segment.close();

        bufferPool.shutdown();
    }

    /**
     * @return a read-only collection of the active commit log segments
     */
    @VisibleForTesting
    public Collection getActiveSegments()
    {
        return Collections.unmodifiableCollection(activeSegments);
    }

    /**
     * @return the current CommitLogPosition of the active segment we're allocating from
     */
    CommitLogPosition getCurrentPosition()
    {
        return allocatingFrom().getCurrentCommitLogPosition();
    }

    /**
     * Forces a disk flush on the commit log files that need it.  Blocking.
     */
    public void sync(boolean syncAllSegments) throws IOException
    {
        CommitLogSegment current = allocatingFrom();
        for (CommitLogSegment segment : getActiveSegments())
        {
            if (!syncAllSegments && segment.id > current.id)
                return;
            segment.sync();
        }
    }

    /**
     * Used by compressed and encrypted segments to share a buffer pool across the CLSM.
     */
    SimpleCachedBufferPool getBufferPool()
    {
        return bufferPool;
    }
}





© 2015 - 2025 Weber Informatics LLC | Privacy Policy