
org.neo4j.kernel.impl.transaction.log.BatchingTransactionAppender


Neo4j kernel is a lightweight, embedded Java database designed to store data structured as graphs rather than tables. For more information, see http://neo4j.org.
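As a quick orientation for what "embedded" means here, the sketch below drives the kernel from user code via the 3.x-era embedded API that this source file belongs to. It is a minimal, illustrative example only; the store path, class name and property values are made up for the sketch:

import java.io.File;

import org.neo4j.graphdb.GraphDatabaseService;
import org.neo4j.graphdb.Node;
import org.neo4j.graphdb.Transaction;
import org.neo4j.graphdb.factory.GraphDatabaseFactory;

public class EmbeddedNeo4jExample
{
    public static void main( String[] args )
    {
        // Open (or create) an embedded graph database; the path is illustrative.
        GraphDatabaseService db = new GraphDatabaseFactory().newEmbeddedDatabase( new File( "data/graph.db" ) );
        try
        {
            try ( Transaction tx = db.beginTx() )
            {
                Node node = db.createNode();
                node.setProperty( "name", "example" );
                tx.success(); // mark the transaction successful so it commits on close
            }
        }
        finally
        {
            db.shutdown();
        }
    }
}

Every write transaction committed this way eventually passes through the transaction log appender shown in the source below.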

/*
 * Copyright (c) 2002-2016 "Neo Technology,"
 * Network Engine for Objects in Lund AB [http://neotechnology.com]
 *
 * This file is part of Neo4j.
 *
 * Neo4j is free software: you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation, either version 3 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program.  If not, see <http://www.gnu.org/licenses/>.
 */
package org.neo4j.kernel.impl.transaction.log;

import java.io.Flushable;
import java.io.IOException;
import java.nio.channels.ClosedChannelException;
import java.util.concurrent.TimeUnit;
import java.util.concurrent.atomic.AtomicReference;
import java.util.concurrent.locks.Lock;
import java.util.concurrent.locks.LockSupport;
import java.util.concurrent.locks.ReentrantLock;

import org.neo4j.kernel.impl.api.TransactionToApply;
import org.neo4j.kernel.impl.transaction.TransactionRepresentation;
import org.neo4j.kernel.impl.transaction.log.entry.LogEntryWriter;
import org.neo4j.kernel.impl.transaction.log.rotation.LogRotation;
import org.neo4j.kernel.impl.transaction.tracing.LogAppendEvent;
import org.neo4j.kernel.impl.transaction.tracing.LogCheckPointEvent;
import org.neo4j.kernel.impl.transaction.tracing.LogForceEvent;
import org.neo4j.kernel.impl.transaction.tracing.LogForceEvents;
import org.neo4j.kernel.impl.transaction.tracing.LogForceWaitEvent;
import org.neo4j.kernel.impl.transaction.tracing.SerializeTransactionEvent;
import org.neo4j.kernel.impl.util.IdOrderingQueue;
import org.neo4j.kernel.internal.DatabaseHealth;
import org.neo4j.kernel.lifecycle.LifecycleAdapter;

import static org.neo4j.kernel.impl.api.TransactionToApply.TRANSACTION_ID_NOT_SPECIFIED;
import static org.neo4j.kernel.impl.transaction.log.entry.LogEntryStart.checksum;

/**
 * Concurrently appends transactions to the transaction log, coordinating with log rotation and forcing
 * the log file in batches for higher throughput under concurrent load.
 */
public class BatchingTransactionAppender extends LifecycleAdapter implements TransactionAppender
{
    // For the graph store and schema indexes the order of updates is managed by the high-level entity locks,
    // such that changes are applied to the affected records in the same order that they are written to the
    // log. For the legacy indexes there are no such locks, and hence no such ordering. The queue below
    // is introduced to manage just that, and is only used for transactions that contain legacy index changes.
    private final IdOrderingQueue legacyIndexTransactionOrdering;

    private final AtomicReference<ThreadLink> threadLinkHead = new AtomicReference<>( ThreadLink.END );
    private final TransactionMetadataCache transactionMetadataCache;
    private final LogFile logFile;
    private final LogRotation logRotation;
    private final TransactionIdStore transactionIdStore;
    private final LogPositionMarker positionMarker = new LogPositionMarker();
    private final DatabaseHealth databaseHealth;
    private final Lock forceLock = new ReentrantLock();

    private FlushablePositionAwareChannel writer;
    private TransactionLogWriter transactionLogWriter;
    private IndexCommandDetector indexCommandDetector;

    public BatchingTransactionAppender( LogFile logFile, LogRotation logRotation,
            TransactionMetadataCache transactionMetadataCache, TransactionIdStore transactionIdStore,
            IdOrderingQueue legacyIndexTransactionOrdering, DatabaseHealth databaseHealth )
    {
        this.logFile = logFile;
        this.logRotation = logRotation;
        this.transactionIdStore = transactionIdStore;
        this.legacyIndexTransactionOrdering = legacyIndexTransactionOrdering;
        this.databaseHealth = databaseHealth;
        this.transactionMetadataCache = transactionMetadataCache;
    }

    @Override
    public void start() throws Throwable
    {
        this.writer = logFile.getWriter();
        this.indexCommandDetector = new IndexCommandDetector();
        this.transactionLogWriter = new TransactionLogWriter( new LogEntryWriter( writer ) );
    }

    @Override
    public long append( TransactionToApply batch, LogAppendEvent logAppendEvent ) throws IOException
    {
        // We put the log rotation check outside the private append method since it must happen before
        // we generate the next transaction id
        boolean logRotated = logRotation.rotateLogIfNeeded( logAppendEvent );
        logAppendEvent.setLogRotated( logRotated );

        // Assigned base tx id just to make compiler happy
        long lastTransactionId = TransactionIdStore.BASE_TX_ID;
        // Synchronize on logFile to get absolute control over concurrent rotations
        synchronized ( logFile )
        {
            // Assert that kernel is healthy before making any changes
            databaseHealth.assertHealthy( IOException.class );
            try ( SerializeTransactionEvent serialiseEvent = logAppendEvent.beginSerializeTransaction() )
            {
                // Append all transactions in this batch to the log under the same logFile monitor
                TransactionToApply tx = batch;
                while ( tx != null )
                {
                    long transactionId = transactionIdStore.nextCommittingTransactionId();

                    // If we're in a scenario where we're merely replicating transactions, i.e. transaction
                    // ids have already been generated by another entity, we simply check that the id we
                    // generated matches that id. If it doesn't, we've run into a problem we can't really
                    // recover from, and it would point to a bug somewhere.
                    matchAgainstExpectedTransactionIdIfAny( transactionId, tx );

                    TransactionCommitment commitment = appendToLog( tx.transactionRepresentation(), transactionId );
                    tx.commitment( commitment, transactionId );
                    tx = tx.next();
                    lastTransactionId = transactionId;
                }
            }
        }

        // At this point we've appended all transactions in this batch, but we can't mark any of them
        // as committed since they haven't been forced to disk yet. So here we force, or potentially
        // piggy-back on another force; either way, after the call below we can be sure that all
        // transactions in this batch exist durably on disk.
        forceAfterAppend( logAppendEvent );

        // Mark all transactions as committed
        publishAsCommitted( batch );

        return lastTransactionId;
    }

    private void matchAgainstExpectedTransactionIdIfAny( long transactionId, TransactionToApply tx )
    {
        long expectedTransactionId = tx.transactionId();
        if ( expectedTransactionId != TRANSACTION_ID_NOT_SPECIFIED )
        {
            if ( transactionId != expectedTransactionId )
            {
                throw new IllegalStateException(
                        "Received " + tx.transactionRepresentation() + " with txId:" + expectedTransactionId +
                        " to be applied, but appending it ended up generating an unexpected txId:" + transactionId );
            }
        }
    }

    private void publishAsCommitted( TransactionToApply batch )
    {
        while ( batch != null )
        {
            batch.commitment().publishAsCommitted();
            batch = batch.next();
        }
    }

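    /**
     * Writes a check point entry into the transaction log and forces the log to disk. The given
     * {@code logPosition} is the position that recovery would start reading the transaction log from,
     * i.e. everything before it is assumed to already have been safely applied to the store.
     */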
    @Override
    public void checkPoint( LogPosition logPosition, LogCheckPointEvent logCheckPointEvent ) throws IOException
    {
        try
        {
            // Synchronize on logFile to get absolute control over concurrent rotations
            synchronized ( logFile )
            {
                transactionLogWriter.checkPoint( logPosition );
            }
        }
        catch ( Throwable cause )
        {
            databaseHealth.panic( cause );
            throw cause;
        }

        forceAfterAppend( logCheckPointEvent );
    }

    /**
     * @return A TransactionCommitment instance with metadata about the committed transaction, such as whether or not
     * this transaction contains any legacy index changes.
     */
    private TransactionCommitment appendToLog( TransactionRepresentation transaction, long transactionId )
            throws IOException
    {
        // Reset command writer so that we, after we've written the transaction, can ask it whether or
        // not any legacy index command was written. If so then there's additional ordering to care about below.
        indexCommandDetector.reset();

        // The outcome of this try block is either of:
        // a) transaction successfully appended, at which point we return a Commitment to be used after force
        // b) transaction failed to be appended, at which point a kernel panic is issued
        // The reason that we issue a kernel panic on failure in here is that at this point we're still
        // holding the logFile monitor, and a failure to append needs to be communicated to a potential
        // log rotation, which will wait for all transactions to be closed or fail on kernel panic.
        try
        {
            LogPosition logPositionBeforeCommit = writer.getCurrentPosition( positionMarker ).newPosition();
            transactionLogWriter.append( transaction, transactionId );
            LogPosition logPositionAfterCommit = writer.getCurrentPosition( positionMarker ).newPosition();

            long transactionChecksum = checksum(
                    transaction.additionalHeader(), transaction.getMasterId(), transaction.getAuthorId() );
            transactionMetadataCache.cacheTransactionMetadata(
                    transactionId, logPositionBeforeCommit, transaction.getMasterId(), transaction.getAuthorId(),
                    transactionChecksum, transaction.getTimeCommitted() );

            transaction.accept( indexCommandDetector );
            boolean hasLegacyIndexChanges = indexCommandDetector.hasWrittenAnyLegacyIndexCommand();
            if ( hasLegacyIndexChanges )
            {
                // Offer this transaction id to the queue so that the legacy index applier can take part in the ordering
                legacyIndexTransactionOrdering.offer( transactionId );
            }
            return new TransactionCommitment(
                    hasLegacyIndexChanges, transactionId, transactionChecksum, transaction.getTimeCommitted(),
                    logPositionAfterCommit, transactionIdStore );
        }
        catch ( final Throwable panic )
        {
            databaseHealth.panic( panic );
            throw panic;
        }
    }

    /**
     * Called by the appender that just appended a transaction to the log.
     *
     * @param logForceEvents A trace event for the given log append operation.
     */
    protected void forceAfterAppend( LogForceEvents logForceEvents ) throws IOException
    {
        // There's a benign race here, where we add our link before we update our next pointer.
        // This is okay, however, because unparkAll() spins when it sees a null next pointer.
        ThreadLink threadLink = new ThreadLink( Thread.currentThread() );
        threadLink.next = threadLinkHead.getAndSet( threadLink );
        boolean attemptedForce = false;

        try ( LogForceWaitEvent logForceWaitEvent = logForceEvents.beginLogForceWait() )
        {
            do
            {
                if ( forceLock.tryLock() )
                {
                    attemptedForce = true;
                    try
                    {
                        forceLog( logForceEvents );
                        // In the event of any failure a database panic will be raised and thrown here
                    }
                    finally
                    {
                        forceLock.unlock();

                        // We've released the lock, so unpark anyone who might have decided to park while we were working.
                        // The most recently parked thread is the one most likely to still have warm caches, so that's
                        // the one we would prefer to unpark. Luckily, the stack nature of the ThreadLinks makes it easy
                        // to get to.
                        ThreadLink nextWaiter = threadLinkHead.get();
                        nextWaiter.unpark();
                    }
                }
                else
                {
                    waitForLogForce();
                }
            }
            while ( !threadLink.done );

            // If there were many threads committing simultaneously and I wasn't the lucky one
            // actually doing the forcing (where a failure would throw a panic exception), I need to
            // explicitly check that everything is OK before considering this transaction committed.
            if ( !attemptedForce )
            {
                databaseHealth.assertHealthy( IOException.class );
            }
        }
    }

    private void forceLog( LogForceEvents logForceEvents ) throws IOException
    {
        ThreadLink links = threadLinkHead.getAndSet( ThreadLink.END );
        try ( LogForceEvent logForceEvent = logForceEvents.beginLogForce() )
        {
            force();
        }
        catch ( final Throwable panic )
        {
            databaseHealth.panic( panic );
            throw panic;
        }
        finally
        {
            unparkAll( links );
        }
    }

    private void unparkAll( ThreadLink links )
    {
        do
        {
            links.done = true;
            links.unpark();
            ThreadLink tmp;
            do
            {
                // Spin because of the racy update when consing: the pusher assigns next only after the swap.
                tmp = links.next;
            }
            while ( tmp == null );
            links = tmp;
        }
        while ( links != ThreadLink.END );
    }

    private void waitForLogForce()
    {
        long parkTime = TimeUnit.MILLISECONDS.toNanos( 100 );
        LockSupport.parkNanos( this, parkTime );
    }

    private void force() throws IOException
    {
        // Empty buffer into writer. We want to synchronize with appenders so that they don't
        // append while we're doing this. Appenders and rotation both synchronize on logFile, so
        // holding that monitor here gives us exclusive access to the buffer while we empty it.
        Flushable flushable;
        synchronized ( logFile )
        {
            flushable = writer.prepareForFlush();
        }
        // Force the writer outside of the lock.
        // This allows other threads access to the buffer while the writer is being forced.
        try
        {
            flushable.flush();
        }
        catch ( ClosedChannelException ignored )
        {
            // This is ok, we were already successful in emptying the buffer, so the channel being closed here means
            // that some other thread is rotating the log and has closed the underlying channel. But since we were
            // successful in emptying the buffer *UNDER THE LOCK* we know that the rotating thread included the changes
            // we emptied into the channel, and thus it is already flushed by that thread.
        }
    }
}
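
The force-batching done in forceAfterAppend/forceLog above is what gives this appender its throughput under contention: many committing threads queue up on a lock-free stack, one of them wins the force lock, and a single fsync covers the whole batch. Below is a self-contained sketch of that pattern in isolation, using only JDK primitives. All names (GroupForceExample, expensiveFlush) are illustrative, and this is a simplified reading of the class above, not the production implementation:

import java.util.concurrent.atomic.AtomicReference;
import java.util.concurrent.locks.Lock;
import java.util.concurrent.locks.LockSupport;
import java.util.concurrent.locks.ReentrantLock;

class GroupForceExample
{
    private static class Link
    {
        final Thread thread;
        volatile Link next;
        volatile boolean done;

        Link( Thread thread )
        {
            this.thread = thread;
        }
    }

    private static final Link END = new Link( null );

    private final AtomicReference<Link> head = new AtomicReference<>( END );
    private final Lock forceLock = new ReentrantLock();

    void forceAfterAppend()
    {
        // Push ourselves onto the lock-free stack of waiting threads. As in the class above,
        // there's a benign race: next is assigned only after the swap, so readers must spin on null.
        Link me = new Link( Thread.currentThread() );
        me.next = head.getAndSet( me );

        do
        {
            if ( forceLock.tryLock() )
            {
                try
                {
                    // Claim every thread queued so far; a single flush covers the whole batch.
                    Link batch = head.getAndSet( END );
                    expensiveFlush();
                    for ( Link link = batch; link != END; )
                    {
                        link.done = true;
                        LockSupport.unpark( link.thread );
                        Link next;
                        while ( (next = link.next) == null )
                        {
                            // Spin: the pusher has swapped its link in but not assigned next yet.
                        }
                        link = next;
                    }
                }
                finally
                {
                    forceLock.unlock();
                }
            }
            else
            {
                // Someone else holds the force lock; park briefly, then re-check whether
                // their flush already covered us.
                LockSupport.parkNanos( this, 100_000 );
            }
        }
        while ( !me.done );
    }

    private void expensiveFlush()
    {
        // Stand-in for writer.prepareForFlush().flush() in the class above.
    }
}

Note the design choice the sketch preserves: waiters park with a timeout rather than blocking on the lock, so a thread whose transaction was covered by someone else's flush exits without ever contending for the lock itself.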