/**
Copyright (C) SYSTAP, LLC DBA Blazegraph 2006-2016. All rights reserved.
Contact:
SYSTAP, LLC DBA Blazegraph
2501 Calvert ST NW #106
Washington, DC 20008
licenses@blazegraph.com
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; version 2 of the License.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program; if not, write to the Free Software
Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
*/
package com.bigdata.journal;
import java.io.File;
import java.util.Iterator;
import java.util.Properties;
import java.util.concurrent.Callable;
import java.util.concurrent.Executors;
import java.util.concurrent.ThreadPoolExecutor;
import java.util.concurrent.TimeUnit;
import java.util.concurrent.atomic.AtomicInteger;
import java.util.concurrent.atomic.AtomicReference;
import org.apache.log4j.Logger;
import com.bigdata.bfs.BigdataFileSystem;
import com.bigdata.btree.BTree;
import com.bigdata.btree.Checkpoint;
import com.bigdata.btree.IOverflowHandler;
import com.bigdata.btree.IndexMetadata;
import com.bigdata.btree.IndexSegmentBuilder;
import com.bigdata.journal.Journal.Options;
import com.bigdata.resources.OverflowManager;
import com.bigdata.util.DaemonThreadFactory;
import com.bigdata.util.concurrent.ShutdownHelper;
/**
* This task compacts the journal state onto a caller-specified file. It may be
* used to compact a journal, to create backups, or to convert an in-memory
* journal into a disk-based journal. The task reads the state of each named
* index as of the selected commit point, writing the index entries in index
* order onto the output journal. This process will typically both reduce the
* space on disk required by the (new) backing store and improve locality in
* the (new) backing store.
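*
* A minimal usage sketch (the output file name is hypothetical; assumes an
* already open {@link Journal} named <code>journal</code>):
*
* <pre>
* final File out = new File("backup.jnl");
*
* final Journal compacted = new CompactTask(journal, out, journal
*         .getLastCommitTime()).call();
* try {
*     // verify or archive the compacted store here.
* } finally {
*     // release resources held by the new journal.
*     compacted.shutdown();
* }
* </pre>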
*
* Note: The new {@link Journal} WILL NOT include any historical commit points
* other than the one selected by the caller-specified commitTime.
*
* Note: If any indices use references to raw records then they MUST define an
* {@link IOverflowHandler} in order for the raw records to be copied to the new
* store and those references updated in the index to point to the records in
* the new store. For example, the {@link BigdataFileSystem} uses such
* references and defines an {@link IOverflowHandler} so that the raw file
* blocks will not be lost on overflow.
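*
* A sketch of wiring in such a handler when an index is registered (the
* handler class and index name are hypothetical; assumes
* {@link IndexMetadata#setOverflowHandler(IOverflowHandler)}):
*
* <pre>
* final IndexMetadata md = new IndexMetadata("myIndex", UUID.randomUUID());
*
* // Copy raw records onto the target store when the index is copied.
* md.setOverflowHandler(new MyOverflowHandler());
*
* journal.registerIndex(md);
* </pre>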
*
* @author Bryan Thompson
* @version $Id$
*
* @see Journal#compact(File)
*
* @todo it would be easy enough to change the branching factor during this
* task.
*
* @todo add listener api for start/end notice (and perhaps error).
*
* @todo add export task that builds index segments from the indices on a
* journal and work this into {@link CompactJournalUtility} or a similar
* utility class.
*/
public class CompactTask implements Callable<Journal> {
/**
* Logger.
*/
protected static final Logger log = Logger.getLogger(CompactTask.class);
final static protected boolean INFO = log.isInfoEnabled();
final static protected boolean DEBUG = log.isDebugEnabled();
/** The source {@link Journal}. */
final protected Journal oldJournal;
/** The output {@link File}. */
final protected File outFile;
/** The caller-specified commit time. */
final protected long commitTime;
/**
* The {@link ICommitRecord} corresponding to the caller-specified commit
* time.
*/
final protected ICommitRecord commitRecord;
// the cause from the first task to error.
final protected AtomicReference<Throwable> firstCause = new AtomicReference<Throwable>();
// #of tasks started.
final protected AtomicInteger startCount = new AtomicInteger(0);
// #of tasks completed successfully.
final protected AtomicInteger doneCount = new AtomicInteger(0);
/**
* The task reads the state of each named index as of the given
commitTime and writes the index data in index order onto the output journal.
*
* Note: Unlike the {@link IndexSegmentBuilder}, this does not produce
* a perfect read-optimized index. However, in many cases this task does
* significantly improve the locality of reference for the {@link BTree}s
* and will discard any deleted data or data which has been
* overwritten.
*
* @param src
* The source journal.
* @param outFile
* The output file.
* @param commitTime
* The commit time whose state will be compacted onto the
* output file (the first commit point whose commit time is
* less than or equal to (LTE) the given commit time will be used).
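* For example, passing {@link Journal#getLastCommitTime()} selects the
* most recent commit point.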
*/
public CompactTask(final Journal src, final File outFile,
final long commitTime) {
if (src == null)
throw new IllegalArgumentException();
if (outFile == null)
throw new IllegalArgumentException();
if (commitTime <= 0) {
// invalid commit time.
throw new IllegalArgumentException();
}
if (commitTime > src.getLastCommitTime())
// time beyond the most recent commit time.
throw new IllegalArgumentException();
this.oldJournal = src;
this.outFile = outFile;
this.commitTime = commitTime;
this.commitRecord = src.getCommitRecord(commitTime);
}
/**
* Compact the {@link #oldJournal} journal onto the {@link #outFile}
* file.
*
* @return The already open {@link Journal} iff this task succeeds. If
* the task fails, then the {@link Journal} (if created) will
* have been closed. If you are backing up data, then be sure to
* shutdown the returned {@link Journal} so that it can release
* its resources.
*/
public Journal call() throws Exception {
final Journal newJournal = createJournal();
try {
// copy all named indices.
copyIndices(newJournal);
// write a commit point (!!!)
newJournal.commit();
return newJournal;
} catch (Throwable t) {
try {
// make sure that the output journal is closed.
newJournal.close();
} catch (Throwable t2) {
log.warn("Could not close the new journal", t2);
// ignore.
}
// rethrow the exception.
throw new RuntimeException(t);
}
}
/**
* Create the output journal.
*
* @return The output journal.
*/
protected Journal createJournal() {
// default properties from the source journal.
final Properties p = oldJournal.getProperties();
// set the file for the new journal.
p.setProperty(Options.FILE, outFile.getAbsolutePath());
if (p.getProperty(Options.CREATE_TEMP_FILE) != null) {
// make sure that this property is turned off.
p.setProperty(Options.CREATE_TEMP_FILE, "false");
}
if (p.getProperty(Options.BUFFER_MODE) != null) {
BufferMode bufferMode = BufferMode.valueOf(p
.getProperty(Options.BUFFER_MODE));
if (!bufferMode.isStable()) {
/*
* Force the disk-only mode if the source journal was not
* stable.
*/
p.setProperty(Options.BUFFER_MODE, BufferMode.Disk.toString());
}
}
return new Journal(p);
}
/**
* Copy all named indices from the {@link #oldJournal} to the new
* journal.
*
* Note: This code is similar to that found in the
* {@link OverflowManager}.
*
* @param newJournal
* The new journal.
*/
protected void copyIndices(final Journal newJournal) {
final long begin = System.currentTimeMillis();
// using snapshot isolation view of Name2Addr
final int nindices = (int) oldJournal.getName2Addr(commitTime)
.rangeCount(null, null);
final Iterator<String> nitr = oldJournal.indexNameScan(
null/* prefix */, commitTime);
// // using read-committed view of Name2Addr
// final ITupleIterator itr = oldJournal.getName2Addr(commitTime)
// .rangeIterator(null, null);
/*
* This service will limit the #of indices that we process in parallel.
*
* Note: Based on some (limited) experimentation, the store file is
* reduced by the same amount regardless of parallel vs serial
* processing of the index files.
*
* Note: Too much parallelism here appears to slow things down.
*
* FIXME The ParallelismLimitedExecutorService is broken (11/10/08).
* This can be demonstrated if it is enabled for the pipeline join.
* Therefore it has been taken out of service until it can be fixed.
*/
// final ParallelismLimitedExecutorService service = new ParallelismLimitedExecutorService(
// oldJournal.getExecutorService(), 3/* maxParallel */, 20/* queueCapacity */);
final ThreadPoolExecutor service = (ThreadPoolExecutor)Executors.newFixedThreadPool(
3/* maxParallel */, DaemonThreadFactory.defaultThreadFactory());
while (nitr.hasNext()) {
// final ITuple tuple = itr.next();
//
// final Entry entry = EntrySerializer.INSTANCE
// .deserialize(new DataInputBuffer(tuple.getValue()));
final String name = nitr.next();
// Submit task to copy the index to the new journal.
service.submit(new CopyIndexTask(newJournal, name));
}
try {
// shutdown the service and await termination.
new ShutdownHelper(service, 60L/* logTimeout */, TimeUnit.SECONDS) {
protected void logTimeout() {
if(INFO)
log.info("Waiting on task(s)" + ": elapsed="
+ TimeUnit.NANOSECONDS.toMillis(elapsed())
+ "ms, #active=" + service.getActiveCount());
}
};
} catch (InterruptedException e) {
/*
* Interrupted awaiting task completion. shutdown the service
* immediately and rethrow the exception.
*/
service.shutdownNow();
throw new RuntimeException(e);
}
// if any CopyIndexTask failed then do not report success.
final Throwable t = firstCause.get();
if (t != null) {
// at least one index copy failed; the output journal is incomplete.
throw new RuntimeException(t);
}
final long elapsed = System.currentTimeMillis() - begin;
if (INFO)
log.info("Copied " + nindices + " indices in " + elapsed + "ms");
} // copyIndices
/**
* Copy an index to the new journal.
*
* @author Bryan Thompson
* @version $Id$
*/
protected class CopyIndexTask implements Callable<Void> {
/** The new journal. */
protected final Journal newJournal;
// /**
// * An {@link Entry} from the {@link Name2Addr} index for an index
// * defined on the {@link #oldJournal}.
// */
// protected final Entry entry;
private final String name;
/**
* @param newJournal
* The new journal.
* @param name The name of an index to be copied.
*/
public CopyIndexTask(final Journal newJournal, final String name) {
if (newJournal == null)
throw new IllegalArgumentException();
if (name == null)
throw new IllegalArgumentException();
this.newJournal = newJournal;
this.name = name;
}
/**
* Creates an index on the {@link #newJournal}, copies the data from
* the index on the old journal, and then registers the new index on the
* {@link #newJournal}.
*/
public Void call() throws Exception {
try {
startCount.incrementAndGet();
if (INFO)
log.info("Start: name=" + name);
// source index.
// final BTree oldBTree = (BTree) oldJournal
// .getIndexWithCheckpointAddr(entry.checkpointAddr);
/*
* This only supports the BTree class.
*
* @see https://sourceforge.net/apps/trac/bigdata/ticket/585
* (GIST)
*/
final BTree oldBTree = (BTree) oldJournal
.getIndexWithCommitRecord(name, commitRecord);
// #of index entries on the old index.
final long entryCount = oldBTree.rangeCount();
// clone index metadata.
final IndexMetadata indexMetadata = oldBTree.getIndexMetadata()
.clone();
/*
* Create and register the index on the new journal.
*
* Note: This is essentially a variant of BTree#create() where
* we need to propagate the counter from the old BTree to the
* new BTree.
*/
/*
* Write metadata record on store. The address of that record is
* set as a side-effect on the metadata object.
*/
indexMetadata.write(newJournal);
// note the current counter value.
final long oldCounter = oldBTree.getCounter().get();
if (INFO)
log.info("name=" + name //
+ ", entryCount=" + entryCount//
+ ", checkpoint=" + oldBTree.getCheckpoint()//
);
// Create checkpoint for the new B+Tree.
final Checkpoint overflowCheckpoint = indexMetadata
.overflowCheckpoint(oldBTree.getCheckpoint());
/*
* Write the checkpoint record on the store. The address of the
* checkpoint record is set on the object as a side effect.
*/
overflowCheckpoint.write(newJournal);
/*
* Load the B+Tree from the store using that checkpoint record.
*/
final BTree newBTree = BTree
.load(newJournal, overflowCheckpoint
.getCheckpointAddr(), false/* readOnly */);
// Note the counter value on the new BTree.
final long newCounter = newBTree.getCounter().get();
// Verify the counter was propagated to the new BTree.
assert newCounter == oldCounter : "expected oldCounter="
+ oldCounter + ", but found newCounter=" + newCounter;
/*
* Copy the data from the B+Tree on the old journal into the
* B+Tree on the new journal.
*
* Note: [overflow := true] since we are copying from the old
* journal onto the new journal.
*/
if (DEBUG)
log.debug("Copying data to new journal: name=" + name
+ ", entryCount=" + entryCount);
newBTree.rangeCopy(oldBTree, null, null, true/* overflow */);
/*
* Register the new B+Tree on the new journal.
*/
newJournal.registerIndex(name, newBTree);
if (DEBUG)
log.debug("Done with index: name=" + name);
doneCount.incrementAndGet();
} catch (Throwable t) {
// note the first error; copyIndices() checks it on completion.
firstCause.compareAndSet(null/* expect */, t);
log.error("Index copy failed: name=" + name, t);
}
return null;
}
}
}