/**
Copyright (C) SYSTAP, LLC DBA Blazegraph 2006-2016. All rights reserved.
Contact:
SYSTAP, LLC DBA Blazegraph
2501 Calvert ST NW #106
Washington, DC 20008
licenses@blazegraph.com
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; version 2 of the License.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program; if not, write to the Free Software
Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
*/
package com.bigdata.journal;
import java.io.File;
import java.util.Iterator;
import java.util.Properties;
import java.util.concurrent.Callable;
import java.util.concurrent.Executors;
import java.util.concurrent.ThreadPoolExecutor;
import java.util.concurrent.TimeUnit;
import java.util.concurrent.atomic.AtomicInteger;
import java.util.concurrent.atomic.AtomicReference;
import org.apache.log4j.Logger;
import com.bigdata.bfs.BigdataFileSystem;
import com.bigdata.btree.BTree;
import com.bigdata.btree.Checkpoint;
import com.bigdata.btree.IOverflowHandler;
import com.bigdata.btree.IndexMetadata;
import com.bigdata.btree.IndexSegmentBuilder;
import com.bigdata.journal.Journal.Options;
import com.bigdata.resources.OverflowManager;
import com.bigdata.util.DaemonThreadFactory;
import com.bigdata.util.concurrent.ShutdownHelper;
/**
* This task compacts the journal state onto a caller-specified file. It may be
* used to compact a journal, to create backups, or to convert an in-memory
* journal into a disk-based journal. The task reads the state of each named
* index as of the selected commit point, writing the index entries in index
* order onto the output journal. This process will typically both reduce the
* space on disk required by the (new) backing store and improve locality in
* the (new) backing store.
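*
* A minimal usage sketch (the output file name is hypothetical; assumes an
* already open {@link Journal} named <code>journal</code>):
*
* <pre>
* final File out = new File("backup.jnl");
*
* final Journal compacted = new CompactTask(journal, out, journal
*         .getLastCommitTime()).call();
* try {
*     // verify or archive the compacted store here.
* } finally {
*     // release resources held by the new journal.
*     compacted.shutdown();
* }
* </pre>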
*
* Note: The new {@link Journal} WILL NOT include any historical commit points
* other than the one selected by the caller-specified commitTime.
*
* Note: If any indices use references to raw records then they MUST define an
* {@link IOverflowHandler} in order for the raw records to be copied to the new
* store and those references updated in the index to point to the records in
* the new store. For example, the {@link BigdataFileSystem} uses such
* references and defines an {@link IOverflowHandler} so that the raw file
* blocks will not be lost on overflow.
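*
* A sketch of wiring in such a handler when an index is registered (the
* handler class and index name are hypothetical; assumes
* {@link IndexMetadata#setOverflowHandler(IOverflowHandler)}):
*
* <pre>
* final IndexMetadata md = new IndexMetadata("myIndex", UUID.randomUUID());
*
* // Copy raw records onto the target store when the index is copied.
* md.setOverflowHandler(new MyOverflowHandler());
*
* journal.registerIndex(md);
* </pre>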
*
* @author Bryan Thompson
* @version $Id$
*
* @see Journal#compact(File)
*
* @todo it would be easy enough to change the branching factor during this
* task.
*
* @todo add listener api for start/end notice (and perhaps error).
*
* @todo add export task that builds index segments from the indices on a
* journal and work this into {@link CompactJournalUtility} or a similar
* utility class.
*/
public class CompactTask implements Callable<Journal> {
/**
* Logger.
*/
protected static final Logger log = Logger.getLogger(CompactTask.class);
final static protected boolean INFO = log.isInfoEnabled();
final static protected boolean DEBUG = log.isDebugEnabled();
/** The source {@link Journal}. */
final protected Journal oldJournal;
/** The output {@link File}. */
final protected File outFile;
/** The caller-specified commit time. */
final protected long commitTime;
/**
* The {@link ICommitRecord} corresponding to the caller-specified commit
* time.
*/
final protected ICommitRecord commitRecord;
// the cause from the first task to error.
final protected AtomicReference<Throwable> firstCause = new AtomicReference<Throwable>();
// #of tasks started.
final protected AtomicInteger startCount = new AtomicInteger(0);
// #of tasks completed successfully.
final protected AtomicInteger doneCount = new AtomicInteger(0);
/**
* The task reads the state of each named index as of the given
commitTime and writes the index data in index order onto the output journal.
*
* Note: Unlike the {@link IndexSegmentBuilder}, this does not produce
* a perfect read-optimized index. However, in many cases this task does
* significantly improve the locality of reference for the {@link BTree}s
* and will discard any deleted data or data which has been
* overwritten.
*
* @param src
* The source journal.
* @param outFile
* The output file.
* @param commitTime
* The commit time whose state will be compacted onto the
* output file (the first commit point whose commit time is
* less than or equal to (LTE) the given commit time will be used).
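* For example, passing {@link Journal#getLastCommitTime()} selects the
* most recent commit point.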
*/
public CompactTask(final Journal src, final File outFile,
final long commitTime) {
if (src == null)
throw new IllegalArgumentException();
if (outFile == null)
throw new IllegalArgumentException();
if (commitTime <= 0) {
// invalid commit time.
throw new IllegalArgumentException();
}
if (commitTime > src.getLastCommitTime())
// time beyond the most recent commit time.
throw new IllegalArgumentException();
this.oldJournal = src;
this.outFile = outFile;
this.commitTime = commitTime;
this.commitRecord = src.getCommitRecord(commitTime);
}
/**
* Compact the {@link #oldJournal} journal onto the {@link #outFile}
* file.
*
* @return The already open {@link Journal} iff this task succeeds. If
* the task fails, then the {@link Journal} (if created) will
* have been closed. If you are backing up data, then be sure to
* shutdown the returned {@link Journal} so that it can release
* its resources.
*/
public Journal call() throws Exception {
final Journal newJournal = createJournal();
try {
// copy all named indices.
copyIndices(newJournal);
// write a commit point (!!!)
newJournal.commit();
return newJournal;
} catch (Throwable t) {
try {
// make sure that the output journal is closed.
newJournal.close();
} catch (Throwable t2) {
log.warn("Could not close the new journal", t2);
// ignore.
}
// rethrow the exception.
throw new RuntimeException(t);
}
}
/**
* Create the output journal.
*
* @return The output journal.
*/
protected Journal createJournal() {
// default properties from the source journal.
final Properties p = oldJournal.getProperties();
// set the file for the new journal.
p.setProperty(Options.FILE, outFile.getAbsolutePath());
if (p.getProperty(Options.CREATE_TEMP_FILE) != null) {
// make sure that this property is turned off.
p.setProperty(Options.CREATE_TEMP_FILE, "false");
}
if (p.getProperty(Options.BUFFER_MODE) != null) {
BufferMode bufferMode = BufferMode.valueOf(p
.getProperty(Options.BUFFER_MODE));
if (!bufferMode.isStable()) {
/*
* Force the disk-only mode if the source journal was not
* stable.
*/
p.setProperty(Options.BUFFER_MODE, BufferMode.Disk.toString());
}
}
return new Journal(p);
}
/**
* Copy all named indices from the {@link #oldJournal} to the new
* journal.
*
* Note: This code is similar to that found in the
* {@link OverflowManager}.
*
* @param newJournal
* The new journal.
*/
protected void copyIndices(final Journal newJournal) {
final long begin = System.currentTimeMillis();
// using snapshot isolation view of Name2Addr
final int nindices = (int) oldJournal.getName2Addr(commitTime)
.rangeCount(null, null);
final Iterator<String> nitr = oldJournal.indexNameScan(
null/* prefix */, commitTime);
// // using read-committed view of Name2Addr
// final ITupleIterator itr = oldJournal.getName2Addr(commitTime)
// .rangeIterator(null, null);
/*
* This service will limit the #of indices that we process in parallel.
*
* Note: Based on some (limited) experimentation, the store file is
* reduced by the same amount regardless of parallel vs serial
* processing of the index files.
*
* Note: Too much parallelism here appears to slow things down.
*
* FIXME The ParallelismLimitedExecutorService is broken (11/10/08).
* This can be demonstrated if it is enabled for the pipeline join.
* Therefore it has been taken out of service until it can be fixed.
*/
// final ParallelismLimitedExecutorService service = new ParallelismLimitedExecutorService(
// oldJournal.getExecutorService(), 3/* maxParallel */, 20/* queueCapacity */);
final ThreadPoolExecutor service = (ThreadPoolExecutor)Executors.newFixedThreadPool(
3/* maxParallel */, DaemonThreadFactory.defaultThreadFactory());
while (nitr.hasNext()) {
// final ITuple tuple = itr.next();
//
// final Entry entry = EntrySerializer.INSTANCE
// .deserialize(new DataInputBuffer(tuple.getValue()));
final String name = nitr.next();
// Submit task to copy the index to the new journal.
service.submit(new CopyIndexTask(newJournal, name));
}
try {
// shutdown the service and await termination.
new ShutdownHelper(service, 60L/* logTimeout */, TimeUnit.SECONDS) {
protected void logTimeout() {
if(INFO)
log.info("Waiting on task(s)" + ": elapsed="
+ TimeUnit.NANOSECONDS.toMillis(elapsed())
+ "ms, #active=" + service.getActiveCount());
}
};
} catch (InterruptedException e) {
/*
* Interrupted awaiting task completion. shutdown the service
* immediately and rethrow the exception.
*/
service.shutdownNow();
throw new RuntimeException(e);
}
// if any CopyIndexTask failed then do not report success.
final Throwable t = firstCause.get();
if (t != null) {
// at least one index copy failed; the output journal is incomplete.
throw new RuntimeException(t);
}
final long elapsed = System.currentTimeMillis() - begin;
if (INFO)
log.info("Copied " + nindices + " indices in " + elapsed + "ms");
} // copyIndices
/**
* Copy an index to the new journal.
*
* @author Bryan Thompson
* @version $Id$
*/
protected class CopyIndexTask implements Callable<Void> {
/** The new journal. */
protected final Journal newJournal;
// /**
// * An {@link Entry} from the {@link Name2Addr} index for an index
// * defined on the {@link #oldJournal}.
// */
// protected final Entry entry;
private final String name;
/**
* @param newJournal
* The new journal.
* @param name The name of an index to be copied.
*/
public CopyIndexTask(final Journal newJournal, final String name) {
if (newJournal == null)
throw new IllegalArgumentException();
if (name == null)
throw new IllegalArgumentException();
this.newJournal = newJournal;
this.name = name;
}
/**
* Creates an index on the {@link #newJournal}, copies the data from
* the index on the old journal, and then registers the new index on the
* {@link #newJournal}.
*/
public Void call() throws Exception {
try {
startCount.incrementAndGet();
if (INFO)
log.info("Start: name=" + name);
// source index.
// final BTree oldBTree = (BTree) oldJournal
// .getIndexWithCheckpointAddr(entry.checkpointAddr);
/*
* This only supports the BTree class.
*
* @see https://sourceforge.net/apps/trac/bigdata/ticket/585
* (GIST)
*/
final BTree oldBTree = (BTree) oldJournal
.getIndexWithCommitRecord(name, commitRecord);
// #of index entries on the old index.
final long entryCount = oldBTree.rangeCount();
// clone index metadata.
final IndexMetadata indexMetadata = oldBTree.getIndexMetadata()
.clone();
/*
* Create and register the index on the new journal.
*
* Note: This is essentially a variant of BTree#create() where
* we need to propagate the counter from the old BTree to the
* new BTree.
*/
/*
* Write metadata record on store. The address of that record is
* set as a side-effect on the metadata object.
*/
indexMetadata.write(newJournal);
// note the current counter value.
final long oldCounter = oldBTree.getCounter().get();
if (INFO)
log.info("name=" + name //
+ ", entryCount=" + entryCount//
+ ", checkpoint=" + oldBTree.getCheckpoint()//
);
// Create checkpoint for the new B+Tree.
final Checkpoint overflowCheckpoint = indexMetadata
.overflowCheckpoint(oldBTree.getCheckpoint());
/*
* Write the checkpoint record on the store. The address of the
* checkpoint record is set on the object as a side effect.
*/
overflowCheckpoint.write(newJournal);
/*
* Load the B+Tree from the store using that checkpoint record.
*/
final BTree newBTree = BTree
.load(newJournal, overflowCheckpoint
.getCheckpointAddr(), false/* readOnly */);
// Note the counter value on the new BTree.
final long newCounter = newBTree.getCounter().get();
// Verify the counter was propagated to the new BTree.
assert newCounter == oldCounter : "expected oldCounter="
+ oldCounter + ", but found newCounter=" + newCounter;
/*
* Copy the data from the B+Tree on the old journal into the
* B+Tree on the new journal.
*
* Note: [overflow := true] since we are copying from the old
* journal onto the new journal.
*/
if (DEBUG)
log.debug("Copying data to new journal: name=" + name
+ ", entryCount=" + entryCount);
newBTree.rangeCopy(oldBTree, null, null, true/* overflow */);
/*
* Register the new B+Tree on the new journal.
*/
newJournal.registerIndex(name, newBTree);
if (DEBUG)
log.debug("Done with index: name=" + name);
doneCount.incrementAndGet();
} catch (Throwable t) {
// note the first error; copyIndices() checks it on completion.
firstCause.compareAndSet(null/* expect */, t);
log.error("Index copy failed: name=" + name, t);
}
return null;
}
}
}