com.bigdata.rdf.load.RDFLoadTaskFactory Maven / Gradle / Ivy
package com.bigdata.rdf.load;
import org.openrdf.model.Statement;
import org.openrdf.rio.RDFFormat;
import com.bigdata.counters.CounterSet;
import com.bigdata.counters.Instrument;
import com.bigdata.rdf.rio.IAsynchronousWriteStatementBufferFactory;
import com.bigdata.rdf.rio.RDFParserOptions;
import com.bigdata.rdf.store.AbstractTripleStore;
import com.bigdata.service.ILoadBalancerService;
/**
* Factory for tasks for loading RDF resources into a database.
*
* @author Bryan Thompson
* @version $Id$
*/
public class RDFLoadTaskFactory extends
AbstractRDFTaskFactory {
/**
*
* @param db
* @param bufferCapacity
* @param verifyData
* @param deleteAfter
* if the file should be deleted once it has been loaded.
* @param fallback
* An attempt will be made to determine the interchange syntax
* using {@link RDFFormat}. If no determination can be made then
* the loader will presume that the files are in the format
* specified by this parameter (if any). Files whose format can
* not be determined will be logged as errors.
*
* @todo drop the writeBuffer arg.
*/
public RDFLoadTaskFactory(final AbstractTripleStore db,
final int bufferCapacity, final RDFParserOptions parserOptions,
final boolean deleteafter, final RDFFormat fallback) {
this(db, parserOptions, deleteafter, fallback,
new LoadStatementBufferFactory(db, bufferCapacity));
}
/**
*
* @param db
* @param verifyData
* @param deleteAfter
* if the file should be deleted once it has been loaded.
* @param fallback
* An attempt will be made to determine the interchange syntax
* using {@link RDFFormat}. If no determination can be made then
* the loader will presume that the files are in the format
* specified by this parameter (if any). Files whose format can
* not be determined will be logged as errors.
* @param factory
* Used to buffer and load statements.
*/
public RDFLoadTaskFactory(final AbstractTripleStore db,
final RDFParserOptions parserOptions, final boolean deleteafter,
final RDFFormat fallback, IStatementBufferFactory factory) {
super(db, parserOptions, deleteafter, fallback, factory);
}
/**
* Sets up some additional counters for reporting by the client to the
* {@link ILoadBalancerService}.
*
* @todo in the base class also?
*/
public CounterSet getCounters() {
final CounterSet counterSet = new CounterSet();
/*
* Elapsed ms since the start of the load up to and until the end of the
* load.
*/
counterSet.addCounter("elapsed", new Instrument() {
@Override
protected void sample() {
final long elapsed = elapsed();
setValue(elapsed);
}
});
/*
* Note: This is the #of told triples read by _this_ client.
*
* When you are loading using multiple instances of the concurrent data
* loader, then the total #of told triples is the aggregation across all
* of those instances.
*/
counterSet.addCounter("toldTriplesLoaded", new Instrument() {
@Override
protected void sample() {
setValue(toldTriples.get());
}
});
/*
* Note: This is the told triples per second rate for _this_ client only
* since it is based on the triples read by the threads for this client.
*
* When you are loading using multiple instances of the concurrent data
* loader, then the total told triples per second rate is the
* aggregation across all of those instances.
*/
counterSet.addCounter("toldTriplesPerSec", new Instrument() {
@Override
protected void sample() {
final long elapsed = elapsed();
final double tps = (long) (((double) toldTriples.get())
/ ((double) elapsed) * 1000d);
setValue((long) tps);
}
});
if (bufferFactory instanceof IAsynchronousWriteStatementBufferFactory) {
counterSet
.attach(((IAsynchronousWriteStatementBufferFactory) bufferFactory)
.getCounters());
}
return counterSet;
}
/**
* Report totals.
*
* Note: these totals reflect the actual state of the database, not just
* the #of triples written by this client. Therefore if there are
* concurrent writers then the apparent TPS here will be higher than was
* reported by the counters for just this client -- all writers on the
* database will have been attributed to just this client.
*/
public String reportTotals() {
// total run time.
final long elapsed = elapsed();
final long nterms = db.getTermCount();
final long nstmts = db.getStatementCount();
final double tps = (long) (((double) nstmts) / ((double) elapsed) * 1000d);
return "Database: #terms=" + nterms + ", #stmts=" + nstmts
+ ", rate=" + tps + " in " + elapsed + " ms.";
}
}