All Downloads are FREE. Search and download functionalities are using the official Maven repository.

com.bigdata.rdf.load.RDFLoadTaskFactory Maven / Gradle / Ivy

package com.bigdata.rdf.load;

import org.openrdf.model.Statement;
import org.openrdf.rio.RDFFormat;

import com.bigdata.counters.CounterSet;
import com.bigdata.counters.Instrument;
import com.bigdata.rdf.rio.IAsynchronousWriteStatementBufferFactory;
import com.bigdata.rdf.rio.RDFParserOptions;
import com.bigdata.rdf.store.AbstractTripleStore;
import com.bigdata.service.ILoadBalancerService;

/**
 * Factory for tasks for loading RDF resources into a database.
 * 
 * @author Bryan Thompson
 * @version $Id$
 */
public class RDFLoadTaskFactory extends
        AbstractRDFTaskFactory {
    
    /**
     * 
     * @param db
     * @param bufferCapacity
     * @param verifyData
     * @param deleteAfter
     *            if the file should be deleted once it has been loaded.
     * @param fallback
     *            An attempt will be made to determine the interchange syntax
     *            using {@link RDFFormat}. If no determination can be made then
     *            the loader will presume that the files are in the format
     *            specified by this parameter (if any). Files whose format can
     *            not be determined will be logged as errors.
     * 
     * @todo drop the writeBuffer arg.
     */
    public RDFLoadTaskFactory(final AbstractTripleStore db,
            final int bufferCapacity, final RDFParserOptions parserOptions,
            final boolean deleteafter, final RDFFormat fallback) {

        this(db, parserOptions, deleteafter, fallback,
                new LoadStatementBufferFactory(db, bufferCapacity));

    }

    /**
     * 
     * @param db
     * @param verifyData
     * @param deleteAfter
     *            if the file should be deleted once it has been loaded.
     * @param fallback
     *            An attempt will be made to determine the interchange syntax
     *            using {@link RDFFormat}. If no determination can be made then
     *            the loader will presume that the files are in the format
     *            specified by this parameter (if any). Files whose format can
     *            not be determined will be logged as errors.
     * @param factory
     *            Used to buffer and load statements.
     */
    public RDFLoadTaskFactory(final AbstractTripleStore db,
            final RDFParserOptions parserOptions, final boolean deleteafter,
            final RDFFormat fallback, IStatementBufferFactory factory) {

        super(db, parserOptions, deleteafter, fallback, factory);

    }

    /**
     * Sets up some additional counters for reporting by the client to the
     * {@link ILoadBalancerService}.
     * 
     * @todo in the base class also?
     */
    public CounterSet getCounters() {

        final CounterSet counterSet = new CounterSet();

        /*
         * Elapsed ms since the start of the load up to and until the end of the
         * load.
         */
        counterSet.addCounter("elapsed", new Instrument() {

            @Override
            protected void sample() {

                final long elapsed = elapsed();

                setValue(elapsed);

            }
        });

        /*
         * Note: This is the #of told triples read by _this_ client.
         * 
         * When you are loading using multiple instances of the concurrent data
         * loader, then the total #of told triples is the aggregation across all
         * of those instances.
         */
        counterSet.addCounter("toldTriplesLoaded", new Instrument() {

            @Override
            protected void sample() {

                setValue(toldTriples.get());

            }
        });

        /*
         * Note: This is the told triples per second rate for _this_ client only
         * since it is based on the triples read by the threads for this client.
         * 
         * When you are loading using multiple instances of the concurrent data
         * loader, then the total told triples per second rate is the
         * aggregation across all of those instances.
         */
        counterSet.addCounter("toldTriplesPerSec", new Instrument() {

            @Override
            protected void sample() {

                final long elapsed = elapsed();

                final double tps = (long) (((double) toldTriples.get())
                        / ((double) elapsed) * 1000d);

                setValue((long) tps);

            }
        });

        if (bufferFactory instanceof IAsynchronousWriteStatementBufferFactory) {

            counterSet
                    .attach(((IAsynchronousWriteStatementBufferFactory) bufferFactory)
                            .getCounters());

        }

        return counterSet;

    }
    
    /**
     * Report totals.
     * 

* Note: these totals reflect the actual state of the database, not just * the #of triples written by this client. Therefore if there are * concurrent writers then the apparent TPS here will be higher than was * reported by the counters for just this client -- all writers on the * database will have been attributed to just this client. */ public String reportTotals() { // total run time. final long elapsed = elapsed(); final long nterms = db.getTermCount(); final long nstmts = db.getStatementCount(); final double tps = (long) (((double) nstmts) / ((double) elapsed) * 1000d); return "Database: #terms=" + nterms + ", #stmts=" + nstmts + ", rate=" + tps + " in " + elapsed + " ms."; } }





© 2015 - 2025 Weber Informatics LLC | Privacy Policy