All downloads are free. The search and download functionality uses the official Maven repository.

com.bigdata.rdf.rio.TestAsynchronousStatementBufferFactory Maven / Gradle / Ivy

There is a newer version: 2.1.4
Show newest version
/*

Copyright (C) SYSTAP, LLC DBA Blazegraph 2006-2016.  All rights reserved.

Contact:
     SYSTAP, LLC DBA Blazegraph
     2501 Calvert ST NW #106
     Washington, DC 20008
     [email protected]

This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; version 2 of the License.

This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
GNU General Public License for more details.

You should have received a copy of the GNU General Public License
along with this program; if not, write to the Free Software
Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA

*/
/*
 * Created on Apr 18, 2009
 */

package com.bigdata.rdf.rio;

import java.io.File;
import java.util.Iterator;
import java.util.Properties;
import java.util.concurrent.TimeUnit;

import org.openrdf.rio.RDFFormat;

import com.bigdata.btree.ITupleIterator;
import com.bigdata.btree.IndexMetadata;
import com.bigdata.journal.ConcurrencyManager;
import com.bigdata.rdf.axioms.NoAxioms;
import com.bigdata.rdf.lexicon.BigdataValueCentricFullTextIndex;
import com.bigdata.rdf.lexicon.LexiconKeyOrder;
import com.bigdata.rdf.lexicon.LexiconRelation;
import com.bigdata.rdf.model.BigdataStatement;
import com.bigdata.rdf.spo.SPOKeyOrder;
import com.bigdata.rdf.store.AbstractTripleStore;
import com.bigdata.rdf.store.ScaleOutTripleStore;
import com.bigdata.rdf.store.TestScaleOutTripleStoreWithEmbeddedFederation;
import com.bigdata.rdf.util.DumpLexicon;
import com.bigdata.service.AbstractScaleOutFederation;
import com.bigdata.service.EmbeddedClient;
import com.bigdata.service.IBigdataClient;

/**
 * Test suite for {@link AsynchronousStatementBufferFactory}. To run this test
 * by itself specify
 * -DtestClass=com.bigdata.rdf.store.TestScaleOutTripleStoreWithEmbeddedFederation
 * .
 * 
 * @author Bryan Thompson
 * @version $Id$
 * 
 *          FIXME variant to test w/ and w/o the full text index (with lookup by
 *          tokens).
 * 
 *          FIXME variant to test async w/ sids (once written).
 *          
 * @todo The async API is only defined at this time for scale-out index views,
 *       so maybe move this into the scale-out proxy test suite.
 * 
 * @see TestScaleOutTripleStoreWithEmbeddedFederation
 */
public class TestAsynchronousStatementBufferFactory extends
        AbstractRIOTestCase {

    /**
     * Zero-argument constructor (standard for JUnit test cases).
     */
    public TestAsynchronousStatementBufferFactory() {
    }

    /**
     * Constructor used to run a single named test.
     * 
     * @param name
     *            The test name (passed through to the superclass).
     */
    public TestAsynchronousStatementBufferFactory(String name) {
        super(name);
    }

    // Chunk size handed to the AsynchronousStatementBufferFactory (see doLoad2).
    private static final int chunkSize = 20000;
    // Initial capacity for the per-parser distinct RDF Values map (see doLoad2).
    private static final int valuesInitialCapacity = 10000;
    // Initial capacity for the per-parser blank nodes map (see doLoad2).
    private static final int bnodesInitialCapacity = 16;
    // Threshold above which statements are no longer buffered; was Long.MAX_VALUE.
    private static final long unbufferedStatementThreshold = 5000L;//Long.MAX_VALUE;
    // Delay before retry when a task submit is rejected (see doLoad2).
    private static final long rejectedExecutionDelay = 250L; // milliseconds.
    
    /**
     * When <code>true</code>, parser/writer pools use multiple threads (see
     * {@link #doLoad2}). SHOULD be true since the whole point of this is
     * higher concurrency. If you set this to false to explore some issue,
     * then change it back to true when you are done!
     */
    private static final boolean parallel = true;
    
//    protected AbstractTripleStore getStore() {
//
//        return getStore(getProperties());
//
//    }

    /**
     * Note: This is overridden to turn off features not supported by this
     * loader (statement identifiers) and to configure the embedded federation
     * for simpler debugging (one data service, no statistics collection,
     * minimal write service threads).
     * 
     * @return The configured properties, layered over
     *         <code>super.getProperties()</code> as defaults.
     */
    public Properties getProperties() {

        final Properties properties = new Properties(super.getProperties());

        // Disable reporting.
        properties.setProperty(IBigdataClient.Options.REPORT_DELAY, "0");
        properties.setProperty(IBigdataClient.Options.COLLECT_QUEUE_STATISTICS, "false");
        properties.setProperty(IBigdataClient.Options.COLLECT_PLATFORM_STATISTICS, "false");

        // One DS is enough.
        properties.setProperty(EmbeddedClient.Options.NDATA_SERVICES, "1");

        // Minimize the #of threads so things are simpler to debug.
        properties.setProperty(ConcurrencyManager.Options.DEFAULT_WRITE_SERVICE_CORE_POOL_SIZE,"0");
        
        properties.setProperty(AbstractTripleStore.Options.TEXT_INDEX, "true");

        properties.setProperty(AbstractTripleStore.Options.STATEMENT_IDENTIFIERS, "false");

//        properties.setProperty(AbstractTripleStore.Options.QUADS, "true");

        // no closure so we don't need the axioms either.
        properties.setProperty(AbstractTripleStore.Options.AXIOMS_CLASS, NoAxioms.class.getName());

        // enable a vocabulary so some things will be inlined.
        properties.setProperty(AbstractTripleStore.Options.VOCABULARY_CLASS,
                AbstractTripleStore.Options.DEFAULT_VOCABULARY_CLASS);

        /*
         * FIXME We MUST specify the KB namespace so we can override this
         * property. [Another approach is to override the idle timeout and
         * have it be less than the chunk timeout such that the sink is closed
         * if it becomes idle (no new chunks appearing) but continues to
         * combine chunks as long as they are appearing before the idle
         * timeout.]
         */
        final String namespace = "test1";

        /*
         * Put a 1s idle timeout on the sink for each of these lexicon
         * indices. (This was two copy/pasted blocks differing only in the
         * key order constant; now a single loop.)
         */
        for (final Object keyOrder : new Object[] { LexiconKeyOrder.TERM2ID,
                LexiconKeyOrder.BLOBS }) {

            final String pname = com.bigdata.config.Configuration
                    .getOverrideProperty(namespace + "."
                            + LexiconRelation.NAME_LEXICON_RELATION + "."
                            + keyOrder,
                            IndexMetadata.Options.SINK_IDLE_TIMEOUT_NANOS);

            final String pval = "" + TimeUnit.SECONDS.toNanos(1);

            if (log.isInfoEnabled())
                log.info("Override: " + pname + "=" + pval);

            // Put an idle timeout on the sink of 1s.
            properties.setProperty(pname, pval);

        }

        // @todo comment out or will fail during verify.
//        properties.setProperty(AbstractTripleStore.Options.ONE_ACCESS_PATH, "true");

        return properties;
        
    }
    
    /**
     * Load and verify the "small.rdf" data set (default triples mode).
     */
    public void test_loadAndVerify_small() throws Exception {

        doLoadAndVerifyTest("/com/bigdata/rdf/rio/small.rdf", getProperties());

    }

    /**
     * Load and verify the "small.rdf" data set in quads mode.
     */
    public void test_loadAndVerify_small_quadsMode() throws Exception {

        final Properties properties = getProperties();

        // Force a quads-mode KB instance.
        properties.setProperty(AbstractTripleStore.Options.QUADS, "true");

        doLoadAndVerifyTest("/com/bigdata/rdf/rio/small.rdf", properties);

    }

    /**
     * Load and verify the "little.ttl" data set in quads mode (triples data
     * loaded into a quads mode kb).
     */
    public void test_loadAndVerify_little_ttl_quadsMode() throws Exception {

        final Properties properties = getProperties();

        // Force a quads-mode KB instance.
        properties.setProperty(AbstractTripleStore.Options.QUADS, "true");

        doLoadAndVerifyTest("/com/bigdata/rdf/rio/little.ttl", properties);

    }
    
    /**
     * Load and verify the "little.trig" data set in quads mode (quads data
     * loaded into a quads mode kb).
     */
    public void test_loadAndVerify_little_trig_quadsMode() throws Exception {

        final Properties properties = getProperties();

        // Force a quads-mode KB instance.
        properties.setProperty(AbstractTripleStore.Options.QUADS, "true");

        doLoadAndVerifyTest("/com/bigdata/rdf/rio/little.trig", properties);

    }

    /**
     * Load and verify the "smallWithBlobs.rdf" data set (default triples
     * mode).
     */
    public void test_loadAndVerify_smallWithBlobs() throws Exception {

        doLoadAndVerifyTest("/com/bigdata/rdf/rio/smallWithBlobs.rdf",
                getProperties());

    }

    /**
     * Load and verify the "smallWithBlobs.rdf" data set in quads mode.
     */
    public void test_loadAndVerify_smallWithBlobs_quadsMode() throws Exception {

        final Properties properties = getProperties();

        // Force a quads-mode KB instance.
        properties.setProperty(AbstractTripleStore.Options.QUADS, "true");

        doLoadAndVerifyTest("/com/bigdata/rdf/rio/smallWithBlobs.rdf",
                properties);

    }

    /**
     * Test with the "broken.rdf" data set (does not contain valid RDF). This
     * tests that the factory will shutdown correctly if there are processing
     * errors.
     * 
     * @throws Exception
     */
    public void test_loadFails() throws Exception {

        final AbstractTripleStore store = getStore();

        try {

            // Guard: the async API requires scale-out index views.
            if (!(store.getIndexManager() instanceof AbstractScaleOutFederation)) {
                log.warn("Test requires scale-out index views.");
                return;
            }

            // Guard: quads mode is not supported by this code path yet.
            if (store.isQuads()) {
                log.warn("Quads not supported yet.");
                return;
            }

            // Load only (no verify) since an error is expected to be reported.
            final AsynchronousStatementBufferFactory loadFactory = doLoad2(
                    store, new File("/com/bigdata/rdf/rio/broken.rdf"),
                    parallel);

            assertEquals("errorCount", 1, loadFactory.getDocumentErrorCount());

        } finally {

            store.__tearDownUnitTest();

        }

    }

    /**
     * Load and verify the "sample data.rdf" data set (default triples mode).
     */
    public void test_loadAndVerify_sampleData() throws Exception {

        doLoadAndVerifyTest("/com/bigdata/rdf/rio/sample data.rdf",
                getProperties());

    }
   
    /**
     * Load and verify the "sample data.rdf" data set in quads mode.
     */
    public void test_loadAndVerify_sampleData_quadsMode() throws Exception {

        final Properties properties = getProperties();

        // Force a quads-mode KB instance.
        properties.setProperty(AbstractTripleStore.Options.QUADS, "true");

        doLoadAndVerifyTest("/com/bigdata/rdf/rio/sample data.rdf", properties);

    }
   
    /**
     * Uses a modest file (~40k statements). This is BSBM data so it has some
     * BLOBs in it. Loads in the database's default (triples) mode.
     * <p>
     * Note: This variant previously forced QUADS mode, which made it
     * identical to {@link #test_loadAndVerify_bsbm_pc100_quadsMode()} and
     * contradicted its intent; the override has been removed.
     */
    public void test_loadAndVerify_bsbm_pc100() throws Exception {
        
        final String file = "/data/bsbm/dataset_pc100.nt";

        doLoadAndVerifyTest(file, getProperties());
        
    }

    /**
     * Uses a modest file (~40k statements). This is BSBM data so it has some
     * BLOBs in it. This loads the data in quads mode.
     */
    public void test_loadAndVerify_bsbm_pc100_quadsMode() throws Exception {

        final Properties properties = getProperties();

        // Force a quads-mode KB instance.
        properties.setProperty(AbstractTripleStore.Options.QUADS, "true");

        doLoadAndVerifyTest("/data/bsbm/dataset_pc100.nt", properties);

    }

//    /**
//     * LUBM U(1).
//     *
//     * Note: This unit test can hang under JDK 1.6.0_17 if you have been
//     * running the entire test suite and you do not specify -XX:+UseMembar to
//     * the JVM. This is a JVM bug and -XX:+UseMembar is the workaround. [This
//     * is also very slow to run, especially with the lexicon validation.]
//     */
//    public void test_loadAndVerify_U1() throws Exception {
//
//        final String file = "/data/lehigh/U1";
//
//        doLoadAndVerifyTest(file, getProperties());
//
//    }

//    /**
//     * Do not leave this unit test in -- it takes too long to validate the
//     * loaded data: LUBM U(10)
//     */
//    public void test_loadAndVerify_U10() throws Exception {
//
//        final String file = "../rdf-data/lehigh/U10";
//
//        doLoadAndVerifyTest(file);
//
//    }

    /**
     * Test loads an RDF/XML resource into a database and then verifies by
     * re-parse that all expected statements were made persistent in the
     * database. Skipped with a warning unless the index manager is a
     * scale-out federation (the async API requires scale-out index views).
     * 
     * @param resource
     *            The resource to load.
     * @param properties
     *            The properties used to create the triple store.
     * 
     * @throws Exception
     */
    protected void doLoadAndVerifyTest(final String resource,
            final Properties properties) throws Exception {

        final AbstractTripleStore store = getStore(properties);

        try {

            if (!(store.getIndexManager() instanceof AbstractScaleOutFederation)) {

                log.warn("Test requires scale-out index views.");

                return;

            }

            doLoad(store, resource, parallel);

            if (log.isDebugEnabled()) {

                log.debug("dumping store...");

                log.debug("LEXICON:\n"
                        + DumpLexicon.dump(store.getLexiconRelation()));

                if (store.getLexiconRelation().isTextIndex()) {

                    // Dump the full text index.
                    final ITupleIterator<?> itr = ((BigdataValueCentricFullTextIndex) store
                            .getLexiconRelation().getSearchEngine()).getIndex()
                            .rangeIterator();

                    while (itr.hasNext()) {

                        log.debug(itr.next().getObject());

                    }

                }

                // Raw statement indices.
                {

                    final Iterator<SPOKeyOrder> itr = store.isQuads() ? SPOKeyOrder
                            .quadStoreKeyOrderIterator() : SPOKeyOrder
                            .tripleStoreKeyOrderIterator();

                    while (itr.hasNext()) {

                        final SPOKeyOrder keyOrder = itr.next();

                        log.debug("\n---" + keyOrder + "---\n"
                                + store.getSPORelation().dump(keyOrder));

                    }

                }

                // Resolved statement indices.
                {

                    final Iterator<SPOKeyOrder> itr = store.isQuads() ? SPOKeyOrder
                            .quadStoreKeyOrderIterator() : SPOKeyOrder
                            .tripleStoreKeyOrderIterator();

                    while (itr.hasNext()) {

                        final SPOKeyOrder keyOrder = itr.next();

                        log.debug("\n" + keyOrder + "\n"
                                + store.getSPORelation().dump(keyOrder));

                        log.debug("\n---" + keyOrder + "---\n"
                                + store.dumpStore(store/* resolveTerms */,
                                        true/* explicit */, true/* inferred */,
                                        true/* axioms */, true/* history */,
                                        true/* justifications */,
                                        true/* sids */, keyOrder));

                    }

                }

            }

            doVerify(store, resource, parallel);

        } finally {

            store.__tearDownUnitTest();

        }

    }

    /**
     * Load the resource (delegates to
     * {@link #doLoad2(AbstractTripleStore, File, boolean)}, which uses the
     * {@link AsynchronousStatementBufferFactory}).
     */
    protected void doLoad(final AbstractTripleStore store,
            final String resource, final boolean parallel) throws Exception {

        doLoad2(store, new File(resource), parallel);

    }

    /**
     * Load using {@link AsynchronousStatementBufferFactory}. Either a single
     * file or (recursively) a directory of RDF files may be submitted. On any
     * error, all outstanding tasks are cancelled and the cause is rethrown.
     * <p>
     * NOTE(review): the factory's generic type parameters appear to have been
     * lost when this file was extracted from HTML; the raw type is retained
     * here -- confirm against the original source.
     * 
     * @param store
     *            The target triple store (must be scale-out).
     * @param resource
     *            A file or directory to load.
     * @param parallel
     *            When <code>true</code>, use multi-threaded pools.
     * 
     * @return The factory (so the caller can inspect counters/error counts).
     */
    protected AsynchronousStatementBufferFactory doLoad2(
            final AbstractTripleStore store, final File resource,
            final boolean parallel) throws Exception {

        final RDFParserOptions parserOptions = new RDFParserOptions();
        parserOptions.setVerifyData(false);

        final AsynchronousStatementBufferFactory factory = new AsynchronousStatementBufferFactory(
                (ScaleOutTripleStore) store,//
                chunkSize, //
                valuesInitialCapacity,//
                bnodesInitialCapacity,//
                RDFFormat.RDFXML, // defaultFormat
                null, // defaultGraph
                parserOptions, //
                false, // deleteAfter
                parallel ? 5 : 1, // parserPoolSize
                20, // parserQueueCapacity
                parallel ? 5 : 1, // term2IdWriterPoolSize
                parallel ? 5 : 1, // otherWriterPoolSize
                parallel ? 5 : 1, // notifyPoolSize
                unbufferedStatementThreshold);

        try {

            // Tasks to load the resource or file(s).
            if (resource.isDirectory()) {

                factory.submitAll(resource,
                        new com.bigdata.rdf.load.RDFFilenameFilter(),
                        rejectedExecutionDelay);

            } else {

                factory.submitOne(resource);

            }

            // Wait for the async writes to complete.
            factory.awaitAll();

            // Dump factory specific counters.
            System.err.println(factory.getCounters().toString());

        } catch (Throwable t) {

            factory.cancelAll(true/* mayInterruptIfRunning */);

            // Rethrow, preserving the cause.
            throw new RuntimeException(t);

        }

        return factory;

    }

}





© 2015 - 2024 Weber Informatics LLC | Privacy Policy