/*
Copyright (C) SYSTAP, LLC DBA Blazegraph 2006-2016. All rights reserved.
Contact:
SYSTAP, LLC DBA Blazegraph
2501 Calvert ST NW #106
Washington, DC 20008
[email protected]
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; version 2 of the License.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program; if not, write to the Free Software
Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
*/
/*
* Created on Apr 18, 2009
*/
package com.bigdata.rdf.rio;
import java.io.File;
import java.util.Iterator;
import java.util.Properties;
import java.util.concurrent.TimeUnit;
import org.openrdf.rio.RDFFormat;
import com.bigdata.btree.ITupleIterator;
import com.bigdata.btree.IndexMetadata;
import com.bigdata.journal.ConcurrencyManager;
import com.bigdata.rdf.axioms.NoAxioms;
import com.bigdata.rdf.lexicon.BigdataValueCentricFullTextIndex;
import com.bigdata.rdf.lexicon.LexiconKeyOrder;
import com.bigdata.rdf.lexicon.LexiconRelation;
import com.bigdata.rdf.model.BigdataStatement;
import com.bigdata.rdf.spo.SPOKeyOrder;
import com.bigdata.rdf.store.AbstractTripleStore;
import com.bigdata.rdf.store.ScaleOutTripleStore;
import com.bigdata.rdf.store.TestScaleOutTripleStoreWithEmbeddedFederation;
import com.bigdata.rdf.util.DumpLexicon;
import com.bigdata.service.AbstractScaleOutFederation;
import com.bigdata.service.EmbeddedClient;
import com.bigdata.service.IBigdataClient;
/**
* Test suite for {@link AsynchronousStatementBufferFactory}. To run this test
* by itself specify
* -DtestClass=com.bigdata.rdf.store.TestScaleOutTripleStoreWithEmbeddedFederation.
*
* @author Bryan Thompson
* @version $Id$
*
* FIXME variant to test w/ and w/o the full text index (with lookup by
* tokens).
*
* FIXME variant to test async w/ sids (once written).
*
* @todo The async API is only defined at this time for scale-out index views,
* so maybe move this into the scale-out proxy test suite.
*
* @see TestScaleOutTripleStoreWithEmbeddedFederation
*/
public class TestAsynchronousStatementBufferFactory extends
AbstractRIOTestCase {
/**
*
*/
public TestAsynchronousStatementBufferFactory() {
}
/**
* @param name
*/
public TestAsynchronousStatementBufferFactory(String name) {
super(name);
}
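/*
* Configuration for the asynchronous bulk load (used by doLoad2()).
*/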
private static final int chunkSize = 20000;
private static final int valuesInitialCapacity = 10000;
private static final int bnodesInitialCapacity = 16;
private static final long unbufferedStatementThreshold = 5000L;//Long.MAX_VALUE;
private static final long rejectedExecutionDelay = 250L; // milliseconds.
/**
* SHOULD be true since the whole point of this is higher
* concurrency. If you set this to false to explore some
* issue, then change it back to true when you are done!
*/
private static final boolean parallel = true;
// protected AbstractTripleStore getStore() {
//
// return getStore(getProperties());
//
// }
/**
* Note: This is overridden to turn off features not supported by this
* loader.
*/
public Properties getProperties() {
final Properties properties = new Properties(super.getProperties());
// Disable reporting.
properties.setProperty(IBigdataClient.Options.REPORT_DELAY, "0");
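// Disable statistics collection for the embedded federation.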
properties.setProperty(IBigdataClient.Options.COLLECT_QUEUE_STATISTICS, "false");
properties.setProperty(IBigdataClient.Options.COLLECT_PLATFORM_STATISTICS, "false");
// One DS is enough.
properties.setProperty(EmbeddedClient.Options.NDATA_SERVICES, "1");
// Minimize the #of threads so things are simpler to debug.
properties.setProperty(ConcurrencyManager.Options.DEFAULT_WRITE_SERVICE_CORE_POOL_SIZE,"0");
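// Enable the full text index (its tuples are dumped at DEBUG level during verification).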
properties.setProperty(AbstractTripleStore.Options.TEXT_INDEX, "true");
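// SIDs are disabled: a SIDs variant of the async loader has not been written yet (see the FIXME in the class javadoc).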
properties.setProperty(AbstractTripleStore.Options.STATEMENT_IDENTIFIERS, "false");
// properties.setProperty(AbstractTripleStore.Options.QUADS, "true");
// no closure so we don't need the axioms either.
properties.setProperty(AbstractTripleStore.Options.AXIOMS_CLASS, NoAxioms.class.getName());
// enable a vocabulary so some things will be inlined.
properties.setProperty(AbstractTripleStore.Options.VOCABULARY_CLASS,
AbstractTripleStore.Options.DEFAULT_VOCABULARY_CLASS);
{
/*
* FIXME We MUST specify the KB namespace so we can override this
* property. [Another approach is to override the idle timeout and
* have it be less than the chunk timeout such that the sink is
* closed if it becomes idle (no new chunks appearing) but continues
* to combine chunks as long as they are appearing before the idle
* timeout.
*/
final String namespace = "test1";
{
final String pname = com.bigdata.config.Configuration
.getOverrideProperty(namespace + "."
+ LexiconRelation.NAME_LEXICON_RELATION + "."
+ LexiconKeyOrder.TERM2ID,
IndexMetadata.Options.SINK_IDLE_TIMEOUT_NANOS);
final String pval = "" + TimeUnit.SECONDS.toNanos(1);
if (log.isInfoEnabled())
log.info("Override: " + pname + "=" + pval);
// Put an idle timeout on the sink of 1s.
properties.setProperty(pname, pval);
}
{
final String pname = com.bigdata.config.Configuration
.getOverrideProperty(namespace + "."
+ LexiconRelation.NAME_LEXICON_RELATION + "."
+ LexiconKeyOrder.BLOBS,
IndexMetadata.Options.SINK_IDLE_TIMEOUT_NANOS);
final String pval = "" + TimeUnit.SECONDS.toNanos(1);
if (log.isInfoEnabled())
log.info("Override: " + pname + "=" + pval);
// Put an idle timeout on the sink of 1s.
properties.setProperty(pname, pval);
}
}
// Note: ONE_ACCESS_PATH must remain commented out or the test will fail during verify.
// properties.setProperty(AbstractTripleStore.Options.ONE_ACCESS_PATH, "true");
return properties;
}
/**
* Test with the "small.rdf" data set.
*/
public void test_loadAndVerify_small() throws Exception {
final String resource = "/com/bigdata/rdf/rio/small.rdf";
doLoadAndVerifyTest(resource, getProperties());
}
/**
* Test with the "small.rdf" data set in quads mode.
*/
public void test_loadAndVerify_small_quadsMode() throws Exception {
final String resource = "/com/bigdata/rdf/rio/small.rdf";
final Properties p = getProperties();
p.setProperty(AbstractTripleStore.Options.QUADS, "true");
doLoadAndVerifyTest(resource, p);
}
/**
* Test with the "little.ttl" data set in quads mode (triples data loaded
* into a quads mode kb).
*/
public void test_loadAndVerify_little_ttl_quadsMode() throws Exception {
final String resource = "/com/bigdata/rdf/rio/little.ttl";
final Properties p = getProperties();
p.setProperty(AbstractTripleStore.Options.QUADS, "true");
doLoadAndVerifyTest(resource, p);
}
/**
* Test with the "little.trig" data set in quads mode (quads data loaded
* into a quads mode kb)
*/
public void test_loadAndVerify_little_trig_quadsMode() throws Exception {
final String resource = "/com/bigdata/rdf/rio/little.trig";
final Properties p = getProperties();
p.setProperty(AbstractTripleStore.Options.QUADS, "true");
doLoadAndVerifyTest(resource, p);
}
/**
* Test with the "smallWithBlobs.rdf" data set.
*/
public void test_loadAndVerify_smallWithBlobs() throws Exception {
final String resource = "/com/bigdata/rdf/rio/smallWithBlobs.rdf";
doLoadAndVerifyTest(resource, getProperties());
}
/**
* Test with the "smallWithBlobs.rdf" data set in quads mode.
*/
public void test_loadAndVerify_smallWithBlobs_quadsMode() throws Exception {
final String resource = "/com/bigdata/rdf/rio/smallWithBlobs.rdf";
final Properties p = getProperties();
p.setProperty(AbstractTripleStore.Options.QUADS, "true");
doLoadAndVerifyTest(resource, p);
}
/**
* Test with the "broken.rdf" data set (does not contain valid RDF). This
* tests that the factory will shutdown correctly if there are processing
* errors.
*
* @throws Exception
*/
public void test_loadFails() throws Exception {
final String resource = "/com/bigdata/rdf/rio/broken.rdf";
final AbstractTripleStore store = getStore();
try {
if(!(store.getIndexManager() instanceof AbstractScaleOutFederation)) {
log.warn("Test requires scale-out index views.");
return;
}
if (store.isQuads()) {
log.warn("Quads not supported yet.");
return;
}
// only do load since we expect an error to be reported.
final AsynchronousStatementBufferFactory<BigdataStatement, File> factory = doLoad2(
store, new File(resource), parallel);
assertEquals("errorCount", 1, factory.getDocumentErrorCount());
} finally {
store.__tearDownUnitTest();
}
}
/**
* Test with the "sample data.rdf" data set.
*/
public void test_loadAndVerify_sampleData() throws Exception {
final String resource = "/com/bigdata/rdf/rio/sample data.rdf";
doLoadAndVerifyTest( resource, getProperties() );
}
/**
* Test with the "sample data.rdf" data set in quads mode.
*/
public void test_loadAndVerify_sampleData_quadsMode() throws Exception {
final String resource = "/com/bigdata/rdf/rio/sample data.rdf";
final Properties p = getProperties();
p.setProperty(AbstractTripleStore.Options.QUADS, "true");
doLoadAndVerifyTest( resource, p );
}
/**
* Uses a modest file (~40k statements). This is BSBM data so it has some
* BLOBs in it.
*/
public void test_loadAndVerify_bsbm_pc100() throws Exception {
final String file = "/data/bsbm/dataset_pc100.nt";
doLoadAndVerifyTest(file, getProperties());
}
/**
* Uses a modest file (~40k statements). This is BSBM data so it has some
* BLOBs in it. This loads the data in quads mode.
*/
public void test_loadAndVerify_bsbm_pc100_quadsMode() throws Exception {
final String file = "/data/bsbm/dataset_pc100.nt";
final Properties p = getProperties();
p.setProperty(AbstractTripleStore.Options.QUADS, "true");
doLoadAndVerifyTest(file, p);
}
// /**
// * LUBM U(1).
// *
// * Note: This unit test can hang under JDK 1.6.0_17 if you have been running
// * the entire test suite and you do not specify -XX:+UseMembar to the JVM.
// * This is a JVM bug. The -XX:+UseMembar option is the workaround. [This is
// * also very slow to run, especially with the lexicon
// * validation.]
// */
// public void test_loadAndVerify_U1() throws Exception {
//
// final String file = "/data/lehigh/U1";
//
// doLoadAndVerifyTest(file, getProperties());
//
// }
// /**
// * Do not leave this unit test in -- it takes too long to validate the
// * loaded data: LUBM U(10)
// */
// public void test_loadAndVerify_U10() throws Exception {
//
// final String file = "../rdf-data/lehigh/U10";
//
// doLoadAndVerifyTest(file);
//
// }
/**
* Test loads an RDF/XML resource into a database and then verifies by
* re-parse that all expected statements were made persistent in the
* database.
*
* @param resource
* @param properties
*
* @throws Exception
*/
protected void doLoadAndVerifyTest(final String resource,
final Properties properties) throws Exception {
final AbstractTripleStore store = getStore(properties);
try {
if (!(store.getIndexManager() instanceof AbstractScaleOutFederation)) {
log.warn("Test requires scale-out index views.");
return;
}
doLoad(store, resource, parallel);
if (log.isDebugEnabled()) {
log.debug("dumping store...");
log.debug("LEXICON:\n"
+ DumpLexicon.dump(store.getLexiconRelation()));
if (store.getLexiconRelation().isTextIndex()) {
// Full text index.
final ITupleIterator<?> itr = ((BigdataValueCentricFullTextIndex) store
.getLexiconRelation().getSearchEngine()).getIndex()
.rangeIterator();
while (itr.hasNext()) {
log.debug(itr.next().getObject());
}
}
// raw statement indices.
{
final Iterator<SPOKeyOrder> itr = store.isQuads() ? SPOKeyOrder
.quadStoreKeyOrderIterator() : SPOKeyOrder
.tripleStoreKeyOrderIterator();
while (itr.hasNext()) {
final SPOKeyOrder keyOrder = itr.next();
log.debug("\n---" + keyOrder + "---\n"
+ store.getSPORelation().dump(keyOrder));
}
}
// resolved statement indices.
{
final Iterator<SPOKeyOrder> itr = store.isQuads() ? SPOKeyOrder
.quadStoreKeyOrderIterator() : SPOKeyOrder
.tripleStoreKeyOrderIterator();
while (itr.hasNext()) {
final SPOKeyOrder keyOrder = itr.next();
log.debug("\n" + keyOrder + "\n"
+ store.getSPORelation().dump(keyOrder));
log.debug("\n---"
+ keyOrder
+ "---\n"
+ store.dumpStore(store/* resolveTerms */,
true/* explicit */, true/* inferred */,
true/* axioms */, true/* history */,
true/* justifications */, true/* sids */,
keyOrder));
}
}
}
doVerify(store, resource, parallel);
} finally {
store.__tearDownUnitTest();
}
}
/**
* Load using {@link AsynchronousStatementBufferFactory} (via {@link #doLoad2}).
*/
protected void doLoad(final AbstractTripleStore store,
final String resource, final boolean parallel) throws Exception {
doLoad2(store, new File(resource), parallel);
}
/**
* Load using {@link AsynchronousStatementBufferFactory}.
*/
protected AsynchronousStatementBufferFactory<BigdataStatement, File> doLoad2(
final AbstractTripleStore store, final File resource,
final boolean parallel) throws Exception {
final RDFParserOptions parserOptions = new RDFParserOptions();
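// Disable data verification in the RDF parser.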
parserOptions.setVerifyData(false);
final AsynchronousStatementBufferFactory<BigdataStatement, File> statementBufferFactory = new AsynchronousStatementBufferFactory<BigdataStatement, File>(
(ScaleOutTripleStore) store,//
chunkSize, //
valuesInitialCapacity,//
bnodesInitialCapacity,//
RDFFormat.RDFXML, // defaultFormat
null, // defaultGraph
parserOptions, //
false, // deleteAfter
parallel?5:1, // parserPoolSize,
20, // parserQueueCapacity
parallel?5:1, // term2IdWriterPoolSize,
parallel?5:1, // otherWriterPoolSize
parallel?5:1, // notifyPoolSize
unbufferedStatementThreshold
);
// final AsynchronousWriteBufferFactoryWithoutSids2 statementBufferFactory = new AsynchronousWriteBufferFactoryWithoutSids2(
// (ScaleOutTripleStore) store, chunkSize, valuesInitialCapacity,
// bnodesInitialCapacity);
try {
// tasks to load the resource or file(s)
if (resource.isDirectory()) {
statementBufferFactory.submitAll(resource,
new com.bigdata.rdf.load.RDFFilenameFilter(),
rejectedExecutionDelay);
} else {
statementBufferFactory.submitOne(resource);
}
// wait for the async writes to complete.
statementBufferFactory.awaitAll();
// dump write statistics for indices used by kb.
// System.err.println(((AbstractFederation) store.getIndexManager())
// .getServiceCounterSet().getPath("Indices").toString());
// dump factory specific counters.
System.err.println(statementBufferFactory.getCounters().toString());
} catch (Throwable t) {
statementBufferFactory.cancelAll(true/* mayInterruptIfRunning */);
// rethrow
throw new RuntimeException(t);
}
return statementBufferFactory;
}
}