All Downloads are FREE. Search and download functionalities are using the official Maven repository.

com.bigdata.rdf.store.TestInsertRate Maven / Gradle / Ivy

There is a newer version: 2.1.4
Show newest version
/**

Copyright (C) SYSTAP, LLC DBA Blazegraph 2006-2016.  All rights reserved.

Contact:
     SYSTAP, LLC DBA Blazegraph
     2501 Calvert ST NW #106
     Washington, DC 20008
     licenses@blazegraph.com

This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; version 2 of the License.

This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
GNU General Public License for more details.

You should have received a copy of the GNU General Public License
along with this program; if not, write to the Free Software
Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
*/
package com.bigdata.rdf.store;

import java.io.BufferedWriter;
import java.io.FileWriter;
import java.io.IOException;
import java.io.Writer;

import org.openrdf.model.Literal;
import org.openrdf.model.URI;
import org.openrdf.model.Value;
import org.openrdf.model.ValueFactory;

import com.bigdata.rdf.rio.StatementBuffer;
import com.bigdata.util.BytesUtil.UnsignedByteArrayComparator;

/**
 * A test for measuring the possible insert rate for a triple store based on a
 * journal and btree using a synthetic data generator.
 * 

* * The s:p:o of the statements can use random selection without replacement from * a class space, a property space, and a literal space. Those spaces can be * pre-populated such that we can insert up to ( #class * #prop * #value ) * distinct statements if we select subjects from the class space, predicates * from the property space, and objects from the literal space and the class * space. However, we should also select objects from the class space, which * produces additional distinct statements. The literal space should be divided * into plain literals, literals with language tags, and typed literals. The * space of types are the predefined XSD types plus those defined by RDFS * (rdf:xml). *

* * In order to test plain RDF insert, we do not need to do anything beyond this. *

* * In order to test RDFS insert, there needs to be some ontology. This can be * introduced by creating a class hierarchy from the class space and a property * heirarchy from the property space. Such a hierarchy could be formed by either * by inserting or removing rdfs:subClassOf (or rdfs:subPropertyOf) assertions * from a fully connected matrix in the appropriate space. *

* * FIXME modify to use variable length unsigned byte[] keys and the * {@link UnsignedByteArrayComparator} and see how that effects performance - * the performance will be the base line on which I can then improve. Once I * have that baseline I can then go into the btree code and strip out the * polymorphic keys (except maybe int and long) and add in support for prefix * btrees and choosing short separators. * * @author Bryan Thompson * @version $Id$ */ public class TestInsertRate extends AbstractTripleStoreTestCase { /** * */ public TestInsertRate() { } /** * @param name */ public TestInsertRate(String name) { super(name); } /** * Large scale insert test. * * @param args * unused - just edit the code. */ public static void main(String[] args) throws Exception { // // small // int nclass = 30; // int nproperty = 20; // int nliteral = 20; // int litsize = 100; // moderate // int nclass = 300; // @todo at 300 this will force the journal to be extended on commit. // int nproperty = 20; // int nliteral = 20; // int litsize = 100; // large int nclass = 5000; int nproperty = 20; int nliteral = 30; // int nliteral = 0; int litsize = 300; TestInsertRate test = new TestInsertRate("TestInsertRate"); test.setUp(); test.doTest( nclass, nproperty, nliteral, litsize ); test.tearDown(); } /** * Defines a variety of URIs relevant to the XML Schema Datatypes * specification. */ static public class XMLSchema { /** * The namespace name, commonly associated with the prefix "rdf", whose * value is "http://www.w3.org/1999/02/22-rdf-syntax-ns#". */ public static final String NAMESPACE_RDF = "http://www.w3.org/1999/02/22-rdf-syntax-ns#"; /** * The namespace name, commonly associated with the prefix "rdfs", whose * value is "http://www.w3.org/2000/01/rdf-schema#". */ public static final String NAMESPACE_RDFS = "http://www.w3.org/2000/01/rdf-schema#"; /** * The URI,commonly written as rdf:XMLLiteral, whose value is * "http://www.w3.org/1999/02/22-rdf-syntax-ns#XMLLiteral". 
*/ public static final String RDF_XMLLiteral = "http://www.w3.org/1999/02/22-rdf-syntax-ns#XMLLiteral"; /** * The namespace name, commonly associated with the prefix "xsd", whose * value is "http://www.w3.org/2001/XMLSchema#". * * @todo [http://www.w3.org/2001/XMLSchema-datatypes] is a synonym for * the same namespace.... */ public static final String NAMESPACE_XSD = "http://www.w3.org/2001/XMLSchema#"; /** * The XSDatatype URI for "boolean". */ public static final String xsBoolean = NAMESPACE_XSD + "boolean"; /** * The XSDatatype URI for "byte". */ public static final String xsByte = NAMESPACE_XSD + "byte"; /** * The XSDatatype URI for "short". */ public static final String xsShort = NAMESPACE_XSD + "short"; /** * The XSDatatype URI for "int". */ public static final String xsInt = NAMESPACE_XSD + "int"; /** * The XSDatatype URI for "lomg". */ public static final String xsLong = NAMESPACE_XSD + "long"; /** * The XSDatatype URI for "float". */ public static final String xsFloat = NAMESPACE_XSD + "float"; /** * The XSDatatype URI for "double". */ public static final String xsDouble = NAMESPACE_XSD + "double"; /** * The XSDatatype URI for "integer" - used for {@link * java.math.BigInteger} and natively by the XSD library for * {@link com.sun.msv.datatype.xsd.IntegerValueType}. */ public static final String xsInteger = NAMESPACE_XSD + "integer"; /** * The XSDatatype URI for "decimal" - used for {@link * java.math.BigDecimal} */ public static final String xsDecimal = NAMESPACE_XSD + "decimal"; /** * The XSDatatype URI for "string". */ public static final String xsString = NAMESPACE_XSD + "string"; /** * The XSDatatype URI for "anyURI". */ public static final String xsAnyURI = NAMESPACE_XSD + "anyURI"; } /** * Primary driver for the insert rate test. * * @param nclass * The #of distinct classes. * * @param nproperty * The #of distinct properties. 
* * @param nliteral * The #of plain literals, the #of literals for each language * type, and the #of typed literals for each datatype URI. * * @param litsize * The average size of a literal. The generated literals use a * normal distribution with this as their mean length (in * characters). */ public void doTest(final int nclass, final int nproperty, final int nliteral, final int litsize) throws IOException { AbstractTripleStore store = getStore(); try { doTest(store, nclass, nproperty, nliteral, litsize); } finally { store.__tearDownUnitTest(); } } private void doTest(AbstractTripleStore store, final int nclass, final int nproperty, final int nliteral, final int litsize) throws IOException { final URI[] cspace = new URI[nclass]; final URI[] pspace = new URI[nproperty]; final URI[] tspace = new URI[] { // uncomment to get data typed literals. // new URIImpl( XMLSchema.xsInteger ), // new URIImpl( XMLSchema.xsFloat ) }; final String[] langSpace = new String[]{ // uncomment to get language typed literals. // "en","de" }; final int nliteral2 = nliteral + nliteral * tspace.length + nliteral * langSpace.length ; final Literal[] lspace = new Literal[nliteral2]; final int nvalues = nclass + nproperty + nliteral2; if( true ) { final long begin = System.currentTimeMillis(); final ValueFactory fac = store.getValueFactory(); log.info( "\nCreating "+nvalues+" values..." ); for( int i=0; i 0 && index % 10000 == 0 ) { System.err.print( "." ); if( index % 100000 == 0 ) { long now = System.currentTimeMillis(); long elapsed = now - begin2; begin2 = now; // reset. w.write ( ""+index+"\t"+index2+"\t"+elapsed+"\t"+perSec(index2,elapsed)+"\n" ); w.flush(); log.info ( "\nCurrent insert rate"+ ": #statements(so far)="+index+ ": #statements(interval)="+index2+ ", elapsed(interval)="+elapsed+ ", stmts/sec="+perSec(index2,elapsed) ); index2 = 0; // reset. 
// m_repo.startTransaction(); } } index++; index2++; } } } sbuf.flush(); long elapsed = System.currentTimeMillis() - begin; w.write ( "Sustained insert rate"+ ": #statements="+index+ ", elapsed="+elapsed+ ", stmts/sec="+perSec(index,elapsed)+"\n" ); log.info ( "\nSustained insert rate"+ ": #statements="+index+ ", elapsed="+elapsed+ ", stmts/sec="+perSec(index,elapsed) ); w.flush(); w.close(); } /** * Returns a random but unique value within the identified type * space. * * @param t The data type URI. * * @param id A unique index used to obtain a unique value in the * identified type space. Typically this is a one up identifier. */ private String getRandomType( URI t, int id ) { // FIXME This needs to be type sensitive. For some types, the // size of the type space is of necessity limited. For such // types I imagine that this method needs to recycle values, // which results in a net reduction in the size of the overal // literal space and hence the #of distinct statements that // can be made. return ""+id; } /** * Returns the quantity n expressed as a per-second rate or * "N/A" if the elapsed time is zero. */ static final public String perSec( final int n, final long elapsed ) { if( n == 0 ) return "0"; return ((elapsed==0?"N/A":""+(int)(n/(elapsed/1000.)))); } /** * Returns a writer named by the test and having the specified * filename extension. */ public Writer getWriter( String ext ) throws IOException { return new BufferedWriter ( new FileWriter ( getName()+ext ) ); } /** * @todo write tests for the individual indices, restart safety, * concurrent writers, and concurrent writes with concurrent * query. 
*/ public void test_tiny() throws IOException { // tiny int nclass = 3; int nproperty = 2; int nliteral = 2; int litsize = 100; doTest( nclass, nproperty, nliteral, litsize ); } public void test_small() throws IOException { int nclass = 30; int nproperty = 20; int nliteral = 20; int litsize = 100; doTest( nclass, nproperty, nliteral, litsize ); } // /** // * This tests nearly a million triples. // * // *

//         * Sustained insert rate: #statements=880000, elapsed=7422, stmts/sec=118566
//         * 
// */ // public void test_moderate() throws IOException { // // int nclass = 200; // @todo at 300 this will force the journal to be extended on commit. // int nproperty = 20; // int nliteral = 20; // int litsize = 100; // // doTest( nclass, nproperty, nliteral, litsize ); // //// // moderate. //// int nclass = 5000; //// int nproperty = 20; //// int nliteral = 30; ////// int nliteral = 0; //// int litsize = 300; // // } }




© 2015 - 2024 Weber Informatics LLC | Privacy Policy