com.bigdata.rdf.store.TestInsertRate Maven / Gradle / Ivy

Go to download
Show more of this group Show more artifacts with this name
Show all versions of bigdata-rdf-test Show documentation
Blazegraph(TM) RDF Test Suites
There is a newer version: 2.1.4
/**

Copyright (C) SYSTAP, LLC DBA Blazegraph 2006-2016.  All rights reserved.

Contact:
     SYSTAP, LLC DBA Blazegraph
     2501 Calvert ST NW #106
     Washington, DC 20008
     licenses@blazegraph.com

This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; version 2 of the License.

This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
GNU General Public License for more details.

You should have received a copy of the GNU General Public License
along with this program; if not, write to the Free Software
Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
*/
package com.bigdata.rdf.store;

import java.io.BufferedWriter;
import java.io.FileWriter;
import java.io.IOException;
import java.io.Writer;

import org.openrdf.model.Literal;
import org.openrdf.model.URI;
import org.openrdf.model.Value;
import org.openrdf.model.ValueFactory;

import com.bigdata.rdf.rio.StatementBuffer;
import com.bigdata.util.BytesUtil.UnsignedByteArrayComparator;

/**
 * A test for measuring the possible insert rate for a triple store based on a
 * journal and btree using a synthetic data generator.
 * <p>
 * The s:p:o of the statements can use random selection without replacement from
 * a class space, a property space, and a literal space. Those spaces can be
 * pre-populated such that we can insert up to ( #class * #prop * #value )
 * distinct statements if we select subjects from the class space, predicates
 * from the property space, and objects from the literal space and the class
 * space. However, we should also select objects from the class space, which
 * produces additional distinct statements. The literal space should be divided
 * into plain literals, literals with language tags, and typed literals. The
 * space of types are the predefined XSD types plus those defined by RDFS
 * (rdf:xml).
 * <p>
 * In order to test plain RDF insert, we do not need to do anything beyond this.
 * <p>
 * In order to test RDFS insert, there needs to be some ontology. This can be
 * introduced by creating a class hierarchy from the class space and a property
 * hierarchy from the property space. Such a hierarchy could be formed either by
 * inserting or by removing rdfs:subClassOf (or rdfs:subPropertyOf) assertions
 * from a fully connected matrix in the appropriate space.
 * <p>
 * FIXME modify to use variable length unsigned byte[] keys and the
 * {@link UnsignedByteArrayComparator} and see how that affects performance -
 * that performance will be the baseline on which I can then improve. Once I
 * have that baseline I can then go into the btree code and strip out the
 * polymorphic keys (except maybe int and long) and add in support for prefix
 * btrees and choosing short separators.
 * 
 * @author Bryan Thompson
 * @version $Id$
 */
public class TestInsertRate extends AbstractTripleStoreTestCase {

    /**
     * Creates an unnamed instance of this test case.
     */
    public TestInsertRate() {
        super();
    }

    /**
     * Creates an instance of this test case with the given name.
     *
     * @param name
     *            The name passed through to the superclass constructor.
     */
    public TestInsertRate(final String name) {
        super(name);
    }

    /**
     * Large scale insert test.
     * <p>
     * Builds a {@link TestInsertRate} fixture, runs
     * {@link #doTest(int, int, int, int)} with the "large" data set sizes
     * and then tears the fixture down again. The tear down runs in a
     * <code>finally</code> clause so that the fixture is released even when
     * the test itself fails.
     * 
     * @param args
     *            unused - just edit the code.
     * 
     * @throws Exception
     *             if the fixture set up, the test run, or the fixture tear
     *             down fails.
     */
    public static void main(String[] args) throws Exception {

        // Alternative data set sizes (edit the values below to select one):
        //
        // small:    nclass = 30,  nproperty = 20, nliteral = 20, litsize = 100
        // moderate: nclass = 300, nproperty = 20, nliteral = 20, litsize = 100
        //           @todo at 300 this will force the journal to be extended on commit.

        // large
        final int nclass = 5000;
        final int nproperty = 20;
        final int nliteral = 30; // or 0 to generate no literals.
        final int litsize = 300;

        final TestInsertRate test = new TestInsertRate("TestInsertRate");

        test.setUp();

        try {

            test.doTest( nclass, nproperty, nliteral, litsize );

        } finally {

            // Always release the fixture, even when the run above throws.
            test.tearDown();

        }

    }

    /**
     * String constants for URIs relevant to the XML Schema Datatypes
     * specification, together with the RDF and RDFS namespaces.
     */
    public static class XMLSchema {

        /**
         * The namespace name commonly bound to the prefix "rdf":
         * "http://www.w3.org/1999/02/22-rdf-syntax-ns#".
         */
        public static final String NAMESPACE_RDF = "http://www.w3.org/1999/02/22-rdf-syntax-ns#";

        /**
         * The namespace name commonly bound to the prefix "rdfs":
         * "http://www.w3.org/2000/01/rdf-schema#".
         */
        public static final String NAMESPACE_RDFS = "http://www.w3.org/2000/01/rdf-schema#";

        /**
         * The URI commonly written as rdf:XMLLiteral:
         * "http://www.w3.org/1999/02/22-rdf-syntax-ns#XMLLiteral".
         */
        public static final String RDF_XMLLiteral = "http://www.w3.org/1999/02/22-rdf-syntax-ns#XMLLiteral";

        /**
         * The namespace name commonly bound to the prefix "xsd":
         * "http://www.w3.org/2001/XMLSchema#".
         * 
         * @todo [http://www.w3.org/2001/XMLSchema-datatypes] is a synonym for
         *       the same namespace....
         */
        public static final String NAMESPACE_XSD = "http://www.w3.org/2001/XMLSchema#";

        /** The XSD datatype URI for "boolean". */
        public static final String xsBoolean = NAMESPACE_XSD + "boolean";

        /** The XSD datatype URI for "byte". */
        public static final String xsByte = NAMESPACE_XSD + "byte";

        /** The XSD datatype URI for "short". */
        public static final String xsShort = NAMESPACE_XSD + "short";

        /** The XSD datatype URI for "int". */
        public static final String xsInt = NAMESPACE_XSD + "int";

        /** The XSD datatype URI for "long". */
        public static final String xsLong = NAMESPACE_XSD + "long";

        /** The XSD datatype URI for "float". */
        public static final String xsFloat = NAMESPACE_XSD + "float";

        /** The XSD datatype URI for "double". */
        public static final String xsDouble = NAMESPACE_XSD + "double";

        /**
         * The XSD datatype URI for "integer" - used for
         * {@link java.math.BigInteger} and natively by the XSD library for
         * {@link com.sun.msv.datatype.xsd.IntegerValueType}.
         */
        public static final String xsInteger = NAMESPACE_XSD + "integer";

        /**
         * The XSD datatype URI for "decimal" - used for
         * {@link java.math.BigDecimal}.
         */
        public static final String xsDecimal = NAMESPACE_XSD + "decimal";

        /** The XSD datatype URI for "string". */
        public static final String xsString = NAMESPACE_XSD + "string";

        /** The XSD datatype URI for "anyURI". */
        public static final String xsAnyURI = NAMESPACE_XSD + "anyURI";

    }

    /**
     * Entry point for the insert rate test: obtains a store from
     * <code>getStore()</code>, runs the test against it, and always invokes
     * the store's unit test tear down hook afterwards.
     * 
     * @param nclass
     *            The #of distinct classes.
     * @param nproperty
     *            The #of distinct properties.
     * @param nliteral
     *            The #of plain literals, the #of literals for each language
     *            type, and the #of typed literals for each datatype URI.
     * @param litsize
     *            The average size of a literal. The generated literals use a
     *            normal distribution with this as their mean length (in
     *            characters).
     * 
     * @throws IOException
     *             if the underlying test run reports an I/O problem.
     */
    public void doTest(final int nclass, final int nproperty,
            final int nliteral, final int litsize) throws IOException {

        final AbstractTripleStore tripleStore = getStore();

        try {
            doTest(tripleStore, nclass, nproperty, nliteral, litsize);
        } finally {
            // Runs whether or not the test run above succeeded.
            tripleStore.__tearDownUnitTest();
        }

    }
    
    /**
     * Runs the insert rate test against the given store.
     * <p>
     * The visible code sizes the class, property, datatype, language and
     * literal spaces, reports progress while statements are counted, and
     * finally writes and logs the sustained insert rate.
     * <p>
     * NOTE(review): part of this method is missing from this copy of the
     * source. The text between the start of the <code>for</code> header and
     * the progress check is gone, so the declarations of <code>w</code>,
     * <code>sbuf</code>, <code>index</code>, <code>index2</code> and
     * <code>begin2</code> are not visible here. Restore the method from the
     * original source before changing it.
     * 
     * @param store
     *            The store under test.
     * @param nclass
     *            The #of distinct classes.
     * @param nproperty
     *            The #of distinct properties.
     * @param nliteral
     *            The #of literals per literal kind (plain, per datatype URI,
     *            per language tag).
     * @param litsize
     *            The average literal size (not referenced in the visible
     *            part of the method).
     */
    private void doTest(AbstractTripleStore store, final int nclass,
            final int nproperty, final int nliteral, final int litsize)
            throws IOException {

        final URI[] cspace = new URI[nclass];
        final URI[] pspace = new URI[nproperty];
        final URI[] tspace = new URI[] {
                // uncomment to get data typed literals.
// new URIImpl( XMLSchema.xsInteger ),
//        new URIImpl( XMLSchema.xsFloat )
        };
        final String[] langSpace = new String[]{
                // uncomment to get language typed literals.
//                "en","de"
                };
        // nliteral plain literals, plus nliteral for each datatype URI and
        // nliteral for each language tag.
        final int nliteral2 =
                nliteral + 
            nliteral * tspace.length +
            nliteral * langSpace.length
            ;
        final Literal[] lspace = new Literal[nliteral2];

        // Total #of values across the class, property and literal spaces.
        final int nvalues = nclass + nproperty + nliteral2;

        if( true ) {

            final long begin = System.currentTimeMillis();

            final ValueFactory fac = store.getValueFactory();
            
            log.info( "\nCreating "+nvalues+" values..." );
        
            // NOTE(review): the next line is corrupted in this copy - the
            // loop header and everything up to the progress check are missing.
            for( int i=0; i 0 && index % 10000 == 0 ) {
                        
                        // Progress marker every 10,000 statements.
                        System.err.print( "." );

                        // Every 100,000 statements: report the rate for the
                        // interval and reset the interval timer and counter.
                        if( index % 100000 == 0 ) {

                    long now = System.currentTimeMillis();

                    long elapsed = now - begin2;

                    begin2 = now; // reset.

                            // Tab separated: total, interval count, interval ms, rate.
                            w.write
                    ( ""+index+"\t"+index2+"\t"+elapsed+"\t"+perSec(index2,elapsed)+"\n"
                      );

                    w.flush();

                    log.info
                    ( "\nCurrent insert rate"+
                      ": #statements(so far)="+index+
                      ": #statements(interval)="+index2+
                      ", elapsed(interval)="+elapsed+
                      ", stmts/sec="+perSec(index2,elapsed)
                      );

                    index2 = 0; // reset.

//                    m_repo.startTransaction();

                        }                   
                        
                    }
                    
                    // Count the statement in both the overall and the
                    // per-interval counter.
                    index++;
                index2++;
                    
                }
                
            }
            
        }
        
        // Presumably pushes any statements still buffered to the store
        // (sbuf is declared in the missing part) - TODO confirm.
        sbuf.flush();
        
        long elapsed = System.currentTimeMillis() - begin;

        // Report the overall rate to both the writer and the log.
        w.write
            ( "Sustained insert rate"+
              ": #statements="+index+
              ", elapsed="+elapsed+
              ", stmts/sec="+perSec(index,elapsed)+"\n"
              );

        log.info
            ( "\nSustained insert rate"+
              ": #statements="+index+
              ", elapsed="+elapsed+
              ", stmts/sec="+perSec(index,elapsed)
              );

        w.flush();

        w.close();
        
        }

        /**
         * Produces the lexical form of a value that is unique within the
         * given type space. The current implementation simply renders the
         * identifier as a decimal string and ignores the datatype.
         *
         * @param t The data type URI (currently unused).
         *
         * @param id A unique index used to obtain a unique value in the
         * identified type space.  Typically this is a one up identifier.
         *
         * @return The decimal string form of <i>id</i>.
         */
        private String getRandomType( URI t, int id )
        {

            // FIXME This needs to be type sensitive.  For some types, the
            // size of the type space is of necessity limited.  For such
            // types I imagine that this method needs to recycle values,
            // which results in a net reduction in the size of the overall
            // literal space and hence the #of distinct statements that
            // can be made.

            return Integer.toString( id );

        }

        /**
         * Formats the quantity <i>n</i> observed over <i>elapsed</i>
         * milliseconds as a whole-number per-second rate.
         * <p>
         * The rate is truncated toward zero. It is computed as a
         * <code>long</code> so that rates above {@link Integer#MAX_VALUE}
         * (a large count over a sub-second interval) are reported
         * faithfully rather than being clamped.
         *
         * @param n The #of events counted during the interval.
         *
         * @param elapsed The length of the interval in milliseconds.
         *
         * @return "0" when <i>n</i> is zero (regardless of the elapsed
         * time), otherwise "N/A" when <i>elapsed</i> is zero, otherwise the
         * truncated per-second rate.
         */
        static final public String perSec( final int n, final long elapsed )
        {

            // A zero count is reported as "0" even when no time has elapsed.
            if( n == 0 ) return "0";

            // A rate cannot be computed for an empty interval.
            if( elapsed == 0 ) return "N/A";

            final double seconds = elapsed / 1000.;

            return Long.toString( (long)( n / seconds ) );

        }


        /**
         * Opens a buffered writer on a file whose name is the name of this
         * test (as given by <code>getName()</code>) followed by the given
         * filename extension.
         *
         * @param ext The filename extension appended to the test name.
         *
         * @return The buffered writer; the caller is responsible for
         * closing it.
         *
         * @throws IOException if the file cannot be opened for writing.
         */
        public Writer getWriter( String ext )
            throws IOException
        {

            final String filename = getName()+ext;

            final FileWriter fileWriter = new FileWriter( filename );

            return new BufferedWriter( fileWriter );

        }
                
        /**
         * Runs the insert rate test with a tiny data set: 3 classes, 2
         * properties, 2 literals and a literal size of 100.
         *
         * @todo write tests for the individual indices, restart safety,
         *       concurrent writers, and concurrent writes with concurrent
         *       query.
         */
        public void test_tiny() throws IOException {

            // tiny
            doTest( 3 /* nclass */, 2 /* nproperty */, 2 /* nliteral */, 100 /* litsize */ );

        }

        /**
         * Runs the insert rate test with a small data set: 30 classes, 20
         * properties, 20 literals and a literal size of 100.
         */
        public void test_small() throws IOException {

            // small
            doTest( 30 /* nclass */, 20 /* nproperty */, 20 /* nliteral */, 100 /* litsize */ );

        }

//        /**
//         * This tests nearly a million triples.
//         * 
//         * 
//         * Sustained insert rate: #statements=880000, elapsed=7422, stmts/sec=118566
//         * 
//         */
//        public void test_moderate() throws IOException {
//
//            int nclass = 200; // @todo at 300 this will force the journal to be extended on commit.
//            int nproperty = 20;
//            int nliteral = 20;
//            int litsize = 100;
//      
//            doTest( nclass, nproperty, nliteral, litsize );
//            
////          // moderate.
////          int nclass = 5000;
////          int nproperty = 20;
////          int nliteral = 30;
//////          int nliteral = 0;
////          int litsize = 300;
//
//        }

}