com.bigdata.rdf.sparql.ast.eval.rto.TestRTO_BSBM Maven / Gradle / Ivy

Go to download
Show more of this group Show more artifacts with this name
Show all versions of bigdata-rdf-test Show documentation
Blazegraph(TM) RDF Test Suites
There is a newer version: 2.1.4
/**

Copyright (C) SYSTAP, LLC DBA Blazegraph 2006-2016.  All rights reserved.

Contact:
     SYSTAP, LLC DBA Blazegraph
     2501 Calvert ST NW #106
     Washington, DC 20008
     licenses@blazegraph.com

This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; version 2 of the License.

This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
GNU General Public License for more details.

You should have received a copy of the GNU General Public License
along with this program; if not, write to the Free Software
Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
*/
/*
 * Created on Sep 4, 2011
 */

package com.bigdata.rdf.sparql.ast.eval.rto;

import java.util.Properties;

import junit.framework.AssertionFailedError;

import com.bigdata.rdf.axioms.NoAxioms;
import com.bigdata.rdf.sail.BigdataSail;
import com.bigdata.rdf.sparql.ast.QueryHints;

/**
 * Data driven test suite for the Runtime Query Optimizer (RTO) using BSBM data
 * and queries based on BSBM.
 * 
 * Note: BSBM is parameterized. We can generate more queries against the pc100
 * data set easily enough. In principle, those queries might exhibit different
 * correlations. However, the pc100 data set may be too small for any
 * interesting correlations. In fact, it may be too small since the vertex
 * estimates and cutoff joins may be exact before the RTO is run. If so,
 * then we need to go back and use a larger data set. However, the specific
 * parameterized queries will remain valid against larger data sets since BSBM
 * only adds more data when generating a larger data set. Of course, the number
 * of solutions for the queries may change.
 * 

 * Note: BSBM uses a lot of filters, subgroups, and sub-selects. As we build up
 * coverage for those constructions in the RTO, it will handle more of the
 * query. As a result, the observed join orders (and even the #of joins that are
 * considered) are likely to change.
 * 

 * Note: Q6 is no longer run in BSBM (the query was dropped).
 * 

 * Note: Q9 is a simple DESCRIBE (too simple for the RTO). Sample query is:
 * 
 * 
 * PREFIX rev: 
 * 
 * DESCRIBE ?x
 * WHERE {  rev:reviewer ?x }
 * 
 * 
 * Note: Q12 is a UNION (too simple for the RTO). Sample query is:
 * 
 *  * PREFIX bsbm-inst: 
 * PREFIX bsbm: 
 * PREFIX rdfs: 
 * PREFIX rdf: 
 * 
 * SELECT ?property ?hasValue ?isValueOf
 * WHERE {
 *   {  ?property ?hasValue }
 *   UNION
 *   { ?isValueOf ?property  }
 * }
 * 
 * 
 * @author Bryan Thompson
 * @version $Id: TestBasicQuery.java 6440 2012-08-14 17:57:33Z thompsonbry $
 */
public class TestRTO_BSBM extends AbstractRTOTestCase {

//    private final static Logger log = Logger.getLogger(TestRTO_LUBM.class);

    /**
     * Path of the BSBM pc100 data set that is loaded by every
     * <code>_pc100</code> test in this class.
     */
    private static final String PC100_DATA_FILE = "src/test/resources/data/bsbm/dataset_pc100.nt";

    /**
     * Creates an unnamed test instance.
     */
    public TestRTO_BSBM() {
    }

    /**
     * Creates a test instance with the given name.
     * 
     * @param name
     *            The name of the test (passed through to the super class).
     */
    public TestRTO_BSBM(String name) {
        super(name);
    }

    /**
     * Returns a private copy of the inherited properties in which triples mode
     * is enabled and the axioms class is set to {@link NoAxioms}.
     * 
     * @return The configured properties (a clone; the properties returned by
     *         the super class are not modified).
     */
    @Override
    public Properties getProperties() {

        // Note: clone to avoid modifying!!!
        final Properties properties = (Properties) super.getProperties().clone();

        properties.setProperty(BigdataSail.Options.TRIPLES_MODE, "true");

        properties.setProperty(BigdataSail.Options.AXIOMS_CLASS,
                NoAxioms.class.getName());

        return properties;

    }

    /**
     * Test of BSBM Q1 against an empty data set. There are no solutions in the
     * data.
     */
    public void test_BSBM_Q1_noSolutions() throws Exception {

        final TestHelper helper = new TestHelper(//
                "rto/BSBM-Q1", // testURI,
                "rto/BSBM-Q1.rq",// queryFileURL
                new String[]{},// data files.
                "rto/BSBM-Q1-noSolutions.srx"// resultFileURL
        );

        /*
         * TODO In fact, the RTO should not be running for a group of required
         * joins in which some vertex has a zero cardinality or when any join
         * can provably produce ZERO results when fed solutions from a fully
         * materialized vertex.
         */
        final int[] expected = new int[] { 2, 1, 3, 4, 5 };

        assertSameJoinOrder(expected, helper);

    }

    /**
     * BSBM Q1 against pc100.
     * <p>
     * Either of two join orderings is accepted (see the FIXME in the method
     * body). The test passes as soon as one of them matches.
     */
    public void test_BSBM_Q1_pc100() throws Exception {

        final TestHelper helper = new TestHelper(//
                "rto/BSBM-Q1", // testURI,
                "rto/BSBM-Q1.rq",// queryFileURL
                PC100_DATA_FILE,// dataFileURL
                "rto/BSBM-Q1.srx"// resultFileURL
        );

        /*
         * Verify that the runtime optimizer produced the expected join path.
         * 
         * FIXME There are two different solutions that I see for this query
         * depending on whether or not AST2BOpRTO.runAllJoinsAsComplexJoins is
         * true or false. I have modified the test to allow either join ordering
         * for now, but we should chase down the root cause for this difference
         * in how the simple and complex cutoff join evaluation code paths
         * compute the join hit ratios and estimated cardinality. It is probably
         * an off by one fencepost....
         */

        try {
            assertSameJoinOrder(new int[] { 2, 4, 1, 3, 5 }, helper);
            /*
             * The first accepted ordering matched. We must stop here: the
             * second ordering is different, so asserting it as well would
             * always fail the test on this code path.
             */
            return;
        } catch (AssertionFailedError er) {
            // Not the first ordering. Log it and try the alternative below.
            log.warn(er);
        }

        // Fall back to the alternative ordering; a mismatch here fails the test.
        assertSameJoinOrder(new int[] { 3, 2, 4, 1, 5 }, helper);

    }

    /**
     * BSBM Q2 against pc100.
     */
    public void test_BSBM_Q2_pc100() throws Exception {

        final TestHelper helper = new TestHelper(//
                "rto/BSBM-Q2", // testURI,
                "rto/BSBM-Q2.rq",// queryFileURL
                PC100_DATA_FILE,// dataFileURL
                "rto/BSBM-Q2.srx"// resultFileURL
        );

        /*
         * Verify that the runtime optimizer produced the expected join path.
         */
        final int[] expected = new int[] { 3, 4, 5, 1, 2, 6, 7, 8, 9, 10, 11, 12 };

        assertSameJoinOrder(expected, helper);

    }

    /**
     * BSBM Q3 against pc100.
     */
    public void test_BSBM_Q3_pc100() throws Exception {

        final TestHelper helper = new TestHelper(//
                "rto/BSBM-Q3", // testURI,
                "rto/BSBM-Q3.rq",// queryFileURL
                PC100_DATA_FILE,// dataFileURL
                "rto/BSBM-Q3.srx"// resultFileURL
        );

        /*
         * Verify that the runtime optimizer produced the expected join path.
         */
        final int[] expected = new int[] { 2, 5, 1, 3, 4 };

        assertSameJoinOrder(expected, helper);

    }

    /**
     * BSBM Q4 against pc100.
     * <p>
     * Note: This query has TWO join groups that are sufficiently complex to run
     * the RTO. However, only one of the join groups is marked for RTO
     * optimization in order to keep the test harness simple. The test harness
     * assumes that there is a single JOIN group that is optimized by the RTO
     * and then verifies the join ordering within that join group. The test
     * harness breaks if there is more than one join group optimized by the RTO.
     */
    public void test_BSBM_Q4_pc100() throws Exception {

        final TestHelper helper = new TestHelper(//
                "rto/BSBM-Q4", // testURI,
                "rto/BSBM-Q4.rq",// queryFileURL
                PC100_DATA_FILE,// dataFileURL
                "rto/BSBM-Q4.srx"// resultFileURL
        );

        /*
         * Verify that the runtime optimizer produced the expected join path.
         */
        final int[] expected = new int[] { 9, 6, 7, 8, 10, 11 };

        assertSameJoinOrder(expected, helper);

    }

    /**
     * BSBM Q5 on the pc100 data set.
     * <p>
     * FIXME FAILS if we disallow out of order evaluation when doing cutoff
     * joins.
     */
    public void test_BSBM_Q5_pc100() throws Exception {

        final TestHelper helper = new TestHelper(//
                "rto/BSBM-Q5", // testURI,
                "rto/BSBM-Q5.rq",// queryFileURL
                PC100_DATA_FILE,// dataFileURL
                "rto/BSBM-Q5.srx"// resultFileURL
        );

        /*
         * Verify that the runtime optimizer produced the expected join path.
         */
        final int[] expected = new int[] { 1, 3, 2, 5, 4, 7, 6 };

        assertSameJoinOrder(expected, helper);

    }

    /**
     * BSBM Q7 on the pc100 data set.
     */
    public void test_BSBM_Q7_pc100() throws Exception {

        final TestHelper helper = new TestHelper(//
                "rto/BSBM-Q7", // testURI,
                "rto/BSBM-Q7.rq",// queryFileURL
                PC100_DATA_FILE,// dataFileURL
                "rto/BSBM-Q7.srx"// resultFileURL
        );

        /*
         * Verify that the runtime optimizer produced the expected join path.
         * 
         * The join IDs depend on whether the pipelined hash join is enabled
         * by default. Given that the pipelined hash join unites the hash index
         * construction and join in one operator, there is one less operator
         * in the plan, so the indices decrease by one.
         */
        final int[] expected = QueryHints.DEFAULT_PIPELINED_HASH_JOIN ?
            new int[] { 7, 6, 8, 4, 5, 9, 10 } :
            new int[] { 8, 7, 9, 5, 6, 10, 11 };

        assertSameJoinOrder(expected, helper);

    }

    /**
     * A modified version of BSBM Q7 on pc100 which is the simplest form of the
     * query that causes the RTO to fail with the "No stats" assertion error.
     * This is basically just an OPTIONAL {} join group.
     * <p>
     * The query is read from <code>rto/BSBM-Q7b.rq</code>. Note that the IRI
     * terms are elided in the listing below:
     * 
     * <pre>
     * PREFIX rdfs: 
     * PREFIX rev: 
     * PREFIX foaf: 
     * PREFIX bsbm: 
     * PREFIX dc: 
     * 
     * SELECT (COUNT(*) as ?count)
     * WHERE { 
     * 
     *   # Control all RTO parameters for repeatable behavior.
     *   hint:Query hint:RTO-sampleType "DENSE".
     *   hint:Query hint:RTO-limit "100".
     *   hint:Query hint:RTO-nedges "1".
     * 
     *     rdfs:label ?productLabel .
     *     OPTIONAL {
     *    
     *    # Enable the RTO inside of the OPTIONAL join group.
     *    hint:Group hint:optimizer "Runtime".
     *    
     *    ?review bsbm:reviewFor  .
     *    ?review rev:reviewer ?reviewer .
     *    ?reviewer foaf:name ?revName .
     *    ?review dc:title ?revTitle .
     *    
     *    }
     * }
     * </pre>
     */
    public void test_BSBM_Q7b_pc100() throws Exception {

        final TestHelper helper = new TestHelper(//
                "rto/BSBM-Q7b", // testURI,
                "rto/BSBM-Q7b.rq",// queryFileURL
                PC100_DATA_FILE,// dataFileURL
                "rto/BSBM-Q7b.srx"// resultFileURL
        );

        /*
         * Verify that the runtime optimizer produced the expected join path.
         * 
         * The join IDs depend on whether the pipelined hash join is enabled
         * by default. Given that the pipelined hash join unites the hash index
         * construction and join in one operator, there is one less operator
         * in the plan, so the indices decrease by one.
         */
        final int[] expected = QueryHints.DEFAULT_PIPELINED_HASH_JOIN ?
            new int[] { 4, 5, 6, 7 } :
            new int[] { 5, 6, 7, 8 };

        assertSameJoinOrder(expected, helper);

    }

    /**
     * BSBM Q8 on the pc100 data set.
     */
    public void test_BSBM_Q8_pc100() throws Exception {

        final TestHelper helper = new TestHelper(//
                "rto/BSBM-Q8", // testURI,
                "rto/BSBM-Q8.rq",// queryFileURL
                PC100_DATA_FILE,// dataFileURL
                "rto/BSBM-Q8.srx"// resultFileURL
        );

        /*
         * Verify that the runtime optimizer produced the expected join path.
         */
        final int[] expected = new int[] { 1, 3, 2, 4, 5, 6 };

        assertSameJoinOrder(expected, helper);

    }

    /**
     * BSBM Q10 on pc100.
     */
    public void test_BSBM_Q10_pc100() throws Exception {

        final TestHelper helper = new TestHelper(//
                "rto/BSBM-Q10", // testURI,
                "rto/BSBM-Q10.rq",// queryFileURL
                PC100_DATA_FILE,// dataFileURL
                "rto/BSBM-Q10.srx"// resultFileURL
        );

        /*
         * Verify that the runtime optimizer produced the expected join path.
         */
        final int[] expected = new int[] { 1, 7, 5, 2, 3, 4, 6 };

        assertSameJoinOrder(expected, helper);

    }

    /*
     * larger runs -- OOM on laptop when running under Eclipse.
     */

//    /**
//     * BSBM Q1 against pc1000 (OOM on laptop).
//     */
//    public void _test_BSBM_Q1_pc1000() throws Exception {
//
//        final TestHelper helper = new TestHelper(//
//                "rto/BSBM-Q1", // testURI,
//                "rto/BSBM-Q1.rq",// queryFileURL
//                "src/test/resources/data/bsbm/dataset_pc1000.nt.gz",// dataFileURL
//                "rto/BSBM-Q1.srx"// resultFileURL
//        );
//        
//        /*
//         * Verify that the runtime optimizer produced the expected join path.
//         */
//
//        final int[] expected = new int[] { 3, 2, 4, 1, 5 };
//
//        assertSameJoinOrder(expected, helper);
//        
//    }    

}