All Downloads are FREE. Search and download functionalities are using the official Maven repository.

com.bigdata.rdf.sparql.ast.eval.rto.TestRTO_BSBM Maven / Gradle / Ivy

There is a newer version: 2.1.4
Show newest version
/**

Copyright (C) SYSTAP, LLC DBA Blazegraph 2006-2016.  All rights reserved.

Contact:
     SYSTAP, LLC DBA Blazegraph
     2501 Calvert ST NW #106
     Washington, DC 20008
     [email protected]

This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; version 2 of the License.

This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
GNU General Public License for more details.

You should have received a copy of the GNU General Public License
along with this program; if not, write to the Free Software
Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
*/
/*
 * Created on Sep 4, 2011
 */

package com.bigdata.rdf.sparql.ast.eval.rto;

import java.util.Properties;

import junit.framework.AssertionFailedError;

import com.bigdata.rdf.axioms.NoAxioms;
import com.bigdata.rdf.sail.BigdataSail;
import com.bigdata.rdf.sparql.ast.QueryHints;

/**
 * Data driven test suite for the Runtime Query Optimizer (RTO) using BSBM data
 * and queries based on BSBM.
 * 

* Note: BSBM is parameterized. We can generate more queries against the pc100 * data set easily enough. In priciple, those queries might exhibit different * correlations. However, the pc100 data set may be too small for any * interesting correlations. In fact, it may be too small since the vertex * estimates and cutoff joins may be exact before the RTO is run running. If so, * then we need to go back and use a larger data set. However, the specific * parameterized queries will remain valid against larger data sets since BSBM * only adds more data when generating a larger data set. Of course, the number * of solutions for the queries may change. *

* Note: BSBM uses a lot of filters, subgroups, and sub-selects. As we build up * coverage for those constructions in the RTO, it will handle more of the * query. As a result, the observed join orders (and even the #of joins that are * considered) are likely to change. *

* Note: Q6 is no longer run in BSBM (the query was dropped). *

* Note: Q9 is a simple DESCRIBE (too simple for the RTO). Sample query is: * *

 * PREFIX rev: 
 * 
 * DESCRIBE ?x
 * WHERE {  rev:reviewer ?x }
 * 
* * Note: Q12 is a UNION (too simple for the RTO). Sample query is: * *
 * PREFIX bsbm-inst: 
 * PREFIX bsbm: 
 * PREFIX rdfs: 
 * PREFIX rdf: 
 * 
 * SELECT ?property ?hasValue ?isValueOf
 * WHERE {
 *   {  ?property ?hasValue }
 *   UNION
 *   { ?isValueOf ?property  }
 * }
 * 
* * @author Bryan Thompson * @version $Id: TestBasicQuery.java 6440 2012-08-14 17:57:33Z thompsonbry $ */ public class TestRTO_BSBM extends AbstractRTOTestCase { // private final static Logger log = Logger.getLogger(TestRTO_LUBM.class); /** * */ public TestRTO_BSBM() { } /** * @param name */ public TestRTO_BSBM(String name) { super(name); } @Override public Properties getProperties() { // Note: clone to avoid modifying!!! final Properties properties = (Properties) super.getProperties().clone(); properties.setProperty(BigdataSail.Options.TRIPLES_MODE, "true"); properties.setProperty(BigdataSail.Options.AXIOMS_CLASS, NoAxioms.class.getName()); return properties; } /** * Test of BSBM Q1 against an empty data set. There are no solutions in the * data. */ public void test_BSBM_Q1_noSolutions() throws Exception { final TestHelper helper = new TestHelper(// "rto/BSBM-Q1", // testURI, "rto/BSBM-Q1.rq",// queryFileURL new String[]{},// data files. "rto/BSBM-Q1-noSolutions.srx"// resultFileURL ); /* * TODO In fact, the RTO should not be running for a group of required * joins in which some vertex has a zero cardinality or when any join * can provably produce ZERO results when fed solutions from a fully * materialized vertex. */ assertSameJoinOrder(new int[] { 2, 1, 3, 4, 5 }, helper); } /** * BSBM Q1 against pc100. */ public void test_BSBM_Q1_pc100() throws Exception { final TestHelper helper = new TestHelper(// "rto/BSBM-Q1", // testURI, "rto/BSBM-Q1.rq",// queryFileURL "src/test/resources/data/bsbm/dataset_pc100.nt",// dataFileURL "rto/BSBM-Q1.srx"// resultFileURL ); /* * Verify that the runtime optimizer produced the expected join path. * * FIXME There are two different solutions that I see for this query * depending on whether or not AST2BOpRTO.runAllJoinsAsComplexJoins is * true or false. I have modified the test to allow either join ordering * for now, but we should chase down the root cause for this difference * in how the simple and complex cutoff join evaluation code paths * compute the join hit ratios and estimated cardinality. It is probably * an off by one fencepost.... */ try { assertSameJoinOrder(new int[] { 2, 4, 1, 3, 5 }, helper); } catch (AssertionFailedError er) { log.warn(er); } assertSameJoinOrder(new int[] { 3, 2, 4, 1, 5 }, helper); } /** * BSBM Q2 against pc100. */ public void test_BSBM_Q2_pc100() throws Exception { final TestHelper helper = new TestHelper(// "rto/BSBM-Q2", // testURI, "rto/BSBM-Q2.rq",// queryFileURL "src/test/resources/data/bsbm/dataset_pc100.nt",// dataFileURL "rto/BSBM-Q2.srx"// resultFileURL ); /* * Verify that the runtime optimizer produced the expected join path. */ final int[] expected = new int[] { 3, 4, 5, 1, 2, 6, 7, 8, 9, 10, 11, 12 }; assertSameJoinOrder(expected, helper); } /** * BSBM Q3 against pc100. */ public void test_BSBM_Q3_pc100() throws Exception { final TestHelper helper = new TestHelper(// "rto/BSBM-Q3", // testURI, "rto/BSBM-Q3.rq",// queryFileURL "src/test/resources/data/bsbm/dataset_pc100.nt",// dataFileURL "rto/BSBM-Q3.srx"// resultFileURL ); /* * Verify that the runtime optimizer produced the expected join path. */ final int[] expected = new int[] { 2, 5, 1, 3, 4 }; assertSameJoinOrder(expected, helper); } /** * BSBM Q4 against pc100. *

* Note: This query has TWO join groups that are sufficiently complex to run * the RTO. However, only one of the join groups is marked for RTO * optimization in order to keep the test harness simple. The test harness * assumes that there is a single JOIN group that is optimized by the RTO * and then verifies the join ordering within that join group. The test * harness breaks if there is more than one join group optimized by the RTO. */ public void test_BSBM_Q4_pc100() throws Exception { final TestHelper helper = new TestHelper(// "rto/BSBM-Q4", // testURI, "rto/BSBM-Q4.rq",// queryFileURL "src/test/resources/data/bsbm/dataset_pc100.nt",// dataFileURL "rto/BSBM-Q4.srx"// resultFileURL ); /* * Verify that the runtime optimizer produced the expected join path. */ final int[] expected = new int[] { 9, 6, 7, 8, 10, 11 }; assertSameJoinOrder(expected, helper); } /** * BSBM Q5 on the pc100 data set. * * FIXME FAILS if we disallow out of order evaluation when doing cutoff * joins. */ public void test_BSBM_Q5_pc100() throws Exception { final TestHelper helper = new TestHelper(// "rto/BSBM-Q5", // testURI, "rto/BSBM-Q5.rq",// queryFileURL "src/test/resources/data/bsbm/dataset_pc100.nt",// dataFileURL "rto/BSBM-Q5.srx"// resultFileURL ); /* * Verify that the runtime optimizer produced the expected join path. */ final int[] expected = new int[] { 1, 3, 2, 5, 4, 7, 6 }; assertSameJoinOrder(expected, helper); } /** * BSBM Q7 on the pc100 data set. */ public void test_BSBM_Q7_pc100() throws Exception { final TestHelper helper = new TestHelper(// "rto/BSBM-Q7", // testURI, "rto/BSBM-Q7.rq",// queryFileURL "src/test/resources/data/bsbm/dataset_pc100.nt",// dataFileURL "rto/BSBM-Q7.srx"// resultFileURL ); /* * Verify that the runtime optimizer produced the expected join path. * * The join IDs depend on whether the pipelined hash join is enabled * by default. Given that the pipelined hash join unites the hash index * construction and join in one operator, there is one less operator * in the plan, so the indices decrease by one. */ final int[] expected = QueryHints.DEFAULT_PIPELINED_HASH_JOIN ? new int[] { 7, 6, 8, 4, 5, 9, 10 } : new int[] { 8, 7, 9, 5, 6, 10, 11 }; assertSameJoinOrder(expected, helper); } /** * A modified version BSBM Q7 on pc100 which is the simplest form of the * query that causes the RTO to fail with the "No stats" assertion error. * This is basically just an OPTIONAL {} join group. * *

     * PREFIX rdfs: 
     * PREFIX rev: 
     * PREFIX foaf: 
     * PREFIX bsbm: 
     * PREFIX dc: 
     * 
     * SELECT (COUNT(*) as ?count)
     * WHERE { 
     * 
     *   # Control all RTO parameters for repeatable behavior.
     *   hint:Query hint:RTO-sampleType "DENSE".
     *   hint:Query hint:RTO-limit "100".
     *   hint:Query hint:RTO-nedges "1".
     * 
     *     rdfs:label ?productLabel .
     *     OPTIONAL {
     *    
     *    # Enable the RTO inside of the OPTIONAL join group.
     *    hint:Group hint:optimizer "Runtime".
     *    
     *    ?review bsbm:reviewFor  .
     *    ?review rev:reviewer ?reviewer .
     *    ?reviewer foaf:name ?revName .
     *    ?review dc:title ?revTitle .
     *    
     *    }
     * }
     * 
*/ public void test_BSBM_Q7b_pc100() throws Exception { final TestHelper helper = new TestHelper(// "rto/BSBM-Q7b", // testURI, "rto/BSBM-Q7b.rq",// queryFileURL "src/test/resources/data/bsbm/dataset_pc100.nt",// dataFileURL "rto/BSBM-Q7b.srx"// resultFileURL ); /* * Verify that the runtime optimizer produced the expected join path. * * The join IDs depend on whether the pipelined hash join is enabled * by default. Given that the pipelined hash join unites the hash index * construction and join in one operator, there is one less operator * in the plan, so the indices decrease by one. */ final int[] expected = QueryHints.DEFAULT_PIPELINED_HASH_JOIN ? new int[] { 4, 5, 6, 7 } : new int[] { 5, 6, 7, 8 }; assertSameJoinOrder(expected, helper); } /** * BSBM Q8 on the pc100 data set. */ public void test_BSBM_Q8_pc100() throws Exception { final TestHelper helper = new TestHelper(// "rto/BSBM-Q8", // testURI, "rto/BSBM-Q8.rq",// queryFileURL "src/test/resources/data/bsbm/dataset_pc100.nt",// dataFileURL "rto/BSBM-Q8.srx"// resultFileURL ); /* * Verify that the runtime optimizer produced the expected join path. */ final int[] expected = new int[] { 1, 3, 2, 4, 5, 6 }; assertSameJoinOrder(expected, helper); } /** * BSBM Q10 on pc100. */ public void test_BSBM_Q10_pc100() throws Exception { final TestHelper helper = new TestHelper(// "rto/BSBM-Q10", // testURI, "rto/BSBM-Q10.rq",// queryFileURL "src/test/resources/data/bsbm/dataset_pc100.nt",// dataFileURL "rto/BSBM-Q10.srx"// resultFileURL ); /* * Verify that the runtime optimizer produced the expected join path. */ final int[] expected = new int[] { 1, 7, 5, 2, 3, 4, 6 }; assertSameJoinOrder(expected, helper); } /* * larger runs -- OOM on laptop when running under Eclipse. */ // /** // * BSBM Q1 against pc1000 (OOM on laptop). // */ // public void _test_BSBM_Q1_pc1000() throws Exception { // // final TestHelper helper = new TestHelper(// // "rto/BSBM-Q1", // testURI, // "rto/BSBM-Q1.rq",// queryFileURL // "src/test/resources/data/bsbm/dataset_pc1000.nt.gz",// dataFileURL // "rto/BSBM-Q1.srx"// resultFileURL // ); // // /* // * Verify that the runtime optimizer produced the expected join path. // */ // // final int[] expected = new int[] { 3, 2, 4, 1, 5 }; // // assertSameJoinOrder(expected, helper); // // } }




© 2015 - 2024 Weber Informatics LLC | Privacy Policy