All Downloads are FREE. Search and download functionalities are using the official Maven repository.

com.bigdata.rdf.sparql.ast.optimizers.TestASTDistinctTermScanOptimizer Maven / Gradle / Ivy

There is a newer version: 2.1.4
Show newest version
/**

Copyright (C) SYSTAP, LLC DBA Blazegraph 2006-2016.  All rights reserved.

Contact:
     SYSTAP, LLC DBA Blazegraph
     2501 Calvert ST NW #106
     Washington, DC 20008
     licenses@blazegraph.com

This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; version 2 of the License.

This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
GNU General Public License for more details.

You should have received a copy of the GNU General Public License
along with this program; if not, write to the Free Software
Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
*/
/*
 * Created on Sep 15, 2011
 */

package com.bigdata.rdf.sparql.ast.optimizers;

import org.junit.Ignore;

import junit.framework.Test;
import junit.framework.TestSuite;

import com.bigdata.bop.IPredicate;
import com.bigdata.rdf.sparql.ast.ProjectionNode;
import com.bigdata.rdf.sparql.ast.QueryRoot;
import com.bigdata.rdf.sparql.ast.StatementPatternNode;
import com.bigdata.rdf.sparql.ast.VarNode;
import com.bigdata.rdf.spo.DistinctTermAdvancer;

import static com.bigdata.rdf.sparql.ast.optimizers.AbstractOptimizerTestCase.HelperFlag.*;


/**
 * Test suite for {@link ASTDistinctTermScanOptimizer}.
 * 
 * @see "DISTINCT PREDICATEs query is slow"
 *      
 * @author Bryan Thompson
 */
public class TestASTDistinctTermScanOptimizer extends AbstractOptimizerTestCase {

	public TestASTDistinctTermScanOptimizer() {
		super();
	}

	public TestASTDistinctTermScanOptimizer(final String name) {
		super(name);
	}

	@Override
	IASTOptimizer newOptimizer() {

		return new ASTOptimizerList(new ASTDistinctTermScanOptimizer());
		
	}

    public static Test suite()
    {

        final TestSuite suite = new TestSuite(ASTFastRangeCountOptimizer.class.getSimpleName());

        suite.addTestSuite(TestQuadsModeAPs.class);

        suite.addTestSuite(TestTriplesModeAPs.class);
        
        return suite;
    }
    
    protected static abstract class AbstractASTDistinctTermScanTest extends TestASTDistinctTermScanOptimizer {
		/**
		 * 
		 * This is translated into a {@link DistinctTermAdvancer} and the
		 * distinct terms are simply bound onto ?s. The DISTINCT or REDUCED
		 * annotations are REQUIRED in the original AST and are NOT present in
		 * the rewritten AST (the distinct term scan always provides distinct
		 * solutions).
		 * 
		 * Note: This is only possible when the values of the other variables
		 * are ignored (rather than being projected out).
		 * 
		 * 
		 * SELECT (DISTINCT|REDUCED) ?s {?s ?p ?o}
		 * 
* * TODO While it is possible to use the {@link DistinctTermAdvancer} * where there are some constants in the statement pattern. E.g., * SELECT DISTINCT ?o {:s ?p ?o} * , this is only advantageous for QUADS when the context * position is bound to a constant and the size of the named graph is * large. In all other cases we are better off just using a pipeline * join (standard join). * * FIXME Write an explicit test for a default graph access path. The * graph variable is ignored and not projected out. The distinct term * advancer will automatically possess the appropriate RDF Merge * semantics. * */ public void test_distinctTermScanOptimizer_01() { new Helper() { { final long rangeCount_sp1 = 1000L; { final StatementPatternNode sp1 = newStatementPatternNode( new VarNode(s), new VarNode(p), new VarNode(o)); /* * Note: The assumption is that the range counts have * already been attached for the basic triple pattern. */ sp1.setProperty(Annotations.ESTIMATED_CARDINALITY, rangeCount_sp1); final ProjectionNode projection = projection(varNode(s)); projection.setDistinct(true); given = select(projection, where(sp1)); } /** * We need to convert: * *
					 * SELECT DISTINCT ?s {?s ?p ?o}
					 * 
* * into * *
					 * SELECT ?s {?s ?p ?o}
					 * 
* * where the triple pattern has been marked with the * DISTINCT-TERM-SCAN:=[?s] annotation; and * * where the ESTIMATED_CARDINALITY of the triple pattern has * been set to N/M * range-count(triple-pattern), where N is * the number of prefix components that are projected out of * the distinct term scan and M is the number of components * in the key. * * Note: The DISTINCT/REDUCED annotation is REMOVED from the * SELECT. The distinct term scan automatically imposes * DISTINCT on the bound variables. * * Note: This requires coordination with the optimizer that * attaches the range counts. I.e., it should either run * first or not run if we have already attached the * estimates for a given triple pattern. */ { // the triple pattern. final StatementPatternNode sp1 = newStatementPatternNode( new VarNode(s), new VarNode(p), new VarNode(o)); /* * Annotate with the name of the variable(s) to become * bound to the fast range count of that triple pattern. */ final VarNode distinctTermScanVar = new VarNode(s); sp1.setDistinctTermScanVar(distinctTermScanVar); /* * Estimate the cardinality of the distinct term scan * access path. This is just a linear estimate based on * assuming that we can do a proportional fraction of * the work using the distinct term scan depending on * how many distinct term scan variables there are and * the arity and cardinality of the underlying triple or * quad pattern access path. */ final long newRangeCount = (long) (1.0 / (store .isQuads() ? 4 : 3)) * rangeCount_sp1; /* * Update the estimated cardinality on the SP. */ sp1.setProperty(Annotations.ESTIMATED_CARDINALITY, newRangeCount); // the optimizer also adds a SOPC key order to be used by the access path sp1.setQueryHint(IPredicate.Annotations.KEY_ORDER, "SPOC"); // Note: DISTINCT|REDUCED are NOT part of the // projection. final ProjectionNode projection = projection(varNode(s)); projection.setDistinct(false); projection.setReduced(false); // the expected AST. 
expected = select(projection, where(sp1)); } } }.test(); } /** * The triple pattern can be OPTIONAL (simple * optional). This just means that we produce no bindings for ?s, which * is exactly what would happen anyway in a SELECT with a single * required triple pattern. * *
		 * SELECT (DISTINCT|REDUCED) ?s { OPTIONAL { ?s ?p ?o} }
		 * 
*/ public void test_distinctTermScanOptimizer_optional_pattern() { new Helper() { { final long rangeCount_sp1 = 1000L; { given = select( projection( varNode(s) ), where( statementPatternNode( varNode(s), varNode(p), varNode(o), varNode(z), OPTIONAL, property(Annotations.ESTIMATED_CARDINALITY, rangeCount_sp1) ) ), DISTINCT ); } { final long newRangeCount = (long) (1.0 / (store .isQuads() ? 4 : 3)) * rangeCount_sp1; StatementPatternNode sp = statementPatternNode( varNode(s), varNode(p), varNode(o), varNode(z), OPTIONAL, property(Annotations.ESTIMATED_CARDINALITY, newRangeCount), property(Annotations.DISTINCT_TERM_SCAN_VAR, varNode(s))); sp.setQueryHint(IPredicate.Annotations.KEY_ORDER, "SPOC"); expected = select( projection( varNode(s) ), where(sp), NOT_DISTINCT, NOT_REDUCED ); } } }.test(); } /** * Reject optimization in case of the constant in SP. *
SELECT DISTINCT ?s {?s :p ?o} 
*/ public void test_distinctTermScanOptimizer_reject_constant_in_sp() { new Helper() { { final long rangeCount_sp1 = 1000L; { given = select( projection( varNode(s) ), where( statementPatternNode( varNode(s), constantNode(a), varNode(o), property(Annotations.ESTIMATED_CARDINALITY, rangeCount_sp1) ) ), DISTINCT ); } { expected = new QueryRoot(given); } } }; } } /** * Quads mode specific test suite. */ public static class TestQuadsModeAPs extends AbstractASTDistinctTermScanTest { /** * SELECT DISTINCT ?s { graph ?g {?s ?p ?o} } */ public void test_distinctTermScanOptimizer_variable_context_not_projected() { new Helper() { { final long rangeCount_sp1 = 1000L; { given = select( projection( varNode(s) ), where( statementPatternNode( varNode(s), varNode(p), varNode(o), varNode(z), property(Annotations.ESTIMATED_CARDINALITY, rangeCount_sp1) ) ), DISTINCT ); } { final long newRangeCount = (long) (1.0 / (store .isQuads() ? 4 : 3)) * rangeCount_sp1; StatementPatternNode sp = statementPatternNode( varNode(s), varNode(p), varNode(o), varNode(z), property(Annotations.ESTIMATED_CARDINALITY, newRangeCount), property(Annotations.DISTINCT_TERM_SCAN_VAR, varNode(s))); sp.setQueryHint(IPredicate.Annotations.KEY_ORDER, "SPOC"); expected = select( projection( varNode(s) ), where(sp), NOT_DISTINCT, NOT_REDUCED ); } } }.test(); } /** * SELECT DISTINCT ?g { graph ?g {?s ?p ?o} } */ public void test_distinctTermScanOptimizer_variable_context_projected() { new Helper() { { final long rangeCount_sp1 = 1000L; { given = select( projection( varNode(z) ), where( statementPatternNode( varNode(s), varNode(p), varNode(o), varNode(z), property(Annotations.ESTIMATED_CARDINALITY, rangeCount_sp1) ) ), DISTINCT ); } { final long newRangeCount = (long) (1.0 / (store .isQuads() ? 
4 : 3)) * rangeCount_sp1; StatementPatternNode sp = statementPatternNode( varNode(s), varNode(p), varNode(o), varNode(z), property(Annotations.ESTIMATED_CARDINALITY, newRangeCount), property(Annotations.DISTINCT_TERM_SCAN_VAR, varNode(z)) ); sp.setQueryHint(IPredicate.Annotations.KEY_ORDER, "CSPO"); expected = select( projection( varNode(z) ), where(sp), NOT_DISTINCT, NOT_REDUCED ); } } }.test(); } /** * SELECT DISTINCT ?s { graph :g {?s ?p ?o} } */ @Ignore("edge case, not implemented yet") public void test_distinctTermScanOptimizer_bound_context() { new Helper() { { final long rangeCount_sp1 = 1000L; { given = select( projection( varNode(s) ), where( statementPatternNode( varNode(s), varNode(p), varNode(o), constantNode(a), property(Annotations.ESTIMATED_CARDINALITY, rangeCount_sp1) ) ), DISTINCT ); } { final long newRangeCount = (long) (1.0 / (store .isQuads() ? 4 : 3)) * rangeCount_sp1; expected = select( projection( varNode(s) ), where( statementPatternNode( varNode(s), varNode(p), varNode(o), constantNode(a), property(Annotations.ESTIMATED_CARDINALITY, newRangeCount), property(Annotations.DISTINCT_TERM_SCAN_VAR, varNode(s)) ) ), NOT_DISTINCT, NOT_REDUCED ); } } }; } /** * Reject optimization in case of the constant in SP. *
SELECT DISTINCT ?s { GPRAPH ?g {?s :p ?o}} 
*/ public void test_distinctTermScanOptimizer_reject_quads_constant_in_sp() { new Helper() { { final long rangeCount_sp1 = 1000L; { given = select( projection( varNode(s) ), where( statementPatternNode( varNode(s), constantNode(a), varNode(o), varNode(y), property(Annotations.ESTIMATED_CARDINALITY, rangeCount_sp1) ) ), DISTINCT ); } { expected = new QueryRoot(given); } } }; } /** * Reject optimization in case of the constant context. *
SELECT DISTINCT ?s { GPRAPH :g {?s ?p ?o}} 
*/ public void test_distinctTermScanOptimizer_reject_constant_context() { new Helper() { { final long rangeCount_sp1 = 1000L; { given = select( projection( varNode(s) ), where( statementPatternNode( varNode(s), varNode(p), varNode(o), constantNode(a), property(Annotations.ESTIMATED_CARDINALITY, rangeCount_sp1) ) ), DISTINCT ); } { expected = new QueryRoot(given); } } }; } } // quads /** * Quads mode specific test suite. */ public static class TestTriplesModeAPs extends AbstractASTDistinctTermScanTest { } // triples }




© 2015 - 2024 Weber Informatics LLC | Privacy Policy