com.bigdata.rdf.sparql.ast.optimizers.TestASTDistinctTermScanOptimizer Maven / Gradle / Ivy
Go to download
Show more of this group Show more artifacts with this name
Show all versions of bigdata-rdf-test Show documentation
Show all versions of bigdata-rdf-test Show documentation
Blazegraph(TM) RDF Test Suites
/**
Copyright (C) SYSTAP, LLC DBA Blazegraph 2006-2016. All rights reserved.
Contact:
SYSTAP, LLC DBA Blazegraph
2501 Calvert ST NW #106
Washington, DC 20008
licenses@blazegraph.com
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; version 2 of the License.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program; if not, write to the Free Software
Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
*/
/*
* Created on Sep 15, 2011
*/
package com.bigdata.rdf.sparql.ast.optimizers;
import org.junit.Ignore;
import junit.framework.Test;
import junit.framework.TestSuite;
import com.bigdata.bop.IPredicate;
import com.bigdata.rdf.sparql.ast.ProjectionNode;
import com.bigdata.rdf.sparql.ast.QueryRoot;
import com.bigdata.rdf.sparql.ast.StatementPatternNode;
import com.bigdata.rdf.sparql.ast.VarNode;
import com.bigdata.rdf.spo.DistinctTermAdvancer;
import static com.bigdata.rdf.sparql.ast.optimizers.AbstractOptimizerTestCase.HelperFlag.*;
/**
* Test suite for {@link ASTDistinctTermScanOptimizer}.
*
* @see "DISTINCT PREDICATEs query is slow"
*
* @author Bryan Thompson
*/
public class TestASTDistinctTermScanOptimizer extends AbstractOptimizerTestCase {
public TestASTDistinctTermScanOptimizer() {
super();
}
/**
 * Creates an instance of this test case, forwarding {@code name} to the
 * superclass constructor.
 *
 * @param name
 *            value handed to the superclass (presumably the name of the
 *            test method to run -- confirm against the base class).
 */
public TestASTDistinctTermScanOptimizer(final String name) {
super(name);
}
/**
 * Supplies the optimizer exercised by the tests in this class: an
 * {@link ASTOptimizerList} wrapping a single
 * {@link ASTDistinctTermScanOptimizer}.
 */
@Override
IASTOptimizer newOptimizer() {
    final ASTDistinctTermScanOptimizer distinctTermScan = new ASTDistinctTermScanOptimizer();
    return new ASTOptimizerList(distinctTermScan);
}
/**
 * Builds the aggregate suite consisting of the quads mode tests
 * ({@link TestQuadsModeAPs}) followed by the triples mode tests
 * ({@link TestTriplesModeAPs}).
 *
 * @return the suite, named after {@link ASTDistinctTermScanOptimizer}.
 */
public static Test suite() {
    // Name the suite after the optimizer that is actually under test. The
    // previous name referred to ASTFastRangeCountOptimizer (copy/paste from
    // the sibling test class), which mislabels these tests in reports.
    final TestSuite suite = new TestSuite(
            ASTDistinctTermScanOptimizer.class.getSimpleName());
    suite.addTestSuite(TestQuadsModeAPs.class);
    suite.addTestSuite(TestTriplesModeAPs.class);
    return suite;
}
protected static abstract class AbstractASTDistinctTermScanTest extends TestASTDistinctTermScanOptimizer {
/**
 * Basic rewrite of
 * {@code SELECT (DISTINCT|REDUCED) ?s {?s ?p ?o}}.
 *
 * The query is translated into a {@link DistinctTermAdvancer} and the
 * distinct terms are simply bound onto ?s. The DISTINCT or REDUCED
 * annotation is REQUIRED in the original AST and is NOT present in the
 * rewritten AST (the distinct term scan always provides distinct
 * solutions).
 *
 * Note: This is only possible when the values of the other variables are
 * ignored (rather than being projected out).
 *
 * TODO It is possible to use the {@link DistinctTermAdvancer} when there
 * are some constants in the statement pattern, e.g.
 * {@code SELECT DISTINCT ?o {:s ?p ?o}}. However, this is only advantageous
 * for QUADS when the context position is bound to a constant and the size
 * of the named graph is large. In all other cases we are better off just
 * using a pipeline join (standard join).
 *
 * FIXME Write an explicit test for a default graph access path. The graph
 * variable is ignored and not projected out. The distinct term advancer
 * will automatically possess the appropriate RDF Merge semantics.
 */
public void test_distinctTermScanOptimizer_01() {
    new Helper() {
        {
            final long baseCardinality = 1000L;

            /*
             * Input AST: SELECT DISTINCT ?s {?s ?p ?o}
             *
             * The range count is assumed to have been attached to the
             * basic triple pattern already.
             */
            final StatementPatternNode inputPattern = newStatementPatternNode(
                    new VarNode(s), new VarNode(p), new VarNode(o));
            inputPattern.setProperty(Annotations.ESTIMATED_CARDINALITY,
                    baseCardinality);
            final ProjectionNode inputProjection = projection(varNode(s));
            inputProjection.setDistinct(true);
            given = select(inputProjection, where(inputPattern));

            /*
             * Expected AST: SELECT ?s {?s ?p ?o}
             *
             * - the triple pattern carries the distinct term scan
             *   variable ?s;
             * - its ESTIMATED_CARDINALITY has been rescaled;
             * - the DISTINCT/REDUCED annotation has been REMOVED from the
             *   SELECT, since the distinct term scan itself imposes
             *   DISTINCT on the bound variable.
             *
             * Note: This requires coordination with the optimizer that
             * attaches the range counts. It should either run first, or
             * not run if estimates are already attached to the pattern.
             */
            final StatementPatternNode rewrittenPattern = newStatementPatternNode(
                    new VarNode(s), new VarNode(p), new VarNode(o));

            // Mark ?s as the variable bound by the distinct term scan.
            rewrittenPattern.setDistinctTermScanVar(new VarNode(s));

            /*
             * Intended as a linear estimate: 1/arity of the original
             * range count, where arity is 4 for quads and 3 for triples.
             *
             * NOTE(review): the cast applies to (1.0 / arity) BEFORE the
             * multiplication, so this evaluates to 0 for either arity.
             * Presumably this mirrors what the optimizer computes; confirm
             * before changing either side.
             */
            final int arity = store.isQuads() ? 4 : 3;
            final long scaledCardinality = (long) (1.0 / arity) * baseCardinality;
            rewrittenPattern.setProperty(Annotations.ESTIMATED_CARDINALITY,
                    scaledCardinality);

            // The expected pattern also carries an SPOC key order hint.
            rewrittenPattern.setQueryHint(IPredicate.Annotations.KEY_ORDER, "SPOC");

            // DISTINCT|REDUCED are NOT part of the expected projection.
            final ProjectionNode outputProjection = projection(varNode(s));
            outputProjection.setDistinct(false);
            outputProjection.setReduced(false);

            expected = select(outputProjection, where(rewrittenPattern));
        }
    }.test();
}
/**
 * The triple pattern can be OPTIONAL (simple optional):
 * {@code SELECT (DISTINCT|REDUCED) ?s { OPTIONAL { ?s ?p ?o} }}.
 *
 * This just means that we produce no bindings for ?s, which is exactly
 * what would happen anyway in a SELECT with a single required triple
 * pattern.
 */
public void test_distinctTermScanOptimizer_optional_pattern() {
    new Helper() {
        {
            final long baseCardinality = 1000L;

            // Input: DISTINCT projection of ?s over one OPTIONAL pattern.
            final ProjectionNode inputProjection = projection(varNode(s));
            final StatementPatternNode inputPattern = statementPatternNode(
                    varNode(s), varNode(p), varNode(o), varNode(z), OPTIONAL,
                    property(Annotations.ESTIMATED_CARDINALITY, baseCardinality));
            given = select(inputProjection, where(inputPattern), DISTINCT);

            /*
             * NOTE(review): the cast applies to (1.0 / arity) before the
             * multiplication, so this evaluates to 0. Presumably this
             * mirrors the optimizer; confirm before changing.
             */
            final int arity = store.isQuads() ? 4 : 3;
            final long scaledCardinality = (long) (1.0 / arity) * baseCardinality;

            // Expected: same pattern, rescaled cardinality, ?s marked as
            // the distinct term scan variable, SPOC key order hint.
            final StatementPatternNode rewrittenPattern = statementPatternNode(
                    varNode(s), varNode(p), varNode(o), varNode(z), OPTIONAL,
                    property(Annotations.ESTIMATED_CARDINALITY, scaledCardinality),
                    property(Annotations.DISTINCT_TERM_SCAN_VAR, varNode(s)));
            rewrittenPattern.setQueryHint(IPredicate.Annotations.KEY_ORDER, "SPOC");

            final ProjectionNode outputProjection = projection(varNode(s));
            expected = select(outputProjection, where(rewrittenPattern),
                    NOT_DISTINCT, NOT_REDUCED);
        }
    }.test();
}
/**
 * The optimization must be rejected when the statement pattern contains a
 * constant: {@code SELECT DISTINCT ?s {?s :p ?o}}.
 *
 * The expected AST is a {@link QueryRoot} constructed from the given AST,
 * i.e. the optimizer is expected to leave the query as it was.
 */
public void test_distinctTermScanOptimizer_reject_constant_in_sp() {
    new Helper() {
        {
            final long rangeCount_sp1 = 1000L;

            // DISTINCT projection of ?s over a pattern whose predicate
            // position is a constant.
            given = select(
                    projection(varNode(s)),
                    where(
                        statementPatternNode(
                            varNode(s), constantNode(a), varNode(o),
                            property(Annotations.ESTIMATED_CARDINALITY, rangeCount_sp1))),
                    DISTINCT);

            // Built from the input before the optimizer runs.
            expected = new QueryRoot(given);
        }
    // Actually run the optimizer and compare against the expectation.
    // Without this call the Helper is only constructed and the test
    // passes vacuously.
    }.test();
}
}
/**
* Quads mode specific test suite.
*/
public static class TestQuadsModeAPs extends
AbstractASTDistinctTermScanTest {
/**
 * Context position is a variable that is not projected:
 * {@code SELECT DISTINCT ?s { graph ?g {?s ?p ?o} }}.
 */
public void test_distinctTermScanOptimizer_variable_context_not_projected() {
    new Helper() {
        {
            final long baseCardinality = 1000L;

            // Input: DISTINCT projection of ?s over a four-position pattern.
            final ProjectionNode inputProjection = projection(varNode(s));
            final StatementPatternNode inputPattern = statementPatternNode(
                    varNode(s), varNode(p), varNode(o), varNode(z),
                    property(Annotations.ESTIMATED_CARDINALITY, baseCardinality));
            given = select(inputProjection, where(inputPattern), DISTINCT);

            /*
             * NOTE(review): the cast applies to (1.0 / arity) before the
             * multiplication, so this evaluates to 0. Presumably this
             * mirrors the optimizer; confirm before changing.
             */
            final int arity = store.isQuads() ? 4 : 3;
            final long scaledCardinality = (long) (1.0 / arity) * baseCardinality;

            // Expected: ?s is the distinct term scan variable and the
            // pattern carries an SPOC key order hint.
            final StatementPatternNode rewrittenPattern = statementPatternNode(
                    varNode(s), varNode(p), varNode(o), varNode(z),
                    property(Annotations.ESTIMATED_CARDINALITY, scaledCardinality),
                    property(Annotations.DISTINCT_TERM_SCAN_VAR, varNode(s)));
            rewrittenPattern.setQueryHint(IPredicate.Annotations.KEY_ORDER, "SPOC");

            final ProjectionNode outputProjection = projection(varNode(s));
            expected = select(outputProjection, where(rewrittenPattern),
                    NOT_DISTINCT, NOT_REDUCED);
        }
    }.test();
}
/**
 * Context position is a variable and is the projected variable:
 * {@code SELECT DISTINCT ?g { graph ?g {?s ?p ?o} }}.
 */
public void test_distinctTermScanOptimizer_variable_context_projected() {
    new Helper() {
        {
            final long baseCardinality = 1000L;

            // Input: DISTINCT projection of the context variable.
            final ProjectionNode inputProjection = projection(varNode(z));
            final StatementPatternNode inputPattern = statementPatternNode(
                    varNode(s), varNode(p), varNode(o), varNode(z),
                    property(Annotations.ESTIMATED_CARDINALITY, baseCardinality));
            given = select(inputProjection, where(inputPattern), DISTINCT);

            /*
             * NOTE(review): the cast applies to (1.0 / arity) before the
             * multiplication, so this evaluates to 0. Presumably this
             * mirrors the optimizer; confirm before changing.
             */
            final int arity = store.isQuads() ? 4 : 3;
            final long scaledCardinality = (long) (1.0 / arity) * baseCardinality;

            // Expected: the context variable is the distinct term scan
            // variable and the pattern carries a CSPO key order hint.
            final StatementPatternNode rewrittenPattern = statementPatternNode(
                    varNode(s), varNode(p), varNode(o), varNode(z),
                    property(Annotations.ESTIMATED_CARDINALITY, scaledCardinality),
                    property(Annotations.DISTINCT_TERM_SCAN_VAR, varNode(z)));
            rewrittenPattern.setQueryHint(IPredicate.Annotations.KEY_ORDER, "CSPO");

            final ProjectionNode outputProjection = projection(varNode(z));
            expected = select(outputProjection, where(rewrittenPattern),
                    NOT_DISTINCT, NOT_REDUCED);
        }
    }.test();
}
/**
 * Context position bound to a constant:
 * {@code SELECT DISTINCT ?s { graph :g {?s ?p ?o} }}.
 *
 * Describes the rewrite that would be expected if this edge case were
 * supported. The Helper below is only constructed; its {@code test()}
 * method is never invoked, so no comparison is performed.
 *
 * NOTE(review): this class is run through a junit.framework.TestSuite,
 * which presumably does not honour {@code @Ignore}; the method is then
 * inert only because {@code test()} is not called. Confirm against the
 * runner in use.
 */
@Ignore("edge case, not implemented yet")
public void test_distinctTermScanOptimizer_bound_context() {
    new Helper() {
        {
            final long baseCardinality = 1000L;

            // Input: DISTINCT projection of ?s, context is a constant.
            final ProjectionNode inputProjection = projection(varNode(s));
            final StatementPatternNode inputPattern = statementPatternNode(
                    varNode(s), varNode(p), varNode(o), constantNode(a),
                    property(Annotations.ESTIMATED_CARDINALITY, baseCardinality));
            given = select(inputProjection, where(inputPattern), DISTINCT);

            /*
             * NOTE(review): the cast applies to (1.0 / arity) before the
             * multiplication, so this evaluates to 0.
             */
            final int arity = store.isQuads() ? 4 : 3;
            final long scaledCardinality = (long) (1.0 / arity) * baseCardinality;

            // Expected: ?s becomes the distinct term scan variable.
            final ProjectionNode outputProjection = projection(varNode(s));
            final StatementPatternNode rewrittenPattern = statementPatternNode(
                    varNode(s), varNode(p), varNode(o), constantNode(a),
                    property(Annotations.ESTIMATED_CARDINALITY, scaledCardinality),
                    property(Annotations.DISTINCT_TERM_SCAN_VAR, varNode(s)));
            expected = select(outputProjection, where(rewrittenPattern),
                    NOT_DISTINCT, NOT_REDUCED);
        }
    };
}
/**
 * The optimization must be rejected when the statement pattern contains a
 * constant, here with a variable context position:
 * {@code SELECT DISTINCT ?s { GRAPH ?g {?s :p ?o} }}.
 *
 * The expected AST is a {@link QueryRoot} constructed from the given AST,
 * i.e. the optimizer is expected to leave the query as it was.
 */
public void test_distinctTermScanOptimizer_reject_quads_constant_in_sp() {
    new Helper() {
        {
            final long rangeCount_sp1 = 1000L;

            // DISTINCT projection of ?s; the predicate position is a
            // constant and the context position is a variable.
            given = select(
                    projection(varNode(s)),
                    where(
                        statementPatternNode(
                            varNode(s), constantNode(a), varNode(o), varNode(y),
                            property(Annotations.ESTIMATED_CARDINALITY, rangeCount_sp1))),
                    DISTINCT);

            // Built from the input before the optimizer runs.
            expected = new QueryRoot(given);
        }
    // Actually run the optimizer and compare against the expectation.
    // Without this call the Helper is only constructed and the test
    // passes vacuously.
    }.test();
}
/**
 * The optimization must be rejected when the context position is a
 * constant: {@code SELECT DISTINCT ?s { GRAPH :g {?s ?p ?o} }}.
 *
 * The expected AST is a {@link QueryRoot} constructed from the given AST,
 * i.e. the optimizer is expected to leave the query as it was.
 */
public void test_distinctTermScanOptimizer_reject_constant_context() {
    new Helper() {
        {
            final long rangeCount_sp1 = 1000L;

            // DISTINCT projection of ?s; only the context position is a
            // constant.
            given = select(
                    projection(varNode(s)),
                    where(
                        statementPatternNode(
                            varNode(s), varNode(p), varNode(o), constantNode(a),
                            property(Annotations.ESTIMATED_CARDINALITY, rangeCount_sp1))),
                    DISTINCT);

            // Built from the input before the optimizer runs.
            expected = new QueryRoot(given);
        }
    // Actually run the optimizer and compare against the expectation.
    // Without this call the Helper is only constructed and the test
    // passes vacuously.
    }.test();
}
} // quads
/**
 * Triples mode specific test suite. Declares no tests of its own; it only
 * inherits the tests defined by {@link AbstractASTDistinctTermScanTest}.
 */
public static class TestTriplesModeAPs extends
AbstractASTDistinctTermScanTest {
} // triples
}
© 2015 - 2024 Weber Informatics LLC | Privacy Policy