com.bigdata.rdf.sparql.ast.eval.TestTCK Maven / Gradle / Ivy
Show all versions of bigdata-rdf-test Show documentation
/**
Copyright (C) SYSTAP, LLC DBA Blazegraph 2006-2016. All rights reserved.
Contact:
SYSTAP, LLC DBA Blazegraph
2501 Calvert ST NW #106
Washington, DC 20008
[email protected]
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; version 2 of the License.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program; if not, write to the Free Software
Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
*/
/*
* Created on Sep 7, 2011
*/
package com.bigdata.rdf.sparql.ast.eval;
import junit.framework.TestResult;
import junit.framework.TestSuite;
import org.apache.log4j.Logger;
import com.bigdata.BigdataStatics;
import com.bigdata.rdf.sparql.ast.ASTContainer;
import com.bigdata.rdf.sparql.ast.optimizers.ASTBottomUpOptimizer;
import com.bigdata.rdf.sparql.ast.optimizers.ASTSimpleOptionalOptimizer;
/**
* Test driver for debugging Sesame or DAWG manifest tests.
*
* @author Bryan Thompson
*/
public class TestTCK extends AbstractDataDrivenSPARQLTestCase {
private static final Logger log = Logger.getLogger(TestTCK.class);
/**
*
*/
public TestTCK() {
}
/**
* @param name
*/
public TestTCK(String name) {
super(name);
}
/**
* This is not a DAWG test. There is no data for the aggregation. Sesame is
* expecting a solution set consisting of a single solution with
* 0^^xsd:integer. We are producing an empty solution set, which is the
* correct result. The expected answer has been modified here so the test
* will pass, but it will still fail in the Sesame test suite until they fix
* the issue in their test manifest.
*
* See http://www.openrdf.org/issues/browse/SES-884 (Aggregation with an
* solution set as input should produce an empty solution as output)
*/
public void test_sparql11_sum_02() throws Exception {
new TestHelper(
"sparql11-sum-02", // testURI,
"sparql11-sum-02.rq",// queryFileURL
"sparql11-sum-02.ttl",// dataFileURL
"sparql11-sum-02.srx"// resultFileURL
).runTest();
}
/**
* This is not a DAWG test. The query is simple enough. However, one of the
* values which is bound on ?x
is a plain literal rather than a
* numeric datatype literal.
*
*
* PREFIX :
* SELECT (SUM(?x) AS ?total)
* WHERE {
* ?a :p ?x.
* }
* GROUP BY ?a
*
*
*
* @prefix : .
*
* :a :p "1" .
* :a :p 1 .
* :b :p 3 .
* :b :p 4 .
*
*
* The test as present in Sesame 2.5 is wrong. I've filed a bug report. The
* correct solution should be two groups. The group for :a
* should have an unbound value for ?x
. The group for
* :b
should have a bound value of
* "7"^^xsd:integer
for ?x
.
*
* Note: This test will continue to fail in the TCK until Sesame resolve the
* issue, which will require both updating their test suite and also
* updating their error handling logic to conform with the spec.
*
* @see http://www.openrdf.org/issues/browse/SES-862
*/
public void test_sparql11_sum_03() throws Exception {
new TestHelper(
"sparql11-sum-03", // testURI,
"sparql11-sum-03.rq",// queryFileURL
"sparql11-sum-03.ttl",// dataFileURL
"sparql11-sum-03.srx"// resultFileURL
).runTest();
}
/**
* This is not a DAWG test. There is no data for the aggregation. Sesame is
* expecting a solution set consisting of a single solution with
* 0^^xsd:integer. We are producing an empty solution set.
*/
public void test_sparql11_count_03() throws Exception {
new TestHelper(
"sparql11-count-03", // testURI,
"sparql11-count-03.rq",// queryFileURL
"sparql11-count-03.ttl",// dataFileURL
"sparql11-count-03.srx"// resultFileURL
).runTest();
}
/**
* This is not a DAWG test. We produce 10^^xsd:decimal while openrdf expects
* 10.0^^xsd:decimal.
*/
public void test_sparql11_sum_04() throws Exception {
new TestHelper(
"sparql11-sum-04", // testURI,
"sparql11-sum-04.rq",// queryFileURL
"sparql11-sum-04.ttl",// dataFileURL
"sparql11-sum-04.srx"// resultFileURL
).runTest();
}
/**
* This is not a DAWG test.
*/
public void test_sparql11_in_02() throws Exception {
new TestHelper(
"sparql11-in-02", // testURI,
"sparql11-in-02.rq",// queryFileURL
"sparql11-in-02.ttl",// dataFileURL
"sparql11-in-02.srx"// resultFileURL
).runTest();
}
// /**
// * TODO I can not figure out why these "dataset" tests fail.
// */
// public void test_dataset_01() throws Exception {
//
// new TestHelper(
// "dataset-01", // testURI,
// "dataset-01.rq",// queryFileURL
// "data-g1.ttl", // dataFileURL
// "dataset-01.ttl"// resultFileURL
// ).runTest();
//
// }
/**
* Test effective boolean value - optional.
*
*
* PREFIX xsd:
* PREFIX :
* SELECT ?a
* WHERE
* { ?a :p ?v .
* OPTIONAL
* { ?a :q ?w } .
* FILTER (?w) .
* }
*
*
* @see ASTSimpleOptionalOptimizer
*/
public void test_sparql_bev_5() throws Exception {
new TestHelper(
"bev-5", // testURI,
"bev-5.rq",// queryFileURL
"bev-5.ttl",// dataFileURL
"bev-5-result.ttl"// resultFileURL
).runTest();
}
/**
* Nested Optionals - 1
. Badly designed left join with TWO
* optionals.
*
*
* SELECT *
* {
* :x1 :p ?v .
* OPTIONAL
* {
* :x3 :q ?w .
* OPTIONAL { :x2 :p ?v }
* }
* }
*
*
* Note: Because this is a badly designed left join, it is translated into a
* named subquery. However, the translated query will fail if the "OPTIONAL"
* semantics are not being attached to the INCLUDE.
*
* @see ASTBottomUpOptimizer
*/
public void test_two_nested_opt() throws Exception {
new TestHelper(
"two-nested-opt", // testURI,
"two-nested-opt.rq",// queryFileURL
"two-nested-opt.ttl",// dataFileURL
"two-nested-opt.srx"// resultFileURL
).runTest();
}
/**
* This is the same query, except we are running the rewritten version of
* the query (that is, the version that we have to produce for the query
* above with the badly designed left join).
*
*/
public void test_two_nested_opt2() throws Exception {
new TestHelper(
"two-nested-opt", // testURI,
"two-nested-opt2.rq",// queryFileURL
"two-nested-opt.ttl",// dataFileURL
"two-nested-opt.srx"// resultFileURL
).runTest();
}
/**
* This case is not a problem. It provides a contrast to
* {@link #test_filter_nested_2()}
*/
public void test_filter_nested_1() throws Exception {
new TestHelper(
"filter-nested-1", // testURI,
"filter-nested-1.rq",// queryFileURL
"filter-nested-1.ttl",// dataFileURL
"filter-nested-1.srx"// resultFileURL
).runTest();
}
/**
* Filter-nested - 2 (Filter on variable ?v which is not in scope)
*
*
* SELECT ?v
* { :x :p ?v . { FILTER(?v = 1) } }
*
*
* @see ASTBottomUpOptimizer
*/
public void test_filter_nested_2() throws Exception {
new TestHelper(
"filter-nested-2", // testURI,
"filter-nested-2.rq",// queryFileURL
"filter-nested-2.ttl",// dataFileURL
"filter-nested-2.srx"// resultFileURL
).runTest();
}
/**
* Badly designed left join pattern plus a FILTER with a variable which is
* not in scope.
*
*
* PREFIX :
*
* SELECT *
* {
* :x :p ?v .
* { :x :q ?w
* OPTIONAL { :x :p ?v2 FILTER(?v = 1) }
* }
* }
*
*
* @see ASTBottomUpOptimizer
*/
public void test_filter_scope_1() throws Exception {
new TestHelper(
"filter-scope-1", // testURI,
"filter-scope-1.rq",// queryFileURL
"filter-scope-1.ttl",// dataFileURL
"filter-scope-1.srx"// resultFileURL
).runTest();
}
/**
* Classic badly designed left join.
*
*
* PREFIX :
*
* SELECT *
* {
* ?X :name "paul"
* {?Y :name "george" . OPTIONAL { ?X :email ?Z } }
* }
*
*
*
* @see ASTBottomUpOptimizer
*/
public void test_var_scope_join_1() throws Exception {
new TestHelper(
"var-scope-join-1", // testURI,
"var-scope-join-1.rq",// queryFileURL
"var-scope-join-1.ttl",// dataFileURL
"var-scope-join-1.srx"// resultFileURL
).runTest();
}
/**
* Complex optional semantics: 1.
*
*
* Complex optional: LeftJoin(LeftJoin(BGP(..),{..}),Join(BGP(..),Union(..,..)))
*
*
*
* PREFIX foaf:
* SELECT ?person ?nick ?page ?img ?name ?firstN
* {
* ?person foaf:nick ?nick
* OPTIONAL { ?person foaf:isPrimaryTopicOf ?page }
* OPTIONAL {
* ?person foaf:name ?name
* { ?person foaf:depiction ?img } UNION
* { ?person foaf:firstName ?firstN }
* } FILTER ( bound(?page) || bound(?img) || bound(?firstN) )
* }
*
*/
public void test_opt_complex_1() throws Exception {
new TestHelper(
"opt-complex-1", // testURI,
"opt-complex-1.rq",// queryFileURL
"opt-complex-1.ttl",// dataFileURL
"opt-complex-1-result.ttl"// resultFileURL
).runTest();
}
/**
* Complex optional semantics: 2 (dawg-optional-complex-2).
*
*
* Complex optional: LeftJoin(Join(BGP(..),Graph(var,{..})),Union(..,..))
*
*
*
* PREFIX foaf:
* PREFIX ex:
* SELECT ?id ?ssn
* WHERE
* {
* ?person
* a foaf:Person;
* foaf:name ?name .
* GRAPH ?x {
* [] foaf:name ?name;
* foaf:nick ?nick
* }
* OPTIONAL {
* { ?person ex:empId ?id } UNION { ?person ex:ssn ?ssn }
* }
* }
*
*
*
* :dawg-optional-complex-2 a mf:QueryEvaluationTest ;
* mf:name "Complex optional semantics: 2" ;
* rdfs:comment
* "Complex optional: LeftJoin(Join(BGP(..),Graph(var,{..})),Union(..,..))" ;
* dawgt:approval dawgt:Approved ;
* dawgt:approvedBy ;
* mf:action
* [ qt:query ;
* qt:graphData ;
* qt:data ] ;
* mf:result .
*
*/
public void test_opt_complex_2() throws Exception {
new TestHelper(
"opt-complex-2", // testURI,
"opt-complex-2.rq",// queryFileURL
new String[] { "opt-complex-2-data.ttl",
"opt-complex-2-graphData.ttl" },// dataFileURL
"opt-complex-2-result.ttl"// resultFileURL
).runTest();
}
/**
* Reification of the default graph (dawg-construct-reification-1).
*
*
* PREFIX rdf:
* PREFIX foaf:
*
* CONSTRUCT { [ rdf:subject ?s ;
* rdf:predicate ?p ;
* rdf:object ?o ] . }
* WHERE {
* ?s ?p ?o .
* }
*
*/
public void test_construct_reif_1() throws Exception {
new TestHelper(
"construct-reif-1", // testURI,
"construct-reif-1.rq",// queryFileURL
"construct-reif-1.ttl",// dataFileURL
"construct-reif-1-result.ttl"// resultFileURL
).runTest();
}
/**
*
* SELECT
* WHERE { ?a ?a ?b . }
*
*
* Note: This will drag in the SPORelation on the cluster if the backchain
* access path for (?foo rdf:type rdfs:Resource)
is enabled. I
* have explicitly disabled the backchainers in scale-out so that people
* will not observe this problem. This was done in the Sail, which is where
* they are configured.
*
* @see https://sourceforge.net/apps/trac/bigdata/ticket/32
*/
public void test_dawg_triple_pattern_03() throws Exception {
new TestHelper(
"dawg-tp-03", // testURI,
"dawg-tp-03.rq",// queryFileURL
"dawg-tp-03.ttl",// dataFileURL
"dawg-tp-03-result.ttl"// resultFileURL
).runTest();
}
/**
*
* PREFIX foaf:
* SELECT ?name ?mbox
* WHERE { ?x foaf:name ?name .
* OPTIONAL { ?x foaf:mbox ?mbox }
* }
* ORDER BY ASC(?mbox)
*
*
* Both this test (sort-3
) and
* sparql11-aggregate-group-01
(an aggregation query, which I
* believe is a Sesame specific test rather than a DAWG compliance test) are
* hanging in scale-out CI runs. I suspect that this has to do with failing
* to correctly trigger the last pass evaluation required by both sort and
* aggregation.
*/
public void test_sort_3() throws Exception {
new TestHelper(
"sort-3", // testURI,
"sort-3.rq",// queryFileURL
"sort-3.ttl",// dataFileURL
"sort-3-result.rdf"// resultFileURL
).runTest();
}
/**
* DAWG SPARQL 1.0 test
*
*
* PREFIX :
* PREFIX xsd:
* SELECT DISTINCT *
* WHERE {
* { ?s :p ?o } UNION { ?s :q ?o }
* }
*
*/
public void test_distinct_star_1() throws Exception {
new TestHelper(
"distinct-star-1", // testURI,
"distinct-star-1.rq",// queryFileURL
"distinct-star-1.ttl",// dataFileURL
"distinct-star-1.srx"// resultFileURL
).runTest();
}
/**
* DAWG test ("find pairs that don't value-compare").
*
*
* PREFIX :
* PREFIX xsd:
*
* SELECT ?x ?v1 ?y ?v2
* {
* ?x :p ?v1 .
* ?y :p ?v2 .
* OPTIONAL { ?y :p ?v3 . FILTER( ?v1 != ?v3 || ?v1 = ?v3 )}
* FILTER (!bound(?v3))
* }
*
*
*
* mf:notable mf:IllFormedLiteral ;
* mf:requires mf:KnownTypesDefault2Neq ;
* mf:requires mf:LangTagAwareness ;
*
*/
public void test_open_eq_12() throws Exception {
new TestHelper(
"open-eq-12", // testURI,
"open-eq-12.rq",// queryFileURL
"open-eq-12.ttl",// dataFileURL
"open-eq-12.srx"// resultFileURL
).runTest();
}
/**
* DAWG test (FILTER inside an OPTIONAL does not block an entire solution).
*
* PREFIX dc:
* PREFIX x:
* SELECT ?title ?price
* WHERE
* { ?book dc:title ?title .
* OPTIONAL
* { ?book x:price ?price .
* FILTER (?price < 15) .
* } .
* }
*
*/
public void test_OPTIONAL_FILTER() throws Exception {
new TestHelper(
"OPTIONAL-FILTER", // testURI,
"OPTIONAL_FILTER.rq",// queryFileURL
"OPTIONAL_FILTER.ttl",// dataFileURL
"OPTIONAL_FILTER-result.ttl"// resultFileURL
).runTest();
}
/**
* This is sesame TCK test sparql11-subquery-04
. We picked it
* up with Sesame 2.6.3. It fails in the TCK run, but it appears to be Ok
* when run at this layer.
*
* Ah. This is using ORDER BY on an aggregate, so this is the same as
* {@link #test_sparql11_order_02()}.
*
* Query:
*
*
* PREFIX ex:
* SELECT ?friend
* WHERE {
* ?popular ex:knows ?friend .
* {
* SELECT ?popular
* WHERE {
* ?someone ex:knows ?popular
* }
* GROUP BY ?popular
* ORDER BY DESC(COUNT(?someone))
* LIMIT 2
* }
* }
*
*
* Data:
*
*
* @prefix ex: .
*
* ex:george ex:knows ex:ringo, ex:john .
* ex:ringo ex:knows ex:george, ex:john.
* ex:paul ex:knows ex:george, ex:john .
*
*/
public void test_sparql11_subquery_04() throws Exception {
assertTrue(store.isQuads());
new TestHelper(
"sparql11-subquery-04", // testURI,
"sparql11-subquery-04.rq",// queryFileURL
"sparql11-subquery-04.ttl",// dataFileURL
"sparql11-subquery-04.srx"// resultFileURL
).runTest();
}
/**
* TCK (non-DAWG) query we picked up with Sesame 2.6.3
. I've
* modified the test slightly to report the aggregate used in the ORDER BY
* through the SELECT.
*
*
* SELECT ?type (count(?subj) as ?cnt)
* WHERE { ?subj a ?type }
* GROUP BY ?type
* ORDER BY (count(?subj))
*
*
*
* @prefix : .
*
* :one a :Number .
* :two a :Number .
* :three a :Number .
* :four a :Number .
* :five a :Number .
* :six a :Number .
* :seven a :Number .
* :eight a :Number .
* :nine a :Number .
* :ten a :Number .
* :a a :Letter .
* :b a :Letter .
* :c a :Letter .
* :Sunday a :Day .
* :Monday a :Day .
* :Tuesday a :Day .
* :Wednesday a :Day .
* :Thursday a :Day .
* :Friday a :Day .
* :Saturday a :Day .
*
*
*
*
*
*
*
*
*
*
*
* http://example.org/Letter
*
*
*
*
* http://example.org/Day
*
*
*
*
* http://example.org/Number
*
*
*
*
*
*
* @see ORDER by
* GROUP aggregate
*
* @see Allow
* aggregates in ORDER BY clause
*/
public void test_sparql11_order_02() throws Exception {
if(!BigdataStatics.runKnownBadTests) return;
new TestHelper("sparql11-order-02", // testURI,
"sparql11-order-02.rq",// queryFileURL
"sparql11-order-02.ttl",// dataFileURL
"sparql11-order-02.srx"// resultFileURL
,true// checkOrder
).runTest();
}
/**
* Variant of {@link #test_sparql11_order_02()} which works around the LCWC
* change in SPARQL ORDER BY semantics.
*
*
* SELECT ?type (count(?subj) as ?cnt)
* WHERE { ?subj a ?type }
* GROUP BY ?type
* ORDER BY ?cnt
*
*
* @throws Exception
*/
public void test_sparql11_order_02_workaround() throws Exception {
new TestHelper("sparql11-order-02-workaround", // testURI,
"sparql11-order-02-workaround.rq",// queryFileURL
"sparql11-order-02.ttl",// dataFileURL
"sparql11-order-02.srx"// resultFileURL
,true// checkOrder
).runTest();
}
/**
* TCK (non-DAWG) query we picked up with Sesame 2.6.3
*
*
* SELECT ?type
* WHERE { ?subj a ?type }
* GROUP BY ?type
* ORDER BY DESC(count(?subj))
*
*
* @see ORDER by GROUP aggregate
*/
public void test_sparql11_order_03() throws Exception {
if(!BigdataStatics.runKnownBadTests) return;
new TestHelper("sparql11-order-03", // testURI,
"sparql11-order-03.rq",// queryFileURL
"sparql11-order-03.ttl",// dataFileURL
"sparql11-order-03.srx"// resultFileURL
,true// checkOrder
).runTest();
}
/**
* This is based on service13
, which is an openrdf Federated
* Query test. The test was failing because the two SERVICE joins do not
* share any variables and use a hash join. Thus, we had two solutions from
* the first SERVICE and sent two empty solutions to the second
* SERVICE. This caused the 2nd SERVICE to deliver twice as many solutions
* as is expected. Because the hash join was not correlated through a rowId,
* we wind up wind a full cross product of both (empty) source solutions and
* all solutions coming back from the 2nd service.
*
* It is interesting that this test passes without modification when run on
* purely local data. This suggests that the problem is not one of bottom up
* evaluation, but the lack of a variable to correlate the result sets from
* the remote services.
*
* Note: This test is a rewrite of the same query to run against purely
* local data. All I did was comment out the SERVICE keyword and service
* URI.
*
*
* # Test for SES 899
*
* PREFIX :
* PREFIX owl:
*
* SELECT ?a ?b
* {
* #SERVICE
* {
* ?a a ?t1
* filter(?t1 = owl:Class)
* }
* #SERVICE
* {
* ?b a ?t2
* filter(?t2 = owl:Class)
* }
* }
*
*/
public void test_join_with_no_shared_variables() throws Exception {
final ASTContainer astContainer = new TestHelper(
"join_with_no_shared_variables", // testURI,
"join_with_no_shared_variables.rq",// queryFileURL
"join_with_no_shared_variables.ttl",// dataFileURL
"join_with_no_shared_variables.srx"// resultFileURL
,false// checkOrder
).runTest();
if (log.isInfoEnabled())
log.info(astContainer.toString());
}
/**
* Execute the stress tests a couple of times.
*
* @throws Exception
*/
public void test_stressTests() throws Exception {
for (int i = 0; i < 100; i++) {
final TestSuite suite = new TestSuite(
TCKStressTests.class.getSimpleName());
suite.addTestSuite(TCKStressTests.class);
suite.run(new TestResult());
}
}
// /**
// * This is BSBM BI query 05 on the PC100 data set. We picked this up with
// * Sesame 2.6.3. It is failing with a "solution set not found" error in the
// * TCK. The problem is reproduced by this test. However, the test is a bit
// * long running, so take it out of this test suite once we get to the bottom
// * of the problem.
// *
// * I finally tracked this down to an error in the logic to decide on a merge
// * join. The story is documented at the trac issue below. However, even
// * after all that the predicted result for openrdf differs at the 4th
// * decimal place. I have therefore filtered out this test from the openrdf
// * TCK.
// *
// *
// * Missing bindings:
// * [product=http://www4.wiwiss.fu-berlin.de/bizer/bsbm/v01/instances/dataFromProducer2/Product63;
// * nrOfReviews="3"^^;
// * avgPrice="4207.426"^^;
// * country=http://downlode.org/rdf/iso-3166/countries#RU]
// * ====================================================
// * Unexpected bindings:
// * [country=http://downlode.org/rdf/iso-3166/countries#RU;
// * product=http://www4.wiwiss.fu-berlin.de/bizer/bsbm/v01/instances/dataFromProducer2/Product63;
// * nrOfReviews="3"^^;
// * avgPrice="4207.4263"^^]
// *
// *
// * @see
// * BSBM BI Q5 Error when using MERGE JOIN
// */
// public void test_BSBM_BI_05() throws Exception {
//
// new TestHelper(
// "BSBM BI 05", // testURI,
// "bsbm-bi-q5.rq",// queryFileURL
// "bsbm-100.ttl",// dataFileURL // per the openrdf test.
//// "bsbm/bsbm3_dataset_pc10.nt",// dataFileURL
// "bsbm-bi-q5.srx"// resultFileURL // per the openrdf test.
// ,false// laxCardinality
// ,true// checkOrder
// ).runTest();
//
// }
/**
* Tests to be executed in a stress test fashion, i.e. multiple times.
*
* @author msc
*/
public static class TCKStressTests extends AbstractDataDrivenSPARQLTestCase {
/**
*
*/
public TCKStressTests() {
}
/**
* @param name
*/
public TCKStressTests(String name) {
super(name);
}
/**
* Optional-filter - 1
*
*
* PREFIX :
*
* SELECT *
* {
* ?x :p ?v .
* OPTIONAL
* {
* ?y :q ?w .
* FILTER(?v=2)
* }
* }
*
*
* A FILTER inside an OPTIONAL can reference a variable bound in the
* required part of the OPTIONAL
*
* @see ASTBottomUpOptimizer
* @see ASTSimpleOptionalOptimizer
*/
public void test_opt_filter_1() throws Exception {
new TestHelper("opt-filter-1", // testURI,
"opt-filter-1.rq",// queryFileURL
"opt-filter-1.ttl",// dataFileURL
"opt-filter-1.srx"// resultFileURL
).runTest();
}
}
}