org.eclipse.rdf4j.federated.evaluation.FederationEvalStrategy Maven / Gradle / Ivy
Show all versions of rdf4j-tools-federation Show documentation
/*******************************************************************************
* Copyright (c) 2019 Eclipse RDF4J contributors.
*
* All rights reserved. This program and the accompanying materials
* are made available under the terms of the Eclipse Distribution License v1.0
* which accompanies this distribution, and is available at
* http://www.eclipse.org/org/documents/edl-v10.php.
*
* SPDX-License-Identifier: BSD-3-Clause
*******************************************************************************/
package org.eclipse.rdf4j.federated.evaluation;
import java.util.HashSet;
import java.util.List;
import java.util.Set;
import java.util.concurrent.Executor;
import java.util.concurrent.atomic.AtomicBoolean;
import java.util.function.Supplier;
import java.util.stream.Collectors;
import org.eclipse.rdf4j.collection.factory.api.CollectionFactory;
import org.eclipse.rdf4j.common.iteration.CloseableIteration;
import org.eclipse.rdf4j.common.iteration.EmptyIteration;
import org.eclipse.rdf4j.common.iteration.SingletonIteration;
import org.eclipse.rdf4j.federated.FedX;
import org.eclipse.rdf4j.federated.FedXConfig;
import org.eclipse.rdf4j.federated.FederationContext;
import org.eclipse.rdf4j.federated.algebra.CheckStatementPattern;
import org.eclipse.rdf4j.federated.algebra.ConjunctiveFilterExpr;
import org.eclipse.rdf4j.federated.algebra.EmptyResult;
import org.eclipse.rdf4j.federated.algebra.EmptyStatementPattern;
import org.eclipse.rdf4j.federated.algebra.ExclusiveGroup;
import org.eclipse.rdf4j.federated.algebra.ExclusiveStatement;
import org.eclipse.rdf4j.federated.algebra.ExclusiveTupleExpr;
import org.eclipse.rdf4j.federated.algebra.ExclusiveTupleExprRenderer;
import org.eclipse.rdf4j.federated.algebra.FedXArbitraryLengthPath;
import org.eclipse.rdf4j.federated.algebra.FedXLeftJoin;
import org.eclipse.rdf4j.federated.algebra.FedXService;
import org.eclipse.rdf4j.federated.algebra.FedXZeroLengthPath;
import org.eclipse.rdf4j.federated.algebra.FederatedDescribeOperator;
import org.eclipse.rdf4j.federated.algebra.FilterExpr;
import org.eclipse.rdf4j.federated.algebra.FilterValueExpr;
import org.eclipse.rdf4j.federated.algebra.HolderNode;
import org.eclipse.rdf4j.federated.algebra.NJoin;
import org.eclipse.rdf4j.federated.algebra.NUnion;
import org.eclipse.rdf4j.federated.algebra.SingleSourceQuery;
import org.eclipse.rdf4j.federated.algebra.StatementSource;
import org.eclipse.rdf4j.federated.algebra.StatementSource.StatementSourceType;
import org.eclipse.rdf4j.federated.algebra.StatementSourcePattern;
import org.eclipse.rdf4j.federated.algebra.StatementTupleExpr;
import org.eclipse.rdf4j.federated.cache.CacheUtils;
import org.eclipse.rdf4j.federated.cache.SourceSelectionCache;
import org.eclipse.rdf4j.federated.endpoint.Endpoint;
import org.eclipse.rdf4j.federated.evaluation.concurrent.ControlledWorkerScheduler;
import org.eclipse.rdf4j.federated.evaluation.concurrent.ParallelServiceExecutor;
import org.eclipse.rdf4j.federated.evaluation.iterator.FedXPathIteration;
import org.eclipse.rdf4j.federated.evaluation.iterator.FederatedDescribeIteration;
import org.eclipse.rdf4j.federated.evaluation.iterator.SingleBindingSetIteration;
import org.eclipse.rdf4j.federated.evaluation.join.ControlledWorkerBoundJoin;
import org.eclipse.rdf4j.federated.evaluation.join.ControlledWorkerJoin;
import org.eclipse.rdf4j.federated.evaluation.join.ControlledWorkerLeftJoin;
import org.eclipse.rdf4j.federated.evaluation.join.SynchronousBoundJoin;
import org.eclipse.rdf4j.federated.evaluation.join.SynchronousJoin;
import org.eclipse.rdf4j.federated.evaluation.union.ControlledWorkerUnion;
import org.eclipse.rdf4j.federated.evaluation.union.ParallelGetStatementsTask;
import org.eclipse.rdf4j.federated.evaluation.union.ParallelPreparedAlgebraUnionTask;
import org.eclipse.rdf4j.federated.evaluation.union.ParallelPreparedUnionTask;
import org.eclipse.rdf4j.federated.evaluation.union.ParallelUnionOperatorTask;
import org.eclipse.rdf4j.federated.evaluation.union.SynchronousWorkerUnion;
import org.eclipse.rdf4j.federated.evaluation.union.WorkerUnionBase;
import org.eclipse.rdf4j.federated.exception.FedXRuntimeException;
import org.eclipse.rdf4j.federated.exception.IllegalQueryException;
import org.eclipse.rdf4j.federated.optimizer.DefaultFedXCostModel;
import org.eclipse.rdf4j.federated.optimizer.ExclusiveTupleExprOptimizer;
import org.eclipse.rdf4j.federated.optimizer.FilterOptimizer;
import org.eclipse.rdf4j.federated.optimizer.GenericInfoOptimizer;
import org.eclipse.rdf4j.federated.optimizer.LimitOptimizer;
import org.eclipse.rdf4j.federated.optimizer.ServiceOptimizer;
import org.eclipse.rdf4j.federated.optimizer.SourceSelection;
import org.eclipse.rdf4j.federated.optimizer.StatementGroupAndJoinOptimizer;
import org.eclipse.rdf4j.federated.optimizer.UnionOptimizer;
import org.eclipse.rdf4j.federated.structures.FedXDataset;
import org.eclipse.rdf4j.federated.structures.QueryInfo;
import org.eclipse.rdf4j.federated.structures.QueryType;
import org.eclipse.rdf4j.federated.util.FedXUtil;
import org.eclipse.rdf4j.federated.util.QueryStringUtil;
import org.eclipse.rdf4j.model.IRI;
import org.eclipse.rdf4j.model.Resource;
import org.eclipse.rdf4j.model.Statement;
import org.eclipse.rdf4j.model.Value;
import org.eclipse.rdf4j.model.ValueFactory;
import org.eclipse.rdf4j.model.impl.BooleanLiteral;
import org.eclipse.rdf4j.model.impl.SimpleValueFactory;
import org.eclipse.rdf4j.query.BindingSet;
import org.eclipse.rdf4j.query.Dataset;
import org.eclipse.rdf4j.query.MalformedQueryException;
import org.eclipse.rdf4j.query.QueryEvaluationException;
import org.eclipse.rdf4j.query.algebra.DescribeOperator;
import org.eclipse.rdf4j.query.algebra.Join;
import org.eclipse.rdf4j.query.algebra.QueryRoot;
import org.eclipse.rdf4j.query.algebra.Service;
import org.eclipse.rdf4j.query.algebra.StatementPattern;
import org.eclipse.rdf4j.query.algebra.TupleExpr;
import org.eclipse.rdf4j.query.algebra.ValueExpr;
import org.eclipse.rdf4j.query.algebra.Var;
import org.eclipse.rdf4j.query.algebra.evaluation.QueryEvaluationStep;
import org.eclipse.rdf4j.query.algebra.evaluation.QueryValueEvaluationStep;
import org.eclipse.rdf4j.query.algebra.evaluation.ValueExprEvaluationException;
import org.eclipse.rdf4j.query.algebra.evaluation.federation.FederatedService;
import org.eclipse.rdf4j.query.algebra.evaluation.federation.ServiceJoinIterator;
import org.eclipse.rdf4j.query.algebra.evaluation.impl.DefaultEvaluationStrategy;
import org.eclipse.rdf4j.query.algebra.evaluation.impl.EvaluationStatistics;
import org.eclipse.rdf4j.query.algebra.evaluation.impl.QueryEvaluationContext;
import org.eclipse.rdf4j.query.algebra.evaluation.impl.StrictEvaluationStrategy;
import org.eclipse.rdf4j.query.algebra.evaluation.iterator.BadlyDesignedLeftJoinIterator;
import org.eclipse.rdf4j.query.algebra.evaluation.iterator.DescribeIteration;
import org.eclipse.rdf4j.query.algebra.evaluation.iterator.HashJoinIteration;
import org.eclipse.rdf4j.query.algebra.evaluation.optimizer.ConstantOptimizer;
import org.eclipse.rdf4j.query.algebra.evaluation.optimizer.DisjunctiveConstraintOptimizer;
import org.eclipse.rdf4j.query.algebra.evaluation.util.QueryEvaluationUtil;
import org.eclipse.rdf4j.query.algebra.helpers.TupleExprs;
import org.eclipse.rdf4j.query.algebra.helpers.collectors.VarNameCollector;
import org.eclipse.rdf4j.query.impl.EmptyBindingSet;
import org.eclipse.rdf4j.query.impl.FallbackDataset;
import org.eclipse.rdf4j.repository.RepositoryException;
import org.eclipse.rdf4j.repository.sparql.federation.CollectionIteration;
import org.eclipse.rdf4j.repository.sparql.federation.RepositoryFederatedService;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
/**
* Base class for the Evaluation strategies.
*
* @author Andreas Schwarte
*
* @see SailFederationEvalStrategy
* @see SparqlFederationEvalStrategy
*
*/
public abstract class FederationEvalStrategy extends StrictEvaluationStrategy {
private static final Logger log = LoggerFactory.getLogger(FederationEvalStrategy.class);
protected Executor executor;
protected SourceSelectionCache cache;
protected FederationContext federationContext;
public FederationEvalStrategy(FederationContext federationContext) {
super(new org.eclipse.rdf4j.query.algebra.evaluation.TripleSource() {
@Override
public CloseableIteration extends Statement> getStatements(
Resource subj, IRI pred, Value obj, Resource... contexts)
throws QueryEvaluationException {
throw new FedXRuntimeException(
"Federation Strategy does not support org.eclipse.rdf4j.query.algebra.evaluation.TripleSource#getStatements."
+
" If you encounter this exception, please report it.");
}
@Override
public ValueFactory getValueFactory() {
return SimpleValueFactory.getInstance();
}
}, federationContext.getFederatedServiceResolver());
this.federationContext = federationContext;
this.executor = federationContext.getManager().getExecutor();
this.cache = federationContext.getSourceSelectionCache();
}
@Override
public TupleExpr optimize(TupleExpr expr, EvaluationStatistics evaluationStatistics,
BindingSet bindings) {
if (!(evaluationStatistics instanceof FederationEvaluationStatistics)) {
throw new FedXRuntimeException(
"Expected FederationEvaluationStatistics, was " + evaluationStatistics.getClass());
}
FederationEvaluationStatistics stats = (FederationEvaluationStatistics) evaluationStatistics;
QueryInfo queryInfo = stats.getQueryInfo();
List members = getMembersFromContext(stats);
// Clone the tuple expression to allow for more aggressive optimizations
TupleExpr clone = expr.clone();
TupleExpr query;
if (clone instanceof QueryRoot) {
query = clone;
} else {
query = new QueryRoot(clone);
}
GenericInfoOptimizer info = new GenericInfoOptimizer(queryInfo);
// collect information and perform generic optimizations
info.optimize(query);
// if the federation has a single member only, evaluate the entire query there
if (members.size() == 1 && queryInfo.getQuery() != null && propagateServices(info.getServices())
&& queryInfo.getQueryType() != QueryType.UPDATE) {
return new SingleSourceQuery(expr, members.get(0), queryInfo);
}
if (log.isTraceEnabled()) {
log.trace("Query before Optimization: " + query);
}
/* original RDF4J optimizers */
new ConstantOptimizer(this).optimize(query, dataset, bindings); // maybe remove this optimizer later
new DisjunctiveConstraintOptimizer().optimize(query, dataset, bindings);
/*
* TODO add some generic optimizers: - FILTER ?s=1 && ?s=2 => EmptyResult - Remove variables that are not
* occurring in query stmts from filters
*/
/* custom optimizers, execute only when needed */
// if the query has a single relevant source (and if it is not a SERVICE query), evaluate at this source only
// Note: UPDATE queries are always handled in the federation engine to adhere to the configured
// write strategy. Also DESCRIBE queries are handled in the federation
Set relevantSources = performSourceSelection(members, cache, queryInfo, info);
if (relevantSources.size() == 1 && propagateServices(info.getServices())
&& queryInfo.getQueryType() != QueryType.UPDATE && queryInfo.getQueryType() != QueryType.DESCRIBE) {
return new SingleSourceQuery(query, relevantSources.iterator().next(), queryInfo);
}
if (info.hasService()) {
new ServiceOptimizer(queryInfo).optimize(query);
}
// optimize unions, if available
if (info.hasUnion()) {
new UnionOptimizer(queryInfo).optimize(query);
}
optimizeExclusiveExpressions(query, queryInfo, info);
// optimize statement groups and join order
optimizeJoinOrder(query, queryInfo, info);
// potentially push limits (if applicable)
if (info.hasLimit()) {
new LimitOptimizer().optimize(query);
}
// optimize Filters, if available
// Note: this is done after the join order is determined to ease filter pushing
if (info.hasFilter()) {
new FilterOptimizer().optimize(query);
}
if (log.isTraceEnabled()) {
log.trace("Query after Optimization: " + query);
}
return query;
}
/**
* Returns the federation members that are active in the current federation. By default it is all federation
* members. If the passed {@link Dataset} is a {@link FedXDataset}, the defined {@link FedXDataset#getEndpoints()}
* are used.
*
* @param evaluationStatisticss to keep the current query context
* @return
*/
protected List getMembersFromContext(FederationEvaluationStatistics evaluationStatisticss) {
Dataset queryDataset = evaluationStatisticss.getDataset();
// if the query uses a FROM clause, the external dataset
// is wrapped as primary dataset in FallbackDataset
if (queryDataset instanceof FallbackDataset) {
queryDataset = ((FallbackDataset) queryDataset).getPrimary();
}
if (queryDataset instanceof FedXDataset) {
// run the query against a selected set of endpoints
FedXDataset ds = (FedXDataset) queryDataset;
return federationContext.getEndpointManager().getEndpoints(ds.getEndpoints());
} else {
// evaluate against entire federation
FedX fed = federationContext.getFederation();
return fed.getMembers();
}
}
/**
* Perform source selection for all statements of the query. As a result of this method all statement nodes are
* annotated with their relevant sources.
*
* @param members
* @param cache
* @param queryInfo
* @param info
* @return the set of relevant endpoints for the entire query
*/
protected Set performSourceSelection(List members, SourceSelectionCache cache,
QueryInfo queryInfo,
GenericInfoOptimizer info) {
Set relevantMembers = new HashSet<>();
var stmtPatterns = new HashSet<>(info.getStatements());
if (info.hasPathExpression()) {
var handled = new HashSet<>();
for (StatementPattern stmt : stmtPatterns) {
if (stmt.getParentNode() instanceof FedXArbitraryLengthPath) {
var pathExpr = (FedXArbitraryLengthPath) stmt.getParentNode();
var identifiedMembers = performSourceSelection(pathExpr, stmt, members, cache, queryInfo);
relevantMembers.addAll(identifiedMembers);
handled.add(stmt);
}
}
stmtPatterns.removeAll(handled);
}
// Source Selection: all nodes are annotated with their source
SourceSelection sourceSelection = new SourceSelection(members, cache, queryInfo);
sourceSelection.doSourceSelection(info.getStatements());
relevantMembers.addAll(sourceSelection.getRelevantSources());
return relevantMembers;
}
/**
* Perform source selection on the statement pattern representing the path expression. Source selection must be a
* subset of the provided members.
*
* The implementation expects to replace the statement, e.g. with {@link StatementSourcePattern},
* {@link ExclusiveStatement} or {@link EmptyStatementPattern}
*
* @param pathExpr
* @param stmt
* @param members
* @param cache
* @param queryInfo
* @return identified relevant members
*/
protected Set performSourceSelection(FedXArbitraryLengthPath pathExpr, StatementPattern stmt,
List members, SourceSelectionCache cache,
QueryInfo queryInfo) {
// for now the strategy is to do source selection on a statement pattern containing the predicate
// of the original path expression.
// Note: for zero-length path we add all members (as there does not have to exist a path with the
// predicate)
Set identifiedMembers;
if (pathExpr.getMinLength() == 0) {
identifiedMembers = new HashSet<>(members);
} else {
StatementPattern checkStmt = new StatementPattern(stmt.getScope(), new Var("subject"),
clone(stmt.getPredicateVar()), new Var("object"), clone(stmt.getContextVar()));
@SuppressWarnings("unused") // only used as artificial parent
HolderNode holderParent = new HolderNode(checkStmt);
SourceSelection sourceSelection = new SourceSelection(members, cache, queryInfo);
sourceSelection.doSourceSelection(List.of(checkStmt));
identifiedMembers = sourceSelection.getRelevantSources();
}
var statementSources = identifiedMembers.stream()
.map(e -> new StatementSource(e.getId(), StatementSourceType.REMOTE))
.collect(Collectors.toList());
if (statementSources.size() == 1) {
var replacement = new ExclusiveStatement(stmt, statementSources.iterator().next(), queryInfo);
stmt.replaceWith(replacement);
} else if (statementSources.size() > 1) {
var replacement = new StatementSourcePattern(stmt, queryInfo);
for (var stmtSource : statementSources) {
((StatementSourcePattern) replacement).addStatementSource(stmtSource);
}
stmt.replaceWith(replacement);
} else {
var replacement = new EmptyStatementPattern(stmt);
stmt.replaceWith(replacement);
}
return identifiedMembers;
}
private Var clone(Var var) {
if (var == null) {
return null;
}
return var.clone();
}
protected void optimizeJoinOrder(TupleExpr query, QueryInfo queryInfo, GenericInfoOptimizer info) {
// optimize statement groups and join order
new StatementGroupAndJoinOptimizer(queryInfo, DefaultFedXCostModel.INSTANCE).optimize(query);
}
/**
* Whether to propagate a {@link SingleSourceQuery} containing SERVICE clauses. By default, the query is always
* evaluated within the FedX engine if it contains a SERVICE clause.
*
* Customized implementation may propagate a {@link SingleSourceQuery} including the SERVICE clause (e.g. for
* Wikidata the Label service can only be accessed in the wikidata endpoint.
*
*
* @param serviceNodes
* @return if true
, a {@link SingleSourceQuery} containing SERVICE clauses is propagated as-is
*/
protected boolean propagateServices(List serviceNodes) {
boolean hasServices = serviceNodes != null && !serviceNodes.isEmpty();
if (hasServices) {
return false;
}
return true;
}
/**
* Optimize {@link ExclusiveTupleExpr}, e.g. restructure the exclusive parts of the query AST.
*
* @param query
* @param queryInfo
* @param info
*/
protected void optimizeExclusiveExpressions(TupleExpr query, QueryInfo queryInfo, GenericInfoOptimizer info) {
// identify exclusive expressions
new ExclusiveTupleExprOptimizer().optimize(query);
}
@Deprecated(forRemoval = true)
@Override
public CloseableIteration evaluate(
TupleExpr expr, BindingSet bindings)
throws QueryEvaluationException {
if (expr instanceof StatementTupleExpr) {
return ((StatementTupleExpr) expr).evaluate(bindings);
}
if (expr instanceof NJoin) {
return evaluateNJoin((NJoin) expr, bindings);
}
if (expr instanceof NUnion) {
return evaluateNaryUnion((NUnion) expr, bindings);
}
if (expr instanceof ExclusiveGroup) {
return ((ExclusiveGroup) expr).evaluate(bindings);
}
if (expr instanceof ExclusiveTupleExpr) {
return evaluateExclusiveTupleExpr((ExclusiveTupleExpr) expr, bindings);
}
if (expr instanceof FedXLeftJoin) {
return evaluateLeftJoin((FedXLeftJoin) expr, bindings);
}
if (expr instanceof SingleSourceQuery) {
return evaluateSingleSourceQuery((SingleSourceQuery) expr, bindings);
}
if (expr instanceof FedXService) {
return evaluateService((FedXService) expr, bindings);
}
if (expr instanceof FedXArbitraryLengthPath) {
return evaluateArbitrayLengthPath((FedXArbitraryLengthPath) expr, bindings);
}
if (expr instanceof FedXZeroLengthPath) {
return evaluateZeroLengthPath((FedXZeroLengthPath) expr, bindings);
}
if (expr instanceof EmptyResult) {
return new EmptyIteration<>();
}
return super.evaluate(expr, bindings);
}
@Override
public QueryEvaluationStep precompile(
TupleExpr expr, QueryEvaluationContext context)
throws QueryEvaluationException {
if (expr instanceof Join) {
return QueryEvaluationStep.minimal(this, expr);
}
if (expr instanceof StatementTupleExpr) {
return QueryEvaluationStep.minimal(this, expr);
}
if (expr instanceof NJoin) {
return prepareNJoin((NJoin) expr, context);
}
if (expr instanceof NUnion) {
return prepareNaryUnion((NUnion) expr, context);
}
if (expr instanceof ExclusiveGroup) {
return QueryEvaluationStep.minimal(this, expr);
}
if (expr instanceof ExclusiveTupleExpr) {
return prepareExclusiveTupleExpr((ExclusiveTupleExpr) expr, context);
}
if (expr instanceof FedXLeftJoin) {
return prepareLeftJoin((FedXLeftJoin) expr, context);
}
if (expr instanceof SingleSourceQuery) {
return QueryEvaluationStep.minimal(this, expr);
}
if (expr instanceof FedXService) {
return QueryEvaluationStep.minimal(this, expr);
}
if (expr instanceof FedXArbitraryLengthPath) {
return prepare((FedXArbitraryLengthPath) expr, context);
}
if (expr instanceof FedXZeroLengthPath) {
return prepare((FedXZeroLengthPath) expr, context);
}
if (expr instanceof EmptyResult) {
return QueryEvaluationStep.minimal(this, expr);
}
return super.precompile(expr, context);
}
/**
* Retrieve the statements matching the provided subject, predicate and object value from the federation members.
*
*
* For a bound statement, i.e. a statement with no free variables, the statement itself is returned if some member
* has this statement, an empty iteration otherwise.
*
*
* If the statement has free variables, i.e. one of the provided arguments in null
, the union of
* results from relevant statement sources is constructed.
*
* @param subj
* @param pred
* @param obj
* @param contexts
* @return the statement iteration
*
* @throws RepositoryException
* @throws MalformedQueryException
* @throws QueryEvaluationException
*/
public CloseableIteration getStatements(QueryInfo queryInfo, Resource subj,
IRI pred, Value obj, Resource... contexts)
throws RepositoryException, MalformedQueryException, QueryEvaluationException {
List members = federationContext.getFederation().getMembers();
// a bound query: if at least one fed member provides results
// return the statement, otherwise empty result
if (subj != null && pred != null && obj != null) {
if (CacheUtils.checkCacheUpdateCache(cache, members, subj, pred, obj, queryInfo, contexts)) {
return new SingletonIteration<>(
FedXUtil.valueFactory().createStatement(subj, pred, obj));
}
return new EmptyIteration<>();
}
// form the union of results from relevant endpoints
List sources = CacheUtils.checkCacheForStatementSourcesUpdateCache(cache, members, subj, pred,
obj, queryInfo, contexts);
if (sources.isEmpty()) {
return new EmptyIteration<>();
}
if (sources.size() == 1) {
Endpoint e = federationContext.getEndpointManager().getEndpoint(sources.get(0).getEndpointID());
return e.getTripleSource().getStatements(subj, pred, obj, queryInfo, contexts);
}
// TODO why not collect in parallel?
WorkerUnionBase union = new SynchronousWorkerUnion<>(queryInfo);
for (StatementSource source : sources) {
Endpoint e = federationContext.getEndpointManager().getEndpoint(source.getEndpointID());
ParallelGetStatementsTask task = new ParallelGetStatementsTask(union, e, subj, pred, obj, queryInfo,
contexts);
union.addTask(task);
}
// run the union in a separate thread
executor.execute(union);
// TODO distinct iteration ?
return union;
}
public CloseableIteration evaluateService(FedXService service,
BindingSet bindings) throws QueryEvaluationException {
ParallelServiceExecutor pe = new ParallelServiceExecutor(service, this, bindings, federationContext);
pe.run(); // non-blocking (blocking happens in the iterator)
return pe;
}
public CloseableIteration evaluateSingleSourceQuery(SingleSourceQuery query,
BindingSet bindings) throws QueryEvaluationException {
try {
Endpoint source = query.getSource();
return source.getTripleSource()
.getStatements(query.getQueryString(), bindings, query.getQueryInfo().getQueryType(),
query.getQueryInfo());
} catch (RepositoryException | MalformedQueryException e) {
throw new QueryEvaluationException(e);
}
}
public CloseableIteration evaluateNJoin(NJoin join, BindingSet bindings)
throws QueryEvaluationException {
return precompile(join).evaluate(bindings);
}
protected QueryEvaluationStep prepareNJoin(NJoin join, QueryEvaluationContext context)
throws QueryEvaluationException {
QueryEvaluationStep resultProvider = precompile(join.getArg(0), context);
ControlledWorkerScheduler joinScheduler = federationContext.getManager().getJoinScheduler();
return bindings -> {
CloseableIteration result = null;
try {
result = resultProvider.evaluate(bindings);
for (int i = 1, n = join.getNumberOfArguments(); i < n; i++) {
result = executeJoin(joinScheduler, result, join.getArg(i), join.getJoinVariables(i), bindings,
join.getQueryInfo());
}
} catch (Throwable t) {
if (result != null) {
result.close();
}
throw t;
}
return result;
};
}
protected CloseableIteration evaluateArbitrayLengthPath(
FedXArbitraryLengthPath alp, BindingSet bindings)
throws QueryEvaluationException {
return precompile(alp).evaluate(bindings);
}
protected QueryEvaluationStep prepare(FedXArbitraryLengthPath alp, QueryEvaluationContext context)
throws QueryEvaluationException {
return (bindings) -> new FedXPathIteration(FederationEvalStrategy.this, alp.getScope(), alp.getSubjectVar(),
alp.getPathExpression(), alp.getObjectVar(), alp.getContextVar(), alp.getMinLength(), bindings,
alp.getQueryInfo());
}
protected CloseableIteration evaluateZeroLengthPath(FedXZeroLengthPath zlp,
BindingSet bindings)
throws QueryEvaluationException {
return precompile(zlp).evaluate(bindings);
}
protected QueryEvaluationStep prepare(FedXZeroLengthPath zlp, QueryEvaluationContext context)
throws QueryEvaluationException {
final Var subjectVar = zlp.getSubjectVar();
final Var objVar = zlp.getObjectVar();
final Var contextVar = zlp.getContextVar();
QueryValueEvaluationStep subPrep = precompile(subjectVar, context);
QueryValueEvaluationStep objPrep = precompile(objVar, context);
return new FedXZeroLengthPathEvaluationStep(subjectVar, objVar, contextVar, subPrep, objPrep, this, context,
() -> zlp.getStatementSources(), () -> zlp.getQueryInfo());
}
/**
* Evaluate a {@link FedXLeftJoin} (i.e. an OPTIONAL clause)
*
* @param leftJoin
* @param bindings
* @return the resulting iteration
* @throws QueryEvaluationException
* @see StrictEvaluationStrategy#evaluate(org.eclipse.rdf4j.query.algebra.LeftJoin, BindingSet)
*/
protected CloseableIteration evaluateLeftJoin(FedXLeftJoin leftJoin,
final BindingSet bindings) throws QueryEvaluationException {
return precompile(leftJoin).evaluate(bindings);
}
protected QueryEvaluationStep prepareLeftJoin(FedXLeftJoin leftJoin, QueryEvaluationContext context)
throws QueryEvaluationException {
/*
* NOTE: this implementation is taken from StrictEvaluationStrategy.evaluate(LeftJoin, BindingSet)
*
* However, we have to take care for some concurrency scheduling to guarantee the order in which subqueries are
* executed.
*/
if (TupleExprs.containsSubquery(leftJoin.getRightArg())) {
return new QueryEvaluationStep() {
final QueryEvaluationStep leftES = precompile(leftJoin.getLeftArg(), context);
final QueryEvaluationStep rightES = precompile(leftJoin.getRightArg(), context);
@Override
public CloseableIteration evaluate(BindingSet bindings) {
String[] hashJoinAttributeNames = HashJoinIteration.hashJoinAttributeNames(leftJoin);
return new HashJoinIteration(leftES, rightES, bindings, true, hashJoinAttributeNames, context);
}
};
} else {
// Check whether optional join is "well designed" as defined in section
// 4.2 of "Semantics and Complexity of SPARQL", 2006, Jorge Pérez et al.
VarNameCollector optionalVarCollector = new VarNameCollector();
leftJoin.getRightArg().visit(optionalVarCollector);
if (leftJoin.hasCondition()) {
leftJoin.getCondition().visit(optionalVarCollector);
}
Set leftBindingNames = leftJoin.getLeftArg().getBindingNames();
Set problemVars = retainAll(optionalVarCollector.getVarNames(), leftBindingNames);
QueryEvaluationStep leftPrepared = precompile(leftJoin.getLeftArg(), context);
// left join is "well designed"
ControlledWorkerScheduler scheduler = federationContext.getManager().getLeftJoinScheduler();
return bindings -> {
if (problemVars.containsAll(bindings.getBindingNames())) {
var leftIter = leftPrepared.evaluate(bindings);
ControlledWorkerLeftJoin join = new ControlledWorkerLeftJoin(scheduler, FederationEvalStrategy.this,
leftIter, leftJoin, bindings, leftJoin.getQueryInfo());
executor.execute(join);
return join;
} else {
Set problemVarsClone = new HashSet<>(problemVars);
problemVarsClone.retainAll(bindings.getBindingNames());
return new BadlyDesignedLeftJoinIterator(FederationEvalStrategy.this, leftJoin, bindings,
problemVarsClone, context);
}
};
}
}
private Set retainAll(Set problemVars, Set leftBindingNames) {
if (!leftBindingNames.isEmpty() && !problemVars.isEmpty()) {
if (leftBindingNames.size() > problemVars.size()) {
for (String problemVar : problemVars) {
if (leftBindingNames.contains(problemVar)) {
HashSet ret = new HashSet<>(problemVars);
ret.removeAll(leftBindingNames);
return ret;
}
}
} else {
for (String leftBindingName : leftBindingNames) {
if (problemVars.contains(leftBindingName)) {
HashSet ret = new HashSet<>(problemVars);
ret.removeAll(leftBindingNames);
return ret;
}
}
}
}
return problemVars;
}
public CloseableIteration evaluateNaryUnion(NUnion union, BindingSet bindings)
throws QueryEvaluationException {
return precompile(union).evaluate(bindings);
}
public QueryEvaluationStep prepareNaryUnion(NUnion union, QueryEvaluationContext context)
throws QueryEvaluationException {
ControlledWorkerScheduler unionScheduler = federationContext.getManager().getUnionScheduler();
ControlledWorkerUnion unionRunnable = new ControlledWorkerUnion<>(unionScheduler,
union.getQueryInfo());
int numberOfArguments = union.getNumberOfArguments();
QueryEvaluationStep[] args = new QueryEvaluationStep[numberOfArguments];
for (int i = 0; i < numberOfArguments; i++) {
args[i] = precompile(union.getArg(i), context);
}
return bindings -> {
for (int i = 0; i < numberOfArguments; i++) {
unionRunnable.addTask(new ParallelUnionOperatorTask(unionRunnable, args[i], bindings));
}
executor.execute(unionRunnable);
return unionRunnable;
};
}
/**
* Execute the join in a separate thread using some join executor.
*
* Join executors are for instance: - {@link SynchronousJoin} - {@link SynchronousBoundJoin} -
* {@link ControlledWorkerJoin} - {@link ControlledWorkerBoundJoin}
*
* For endpoint federation use controlled worker bound join, for local federation use controlled worker join. The
* other operators are there for completeness.
*
* Use {@link FederationEvalStrategy#executor} to execute the join (it is a runnable).
*
* @param joinScheduler
* @param leftIter
* @param rightArg
* @param joinVariables
* @param bindings
* @return the result
* @throws QueryEvaluationException
*/
protected abstract CloseableIteration executeJoin(
ControlledWorkerScheduler joinScheduler,
CloseableIteration leftIter, TupleExpr rightArg,
Set joinVariables, BindingSet bindings, QueryInfo queryInfo) throws QueryEvaluationException;
public abstract CloseableIteration evaluateExclusiveGroup(
ExclusiveGroup group, BindingSet bindings)
throws RepositoryException, MalformedQueryException, QueryEvaluationException;
/**
* Evaluate an {@link ExclusiveTupleExpr}. The default implementation converts the given expression to a SELECT
* query string and evaluates it at the source.
*
* @param expr
* @param bindings
* @return the result
* @throws RepositoryException
* @throws MalformedQueryException
* @throws QueryEvaluationException
*/
protected CloseableIteration evaluateExclusiveTupleExpr(
ExclusiveTupleExpr expr,
BindingSet bindings) throws RepositoryException, MalformedQueryException, QueryEvaluationException {
return precompile(expr).evaluate(bindings);
}
protected QueryEvaluationStep prepareExclusiveTupleExpr(
ExclusiveTupleExpr expr,
QueryEvaluationContext context)
throws RepositoryException, MalformedQueryException, QueryEvaluationException {
if (expr instanceof StatementTupleExpr) {
return bindings -> ((StatementTupleExpr) expr).evaluate(bindings);
}
if (!(expr instanceof ExclusiveTupleExprRenderer)) {
return super.precompile(expr);
}
Endpoint ownedEndpoint = federationContext
.getEndpointManager()
.getEndpoint(expr.getOwner().getEndpointID());
TripleSource t = ownedEndpoint.getTripleSource();
return bindings -> {
AtomicBoolean isEvaluated = new AtomicBoolean(false);
FilterValueExpr filterValueExpr = null; // TODO consider optimization using FilterTuple
try {
String preparedQuery = QueryStringUtil.selectQueryString((ExclusiveTupleExprRenderer) expr,
bindings, filterValueExpr, isEvaluated, expr.getQueryInfo().getDataset());
return t.getStatements(preparedQuery, bindings, (isEvaluated.get() ? null : filterValueExpr),
expr.getQueryInfo());
} catch (IllegalQueryException e) {
/* no projection vars, e.g. local vars only, can occur in joins */
if (t.hasStatements(expr, bindings)) {
return new SingleBindingSetIteration(bindings);
}
return new EmptyIteration<>();
}
};
}
/**
* Evaluate a bound join at the relevant endpoint, i.e. i.e. for a group of bindings retrieve results for the bound
* statement from the relevant endpoints
*
* @param stmt
* @param bindings
* @return the result iteration
* @throws QueryEvaluationException
*/
public abstract CloseableIteration evaluateBoundJoinStatementPattern(
StatementTupleExpr stmt, final List bindings) throws QueryEvaluationException;
/**
* Perform a grouped check at the relevant endpoints, i.e. for a group of bindings keep only those for which at
* least one endpoint provides a result to the bound statement.
*
* @param stmt
* @param bindings
* @return the result iteration
* @throws QueryEvaluationException
*/
public abstract CloseableIteration evaluateGroupedCheck(
CheckStatementPattern stmt, final List bindings) throws QueryEvaluationException;
/**
* Evaluate a SERVICE using vectored evaluation, taking the provided bindings as input.
*
* See {@link ControlledWorkerBoundJoin} and {@link FedXConfig#getEnableServiceAsBoundJoin()}
*
* @param service
* @param bindings
* @return the result iteration
* @throws QueryEvaluationException
*/
public CloseableIteration evaluateService(FedXService service,
final List bindings) throws QueryEvaluationException {
Var serviceRef = service.getService().getServiceRef();
String serviceUri;
if (serviceRef.hasValue()) {
serviceUri = serviceRef.getValue().stringValue();
} else {
return new ServiceJoinIterator(new CollectionIteration<>(bindings),
service.getService(), EmptyBindingSet.getInstance(), this);
}
// use vectored evaluation
FederatedService fs = getService(serviceUri);
if (fs instanceof RepositoryFederatedService) {
// set the bound join block size to 0 => leave block size up to FedX engine
((RepositoryFederatedService) fs).setBoundJoinBlockSize(0);
}
return fs.evaluate(service.getService(), new CollectionIteration<>(bindings),
service.getService().getBaseURI());
}
@Deprecated(forRemoval = true)
@Override
public Value evaluate(ValueExpr expr, BindingSet bindings)
throws ValueExprEvaluationException, QueryEvaluationException {
if (expr instanceof FilterExpr) {
return evaluate((FilterExpr) expr, bindings);
}
if (expr instanceof ConjunctiveFilterExpr) {
return evaluate((ConjunctiveFilterExpr) expr, bindings);
}
return super.evaluate(expr, bindings);
}
@Override
public QueryValueEvaluationStep precompile(ValueExpr expr, QueryEvaluationContext context)
throws ValueExprEvaluationException, QueryEvaluationException {
if (expr instanceof FilterExpr) {
return prepare((FilterExpr) expr, context);
}
if (expr instanceof ConjunctiveFilterExpr) {
return prepare((ConjunctiveFilterExpr) expr, context);
}
return super.precompile(expr, context);
}
public Value evaluate(FilterExpr node, BindingSet bindings)
throws ValueExprEvaluationException, QueryEvaluationException {
Value v = evaluate(node.getExpression(), bindings);
return BooleanLiteral.valueOf(QueryEvaluationUtil.getEffectiveBooleanValue(v));
}
protected QueryValueEvaluationStep prepare(FilterExpr node, QueryEvaluationContext context)
throws ValueExprEvaluationException, QueryEvaluationException {
QueryValueEvaluationStep expr = precompile(node.getExpression(), context);
return bindings -> {
Value v = expr.evaluate(bindings);
return BooleanLiteral.valueOf(QueryEvaluationUtil.getEffectiveBooleanValue(v));
};
}
public Value evaluate(ConjunctiveFilterExpr node, BindingSet bindings)
throws ValueExprEvaluationException, QueryEvaluationException {
return prepare(node, new QueryEvaluationContext.Minimal(dataset)).evaluate(bindings);
}
protected QueryValueEvaluationStep prepare(ConjunctiveFilterExpr node, QueryEvaluationContext context)
throws ValueExprEvaluationException, QueryEvaluationException {
List collect = node.getExpressions()
.stream()
.map((e) -> precompile(e, context))
.collect(Collectors.toList());
return bindings -> {
ValueExprEvaluationException error = null;
try {
for (QueryValueEvaluationStep ves : collect) {
Value v = ves.evaluate(bindings);
if (QueryEvaluationUtil.getEffectiveBooleanValue(v) == false) {
return BooleanLiteral.FALSE;
}
}
} catch (ValueExprEvaluationException e) {
error = e;
}
if (error != null)
throw error;
return BooleanLiteral.TRUE;
};
}
public CloseableIteration evaluate(DescribeOperator operator,
final BindingSet bindings) throws QueryEvaluationException {
if (!(operator instanceof FederatedDescribeOperator)) {
throw new FedXRuntimeException(
"Expected a FedXDescribeOperator Node. Found " + operator.getClass() + " instead.");
}
CloseableIteration iter = null;
try {
iter = evaluate(operator.getArg(), bindings);
// Note: we need to evaluate the DESCRIBE over the entire federation
return new FederatedDescribeIteration(iter, this, operator.getBindingNames(), bindings,
((FederatedDescribeOperator) operator).getQueryInfo());
} catch (Throwable t) {
if (iter != null) {
iter.close();
}
throw t;
}
}
@Override
protected QueryEvaluationStep prepare(DescribeOperator operator, QueryEvaluationContext context)
throws QueryEvaluationException {
if (!(operator instanceof FederatedDescribeOperator)) {
throw new FedXRuntimeException(
"Expected a FedXDescribeOperator Node. Found " + operator.getClass() + " instead.");
}
QueryEvaluationStep child = precompile(operator.getArg(), context);
return bs -> new FederatedDescribeIteration(child.evaluate(bs), this, operator.getBindingNames(), bs,
((FederatedDescribeOperator) operator).getQueryInfo());
}
protected CloseableIteration evaluateAtStatementSources(Object preparedQuery,
List statementSources, QueryInfo queryInfo) throws QueryEvaluationException {
if (preparedQuery instanceof String) {
return evaluateAtStatementSources((String) preparedQuery, statementSources, queryInfo);
}
if (preparedQuery instanceof TupleExpr) {
return evaluateAtStatementSources((TupleExpr) preparedQuery, statementSources, queryInfo);
}
throw new RuntimeException(
"Unsupported type for prepared query: " + preparedQuery.getClass().getCanonicalName());
}
protected CloseableIteration evaluateAtStatementSources(String preparedQuery,
List statementSources, QueryInfo queryInfo) throws QueryEvaluationException {
try {
CloseableIteration result;
if (statementSources.size() == 1) {
Endpoint ownedEndpoint = federationContext.getEndpointManager()
.getEndpoint(statementSources.get(0).getEndpointID());
org.eclipse.rdf4j.federated.evaluation.TripleSource t = ownedEndpoint.getTripleSource();
result = t.getStatements(preparedQuery, EmptyBindingSet.getInstance(), (FilterValueExpr) null,
queryInfo);
} else {
WorkerUnionBase union = federationContext.getManager().createWorkerUnion(queryInfo);
for (StatementSource source : statementSources) {
Endpoint ownedEndpoint = federationContext.getEndpointManager().getEndpoint(source.getEndpointID());
union.addTask(new ParallelPreparedUnionTask(union, preparedQuery, ownedEndpoint,
EmptyBindingSet.getInstance(), null, queryInfo));
}
union.run();
result = union;
// TODO we should add some DISTINCT here to have SET semantics
}
return result;
} catch (Exception e) {
if (e instanceof InterruptedException) {
Thread.currentThread().interrupt();
}
throw new QueryEvaluationException(e);
}
}
protected CloseableIteration evaluateAtStatementSources(
TupleExpr preparedQuery, List statementSources, QueryInfo queryInfo)
throws QueryEvaluationException {
try {
CloseableIteration result;
if (statementSources.size() == 1) {
Endpoint ownedEndpoint = federationContext.getEndpointManager()
.getEndpoint(statementSources.get(0).getEndpointID());
org.eclipse.rdf4j.federated.evaluation.TripleSource t = ownedEndpoint.getTripleSource();
result = t.getStatements(preparedQuery, EmptyBindingSet.getInstance(), null, queryInfo);
} else {
WorkerUnionBase union = federationContext.getManager().createWorkerUnion(queryInfo);
for (StatementSource source : statementSources) {
Endpoint ownedEndpoint = federationContext.getEndpointManager().getEndpoint(source.getEndpointID());
union.addTask(new ParallelPreparedAlgebraUnionTask(union, preparedQuery, ownedEndpoint,
EmptyBindingSet.getInstance(), null, queryInfo));
}
union.run();
result = union;
// TODO we should add some DISTINCT here to have SET semantics
}
return result;
} catch (Exception e) {
if (e instanceof InterruptedException) {
Thread.currentThread().interrupt();
}
throw new QueryEvaluationException(e);
}
}
}