All downloads are free. The search and download features use the official Maven repository.

org.eclipse.rdf4j.sail.federation.AbstractFederationConnection Maven / Gradle / Ivy

Go to download

The Federation SAIL allows multiple datasets to be virtually combined into a single dataset. The Federation SAIL combines multiple RDF stores that may exist on a remote server or be embedded in the same JVM. The Federation uses query optimizations to distribute sections of the query to different members based on the data contained in each of the members. These results are then joined together within the federation to provide the same result as if all the data were co-located within a single repository.

There is a newer version: 4.0.0-M1
Show newest version
/*******************************************************************************
 * Copyright (c) 2015 Eclipse RDF4J contributors, Aduna, and others.
 * All rights reserved. This program and the accompanying materials
 * are made available under the terms of the Eclipse Distribution License v1.0
 * which accompanies this distribution, and is available at
 * http://www.eclipse.org/org/documents/edl-v10.php.
 *******************************************************************************/
package org.eclipse.rdf4j.sail.federation;

import java.util.ArrayList;
import java.util.Arrays;
import java.util.HashMap;
import java.util.HashSet;
import java.util.List;
import java.util.Map;
import java.util.Set;

import org.apache.http.client.HttpClient;
import org.eclipse.rdf4j.IsolationLevel;
import org.eclipse.rdf4j.IsolationLevels;
import org.eclipse.rdf4j.common.iteration.CloseableIteration;
import org.eclipse.rdf4j.common.iteration.CloseableIteratorIteration;
import org.eclipse.rdf4j.common.iteration.DistinctIteration;
import org.eclipse.rdf4j.common.iteration.ExceptionConvertingIteration;
import org.eclipse.rdf4j.common.iteration.UnionIteration;
import org.eclipse.rdf4j.http.client.HttpClientDependent;
import org.eclipse.rdf4j.http.client.HttpClientSessionManager;
import org.eclipse.rdf4j.http.client.SessionManagerDependent;
import org.eclipse.rdf4j.model.IRI;
import org.eclipse.rdf4j.model.Namespace;
import org.eclipse.rdf4j.model.Resource;
import org.eclipse.rdf4j.model.Statement;
import org.eclipse.rdf4j.model.Value;
import org.eclipse.rdf4j.model.ValueFactory;
import org.eclipse.rdf4j.model.impl.SimpleValueFactory;
import org.eclipse.rdf4j.query.BindingSet;
import org.eclipse.rdf4j.query.Dataset;
import org.eclipse.rdf4j.query.QueryEvaluationException;
import org.eclipse.rdf4j.query.algebra.QueryRoot;
import org.eclipse.rdf4j.query.algebra.TupleExpr;
import org.eclipse.rdf4j.query.algebra.evaluation.EvaluationStrategy;
import org.eclipse.rdf4j.query.algebra.evaluation.TripleSource;
import org.eclipse.rdf4j.query.algebra.evaluation.federation.FederatedServiceResolver;
import org.eclipse.rdf4j.query.algebra.evaluation.federation.FederatedServiceResolverClient;
import org.eclipse.rdf4j.query.algebra.evaluation.impl.BindingAssigner;
import org.eclipse.rdf4j.query.algebra.evaluation.impl.CompareOptimizer;
import org.eclipse.rdf4j.query.algebra.evaluation.impl.ConjunctiveConstraintSplitter;
import org.eclipse.rdf4j.query.algebra.evaluation.impl.ConstantOptimizer;
import org.eclipse.rdf4j.query.algebra.evaluation.impl.DisjunctiveConstraintOptimizer;
import org.eclipse.rdf4j.query.algebra.evaluation.impl.SameTermFilterOptimizer;
import org.eclipse.rdf4j.query.explanation.Explanation;
import org.eclipse.rdf4j.query.impl.EmptyBindingSet;
import org.eclipse.rdf4j.repository.Repository;
import org.eclipse.rdf4j.repository.RepositoryConnection;
import org.eclipse.rdf4j.repository.RepositoryException;
import org.eclipse.rdf4j.repository.RepositoryResolver;
import org.eclipse.rdf4j.repository.RepositoryResolverClient;
import org.eclipse.rdf4j.repository.RepositoryResult;
import org.eclipse.rdf4j.repository.filters.AccurateRepositoryBloomFilter;
import org.eclipse.rdf4j.repository.filters.RepositoryBloomFilter;
import org.eclipse.rdf4j.sail.SailConnection;
import org.eclipse.rdf4j.sail.SailException;
import org.eclipse.rdf4j.sail.federation.optimizers.EmptyPatternOptimizer;
import org.eclipse.rdf4j.sail.federation.optimizers.FederationJoinOptimizer;
import org.eclipse.rdf4j.sail.federation.optimizers.OwnedTupleExprPruner;
import org.eclipse.rdf4j.sail.federation.optimizers.PrepareOwnedTupleExpr;
import org.eclipse.rdf4j.sail.federation.optimizers.QueryModelPruner;
import org.eclipse.rdf4j.sail.federation.optimizers.QueryMultiJoinOptimizer;
import org.eclipse.rdf4j.sail.helpers.AbstractSail;
import org.eclipse.rdf4j.sail.helpers.AbstractSailConnection;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

/**
 * Unions the results from multiple {@link RepositoryConnection} into one {@link SailConnection}.
 *
 * @author James Leigh
 * @author Arjohn Kampman
 */
abstract class AbstractFederationConnection extends AbstractSailConnection implements FederatedServiceResolverClient,
		RepositoryResolverClient, HttpClientDependent, SessionManagerDependent {

	private static final Logger LOGGER = LoggerFactory.getLogger(AbstractFederationConnection.class);

	private final Federation federation;

	private final ValueFactory valueFactory;

	protected final List members;

	/**
	 * Connection specific resolver.
	 */
	private FederatedServiceResolver federatedServiceResolver;

	public AbstractFederationConnection(Federation federation, List members) {
		super(new AbstractSail() {

			@Override
			public boolean isWritable() throws SailException {
				return false;
			}

			@Override
			public ValueFactory getValueFactory() {
				return SimpleValueFactory.getInstance();
			}

			@Override
			protected void shutDownInternal() throws SailException {
				// ignore
			}

			@Override
			protected SailConnection getConnectionInternal() throws SailException {
				return null;
			}

			@Override
			protected void connectionClosed(SailConnection connection) {
				// ignore
			}

			@Override
			public List getSupportedIsolationLevels() {
				return Arrays.asList(new IsolationLevel[] { IsolationLevels.NONE });
			}

			@Override
			public IsolationLevel getDefaultIsolationLevel() {
				return IsolationLevels.NONE;
			}
		});
		this.federation = federation;

		valueFactory = SimpleValueFactory.getInstance();

		this.members = new ArrayList<>(members);
	}

	public ValueFactory getValueFactory() {
		return valueFactory;
	}

	@Override
	public void closeInternal() throws SailException {
		excute(RepositoryConnection::close);
	}

	@Override
	public CloseableIteration getContextIDsInternal() throws SailException {
		CloseableIteration cursor = union(RepositoryConnection::getContextIDs);

		cursor = new DistinctIteration(cursor);

		return cursor;
	}

	public FederatedServiceResolver getFederatedServiceResolver() {
		if (federatedServiceResolver == null) {
			return federation.getFederatedServiceResolver();
		}
		return federatedServiceResolver;
	}

	@Override
	public void setFederatedServiceResolver(FederatedServiceResolver resolver) {
		this.federatedServiceResolver = resolver;
		for (RepositoryConnection member : members) {
			if (member instanceof FederatedServiceResolverClient) {
				((FederatedServiceResolverClient) member).setFederatedServiceResolver(resolver);
			}
		}
	}

	@Override
	public void setRepositoryResolver(RepositoryResolver resolver) {
		for (RepositoryConnection member : members) {
			if (member instanceof RepositoryResolverClient) {
				((RepositoryResolverClient) member).setRepositoryResolver(resolver);
			}
		}
	}

	@Override
	public HttpClientSessionManager getHttpClientSessionManager() {
		for (RepositoryConnection member : members) {
			if (member instanceof SessionManagerDependent) {
				HttpClientSessionManager client = ((SessionManagerDependent) member).getHttpClientSessionManager();
				if (client != null) {
					return client;
				}
			}
		}
		return null;
	}

	@Override
	public void setHttpClientSessionManager(HttpClientSessionManager client) {
		for (RepositoryConnection member : members) {
			if (member instanceof SessionManagerDependent) {
				((SessionManagerDependent) member).setHttpClientSessionManager(client);
			}
		}
	}

	@Override
	public HttpClient getHttpClient() {
		for (RepositoryConnection member : members) {
			if (member instanceof HttpClientDependent) {
				HttpClient client = ((HttpClientDependent) member).getHttpClient();
				if (client != null) {
					return client;
				}
			}
		}
		return null;
	}

	@Override
	public void setHttpClient(HttpClient client) {
		for (RepositoryConnection member : members) {
			if (member instanceof HttpClientDependent) {
				((HttpClientDependent) member).setHttpClient(client);
			}
		}
	}

	@Override
	public String getNamespaceInternal(String prefix) throws SailException {
		try {
			String namespace = null;
			for (RepositoryConnection member : members) {
				String candidate = member.getNamespace(prefix);
				if (namespace == null) {
					namespace = candidate;
				} else if (candidate != null && !candidate.equals(namespace)) {
					namespace = null; // NOPMD
					break;
				}
			}
			return namespace;
		} catch (RepositoryException e) {
			throw new SailException(e);
		}
	}

	@Override
	public CloseableIteration getNamespacesInternal() throws SailException {
		Map namespaces = new HashMap<>();
		Set prefixes = new HashSet<>();
		Set conflictedPrefixes = new HashSet<>();

		try {
			for (RepositoryConnection member : members) {
				try (RepositoryResult memberNamespaces = member.getNamespaces()) {
					while (memberNamespaces.hasNext()) {
						Namespace next = memberNamespaces.next();
						String prefix = next.getPrefix();

						if (prefixes.add(prefix)) {
							namespaces.put(prefix, next);
						} else if (!next.getName().equals(namespaces.get(prefix).getName())) {
							conflictedPrefixes.add(prefix);
						}
					}
				}
			}
		} catch (RepositoryException e) {
			throw new SailException(e);
		}
		for (String prefix : conflictedPrefixes) {
			namespaces.remove(prefix);
		}
		return new CloseableIteratorIteration<>(namespaces.values().iterator());
	}

	@Override
	public long sizeInternal(Resource... contexts) throws SailException {
		try {
			if (federation.isDistinct()) {
				long size = 0;
				for (RepositoryConnection member : members) {
					size += member.size(contexts);
				}
				return size; // NOPMD
			} else {
				try (CloseableIteration cursor = getStatements(null, null, null,
						false, contexts)) {
					long size = 0;
					while (cursor.hasNext()) {
						cursor.next();
						size++;
					}
					return size;
				}
			}
		} catch (RepositoryException e) {
			throw new SailException(e);
		}
	}

	@Override
	public CloseableIteration getStatementsInternal(final Resource subj,
			final IRI pred, final Value obj, final boolean includeInferred, final Resource... contexts)
			throws SailException {
		CloseableIteration cursor = union(
				(RepositoryConnection member) -> member.getStatements(subj, pred, obj, includeInferred, contexts));

		if (!federation.isDistinct() && !isLocal(pred)) {
			// Filter any duplicates
			cursor = new DistinctIteration(cursor);
		}

		return cursor;
	}

	@Override
	public CloseableIteration evaluateInternal(TupleExpr query,
			Dataset dataset, BindingSet bindings, boolean inf) throws SailException {
		TripleSource tripleSource = new FederationTripleSource(inf);
		EvaluationStrategy strategy = federation.createEvaluationStrategy(tripleSource, dataset,
				getFederatedServiceResolver());
		TupleExpr qry = optimize(query, dataset, bindings, inf, strategy);
		try {
			return strategy.evaluate(qry, EmptyBindingSet.getInstance());
		} catch (QueryEvaluationException e) {
			throw new SailException(e);
		}
	}

	private class FederationTripleSource implements TripleSource {

		private final boolean inf;

		public FederationTripleSource(boolean includeInferred) {
			this.inf = includeInferred;
		}

		@Override
		public CloseableIteration getStatements(Resource subj, IRI pred,
				Value obj, Resource... contexts) throws QueryEvaluationException {
			try {
				CloseableIteration result = AbstractFederationConnection.this
						.getStatements(subj, pred, obj, inf, contexts);
				return new ExceptionConvertingIteration(result) {

					@Override
					protected QueryEvaluationException convert(Exception e) {
						return new QueryEvaluationException(e);
					}
				};
			} catch (SailException e) {
				throw new QueryEvaluationException(e);
			}
		}

		@Override
		public ValueFactory getValueFactory() {
			return valueFactory;
		}
	}

	private TupleExpr optimize(TupleExpr parsed, Dataset dataset, BindingSet bindings, boolean includeInferred,
			EvaluationStrategy strategy) throws SailException {
		LOGGER.trace("Incoming query model:\n{}", parsed);

		// Clone the tuple expression to allow for more aggressive optimisations
		TupleExpr query = new QueryRoot(parsed.clone());

		new BindingAssigner().optimize(query, dataset, bindings);
		new ConstantOptimizer(strategy).optimize(query, dataset, bindings);
		new CompareOptimizer().optimize(query, dataset, bindings);
		new ConjunctiveConstraintSplitter().optimize(query, dataset, bindings);
		new DisjunctiveConstraintOptimizer().optimize(query, dataset, bindings);
		new SameTermFilterOptimizer().optimize(query, dataset, bindings);
		new QueryModelPruner().optimize(query, dataset, bindings);

		new QueryMultiJoinOptimizer().optimize(query, dataset, bindings);
		// new FilterOptimizer().optimize(query, dataset, bindings);

		// prepare bloom filters
		RepositoryBloomFilter defaultBloomFilter = new AccurateRepositoryBloomFilter(includeInferred);
		Map bloomFilters = federation.getBloomFilters();
		java.util.function.Function bloomFilterFunction = c -> bloomFilters
				.getOrDefault(c, defaultBloomFilter);

		new EmptyPatternOptimizer(members, bloomFilterFunction).optimize(query, dataset, bindings);
		boolean distinct = federation.isDistinct();
		PrefixHashSet local = federation.getLocalPropertySpace();
		new FederationJoinOptimizer(members, distinct, local, bloomFilterFunction).optimize(query, dataset, bindings);
		new OwnedTupleExprPruner().optimize(query, dataset, bindings);
		new QueryModelPruner().optimize(query, dataset, bindings);
		new QueryMultiJoinOptimizer().optimize(query, dataset, bindings);

		new PrepareOwnedTupleExpr().optimize(query, dataset, bindings);

		LOGGER.trace("Optimized query model:\n{}", query);
		return query;
	}

	interface Procedure {

		void run(RepositoryConnection member) throws RepositoryException;
	}

	void excute(Procedure operation) throws SailException { // NOPMD
		RepositoryException storeExc = null;
		RuntimeException runtimeExc = null;

		for (RepositoryConnection member : members) {
			try {
				operation.run(member);
			} catch (RepositoryException e) {
				LOGGER.error("Failed to execute procedure on federation members", e);
				if (storeExc == null) {
					storeExc = e;
				}
			} catch (RuntimeException e) {
				LOGGER.error("Failed to execute procedure on federation members", e);
				if (runtimeExc == null) {
					runtimeExc = e;
				}
			}
		}

		if (storeExc != null) {
			throw new SailException(storeExc);
		}

		if (runtimeExc != null) {
			throw runtimeExc;
		}
	}

	private interface Function {

		CloseableIteration call(RepositoryConnection member)
				throws RepositoryException;
	}

	private  CloseableIteration union(Function function) throws SailException {
		List> cursors = new ArrayList<>(members.size());

		try {
			for (RepositoryConnection member : members) {
				cursors.add(function.call(member));
			}
			UnionIteration result = new UnionIteration<>(cursors);
			return new ExceptionConvertingIteration(result) {

				@Override
				protected SailException convert(Exception e) {
					return new SailException(e);
				}
			};
		} catch (RepositoryException e) {
			closeAll(cursors);
			throw new SailException(e);
		} catch (RuntimeException e) {
			closeAll(cursors);
			throw e;
		}
	}

	private boolean isLocal(IRI pred) {
		if (pred == null) {
			return false; // NOPMD
		}

		PrefixHashSet hash = federation.getLocalPropertySpace();
		if (hash == null) {
			return false; // NOPMD
		}

		return hash.match(pred.stringValue());
	}

	private void closeAll(Iterable> cursors) {
		for (CloseableIteration cursor : cursors) {
			try {
				cursor.close();
			} catch (RepositoryException e) {
				LOGGER.error("Failed to close cursor", e);
			}
		}
	}

	@Override
	public Explanation explain(Explanation.Level level, TupleExpr tupleExpr, Dataset dataset,
			BindingSet bindings, boolean includeInferred, int timeoutSeconds) {
		throw new UnsupportedOperationException();
	}
}




© 2015 - 2024 Weber Informatics LLC | Privacy Policy