org.eclipse.rdf4j.federated.evaluation.join.HashJoin Maven / Gradle / Ivy
Go to download
Show more of this group Show more artifacts with this name
Show all versions of rdf4j-tools-federation Show documentation
Show all versions of rdf4j-tools-federation Show documentation
A federation engine for virtually integrating SPARQL endpoints
/*******************************************************************************
* Copyright (c) 2019 Eclipse RDF4J contributors.
*
* All rights reserved. This program and the accompanying materials
* are made available under the terms of the Eclipse Distribution License v1.0
* which accompanies this distribution, and is available at
* http://www.eclipse.org/org/documents/edl-v10.php.
*
* SPDX-License-Identifier: BSD-3-Clause
*******************************************************************************/
package org.eclipse.rdf4j.federated.evaluation.join;
import java.util.ArrayList;
import java.util.Collection;
import java.util.LinkedList;
import java.util.List;
import java.util.Set;
import org.eclipse.rdf4j.common.iteration.CloseableIteration;
import org.eclipse.rdf4j.federated.evaluation.FederationEvalStrategy;
import org.eclipse.rdf4j.federated.evaluation.iterator.LazyMutableClosableIteration;
import org.eclipse.rdf4j.federated.structures.QueryInfo;
import org.eclipse.rdf4j.federated.util.QueryAlgebraUtil;
import org.eclipse.rdf4j.model.Value;
import org.eclipse.rdf4j.query.Binding;
import org.eclipse.rdf4j.query.BindingSet;
import org.eclipse.rdf4j.query.QueryEvaluationException;
import org.eclipse.rdf4j.query.algebra.TupleExpr;
import org.eclipse.rdf4j.query.algebra.evaluation.QueryEvaluationStep;
import org.eclipse.rdf4j.query.impl.MapBindingSet;
import org.eclipse.rdf4j.repository.sparql.federation.CollectionIteration;
/**
* Operator for a hash join of tuple expressions.
*
* @author Andreas Schwarte
* @since 6.0
*/
public class HashJoin extends JoinExecutorBase {
private final QueryEvaluationStep rightPrepared;
public HashJoin(FederationEvalStrategy strategy,
CloseableIteration leftIter,
TupleExpr rightArg, QueryEvaluationStep rightPrepared, Set joinVars, BindingSet bindings,
QueryInfo queryInfo)
throws QueryEvaluationException {
super(strategy, leftIter, rightArg, bindings, queryInfo);
setJoinVars(joinVars);
this.rightPrepared = rightPrepared;
}
@Override
protected void handleBindings() throws Exception {
// the total number of bindings
int totalBindingsLeft = 0;
int totalBindingsRight = 0;
Collection rightFreeVars = QueryAlgebraUtil.getFreeVars(rightArg);
Set joinVars = getJoinVars();
// evaluate the right join argument
// Note: wrapped in lazy mutable iteration for repetitive reading
try (LazyMutableClosableIteration rightArgIter = new LazyMutableClosableIteration(
rightPrepared.evaluate(bindings))) {
while (!isClosed() && leftIter.hasNext()) {
int blockSizeL = 10;
if (totalBindingsLeft > 20) {
blockSizeL = 100;
}
List leftBlock = new ArrayList<>(blockSizeL);
for (int i = 0; i < blockSizeL && leftIter.hasNext(); i++) {
leftBlock.add(leftIter.next());
totalBindingsLeft++;
}
int blockSizeR = 10;
while (!isClosed() && rightArgIter.hasNext()) {
if (totalBindingsRight > 20) {
blockSizeR = 100;
}
Collection rightBlock = new ArrayList<>(blockSizeR);
for (int i = 0; i < blockSizeR && rightArgIter.hasNext(); i++) {
rightBlock.add(rightArgIter.next());
totalBindingsRight++;
}
performJoin(leftBlock, rightBlock, joinVars, rightFreeVars);
}
rightArgIter.resetCursor();
}
}
if (log.isDebugEnabled()) {
log.debug("JoinStats: left iter of " + getDisplayId() + " had " + totalBindingsLeft + " results.");
}
}
/**
* Perform the join and add the result to this cursor.
*
* See {@link #join(Collection, Collection, Set, Collection)} and {@link #addResult(CloseableIteration)}.
*
*
* @param leftBlock
* @param rightBlock
* @param joinVariables
* @param freeVariablesRight
*/
protected void performJoin(Collection leftBlock, Collection rightBlock,
Set joinVariables, Collection freeVariablesRight) {
addResult(join(leftBlock, rightBlock, joinVariables, freeVariablesRight));
}
/**
* Perform a hash join of bindings from the left block with those of the right block.
*
* This method keeps the merged bindings in the results, if the join variables match and if all previously resolved
* bindings hold.
*
*
* @param leftBlock
* @param rightBlock
* @param joinVariables
* @param freeVariablesRight
* @return the merged binding result
*/
static CloseableIteration join(Collection leftBlock,
Collection rightBlock, Set joinVariables, Collection freeVariablesRight) {
List res = new LinkedList<>();
for (BindingSet left : leftBlock) {
for (BindingSet right : rightBlock) {
boolean match = false;
// check join variables: must be equal in both operands
for (String joinVariable : joinVariables) {
Value leftValue = left.getValue(joinVariable);
Value rightValue = right.getValue(joinVariable);
// join match
if (leftValue != null && leftValue.equals(rightValue)) {
match = true;
break;
}
}
// check other free variables of right expression
// => must be available in bindings
for (String freeVariable : freeVariablesRight) {
if (joinVariables.contains(freeVariable)) {
continue; // skip
}
Value leftValue = left.getValue(freeVariable);
Value rightValue = right.getValue(freeVariable);
if (leftValue != null && leftValue.equals(rightValue)) {
match = true;
}
}
if (match) {
// emit a merged binding set
MapBindingSet mergedBindings = new MapBindingSet();
for (Binding b : left) {
mergedBindings.setBinding(b);
}
for (Binding b : right) {
mergedBindings.setBinding(b);
}
res.add(mergedBindings);
}
}
}
return new CollectionIteration<>(res);
}
}
© 2015 - 2024 Weber Informatics LLC | Privacy Policy