All Downloads are FREE. Search and download functionalities are using the official Maven repository.

org.apache.jena.sparql.engine.join.Join Maven / Gradle / Ivy

There is a newer version: 5.1.0
Show newest version
/**
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package org.apache.jena.sparql.engine.join;

import java.util.ArrayList;
import java.util.List ;

import org.apache.jena.atlas.iterator.Iter ;
import org.apache.jena.sparql.algebra.Algebra ;
import org.apache.jena.sparql.algebra.Table ;
import org.apache.jena.sparql.algebra.TableFactory ;
import org.apache.jena.sparql.engine.ExecutionContext ;
import org.apache.jena.sparql.engine.QueryIterator ;
import org.apache.jena.sparql.engine.binding.Binding ;
import org.apache.jena.sparql.engine.iterator.QueryIterPlainWrapper ;
import org.apache.jena.sparql.engine.main.OpExecutor ;
import org.apache.jena.sparql.expr.ExprList ;

/** API to various join algorithms */
public class Join {
    // See also package org.apache.jena.sparql.engine.index
    // The anti-join code for MINUS

    private final static boolean useNestedLoopJoin      = false ;
    private final static boolean useNestedLoopLeftJoin  = false ;

    /**
     * Standard entry point to a join of two streams.
     * This is not a substitution/index join.
     * (See {@link OpExecutor} for streamed execution using substitution).
     * @param left
     * @param right
     * @param execCxt
     * @return QueryIterator
     */
    public static QueryIterator join(QueryIterator left, QueryIterator right, ExecutionContext execCxt) {
        if ( false )
            return debug(left, right, execCxt,
                         (_left, _right)->hashJoin(_left, _right, execCxt)) ;
        if ( useNestedLoopJoin )
            return nestedLoopJoin(left, right, execCxt) ;
        return hashJoin(left, right, execCxt) ;
    }

    /** Standard entry point to a left join of two streams.
     * This is not a substitution/index join.
     * (See {@link OpExecutor} for streamed execution using substitution).
     *
     * @param left
     * @param right
     * @param conditions
     * @param execCxt
     * @return QueryIterator
     */
    public static QueryIterator leftJoin(QueryIterator left, QueryIterator right, ExprList conditions, ExecutionContext execCxt) {
        if ( false )
            return debug(left, right, execCxt,
                         (_left, _right)->hashLeftJoin(_left, _right, conditions, execCxt)) ;
        if ( useNestedLoopLeftJoin )
            return nestedLoopLeftJoin(left, right, conditions, execCxt) ;
        return hashLeftJoin(left, right, conditions, execCxt) ;
    }

    interface JoinOp {
        public QueryIterator exec(QueryIterator left, QueryIterator right) ;
    }

    /** Inner loop join.
     *  Cancellable.
     * @param left      Left hand side
     * @param right     Right hand side
     * @param execCxt       ExecutionContext
     * @return          QueryIterator
     */
    public static QueryIterator nestedLoopJoin(QueryIterator left, QueryIterator right, ExecutionContext execCxt) {
        return new QueryIterNestedLoopJoin(left, right, execCxt) ;
    }

    /** Left loop join.
     *  Cancellable.
     * @param left      Left hand side
     * @param right     Right hand side
     * @param execCxt       ExecutionContext
     * @return          QueryIterator
     */
    public static QueryIterator nestedLoopLeftJoin(QueryIterator left, QueryIterator right, ExprList conditions, ExecutionContext execCxt) {
        return new QueryIterNestedLoopLeftJoin(left, right, conditions, execCxt) ;
    }

    /** Evaluate using a hash join.
     *
     * @param left      Left hand side
     * @param right     Right hand side
     * @param execCxt   ExecutionContext
     * @return          QueryIterator
     */
    public static QueryIterator hashJoin(QueryIterator left, QueryIterator right, ExecutionContext execCxt) {
        //return new QueryIterNestedLoopJoin(left, right, conditions, execCxt) ;
        return QueryIterHashJoin.create(left, right, execCxt) ;
    }

    /** Evaluate using a hash join.
     *
     * @param joinKey   The key for the probe table.
     * @param left      Left hand side
     * @param right     Right hand side
     * @param execCxt   ExecutionContext
     * @return          QueryIterator
     */
    public static QueryIterator hashJoin(JoinKey joinKey, QueryIterator left, QueryIterator right, ExecutionContext execCxt) {
        return QueryIterHashJoin.create(joinKey, left, right, execCxt) ;
    }

    /**
     * Left outer join by using hash join. Normally, this is
     * hashing the right hand side and streaming the left.  The reverse
     * implementation (hash left, stream right) is also available.
     * @param left
     * @param right
     * @param conditions
     * @param execCxt
     * @return QueryIterator
     */
    public static QueryIterator hashLeftJoin(QueryIterator left, QueryIterator right, ExprList conditions, ExecutionContext execCxt) {
        return QueryIterHashLeftJoin_Right.create(left, right, conditions, execCxt) ;
    }

    /**
     * Left outer join by using hash join. Normally, this is
     * hashing the right hand side and streaming the left.  The reverse
     * implementation (hash left, stream right) is also available.
     * @param joinKey
     * @param left
     * @param right
     * @param conditions
     * @param execCxt
     * @return QueryIterator
     */
    public static QueryIterator hashLeftJoin(JoinKey joinKey, QueryIterator left, QueryIterator right, ExprList conditions, ExecutionContext execCxt) {
        return QueryIterHashLeftJoin_Right.create(joinKey, left, right, conditions, execCxt) ;
    }

    /** Very simple, materializing version - useful for debugging.
     *  Builds output early. Materializes left, streams right.
     *  Does not scale.
     *  No cancellation, no stats.
     *
     * @see #nestedLoopJoin
     */
    public static QueryIterator nestedLoopJoinBasic(QueryIterator left, QueryIterator right, ExecutionContext execCxt) {
        List leftRows = Iter.toList(left) ;
        List output = new ArrayList<>() ;
        for ( ; right.hasNext() ; ) {
            Binding row2 = right.next() ;
            for ( Binding row1 : leftRows ) {
                Binding r = Algebra.merge(row1, row2) ;
                if ( r != null )
                    output.add(r) ;
            }
        }
        return QueryIterPlainWrapper.create(output.iterator(), execCxt) ;
    }

    /** Very simple, materializing version for leftjoin - useful for debugging.
     *  Builds output early. Materializes right, streams left.
     *  Does not scale.
     */
    public static QueryIterator nestedLoopLeftJoinBasic(QueryIterator left, QueryIterator right, ExprList conditions, ExecutionContext execCxt) {
        // Stream from left, materialize right.
        List rightRows = Iter.toList(right) ;
        List output = new ArrayList<>() ;
        long count = 0 ;
        for ( ; left.hasNext() ; ) {
            Binding row1 = left.next() ;
            boolean match = false ;
            for ( Binding row2 : rightRows ) {
                Binding r = Algebra.merge(row1, row2) ;
                if ( r != null && applyConditions(r, conditions, execCxt)) {
                    output.add(r) ;
                    match = true ;
                }
            }
            if ( ! match )
                output.add(row1) ;
        }
        return QueryIterPlainWrapper.create(output.iterator(), execCxt) ;
    }

    // apply conditions.
    private static boolean applyConditions(Binding row, ExprList conditions, ExecutionContext execCxt) {
        if ( conditions == null )
            return true ;
        return conditions.isSatisfied(row, execCxt) ;
    }

    /* Debug.
     * Print inputs and outputs.
     * This involves materializing the iterators.
     */
    private static QueryIterator debug(QueryIterator left, QueryIterator right, ExecutionContext execCxt, JoinOp action) {
            Table t1 = TableFactory.create(left) ;
            Table t2 = TableFactory.create(right) ;

            left = t1.iterator(execCxt) ;
            right = t2.iterator(execCxt) ;

            QueryIterator qIter = action.exec(left, right) ;
            Table t3 = TableFactory.create(qIter) ;
            System.out.println("** Left") ;
            System.out.println(t1) ;
            System.out.println("** Right") ;
            System.out.println(t2) ;
            System.out.println("** ") ;
            System.out.println(t3) ;
    //        // Could do again here, different algoithm for comparison.
    //        left = t1.iterator(execCxt) ;
    //        right = t2.iterator(execCxt) ;
    //        System.out.println("** nestedLoopJoin") ;
    //        Table t4 = TableFactory.create(?????) ;
    //        System.out.println(t4) ;
            return t3.iterator(execCxt) ;
        }
}




© 2015 - 2024 Weber Informatics LLC | Privacy Policy