All downloads are free. Search and download functionality uses the official Maven repository.

com.bigdata.bop.join.SolutionSetHashJoinOp Maven / Gradle / Ivy

Go to download

Blazegraph(TM) DB Core Platform. It contains all Blazegraph DB dependencies other than Blueprints.

There is a newer version: 2.1.4
Show newest version
/**

Copyright (C) SYSTAP, LLC DBA Blazegraph 2006-2016.  All rights reserved.

Contact:
     SYSTAP, LLC DBA Blazegraph
     2501 Calvert ST NW #106
     Washington, DC 20008
     licenses@blazegraph.com

This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; version 2 of the License.

This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
GNU General Public License for more details.

You should have received a copy of the GNU General Public License
along with this program; if not, write to the Free Software
Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
*/
/*
 * Created on Aug 30, 2011
 */

package com.bigdata.bop.join;

import java.util.HashMap;
import java.util.Map;
import java.util.concurrent.Callable;
import java.util.concurrent.ConcurrentHashMap;
import java.util.concurrent.FutureTask;

import com.bigdata.bop.BOp;
import com.bigdata.bop.BOpContext;
import com.bigdata.bop.BOpUtility;
import com.bigdata.bop.IBindingSet;
import com.bigdata.bop.IConstraint;
import com.bigdata.bop.IQueryAttributes;
import com.bigdata.bop.IVariable;
import com.bigdata.bop.NV;
import com.bigdata.bop.PipelineOp;
import com.bigdata.bop.controller.HTreeNamedSubqueryOp;
import com.bigdata.bop.controller.INamedSolutionSetRef;
import com.bigdata.bop.controller.INamedSubqueryOp;
import com.bigdata.bop.controller.JVMNamedSubqueryOp;
import com.bigdata.bop.controller.NamedSetAnnotations;
import com.bigdata.bop.engine.IRunningQuery;
import com.bigdata.htree.HTree;
import com.bigdata.relation.accesspath.AbstractUnsynchronizedArrayBuffer;
import com.bigdata.relation.accesspath.IBlockingBuffer;
import com.bigdata.relation.accesspath.UnsyncLocalOutputBuffer;

/**
 * Operator joins a solution set modeled as a hash index into the pipeline. The
 * solution set may be modeled by an {@link HTree} or a JVM {@link HashMap}.
 * While this JOIN requires the RHS hash index to be fully materialized,
 * evaluation of the LHS source solutions is pipelined.
 * 

* Parallel evaluation of source chunks is permitted, but the RHS hash index * must have been checkpointed before this operator begins evaluation (the * read-only {@link HTree} is thread-safe for concurrent readers). The * checkpoint is a NOP for the thread-safe JVM {@link ConcurrentHashMap} * collection class. *

* A {@link HTree} solution set must be be constructed by a * {@link HTreeNamedSubqueryOp} or a {@link HTreeHashIndexOp}. A JVM solution * set must be constructed by a {@link JVMNamedSubqueryOp} or * {@link JVMHashIndexOp}. * *

Handling OPTIONAL, EXISTS, and NOT-EXISTS

* * {@link PipelineOp.Annotations#LAST_PASS} evaluation MUST be requested for any * other than a {@link JoinTypeEnum#Normal}. See * {@link ChunkTask#doLastPass(UnsyncLocalOutputBuffer)} for details. * * TODO This class could be made concrete. There is no logic it in that is * specific to either the {@link HTree} or JVM hash join operation. * * @see INamedSubqueryOp * @see HashIndexOp * * @author Bryan Thompson * @version $Id: NamedSubqueryIncludeOp.java 5178 2011-09-12 19:09:23Z * thompsonbry $ */ abstract public class SolutionSetHashJoinOp extends PipelineOp { /** * */ private static final long serialVersionUID = 1L; public interface Annotations extends PipelineOp.Annotations, NamedSetAnnotations { /** * An {@link IConstraint}[] to be applied to solutions when they are * joined (optional). */ final String CONSTRAINTS = JoinAnnotations.CONSTRAINTS; /** * When true the hash index identified by * {@link #NAMED_SET_REF} will be released when this operator is done * (default {@value #DEFAULT_RELEASE}). *

* Note: Whether or not the hash index can be released depends on * whether or not the hash index will be consumed by more than one * operator in the query plan. For example, a named solution set can be * consumed by more than one operator and thus must not be released * until all such operators are done. * * TODO Alternatively, we could specify the #of different locations in * the query plan where the named solution set will be consumed. This * could be part of the {@link HTreeHashJoinUtility} state, in which * case it would only be set as an annotation on the operator which * generates the hash index. *

* Note: Any memory associated with the {@link IRunningQuery} will be * released no later than when the {@link IRunningQuery#isDone()}. This * only provides a means to release data as soon as it is known that the * data will not be referenced again during the query. */ final String RELEASE = SolutionSetHashJoinOp.class + ".release"; final boolean DEFAULT_RELEASE = true; } /** * Deep copy constructor. */ public SolutionSetHashJoinOp(final SolutionSetHashJoinOp op) { super(op); } /** * Shallow copy constructor. * * @param args * @param annotations */ public SolutionSetHashJoinOp(final BOp[] args, final Map annotations) { super(args, annotations); if (getProperty(Annotations.RELEASE, Annotations.DEFAULT_RELEASE) && !isLastPassRequested()) { /* * In order to release the hash index, this operator needs to be * notified when no more source solutions will become available. */ throw new IllegalArgumentException(Annotations.RELEASE + " requires " + Annotations.LAST_PASS); } // The RHS annotation must be specified. getRequiredProperty(Annotations.NAMED_SET_REF); } public SolutionSetHashJoinOp(final BOp[] args, final NV... annotations) { this(args, NV.asMap(annotations)); } @Override public BaseJoinStats newStats() { return new BaseJoinStats(); } @Override public FutureTask eval(final BOpContext context) { return new FutureTask(new ChunkTask(context, this)); } /** * Task executing on the node. 
*/ private static class ChunkTask implements Callable { private final BOpContext context; private final SolutionSetHashJoinOp op; private final IHashJoinUtility state; private final IConstraint[] constraints; private final boolean release; private final BaseJoinStats stats; private final IBlockingBuffer sink; private final IBlockingBuffer sink2; public ChunkTask(final BOpContext context, final SolutionSetHashJoinOp op) { this.context = context; this.stats = (BaseJoinStats) context.getStats(); this.release = op.getProperty(Annotations.RELEASE, Annotations.DEFAULT_RELEASE); this.sink = context.getSink(); this.sink2 = context.getSink2(); this.op = op; // The name of the attribute used to discover the solution set. final INamedSolutionSetRef namedSetRef = (INamedSolutionSetRef) op .getRequiredProperty(Annotations.NAMED_SET_REF); /* * Lookup the attributes for the query on which we will hang the * solution set. See BLZG-1493 (if queryId is null, use the query * attributes for this running query). */ final IQueryAttributes attrs = context.getQueryAttributes(namedSetRef.getQueryId()); state = (IHashJoinUtility) attrs.get(namedSetRef); if (state == null) { // The solution set was not found! throw new RuntimeException("Not found: " + namedSetRef); } if (!state.getJoinType().isNormal() && !op.isLastPassRequested()) { /* * Anything but a Normal join requires that we observe all solutions * and then do some final reporting. This is necessary for Optional, * Exists, and NotExists. */ throw new UnsupportedOperationException(state.getJoinType() + " requires " + Annotations.LAST_PASS); } /* * Combine the original constraints (if any) with those attached to * this operator (if any). * * Note: The solution set hash join is used to join in a hash index * generated by some other part of the query plan. Since it is also * used for named subqueries, which can be included in more than one * location, it is necessary that we can override/expand on the join * constraints for this operator. 
*/ this.constraints = BOpUtility.concat( (IConstraint[]) op.getProperty(Annotations.CONSTRAINTS), state.getConstraints()); } @Override public Void call() throws Exception { try { doHashJoin(); // Done. return null; } finally { if (release && context.isLastInvocation()) { /* * Note: It is possible to INCLUDE the named temporary * solution set multiple times within a query. If we want to * release() the hash tree then we need to know how many * times the temporary solution set is being included and * decrement a counter each time. When the counter reaches * zero, we can release the hash index. */ state.release(); } sink.close(); if (sink2 != null) sink2.close(); } } /** * Do a hash join of the buffered solutions with the access path. */ private void doHashJoin() { if (state.isEmpty()) return; stats.accessPathCount.increment(); stats.accessPathRangeCount.add(state.getRightSolutionCount()); final UnsyncLocalOutputBuffer unsyncBuffer = new UnsyncLocalOutputBuffer( op.getChunkCapacity(), sink); state.hashJoin2(context.getSource(), stats, unsyncBuffer, constraints); if (context.isLastInvocation()) { doLastPass(unsyncBuffer); } unsyncBuffer.flush(); sink.flush(); } /** * This method handles {@link JoinTypeEnum} values other than * {@link JoinTypeEnum#Normal}. {@link PipelineOp.Annotations#LAST_PASS} * evaluation MUST be requested for any other than a * {@link JoinTypeEnum#Normal}. */ private void doLastPass( final UnsyncLocalOutputBuffer unsyncBuffer) { switch (state.getJoinType()) { case Normal: /* * Nothing to do. */ break; case Optional: case NotExists: { /* * Output the optional solutions. */ // where to write the optional solutions. final AbstractUnsynchronizedArrayBuffer unsyncBuffer2 = sink2 == null ? unsyncBuffer : new UnsyncLocalOutputBuffer( op.getChunkCapacity(), sink2); state.outputOptionals(unsyncBuffer2); unsyncBuffer2.flush(); if (sink2 != null) sink2.flush(); break; } case Exists: { /* * Output the join set. 
* * Note: This has special hooks to support (NOT) EXISTS * graph patterns, which must bind the "ASK_VAR" depending * on whether or not the graph pattern is satisified. */ final IVariable askVar = state.getAskVar(); // askVar := true state.outputJoinSet(unsyncBuffer); if (askVar != null) { // askVar := false; state.outputOptionals(unsyncBuffer); } break; } default: throw new AssertionError(); } } } // class ChunkTask }





© 2015 - 2025 Weber Informatics LLC | Privacy Policy