All downloads are free. Search and download functionality uses the official Maven repository.

com.bigdata.bop.rdf.join.DataSetJoin Maven / Gradle / Ivy

/**

Copyright (C) SYSTAP, LLC DBA Blazegraph 2006-2016.  All rights reserved.

Contact:
     SYSTAP, LLC DBA Blazegraph
     2501 Calvert ST NW #106
     Washington, DC 20008
     licenses@blazegraph.com

This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; version 2 of the License.

This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
GNU General Public License for more details.

You should have received a copy of the GNU General Public License
along with this program; if not, write to the Free Software
Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
*/
/*
 * Created on Sep 20, 2010
 */

package com.bigdata.bop.rdf.join;

import java.util.LinkedHashSet;
import java.util.Map;
import java.util.Set;
import java.util.concurrent.Callable;
import java.util.concurrent.FutureTask;

import com.bigdata.bop.BOp;
import com.bigdata.bop.BOpContext;
import com.bigdata.bop.BOpEvaluationContext;
import com.bigdata.bop.Constant;
import com.bigdata.bop.IBindingSet;
import com.bigdata.bop.IConstant;
import com.bigdata.bop.IVariable;
import com.bigdata.bop.NV;
import com.bigdata.bop.PipelineOp;
import com.bigdata.bop.engine.BOpStats;
import com.bigdata.rdf.internal.IV;
import com.bigdata.relation.accesspath.IBlockingBuffer;
import com.bigdata.relation.accesspath.UnsynchronizedArrayBuffer;

import cutthecrap.utils.striterators.ICloseableIterator;

/**
 * DataSetJoin(left)[var=g; graphs={graphIds}]
 * 

* SPARQL specific join of the source binding sets with an inline access path * allowing var to take on the given graphIds values. This join operator * is useful when the multiplicity of the graphs is small to moderate. If there * are a very large number of graphs, then the operator tree is to cumbersome * and you would do better off joining against an index (whether temporary or * permanent) containing the graphs. *

* The evaluation context is {@link BOpEvaluationContext#ANY}. * * @author Bryan Thompson * @version $Id$ */ public class DataSetJoin extends PipelineOp { /** * */ private static final long serialVersionUID = 1L; public interface Annotations extends PipelineOp.Annotations { /** * The variable to be bound. */ String VAR = DataSetJoin.class.getName() + ".var"; /** * The {@link Set} of {@link IV}s to be bound. A {@link LinkedHashSet} * should be used for efficiency since it provides fast ordered scans * and fast point tests. */ String GRAPHS = DataSetJoin.class.getName() + ".graphs"; } /** * Deep copy constructor. * * @param op */ public DataSetJoin(final DataSetJoin op) { super(op); } /** * Shallow copy constructor. * @param args * @param annotations */ public DataSetJoin(final BOp[] args, final Map annotations) { super(args, annotations); getVar(); getGraphs(); } public DataSetJoin(final BOp[] args, final NV... annotations) { this(args, NV.asMap(annotations)); } public IVariable getVar() { return (IVariable) getRequiredProperty(Annotations.VAR); } @SuppressWarnings({ "unchecked", "rawtypes" }) public Set getGraphs() { return (Set) getRequiredProperty(Annotations.GRAPHS); } @Override public FutureTask eval(final BOpContext context) { return new FutureTask(new DataSetJoinTask(this,context)); } /** * Specialized in-memory join. 
*/ static private class DataSetJoinTask implements Callable { private final DataSetJoin op; private final BOpContext context; private final IVariable var; @SuppressWarnings("rawtypes") private final Set graphs; DataSetJoinTask(final DataSetJoin op, final BOpContext context) { this.op = op; this.context = context; var = op.getVar(); graphs = op.getGraphs(); } @Override public Void call() throws Exception { final ICloseableIterator source = context .getSource(); final IBlockingBuffer sink = context.getSink(); try { final BOpStats stats = context.getStats(); final UnsynchronizedArrayBuffer tmp = new UnsynchronizedArrayBuffer( sink, IBindingSet.class, op.getChunkCapacity()); while (source.hasNext()) { final IBindingSet[] chunk = source.next(); stats.chunksIn.increment(); stats.unitsIn.add(chunk.length); handleChunk(chunk, tmp); } tmp.flush(); sink.flush(); return null; } finally { sink.close(); source.close(); } } /** * Join source binding set chunk with {@link #graphs}. * * @param chunk * A chunk of {@link IBindingSet}s from the source. * @param tmp * Where to write the data. */ @SuppressWarnings("rawtypes") private void handleChunk(final IBindingSet[] chunk, final UnsynchronizedArrayBuffer tmp) { for (IBindingSet bset : chunk) { final IConstant val = bset.get(var); if (val == null) { /* * When the value is unbound, we output the cross product. */ for (IV c : graphs) { bset = bset.clone(); bset.set(var, new Constant(c)); tmp.add(bset); } } else { /* * When the value is bound the binding set will be output * iff the bound value for the variable is found in the * specified graphs. */ if (graphs.contains(val.get())) { // match. output binding set. tmp.add(bset); } } } } } }





© 2015 - 2025 Weber Informatics LLC | Privacy Policy