

/*

Copyright (C) SYSTAP, LLC DBA Blazegraph 2006-2016.  All rights reserved.

Contact:
     SYSTAP, LLC DBA Blazegraph
     2501 Calvert ST NW #106
     Washington, DC 20008
     licenses@blazegraph.com

This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; version 2 of the License.

This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
GNU General Public License for more details.

You should have received a copy of the GNU General Public License
along with this program; if not, write to the Free Software
Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA

*/
package com.bigdata.rdf.store;

import java.util.LinkedList;
import java.util.List;
import java.util.Queue;
import java.util.concurrent.Callable;
import java.util.concurrent.ConcurrentLinkedQueue;
import java.util.concurrent.CountDownLatch;
import java.util.concurrent.ExecutionException;
import java.util.concurrent.ExecutorService;
import java.util.concurrent.FutureTask;
import java.util.concurrent.TimeUnit;
import java.util.concurrent.atomic.AtomicReference;

import org.apache.system.SystemUtil;

import com.bigdata.rdf.spo.ISPO;
import com.bigdata.relation.accesspath.BlockingBuffer;
import com.bigdata.relation.accesspath.IAccessPath;
import com.bigdata.striterator.AbstractChunkedResolverator;
import com.bigdata.striterator.IChunkedOrderedIterator;
import com.bigdata.util.concurrent.LatchedExecutor;

import cutthecrap.utils.striterators.ICloseableIterator;

/**
 * Efficient batched, streaming resolution of triple patterns to statements
 * spanned by those triple patterns that are present in the data.
 * 

 * Note: If the input contains triple patterns that have a high cardinality
 * in the data, then a large number of statements may be returned.
 * 
 * @param triplePatterns
 *            A collection of triple patterns or fully bound statements. If
 *            this collection contains triple patterns that have a high
 *            cardinality in the data, then a large number of statements may
 *            be returned.
 * 
 * @return An iterator from which the materialized statements spanned by
 *         those triple patterns may be read.
 * 
 * @see Efficient batch remove of a collection of triple patterns
 */
public class BigdataTriplePatternMaterializer
        extends
        AbstractChunkedResolverator<BigdataTriplePattern, ISPO, AbstractTripleStore>
        implements ICloseableIterator<ISPO> {
//        implements IChunkedOrderedIterator<ISPO> {

    private final int nthreads;

    public BigdataTriplePatternMaterializer(final AbstractTripleStore db,
            final IChunkedOrderedIterator<BigdataTriplePattern> src) {

        this(db, src, 4/* nthreads */);

    }

    public BigdataTriplePatternMaterializer(final AbstractTripleStore db,
            final IChunkedOrderedIterator<BigdataTriplePattern> src,
            final int nthreads) {

        super(db, src, new BlockingBuffer<ISPO[]>(
                db.getChunkOfChunksCapacity(),
                db.getChunkCapacity(),
                db.getChunkTimeout(),
                TimeUnit.MILLISECONDS));

        if (nthreads < 0)
            throw new IllegalArgumentException();

        // At least 1 thread. At most ncpus*2.
        this.nthreads = Math.max(
                Math.min(nthreads, SystemUtil.numProcessors() * 2), 1);

    }

    @Override
    public BigdataTriplePatternMaterializer start(final ExecutorService service) {

        helperService.set(new LatchedExecutor(service, nthreads));

        super.start(service);

        return this;

    }

    private final AtomicReference<LatchedExecutor> helperService = new AtomicReference<LatchedExecutor>();

    @Override
    protected ISPO[] resolveChunk(final BigdataTriplePattern[] chunk) {

        final LatchedExecutor helperService = this.helperService.get();

        if (helperService == null)
            throw new IllegalStateException();

        /**
         * The output will be at most sizeof(chunk) arrays. Each array will
         * have one or more statements. Any triple patterns that have no
         * intersection in the data will be dropped and will not put anything
         * into this output queue.
         * 
         * @see Deadlock in BigdataTriplePatternMaterializer
         */
        final Queue<ISPO[]> out = new ConcurrentLinkedQueue<ISPO[]>(/* chunk.length */);

        final List<FutureTask<Long>> tasks = new LinkedList<FutureTask<Long>>();

        try {

            final CountDownLatch latch = new CountDownLatch(chunk.length);

            /*
             * Create FutureTasks for each subquery. The futures are not
             * submitted to the Executor yet. That happens in call(). By
             * deferring the evaluation until call() we gain the ability to
             * cancel all subqueries if any subquery fails.
             */
            for (BigdataTriplePattern stmt : chunk) {

                /*
                 * Task runs subquery and cancels all subqueries in [tasks] if
                 * it fails.
                 */
                final FutureTask<Long> ft = new FutureTask<Long>(
                        new ResolveTriplePatternTask(stmt, out)) {
                    /*
                     * Hook future to count down the latch when the task is
                     * done.
                     */
                    @Override
                    public void run() {
                        try {
                            super.run();
                        } finally {
                            latch.countDown();
                        }
                    }
                };

                tasks.add(ft);

            }

            /*
             * Run triple pattern resolution with limited parallelism.
             */
            for (FutureTask<Long> ft : tasks) {

                helperService.execute(ft);

            }

            /*
             * Wait for all tasks to complete.
             */
            latch.await();

            /*
             * Check futures, counting the #of solutions.
             */
            long nfound = 0L;

            for (FutureTask<Long> ft : tasks) {

                nfound += ft.get();

                if (nfound > Integer.MAX_VALUE)
                    throw new UnsupportedOperationException();

            }

            /*
             * Convert into a single ISPO[] chunk.
             */
            final ISPO[] dest = new ISPO[(int) nfound];

            int destPos = 0;

            ISPO[] src = null;

            while ((src = out.poll()) != null) {

                System.arraycopy(src/* src */, 0/* srcPos */, dest, destPos,
                        src.length);

                destPos += src.length;

            }

            return dest;

        } catch (InterruptedException e) {

            throw new RuntimeException(e);

        } catch (ExecutionException e) {

            throw new RuntimeException(e);

        } finally {

            // Cancel any tasks which are still running.
            for (FutureTask<Long> ft : tasks)
                ft.cancel(true/* mayInterruptIfRunning */);

        }

    }

    /**
     * Resolve a triple pattern to the statements that it spans in the data.
     * 
     * @author Bryan Thompson
     */
    private class ResolveTriplePatternTask implements Callable<Long> {

        private final BigdataTriplePattern stmt;

        private final Queue<ISPO[]> out;

        public ResolveTriplePatternTask(final BigdataTriplePattern stmt,
                final Queue<ISPO[]> out) {

            this.stmt = stmt;

            this.out = out;

        }

        @Override
        public Long call() throws Exception {

            /*
             * TODO What about closure over the SIDs?
             * 
             * final IChunkedOrderedIterator<ISPO> itr =
             * database.computeClosureForStatementIdentifiers(
             * database.getAccessPath(s, p, o, c).iterator());
             */
            final IAccessPath<ISPO> ap = (IAccessPath<ISPO>) state.getAccessPath(
                    stmt.getSubject(), stmt.getPredicate(), stmt.getObject(),
                    stmt.getContext());

//            if (ap.isFullyBoundForKey()) {
//                /*
//                 * Optimize when triple pattern is a fully bound statement.
//                 * In this case, the output is either that statement (with IVs
//                 * resolved) or the triple pattern is dropped.
//                 */
//                final IChunkedOrderedIterator<ISPO> itr = ap.iterator();
//                try {
//                    if (!itr.hasNext())
//                        return 0L;
//                    final ISPO spo = itr.next();
//                    out.add(new ISPO[]{spo});
//                    return 1L;
//                } finally {
//                    itr.close();
//                }
//            } else {

            long n = 0L;

            final IChunkedOrderedIterator<ISPO> itr = ap.iterator();

            try {

                while (itr.hasNext()) {

                    final ISPO[] a = itr.nextChunk();

//                    if (true) {
//                        // verify no null array elements.
//                        for (int i = 0; i < a.length; i++) {
//                            if (a[i] == null)
//                                throw new AssertionError(Arrays.toString(a));
//                        }
//                    }

                    /**
                     * This will never fail for a ConcurrentLinkedQueue.
                     * 
                     * @see Deadlock in BigdataTriplePatternMaterializer
                     */
                    final boolean result = out.offer(a);

                    assert result : "insertion failed - expects an unbounded queue";

                    n += a.length;

                }

                return n;

            } finally {

                itr.close();

            }
//            }

        }

    }

}
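
For context, here is a minimal usage sketch showing how the materializer is typically driven, based only on the API visible in this listing (the constructor, start(ExecutorService), and the ICloseableIterator contract). It is not part of the Blazegraph sources: the class name MaterializerUsageSketch, the method countSpannedStatements, and the variables db and patternSource are illustrative, and it assumes the caller already has an open AbstractTripleStore and a source IChunkedOrderedIterator of BigdataTriplePattern obtained elsewhere.

import java.util.concurrent.ExecutorService;
import java.util.concurrent.Executors;

import com.bigdata.rdf.store.AbstractTripleStore;
import com.bigdata.rdf.store.BigdataTriplePattern;
import com.bigdata.rdf.store.BigdataTriplePatternMaterializer;
import com.bigdata.striterator.IChunkedOrderedIterator;

public class MaterializerUsageSketch {

    /**
     * Count the statements spanned by the given triple patterns.
     * (Illustrative only; db and patternSource are assumed to be supplied
     * by the caller.)
     */
    public static long countSpannedStatements(final AbstractTripleStore db,
            final IChunkedOrderedIterator<BigdataTriplePattern> patternSource) {

        final ExecutorService service = Executors.newCachedThreadPool();

        // Ask for up to 4 concurrent access-path reads; the constructor
        // further clamps this to the range [1, ncpus * 2].
        final BigdataTriplePatternMaterializer itr =
                new BigdataTriplePatternMaterializer(db, patternSource, 4/* nthreads */)
                        .start(service);

        long n = 0L;
        try {
            while (itr.hasNext()) {
                itr.next(); // a materialized statement spanned by some pattern
                n++;
            }
        } finally {
            itr.close();
            service.shutdown();
        }

        return n;
    }

}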

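The heart of resolveChunk() is a reusable concurrency idiom: wrap each subtask in a FutureTask whose run() counts down a shared CountDownLatch, submit the tasks to an executor with bounded parallelism, await the latch, sum the futures, and cancel anything still running in a finally block. The following self-contained sketch reproduces that idiom with JDK classes only, using a fixed thread pool as a stand-in for the LatchedExecutor; the names LimitedParallelismSketch and resolveAll and the string inputs are illustrative, not part of Blazegraph.

import java.util.LinkedList;
import java.util.List;
import java.util.concurrent.Callable;
import java.util.concurrent.CountDownLatch;
import java.util.concurrent.ExecutionException;
import java.util.concurrent.ExecutorService;
import java.util.concurrent.Executors;
import java.util.concurrent.FutureTask;

public class LimitedParallelismSketch {

    /**
     * Resolve each input with at most [nthreads] subtasks running at once,
     * wait for all of them, and cancel anything still running on the way out.
     */
    public static long resolveAll(final String[] inputs, final int nthreads)
            throws InterruptedException, ExecutionException {

        // A fixed pool stands in for the LatchedExecutor: it bounds how many
        // subtasks may run concurrently on the shared service.
        final ExecutorService pool = Executors.newFixedThreadPool(nthreads);

        final CountDownLatch latch = new CountDownLatch(inputs.length);

        final List<FutureTask<Long>> tasks = new LinkedList<FutureTask<Long>>();

        try {

            // 1. Create (but do not yet run) one FutureTask per input, hooking
            //    run() so the latch is counted down when the task completes,
            //    whether it succeeded, failed, or was cancelled.
            for (final String input : inputs) {
                final FutureTask<Long> ft = new FutureTask<Long>(
                        new Callable<Long>() {
                            @Override
                            public Long call() {
                                // Stand-in for resolving one triple pattern.
                                return (long) input.length();
                            }
                        }) {
                    @Override
                    public void run() {
                        try {
                            super.run();
                        } finally {
                            latch.countDown();
                        }
                    }
                };
                tasks.add(ft);
            }

            // 2. Submit everything with limited parallelism.
            for (FutureTask<Long> ft : tasks)
                pool.execute(ft);

            // 3. Wait for all subtasks, then total their results.
            latch.await();

            long total = 0L;
            for (FutureTask<Long> ft : tasks)
                total += ft.get();

            return total;

        } finally {
            // 4. Cancel any subtasks still running (e.g. if get() threw).
            for (FutureTask<Long> ft : tasks)
                ft.cancel(true/* mayInterruptIfRunning */);
            pool.shutdown();
        }

    }

}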



