/*
Copyright (C) SYSTAP, LLC DBA Blazegraph 2006-2016. All rights reserved.
Contact:
SYSTAP, LLC DBA Blazegraph
2501 Calvert ST NW #106
Washington, DC 20008
[email protected]
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; version 2 of the License.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program; if not, write to the Free Software
Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
*/
package com.bigdata.rdf.store;
import java.util.LinkedList;
import java.util.List;
import java.util.Queue;
import java.util.concurrent.Callable;
import java.util.concurrent.ConcurrentLinkedQueue;
import java.util.concurrent.CountDownLatch;
import java.util.concurrent.ExecutionException;
import java.util.concurrent.ExecutorService;
import java.util.concurrent.FutureTask;
import java.util.concurrent.TimeUnit;
import java.util.concurrent.atomic.AtomicReference;
import org.apache.system.SystemUtil;
import com.bigdata.rdf.spo.ISPO;
import com.bigdata.relation.accesspath.BlockingBuffer;
import com.bigdata.relation.accesspath.IAccessPath;
import com.bigdata.striterator.AbstractChunkedResolverator;
import com.bigdata.striterator.IChunkedOrderedIterator;
import com.bigdata.util.concurrent.LatchedExecutor;
import cutthecrap.utils.striterators.ICloseableIterator;
/**
* Efficient batched, streaming resolution of triple patterns to statements
* spanned by those triple patterns that are present in the data.
*
* Note: If the input contains triple patterns that have a high cardinality
* in the data, then a large number of statements may be returned.
*
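 * A minimal usage sketch (the {@code store}, {@code patternItr}, and
 * {@code executor} shown here are placeholders assumed to be set up by the
 * caller):
 * <pre>{@code
 * BigdataTriplePatternMaterializer itr =
 *         new BigdataTriplePatternMaterializer(store, patternItr, 4)
 *                 .start(executor);
 * try {
 *     while (itr.hasNext()) {
 *         final ISPO spo = itr.next();
 *         // consume the materialized statement
 *     }
 * } finally {
 *     itr.close();
 * }
 * }</pre>
 *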
* @param triplePatterns
* A collection of triple patterns or fully bound statements. If
* this collection contains triple patterns that have a high
* cardinality in the data, then a large number of statements may
* be returned.
*
* @return An iterator from which the materialized statements spanned by
* those triple patterns may be read.
*
 * @see Efficient batch remove of a collection of triple patterns
*/
public class BigdataTriplePatternMaterializer
        extends
        AbstractChunkedResolverator<BigdataTriplePattern, ISPO, AbstractTripleStore>
        implements ICloseableIterator<ISPO> {
//        implements IChunkedOrderedIterator<ISPO> {
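    /** The #of threads used to resolve triple patterns in parallel (clamped to [1, 2 * ncpus]). */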
private final int nthreads;
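    /**
     * Convenience constructor defaulting to a parallelism of 4 threads.
     */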
    public BigdataTriplePatternMaterializer(final AbstractTripleStore db,
            final IChunkedOrderedIterator<BigdataTriplePattern> src) {
this(db, src, 4/* nthreads */);
}
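    /**
     * Core constructor.
     *
     * @param db
     *            The database against which the triple patterns will be
     *            resolved (also supplies the output buffer configuration).
     * @param src
     *            The source iterator visiting the triple patterns.
     * @param nthreads
     *            The requested parallelism (clamped to [1, 2 * ncpus]).
     */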
    public BigdataTriplePatternMaterializer(final AbstractTripleStore db,
            final IChunkedOrderedIterator<BigdataTriplePattern> src,
            final int nthreads) {

        super(db, src, new BlockingBuffer<ISPO[]>(
                db.getChunkOfChunksCapacity(),
                db.getChunkCapacity(),
                db.getChunkTimeout(),
                TimeUnit.MILLISECONDS));
if (nthreads < 0)
throw new IllegalArgumentException();
// At least 1 thread. At most ncpus*2.
this.nthreads = Math.max(
Math.min(nthreads, SystemUtil.numProcessors() * 2), 1);
}
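    /**
     * Wraps the caller's service in a {@link LatchedExecutor} so the
     * per-pattern subtasks run with at most {@link #nthreads} parallelism,
     * then delegates to the base class.
     */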
@Override
public BigdataTriplePatternMaterializer start(final ExecutorService service) {
helperService.set(new LatchedExecutor(service, nthreads));
super.start(service);
return this;
}
    private final AtomicReference<LatchedExecutor> helperService = new AtomicReference<LatchedExecutor>();
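    /**
     * Resolves one chunk of triple patterns by fanning out one subtask per
     * pattern, awaiting all of them, and concatenating their results into a
     * single {@link ISPO}[] chunk.
     */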
@Override
protected ISPO[] resolveChunk(final BigdataTriplePattern[] chunk) {
final LatchedExecutor helperService = this.helperService.get();
if (helperService == null)
throw new IllegalStateException();
/**
* The output will be at most sizeof(chunk) arrays. Each array will have
* one or more statements. Any triple patterns that have no intersection
* in the data will be dropped and will not put anything into this
* output queue.
*
         * @see Deadlock in BigdataTriplePatternMaterializer
*/
        final Queue<ISPO[]> out = new ConcurrentLinkedQueue<ISPO[]>(
                /* chunk.length */);

        final List<FutureTask<Long>> tasks = new LinkedList<FutureTask<Long>>();
try {
final CountDownLatch latch = new CountDownLatch(chunk.length);
/*
* Create FutureTasks for each subquery. The futures are not
* submitted to the Executor yet. That happens in call(). By
* deferring the evaluation until call() we gain the ability to
* cancel all subqueries if any subquery fails.
*/
for (BigdataTriplePattern stmt : chunk) {
/*
* Task runs subquery and cancels all subqueries in [tasks] if
* it fails.
*/
                final FutureTask<Long> ft = new FutureTask<Long>(
                        new ResolveTriplePatternTask(stmt, out)) {
/*
* Hook future to count down the latch when the task is
* done.
*/
@Override
public void run() {
try {
super.run();
} finally {
latch.countDown();
}
}
};
tasks.add(ft);
}
/*
* Run triple pattern resolution with limited parallelism.
*/
            for (FutureTask<Long> ft : tasks) {
helperService.execute(ft);
}
/*
* Wait for all tasks to complete.
*/
latch.await();
/*
* Check futures, counting the #of solutions.
*/
long nfound = 0L;
            for (FutureTask<Long> ft : tasks) {
nfound += ft.get();
if (nfound > Integer.MAX_VALUE)
throw new UnsupportedOperationException();
}
/*
* Convert into a single ISPO[] chunk.
*/
final ISPO[] dest = new ISPO[(int) nfound];
int destPos = 0;
ISPO[] src = null;
while ((src = out.poll()) != null) {
System.arraycopy(src/* src */, 0/* srcPos */, dest, destPos,
src.length);
destPos += src.length;
}
return dest;
} catch (InterruptedException e) {
throw new RuntimeException(e);
} catch (ExecutionException e) {
throw new RuntimeException(e);
} finally {
// Cancel any tasks which are still running.
for (FutureTask ft : tasks)
ft.cancel(true/* mayInterruptIfRunning */);
}
}
/**
* Resolve a triple pattern to the statements that it spans in the data.
*
     * @author Bryan Thompson
*/
    private class ResolveTriplePatternTask implements Callable<Long> {
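        /** The triple pattern (or fully bound statement) to be resolved. */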
private final BigdataTriplePattern stmt;
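        /** Shared unbounded queue onto which each non-empty result chunk is added. */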
        private final Queue<ISPO[]> out;
        public ResolveTriplePatternTask(final BigdataTriplePattern stmt,
                final Queue<ISPO[]> out) {
this.stmt = stmt;
this.out = out;
}
@Override
public Long call() throws Exception {
/*
* TODO What about closure over the SIDs?
*
             * final IChunkedOrderedIterator<ISPO> itr =
             *         database.computeClosureForStatementIdentifiers(
             *                 database.getAccessPath(s, p, o, c).iterator());
*/
            final IAccessPath<ISPO> ap = (IAccessPath<ISPO>) state.getAccessPath(
                    stmt.getSubject(), stmt.getPredicate(),
                    stmt.getObject(), stmt.getContext());
// if(ap.isFullyBoundForKey()) {
// /*
// * Optimize when triple pattern is a fully bound statement.
// * In this case, the output is either that statement (with IVs
// * resolved) or the triple pattern is dropped.
// */
//            final IChunkedOrderedIterator<ISPO> itr = ap.iterator();
// try {
// if (!itr.hasNext())
// return 0L;
// final ISPO spo = itr.next();
// out.add(new ISPO[]{spo});
// return 1L;
// } finally {
// itr.close();
// }
// } else {
long n = 0L;
            final IChunkedOrderedIterator<ISPO> itr = ap.iterator();
try {
while (itr.hasNext()) {
final ISPO[] a = itr.nextChunk();
// if (true) {
// // verify no null array elements.
// for (int i = 0; i < a.length; i++) {
// if (a[i] == null)
// throw new AssertionError(Arrays.toString(a));
// }
// }
/**
* This will never fail for a ConcurrentLinkedQueue.
*
                     * @see Deadlock in BigdataTriplePatternMaterializer
*/
final boolean result = out.offer(a);
assert result : "insertion failed - expects an unbounded queue";
n += a.length;
}
return n;
} finally {
itr.close();
}
// }
}
}
}