// com.bigdata.relation.rule.eval.ProgramTask
/*
Copyright (C) SYSTAP, LLC DBA Blazegraph 2006-2016. All rights reserved.
Contact:
SYSTAP, LLC DBA Blazegraph
2501 Calvert ST NW #106
Washington, DC 20008
licenses@blazegraph.com
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; version 2 of the License.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program; if not, write to the Free Software
Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
*/
/*
* Created on Jun 24, 2008
*/
package com.bigdata.relation.rule.eval;
import java.io.IOException;
import java.util.Iterator;
import java.util.concurrent.Callable;
import java.util.concurrent.ExecutionException;
import java.util.concurrent.ExecutorService;
import java.util.concurrent.Future;
import org.apache.log4j.Logger;
import com.bigdata.journal.AbstractTask;
import com.bigdata.journal.BufferMode;
import com.bigdata.journal.ConcurrencyManager;
import com.bigdata.journal.IIndexManager;
import com.bigdata.journal.IIndexStore;
import com.bigdata.journal.ITx;
import com.bigdata.journal.Journal;
import com.bigdata.journal.TimestampUtility;
import com.bigdata.relation.IMutableRelation;
import com.bigdata.relation.accesspath.ChunkConsumerIterator;
import com.bigdata.relation.accesspath.IAsynchronousIterator;
import com.bigdata.relation.accesspath.IBlockingBuffer;
import com.bigdata.relation.rule.IProgram;
import com.bigdata.relation.rule.IRule;
import com.bigdata.relation.rule.IStep;
import com.bigdata.service.DataService;
import com.bigdata.service.DataServiceCallable;
import com.bigdata.service.IBigdataFederation;
import com.bigdata.service.IDataService;
import com.bigdata.striterator.IChunkedOrderedIterator;
import cutthecrap.utils.striterators.ICloseableIterator;
/**
* Task for executing a program when all of the indices for the relation are
* co-located on the same {@link DataService}.
*
* @todo Named result sets. This would provide a means to run a IRuleTask and
* cache the output for further evaluation as a named result set. The
* backing store should be a temporary resource. for scale-out it needs to
* be visible to the federation (since the rule executing against that
* data may be distributed across the federation based on the access path
* for the SPORelation) so it would have to be registered on some data
* service (any) in the federation and dropped in a finally {} clause.
*
* When the sets are large then they may need a backing store, e.g.,
* BigdataSet (specialized so that it does not store anything under
* the key, since we can decode the Long from the key). Provide utility
* versions such as BigdataLongSet(); the same code can serve float,
* double, and int as well. Avoid overwriting duplicate keys to reduce IO.
*
* @todo it should be possible to have a different action associated with each
* rule in the program, and to have a different target relation for the
* head of each rule on which we will write (mutation). Different query or
* mutation count results could be handled by an extension with
* "nextResultSet" style semantics. However, for now, all rules MUST write
* on the same buffer. Query results will therefore be multiplexed as will
* mutations counts.
*
* @todo foreign key joins: it should be possible to handle different relation
* classes in the same rules, e.g., RDF and non-RDF relations. Or even the
* SPO and lexicon relation for the RDF DB -- the latter will be useful
* for materializing externalized statements efficiently.
*
* @todo could make the return type a generic for {@link AbstractStepTask} and
* make this class a concrete implementation of that one.
*
* @author Bryan Thompson
* @version $Id$
*/
public class ProgramTask extends DataServiceCallable implements IProgramTask {
/**
*
*/
private static final long serialVersionUID = -7047397038429305180L;
protected static final transient Logger log = Logger.getLogger(ProgramTask.class);
private final ActionEnum action;
private final IStep step;
/**
* Serializable field specified when the {@link ProgramTask} will be
* submitted (via RMI or not) to a {@link DataService}. A new
* {@link IJoinNexus} is instantiated in the execution context on the
* {@link DataService} from this field.
*/
private final IJoinNexusFactory joinNexusFactory;
/**
* Note: NOT serialized! The {@link IIndexManager} will be set by
* {@link #setDataService(DataService)} if this object submitted using
* {@link DataService#submit(Callable)}.
*/
private transient IIndexManager indexManager;
// /**
// * Note: NOT serialized!
// */
// private transient DataService dataService;
/**
 * Passes the {@link DataService} to the superclass and then adopts the
 * federation exposed by that service as the {@link IIndexManager} used by
 * this task.
 *
 * @param dataService
 *            The data service handed to this task.
 */
@Override
public void setDataService(final DataService dataService) {
    super.setDataService(dataService);
    // The federation of the data service doubles as our index manager.
    final IIndexManager federation = dataService.getFederation();
    this.indexManager = federation;
}
/**
 * Variant when the task will be submitted using
 * {@link IDataService#submit(Callable)} (efficient since all indices will
 * be local, but the indices must not be partitioned and must all exist on
 * the target {@link DataService}).
 *
 * Note: the caller MUST submit the {@link ProgramTask} using
 * {@link DataService#submit(Callable)}, in which case the index manager is
 * set (after the ctor) through {@link #setDataService(DataService)}. Until
 * then the index manager is <code>null</code>. The {@link IJoinNexusFactory}
 * will be used to establish access to indices, relations, etc. in the
 * context of the {@link AbstractTask} - see
 * {@link AbstractStepTask#submit()}.
 *
 * @param action
 *            The action to perform.
 * @param step
 *            The step (rule or program) to execute.
 * @param joinNexusFactory
 *            The factory used to create {@link IJoinNexus} instances.
 *
 * @throws IllegalArgumentException
 *             if any parameter is <code>null</code>.
 */
public ProgramTask(final ActionEnum action, final IStep step,
        final IJoinNexusFactory joinNexusFactory) {
    if (action == null || step == null || joinNexusFactory == null) {
        throw new IllegalArgumentException();
    }
    // Not known yet: assigned later by setDataService().
    this.indexManager = null;
    this.joinNexusFactory = joinNexusFactory;
    this.step = step;
    this.action = action;
}
/**
 * Variant when the task will be executed directly by the caller.
 *
 * @param action
 *            The action to perform.
 * @param step
 *            The step (rule or program) to execute.
 * @param joinNexusFactory
 *            The factory used to create {@link IJoinNexus} instances.
 * @param indexManager
 *            The index manager against which the task will run.
 *
 * @throws IllegalArgumentException
 *             if any parameter is <code>null</code>.
 */
public ProgramTask(final ActionEnum action, final IStep step,
        final IJoinNexusFactory joinNexusFactory, final IIndexManager indexManager) {
    // Validates and stores the three shared arguments.
    this(action, step, joinNexusFactory);
    if (indexManager == null) {
        throw new IllegalArgumentException();
    }
    this.indexManager = indexManager;
}
/**
 * Execute the program.
 *
 * For a mutation action the step is run as a closure, as a program that
 * embeds closure operations, or as a plain mutation, and the mutation count
 * of the resulting {@link RuleStats} is returned. For a query action a
 * {@link ChunkConsumerIterator} wrapping the iterator returned by
 * {@link #executeQuery(IStep)} is returned.
 *
 * Note: There is no natural order for high-level query. Also, unless stable
 * evaluation is requested, the results can be produced by parallel threads
 * and the order of the materialized solution is therefore not even stable.
 * The only way to have a natural order is for a sort to be imposed on the
 * {@link ISolution}s.
 *
 * @return The mutation count (mutation) or an iterator over the solutions
 *         (query).
 *
 * @throws UnsupportedOperationException
 *             if a query is requested for a step that is, or embeds, a
 *             closure operation.
 * @throws Exception
 */
public Object call() throws Exception {
    if (log.isDebugEnabled()) {
        log.debug("begin: program=" + step.getName() + ", action="
                + action);
    }
    try {
        final ProgramUtility programUtility = new ProgramUtility();
        if (!action.isMutation()) {
            // Query: closure (direct or embedded) is not permitted.
            final boolean usesClosure = (!step.isRule() && ((IProgram) step)
                    .isClosure()) || programUtility.isClosureProgram(step);
            if (usesClosure) {
                throw new UnsupportedOperationException(
                        "Closure only allowed for mutation.");
            }
            return new ChunkConsumerIterator(executeQuery(step));
        }
        // Mutation: choose the evaluation strategy for the step.
        final RuleStats stats;
        if (!step.isRule() && ((IProgram) step).isClosure()) {
            // Closure of a flat set of rules.
            stats = executeClosure((IProgram) step);
        } else if (programUtility.isClosureProgram(step)) {
            // Program that embeds closure operations.
            stats = executeProgramWithEmbeddedClosure((IProgram) step);
        } else {
            // Mutation that does not use closure.
            stats = executeMutation(step);
        }
        RuleLog.log(stats);
        return stats.mutationCount.get();
    } finally {
        if (log.isDebugEnabled()) {
            log.debug("bye");
        }
    }
}
/**
* Execute the {@link IStep} as a query. A {@link QueryTask} is submitted and
* runs asynchronously; this method returns without waiting for it.
*
* @param step
* The {@link IStep}.
*
* @return The {@link IAsynchronousIterator} obtained from the query buffer,
* which will drain the {@link ISolution}s generated by the
* {@link IStep}. Execution will be cancelled if the iterator is
* {@link ICloseableIterator#close() closed}. If execution results
* in an error, then the iterator will throw a
* {@link RuntimeException} whose cause is the error.
*
* @throws IllegalArgumentException
* if the step is <code>null</code>.
* @throws RuntimeException
* wrapping anything thrown while submitting the task or obtaining
* the iterator (the buffer is closed and the future, if any, is
* cancelled first).
*/
protected IAsynchronousIterator executeQuery(final IStep step) {
if (step == null)
throw new IllegalArgumentException();
if (log.isDebugEnabled())
log.debug("program=" + step.getName());
// buffer shared by all rules run in this query.
final IBlockingBuffer buffer = joinNexusFactory.newInstance(
indexManager).newQueryBuffer();
// the task to execute (the data service is passed only when we have one).
final QueryTask queryTask = new QueryTask(step, joinNexusFactory,
buffer, indexManager, isDataService()?getDataService():null);
// remains null until the task has been submitted.
Future future = null;
try {
/*
* Note: We do NOT get() this Future. This task will run
* asynchronously.
*
* The Future is canceled IF (hopefully WHEN) the iterator is
* closed.
*
* If the task itself throws an error, then it will use
* buffer#abort(cause) to notify the buffer of the cause (it will be
* passed along to the iterator) and to close the buffer (the
* iterator will notice that the buffer has been closed as well as
* that the cause was set on the buffer).
*
* @todo if the #of results is small and they are available with
* little latency then return the results inline using a fully
* buffered iterator.
*
* Note: hack pattern to ensure Future is cancelled if we exit by
* any code path before the future has been set on the BlockingBuffer.
*
* See the issue titled "BlockingBuffer.close() does not unblock
* threads" (the link was lost from this comment).
*/
try {
// run the task.
future = queryTask.submit();
// set the future on the BlockingBuffer.
buffer.setFuture(future);
} finally {
if (future != null && buffer.getFuture() == null) {
// Future exists but not set on BlockingBuffer.
future.cancel(true/* mayInterruptIfRunning */);
}
}
if (log.isDebugEnabled())
log.debug("Returning iterator reading on async query task");
// return the async iterator.
return buffer.iterator();
} catch (Throwable ex) {
/*
* Log and rethrow wrapped. The finally clause runs before the
* RuntimeException propagates: it closes the buffer and then cancels
* the future (if one was obtained).
*/
try {
log.error(ex, ex);
throw new RuntimeException(ex);
} finally {
buffer.close();
if (future != null) {
future.cancel(true/* mayInterruptIfRunning */);
}
}
}
}
/**
* Run a mutation {@link IStep}. The {@link IStep} may consist of many sub-{@link IStep}s.
*
* Note: If you specify {@link ITx#READ_COMMITTED} for mutation operations
* when using a federation then concurrent split/join/move can cause the
* operation to fail. It is safer to use read-consistent semantics by
* specifying {@link IIndexStore#getLastCommitTime()} instead.
*
* @param step
* The {@link IStep}.
*
* @return Metadata about the program execution, including the required
* {@link RuleStats#mutationCount}.
*
* @throws InterruptedException
* @throws ExecutionException
*/
protected RuleStats executeMutation(final IStep step)
throws InterruptedException, ExecutionException {
if (step == null)
throw new IllegalArgumentException();
if (!action.isMutation())
throw new IllegalArgumentException();
long tx = 0L;
try {
/*
* Note: The WORM Journal reads and writes against the unisolated
* view for mutation operations. This works because it never
* releases any written storage. For the RWStore we have to do
* something different to ensure that the writes driven by the
* mutation operation do not cause allocation slots to be read
* against which we need to read. For the federation, we need to
* protect against the release of the journal and index segment
* resources against which we need to read. Both of those cases (RW
* and federation) are handled using a read-only transaction to
* protect the historical view against which the mutation rule will
* read.
*/
if (indexManager instanceof IBigdataFederation>) {
/*
* Advance the read-consistent timestamp so that any writes from
* the previous rules or the last round are now visible.
*
* Note: The federation has shard-wise autoCommit semantics
* which ensure that there will be a suitable commit point for
* us to read against (as long as the caller flushed the
* buffered writes before invoking closure, which they do).
*/
final long lastCommitTime = indexManager.getLastCommitTime();
try {
/*
* A read-only tx reading from the lastCommitTime.
*
* Note: This provides a read-lock on the commit time from
* which the mutation task will read.
*
* @todo we could use the [tx] as the readTimestamp and we
* could use ITx.READ_COMMITTED rather that explicitly
* looking up the lastCommitTime.
*/
tx = ((IBigdataFederation>) indexManager)
.getTransactionService().newTx(lastCommitTime);
} catch (IOException ex) {
throw new RuntimeException(ex);
}
// the timestamp that we will read on for this step.
joinNexusFactory.setReadTimestamp(TimestampUtility
.asHistoricalRead(lastCommitTime));
} else if (false && indexManager instanceof Journal
&& ((Journal) indexManager).getBufferStrategy()
.getBufferMode() == BufferMode.DiskRW) {
/*
* Do a commit and then advance the read-consistent timestamp so
* that any writes from the previous rules or the last round are
* now visible.
*
* Note: The RWStore needs a commit point and a read-only tx
* against that commit point in order to protect from the reuse
* of allocation slots during unisolated writes on the journal.
*
* @todo This could be captured with the notion of an checkpoint
* on the journal without introducing a full commit IF the txs
* was able to make sure of that checkpoint.
*/
final Journal jnl = (Journal) indexManager;
// force a commit before running the mutation rule.
final long lastCommitTime = jnl.commit();
/*
* A read-only tx reading from the commit point we just
* introduced.
*
* Note: This provides a read-lock on the commit time from which
* the mutation task will read.
*/
tx = jnl.newTx(lastCommitTime);
// the timestamp that we will read on for this step.
joinNexusFactory.setReadTimestamp(TimestampUtility
.asHistoricalRead(lastCommitTime));
}
final MutationTask mutationTask = new MutationTask(action,
joinNexusFactory, step, indexManager,
isDataService() ? getDataService() : null);
if (log.isDebugEnabled())
log.debug("begin: action=" + action + ", program="
+ step.getName() + ", task=" + mutationTask);
/*
* Submit task and await completion, returning the result.
*
* Note: The task is responsible for computing the aggregate
* mutation count.
*/
return mutationTask.submit().get();
} finally {
if (tx != 0L) {
/*
* Terminate the read-only tx (releases the read-lock).
*/
if (indexManager instanceof IBigdataFederation>) {
try {
((IBigdataFederation>) indexManager)
.getTransactionService().abort(tx);
} catch (IOException ex) {
throw new RuntimeException(ex);
}
} else if (indexManager instanceof Journal) {
((Journal) indexManager).abort(tx);
}
}
}
}
/**
 * Computes the closure of a set of {@link IRule}s until the relation(s) on
 * which they are writing reach a "fixed point".
 *
 * The general approach is a series of rounds in which each rule is applied
 * in turn (either sequentially or in parallel, depending on the program).
 * Solutions computed for each rule in each round are written onto the
 * relation for the head of that rule. The process halts when no new
 * solutions are computed in a given round.
 *
 * Note: When we are running the program on a {@link ConcurrencyManager},
 * each round of the closure is submitted as a single {@link AbstractTask}.
 * This allows us to do historical reads during the round and to update the
 * read-behind timestamp before each round. During the round, the program
 * will write on buffers that are flushed at the end of the round. Those
 * buffers will use unisolated writes onto the appropriate relations.
 *
 * Mutation counts:
 *
 * In order to detect the fixed point we MUST know whether or not any
 * mutations were made to the relation during the round. The design does NOT
 * rely on the relation count before and after the round since it would have
 * to use an _exact_ range count for the relation (otherwise it is possible
 * that a deleted tuple would be overwritten by a computed entailment but
 * that the count would not change). However, the exact range count is
 * relatively expensive which is why the design insists on getting back the
 * #of elements actually written on the index from each rule in each round.
 * If no rules in a given round caused an element to be written, then we are
 * at the fixed point.
 *
 * Note: This assumes that you are following the {@link IMutableRelation}
 * contract -- you MUST NOT overwrite tuples with the same key and value, or
 * at least you must not report such "do nothing" overwrites in the mutation
 * count!!!
 *
 * @param program
 *            The program to be executed (must be a closure operation).
 *
 * @return The aggregated statistics. Its mutation count is the sum of the
 *         mutation counts reported by the individual rounds.
 *
 * @throws IllegalArgumentException
 *             if the program is <code>null</code> or is not a closure
 *             operation.
 * @throws ExecutionException
 * @throws InterruptedException
 */
protected RuleStats executeClosure(final IProgram program)
        throws InterruptedException, ExecutionException {
    if (program == null)
        throw new IllegalArgumentException();
    if (!program.isClosure())
        throw new IllegalArgumentException();
    final long begin = System.currentTimeMillis();
    final RuleStats totals = joinNexusFactory.newInstance(indexManager)
            .getRuleStatisticsFactory().newInstance(program);
    int round = 1;
    /*
     * Running total of the mutations over all rounds.
     *
     * Note: The total is tracked in this local because totals.add() is not
     * relied on to aggregate the mutation count (see the check after the
     * loop). Re-reading totals.mutationCount each round would reduce the
     * reported total to the delta of the final round, which is always
     * zero at the fixed point.
     */
    long mutationCount = 0L;
    while (true) {
        // mutationCount before this round.
        final long mutationCount0 = mutationCount;
        if (log.isDebugEnabled())
            log.debug("round=" + round + ", mutationCount(before)="
                    + mutationCount0);
        // execute the program.
        final RuleStats tmp = executeMutation(program);
        /*
         * This is the #of mutations from executing this round.
         *
         * Note: each round has its own mutation buffer so this is just the
         * #of mutations in the round. This is because executeMutation()
         * builds a new execution context for each round.
         */
        final long mutationDelta = tmp.mutationCount.get();
        // Total mutation count so far.
        final long mutationCount1 = mutationCount = mutationCount0
                + mutationDelta;
        // set the round identifier.
        tmp.closureRound = round;
        // Aggregate the rule statistics, but not mutationCount.
        totals.add(tmp);
        if (log.isDebugEnabled()) {
            log.debug("round# " + round + ", mutationCount(before="
                    + mutationCount0 + ", after=" + mutationCount1
                    + ", delta=" + mutationDelta + "):" + totals);
        }
        // Fixed point: nothing was written during this round.
        if (mutationDelta == 0L)
            break;
        round++;
    }
    final long elapsed = System.currentTimeMillis() - begin;
    // transfer the accumulated mutation count onto the total.
    if (!totals.mutationCount.compareAndSet(0L, mutationCount)) {
        throw new AssertionError("mutationCount=" + totals.mutationCount);
    }
    if (log.isInfoEnabled()) {
        log.info("\nComputed fixed point: program=" + program.getName()
                + ", rounds=" + round + ", elapsed=" + elapsed + "ms");
    }
    return totals;
}
/**
* Execute an {@link IProgram} containing one or more sub-{@link IProgram}
* that are closure operations. The top-level program must not be a closure
* operation. All steps above the closure operations will be run in a
* sequence. The closure operations themselves will be executed using
* {@link #executeClosure(IProgram)}.
*
* Note: Any program that embeds a closure operation must be sequential
* (this is enforced by the Program class).
*
* Note: Programs that use closure operations are constrained to either (a)
* a fix point of a (normally parallel) program consisting solely of
* {@link IRule}s; or (b) a sequential program containing some steps that
* are the fix point of a (normally parallel) program consisting solely of
* {@link IRule}s.
*
* @throws ExecutionException
* @throws InterruptedException
*
* @todo this will not correctly handle programs use closure in a
* sub-sub-program.
*
* @throws IllegalArgumentException
* if program is null
* @throws IllegalArgumentException
* if program is itself a closure operation.
* @throws IllegalStateException
* unless the {@link ActionEnum} is a mutation operation.
*/
protected RuleStats executeProgramWithEmbeddedClosure(final IProgram program)
throws InterruptedException, ExecutionException {
if (program == null)
throw new IllegalArgumentException();
if (program.isClosure())
throw new IllegalArgumentException();
if(!action.isMutation()) {
throw new IllegalStateException();
}
if (log.isInfoEnabled())
log.info("program embeds closure operations");
final RuleStats totals = joinNexusFactory.newInstance(indexManager)
.getRuleStatisticsFactory().newInstance(program);
final Iterator extends IStep> itr = (program).steps();
long mutationCount = 0L;
while(itr.hasNext()) {
final IStep step = itr.next();
final RuleStats stats;
if (!step.isRule() && ((IProgram) step).isClosure()) {
// A closure step.
stats = executeClosure((IProgram) step);
} else {
// A non-closure step.
stats = executeMutation(step);
}
totals.add(stats);
/*
* Note: both a executeClosure() and executeMutation() will run with
* their own buffers so flush() reporting is not carried forward
* beyond those methods. Hence we have to aggregate the
* mutationCount ourselves for each step that we run.
*/
mutationCount += stats.mutationCount.get();
}
// transfer the final mutation count onto the total.
if (!totals.mutationCount.compareAndSet(0L, mutationCount)) {
throw new AssertionError("mutationCount=" + totals.mutationCount);
}
return totals;
}
}