/*
Copyright (C) SYSTAP, LLC DBA Blazegraph 2006-2016. All rights reserved.
Contact:
SYSTAP, LLC DBA Blazegraph
2501 Calvert ST NW #106
Washington, DC 20008
licenses@blazegraph.com
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; version 2 of the License.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program; if not, write to the Free Software
Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
*/
/*
* Created on Jul 2, 2008
*/
package com.bigdata.relation.rule.eval;
import java.util.ArrayList;
import java.util.Iterator;
import java.util.List;
import java.util.Map;
import java.util.concurrent.Callable;
import java.util.concurrent.ExecutionException;
import java.util.concurrent.Future;
import com.bigdata.journal.IIndexManager;
import com.bigdata.relation.IMutableRelation;
import com.bigdata.relation.IRelation;
import com.bigdata.relation.accesspath.FlushBufferTask;
import com.bigdata.relation.accesspath.IBuffer;
import com.bigdata.relation.rule.IProgram;
import com.bigdata.relation.rule.IRule;
import com.bigdata.relation.rule.IStep;
import com.bigdata.service.DataService;
/**
* A task that executes a mutation operation.
*
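* <p>
* A minimal usage sketch (illustrative only; the constructor is protected,
* so instances are normally created by the rule execution layer rather than
* by application code, and the variable names here are assumptions):
*
* <pre>{@code
* final MutationTask task = new MutationTask(ActionEnum.Insert,
*         joinNexusFactory, step, indexManager, null); // null: no DataService
*
* // Runs the mutation step(s) and returns the aggregated statistics,
* // including the mutation count.
* final RuleStats totals = task.call();
* }</pre>
*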
* @author Bryan Thompson
* @version $Id$
*/
public class MutationTask extends AbstractStepTask {
/**
*
*/
private static final long serialVersionUID = 6503299789509746764L;
protected MutationTask(ActionEnum action,
IJoinNexusFactory joinNexusFactory, IStep step,
IIndexManager indexManager, DataService dataService) {
super(action, joinNexusFactory, step, indexManager, dataService);
}
/**
* Run the task.
*
* Note: We can create the individual tasks that we need to execute now that
* we are in the correct execution context.
*
* Note: The mutation tasks write on {@link IBuffer}s and those buffers
* flush to indices in the {@link IMutableRelation}s. We have to defer the
* creation of those buffers until we are in the execution context and have
* access to the correct indices. In turn, this means that we cannot create
* the tasks that we are going to execute until we have those buffers on
* hand. Hence everything is deferred until we are in the correct
* execution context and have the actual {@link IIndexManager} with which
* the tasks will execute.
*/
public RuleStats call() throws Exception {
/*
* Create the IJoinNexus that will be used to evaluate the operation now
* that we are in the execution context and have the correct
* IIndexManager object.
*/
final IJoinNexus joinNexus = joinNexusFactory.newInstance(indexManager);
/*
* Note: This assumes that we are using the same write timestamp for
* each relation. True for now, but this would need to be revisited if two
* transactions were being written on in conjunction.
*/
final Map<String, IRelation> relations = getWriteRelations(
        indexManager, step, joinNexus.getWriteTimestamp());
assert !relations.isEmpty();
final Map<String, IBuffer<ISolution[]>> buffers = getMutationBuffers(
        joinNexus, relations);
assert !buffers.isEmpty();
final List<Callable<RuleStats>> tasks = newMutationTasks(step, joinNexus,
        buffers);
assert !tasks.isEmpty();
final RuleStats totals;
if (tasks.size() == 1) {
totals = runOne(joinNexus, step, tasks.get(0));
} else if (!joinNexus.forceSerialExecution() && !step.isRule()
&& ((IProgram) step).isParallel()) {
totals = runParallel(joinNexus, step, tasks);
/*
* Note: buffers MUST be flushed!!!
*/
flushBuffers(joinNexus, buffers);
} else {
/*
* Note: flushes buffer after each step.
*
* Note: strictly speaking, a parallel program where
* [forceSerialExecution] is specified does not need to flush the
* buffer after each step since parallel programs do not have
* sequential dependencies between the rules in the rule set.
*
* @todo not flushing the buffer when [forceSerialExecution] is
* specified _AND_ the program is parallel would improve
* performance. Since this is relatively common when computing
* closure it makes sense to implement this tweak.
*
* @todo replace [forceSerialExecution] with [maxRuleParallelism] or
* add the latter and give the former the semantics of a debug
* switch?
*/
totals = runSequential(joinNexus, step, tasks);
}
/*
* Note: This gets the mutationCount onto [totals]. If a given buffer is
* empty (either because nothing was written onto it or because it was
* already flushed above) then this will have very little overhead
* beyond getting the current mutationCount back from flush(). You
* SHOULD NOT rely on this to flush the buffers since it does not
* parallelize that operation and flushing large buffers can be a high
* latency operation!
*/
getMutationCountFromBuffers(totals, buffers);
RuleLog.log(totals);
return totals;
}
/**
* Flush the buffer(s), parallelizing the flush when there is more than one
* buffer. Note that this method only verifies that each flush task completed
* normally; the mutation counts are aggregated separately by
* {@link #getMutationCountFromBuffers(RuleStats, Map)}.
*
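* <p>
* A sketch of the intended call order in {@link #call()} for a parallel
* program (whose steps do not flush the buffers for themselves):
*
* <pre>{@code
* totals = runParallel(joinNexus, step, tasks);
* flushBuffers(joinNexus, buffers); // parallelized, high-latency writes
* getMutationCountFromBuffers(totals, buffers); // cheap: buffers now empty
* }</pre>
*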
* @throws InterruptedException
* @throws ExecutionException
*/
protected void flushBuffers(IJoinNexus joinNexus,
        Map<String, IBuffer<ISolution[]>> buffers)
        throws InterruptedException, ExecutionException {
if (joinNexus == null)
throw new IllegalArgumentException();
if (buffers == null)
throw new IllegalArgumentException();
final int n = buffers.size();
if (n == 0) {
if (log.isInfoEnabled())
log.info("No buffers.");
return;
}
if (n == 1) {
/*
* One buffer, so flush it in this thread.
*/
final IBuffer<ISolution[]> buffer = buffers.values().iterator().next();
if (log.isInfoEnabled())
    log.info("Flushing one buffer: size=" + buffer.size());
buffer.flush();
} else {
/*
* Multiple buffers, each writing on a different relation. Create a
* task per buffer and submit those tasks to the service to flush
* them in parallel.
*/
if (log.isInfoEnabled())
log.info("Flushing " + n + " buffers.");
final List<Callable<Long>> tasks = new ArrayList<Callable<Long>>(n);
final Iterator<IBuffer<ISolution[]>> itr = buffers.values().iterator();
while (itr.hasNext()) {
final IBuffer<ISolution[]> buffer = itr.next();
tasks.add(new FlushBufferTask(buffer));
}
// Flush the buffers in parallel using the index manager's service.
final List<Future<Long>> futures = indexManager
        .getExecutorService().invokeAll(tasks);
for (Future<Long> f : futures) {
// Verify that the flush task completed normally. The mutation counts
// are aggregated separately by getMutationCountFromBuffers().
f.get();
}
}
}
/**
* This just reads off and aggregates the mutationCount from each buffer as
* reported by {@link IBuffer#flush()}. This is the actual mutation count
* for the step(s) executed by the {@link MutationTask} (no
* double-counting).
*
* Note: The buffers SHOULD already have been flushed as this does NOT
* parallelize the writes on the {@link IMutableRelation}s. See
* {@link #flushBuffers(IJoinNexus, Map)}, which does parallelize those
* writes.
*
* @return The mutation count, which was also set as a side-effect on
* totals.
*/
protected long getMutationCountFromBuffers(RuleStats totals,
        Map<String, IBuffer<ISolution[]>> buffers) {
if (totals == null)
throw new IllegalArgumentException();
if (buffers == null)
throw new IllegalArgumentException();
/*
* Aggregate the mutationCount from each buffer.
*/
long mutationCount = 0L;
final Iterator<IBuffer<ISolution[]>> itr = buffers.values().iterator();
while (itr.hasNext()) {
final IBuffer<ISolution[]> buffer = itr.next();
mutationCount += buffer.flush();
}
/*
* Note: For a distributed "pipeline" join, the JoinTask(s) for the last
* join dimension each create their own buffers onto which they write
* their solution. This is done in order to prevent all data from
* flowing through the join master. This means that the buffer that is
* being flushed above was never written on, so the [mutationCount] as
* computed above will be zero for a _distributed_ pipeline join.
*
* @todo while this avoids an assertion error for the pipeline join, I
* need to explore more carefully how each join implementation reports
* the mutation count and how it is aggregated throughout the program
* execution.
*/
/*
* Atomic set of the total mutation count for all buffers on which the
* step(s) were writing, but only if the task did not already update the
* mutationCount itself (as a distributed pipeline join does).
*/
totals.mutationCount.compareAndSet(0L, mutationCount);
return mutationCount;
}
/**
* Builds the set of tasks that will execute the mutation step(s).
*
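* <p>
* Each rule is paired with the buffer for the relation named in the head of
* that rule. A sketch of the lookup (the relation name is hypothetical):
*
* <pre>{@code
* // A rule whose head writes on the relation "kb.spo" adds its
* // solutions to the buffer registered under that name:
* final IBuffer<ISolution[]> buffer = buffers.get(rule.getHead()
*         .getOnlyRelationName());
* }</pre>
*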
* @param step
*            The rule or program to be executed.
* @param joinNexus
*            The {@link IJoinNexus} for this execution context.
* @param buffers
*            The buffer(s) on which the rule(s) will write, keyed by the
*            name of the relation in the head of each rule.
*
* @return The list of tasks to be executed.
*/
protected List<Callable<RuleStats>> newMutationTasks(final IStep step,
        final IJoinNexus joinNexus,
        final Map<String, IBuffer<ISolution[]>> buffers) {
if (log.isDebugEnabled())
log.debug("program=" + step.getName());
final List<Callable<RuleStats>> tasks;
if (step.isRule()) {
if (((IRule) step).getHead() == null) {
throw new IllegalArgumentException("No head for this rule: " + step);
}
tasks = new ArrayList<Callable<RuleStats>>(1);
final IRule rule = (IRule) step;
final IBuffer<ISolution[]> buffer = buffers.get(rule.getHead()
        .getOnlyRelationName());
final Callable<RuleStats> task = joinNexus.getRuleTaskFactory(
        false/* parallel */, rule).newTask(rule, joinNexus, buffer);
tasks.add(task);
} else {
final IProgram program = (IProgram) step;
final boolean parallel = program.isParallel();
tasks = new ArrayList<Callable<RuleStats>>(program.stepCount());
final Iterator<? extends IStep> itr = program.steps();
while (itr.hasNext()) {
// @todo handle sub-programs.
final IRule rule = (IRule) itr.next();
if (rule.getHead() == null) {
throw new IllegalArgumentException("No head for this rule: " + rule);
}
final IBuffer<ISolution[]> buffer = buffers.get(rule.getHead()
        .getOnlyRelationName());
final IStepTask task = joinNexus.getRuleTaskFactory(parallel,
        rule).newTask(rule, joinNexus, buffer);
if (!parallel || joinNexus.forceSerialExecution()) {
/*
* Tasks for sequential mutation steps are always wrapped to
* ensure that the thread-safe buffer is flushed onto the
* mutable relation after each rule executes. This is
* necessary in order for the results of one rule in a
* sequential program to be visible to the next rule in that
* sequential program.
*/
tasks.add(new RunRuleAndFlushBufferTask(task, buffer));
} else {
/*
* Parallel execution: the buffer is flushed once after all of the
* rules have run (see call()), so the task is added unwrapped.
*/
tasks.add(task);
}
}
}
if (log.isDebugEnabled()) {
log.debug("Created " + tasks.size() + " mutation tasks: action="
+ action);
}
return tasks;
}
}