// com.bigdata.relation.rule.eval.AbstractStepTask (listing header: Maven / Gradle / Ivy)
/*
Copyright (C) SYSTAP, LLC DBA Blazegraph 2006-2016. All rights reserved.
Contact:
SYSTAP, LLC DBA Blazegraph
2501 Calvert ST NW #106
Washington, DC 20008
licenses@blazegraph.com
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; version 2 of the License.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program; if not, write to the Free Software
Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
*/
/*
* Created on Jul 2, 2008
*/
package com.bigdata.relation.rule.eval;
import java.util.Arrays;
import java.util.Collection;
import java.util.Collections;
import java.util.HashMap;
import java.util.HashSet;
import java.util.Iterator;
import java.util.List;
import java.util.Map;
import java.util.Set;
import java.util.concurrent.Callable;
import java.util.concurrent.ExecutionException;
import java.util.concurrent.ExecutorService;
import java.util.concurrent.Future;
import org.apache.log4j.Logger;
import com.bigdata.bop.IPredicate;
import com.bigdata.journal.AbstractTask;
import com.bigdata.journal.ConcurrencyManager;
import com.bigdata.journal.IConcurrencyManager;
import com.bigdata.journal.IIndexManager;
import com.bigdata.journal.ITx;
import com.bigdata.relation.IMutableRelation;
import com.bigdata.relation.IRelation;
import com.bigdata.relation.accesspath.IBuffer;
import com.bigdata.relation.rule.IProgram;
import com.bigdata.relation.rule.IRule;
import com.bigdata.relation.rule.IStep;
import com.bigdata.service.DataService;
import com.bigdata.service.DataServiceCallable;
import com.bigdata.service.IDataServiceCallable;
import com.bigdata.service.ndx.ClientIndexView;
import com.bigdata.service.ndx.IClientIndex;
/**
* @author Bryan Thompson
* @version $Id$
*/
abstract public class AbstractStepTask extends DataServiceCallable
implements IStepTask, Cloneable {
/** Logger shared by this class and its subclasses. */
protected static final transient Logger log = Logger.getLogger(AbstractStepTask.class);
/** Whether this task is a query or a mutation; the constructor rejects {@code null}. */
protected final ActionEnum action;
/**
 * Supplies the read / write timestamps used when the task is submitted to
 * a {@link ConcurrencyManager}; the constructor rejects {@code null}.
 */
protected final IJoinNexusFactory joinNexusFactory;
/**
 * Source of the {@link ExecutorService} and of the resource locator used to
 * resolve relations. Not validated by the constructor (MAY be {@code null}).
 * Deliberately not final: when a clone of this task runs inside of an
 * {@link AbstractTask} this field is replaced on the clone by the journal
 * exposed by that {@link AbstractTask}.
 */
protected /*final*/ IIndexManager indexManager;
/** The rule or program to be executed; the constructor rejects {@code null}. */
protected final IStep step;
/**
 * Base class handles submit either to the caller's {@link ExecutorService}
 * or to the {@link ConcurrencyManager} IFF the task was submitted to a
 * {@link DataService}.
 * <p>
 * Note: The {@link DataService} will notice the
 * {@link IDataServiceCallable} interface and set a reference to
 * itself using {@link #setDataService(DataService)}. {@link #submit()}
 * notices this case and causes this task to be {@link #clone()}ed,
 * the {@link ExecutorService} set on the clone, and the clone is then
 * submitted to the {@link ConcurrencyManager} for the {@link DataService}.
 *
 * @param action
 *            Indicate whether this is a query or a mutation operation.
 * @param joinNexusFactory
 *            Various goodies.
 * @param step
 *            The rule or program.
 * @param indexManager
 *            The index manager. This is NOT validated here and MAY be
 *            {@code null}; the {@code runParallel}, {@code runSequential}
 *            and {@code runOne} methods throw an
 *            {@link IllegalStateException} if it is still {@code null}
 *            when they are invoked.
 * @param dataService
 *            non-{@code null} iff the caller is already running on
 *            a {@link DataService}.
 *
 * @throws IllegalArgumentException
 *             if <i>action</i> is {@code null}.
 * @throws IllegalArgumentException
 *             if <i>joinNexusFactory</i> is {@code null}.
 * @throws IllegalArgumentException
 *             if <i>step</i> is {@code null}.
 */
protected AbstractStepTask(final ActionEnum action,
        final IJoinNexusFactory joinNexusFactory, final IStep step,
        final IIndexManager indexManager, final DataService dataService) {

    if (action == null)
        throw new IllegalArgumentException("action is null");

    if (joinNexusFactory == null)
        throw new IllegalArgumentException("joinNexusFactory is null");

    if (step == null)
        throw new IllegalArgumentException("step is null");

    this.action = action;

    this.joinNexusFactory = joinNexusFactory;

    this.step = step;

    // Note: deliberately not validated - the index manager MAY be null.
    this.indexManager = indexManager;

    // Only record the data service when the caller supplied one.
    if (dataService != null)
        setDataService(dataService);

}
/**
 * Human readable summary of this task: the simple class name, the
 * {@link #action}, the name of the {@link #step}, the
 * {@link #joinNexusFactory} and the {@link #indexManager}.
 */
@Override
public String toString() {

    return "{" + getClass().getSimpleName() + ", action=" + action
            + ", step=" + step.getName() + ", joinNexusFactory="
            + joinNexusFactory + ", indexManager=" + indexManager + "}";

}
/**
* Run program steps in parallel.
*
* @param program
* @param tasks
*
* @throws InterruptedException
* @throws ExecutionException
*
* @todo adapt the {@link ClientIndexView} code so that we notice all
* errors, log them all, and report them all in a single thrown
* exception. note that we may be running asynchronously inside of a
* task after the caller has an iterator that is draining the buffer.
* when an error occurs in that context the buffer should be flagged
* to indicate an exception and closed and the iterator should report
* the exception to the client.
*
* Do the same thing for running a program as a sequence.
*/
protected RuleStats runParallel(final IJoinNexus joinNexus, final IStep program,
final List> tasks) throws InterruptedException,
ExecutionException {
if (log.isInfoEnabled())
log.info("program=" + program.getName() + ", #tasks="
+ tasks.size());
if (indexManager == null)
throw new IllegalStateException();
final RuleStats totals = joinNexus.getRuleStatisticsFactory().newInstance(program);
final ExecutorService service = indexManager.getExecutorService();
// submit tasks and await their completion.
final List> futures = service.invokeAll(tasks);
// verify no problems with tasks.
for (Future f : futures) {
final RuleStats tmp = f.get();
totals.add(tmp);
}
if (log.isInfoEnabled())
log.info("program=" + program.getName() + ", #tasks="
+ tasks.size() + " - done");
return totals;
}
/**
* Run program steps in sequence.
*
* @param program
* @param tasks
*
* @return
* @throws InterruptedException
* @throws ExecutionException
*/
protected RuleStats runSequential(final IJoinNexus joinNexus,
final IStep program, final List> tasks)
throws InterruptedException, ExecutionException {
final int ntasks = tasks.size();
if (log.isInfoEnabled())
log.info("program=" + program.getName() + ", #tasks=" + ntasks);
if (indexManager == null)
throw new IllegalStateException();
final ExecutorService service = indexManager.getExecutorService();
final RuleStats totals = joinNexus.getRuleStatisticsFactory().newInstance(program);
final Iterator> itr = tasks.iterator();
int n = 0;
while (itr.hasNext()) {
final Callable task = itr.next();
/*
* Submit and wait for the future.
*
* Note: tasks that are run in a sequential program are required to
* flush the buffer so that all solutions are available for the next
* step of the program. This is critical for programs that have
* dependencies between their steps.
// *
// * Note: This is handled by the task factory.
*/
final RuleStats tmp = service.submit(task).get();
totals.add(tmp);
n++;
if (log.isDebugEnabled()) {
log.debug("program=" + program.getName() + ", finished " + n
+ " of " + ntasks + " seqential tasks.");
}
}
if (log.isInfoEnabled())
log.info("program=" + program.getName() + ", #tasks=" + ntasks
+ " - done");
return totals;
}
/**
 * Run a single step (sequence of one).
 * <p>
 * Note: use {@link #runOne(IJoinNexus, IStep, Callable)} rather than either
 * {@link #runParallel(IJoinNexus, IStep, List)} or
 * {@link #runSequential(IJoinNexus, IStep, List)} when there is only one
 * task to execute in order to avoid an unnecessary layering of the
 * {@link RuleStats} (this is due to a coupling between the
 * {@link RuleStats} reporting structure and the control structure for
 * executing the tasks).
 * <p>
 * The task is invoked directly in the caller's thread rather than being
 * submitted to an {@link ExecutorService}.
 *
 * @param joinNexus
 *            Not used by this method.
 * @param program
 *            The program (used for logging only).
 * @param task
 *            The task to be executed.
 *
 * @return The {@link RuleStats} returned by the task.
 *
 * @throws IllegalStateException
 *             if the {@link #indexManager} is {@code null}.
 * @throws InterruptedException
 *             declared for symmetry with the other run methods.
 * @throws ExecutionException
 *             wrapping ANY exception thrown by the task.
 */
protected RuleStats runOne(final IJoinNexus joinNexus, final IStep program,
        final Callable<RuleStats> task) throws InterruptedException,
        ExecutionException {

    if (log.isInfoEnabled())
        log.info("program=" + program.getName());

    if (indexManager == null)
        throw new IllegalStateException();

    /*
     * Run the task and wait for its result.
     *
     * Note: tasks that are run in a sequential (or as a single task)
     * program are required to flush the buffer so that all solutions are
     * available for the next step of the program. This is critical for
     * programs that have dependencies between their steps.
     *
     * Note: This is handled by the task factory.
     */
    final RuleStats stats;
    try {

        stats = task.call();

    } catch (Exception ex) {

        // Same wrapping the caller would see from Future#get().
        throw new ExecutionException(ex);

    }

    if (log.isInfoEnabled())
        log.info("program=" + program.getName() + " - done");

    return stats;

}
/**
 * Run this task.
 * <p>
 * If we are executing on a {@link DataService} then {@link #getDataService()}
 * will have been set automatically and the task will be submitted to the
 * {@link ConcurrencyManager} for that {@link DataService}.
 * <p>
 * This condition occurs when this {@link Callable} is sent to the
 * {@link DataService} using {@link DataService#submit(Callable)}. In order
 * to gain access to the named indices for the relation, we have to wrap up
 * this {@link Callable} as an {@link AbstractTask} that declares the
 * appropriate timestamp and resources. The {@link AbstractTask} is then
 * submitted to the {@link ConcurrencyManager} for execution. Once the
 * {@link AbstractTask} is actually running, the inner task
 * <em>overrides</em> the {@link #indexManager} to be
 * {@link AbstractTask#getJournal()}. This provides access to the indices,
 * relations, etc. appropriate for the isolation level granted to the task
 * by the {@link ConcurrencyManager} - without this step the
 * {@link AbstractTask} will wind up using an {@link IClientIndex} view and
 * lose the benefits of access to unisolated indices.
 * <p>
 * Otherwise the task is submitted to the {@link ExecutorService} of the
 * {@link #indexManager}.
 *
 * @return The {@link Future} for the submitted task.
 *
 * @throws IllegalStateException
 *             if this task is not running on a {@link DataService} and the
 *             {@link #indexManager} is {@code null}.
 */
public Future<RuleStats> submit() {

    if (isDataService()) {

        // Running on a data service: impose concurrency control.
        return submitToConcurrencyManager();

    }

    // Same guard the run methods apply before touching the index manager.
    if (indexManager == null)
        throw new IllegalStateException();

    return indexManager.getExecutorService().submit(this);

}
/**
 * Wraps a clone of this task in an {@link AbstractTask} that declares the
 * indices used by the {@link #step} and submits it to the
 * {@link ConcurrencyManager} of the {@link DataService}.
 *
 * @return The {@link Future} for the wrapped task.
 *
 * @throws IllegalStateException
 *             if this task is not running on a {@link DataService}.
 * @throws UnsupportedOperationException
 *             if the {@link #step} is a closure program.
 */
@SuppressWarnings({ "unchecked", "rawtypes" })
private Future<RuleStats> submitToConcurrencyManager() {

    if (!isDataService())
        throw new IllegalStateException();

    if (new ProgramUtility().isClosureProgram(step)) {

        /*
         * If this is not a rule, and it is not a closure of a flat rule
         * set, and there is a buried closure operation inside of the
         * program then we have a problem since the steps above the
         * closure should have been flattened out by the caller and run
         * directly such that we never reach this point with a closure
         * operation.
         */
        throw new UnsupportedOperationException();

    }

    if (log.isInfoEnabled()) {

        log.info("running w/ concurrency control: " + this);

    }

    /*
     * The index names must be gathered from each relation on which the task
     * will write so that they can be declared.
     *
     * Note: We can't just pick and choose using the access paths since we
     * do not know how the propagation of bindings will effect access path
     * selection so we need a lock on all of the indices before the task can
     * run (at least, before it can run if it is a writer - no locks are
     * required for query).
     *
     * 1. Find the distinct relations that are used by the rules.
     *
     * 2. Collect the names of the indices maintained by those relations.
     *
     * 3. Declare the indices since the task will need an exclusive lock on
     * them (mutation) or at least the ability to read from those indices
     * (query).
     *
     * Note: if an index is not found on the live journal then it will be
     * resolved against the federation (if running in a federation). This
     * means that the task will run with the live index objects when they
     * are local and with IClientIndex objects when the index is remote.
     *
     * Note: In general, mixtures of live and remote index objects do not
     * occur since indices are either partitioned (a federation) or
     * monolithic (a Journal).
     *
     * Note: You CAN place indices onto specific data services running on a
     * set of machines and set [enableOverflow := false] such that the
     * indices never become partitioned. In that case you can have optimized
     * joins for some relations on one data service and for other relations
     * on another data service. E.g., locating the statement indices for the
     * triple store on one data service, the lexicon on another, and a repo
     * on a third. This will give very good performance for Query and Truth
     * Maintenance since the JOINs will be mostly executing against live
     * index objects.
     */

    // The set of indices that we need to declare for the task.
    final Set<String> indexNames = new HashSet<String>();

    if (action.isMutation()) {

        /*
         * Obtain the name of each index for which we want write access.
         * These are the indices for the relations named in the head of each
         * rule.
         *
         * Note: We are not actually issuing any tasks here, just
         * materializing relation views so that we can obtain the names of
         * the indices required for those views in order to declare them to
         * the ConcurrencyManager. (In fact, we will defer the choice of the
         * views on which we write until execution time since we will run
         * the mutation operation inside of the ConcurrencyManager.) Hence
         * the timestamp associated with the request does not really matter.
         */
        final Map<String, IRelation> writeRelations = getWriteRelations(
                indexManager, step, ITx.UNISOLATED);

        // Collect names of the required indices.
        final Set<String> writeIndexNames = getIndexNames(writeRelations
                .values());

        indexNames.addAll(writeIndexNames);

    }

    {

        /*
         * Obtain the name of each index for which we want read access.
         * These are the indices for the relation view(s) named in the tails
         * of each rule.
         *
         * Note: We are not actually issuing any tasks here, just
         * materializing relation views so that we can obtain the names of
         * the indices required for those views. UNISOLATED is always safe
         * in this context, even for a relation on a temporary where data
         * has been written but no commits performed.
         */
        final Map<String, IRelation> readRelations = getReadRelations(
                indexManager, step, ITx.UNISOLATED);

        // Collect names of the required indices.
        final Set<String> readIndexNames = getIndexNames(readRelations
                .values());

        indexNames.addAll(readIndexNames);

    }

    // The set of indices that the task will declare.
    final String[] resource = indexNames.toArray(new String[0]);

    if (log.isInfoEnabled()) {

        log.info("resource=" + Arrays.toString(resource));

    }

    /*
     * Choose the timestamp for the AbstractTask. The most interesting
     * choice is whether or not the task is UNISOLATED (an unisolated task
     * will obtain exclusive locks on the live indices declared by the
     * task).
     *
     * A mutation task runs with the writeTimestamp.
     *
     * A query task runs with the readTimestamp.
     *
     * @todo handle transactions in this context.
     */
    final long timestamp = action.isMutation()
            ? joinNexusFactory.getWriteTimestamp()
            : joinNexusFactory.getReadTimestamp();

    if (log.isInfoEnabled()) {

        log.info("timestamp=" + timestamp + ", task=" + this);

    }

    /*
     * Create the inner task. A clone is used to prevent possible side
     * effects on the original task.
     *
     * Note: The [timestamp] was chosen above. The writeTimestamp iff this
     * is a mutation operation and the [readTimestamp] otherwise.
     */
    final AbstractStepTask innerTask = this.clone();

    final IConcurrencyManager concurrencyManager = getDataService()
            .getConcurrencyManager();

    final AbstractTask task = new AbstractTask(concurrencyManager,
            timestamp, resource) {

        @Override
        protected Object doTask() throws Exception {

            if (log.isInfoEnabled())
                log.info("Executing inner task: " + this);

            /*
             * Override to use the IJournal exposed by the AbstractTask.
             * This IJournal imposes the correct isolation control and
             * allows access to the unisolated indices (if you have declared
             * them and are running an UNISOLATED AbstractTask).
             */
            innerTask.indexManager = getJournal();

            return innerTask.call();

        }

    };

    if (log.isInfoEnabled()) {

        log.info("running on concurrencyManager: " + this);

    }

    /*
     * Run on the concurrency manager.
     *
     * Note: the cast is unchecked since the wrapper task is declared as a
     * raw type, but its result is whatever the inner task returns.
     */
    final Future<RuleStats> future = (Future<RuleStats>) concurrencyManager
            .submit(task);

    return future;

}
/**
 * Strengthens the return type and masquerades the
 * {@link CloneNotSupportedException}.
 *
 * @return The copy produced by {@code super.clone()}.
 *
 * @throws RuntimeException
 *             wrapping a {@link CloneNotSupportedException}.
 */
@Override
public AbstractStepTask clone() {

    try {

        return (AbstractStepTask) super.clone();

    } catch (CloneNotSupportedException ex) {

        throw new RuntimeException(ex);

    }

}
/**
 * The set of distinct relations identified by the head of each rule in the
 * program.
 *
 * @param step
 *            The rule or program.
 *
 * @return The distinct relation names (a new, mutable set).
 *
 * @throws IllegalArgumentException
 *             if any rule does not have a head.
 */
protected Set<String> getWriteRelationNames(IStep step) {

    final Set<String> c = new HashSet<String>();

    getWriteRelationNames(step, c);

    if (log.isDebugEnabled()) {

        log.debug("Found " + c.size() + " relations, program="
                + step.getName());

    }

    return c;

}

/**
 * Recursive helper: adds the relation name for the head of a rule, or
 * descends into each step of a program.
 *
 * @param p
 *            The rule or program.
 * @param c
 *            The set into which the relation names are collected.
 */
@SuppressWarnings("rawtypes")
private void getWriteRelationNames(IStep p, Set<String> c) {

    if (p.isRule()) {

        final IRule r = (IRule) p;

        if (r.getHead() == null)
            throw new IllegalArgumentException(
                    "No head for this rule: rule=" + p);

        c.add(r.getHead().getOnlyRelationName());

    } else {

        final Iterator<IStep> itr = ((IProgram) p).steps();

        while (itr.hasNext()) {

            getWriteRelationNames(itr.next(), c);

        }

    }

}
/**
 * Locate the distinct relation identifiers corresponding to the head of
 * each rule and resolve them to their relations.
 *
 * @param indexManager
 *            Provides the resource locator used to resolve the relations.
 * @param step
 *            The rule or program.
 * @param timestamp
 *            The timestamp associated with the relation views on which the
 *            rule(s) will write.
 *
 * @return A new map from relation identifier to whatever the resource
 *         locator returned for that identifier.
 *
 * @throws IllegalArgumentException
 *             if <i>step</i> is {@code null} or if any rule does not have
 *             a head.
 * @throws RuntimeException
 *             if any relation can not be resolved (NOTE(review): this
 *             depends on the resource locator - confirm that it does not
 *             simply return {@code null}).
 */
protected Map<String, IRelation> getWriteRelations(
        IIndexManager indexManager, IStep step, long timestamp) {

    if (step == null)
        throw new IllegalArgumentException();

    final Map<String, IRelation> c = new HashMap<String, IRelation>();

    getWriteRelations(indexManager, step, c, timestamp);

    if (log.isDebugEnabled()) {

        log.debug("Located " + c.size()
                + " relations in the head(s), program=" + step.getName());

    }

    return c;

}

/**
 * Recursive helper: resolves the relation for the head of a rule (at most
 * once per relation identifier), or descends into each step of a program.
 */
@SuppressWarnings("unchecked")
private void getWriteRelations(IIndexManager indexManager, IStep p,
        Map<String, IRelation> c, long timestamp) {

    if (p.isRule()) {

        final IRule r = (IRule) p;

        // Same diagnostic as getWriteRelationNames() rather than an NPE.
        if (r.getHead() == null)
            throw new IllegalArgumentException(
                    "No head for this rule: rule=" + p);

        final String relationIdentifier = r.getHead().getOnlyRelationName();

        if (!c.containsKey(relationIdentifier)) {

            final IRelation relation = (IRelation) indexManager
                    .getResourceLocator().locate(relationIdentifier,
                            timestamp);

            c.put(relationIdentifier, relation);

        }

    } else {

        final Iterator<IStep> itr = ((IProgram) p).steps();

        while (itr.hasNext()) {

            getWriteRelations(indexManager, itr.next(), c, timestamp);

        }

    }

}
/**
 * Locate the distinct relation identifiers corresponding to the tail(s) of
 * each rule and resolve them to their relations. Note that a tail predicate
 * can read on a fused view of more than one relation.
 *
 * @param indexManager
 *            Provides the resource locator used to resolve the relations.
 * @param step
 *            The rule or program.
 * @param timestamp
 *            The timestamp associated with the relation views from which
 *            the rule(s) will read.
 *
 * @return A new map from relation name to whatever the resource locator
 *         returned for that name.
 *
 * @throws IllegalArgumentException
 *             if <i>step</i> is {@code null}.
 * @throws RuntimeException
 *             if any relation can not be resolved (NOTE(review): this
 *             depends on the resource locator - confirm that it does not
 *             simply return {@code null}).
 */
protected Map<String, IRelation> getReadRelations(IIndexManager indexManager,
        IStep step, final long timestamp) {

    if (step == null)
        throw new IllegalArgumentException();

    final Map<String, IRelation> c = new HashMap<String, IRelation>();

    getReadRelations(indexManager, step, c, timestamp);

    if (log.isDebugEnabled()) {

        log.debug("Located " + c.size()
                + " relations in the tail(s), program=" + step.getName());

    }

    return c;

}

/**
 * Recursive helper: resolves every relation named by every tail predicate
 * of a rule (at most once per relation name), or descends into each step
 * of a program.
 */
@SuppressWarnings("unchecked")
private void getReadRelations(IIndexManager indexManager, IStep p,
        Map<String, IRelation> c, long timestamp) {

    if (p.isRule()) {

        final IRule r = (IRule) p;

        final Iterator<IPredicate> itr = r.getTail();

        while (itr.hasNext()) {

            final IPredicate pred = itr.next();

            // A predicate may name more than one relation (fused view).
            final int relationCount = pred.getRelationCount();

            for (int i = 0; i < relationCount; i++) {

                final String relationName = pred.getRelationName(i);

                if (!c.containsKey(relationName)) {

                    final IRelation relation = (IRelation) indexManager
                            .getResourceLocator().locate(relationName,
                                    timestamp);

                    c.put(relationName, relation);

                }

            }

        }

    } else {

        final Iterator<IStep> itr = ((IProgram) p).steps();

        while (itr.hasNext()) {

            getReadRelations(indexManager, itr.next(), c, timestamp);

        }

    }

}
/**
* Create the appropriate buffers to absorb writes by the rules in the
* program that target an {@link IMutableRelation}.
*
* @return the map from relation identifier to the corresponding buffer.
*
* @throws IllegalStateException
* if the program is being executed as mutation.
* @throws RuntimeException
* If a rule requires mutation for a relation (it will write on
* the relation) and the corresponding entry in the map does not
* implement {@link IMutableRelation}.
*/
protected Map> getMutationBuffers(
IJoinNexus joinNexus, Map relations) {
if (!action.isMutation()) {
throw new IllegalStateException();
}
if(log.isDebugEnabled()) {
log.debug("");
}
final Map> c = new HashMap>(
relations.size());
final Iterator> itr = relations
.entrySet().iterator();
while (itr.hasNext()) {
final Map.Entry entry = itr.next();
final String relationIdentifier = entry.getKey();
final IRelation relation = entry.getValue();
final IBuffer buffer;
switch (action) {
case Insert:
buffer = joinNexus.newInsertBuffer((IMutableRelation)relation);
break;
case Delete:
buffer = joinNexus.newDeleteBuffer((IMutableRelation)relation);
break;
default:
throw new AssertionError("action=" + action);
}
c.put(relationIdentifier, buffer);
}
if(log.isDebugEnabled()) {
log.debug("Created "+c.size()+" mutation buffers: action="+action);
}
return c;
}
/**
 * Returns the names of the indices maintained by the relations.
 *
 * @param c
 *            A collection of {@link IRelation}s.
 *
 * @return The names of the indices maintained by those relations. This is
 *         an immutable empty set when the collection is empty and a new
 *         set otherwise.
 *
 * @throws IllegalArgumentException
 *             if <i>c</i> is {@code null}.
 */
@SuppressWarnings("unchecked")
protected Set<String> getIndexNames(final Collection<IRelation> c) {

    if (c == null)
        throw new IllegalArgumentException();

    if (c.isEmpty())
        return Collections.emptySet();

    final Set<String> set = new HashSet<String>();

    for (IRelation relation : c) {

        set.addAll(relation.getIndexNames());

    }

    return set;

}
}