// com.bigdata.relation.rule.eval.AbstractStepTask Maven / Gradle / Ivy
//
// Go to download
/*

 Copyright (C) SYSTAP, LLC DBA Blazegraph 2006-2016.  All rights reserved.

 Contact:
 SYSTAP, LLC DBA Blazegraph
 2501 Calvert ST NW #106
 Washington, DC 20008
 licenses@blazegraph.com

 This program is free software; you can redistribute it and/or modify
 it under the terms of the GNU General Public License as published by
 the Free Software Foundation; version 2 of the License.

 This program is distributed in the hope that it will be useful,
 but WITHOUT ANY WARRANTY; without even the implied warranty of
 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 GNU General Public License for more details.

 You should have received a copy of the GNU General Public License
 along with this program; if not, write to the Free Software
 Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA

 */
/*
 * Created on Jul 2, 2008
 */

package com.bigdata.relation.rule.eval;

import java.util.Arrays;
import java.util.Collection;
import java.util.Collections;
import java.util.HashMap;
import java.util.HashSet;
import java.util.Iterator;
import java.util.List;
import java.util.Map;
import java.util.Set;
import java.util.concurrent.Callable;
import java.util.concurrent.ExecutionException;
import java.util.concurrent.ExecutorService;
import java.util.concurrent.Future;

import org.apache.log4j.Logger;

import com.bigdata.bop.IPredicate;
import com.bigdata.journal.AbstractTask;
import com.bigdata.journal.ConcurrencyManager;
import com.bigdata.journal.IConcurrencyManager;
import com.bigdata.journal.IIndexManager;
import com.bigdata.journal.ITx;
import com.bigdata.relation.IMutableRelation;
import com.bigdata.relation.IRelation;
import com.bigdata.relation.accesspath.IBuffer;
import com.bigdata.relation.rule.IProgram;
import com.bigdata.relation.rule.IRule;
import com.bigdata.relation.rule.IStep;
import com.bigdata.service.DataService;
import com.bigdata.service.DataServiceCallable;
import com.bigdata.service.IDataServiceCallable;
import com.bigdata.service.ndx.ClientIndexView;
import com.bigdata.service.ndx.IClientIndex;

/**
 * Base class for tasks which evaluate an {@link IStep} (a rule or a program).
 * The task is submitted either to the {@link ExecutorService} of the
 * {@link #indexManager} or, when it is running on a {@link DataService}, to
 * the {@link ConcurrencyManager} of that {@link DataService}.
 * 
 * @author Bryan Thompson
 * @version $Id$
 */
abstract public class AbstractStepTask extends DataServiceCallable<RuleStats>
        implements IStepTask, Cloneable {

    protected static final transient Logger log = Logger.getLogger(AbstractStepTask.class);

    /**
     * Indicates whether the step is run as a query or as a mutation (never
     * <code>null</code>).
     */
    protected final ActionEnum action;

    /**
     * Supplies the read and write timestamps used when the task is submitted
     * to the {@link ConcurrencyManager} (never <code>null</code>).
     */
    protected final IJoinNexusFactory joinNexusFactory;

    /**
     * Used to locate relations and to obtain the {@link ExecutorService}. This
     * is not final because the inner task created when running on a
     * {@link DataService} replaces it with the journal exposed by the
     * {@link AbstractTask}. MAY be <code>null</code> (the constructor does not
     * reject a null reference).
     */
    protected /*final*/ IIndexManager indexManager;

    /**
     * The rule or program to be executed (never <code>null</code>).
     */
    protected final IStep step;

    /**
     * Base class handles submit either to the caller's {@link ExecutorService}
     * or to the {@link ConcurrencyManager} IFF the task was submitted to a
     * {@link DataService}.
     * <p>
     * Note: The {@link DataService} will notice the
     * {@link IDataServiceCallable} interface and set a reference to
     * itself using {@link #setDataService(DataService)}. {@link #submit()}
     * notices this case and causes this task to be {@link #clone()}ed,
     * the {@link ExecutorService} set on the clone, and the clone is then
     * submitted to the {@link ConcurrencyManager} for the {@link DataService}.
     * 
     * @param action
     *            Indicate whether this is a query or a mutation operation.
     * @param joinNexusFactory
     *            Various goodies.
     * @param step
     *            The rule or program.
     * @param indexManager
     *            Used to resolve relations and to obtain the
     *            {@link ExecutorService}. A <code>null</code> reference is NOT
     *            rejected here; the methods which require it check at the
     *            time they run.
     * @param dataService
     *            non-null iff the caller is already running on
     *            a {@link DataService}.
     * 
     * @throws IllegalArgumentException
     *             if action is null.
     * @throws IllegalArgumentException
     *             if joinNexusFactory is null.
     * @throws IllegalArgumentException
     *             if step is null.
     */
    protected AbstractStepTask(final ActionEnum action,
            final IJoinNexusFactory joinNexusFactory, final IStep step,
            final IIndexManager indexManager, final DataService dataService) {

        // Each failure names the offending argument so it can be diagnosed.
        if (action == null)
            throw new IllegalArgumentException("action is null");

        if (joinNexusFactory == null)
            throw new IllegalArgumentException("joinNexusFactory is null");

        if (step == null)
            throw new IllegalArgumentException("step is null");

        this.action = action;

        this.joinNexusFactory = joinNexusFactory;

        this.step = step;

        this.indexManager = indexManager; // @todo MAY be null?

        // Only register the data service when the caller supplied one.
        if (dataService != null)
            setDataService(dataService);

    }

    /**
     * Human readable summary of this task: the simple class name, the action,
     * the name of the step, the join nexus factory and the index manager.
     */
    public String toString() {

        final StringBuilder sb = new StringBuilder();

        sb.append("{").append(getClass().getSimpleName());

        sb.append(", action=").append(action);

        sb.append(", step=").append(step.getName());

        sb.append(", joinNexusFactory=").append(joinNexusFactory);

        sb.append(", indexManager=").append(indexManager);

        sb.append("}");

        return sb.toString();

    }
    
    /**
     * Run program steps in parallel.
     * 
     * @param program
     * @param tasks
     * 
     * @throws InterruptedException
     * @throws ExecutionException
     * 
     * @todo adapt the {@link ClientIndexView} code so that we notice all
     *       errors, log them all, and report them all in a single thrown
     *       exception. note that we may be running asynchronously inside of a
     *       task after the caller has an iterator that is draining the buffer.
     *       when an error occurs in that context the buffer should be flagged
     *       to indicate an exception and closed and the iterator should report
     *       the exception to the client.
     *       

     *       Do the same thing for running a program as a sequence.
     */
    protected RuleStats runParallel(final IJoinNexus joinNexus, final IStep program,
            final List> tasks) throws InterruptedException,
            ExecutionException {
    
        if (log.isInfoEnabled())
            log.info("program=" + program.getName() + ", #tasks="
                    + tasks.size());
        
        if (indexManager == null)
            throw new IllegalStateException();
        
        final RuleStats totals = joinNexus.getRuleStatisticsFactory().newInstance(program);
        
        final ExecutorService service = indexManager.getExecutorService();
        
        // submit tasks and await their completion.
        final List> futures = service.invokeAll(tasks);
    
        // verify no problems with tasks.
        for (Future f : futures) {
    
            final RuleStats tmp = f.get();
            
            totals.add(tmp);
    
        }
    
        if (log.isInfoEnabled())
            log.info("program=" + program.getName() + ", #tasks="
                    + tasks.size() + " - done");
    
        return totals;
    
    }

    /**
     * Run program steps in sequence.
     * 
     * @param program
     * @param tasks
     * 
     * @return 
     * @throws InterruptedException
     * @throws ExecutionException
     */
    protected RuleStats runSequential(final IJoinNexus joinNexus,
            final IStep program, final List> tasks)
            throws InterruptedException, ExecutionException {
    
        final int ntasks = tasks.size();
        
        if (log.isInfoEnabled())
            log.info("program=" + program.getName() + ", #tasks=" + ntasks);
    
        if (indexManager == null)
            throw new IllegalStateException();
        
        final ExecutorService service = indexManager.getExecutorService();
        
        final RuleStats totals = joinNexus.getRuleStatisticsFactory().newInstance(program);
        
        final Iterator> itr = tasks.iterator();
    
        int n = 0;
        
        while (itr.hasNext()) {
    
            final Callable task = itr.next();
    
            /*
             * Submit and wait for the future.
             * 
             * Note: tasks that are run in a sequential program are required to
             * flush the buffer so that all solutions are available for the next
             * step of the program. This is critical for programs that have
             * dependencies between their steps.
//             * 
//             * Note: This is handled by the task factory.
             */
            final RuleStats tmp = service.submit(task).get();
    
            totals.add(tmp);

            n++;

            if (log.isDebugEnabled()) {

                log.debug("program=" + program.getName() + ", finished " + n
                        + " of " + ntasks + " seqential tasks.");

            }

        }

        if (log.isInfoEnabled())
            log.info("program=" + program.getName() + ", #tasks=" + ntasks
                    + " - done");
    
        return totals;
    
    }

    /**
     * Run a single step (sequence of one).
     * <p>
     * Note: use {@link #runOne(IJoinNexus, IStep, Callable)} rather than either
     * {@link #runParallel(IJoinNexus, IStep, List)} or
     * {@link #runSequential(IJoinNexus, IStep, List)} when there is only one
     * task to execute in order to avoid an unnecessary layering of the
     * {@link RuleStats} (this is due to a coupling between the
     * {@link RuleStats} reporting structure and the control structure for
     * executing the tasks).
     * 
     * @param joinNexus
     *            Not used by this method.
     * @param program
     *            The step whose name appears in the log messages.
     * @param task
     *            The task to be executed.
     * 
     * @return The {@link RuleStats} returned by the task.
     * 
     * @throws IllegalStateException
     *             if the {@link #indexManager} is <code>null</code>.
     * @throws InterruptedException
     * @throws ExecutionException
     *             wrapping any {@link Exception} thrown by the task.
     */
    protected RuleStats runOne(final IJoinNexus joinNexus, final IStep program,
            final Callable<RuleStats> task) throws InterruptedException,
            ExecutionException {
    
        if (log.isInfoEnabled())
            log.info("program=" + program.getName());
    
        if (indexManager == null)
            throw new IllegalStateException();
        
        /*
         * Run the task and wait for its result.
         * 
         * Note: The task is invoked directly in the caller's thread rather
         * than being submitted to the ExecutorService. Any exception which it
         * throws is wrapped as an ExecutionException, which is how a failure
         * would be reported by Future#get() for a submitted task.
         * 
         * Note: tasks that are run in a sequential (or as a single task)
         * program are required to flush the buffer so that all solutions are
         * available for the next step of the program. This is critical for
         * programs that have dependencies between their steps.
         * 
         * Note: This is handled by the task factory.
         */
        final RuleStats stats;
        try {
            stats = task.call();
        } catch (Exception ex) {
            throw new ExecutionException(ex);
        }

        if (log.isInfoEnabled())
            log.info("program=" + program.getName() + " - done");
    
        return stats;
    
    }

    /**
     * Run this task.
     * <p>
     * If we are executing on a {@link DataService} then {@link #getDataService()}
     * will have been set automatically and the task will be submitted to the
     * {@link ConcurrencyManager} for that {@link DataService}.
     * <p>
     * This condition occurs when this {@link Callable} is sent to the
     * {@link DataService} using {@link DataService#submit(Callable)}. In order
     * to gain access to the named indices for the relation, we have to wrap up
     * this {@link Callable} as an {@link AbstractTask} that declares the
     * appropriate timestamp and resources. The {@link AbstractTask} is then
     * submitted to the {@link ConcurrencyManager} for execution. Once the
     * {@link AbstractTask} is actually running, the inner task
     * overrides the {@link #indexManager} to be
     * {@link AbstractTask#getJournal()}. This provides access to the indices,
     * relations, etc. appropriate for the isolation level granted to the task
     * by the {@link ConcurrencyManager} - without this step the
     * {@link AbstractTask} will wind up using an {@link IClientIndex} view and
     * lose the benefits of access to unisolated indices.
     * <p>
     * Otherwise the task is submitted to the {@link ExecutorService} of the
     * {@link #indexManager}.
     * 
     * @return The {@link Future} for the submitted task.
     * 
     * @throws IllegalStateException
     *             if the task is not running on a {@link DataService} and the
     *             {@link #indexManager} is <code>null</code>.
     */
    public Future<RuleStats> submit() {

        if (!isDataService()) {

            // Same precondition (and exception) as the run methods.
            if (indexManager == null)
                throw new IllegalStateException();

            return indexManager.getExecutorService().submit(this);

        }

        return submitToConcurrencyManager();
        
    }
    
    /**
     * Wraps a clone of this task as an {@link AbstractTask} and submits it to
     * the {@link ConcurrencyManager} of the {@link DataService}.
     * <p>
     * The {@link AbstractTask} declares the names of the indices for the
     * relations in the head of each rule (mutation only) and in the tail(s) of
     * each rule. It runs with the write timestamp for a mutation and with the
     * read timestamp otherwise; both are taken from the
     * {@link #joinNexusFactory}.
     * 
     * @return The {@link Future} for the submitted task.
     * 
     * @throws IllegalStateException
     *             if the task is not running on a {@link DataService}.
     * @throws UnsupportedOperationException
     *             if the {@link #step} is a closure program.
     */
    private Future<RuleStats> submitToConcurrencyManager() {
        
        if (!isDataService())
            throw new IllegalStateException();

        final ProgramUtility util = new ProgramUtility();

        if (util.isClosureProgram(step)) {

            /*
             * If this is not a rule, and it is not a closure of a flat rule
             * set, and there is a buried closure operation inside of the
             * program then we have a problem since the steps above the
             * closure should have been flattened out by the caller and run
             * directly such that we never reach this point with a closure
             * operation.
             */

            throw new UnsupportedOperationException();

        }
        
        if(log.isInfoEnabled()) {

            log.info("running w/ concurrency control: " + this);
            
        }

        /*
         * The index names must be gathered from each relation on which the task
         * will write so that they can be declared.
         * 
         * Note: We can't just pick and choose using the access paths since we
         * do not know how the propagation of bindings will effect access path
         * selection so we need a lock on all of the indices before the task can
         * run (at least, before it can run if it is a writer - no locks are
         * required for query).
         * 
         * 1. Find the distinct relations that are used by the rules.
         * 
         * 2. Collect the names of the indices maintained by those relations.
         * 
         * 3. Declare the indices since the task will need an exclusive lock on
         * them (mutation) or at least the ability to read from those indices
         * (query).
         * 
         * Note: if an index is not found on the live journal then it will be
         * resolved against the federation (if running in a federation). This
         * means that the task will run with the live index objects when they
         * are local and with IClientIndex objects when the index is remote.
         * 
         * Note: In general, mixtures of live and remote index objects do not
         * occur since indices are either partitioned (a federation) or
         * monolithic (a Journal).
         * 
         * Note: You CAN place indices onto specific data services running on a
         * set of machines and set [enableOverflow := false] such that the
         * indices never become partitioned. In that case you can have optimized
         * joins for some relations on one data service and for other relations
         * on another data service. E.g., locating the statement indices for the
         * triple store on one data service, the lexicon on another, and a repo
         * on a third. This will give very good performance for Query and Truth
         * Maintenance since the JOINs will be mostly executing against live
         * index objects.
         */

        /*
         * The set of indices that we need to declare for the task.
         */
        final Set<String> indexNames = new HashSet<String>();
        
        if(action.isMutation()) {
         
            /*
             * Obtain the name of each index for which we want write access.
             * These are the indices for the relations named in the head of each
             * rule.
             * 
             * Note: We are not actually issuing any tasks here, just
             * materializing relation views so that we can obtain the names of
             * the indices required for those views in order to declare them to
             * the ConcurrencyManager. (In fact, we will defer the choice of the
             * views on which we write until execution time since we will run
             * the mutation operation inside of the ConcurrencyManager.) Hence
             * the timestamp associated with the request does not really matter.
             */
            final Map<String, IRelation> tmpRelations = getWriteRelations(
                indexManager, step, ITx.UNISOLATED);
            
            // Collect names of the required indices.
            final Set<String> writeIndexNames = getIndexNames(tmpRelations
                    .values());
            
            indexNames.addAll(writeIndexNames);

        }

        {

            /*
             * Obtain the name of each index for which we want read access.
             * These are the indices for the relation view(s) named in the tails
             * of each rule.
             * 
             * Note: We are not actually issuing any tasks here, just
             * materializing relation views so that we can obtain the names of
             * the indices required for those views. UNISOLATED is always safe
             * in this context, even for a relation on a temporary where data
             * has been written but no commits performed.
             */
            final Map<String, IRelation> tmpRelations = getReadRelations(
                    indexManager, step, ITx.UNISOLATED);
                
            // Collect names of the required indices.
            final Set<String> readIndexNames = getIndexNames(tmpRelations
                    .values());
                
            indexNames.addAll(readIndexNames);
            
        }
        
        // The set of indices that the task will declare.
        final String[] resource = indexNames.toArray(new String[] {});

        if (log.isInfoEnabled()) {

            log.info("resource=" + Arrays.toString(resource));

        }

        /*
         * Choose the timestamp for the AbstractTask. The most interesting
         * choice is whether or not the task is UNISOLATED (an unisolated task
         * will obtain exclusive locks on the live indices declared by the
         * task).
         * 
         * A mutation task runs with the writeTimestamp.
         * 
         * A query task runs with the readTimestamp.
         * 
         * @todo handle transactions in this context.
         */
        final long timestamp;

        if (action.isMutation()) {

            timestamp = joinNexusFactory.getWriteTimestamp();

        } else {

            timestamp = joinNexusFactory.getReadTimestamp();

        }

        if (log.isInfoEnabled()) {

            log.info("timestamp=" + timestamp + ", task=" + this);

        }
        
        /*
         * Create the inner task. A clone is used to prevent possible side
         * effects on the original task.
         * 
         * Note: The [timestamp] was chosen above. The writeTimestamp iff this
         * is a mutation operation and the [readTimestamp] otherwise.
         */
        final AbstractStepTask innerTask = this.clone();

        final IConcurrencyManager concurrencyManager = getDataService()
                .getConcurrencyManager();
        
        final AbstractTask task = new AbstractTask(concurrencyManager,
                timestamp, resource) {

            @Override
            protected Object doTask() throws Exception {

                // Note: [this] is the AbstractTask, not the outer step task.
                if (log.isInfoEnabled())
                    log.info("Executing inner task: " + this);

                /*
                 * Override to use the IJournal exposed by the AbstractTask.
                 * This IJournal imposes the correct isolation control and
                 * allows access to the unisolated indices (if you have declared
                 * them and are running an UNISOLATED AbstractTask).
                 */
                innerTask.indexManager = getJournal();
                
                return innerTask.call();

            }

        };

        if(log.isInfoEnabled()) {

            log.info("running on concurrencyManager: " + this);
            
        }
        
        /*
         * Run on the concurrency manager.
         * 
         * Note: The cast is unchecked. The wrapped task returns whatever the
         * inner AbstractStepTask returns from call().
         */
        @SuppressWarnings("unchecked")
        final Future<RuleStats> future = (Future<RuleStats>) concurrencyManager
                .submit(task);

        return future;

    }

    /**
     * Delegates to <code>super.clone()</code>, narrowing the return type to
     * {@link AbstractStepTask} and rethrowing a
     * {@link CloneNotSupportedException} as a {@link RuntimeException} so that
     * callers do not have to deal with a checked exception.
     */
    public AbstractStepTask clone() {

        final Object copy;

        try {

            copy = super.clone();

        } catch (CloneNotSupportedException cause) {

            throw new RuntimeException(cause);

        }

        return (AbstractStepTask) copy;

    }

    /**
     * The set of distinct relations identified by the head of each rule in the
     * program.
     * 
     * @param step
     *            The rule or program.
     * 
     * @return The distinct relation names (a new, mutable set).
     * 
     * @throws IllegalArgumentException
     *             if some rule does not have a head.
     */
    protected Set<String> getWriteRelationNames(IStep step) {

        final Set<String> c = new HashSet<String>();
        
        getWriteRelationNames(step, c);

        if(log.isDebugEnabled()) {
            
            log.debug("Found " + c.size() + " relations, program="
                    + step.getName());
            
        }

        return c;
        
    }
    
    /**
     * Recursive helper which adds the name of the relation in the head of each
     * rule to the caller's set, descending through the steps of a program.
     * 
     * @param p
     *            The rule or program.
     * @param c
     *            The set to which the relation names are added.
     * 
     * @throws IllegalArgumentException
     *             if a rule does not have a head.
     */
    private void getWriteRelationNames(IStep p, Set<String> c) {

        if (p.isRule()) {

            final IRule r = (IRule) p;

            if (r.getHead() == null)
                throw new IllegalArgumentException(
                        "No head for this rule: rule=" + p);
            
            c.add(r.getHead().getOnlyRelationName());

        } else {
            
            // A program: visit each of its steps in turn.
            final Iterator<IStep> itr = ((IProgram)p).steps();

            while (itr.hasNext()) {

                getWriteRelationNames(itr.next(), c);

            }

        }

    }
    
    /**
     * Locate the distinct relation identifiers corresponding to the head of
     * each rule and resolve them to their relations.
     * 
     * @param indexManager
     *            Its resource locator is used to resolve the relations.
     * @param step
     *            The rule or program.
     * @param timestamp
     *            The timestamp associated with the relation views on which the
     *            rule(s) will write.
     * 
     * @return A new map from the relation identifier to the value returned by
     *         the resource locator for that identifier.
     * 
     * @throws IllegalArgumentException
     *             if the step is <code>null</code>.
     * @throws RuntimeException
     *             if any relation can not be resolved.
     */
    protected Map<String, IRelation> getWriteRelations(
            IIndexManager indexManager, IStep step, long timestamp) {

        if (step == null)
            throw new IllegalArgumentException();

        final Map<String, IRelation> c = new HashMap<String, IRelation>();

        getWriteRelations(indexManager, step, c, timestamp);

        if (log.isDebugEnabled()) {

            log.debug("Located " + c.size()
                    + " relations in the head(s), program=" + step.getName());

        }

        return c;

    }

    /**
     * Recursive helper which resolves the relation named in the head of each
     * rule, descending through the steps of a program. Each distinct relation
     * identifier is resolved at most once.
     * 
     * @param indexManager
     *            Its resource locator is used to resolve the relations.
     * @param p
     *            The rule or program.
     * @param c
     *            The map to which the resolved relations are added.
     * @param timestamp
     *            The timestamp of the relation views.
     * 
     * @throws IllegalArgumentException
     *             if a rule does not have a head.
     */
    @SuppressWarnings("unchecked")
    private void getWriteRelations(IIndexManager indexManager, IStep p,
            Map<String, IRelation> c, long timestamp) {

        if (p.isRule()) {

            final IRule r = (IRule) p;

            // Same precondition (and message) as getWriteRelationNames().
            if (r.getHead() == null)
                throw new IllegalArgumentException(
                        "No head for this rule: rule=" + p);

            final String relationIdentifier = r.getHead().getOnlyRelationName();

            // Only resolve a relation the first time its name is seen.
            if (!c.containsKey(relationIdentifier)) {

                final IRelation relation = (IRelation) indexManager
                        .getResourceLocator().locate(relationIdentifier,
                                timestamp);

                c.put(relationIdentifier, relation);

            }

        } else {
            
            // A program: visit each of its steps in turn.
            final Iterator<IStep> itr = ((IProgram)p).steps();

            while (itr.hasNext()) {

                getWriteRelations(indexManager, itr.next(), c, timestamp);

            }

        }

    }
    
    /**
     * Locate the distinct relation identifiers corresponding to the tail(s) of
     * each rule and resolve them to their relations. Note that a tail predicate
     * can read on a fused view of more than one relation.
     * 
     * @param indexManager
     *            Its resource locator is used to resolve the relations.
     * @param step
     *            The rule or program.
     * @param timestamp
     *            The timestamp associated with the relation views from which
     *            the rule(s) will read.
     * 
     * @return A new map from the relation name to the value returned by the
     *         resource locator for that name.
     * 
     * @throws IllegalArgumentException
     *             if the step is <code>null</code>.
     * @throws RuntimeException
     *             if any relation can not be resolved.
     */
    protected Map<String, IRelation> getReadRelations(IIndexManager indexManager,
            IStep step, final long timestamp) {

        if (step == null)
            throw new IllegalArgumentException();

        final Map<String, IRelation> c = new HashMap<String, IRelation>();

        getReadRelations(indexManager, step, c, timestamp);

        if (log.isDebugEnabled()) {

            log.debug("Located " + c.size()
                    + " relations in the tail(s), program=" + step.getName());

        }

        return c;

    }

    /**
     * Recursive helper which resolves every relation named by the tail
     * predicates of each rule, descending through the steps of a program. Each
     * distinct relation name is resolved at most once.
     * 
     * @param indexManager
     *            Its resource locator is used to resolve the relations.
     * @param p
     *            The rule or program.
     * @param c
     *            The map to which the resolved relations are added.
     * @param timestamp
     *            The timestamp of the relation views.
     */
    @SuppressWarnings("unchecked")
    private void getReadRelations(IIndexManager indexManager, IStep p,
            Map<String, IRelation> c, long timestamp) {

        if (p.isRule()) {

            final IRule r = (IRule) p;

            final Iterator<IPredicate> itr = r.getTail();

            while (itr.hasNext()) {

                final IPredicate pred = itr.next();

                // A predicate may name more than one relation (fused view).
                final int relationCount = pred.getRelationCount();

                for (int i = 0; i < relationCount; i++) {

                    final String relationName = pred.getRelationName(i);

                    // Only resolve a relation the first time its name is seen.
                    if (!c.containsKey(relationName)) {

                        final IRelation relation = (IRelation) indexManager
                                .getResourceLocator().locate(relationName,
                                        timestamp);

                        c.put(relationName, relation);

                    }

                }
                
            }

        } else {
            
            // A program: visit each of its steps in turn.
            final Iterator<IStep> itr = ((IProgram)p).steps();

            while (itr.hasNext()) {

                getReadRelations(indexManager, itr.next(), c, timestamp);

            }

        }

    }
    
    /**
     * Create the appropriate buffers to absorb writes by the rules in the
     * program that target an {@link IMutableRelation}.
     * 
     * @return the map from relation identifier to the corresponding buffer.
     * 
     * @throws IllegalStateException
     *             if the program is being executed as mutation.
     * @throws RuntimeException
     *             If a rule requires mutation for a relation (it will write on
     *             the relation) and the corresponding entry in the map does not
     *             implement {@link IMutableRelation}.
     */
    protected Map> getMutationBuffers(
            IJoinNexus joinNexus, Map relations) {

        if (!action.isMutation()) {

            throw new IllegalStateException();
            
        }

        if(log.isDebugEnabled()) {
            
            log.debug("");
            
        }

        final Map> c = new HashMap>(
                relations.size());

        final Iterator> itr = relations
                .entrySet().iterator();

        while (itr.hasNext()) {

            final Map.Entry entry = itr.next();

            final String relationIdentifier = entry.getKey();

            final IRelation relation = entry.getValue();

            final IBuffer buffer;

            switch (action) {
            
            case Insert:
                
                buffer = joinNexus.newInsertBuffer((IMutableRelation)relation);
                
                break;
                
            case Delete:
                
                buffer = joinNexus.newDeleteBuffer((IMutableRelation)relation);
                
                break;
                
            default:
                
                throw new AssertionError("action=" + action);
            
            }

            c.put(relationIdentifier, buffer);
            
        }

        if(log.isDebugEnabled()) {
            
            log.debug("Created "+c.size()+" mutation buffers: action="+action);
            
        }

        return c;
        
    }
    
    /**
     * Returns the names of the indices maintained by the relations.
     * 
     * @param c
     *            A collection of {@link IRelation}s.
     * 
     * @return The names of the indices maintained by those relations. This is
     *         an immutable empty set when the collection is empty and a new
     *         set otherwise.
     * 
     * @throws IllegalArgumentException
     *             if the collection is <code>null</code>.
     */
    @SuppressWarnings("unchecked")
    protected Set<String> getIndexNames(final Collection<IRelation> c) {

        if (c == null)
            throw new IllegalArgumentException();

        if (c.isEmpty())
            return Collections.emptySet();

        final Set<String> set = new HashSet<String>();
        
        for (IRelation relation : c) {
            
            // Unchecked: IRelation is used here as a raw type.
            set.addAll(relation.getIndexNames());
            
        }

        return set;
        
    }

}