com.bigdata.relation.rule.eval.RuleStats Maven / Gradle / Ivy

Go to download
/**

Copyright (C) SYSTAP, LLC DBA Blazegraph 2006-2016.  All rights reserved.

Contact:
     SYSTAP, LLC DBA Blazegraph
     2501 Calvert ST NW #106
     Washington, DC 20008
     [email protected]

This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; version 2 of the License.

This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
GNU General Public License for more details.

You should have received a copy of the GNU General Public License
along with this program; if not, write to the Free Software
Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
*/
/*
 * Created on Oct 30, 2007
 */

package com.bigdata.relation.rule.eval;

import java.text.DateFormat;
import java.util.List;
import java.util.Vector;
import java.util.concurrent.atomic.AtomicLong;

import com.bigdata.bop.IPredicate;
import com.bigdata.bop.joinGraph.IEvaluationPlan;
import com.bigdata.bop.joinGraph.IRangeCountFactory;
import com.bigdata.relation.IMutableRelation;
import com.bigdata.relation.accesspath.IBuffer;
import com.bigdata.relation.rule.IProgram;
import com.bigdata.relation.rule.IRule;
import com.bigdata.relation.rule.ISlice;
import com.bigdata.relation.rule.IStep;
import com.bigdata.relation.rule.Rule;
import com.bigdata.service.ILoadBalancerService;
import com.bigdata.striterator.IKeyOrder;

/**
 * Statistics about what an {@link IStep} did when it was executed.
 * 
 * Program execution has the general form of either a set of {@link IStep}s
 * executed, at least logically, in parallel, or a sequence of {@link IStep}s
 * executed in sequence. An {@link IStep} may be a closure operation of one or
 * more {@link IRule}s, even when it is the top-level {@link IStep}. Inside of
 * a closure operation, there are one or more rounds and each rule in the
 * closure will be run in each round. There is no implicit order on the rules
 * within a closure operation, but they may be forced to execute in a sequence
 * if the total program execution context forces sequential execution.
 * 

 * In order to aggregate the data on rule execution, we want to roll up the data
 * for the individual rules along the same lines as the program structure.
 * 
 * @todo Report as counters aggregated by the {@link ILoadBalancerService}?
 * 
 * @author mikep
 * @author Bryan Thompson
 * @version $Id$
 */
public class RuleStats {

    /**
     * Delimiter string used for output.
     */
    private final static transient String sep = ", ";//"\t";//", ";
    
    /**
     * Initializes statistics for an {@link IStep}.
     * 

     * Note: This form is used when statistics will be aggregated across the
     * execution of multiple {@link IStep}s, such as when computing the closure
     * (fix point) of a set of {@link IRule}s or a sequential {@link IProgram}.
     * 
     * @param step
     *            The {@link IStep}.
     */
    public RuleStats(final IStep step) {
       
        this.rule = step;

        this.name = step.getName();

        if (step.isRule()) {

            final int tailCount = ((IRule) step).getTailCount();

            this.evalOrder = new int[tailCount];

            this.permutation = new int[tailCount];

            this.keyOrder = new IKeyOrder[tailCount];

            this.nvars = new int[tailCount];

            this.chunkCount = new long[tailCount];

            this.subqueryCount = new int[tailCount];

            this.elementCount = new long[tailCount];

            this.rangeCount = new long[tailCount];
            
        } else {

            this.evalOrder = null;

            this.permutation = null;
            
            this.keyOrder = null;
            
            this.nvars = null;
            
            this.chunkCount = null;
            
            this.subqueryCount = null;
            
            this.elementCount = null;

            this.rangeCount = null;
            
        }

//        this.nexecutions = 0;

        this.aggregation = true;
        
    }

    /**
     * Initializes statistics from an {@link iRule} and its
     * {@link IEvaluationPlan}.
     * 

     * Note: This ctor variant makes available the order of execution and range
     * count metadata from the {@link IEvaluationPlan}.
     * 
     * @param rule
     *            The {@link IRule}.
     * @param plan
     *            The {@link IEvaluationPlan}.
     * @param keyOrder
     *            Identifies which index will be used at each step in the
     *            evaluation plan (the indices are correlated with the tail
     *            predicate index, not the evaluation order index).
     */
    public RuleStats(final IRuleState ruleState) {
        
        this(ruleState.getRule());
        
        final IRule rule = ruleState.getRule();
        
        final IEvaluationPlan plan = ruleState.getPlan();
        
        final int tailCount = rule.getTailCount();
        
        System.arraycopy(ruleState.getNVars(), 0, this.nvars, 0, tailCount);

        System.arraycopy(plan.getOrder(), 0, this.evalOrder, 0, tailCount);

        System.arraycopy(ruleState.getKeyOrder(), 0, this.keyOrder, 0, tailCount);

        /*
         * Construct the permutation of the tail index order for the rule that
         * corresponds to the evaluation order.
         * 
         * permutation[i] is the sequence in the evaluation order at which
         * tail[i] is evaluated.
         */
        for (int i = 0; i < tailCount; i++) {

            permutation[evalOrder[i]] = i;

        }
        
        for (int i = 0; i < tailCount; i++) {
            
            /*
             * Copy range count data, etc.
             * 
             * @todo This data is normally cached, but for some rules and some
             * evaluation planners the plan is formed without requesting some
             * (or any) of the range counts. An optimization would allow us to
             * NOT force a range count if it was not already take.
             * 
             * However, a caching IRangeCountFactory might hide these costs
             * anyway.
             */
            
            rangeCount[ i ] = plan.rangeCount(i);
            
        }

        this.aggregation = false;
        
    }

    /**
     * True iff this is an aggregation of individual rule executions.
     */
    private boolean aggregation;
    
    /**
     * The name of the rule.
     */
    public final String name;
    
    /**
     * The {@link IStep} that was executed.
     */
    public final IStep rule;
    
    /**
     * The round is zero unless this is a closure operations and then it is an
     * integer in [1:nrounds]. 
     */
    public int closureRound = 0;
    
    /**
     * The #of {@link ISolution}s computed by the rule regardless of whether or
     * not they are written onto an {@link IMutableRelation} and regardless of
     * whether or not they duplicate a solution already computed.
     * 

     * Note: this counter will be larger than the actual #of solutions generated
     * when evaluating an {@link ISlice} since the pattern used is to invoke
     * {@link AtomicLong#incrementAndGet()} and add the {@link ISolution} to the
     * {@link IBuffer} iff the post-increment value is LT
     * {@link ISlice#getLast()}. Since we are working with the post-increment
     * value when handling an {@link ISlice} this is naturally one larger than
     * the #of {@link ISolution}s actually added to the {@link IBuffer} if we
     * halt processing because we have reached the limit on the {@link ISlice}.
     */
    public AtomicLong solutionCount = new AtomicLong();
    
    /**
     * The #of elements that were actually added to (or removed from) the
     * relation indices. This is updated based on the {@link IMutableRelation}
     * API. Correct reporting by that API and correct aggregation here are
     * critical to the correct termination of the fix point of some rule set.
     * 

     * Note: This value is ONLY incremented when the {@link IBuffer} is flushed.
     * 

     * Note: Each {@link IRule} will have its own {@link RuleStats} on which it
     * reports its mutation count. However, since evaluation of {@link IRule}s
     * MAY proceed in parallel and since multiple {@link IRule}s can write on
     * the same {@link IBuffer}, the mutation counts can not be credited
     * unambiguously to any given rule when rules run in parallel.
     * 

     * Note: {@link IBuffer#flush()} maintains a running mutationCount.
     * Therefore only the end state of that counter should be set on
     * {@link #mutationCount}. Otherwise the reported {@link #mutationCount}
     * may overreport the actual mutation count.
     * 
     * @see #add(RuleStats)
     */
    public AtomicLong mutationCount = new AtomicLong();

    /**
     * The start time for the rule execution. This is approximate (it is
     * initialized when the {@link RuleStats} instance is created). The
     * startTime is mainly intended for use when correlating collected
     * performance counters with query execution.
     */
    public final long startTime = System.currentTimeMillis();
    
    /**
     * Time to compute the entailments (ms).
     */
    public long elapsed;

    /*
     * The following are only available for the execution of a single rule.
     */

    /**
     * The #of unbound variables for the predicates in the tail of the
     * {@link Rule} (only available at the detail level of a single rule
     * instance execution). The array is correlated with the predicates index in
     * the tail of the rule NOT with its evaluation order.
     */
    public final int[] nvars;
    
    /**
     * The order of execution of the predicates in the body of a rule (only
     * available at the detail level of a single rule instance execution). When
     * aggregated, the {@link #evalOrder} will always contain zeros since it can
     * not be meaningfully combined across executions of either the same or
     * different rules.
     */
    public final int[] evalOrder;

    /**
     * The permutation of the tail predicate index order for an {@link IRule}
     * (only available when the {@link IStep} is an {@link IRule}) that
     * corresponds to the evaluation order of the tail predicate in the
     * {@link IRule}. permutation[i] is the sequence in the
     * evaluation order at which tail[i] was evaluated.
     */
    public final int[] permutation;
    
    /**
     * An array of the {@link IKeyOrder} that was used for each predicate in the
     * tail of the rule. The array is correlated with the predicates index in
     * the tail of the rule NOT with its evaluation order.
     */
    public final IKeyOrder[] keyOrder;
    
    /**
     * The predicated range counts for each predicate in the body of the rule
     * (in the order in which they were declared, not the order in which they
     * were evaluated) as reported by the {@link IRangeCountFactory}. The range
     * counts are used by the {@link IEvaluationPlan}. You can compare the
     * {@link #elementCount}s with the {@link #rangeCount}s to see the actual
     * vs predicated #of elements visited per predicate.
     */
    public final long[] rangeCount;
    
    /**
     * The #of chunks materialized for each predicate in the body of the rule
     * (in the order in which they were declared, not the order in which they
     * were evaluated).
     */
    public final long[] chunkCount;
    
    /**
     * The #of elements considered for each predicate in the body of the rule
     * (in the order in which they were declared, not the order in which they
     * were evaluated).
     */
    public final long[] elementCount;

    /**
     * The #of subqueries examined for each predicate in the rule (in the order
     * in which they were declared, not the order in which they were evaluated).
     * While there are N indices for a rule with N predicates, we only evaluate
     * a subquery for N-1 predicates so at least one index will always be
     * zero(0).
     */
    public final int[] subqueryCount;

    /**
     * Returns the headings.
     * 

     * The following are present for every record.
     * 

     * startTime
     * Timestamp taken when the rule begins to execute (approximate). This
     * is primarily used to correlate performance counters with query execution.
     * 
     * rule
     * The name of the rule.
     * elapsed
     * Elapsed execution time in milliseconds. When the rules are executed
     * concurrently the individual times will not be additive (they will sum to
     * more than the elapsed clock time owing to parallel execution).
     * solutionCount
     * The #of solutions computed.
     * solutions/sec
     * The #of solutions computed per second.
     * mutationCount
     * The #of solutions that resulted in mutations on a relation (i.e., the
     * #of distinct and new solutions). This will be zero unless the rule is
     * writing on a relation.
     * mutations/sec
     * The #of mutations per second.
     * 
     * The following are only present for individual {@link IRule} execution
     * records. Each of these is an array containing one element per tail
     * predicate in the {@link IRule}.
     * 
     * evalOrder
     * The evaluation order for the predicate(s) in the rule.
     * keyOrder
     * The {@link IKeyOrder} for the predicate(s) in the rule. Basically,
     * this tells you which index was used for each predicate.
     * nvars
     * The #of variables that will be unbound in the predicate when it is
     * evaluated (this is a function of the selected evaluation plan).
     * rangeCount
     * The #of elements predicated for each tail predicate in the rule by
     * the {@link IRangeCountFactory} on behalf of the {@link IEvaluationPlan}.
     * chunkCount
     * The #of chunks that were generated for the left-hand side of the JOIN
     * for each predicate in the tail of the rule.
     * elementCount
     * The #of elements that were actually visited for each tail predicate
     * in the rule.
     * subqueryCount
     * The #of subqueries issued for the right-hand side of the JOIN for
     * each predicate in the tail of the rule.
     * tailIndex
     * The index in which the tail predicate(s) for rule were declared for
     * the rule. This information is present iff
     * joinDetails == true was specified. Since the tail predicates
     * will be written into the table in this order, this information is mainly
     * of use if you want to resort the table while keeping the relationship
     * between the predicate evaluation order and the declared predicate order.
     * tailPredicate
     * The tail predicate(s) for rule in the order in which they were
     * declared for the rule. This information is present iff
     * joinDetails == true was specified. When present, the tail
     * predicate details will be found on their own rows in the table and will
     * be aligned under the column whose details are being broken out.
     * 
     * 
     * @todo collect data on the size of the subquery results. A large #of
     *       subqueries with a small number of results each is the main reason
     *       to unroll the JOIN loops.
     * 
     * @todo we are actually aggregating the elapsed time, which is a no-no as
     *       pointed out above when there are rules executing concurrently.
     */
    public String getHeadings() {
     
        return "startTime"//
                + sep + "rule"//
                + sep + "elapsed"//
                //
                + sep + "solutionCount"//
                + sep + "solutions/sec"
                + sep + "mutationCount" //
                + sep + "mutations/sec"//
                //
                + sep + "evalOrder" //
                + sep + "keyOrder" //
                + sep + "nvars"
                + sep + "rangeCount" //
                + sep + "chunkCount" //
                + sep + "elementCount"//
                + sep + "subqueryCount"//
                //
                + sep + "tailIndex"// 
                + sep + "tailPredicate"//
                ;

    }
    
    /**
     * Reports just the data for this record.
     * 
     * @param depth
     *            The depth at which the record was encountered within some
     *            top-level aggregation.
     * @param titles
     *            When true the titles will be displayed inline,
     *            e.g., foo=12 vs 12.
     * @param joinDetails
     *            When true , presents a tabular display of the
     *            details for each JOIN IFF this {@link RuleStats} was collected
     *            for an {@link IRule} (rather than an aggregation of
     *            {@link IRule}).
     */
    public String toStringSimple(final int depth, final boolean titles,
            final boolean joinDetails) {
        
        // the symbol used when a count is zero.
        final String ZE = "0";
        
        // the symbol used when a count was zero, so count/sec is also zero.
        final String NA = "0";
        
        // the symbol used when the elapsed time was zero, so count/sec is divide by zero.
        final String DZ = "0";
        
        final long solutionCount = this.solutionCount.get();
        
        final String solutionCountStr = solutionCount == 0 ? ZE : "" + solutionCount;

        final String solutionsPerSec = (solutionCount == 0 ? NA //
                : (elapsed == 0L ? DZ //
                        : "" + (long) (solutionCount * 1000d / elapsed)));

        final long mutationCount = this.mutationCount.get();

        final String mutationCountStr = mutationCount == 0 ? ZE : "" + mutationCount;

        final String mutationsPerSec = (mutationCount == 0 ? NA //
                : (elapsed == 0L ? DZ //
                        : "" + (long) (mutationCount * 1000d / elapsed)));

        final String q = ""; // '\"';

        final StringBuilder sb = new StringBuilder();

        final String ruleNameStr = "\"" + depthStr.substring(0, depth) + name.replace(",", "")
                + (closureRound == 0 ? "" : " round#" + closureRound) + "\"";

        /*
         * Note: This is the same format that is used for the performance
         * counters. This makes it easier to correlate what is going on in the
         * query execution log with the performance counter data.
         */
        final DateFormat dateFormat = DateFormat.getDateTimeInstance(
                DateFormat.MEDIUM/* date */, DateFormat.MEDIUM/* time */);

        final String dateStr = dateFormat.format(startTime).replace(",", ""); 
        
//        final String sep = titles ? ", " : this.sep;
        
        sb.append((titles?"startTime=":"") + dateStr);
        sb.append(sep + (titles ? "rule=" : "") + ruleNameStr);
        sb.append(sep + (titles ? "elapsed=" : "") + elapsed);
        sb.append(sep + (titles ? "solutionCount=" : "") + solutionCountStr);
        sb.append(sep + (titles ? "solutions/sec=" : "") + solutionsPerSec);
        sb.append(sep + (titles ? "mutationCount=" : "") + mutationCountStr);
        sb.append(sep + (titles ? "mutations/sec=" : "") + mutationsPerSec);
        
        if(!aggregation) {
            
            if(!joinDetails) {
            
            sb.append(sep+(titles?"evalOrder=":"")+q+toString(evalOrder)+q);
            sb.append(sep+(titles?"keyOrder=":"")+q+toString(keyOrder)+q);
            sb.append(sep+(titles?"nvars=":"")+q+toString(nvars)+q);
            sb.append(sep+(titles?"rangeCount=":"")+q+ toString(rangeCount)+q);
            sb.append(sep+(titles?"chunkCount=":"")+q+ toString(chunkCount)+q);
            sb.append(sep+(titles?"elementCount=":"")+q+ toString(elementCount)+q);
            sb.append(sep+(titles?"subqueryCount=":"")+q+toString(subqueryCount)+q);

            } else {
                
                final IRule r = (IRule)rule;
                
                final int tailCount = r.getTailCount();
                
                for (int tailIndex = 0; tailIndex < tailCount; tailIndex++) {
                    
                    if (tailIndex > 0)
                        sb.append("\n"+dateStr+sep+ruleNameStr+(sep+sep+sep+sep+sep));//",,,,,");
                    
                    final int i = showInEvalOrder?evalOrder[tailIndex]:tailIndex;
                    final int orderIndex = showInEvalOrder?tailIndex:permutation[i];
                    
                    sb.append(sep+orderIndex);
                    
                    sb.append(sep+keyOrder[i]);
                    sb.append(sep+nvars[i]);
                    sb.append(sep+rangeCount[i]);
                    sb.append(sep+chunkCount[i]);
                    sb.append(sep+elementCount[i]);
                    sb.append(sep+subqueryCount[i]);
                    
                    sb.append(sep+i);
                    
                    sb.append(sep+"\""+toString(r.getTail(i)).replace(",", " ")+"\"");
                    
                }
                
            }
            
        }

        return sb.toString();
        
    }

    /**
     * When true {@link #toStringSimple(int, boolean, boolean)}
     * will show the table view with the predicates in evaluation order rather
     * than the given order. This view is easier to read when you are examining
     * the join performance but you have to indirect through the tail index to
     * relate the predicates back to the original query form (which may already
     * have been re-ordered so that is not a great loss in many cases).
     */
    final private static boolean showInEvalOrder = true;
    
    /**
     * Return a human readable representation of the predicate. Subclasses may
     * be created that know how to externalize the predicate correctly for its
     * relation. It is a good idea to strip commas from the representation so
     * that the table can be more readily imported into worksheets that exhibit
     * problems with quoted strings embedding commas. This can be done using
     * {@link String#replace(CharSequence, CharSequence)}.
     * 
     * @param pred
     *            A predicate from the tail of an {@link IRule}.
     * 
     * @return The representation of that predicate.
     */
    protected String toString(IPredicate pred) {
       
        return pred.toString();
        
    }
    
    private String depthStr = ".........";

    private StringBuilder toString(final int[] a) {
        
        final StringBuilder sb = new StringBuilder();
        
        sb.append("[");
        
        for (int i = 0; i < a.length; i++) {
            
            if (i > 0)
                sb.append(" ");
            
            sb.append(a[i]);
            
        }
        
        sb.append("]");
        
        return sb;
        
    }


    private StringBuilder toString(final Object[] a) {
        
        final StringBuilder sb = new StringBuilder();
        
        sb.append("[");
        
        for (int i = 0; i < a.length; i++) {
            
            if (i > 0)
                sb.append(" ");
            
            sb.append(a[i]);
            
        }
        
        sb.append("]");
        
        return sb;
        
    }

    private StringBuilder toString(final long[] a) {
        
        final StringBuilder sb = new StringBuilder();
        
        sb.append("[");
        
        for (int i = 0; i < a.length; i++) {
            
            if (i > 0)
                sb.append(" ");
            
            sb.append(a[i]);
            
        }
        
        sb.append("]");
        
        return sb;
        
    }

    /**
     * Reports aggregate and details.
     */
    public String toString() {

        return toString(0L/* minElapsed */, true/* joinDetails */);

    }
    
    /**
     * Set this to [true] if you want a single rule to be formatted in a table,
     * just like a set of rules. Set it to [false] if you want a single rule all
     * on one line using [title=value] for each column.
     */
    static private final boolean showSingleRuleInTable = true;
    
    /**
     * 
     * @param minElapsed
     *            The minimum elapsed time for which details will be shown.
     * @param joinDetails
     *            When true, also presents a tabular display of
     *            the details for each JOIN in each {@link IRule}.
     */
    public String toString(final long minElapsed, final boolean joinDetails) {
            
        final int depth = 0;
        
        if (detailStats.isEmpty() && !showSingleRuleInTable) {

            return toStringSimple(depth, true/* titles */, joinDetails);
            
        }
        
        final StringBuilder sb = new StringBuilder();
        
        // Note: uses Vector.toArray() to avoid concurrent modification issues.
        final RuleStats[] a = detailStats.toArray(new RuleStats[] {});

        // aggregate level.
        sb.append("\n" + getHeadings());

        sb.append("\n" + toStringSimple(depth, false/* titles */, joinDetails));

        toString(minElapsed, joinDetails, depth+1, sb, a);

        sb.append("\n");
        
        return sb.toString();

    }

    private StringBuilder toString(final long minElapsed,
            final boolean joinDetails, final int depth, final StringBuilder sb,
            final RuleStats[] a) {

        // detail level.
        for (int i = 0; i < a.length; i++) {

            final RuleStats x = (RuleStats) a[i];

            if (x.elapsed >= minElapsed) {

                sb.append("\n" + x.toStringSimple(depth, false/* titles */, joinDetails));

                if (x.aggregation && !x.detailStats.isEmpty()) {

                    toString(minElapsed, joinDetails, depth + 1, sb,
                            x.detailStats.toArray(new RuleStats[] {}));

                }

            }

        }

        return sb;
        
    }
    
    /**
     * When execution {@link RuleState}s are being aggregated, this will contain
     * the individual {@link RuleStats} for each execution {@link RuleState}. 
     */
    public List detailStats = new Vector();
    
    /**
     * Aggregates statistics.
     * 
     * Note: since the mutation count as reported by each buffer is cumulative
     * we DO NOT aggregate the mutation counts from sub-steps as it would cause
     * double-counting when steps are executed in parallel.
     * 
     * Instead, once the total program is finished, the total mutation count is
     * computed as the value reported by {@link IBuffer#flush()} for each buffer
     * on which the program writes.
     * 
     * @param o
     *            Statistics for another rule.
     *            
     * @see ProgramTask#executeMutation(IStep)
     * @see MutationTask#flushBuffers(IJoinNexus, RuleStats, java.util.Map)
     */
    synchronized public void add(final RuleStats o) {
    
        if (o == null)
            throw new IllegalArgumentException();
        
        detailStats.add(o);
        
        if (elementCount != null && o.elementCount != null) {

            for (int i = 0; i < elementCount.length; i++) {

                // Note: order[]    is NOT aggregated.
                // Note: keyOrder[] is NOT aggregated.
                // Note: nvars[]    is NOT aggregated.

                rangeCount[i] += o.rangeCount[i];

                chunkCount[i] += o.chunkCount[i];

                elementCount[i] += o.elementCount[i];

                subqueryCount[i] += o.subqueryCount[i];

            }
            
        }
        
        solutionCount.addAndGet( o.solutionCount.get() );

        // See javadoc above.
//        mutationCount.addAndGet(o.mutationCount.get());
    
        elapsed += o.elapsed;
    
    }

}