All Downloads are FREE. Search and download functionalities are using the official Maven repository.

com.bigdata.relation.rule.eval.RuleStats Maven / Gradle / Ivy

/**

Copyright (C) SYSTAP, LLC DBA Blazegraph 2006-2016.  All rights reserved.

Contact:
     SYSTAP, LLC DBA Blazegraph
     2501 Calvert ST NW #106
     Washington, DC 20008
     [email protected]

This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; version 2 of the License.

This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
GNU General Public License for more details.

You should have received a copy of the GNU General Public License
along with this program; if not, write to the Free Software
Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
*/
/*
 * Created on Oct 30, 2007
 */

package com.bigdata.relation.rule.eval;

import java.text.DateFormat;
import java.util.List;
import java.util.Vector;
import java.util.concurrent.atomic.AtomicLong;

import com.bigdata.bop.IPredicate;
import com.bigdata.bop.joinGraph.IEvaluationPlan;
import com.bigdata.bop.joinGraph.IRangeCountFactory;
import com.bigdata.relation.IMutableRelation;
import com.bigdata.relation.accesspath.IBuffer;
import com.bigdata.relation.rule.IProgram;
import com.bigdata.relation.rule.IRule;
import com.bigdata.relation.rule.ISlice;
import com.bigdata.relation.rule.IStep;
import com.bigdata.relation.rule.Rule;
import com.bigdata.service.ILoadBalancerService;
import com.bigdata.striterator.IKeyOrder;

/**
 * Statistics about what an {@link IStep} did when it was executed.
 * 

* Program execution has the general form of either a set of {@link IStep}s * executed, at least logically, in parallel, or a sequence of {@link IStep}s * executed in sequence. An {@link IStep} may be a closure operation of one or * more {@link IRule}s, even when it is the top-level {@link IStep}. Inside of * a closure operation, there are one or more rounds and each rule in the * closure will be run in each round. There is no implicit order on the rules * within a closure operation, but they may be forced to execute in a sequence * if the total program execution context forces sequential execution. *

* In order to aggregate the data on rule execution, we want to roll up the data * for the individual rules along the same lines as the program structure. * * @todo Report as counters aggregated by the {@link ILoadBalancerService}? * * @author mikep * @author Bryan Thompson * @version $Id$ */ public class RuleStats { /** * Delimiter string used for output. */ private final static transient String sep = ", ";//"\t";//", "; /** * Initializes statistics for an {@link IStep}. *

* Note: This form is used when statistics will be aggregated across the * execution of multiple {@link IStep}s, such as when computing the closure * (fix point) of a set of {@link IRule}s or a sequential {@link IProgram}. * * @param step * The {@link IStep}. */ public RuleStats(final IStep step) { this.rule = step; this.name = step.getName(); if (step.isRule()) { final int tailCount = ((IRule) step).getTailCount(); this.evalOrder = new int[tailCount]; this.permutation = new int[tailCount]; this.keyOrder = new IKeyOrder[tailCount]; this.nvars = new int[tailCount]; this.chunkCount = new long[tailCount]; this.subqueryCount = new int[tailCount]; this.elementCount = new long[tailCount]; this.rangeCount = new long[tailCount]; } else { this.evalOrder = null; this.permutation = null; this.keyOrder = null; this.nvars = null; this.chunkCount = null; this.subqueryCount = null; this.elementCount = null; this.rangeCount = null; } // this.nexecutions = 0; this.aggregation = true; } /** * Initializes statistics from an {@link iRule} and its * {@link IEvaluationPlan}. *

* Note: This ctor variant makes available the order of execution and range * count metadata from the {@link IEvaluationPlan}. * * @param rule * The {@link IRule}. * @param plan * The {@link IEvaluationPlan}. * @param keyOrder * Identifies which index will be used at each step in the * evaluation plan (the indices are correlated with the tail * predicate index, not the evaluation order index). */ public RuleStats(final IRuleState ruleState) { this(ruleState.getRule()); final IRule rule = ruleState.getRule(); final IEvaluationPlan plan = ruleState.getPlan(); final int tailCount = rule.getTailCount(); System.arraycopy(ruleState.getNVars(), 0, this.nvars, 0, tailCount); System.arraycopy(plan.getOrder(), 0, this.evalOrder, 0, tailCount); System.arraycopy(ruleState.getKeyOrder(), 0, this.keyOrder, 0, tailCount); /* * Construct the permutation of the tail index order for the rule that * corresponds to the evaluation order. * * permutation[i] is the sequence in the evaluation order at which * tail[i] is evaluated. */ for (int i = 0; i < tailCount; i++) { permutation[evalOrder[i]] = i; } for (int i = 0; i < tailCount; i++) { /* * Copy range count data, etc. * * @todo This data is normally cached, but for some rules and some * evaluation planners the plan is formed without requesting some * (or any) of the range counts. An optimization would allow us to * NOT force a range count if it was not already take. * * However, a caching IRangeCountFactory might hide these costs * anyway. */ rangeCount[ i ] = plan.rangeCount(i); } this.aggregation = false; } /** * True iff this is an aggregation of individual rule executions. */ private boolean aggregation; /** * The name of the rule. */ public final String name; /** * The {@link IStep} that was executed. */ public final IStep rule; /** * The round is zero unless this is a closure operations and then it is an * integer in [1:nrounds]. */ public int closureRound = 0; /** * The #of {@link ISolution}s computed by the rule regardless of whether or * not they are written onto an {@link IMutableRelation} and regardless of * whether or not they duplicate a solution already computed. *

* Note: this counter will be larger than the actual #of solutions generated * when evaluating an {@link ISlice} since the pattern used is to invoke * {@link AtomicLong#incrementAndGet()} and add the {@link ISolution} to the * {@link IBuffer} iff the post-increment value is LT * {@link ISlice#getLast()}. Since we are working with the post-increment * value when handling an {@link ISlice} this is naturally one larger than * the #of {@link ISolution}s actually added to the {@link IBuffer} if we * halt processing because we have reached the limit on the {@link ISlice}. */ public AtomicLong solutionCount = new AtomicLong(); /** * The #of elements that were actually added to (or removed from) the * relation indices. This is updated based on the {@link IMutableRelation} * API. Correct reporting by that API and correct aggregation here are * critical to the correct termination of the fix point of some rule set. *

* Note: This value is ONLY incremented when the {@link IBuffer} is flushed. *

* Note: Each {@link IRule} will have its own {@link RuleStats} on which it * reports its mutation count. However, since evaluation of {@link IRule}s * MAY proceed in parallel and since multiple {@link IRule}s can write on * the same {@link IBuffer}, the mutation counts can not be credited * unambiguously to any given rule when rules run in parallel. *

* Note: {@link IBuffer#flush()} maintains a running mutationCount. * Therefore only the end state of that counter should be set on * {@link #mutationCount}. Otherwise the reported {@link #mutationCount} * may overreport the actual mutation count. * * @see #add(RuleStats) */ public AtomicLong mutationCount = new AtomicLong(); /** * The start time for the rule execution. This is approximate (it is * initialized when the {@link RuleStats} instance is created). The * startTime is mainly intended for use when correlating collected * performance counters with query execution. */ public final long startTime = System.currentTimeMillis(); /** * Time to compute the entailments (ms). */ public long elapsed; /* * The following are only available for the execution of a single rule. */ /** * The #of unbound variables for the predicates in the tail of the * {@link Rule} (only available at the detail level of a single rule * instance execution). The array is correlated with the predicates index in * the tail of the rule NOT with its evaluation order. */ public final int[] nvars; /** * The order of execution of the predicates in the body of a rule (only * available at the detail level of a single rule instance execution). When * aggregated, the {@link #evalOrder} will always contain zeros since it can * not be meaningfully combined across executions of either the same or * different rules. */ public final int[] evalOrder; /** * The permutation of the tail predicate index order for an {@link IRule} * (only available when the {@link IStep} is an {@link IRule}) that * corresponds to the evaluation order of the tail predicate in the * {@link IRule}. permutation[i] is the sequence in the * evaluation order at which tail[i] was evaluated. */ public final int[] permutation; /** * An array of the {@link IKeyOrder} that was used for each predicate in the * tail of the rule. The array is correlated with the predicates index in * the tail of the rule NOT with its evaluation order. */ public final IKeyOrder[] keyOrder; /** * The predicated range counts for each predicate in the body of the rule * (in the order in which they were declared, not the order in which they * were evaluated) as reported by the {@link IRangeCountFactory}. The range * counts are used by the {@link IEvaluationPlan}. You can compare the * {@link #elementCount}s with the {@link #rangeCount}s to see the actual * vs predicated #of elements visited per predicate. */ public final long[] rangeCount; /** * The #of chunks materialized for each predicate in the body of the rule * (in the order in which they were declared, not the order in which they * were evaluated). */ public final long[] chunkCount; /** * The #of elements considered for each predicate in the body of the rule * (in the order in which they were declared, not the order in which they * were evaluated). */ public final long[] elementCount; /** * The #of subqueries examined for each predicate in the rule (in the order * in which they were declared, not the order in which they were evaluated). * While there are N indices for a rule with N predicates, we only evaluate * a subquery for N-1 predicates so at least one index will always be * zero(0). */ public final int[] subqueryCount; /** * Returns the headings. *

* The following are present for every record. *

*
startTime
*
Timestamp taken when the rule begins to execute (approximate). This * is primarily used to correlate performance counters with query execution. *
*
rule
*
The name of the rule.
*
elapsed
*
Elapsed execution time in milliseconds. When the rules are executed * concurrently the individual times will not be additive (they will sum to * more than the elapsed clock time owing to parallel execution).
*
solutionCount
*
The #of solutions computed.
*
solutions/sec
*
The #of solutions computed per second.
*
mutationCount
*
The #of solutions that resulted in mutations on a relation (i.e., the * #of distinct and new solutions). This will be zero unless the rule is * writing on a relation.
*
mutations/sec
*
The #of mutations per second.
*
* The following are only present for individual {@link IRule} execution * records. Each of these is an array containing one element per tail * predicate in the {@link IRule}. *
*
evalOrder
*
The evaluation order for the predicate(s) in the rule.
*
keyOrder
*
The {@link IKeyOrder} for the predicate(s) in the rule. Basically, * this tells you which index was used for each predicate.
*
nvars
*
The #of variables that will be unbound in the predicate when it is * evaluated (this is a function of the selected evaluation plan).
*
rangeCount
*
The #of elements predicated for each tail predicate in the rule by * the {@link IRangeCountFactory} on behalf of the {@link IEvaluationPlan}.
*
chunkCount
*
The #of chunks that were generated for the left-hand side of the JOIN * for each predicate in the tail of the rule.
*
elementCount
*
The #of elements that were actually visited for each tail predicate * in the rule.
*
subqueryCount
*
The #of subqueries issued for the right-hand side of the JOIN for * each predicate in the tail of the rule.
*
tailIndex
*
The index in which the tail predicate(s) for rule were declared for * the rule. This information is present iff * joinDetails == true was specified. Since the tail predicates * will be written into the table in this order, this information is mainly * of use if you want to resort the table while keeping the relationship * between the predicate evaluation order and the declared predicate order.
*
tailPredicate
*
The tail predicate(s) for rule in the order in which they were * declared for the rule. This information is present iff * joinDetails == true was specified. When present, the tail * predicate details will be found on their own rows in the table and will * be aligned under the column whose details are being broken out.
*
* * @todo collect data on the size of the subquery results. A large #of * subqueries with a small number of results each is the main reason * to unroll the JOIN loops. * * @todo we are actually aggregating the elapsed time, which is a no-no as * pointed out above when there are rules executing concurrently. */ public String getHeadings() { return "startTime"// + sep + "rule"// + sep + "elapsed"// // + sep + "solutionCount"// + sep + "solutions/sec" + sep + "mutationCount" // + sep + "mutations/sec"// // + sep + "evalOrder" // + sep + "keyOrder" // + sep + "nvars" + sep + "rangeCount" // + sep + "chunkCount" // + sep + "elementCount"// + sep + "subqueryCount"// // + sep + "tailIndex"// + sep + "tailPredicate"// ; } /** * Reports just the data for this record. * * @param depth * The depth at which the record was encountered within some * top-level aggregation. * @param titles * When true the titles will be displayed inline, * e.g., foo=12 vs 12. * @param joinDetails * When true , presents a tabular display of the * details for each JOIN IFF this {@link RuleStats} was collected * for an {@link IRule} (rather than an aggregation of * {@link IRule}). */ public String toStringSimple(final int depth, final boolean titles, final boolean joinDetails) { // the symbol used when a count is zero. final String ZE = "0"; // the symbol used when a count was zero, so count/sec is also zero. final String NA = "0"; // the symbol used when the elapsed time was zero, so count/sec is divide by zero. final String DZ = "0"; final long solutionCount = this.solutionCount.get(); final String solutionCountStr = solutionCount == 0 ? ZE : "" + solutionCount; final String solutionsPerSec = (solutionCount == 0 ? NA // : (elapsed == 0L ? DZ // : "" + (long) (solutionCount * 1000d / elapsed))); final long mutationCount = this.mutationCount.get(); final String mutationCountStr = mutationCount == 0 ? ZE : "" + mutationCount; final String mutationsPerSec = (mutationCount == 0 ? NA // : (elapsed == 0L ? DZ // : "" + (long) (mutationCount * 1000d / elapsed))); final String q = ""; // '\"'; final StringBuilder sb = new StringBuilder(); final String ruleNameStr = "\"" + depthStr.substring(0, depth) + name.replace(",", "") + (closureRound == 0 ? "" : " round#" + closureRound) + "\""; /* * Note: This is the same format that is used for the performance * counters. This makes it easier to correlate what is going on in the * query execution log with the performance counter data. */ final DateFormat dateFormat = DateFormat.getDateTimeInstance( DateFormat.MEDIUM/* date */, DateFormat.MEDIUM/* time */); final String dateStr = dateFormat.format(startTime).replace(",", ""); // final String sep = titles ? ", " : this.sep; sb.append((titles?"startTime=":"") + dateStr); sb.append(sep + (titles ? "rule=" : "") + ruleNameStr); sb.append(sep + (titles ? "elapsed=" : "") + elapsed); sb.append(sep + (titles ? "solutionCount=" : "") + solutionCountStr); sb.append(sep + (titles ? "solutions/sec=" : "") + solutionsPerSec); sb.append(sep + (titles ? "mutationCount=" : "") + mutationCountStr); sb.append(sep + (titles ? "mutations/sec=" : "") + mutationsPerSec); if(!aggregation) { if(!joinDetails) { sb.append(sep+(titles?"evalOrder=":"")+q+toString(evalOrder)+q); sb.append(sep+(titles?"keyOrder=":"")+q+toString(keyOrder)+q); sb.append(sep+(titles?"nvars=":"")+q+toString(nvars)+q); sb.append(sep+(titles?"rangeCount=":"")+q+ toString(rangeCount)+q); sb.append(sep+(titles?"chunkCount=":"")+q+ toString(chunkCount)+q); sb.append(sep+(titles?"elementCount=":"")+q+ toString(elementCount)+q); sb.append(sep+(titles?"subqueryCount=":"")+q+toString(subqueryCount)+q); } else { final IRule r = (IRule)rule; final int tailCount = r.getTailCount(); for (int tailIndex = 0; tailIndex < tailCount; tailIndex++) { if (tailIndex > 0) sb.append("\n"+dateStr+sep+ruleNameStr+(sep+sep+sep+sep+sep));//",,,,,"); final int i = showInEvalOrder?evalOrder[tailIndex]:tailIndex; final int orderIndex = showInEvalOrder?tailIndex:permutation[i]; sb.append(sep+orderIndex); sb.append(sep+keyOrder[i]); sb.append(sep+nvars[i]); sb.append(sep+rangeCount[i]); sb.append(sep+chunkCount[i]); sb.append(sep+elementCount[i]); sb.append(sep+subqueryCount[i]); sb.append(sep+i); sb.append(sep+"\""+toString(r.getTail(i)).replace(",", " ")+"\""); } } } return sb.toString(); } /** * When true {@link #toStringSimple(int, boolean, boolean)} * will show the table view with the predicates in evaluation order rather * than the given order. This view is easier to read when you are examining * the join performance but you have to indirect through the tail index to * relate the predicates back to the original query form (which may already * have been re-ordered so that is not a great loss in many cases). */ final private static boolean showInEvalOrder = true; /** * Return a human readable representation of the predicate. Subclasses may * be created that know how to externalize the predicate correctly for its * relation. It is a good idea to strip commas from the representation so * that the table can be more readily imported into worksheets that exhibit * problems with quoted strings embedding commas. This can be done using * {@link String#replace(CharSequence, CharSequence)}. * * @param pred * A predicate from the tail of an {@link IRule}. * * @return The representation of that predicate. */ protected String toString(IPredicate pred) { return pred.toString(); } private String depthStr = "........."; private StringBuilder toString(final int[] a) { final StringBuilder sb = new StringBuilder(); sb.append("["); for (int i = 0; i < a.length; i++) { if (i > 0) sb.append(" "); sb.append(a[i]); } sb.append("]"); return sb; } private StringBuilder toString(final Object[] a) { final StringBuilder sb = new StringBuilder(); sb.append("["); for (int i = 0; i < a.length; i++) { if (i > 0) sb.append(" "); sb.append(a[i]); } sb.append("]"); return sb; } private StringBuilder toString(final long[] a) { final StringBuilder sb = new StringBuilder(); sb.append("["); for (int i = 0; i < a.length; i++) { if (i > 0) sb.append(" "); sb.append(a[i]); } sb.append("]"); return sb; } /** * Reports aggregate and details. */ public String toString() { return toString(0L/* minElapsed */, true/* joinDetails */); } /** * Set this to [true] if you want a single rule to be formatted in a table, * just like a set of rules. Set it to [false] if you want a single rule all * on one line using [title=value] for each column. */ static private final boolean showSingleRuleInTable = true; /** * * @param minElapsed * The minimum elapsed time for which details will be shown. * @param joinDetails * When true, also presents a tabular display of * the details for each JOIN in each {@link IRule}. */ public String toString(final long minElapsed, final boolean joinDetails) { final int depth = 0; if (detailStats.isEmpty() && !showSingleRuleInTable) { return toStringSimple(depth, true/* titles */, joinDetails); } final StringBuilder sb = new StringBuilder(); // Note: uses Vector.toArray() to avoid concurrent modification issues. final RuleStats[] a = detailStats.toArray(new RuleStats[] {}); // aggregate level. sb.append("\n" + getHeadings()); sb.append("\n" + toStringSimple(depth, false/* titles */, joinDetails)); toString(minElapsed, joinDetails, depth+1, sb, a); sb.append("\n"); return sb.toString(); } private StringBuilder toString(final long minElapsed, final boolean joinDetails, final int depth, final StringBuilder sb, final RuleStats[] a) { // detail level. for (int i = 0; i < a.length; i++) { final RuleStats x = (RuleStats) a[i]; if (x.elapsed >= minElapsed) { sb.append("\n" + x.toStringSimple(depth, false/* titles */, joinDetails)); if (x.aggregation && !x.detailStats.isEmpty()) { toString(minElapsed, joinDetails, depth + 1, sb, x.detailStats.toArray(new RuleStats[] {})); } } } return sb; } /** * When execution {@link RuleState}s are being aggregated, this will contain * the individual {@link RuleStats} for each execution {@link RuleState}. */ public List detailStats = new Vector(); /** * Aggregates statistics. *

* Note: since the mutation count as reported by each buffer is cumulative * we DO NOT aggregate the mutation counts from sub-steps as it would cause * double-counting when steps are executed in parallel. *

* Instead, once the total program is finished, the total mutation count is * computed as the value reported by {@link IBuffer#flush()} for each buffer * on which the program writes. * * @param o * Statistics for another rule. * * @see ProgramTask#executeMutation(IStep) * @see MutationTask#flushBuffers(IJoinNexus, RuleStats, java.util.Map) */ synchronized public void add(final RuleStats o) { if (o == null) throw new IllegalArgumentException(); detailStats.add(o); if (elementCount != null && o.elementCount != null) { for (int i = 0; i < elementCount.length; i++) { // Note: order[] is NOT aggregated. // Note: keyOrder[] is NOT aggregated. // Note: nvars[] is NOT aggregated. rangeCount[i] += o.rangeCount[i]; chunkCount[i] += o.chunkCount[i]; elementCount[i] += o.elementCount[i]; subqueryCount[i] += o.subqueryCount[i]; } } solutionCount.addAndGet( o.solutionCount.get() ); // See javadoc above. // mutationCount.addAndGet(o.mutationCount.get()); elapsed += o.elapsed; } }





© 2015 - 2025 Weber Informatics LLC | Privacy Policy