com.bigdata.relation.rule.eval.RuleStats Maven / Gradle / Ivy
/**
Copyright (C) SYSTAP, LLC DBA Blazegraph 2006-2016. All rights reserved.
Contact:
SYSTAP, LLC DBA Blazegraph
2501 Calvert ST NW #106
Washington, DC 20008
[email protected]
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; version 2 of the License.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program; if not, write to the Free Software
Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
*/
/*
* Created on Oct 30, 2007
*/
package com.bigdata.relation.rule.eval;
import java.text.DateFormat;
import java.util.List;
import java.util.Vector;
import java.util.concurrent.atomic.AtomicLong;
import com.bigdata.bop.IPredicate;
import com.bigdata.bop.joinGraph.IEvaluationPlan;
import com.bigdata.bop.joinGraph.IRangeCountFactory;
import com.bigdata.relation.IMutableRelation;
import com.bigdata.relation.accesspath.IBuffer;
import com.bigdata.relation.rule.IProgram;
import com.bigdata.relation.rule.IRule;
import com.bigdata.relation.rule.ISlice;
import com.bigdata.relation.rule.IStep;
import com.bigdata.relation.rule.Rule;
import com.bigdata.service.ILoadBalancerService;
import com.bigdata.striterator.IKeyOrder;
/**
* Statistics about what an {@link IStep} did when it was executed.
*
* Program execution has the general form of either a set of {@link IStep}s
* executed, at least logically, in parallel, or a sequence of {@link IStep}s
* executed in sequence. An {@link IStep} may be a closure operation of one or
* more {@link IRule}s, even when it is the top-level {@link IStep}. Inside of
* a closure operation, there are one or more rounds and each rule in the
* closure will be run in each round. There is no implicit order on the rules
* within a closure operation, but they may be forced to execute in a sequence
* if the total program execution context forces sequential execution.
*
* In order to aggregate the data on rule execution, we want to roll up the data
* for the individual rules along the same lines as the program structure.
*
* @todo Report as counters aggregated by the {@link ILoadBalancerService}?
*
* @author mikep
* @author Bryan Thompson
* @version $Id$
*/
public class RuleStats {
/**
* Delimiter string used for output.
*/
private final static transient String sep = ", ";//"\t";//", ";
/**
* Initializes statistics for an {@link IStep}.
*
* Note: This form is used when statistics will be aggregated across the
* execution of multiple {@link IStep}s, such as when computing the closure
* (fix point) of a set of {@link IRule}s or a sequential {@link IProgram}.
*
* @param step
* The {@link IStep}.
*/
public RuleStats(final IStep step) {
this.rule = step;
this.name = step.getName();
if (step.isRule()) {
final int tailCount = ((IRule) step).getTailCount();
this.evalOrder = new int[tailCount];
this.permutation = new int[tailCount];
this.keyOrder = new IKeyOrder[tailCount];
this.nvars = new int[tailCount];
this.chunkCount = new long[tailCount];
this.subqueryCount = new int[tailCount];
this.elementCount = new long[tailCount];
this.rangeCount = new long[tailCount];
} else {
this.evalOrder = null;
this.permutation = null;
this.keyOrder = null;
this.nvars = null;
this.chunkCount = null;
this.subqueryCount = null;
this.elementCount = null;
this.rangeCount = null;
}
// this.nexecutions = 0;
this.aggregation = true;
}
/**
* Initializes statistics from an {@link iRule} and its
* {@link IEvaluationPlan}.
*
* Note: This ctor variant makes available the order of execution and range
* count metadata from the {@link IEvaluationPlan}.
*
* @param rule
* The {@link IRule}.
* @param plan
* The {@link IEvaluationPlan}.
* @param keyOrder
* Identifies which index will be used at each step in the
* evaluation plan (the indices are correlated with the tail
* predicate index, not the evaluation order index).
*/
public RuleStats(final IRuleState ruleState) {
this(ruleState.getRule());
final IRule rule = ruleState.getRule();
final IEvaluationPlan plan = ruleState.getPlan();
final int tailCount = rule.getTailCount();
System.arraycopy(ruleState.getNVars(), 0, this.nvars, 0, tailCount);
System.arraycopy(plan.getOrder(), 0, this.evalOrder, 0, tailCount);
System.arraycopy(ruleState.getKeyOrder(), 0, this.keyOrder, 0, tailCount);
/*
* Construct the permutation of the tail index order for the rule that
* corresponds to the evaluation order.
*
* permutation[i] is the sequence in the evaluation order at which
* tail[i] is evaluated.
*/
for (int i = 0; i < tailCount; i++) {
permutation[evalOrder[i]] = i;
}
for (int i = 0; i < tailCount; i++) {
/*
* Copy range count data, etc.
*
* @todo This data is normally cached, but for some rules and some
* evaluation planners the plan is formed without requesting some
* (or any) of the range counts. An optimization would allow us to
* NOT force a range count if it was not already take.
*
* However, a caching IRangeCountFactory might hide these costs
* anyway.
*/
rangeCount[ i ] = plan.rangeCount(i);
}
this.aggregation = false;
}
/**
* True iff this is an aggregation of individual rule executions.
*/
private boolean aggregation;
/**
* The name of the rule.
*/
public final String name;
/**
* The {@link IStep} that was executed.
*/
public final IStep rule;
/**
* The round is zero unless this is a closure operations and then it is an
* integer in [1:nrounds].
*/
public int closureRound = 0;
/**
* The #of {@link ISolution}s computed by the rule regardless of whether or
* not they are written onto an {@link IMutableRelation} and regardless of
* whether or not they duplicate a solution already computed.
*
* Note: this counter will be larger than the actual #of solutions generated
* when evaluating an {@link ISlice} since the pattern used is to invoke
* {@link AtomicLong#incrementAndGet()} and add the {@link ISolution} to the
* {@link IBuffer} iff the post-increment value is LT
* {@link ISlice#getLast()}. Since we are working with the post-increment
* value when handling an {@link ISlice} this is naturally one larger than
* the #of {@link ISolution}s actually added to the {@link IBuffer} if we
* halt processing because we have reached the limit on the {@link ISlice}.
*/
public AtomicLong solutionCount = new AtomicLong();
/**
* The #of elements that were actually added to (or removed from) the
* relation indices. This is updated based on the {@link IMutableRelation}
* API. Correct reporting by that API and correct aggregation here are
* critical to the correct termination of the fix point of some rule set.
*
* Note: This value is ONLY incremented when the {@link IBuffer} is flushed.
*
* Note: Each {@link IRule} will have its own {@link RuleStats} on which it
* reports its mutation count. However, since evaluation of {@link IRule}s
* MAY proceed in parallel and since multiple {@link IRule}s can write on
* the same {@link IBuffer}, the mutation counts can not be credited
* unambiguously to any given rule when rules run in parallel.
*
* Note: {@link IBuffer#flush()} maintains a running mutationCount.
* Therefore only the end state of that counter should be set on
* {@link #mutationCount}. Otherwise the reported {@link #mutationCount}
* may overreport the actual mutation count.
*
* @see #add(RuleStats)
*/
public AtomicLong mutationCount = new AtomicLong();
/**
* The start time for the rule execution. This is approximate (it is
* initialized when the {@link RuleStats} instance is created). The
* startTime is mainly intended for use when correlating collected
* performance counters with query execution.
*/
public final long startTime = System.currentTimeMillis();
/**
* Time to compute the entailments (ms).
*/
public long elapsed;
/*
* The following are only available for the execution of a single rule.
*/
/**
* The #of unbound variables for the predicates in the tail of the
* {@link Rule} (only available at the detail level of a single rule
* instance execution). The array is correlated with the predicates index in
* the tail of the rule NOT with its evaluation order.
*/
public final int[] nvars;
/**
* The order of execution of the predicates in the body of a rule (only
* available at the detail level of a single rule instance execution). When
* aggregated, the {@link #evalOrder} will always contain zeros since it can
* not be meaningfully combined across executions of either the same or
* different rules.
*/
public final int[] evalOrder;
/**
* The permutation of the tail predicate index order for an {@link IRule}
* (only available when the {@link IStep} is an {@link IRule}) that
* corresponds to the evaluation order of the tail predicate in the
* {@link IRule}. permutation[i]
is the sequence in the
* evaluation order at which tail[i]
was evaluated.
*/
public final int[] permutation;
/**
* An array of the {@link IKeyOrder} that was used for each predicate in the
* tail of the rule. The array is correlated with the predicates index in
* the tail of the rule NOT with its evaluation order.
*/
public final IKeyOrder[] keyOrder;
/**
* The predicated range counts for each predicate in the body of the rule
* (in the order in which they were declared, not the order in which they
* were evaluated) as reported by the {@link IRangeCountFactory}. The range
* counts are used by the {@link IEvaluationPlan}. You can compare the
* {@link #elementCount}s with the {@link #rangeCount}s to see the actual
* vs predicated #of elements visited per predicate.
*/
public final long[] rangeCount;
/**
* The #of chunks materialized for each predicate in the body of the rule
* (in the order in which they were declared, not the order in which they
* were evaluated).
*/
public final long[] chunkCount;
/**
* The #of elements considered for each predicate in the body of the rule
* (in the order in which they were declared, not the order in which they
* were evaluated).
*/
public final long[] elementCount;
/**
* The #of subqueries examined for each predicate in the rule (in the order
* in which they were declared, not the order in which they were evaluated).
* While there are N indices for a rule with N predicates, we only evaluate
* a subquery for N-1 predicates so at least one index will always be
* zero(0).
*/
public final int[] subqueryCount;
/**
* Returns the headings.
*
* The following are present for every record.
*
* - startTime
* - Timestamp taken when the rule begins to execute (approximate). This
* is primarily used to correlate performance counters with query execution.
*
* - rule
* - The name of the rule.
* - elapsed
* - Elapsed execution time in milliseconds. When the rules are executed
* concurrently the individual times will not be additive (they will sum to
* more than the elapsed clock time owing to parallel execution).
* - solutionCount
* - The #of solutions computed.
* - solutions/sec
* - The #of solutions computed per second.
* - mutationCount
* - The #of solutions that resulted in mutations on a relation (i.e., the
* #of distinct and new solutions). This will be zero unless the rule is
* writing on a relation.
* - mutations/sec
* - The #of mutations per second.
*
* The following are only present for individual {@link IRule} execution
* records. Each of these is an array containing one element per tail
* predicate in the {@link IRule}.
*
* - evalOrder
* - The evaluation order for the predicate(s) in the rule.
* - keyOrder
* - The {@link IKeyOrder} for the predicate(s) in the rule. Basically,
* this tells you which index was used for each predicate.
* - nvars
* - The #of variables that will be unbound in the predicate when it is
* evaluated (this is a function of the selected evaluation plan).
* - rangeCount
* - The #of elements predicated for each tail predicate in the rule by
* the {@link IRangeCountFactory} on behalf of the {@link IEvaluationPlan}.
* - chunkCount
* - The #of chunks that were generated for the left-hand side of the JOIN
* for each predicate in the tail of the rule.
* - elementCount
* - The #of elements that were actually visited for each tail predicate
* in the rule.
* - subqueryCount
* - The #of subqueries issued for the right-hand side of the JOIN for
* each predicate in the tail of the rule.
* - tailIndex
* - The index in which the tail predicate(s) for rule were declared for
* the rule. This information is present iff
*
joinDetails == true
was specified. Since the tail predicates
* will be written into the table in this order, this information is mainly
* of use if you want to resort the table while keeping the relationship
* between the predicate evaluation order and the declared predicate order.
* - tailPredicate
* - The tail predicate(s) for rule in the order in which they were
* declared for the rule. This information is present iff
*
joinDetails == true
was specified. When present, the tail
* predicate details will be found on their own rows in the table and will
* be aligned under the column whose details are being broken out.
*
*
* @todo collect data on the size of the subquery results. A large #of
* subqueries with a small number of results each is the main reason
* to unroll the JOIN loops.
*
* @todo we are actually aggregating the elapsed time, which is a no-no as
* pointed out above when there are rules executing concurrently.
*/
public String getHeadings() {
return "startTime"//
+ sep + "rule"//
+ sep + "elapsed"//
//
+ sep + "solutionCount"//
+ sep + "solutions/sec"
+ sep + "mutationCount" //
+ sep + "mutations/sec"//
//
+ sep + "evalOrder" //
+ sep + "keyOrder" //
+ sep + "nvars"
+ sep + "rangeCount" //
+ sep + "chunkCount" //
+ sep + "elementCount"//
+ sep + "subqueryCount"//
//
+ sep + "tailIndex"//
+ sep + "tailPredicate"//
;
}
/**
* Reports just the data for this record.
*
* @param depth
* The depth at which the record was encountered within some
* top-level aggregation.
* @param titles
* When true
the titles will be displayed inline,
* e.g., foo=12
vs 12
.
* @param joinDetails
* When true
, presents a tabular display of the
* details for each JOIN IFF this {@link RuleStats} was collected
* for an {@link IRule} (rather than an aggregation of
* {@link IRule}).
*/
public String toStringSimple(final int depth, final boolean titles,
final boolean joinDetails) {
// the symbol used when a count is zero.
final String ZE = "0";
// the symbol used when a count was zero, so count/sec is also zero.
final String NA = "0";
// the symbol used when the elapsed time was zero, so count/sec is divide by zero.
final String DZ = "0";
final long solutionCount = this.solutionCount.get();
final String solutionCountStr = solutionCount == 0 ? ZE : "" + solutionCount;
final String solutionsPerSec = (solutionCount == 0 ? NA //
: (elapsed == 0L ? DZ //
: "" + (long) (solutionCount * 1000d / elapsed)));
final long mutationCount = this.mutationCount.get();
final String mutationCountStr = mutationCount == 0 ? ZE : "" + mutationCount;
final String mutationsPerSec = (mutationCount == 0 ? NA //
: (elapsed == 0L ? DZ //
: "" + (long) (mutationCount * 1000d / elapsed)));
final String q = ""; // '\"';
final StringBuilder sb = new StringBuilder();
final String ruleNameStr = "\"" + depthStr.substring(0, depth) + name.replace(",", "")
+ (closureRound == 0 ? "" : " round#" + closureRound) + "\"";
/*
* Note: This is the same format that is used for the performance
* counters. This makes it easier to correlate what is going on in the
* query execution log with the performance counter data.
*/
final DateFormat dateFormat = DateFormat.getDateTimeInstance(
DateFormat.MEDIUM/* date */, DateFormat.MEDIUM/* time */);
final String dateStr = dateFormat.format(startTime).replace(",", "");
// final String sep = titles ? ", " : this.sep;
sb.append((titles?"startTime=":"") + dateStr);
sb.append(sep + (titles ? "rule=" : "") + ruleNameStr);
sb.append(sep + (titles ? "elapsed=" : "") + elapsed);
sb.append(sep + (titles ? "solutionCount=" : "") + solutionCountStr);
sb.append(sep + (titles ? "solutions/sec=" : "") + solutionsPerSec);
sb.append(sep + (titles ? "mutationCount=" : "") + mutationCountStr);
sb.append(sep + (titles ? "mutations/sec=" : "") + mutationsPerSec);
if(!aggregation) {
if(!joinDetails) {
sb.append(sep+(titles?"evalOrder=":"")+q+toString(evalOrder)+q);
sb.append(sep+(titles?"keyOrder=":"")+q+toString(keyOrder)+q);
sb.append(sep+(titles?"nvars=":"")+q+toString(nvars)+q);
sb.append(sep+(titles?"rangeCount=":"")+q+ toString(rangeCount)+q);
sb.append(sep+(titles?"chunkCount=":"")+q+ toString(chunkCount)+q);
sb.append(sep+(titles?"elementCount=":"")+q+ toString(elementCount)+q);
sb.append(sep+(titles?"subqueryCount=":"")+q+toString(subqueryCount)+q);
} else {
final IRule r = (IRule)rule;
final int tailCount = r.getTailCount();
for (int tailIndex = 0; tailIndex < tailCount; tailIndex++) {
if (tailIndex > 0)
sb.append("\n"+dateStr+sep+ruleNameStr+(sep+sep+sep+sep+sep));//",,,,,");
final int i = showInEvalOrder?evalOrder[tailIndex]:tailIndex;
final int orderIndex = showInEvalOrder?tailIndex:permutation[i];
sb.append(sep+orderIndex);
sb.append(sep+keyOrder[i]);
sb.append(sep+nvars[i]);
sb.append(sep+rangeCount[i]);
sb.append(sep+chunkCount[i]);
sb.append(sep+elementCount[i]);
sb.append(sep+subqueryCount[i]);
sb.append(sep+i);
sb.append(sep+"\""+toString(r.getTail(i)).replace(",", " ")+"\"");
}
}
}
return sb.toString();
}
/**
* When true
{@link #toStringSimple(int, boolean, boolean)}
* will show the table view with the predicates in evaluation order rather
* than the given order. This view is easier to read when you are examining
* the join performance but you have to indirect through the tail index to
* relate the predicates back to the original query form (which may already
* have been re-ordered so that is not a great loss in many cases).
*/
final private static boolean showInEvalOrder = true;
/**
* Return a human readable representation of the predicate. Subclasses may
* be created that know how to externalize the predicate correctly for its
* relation. It is a good idea to strip commas from the representation so
* that the table can be more readily imported into worksheets that exhibit
* problems with quoted strings embedding commas. This can be done using
* {@link String#replace(CharSequence, CharSequence)}.
*
* @param pred
* A predicate from the tail of an {@link IRule}.
*
* @return The representation of that predicate.
*/
protected String toString(IPredicate pred) {
return pred.toString();
}
private String depthStr = ".........";
private StringBuilder toString(final int[] a) {
final StringBuilder sb = new StringBuilder();
sb.append("[");
for (int i = 0; i < a.length; i++) {
if (i > 0)
sb.append(" ");
sb.append(a[i]);
}
sb.append("]");
return sb;
}
private StringBuilder toString(final Object[] a) {
final StringBuilder sb = new StringBuilder();
sb.append("[");
for (int i = 0; i < a.length; i++) {
if (i > 0)
sb.append(" ");
sb.append(a[i]);
}
sb.append("]");
return sb;
}
private StringBuilder toString(final long[] a) {
final StringBuilder sb = new StringBuilder();
sb.append("[");
for (int i = 0; i < a.length; i++) {
if (i > 0)
sb.append(" ");
sb.append(a[i]);
}
sb.append("]");
return sb;
}
/**
* Reports aggregate and details.
*/
public String toString() {
return toString(0L/* minElapsed */, true/* joinDetails */);
}
/**
* Set this to [true] if you want a single rule to be formatted in a table,
* just like a set of rules. Set it to [false] if you want a single rule all
* on one line using [title=value] for each column.
*/
static private final boolean showSingleRuleInTable = true;
/**
*
* @param minElapsed
* The minimum elapsed time for which details will be shown.
* @param joinDetails
* When true
, also presents a tabular display of
* the details for each JOIN in each {@link IRule}.
*/
public String toString(final long minElapsed, final boolean joinDetails) {
final int depth = 0;
if (detailStats.isEmpty() && !showSingleRuleInTable) {
return toStringSimple(depth, true/* titles */, joinDetails);
}
final StringBuilder sb = new StringBuilder();
// Note: uses Vector.toArray() to avoid concurrent modification issues.
final RuleStats[] a = detailStats.toArray(new RuleStats[] {});
// aggregate level.
sb.append("\n" + getHeadings());
sb.append("\n" + toStringSimple(depth, false/* titles */, joinDetails));
toString(minElapsed, joinDetails, depth+1, sb, a);
sb.append("\n");
return sb.toString();
}
private StringBuilder toString(final long minElapsed,
final boolean joinDetails, final int depth, final StringBuilder sb,
final RuleStats[] a) {
// detail level.
for (int i = 0; i < a.length; i++) {
final RuleStats x = (RuleStats) a[i];
if (x.elapsed >= minElapsed) {
sb.append("\n" + x.toStringSimple(depth, false/* titles */, joinDetails));
if (x.aggregation && !x.detailStats.isEmpty()) {
toString(minElapsed, joinDetails, depth + 1, sb,
x.detailStats.toArray(new RuleStats[] {}));
}
}
}
return sb;
}
/**
* When execution {@link RuleState}s are being aggregated, this will contain
* the individual {@link RuleStats} for each execution {@link RuleState}.
*/
public List detailStats = new Vector();
/**
* Aggregates statistics.
*
* Note: since the mutation count as reported by each buffer is cumulative
* we DO NOT aggregate the mutation counts from sub-steps as it would cause
* double-counting when steps are executed in parallel.
*
* Instead, once the total program is finished, the total mutation count is
* computed as the value reported by {@link IBuffer#flush()} for each buffer
* on which the program writes.
*
* @param o
* Statistics for another rule.
*
* @see ProgramTask#executeMutation(IStep)
* @see MutationTask#flushBuffers(IJoinNexus, RuleStats, java.util.Map)
*/
synchronized public void add(final RuleStats o) {
if (o == null)
throw new IllegalArgumentException();
detailStats.add(o);
if (elementCount != null && o.elementCount != null) {
for (int i = 0; i < elementCount.length; i++) {
// Note: order[] is NOT aggregated.
// Note: keyOrder[] is NOT aggregated.
// Note: nvars[] is NOT aggregated.
rangeCount[i] += o.rangeCount[i];
chunkCount[i] += o.chunkCount[i];
elementCount[i] += o.elementCount[i];
subqueryCount[i] += o.subqueryCount[i];
}
}
solutionCount.addAndGet( o.solutionCount.get() );
// See javadoc above.
// mutationCount.addAndGet(o.mutationCount.get());
elapsed += o.elapsed;
}
}