![JAR search and dependency download from the Maven repository](/logo.png)
com.bigdata.bop.join.PipelineJoinStats Maven / Gradle / Ivy
/**
Copyright (C) SYSTAP, LLC DBA Blazegraph 2006-2016. All rights reserved.
Contact:
SYSTAP, LLC DBA Blazegraph
2501 Calvert ST NW #106
Washington, DC 20008
[email protected]
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; version 2 of the License.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program; if not, write to the Free Software
Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
*/
/*
* Created on Feb 12, 2012
*/
package com.bigdata.bop.join;
import com.bigdata.bop.engine.BOpStats;
import com.bigdata.bop.join.PipelineJoin.Annotations;
import com.bigdata.counters.CAT;
import com.bigdata.relation.accesspath.IAccessPath;
/**
* Extended statistics for the join operator.
*/
public class PipelineJoinStats extends BaseJoinStats {

    private static final long serialVersionUID = 1L;

    /**
     * The #of input solutions consumed (not just accepted).
     * <p>
     * Note: This counter is highly correlated with {@link BOpStats#unitsIn} but
     * is incremented only when we begin evaluation of the {@link IAccessPath}
     * associated with a specific input solution.
     * <p>
     * When {@link Annotations#COALESCE_DUPLICATE_ACCESS_PATHS} is
     * {@code true}, multiple input binding sets can be mapped onto the same
     * {@link IAccessPath} and this counter will be incremented by the #of
     * such input binding sets.
     */
    public final CAT inputSolutions = new CAT();

    /**
     * The #of output solutions generated. This is incremented as soon as the
     * solution is produced and is used by {@link #getJoinHitRatio()}. Of
     * necessity, updates to {@link #inputSolutions} slightly lead updates to
     * {@link #outputSolutions}.
     * <p>
     * Note: This counter is highly correlated with {@link BOpStats#unitsOut}.
     */
    public final CAT outputSolutions = new CAT();

    /**
     * The estimated join hit ratio. This is computed as
     *
     * <pre>
     * outputSolutions / inputSolutions
     * </pre>
     *
     * It is ZERO (0) when {@link #inputSolutions} is ZERO (0).
     * <p>
     * The join hit ratio is always accurate when the join is fully executed.
     * However, when a cutoff join is used to estimate the join hit ratio a
     * measurement error can be introduced into the join hit ratio when
     * {@link Annotations#COALESCE_DUPLICATE_ACCESS_PATHS} is {@code true},
     * {@link Annotations#MAX_PARALLEL} is GT ONE (1), or
     * {@link Annotations#MAX_PARALLEL_CHUNKS} is GT ZERO (0).
     * <p>
     * When access paths are coalesced there is an inner loop over the input
     * solutions mapped onto the same access path. This inner loop causes
     * {@link #inputSolutions} to be incremented by the #of input solutions
     * coalesced onto that access path before any {@link #outputSolutions} are
     * counted. Coalescing access paths therefore can cause the join hit ratio
     * to be underestimated as there may appear to be more input solutions
     * consumed than were actually applied to produce output solutions if the
     * join was cutoff while processing a set of input solutions which were
     * identified as using the same as-bound access path.
     * <p>
     * The worst case can introduce substantial error into the estimated join
     * hit ratio. Consider a cutoff of {@code 100}. If one input solution
     * generates 100 output solutions and two input solutions are mapped onto
     * the same access path, then the input count will be 2 and the output count
     * will be 100, which gives a reported join hit ratio of {@code 100/2}
     * when the actual join hit ratio is {@code 100/1}.
     * <p>
     * A similar problem can occur if {@link Annotations#MAX_PARALLEL} or
     * {@link Annotations#MAX_PARALLEL_CHUNKS} is GT ONE (1) since input count
     * can be incremented by the #of threads before any output solutions are
     * generated. Estimation error can also occur if multiple join tasks are run
     * in parallel for different chunks of input solutions.
     *
     * @return {@link #outputSolutions} divided by {@link #inputSolutions}, or
     *         ZERO (0) when no input solutions have been counted.
     */
    public double getJoinHitRatio() {
        final long in = inputSolutions.get();
        final long out = outputSolutions.get();
        if (in == 0) {
            // Avoid division by zero: report a ratio of zero for no inputs.
            return 0;
        }
        return ((double) out) / in;
    }

    /**
     * Combines the given statistics with this instance. The superclass is
     * always given the chance to aggregate first; when the argument is also a
     * {@link PipelineJoinStats}, its {@link #inputSolutions} and
     * {@link #outputSolutions} are added to the counters of this instance.
     *
     * @param o
     *            The statistics to be added to this instance.
     */
    @Override
    public void add(final BOpStats o) {
        super.add(o);
        if (o instanceof PipelineJoinStats) {
            final PipelineJoinStats t = (PipelineJoinStats) o;
            inputSolutions.add(t.inputSolutions.get());
            outputSolutions.add(t.outputSolutions.get());
        }
    }

    /**
     * Appends the join specific counters and the derived join hit ratio to the
     * given buffer as <code>,name=value</code> pairs.
     * <p>
     * NOTE(review): this override does not delegate to the superclass
     * implementation; confirm that omitting whatever the superclass would
     * append is intentional.
     *
     * @param sb
     *            The buffer onto which the counters are appended.
     */
    @Override
    protected void toString(final StringBuilder sb) {
        sb.append(",inputSolutions=").append(inputSolutions.get());
        sb.append(",outputSolutions=").append(outputSolutions.get());
        sb.append(",joinHitRatio=").append(getJoinHitRatio());
    }

}