com.bigdata.rdf.graph.impl.GASContext Maven / Gradle / Ivy
/**
Copyright (C) SYSTAP, LLC 2006-2012. All rights reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
package com.bigdata.rdf.graph.impl;
import java.util.Arrays;
import java.util.Collections;
import java.util.Iterator;
import java.util.LinkedHashSet;
import java.util.Set;
import java.util.concurrent.Callable;
import java.util.concurrent.ExecutionException;
import java.util.concurrent.TimeUnit;
import java.util.concurrent.atomic.AtomicInteger;
import java.util.concurrent.atomic.AtomicReference;
import org.apache.log4j.Logger;
import org.openrdf.model.Statement;
import org.openrdf.model.URI;
import org.openrdf.model.Value;
import com.bigdata.rdf.graph.EdgesEnum;
import com.bigdata.rdf.graph.IGASContext;
import com.bigdata.rdf.graph.IGASProgram;
import com.bigdata.rdf.graph.IGASScheduler;
import com.bigdata.rdf.graph.IGASState;
import com.bigdata.rdf.graph.IGASStats;
import com.bigdata.rdf.graph.IGraphAccessor;
import com.bigdata.rdf.graph.IReducer;
import com.bigdata.rdf.graph.IStaticFrontier;
import com.bigdata.rdf.graph.TraversalDirectionEnum;
import com.bigdata.rdf.graph.util.GASUtil;
import cutthecrap.utils.striterators.Filter;
import cutthecrap.utils.striterators.IFilter;
public class GASContext implements IGASContext {
private static final Logger log = Logger.getLogger(GASContext.class);
private final GASEngine gasEngine;
/**
* Used to access the graph (a KB instance).
*/
private final IGraphAccessor graphAccessor;
/**
* This {@link IGASState}.
*/
private final IGASState gasState;
/**
* The graph analytic to be executed.
*/
private final IGASProgram program;
/**
* Whether or not the edges of the graph will be traversed with directed
* graph semantics (default is {@link TraversalDirectionEnum#Forward}).
*/
private final AtomicReference traversalDirection = new AtomicReference(
TraversalDirectionEnum.Forward);
/**
* The maximum number of iterations (defaults to {@link Integer#MAX_VALUE}).
*/
private final AtomicInteger maxIterations = new AtomicInteger(
Integer.MAX_VALUE);
/**
* The maximum number of vertices (defaults to {@link Integer#MAX_VALUE}).
*/
private final AtomicInteger maxVertices = new AtomicInteger(
Integer.MAX_VALUE);
/**
* An optional constraint on the type of the visited links.
*/
private final AtomicReference linkType = new AtomicReference(null);
/**
* An optional constraint on the type of the visited link attributes.
*/
private final AtomicReference linkAttributeType = new AtomicReference(null);
/**
* An optional {@link IReducer} that will executed after the
* {@link IGASProgram}.
*/
private final AtomicReference> afterOp = new AtomicReference>(
null);
/**
* A collection of target vertices for the program to reach.
*/
private final Set targetVertices =
Collections.synchronizedSet(new LinkedHashSet());
/**
* The maximum number of iterations after the target vertices have been
* reached. Default behavior is to continue on even after the targets have
* been reached.
*/
private final AtomicInteger maxIterationsAfterTargets = new AtomicInteger(
Integer.MAX_VALUE);
/**
*
* @param namespace
* The namespace of the graph (KB instance).
* @param timestamp
* The timestamp of the graph view (this should be a read-only
* view for non-blocking index reads).
* @param gasProgram
* The program to execute against that graph.
*/
public GASContext(final GASEngine gasEngine,
final IGraphAccessor graphAccessor,
final IGASState gasState,
final IGASProgram gasProgram) {
if (gasEngine == null)
throw new IllegalArgumentException();
if (graphAccessor == null)
throw new IllegalArgumentException();
if (gasState == null)
throw new IllegalArgumentException();
if (gasProgram == null)
throw new IllegalArgumentException();
this.gasEngine = gasEngine;
this.graphAccessor = graphAccessor;
this.program = gasProgram;
this.gasState = gasState;
}
@Override
public IGASState getGASState() {
return gasState;
}
@Override
public IGASProgram getGASProgram() {
return program;
}
@Override
public IGraphAccessor getGraphAccessor() {
return graphAccessor;
}
@Override
public IGASStats call() throws Exception {
final GASStats total = new GASStats();
program.before(this);
if (log.isTraceEnabled()) {
log.trace("# of targets: " + targetVertices.size());
log.trace("max iterations after targets: " + maxIterationsAfterTargets.get());
}
while (!gasState.frontier().isEmpty()) {
/*
* Check halting conditions.
*
* Note: We could also halt on maxEdges since that is tracked in the
* GASStats.
*/
if (targetVertices.size() > 0 &&
getMaxIterationsAfterTargets() < Integer.MAX_VALUE) {
if (gasState.isVisited(targetVertices)) {
/*
* If we've reached all target vertices then halt the
* program N rounds from now where
* N = maxIterationsAfterTargets.
*/
synchronized(this.maxIterations) {
this.maxIterations.set(Math.min(getMaxIterations(),
(int) total.getNRounds() + getMaxIterationsAfterTargets()));
}
if (log.isTraceEnabled()) {
log.trace("All targets reached at round " +
total.getNRounds() + ", halting at round " +
this.maxIterations.get());
}
}
}
if (total.getNRounds() + 1 > getMaxIterations()) {
log.warn("Halting: maxIterations=" + getMaxIterations()
+ ", #rounds=" + total.getNRounds());
break;
}
if (total.getFrontierSize() >= getMaxVisited()) {
log.warn("Halting: maxVertices=" + getMaxVisited()
+ ", frontierSize=" + total.getFrontierSize());
break;
}
final GASStats roundStats = new GASStats();
doRound(roundStats);
total.add(roundStats);
}
if (log.isInfoEnabled())
log.info("Done: " + total);
gasState.traceState();
// Optional post-reduction.
{
final IReducer op = getRunAfterOp();
if (op != null) {
gasState.reduce(op);
}
}
// Done
return total;
}
/**
* {@inheritDoc}
*
* TODO This is an Asynchronous implementation. Further, it does not attempt
* to race ahead to accelerate convergence (unlike an asynchronous neural
* network).
*
* TODO There should be an option for property value access during the APPLY
* (either no property values are required, or some (or all) are required
* and must optionally be materialized. Likewise, there could be an option
* to force the materialization of the URIs for the (s,p,o).
*
* Property value access is on the SPO index. If we are doing a reverse
* gather (out-edges) then it will be right there and the Apply should be
* pushed into the Gather. If we are doing a forward gather (in-edges), then
* we are reading on OSP and will need to do a separate read on SPO.
*/
@Override
public boolean doRound(final IGASStats stats) throws InterruptedException,
ExecutionException, Exception {
// The fontier for this round.
final IStaticFrontier f = gasState.frontier();
// Conditionally log the computation state.
gasState.traceState();
/*
* TODO This logic allows us to push down the APPLY into the GATHER or
* SCATTER depending on some characteristics of the algorithm. Is this
* worth while?
*
* Note: The ability to push down the APPLY for AllEdges for the GATHER
* depends on our using the union of the in-edges and out-edges
* iterators to visit those edges. That union means that we do not have
* to preserve the accumulant across the in-edges and out-edges aspects
* of the GATHER. If this UNION over the iterators causes problems with
* other optimizations, then it could be discarded. Note that this is
* not an issue for the SCATTER since we can scatter over the in-edges
* and out-edges for any given vertex independently (so long as the
* APPLY is done before the SCATTER - this would not work if we pushed
* down the APPLY into the SCATTER).
*/
final EdgesEnum gatherEdges = getTraversalDirection().asTraversed(
program.getGatherEdges());
final EdgesEnum scatterEdges = getTraversalDirection().asTraversed(
program.getScatterEdges());
final boolean pushDownApplyInGather;
final boolean pushDownApplyInScatter;
final boolean runApplyStage;
if (gatherEdges != EdgesEnum.NoEdges) {
// Do APPLY() in GATHER.
pushDownApplyInGather = true;
pushDownApplyInScatter = false;
runApplyStage = false;
} else if (scatterEdges != EdgesEnum.NoEdges) {
// APPLY() in SCATTER.
pushDownApplyInGather = false;
pushDownApplyInScatter = true;
runApplyStage = false;
} else {
// Do not push down the APPLY.
pushDownApplyInGather = false;
pushDownApplyInScatter = false;
runApplyStage = true;
}
/*
* GATHER
*/
final long beginGather = System.nanoTime();
final long gatherEdgeCount;
if (gatherEdges == EdgesEnum.NoEdges) {
gatherEdgeCount = 0L;
} else {
gatherEdgeCount = gatherEdges(graphAccessor, f, gatherEdges,
pushDownApplyInGather);
}
final long elapsedGather = System.nanoTime() - beginGather;
/*
* APPLY
*/
final long elapsedApply;
if (runApplyStage) {
final long beginApply = System.nanoTime();
apply(f);
elapsedApply = System.nanoTime() - beginApply;
} else {
elapsedApply = 0L;
}
/*
* SCATTER
*/
final long beginScatter = System.nanoTime();
final long scatterEdgeCount;
if (scatterEdges == EdgesEnum.NoEdges) {
scatterEdgeCount = 0L;
} else {
scatterEdgeCount = scatterEdges(graphAccessor, f,
gasState.getScheduler(), scatterEdges,
pushDownApplyInScatter);
}
final long elapsedScatter = System.nanoTime() - beginScatter;
/*
* Reporting.
*/
final long totalElapsed = elapsedGather + elapsedApply + elapsedScatter;
final long totalEdges = scatterEdgeCount + gatherEdgeCount;
stats.add(f.size(), totalEdges, totalElapsed);
if (log.isInfoEnabled()) {
log.info("\ntotal"//
+ ": fontierSize="
+ f.size() //
+ ", ms="
+ TimeUnit.NANOSECONDS.toMillis(totalElapsed)//
+ ", edges="
+ totalEdges//
+ ", teps="
+ GASUtil.getTEPS(totalEdges, totalElapsed)//
+ "\ngather"//
+ ": ms="
+ TimeUnit.NANOSECONDS.toMillis(elapsedGather)//
+ ", nedges="
+ gatherEdgeCount//
+ ", fanIn="
+ GASUtil.fanOut(f.size(), gatherEdgeCount)//
+ ", teps="
+ GASUtil.getTEPS(gatherEdgeCount, elapsedGather) //
+ (runApplyStage ? ", apply="
+ TimeUnit.NANOSECONDS.toMillis(elapsedApply) : "")//
+ "\nscatter"//
+ ": ms="
+ TimeUnit.NANOSECONDS.toMillis(elapsedScatter)//
+ ", nedges="
+ scatterEdgeCount //
+ ", fanOut="
+ GASUtil.fanOut(f.size(), scatterEdgeCount) //
+ ", teps="
+ GASUtil.getTEPS(scatterEdgeCount, elapsedScatter)//
);
}
// End the round, advance the counter, and compact new frontier.
gasState.endRound();
/*
* Handshake with the GASProgram. If it votes to continue -OR- the new
* frontier is not empty, then we will do another round.
*/
final boolean nextRound = program.nextRound(this) || !gasState.frontier().isEmpty();
if(nextRound) {
/*
* Optionally advance the view of the graph before the next round.
*/
graphAccessor.advanceView();
}
return nextRound;
} // doRound()
/**
* Do APPLY.
*
* @return The #of vertices for which the operation was executed.
*
* @throws Exception
*/
private void apply(final IStaticFrontier f) throws Exception {
// for (Value u : f) {
//
// program.apply(gasState, u, null/* sum */);
//
// }
// Note: Return value of ApplyReducer is currently ignored.
reduceOverFrontier(f, new ApplyReducer());
}
private class ApplyReducer implements IReducer {
@Override
public void visit(final IGASState state, final Value u) {
program.apply(state, u, null/* sum */);
}
@Override
public T get() {
// Note: Nothing returned right now.
return null;
}
}
/**
* Reduce over the frontier (used for apply()).
*
* @param f
* The frontier.
* @param op
* The {@link IReducer}.
*
* @return The {@link IReducer#get() result}.
*
* @throws Exception
*/
public T reduceOverFrontier(final IStaticFrontier f,
final IReducer op) throws Exception {
if (f == null)
throw new IllegalArgumentException();
if (op == null)
throw new IllegalArgumentException();
class ReduceVertexTaskFactory implements VertexTaskFactory {
@Override
public Callable newVertexTask(final Value u) {
return new Callable() {
@Override
public Long call() {
// program.apply(gasState, u, null/* sum */);
op.visit(gasState, u);
// Nothing returned by visit().
return ONE;
};
};
};
}
gasEngine.newFrontierStrategy(new ReduceVertexTaskFactory(), f).call();
// Return reduction.
return op.get();
}
private static final Long ONE = Long.valueOf(1L);
/**
* @param inEdges
* when true
the GATHER is over the in-edges.
* Otherwise it is over the out-edges.
* @param pushDownApply
* When true
, the APPLY() will be done during the
* GATHER.
*
* @throws ExecutionException
* @throws InterruptedException
*/
private long scatterEdges(final IGraphAccessor graphAccessor,
final IStaticFrontier f, final IGASScheduler sch,
final EdgesEnum scatterEdges, final boolean pushDownApply)
throws InterruptedException, ExecutionException, Exception {
if (scatterEdges == null)
throw new IllegalArgumentException();
class ScatterVertexTaskFactory implements VertexTaskFactory {
public Callable newVertexTask(final Value u) {
return new ScatterTask(u) {
@Override
protected boolean pushDownApply() {
return pushDownApply;
}
@Override
protected EdgesEnum getEdgesEnum() {
return scatterEdges;
}
@Override
protected IGASScheduler scheduler() {
return sch;
}
@Override
protected IGraphAccessor graphAccessor() {
return graphAccessor;
}
};
};
}
return gasEngine.newFrontierStrategy(new ScatterVertexTaskFactory(), f)
.call();
}
/**
* @param gatherEdges
* The edges to be gathered.
* @param pushDownApply
* When true
, the APPLY() will be done during the
* GATHER.
*
* @throws ExecutionException
* @throws InterruptedException
*/
private long gatherEdges(final IGraphAccessor graphAccessor,
final IStaticFrontier f, //final IScheduler sch,
final EdgesEnum gatherEdges, final boolean pushDownApply)
throws InterruptedException, ExecutionException, Exception {
if (gatherEdges == null)
throw new IllegalArgumentException();
class GatherVertexTaskFactory implements VertexTaskFactory {
public Callable newVertexTask(final Value u) {
return new GatherTask(u) {
@Override
protected boolean pushDownApply() {
return pushDownApply;
}
@Override
protected EdgesEnum getEdgesEnum() {
return gatherEdges;
}
/**
* Note: The API does not permit vertices to be scheduled
* for execution during the GATHER phase.
*/
@Override
protected IGASScheduler scheduler() {
throw new UnsupportedOperationException();
}
@Override
protected IGraphAccessor graphAccessor() {
return graphAccessor;
}
};
};
}
return gasEngine.newFrontierStrategy(new GatherVertexTaskFactory(), f)
.call();
}
/**
* Base class for SCATTER or GATHER of edges for a vertex.
*
* Note: An abstract task pattern is used to factor out parameters that are
* constants within the scope of the scatter for each vertex in the
* frontier.
*
* @author Bryan
* Thompson
*/
abstract private class VertexEdgesTask implements Callable {
protected final Value u;
public VertexEdgesTask(final Value u) {
this.u = u;
}
abstract protected IGraphAccessor graphAccessor();
abstract protected boolean pushDownApply();
abstract protected EdgesEnum getEdgesEnum();
abstract protected IGASScheduler scheduler();
}
/**
* Scatter for the edges of a single vertex.
*
* @author Bryan
* Thompson
*/
abstract private class ScatterTask extends VertexEdgesTask {
public ScatterTask(final Value u) {
super(u);
}
/**
* Execute the scatter for the vertex.
*
* @return The #of visited edges.
*/
@Override
public Long call() throws Exception {
final boolean TRACE = log.isTraceEnabled();
if (pushDownApply()) {
/*
* Run the APPLY as part of the SCATTER.
*
* TODO This can be done on a thread pool or fork/join pool
* since we know that there are no duplicates in the frontier.
*/
program.apply(gasState, u, null/* sum */);
}
if (!program.isChanged(gasState, u)) {
// Unchanged. Do not scatter.
return 0L;
}
/*
* Visit the (in|out)-edges of that vertex.
*/
long nedges = 0L;
final IGASScheduler sch = scheduler();
final Iterator eitr = graphAccessor.getEdges(
GASContext.this, u, getEdgesEnum());
try {
while (eitr.hasNext()) {
// edge
final Statement e = eitr.next();
nedges++;
if (TRACE) // TODO Batch resolve if @ TRACE
log.trace("e=" + gasState.toString(e));
program.scatter(gasState, sch, u, e);
}
} finally {
// eitr.close();
}
return nedges;
}
} // ScatterTask
/**
* Gather for the edges of a single vertex.
*
* @author Bryan
* Thompson
*/
abstract private class GatherTask extends VertexEdgesTask {
public GatherTask(final Value u) {
super(u);
}
@Override
public Long call() throws Exception {
long nedges = 0;
final Iterator eitr = graphAccessor.getEdges(
GASContext.this, u, getEdgesEnum());
try {
/*
* Note: since (left,right) may be null, we need to known if
* left is defined.
*/
boolean first = true;
ST left = null;
while (eitr.hasNext()) {
final Statement e = eitr.next();
if (log.isTraceEnabled()) // TODO Batch resolve if @ TRACE
log.trace("u=" + u + ", e=" + gasState.toString(e) + ", sum="
+ left);
final ST right = program.gather(gasState, u, e);
if (first) {
left = right;
first = false;
} else {
left = program.sum(gasState, left, right);
}
}
if (pushDownApply()) {
/*
* Run the APPLY as part of the GATHER.
*
* TODO This can be done on a thread pool or fork/join pool
* since we know that there are no duplicates in the
* frontier.
*/
program.apply(gasState, u, left/* sum */);
}
} finally {
// eitr.close();
}
return nedges;
}
} // GatherTask
@Override
public void setMaxIterations(final int newValue) {
if (newValue <= 0)
throw new IllegalArgumentException();
this.maxIterations.set(newValue);
}
@Override
public TraversalDirectionEnum getTraversalDirection() {
return traversalDirection.get();
}
@Override
public void setTraversalDirection(final TraversalDirectionEnum newVal) {
if (newVal == null)
throw new IllegalArgumentException();
traversalDirection.set(newVal);
}
@Override
public int getMaxIterations() {
return maxIterations.get();
}
@Override
public void setMaxVisited(int newValue) {
if (newValue <= 0)
throw new IllegalArgumentException();
this.maxVertices.set(newValue);
}
@Override
public int getMaxVisited() {
return maxVertices.get();
}
@Override
public URI getLinkType() {
return linkType.get();
}
@Override
public void setLinkType(final URI linkType) {
this.linkType.set(linkType);
}
@Override
public URI getLinkAttributeType() {
return linkAttributeType.get();
}
@Override
public void setLinkAttributeType(final URI linkAttributeType) {
this.linkAttributeType.set(linkAttributeType);
}
@Override
public void setTargetVertices(final Value[] targetVertices) {
this.targetVertices.addAll(Arrays.asList(targetVertices));
}
@Override
public Set getTargetVertices() {
return this.targetVertices;
}
@Override
public void setMaxIterationsAfterTargets(final int newValue) {
if (newValue < 0)
throw new IllegalArgumentException();
this.maxIterationsAfterTargets.set(newValue);
}
@Override
public int getMaxIterationsAfterTargets() {
return maxIterationsAfterTargets.get();
}
// /**
// * {@inheritDoc}
// *
// * The default implementation only visits the edges.
// */
// @Override
// public IStriterator getConstrainEdgeFilter(final IStriterator itr) {
//
// return itr.addFilter(getEdgeOnlyFilter());
//
// }
// /**
// * Return an {@link IFilter} that will only visit the edges of the graph.
// *
// * @see IGASState#isEdge(Statement)
// */
// protected IFilter getEdgeOnlyFilter() {
//
// return new EdgeOnlyFilter(this);
//
// }
//
// /**
// * Filter visits only edges (filters out attribute values).
// *
// * Note: This filter is pushed down onto the AP and evaluated close to the
// * data.
// */
// private class EdgeOnlyFilter extends Filter {
// private static final long serialVersionUID = 1L;
// private final IGASState gasState;
// private EdgeOnlyFilter(final IGASContext ctx) {
// this.gasState = ctx.getGASState();
// }
// @Override
// public boolean isValid(final Object e) {
// return gasState.isEdge((Statement) e);
// }
// };
/**
* Return a filter that only visits the edges of graph that are instances of
* the specified link attribute type.
*
* Note: For bigdata, the visited edges can be decoded to recover the
* original link as well.
*
* @see IGASState#isLinkAttrib(Statement, URI)
* @see IGASState#decodeStatement(Value)
*/
protected IFilter getLinkAttribFilter(final IGASContext ctx,
final URI linkAttribType) {
return new LinkAttribFilter(ctx, linkAttribType);
}
/**
* Filter visits only edges where the {@link Statement} is an instance of
* the specified link attribute type. For bigdata, the visited edges can be
* decoded to recover the original link as well.
*/
private class LinkAttribFilter extends Filter {
private static final long serialVersionUID = 1L;
private final IGASState gasState;
private final URI linkAttribType;
public LinkAttribFilter(final IGASContext ctx,
final URI linkAttribType) {
if (linkAttribType == null)
throw new IllegalArgumentException();
this.gasState = ctx.getGASState();
this.linkAttribType = linkAttribType;
}
@Override
public boolean isValid(final Object e) {
return gasState.isLinkAttrib((Statement) e, linkAttribType);
}
}
@Override
public void setRunAfterOp(final IReducer afterOp) {
this.afterOp.set(afterOp);
}
@SuppressWarnings("unchecked")
@Override
public IReducer getRunAfterOp() {
return (IReducer) afterOp.get();
}
} // GASContext