com.bigdata.rdf.graph.impl.GASState Maven / Gradle / Ivy
Copyright (C) SYSTAP, LLC 2006-2012. All rights reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
See the License for the specific language governing permissions and
limitations under the License.
package com.bigdata.rdf.graph.impl;
import java.util.Comparator;
import java.util.HashSet;
import java.util.Set;
import java.util.concurrent.ConcurrentHashMap;
import java.util.concurrent.ConcurrentMap;
import java.util.concurrent.atomic.AtomicInteger;
import org.apache.log4j.Logger;
import org.openrdf.model.Literal;
import org.openrdf.model.Resource;
import org.openrdf.model.Statement;
import org.openrdf.model.URI;
import org.openrdf.model.Value;
import org.openrdf.query.algebra.evaluation.util.ValueComparator;
import com.bigdata.rdf.graph.Factory;
import com.bigdata.rdf.graph.IGASContext;
import com.bigdata.rdf.graph.IGASEngine;
import com.bigdata.rdf.graph.IGASProgram;
import com.bigdata.rdf.graph.IGASSchedulerImpl;
import com.bigdata.rdf.graph.IGASState;
import com.bigdata.rdf.graph.IGraphAccessor;
import com.bigdata.rdf.graph.IReducer;
import com.bigdata.rdf.graph.IStaticFrontier;
import com.bigdata.rdf.graph.util.GASUtil;
public class GASState implements IGASState {
private static final Logger log = Logger.getLogger(GASState.class);
// /**
// * The {@link GASEngine} on which the {@link IGASProgram} will be run.
// */
// private final GASEngine gasEngine;
* When true
the frontier will be sorted.
private final boolean sortFrontier;
* The {@link IGASProgram} to be run.
private final IGASProgram gasProgram;
* Factory for the vertex state objects.
private final Factory vsf;
* Factory for the edge state objects.
private final Factory esf;
* The set of vertices that were identified in the current iteration.
* Note: This data structure is reused for each round.
* @see IStaticFrontier
* @see IGASSchedulerImpl
* @see #scheduler
private final IStaticFrontier frontier;
* Used to schedule the new frontier and then compact it onto
* {@link #frontier} at the end of the round.
private final IGASSchedulerImpl scheduler;
* The current evaluation round.
private final AtomicInteger round = new AtomicInteger(0);
* The state associated with each visited vertex.
* TODO Offer scalable backend with high throughput, e.g., using a batched
* striped lock as per DISTINCT (we might be better off with such large
* visited sets using a full traveral strategy, but overflow to an HTree or
* (if fixed stride) a MemStore or BigArray could help).
protected final ConcurrentMap vertexState = new ConcurrentHashMap();
* TODO EDGE STATE: state needs to be configurable. When disabled, leave
* this as null
protected final ConcurrentMap edgeState = null;
* Provides access to the backing graph. Used to decode vertices and edges
* for {@link #traceState()}.
private final IGraphAccessor graphAccessor;
* Used to establish a total ordering over RDF {@link Value}s.
private final Comparator valueComparator;
/**
 * Creates the state for running a single {@link IGASProgram}.
 *
 * @param gasEngine
 *            Consulted for {@link IGASEngine#getSortFrontier()}; no
 *            reference to the engine is retained.
 * @param graphAccessor
 *            Provides access to the backing graph.
 * @param frontier
 *            The frontier object that is reused for each round.
 * @param gasScheduler
 *            The scheduler used to build the next frontier.
 * @param gasProgram
 *            The program to be run; also supplies the vertex and edge
 *            state factories.
 *
 * @throws IllegalArgumentException
 *             if any argument is <code>null</code>.
 */
public GASState(final IGASEngine gasEngine,//
        final IGraphAccessor graphAccessor, //
        final IStaticFrontier frontier,//
        final IGASSchedulerImpl gasScheduler,//
        final IGASProgram gasProgram//
) {

    // Validate in declaration order so the message names the first
    // offending argument.
    if (gasEngine == null) {
        throw new IllegalArgumentException("gasEngine is null");
    }

    if (graphAccessor == null) {
        throw new IllegalArgumentException("graphAccessor is null");
    }

    if (frontier == null) {
        throw new IllegalArgumentException("frontier is null");
    }

    if (gasScheduler == null) {
        throw new IllegalArgumentException("gasScheduler is null");
    }

    if (gasProgram == null) {
        throw new IllegalArgumentException("gasProgram is null");
    }

    this.sortFrontier = gasEngine.getSortFrontier();

    this.graphAccessor = graphAccessor;

    this.gasProgram = gasProgram;

    this.vsf = gasProgram.getVertexStateFactory();

    this.esf = gasProgram.getEdgeStateFactory();

    this.frontier = frontier;

    this.scheduler = gasScheduler;

    /*
     * TODO This is the SPARQL value ordering. It might not be total or
     * stable. If not, we can use an ordering over the string values of the
     * RDF Values, but that will push the heap.
     */
    this.valueComparator = new ValueComparator();

}
* Provides access to the backing graph. Used to decode vertices and edges
* for {@link #traceState()}.
protected IGraphAccessor getGraphAccessor() {
return graphAccessor;
public IStaticFrontier frontier() {
return frontier;
public IGASSchedulerImpl getScheduler() {
return scheduler;
public VS getState(final Value v) {
VS vs = vertexState.get(v);
if (vs == null) {
VS old = vertexState.putIfAbsent(v, vs = vsf.initialValue(v));
if (old != null) {
// Lost data race.
vs = old;
return vs;
public boolean isVisited(final Value v) {
return vertexState.get(v) != null;
public boolean isVisited(final Set v) {
return vertexState.keySet().containsAll(v);
public ES getState(final Statement e) {
if (edgeState == null)
return null;
ES es = edgeState.get(e);
if (es == null) {
ES old = edgeState.putIfAbsent(e, es = esf.initialValue(e));
if (old != null) {
// Lost data race.
es = old;
return es;
* TODO batch parallel in java 8.
// Walks the keys of vertexState and tests each one for membership in
// retainSet.
// NOTE(review): the statement executed for vertices that are NOT in
// retainSet, and the closing braces, are missing from this extract.
// Presumably the vertex is removed from vertexState -- confirm against the
// original source.
public void retainAll(final Set retainSet) {
for (Value v : vertexState.keySet()) {
if (!retainSet.contains(v)) {
public int round() {
return round.get();
// Resets this state object; the visible part resets the frontier with a
// minimum capacity of zero and sorting disabled.
// NOTE(review): lines appear to be missing from this extract. The statement
// guarded by the edgeState null test is not shown (as written, the
// resetFrontier call would become the guarded statement), and the argument
// list of resetFrontier is cut off after the second argument. Confirm
// against the original source before editing.
public void reset() {
if (edgeState != null)
frontier.resetFrontier(0/* minCapacity */, false/* sortFrontier */,
public void setFrontier(final IGASContext ctx, final Value... vertices) {
if (vertices == null)
throw new IllegalArgumentException();
// Used to ensure that the initial frontier is distinct.
final Set tmp = new HashSet();
for (Value v : vertices) {
tmp.add(asValue(v)); // convert to internal form and impose distinct.
* Callback to initialize the vertex state before the first iteration.
for (Value v : tmp) {
gasProgram.initVertex(ctx, this, v);
// Reset the frontier.
frontier.resetFrontier(tmp.size()/* minCapacity */, tmp.size() > 1
&& sortFrontier, tmp.iterator());
public void traceState() {
if (log.isInfoEnabled())
log.info("Round=" + round + ", frontierSize=" + frontier().size()
+ ", vertexStateSize=" + vertexState.size());
// NOTE(review): the body of this method is not present in this extract;
// only the signature is visible. Presumably it finishes the current
// evaluation round -- confirm against the original source.
public void endRound() {
* {@inheritDoc}
* FIXME REDUCE : parallelize with nthreads. The reduce operations are often
* lightweight, so maybe a fork/join pool would work better?
* Note: We can not do a parallel reduction right now because the backing
* class does not expose a parallel iterator, e.g., a segment-wise iterator.
* The reduction over the {@link #vertexState} is quite slow as a result.
* It looks like bulk parallel operators will be eventually introduced into
* the Java concurrency collections. For now, it seems like the short term
* solution would be to drop them onto stripped lists at the same time that
* they are first inserted into the CHM. I could then read over those
* striped lists in parallel during the reduction.
* The IReducer should run with parallel threads. This is a huge serial
* bottleneck right now. Fixing this will require a data structure with a
* parallel iterator, not the CHM. See
* http://stackoverflow.com/questions/20164690
* /using-scalas-parhashmap-in-javas-project-instead-of-concurrenthashmap
* It seems like the short term solution would be to drop them onto striped
* lists at the same time that they are first inserted into the CHM. I could
* then read over those striped lists in parallel during the reduction.
public T reduce(final IReducer op) {
for (Value v : vertexState.keySet()) {
op.visit(this, v);
return op.get();
public String toString(final Statement e) {
return e.toString();
/*
 * This is a set of methods for making tests concerning the nature of an
 * edge, vertex, link attribute, or property value. These tests are gathered
 * together in one place because (a) the openrdf data model does not provide
 * efficient link attribute management; and (b) it is not safe to use
 * instanceof tests on the IV implementations to decide these questions.
 *
 * The IV interface classes can actually declare multiple interfaces that
 * are distinct in the openrdf model. For example, a TermId can be any kind
 * of Value and implements the URI, BNode, and Literal interfaces. Thus,
 * interface testing on a TermId IV is not diagnostic. However, the IVs are
 * explicitly marked with isURI(), isBNode(), and isLiteral() methods. These
 * methods must be used in preference to interface tests (instanceof tests
 * will provide unexpected behavior).
 *
 * FIXME TESTS: We need a test suite for compliance of these methods.
 */
* {@inheritDoc}
* FIXME We can optimize this to use reference testing if we are careful in
* the GATHER and SCATTER implementations to impose a canonical mapping over
* the vertex objects for the edges that are exposed to the
* {@link IGASProgram} during a single round.
public Value getOtherVertex(final Value u, final Statement e) {
if (e.getSubject().equals(u))
return e.getObject();
return e.getSubject();
* This will only work for the BigdataGASState.
public Literal getLinkAttr(final Value u, final Statement e) {
return null;
public boolean isEdge(final Statement e) {
return e.getObject() instanceof Resource;
public boolean isAttrib(final Statement e) {
return !isEdge(e);
* {@inheritDoc}
* The openrdf implementation does not support link attributes and this
* method always returns false
* @return false
public boolean isLinkAttrib(final Statement e,
final URI linkAttribType) {
return false;
* {@inheritDoc}
* The openrdf implementation does not support link attributes and this
* method always returns null
* @return null
public Statement decodeStatement(final Value v) {
return null;
public int compareTo(final Value u, final Value v) {
final int ret = valueComparator.compare(u, v);
// log.error("ret=" + ret + ", u=" + u + ", v=" + v); // FIXME REMOVE
return ret;
public Value asValue(final Value value) {
return value;
// public Set values() {
// return vertexState.keySet();
// }