// com.bigdata.rdf.graph.impl.ram.RAMGASEngine Maven / Gradle / Ivy
/**
Copyright (C) SYSTAP, LLC 2006-2012. All rights reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
package com.bigdata.rdf.graph.impl.ram;
import java.util.Iterator;
import java.util.LinkedHashSet;
import java.util.Random;
import java.util.Set;
import java.util.concurrent.ConcurrentHashMap;
import java.util.concurrent.ConcurrentMap;
import org.openrdf.model.Resource;
import org.openrdf.model.Statement;
import org.openrdf.model.URI;
import org.openrdf.model.Value;
import org.openrdf.model.ValueFactory;
import org.openrdf.model.impl.ValueFactoryImpl;
import org.openrdf.sail.SailException;
import com.bigdata.rdf.graph.EdgesEnum;
import com.bigdata.rdf.graph.IGASContext;
import com.bigdata.rdf.graph.IGraphAccessor;
import com.bigdata.rdf.graph.impl.EdgeOnlyFilter;
import com.bigdata.rdf.graph.impl.GASEngine;
import com.bigdata.rdf.graph.impl.util.VertexDistribution;
import cutthecrap.utils.striterators.EmptyIterator;
import cutthecrap.utils.striterators.IStriterator;
import cutthecrap.utils.striterators.Striterator;
public class RAMGASEngine extends GASEngine {
/**
 * Creates the engine.
 *
 * @param nthreads
 *            Passed through unchanged to the {@link GASEngine} constructor.
 */
public RAMGASEngine(final int nthreads) {

    super(nthreads);

}
/**
 * Always returns {@code false}: there is no intrinsic ordering that can
 * improve access for this implementation.
 *
 * @return {@code false}
 */
@Override
public boolean getSortFrontier() {

    return false;

}
/**
 * A simple RDF graph model suitable for graph mining algorithms.
 * <p>
 * Statements are indexed by vertex. A statement whose object is a
 * {@link URI} is treated as an edge: it is registered as an out-edge of its
 * subject and as an in-edge of its object. Any other statement is treated
 * as a property value and is registered on its subject only.
 * <p>
 * TODO This model does not support link weights. It was developed to
 * provide an implementation without any object encode/decode overhead that
 * could be used to explore the possible performance of GAS algorithms under
 * Java.
 *
 * @author Bryan Thompson
 */
static public class RAMGraph {

    /** The factory created by the constructor for this graph. */
    private final ValueFactory vf;

    /**
     * @return The {@link ValueFactory} created together with this graph.
     */
    public ValueFactory getValueFactory() {
        return vf;
    }

    /**
     * Returns the shared empty iterator typed for {@link Statement}s. This
     * keeps the conversion of {@link EmptyIterator#DEFAULT} (and the
     * associated warning suppression) in a single place.
     */
    @SuppressWarnings("unchecked")
    private static Iterator<Statement> emptyIterator() {
        return EmptyIterator.DEFAULT;
    }

    /**
     * From a vertex, we can visit the in-edges, out-edges, or attribute
     * values. These things are organized into three sets of statements. A
     * non-thread-safe collection is used to provide the distinct semantics
     * for those sets and fast traversal. This design precludes the ability
     * to concurrently modify the graph during graph traversal operations.
     * <p>
     * Each set is allocated lazily on the first insert, so a vertex that
     * never receives a given kind of statement holds <code>null</code> for
     * that set.
     */
    static private class Vertex {

        /** The {@link Value} for that {@link Vertex}. */
        final private Value v;

        /**
         * The distinct set of in-edges for this {@link Vertex}.
         * <p>
         * The {@link Statement#getObject()} for each {@link Statement} in
         * this collection will be the {@link #v}.
         */
        private Set<Statement> inEdges = null;

        /**
         * The distinct set of out-edges for this {@link Vertex}.
         * <p>
         * The {@link Statement#getSubject()} for each {@link Statement} in
         * this collection will be the {@link #v}.
         */
        private Set<Statement> outEdges = null;

        /**
         * The distinct set of property values for this {@link Vertex}.
         * <p>
         * The {@link Statement#getSubject()} for each {@link Statement} in
         * this collection will be the {@link #v}.
         * <p>
         * The {@link Statement#getObject()} for each {@link Statement} in
         * this collection will NOT be a {@link URI}: statements having a
         * {@link URI} object are recorded as edges by
         * {@link RAMGraph#add(Statement)}.
         */
        private Set<Statement> attribs = null;

        /**
         * @param v
         *            The {@link Value} modeled by this vertex.
         *
         * @throws NullPointerException
         *             if the argument is <code>null</code>.
         */
        public Vertex(final Value v) {
            if (v == null)
                throw new NullPointerException();
            this.v = v;
        }

        @Override
        public String toString() {
            return "Vertex{" + v + ",inEdges=" + getInEdgeCount()
                    + ",outEdges=" + getOutEdgeCount() + ",attribs="
                    + getAttribCount() + "}";
        }

        /**
         * Record a property value statement.
         *
         * @return <code>true</code> iff the statement was not already
         *         present.
         */
        private boolean addAttrib(final Statement s) {
            if (attribs == null) {
                // Insertion ordered set, allocated on first use.
                attribs = new LinkedHashSet<Statement>();
            }
            return attribs.add(s);
        }

        /**
         * Record an out-edge statement.
         *
         * @return <code>true</code> iff the statement was not already
         *         present.
         */
        private boolean addOutEdge(final Statement s) {
            if (outEdges == null) {
                outEdges = new LinkedHashSet<Statement>();
            }
            return outEdges.add(s);
        }

        /**
         * Record an in-edge statement.
         *
         * @return <code>true</code> iff the statement was not already
         *         present.
         */
        private boolean addInEdge(final Statement s) {
            if (inEdges == null) {
                inEdges = new LinkedHashSet<Statement>();
            }
            return inEdges.add(s);
        }

        /** @return The #of property value statements (zero if none). */
        public int getAttribCount() {
            return attribs == null ? 0 : attribs.size();
        }

        /** @return The #of in-edge statements (zero if none). */
        public int getInEdgeCount() {
            return inEdges == null ? 0 : inEdges.size();
        }

        /** @return The #of out-edge statements (zero if none). */
        public int getOutEdgeCount() {
            return outEdges == null ? 0 : outEdges.size();
        }

        /** @return An iterator over the in-edges (empty if none). */
        public Iterator<Statement> inEdges() {
            if (inEdges == null)
                return emptyIterator();
            return inEdges.iterator();
        }

        /** @return An iterator over the out-edges (empty if none). */
        public Iterator<Statement> outEdges() {
            if (outEdges == null)
                return emptyIterator();
            return outEdges.iterator();
        }

        /** @return An iterator over the property values (empty if none). */
        public Iterator<Statement> attribs() {
            if (attribs == null)
                return emptyIterator();
            return attribs.iterator();
        }

    }

    /**
     * The vertices, keyed by the {@link Value} that each {@link Vertex}
     * models.
     */
    private final ConcurrentMap<Value, Vertex> vertices;

    /**
     * Creates an empty graph together with its own {@link ValueFactory}.
     */
    public RAMGraph() {
        vertices = new ConcurrentHashMap<Value, Vertex>();
        vf = new ValueFactoryImpl();
    }

    /**
     * Lookup / create a vertex.
     *
     * @param x
     *            The {@link Value}.
     * @param create
     *            when <code>true</code> the {@link Vertex} will be created
     *            if it does not exist.
     *
     * @return The {@link Vertex} -or- <code>null</code> if there is no
     *         vertex for that {@link Value} and <i>create</i> is
     *         <code>false</code>.
     */
    private Vertex get(final Value x, final boolean create) {
        Vertex v = vertices.get(x);
        if (v == null && create) {
            final Vertex oldVal = vertices
                    .putIfAbsent(x, v = new Vertex(x));
            if (oldVal != null) {
                // lost data race: use the vertex that was inserted first.
                v = oldVal;
            }
        }
        return v;
    }

    /**
     * Add a statement to the graph, creating the vertices it touches as
     * necessary.
     * <p>
     * When the object is a {@link URI} the statement is an edge and is
     * recorded on both the subject (out-edge) and the object (in-edge).
     * Otherwise it is recorded as a property value of the subject.
     *
     * @param st
     *            The statement.
     *
     * @return <code>true</code> iff any of the per-vertex sets was
     *         modified.
     */
    public boolean add(final Statement st) {
        final Resource s = st.getSubject();
        final Value o = st.getObject();
        boolean modified = false;
        if (o instanceof URI) {
            // Edge
            modified |= get(s, true/* create */).addOutEdge(st);
            modified |= get(o, true/* create */).addInEdge(st);
        } else {
            // Property value.
            modified |= get(s, true/* create */).addAttrib(st);
        }
        return modified;
    }

    /**
     * @param v
     *            The vertex.
     *
     * @return The in-edges of that vertex (empty if the vertex is not known
     *         to this graph).
     */
    public Iterator<Statement> inEdges(final Value v) {
        final Vertex x = get(v, false/* create */);
        if (x == null)
            return emptyIterator();
        return x.inEdges();
    }

    /**
     * @param v
     *            The vertex.
     *
     * @return The out-edges of that vertex (empty if the vertex is not
     *         known to this graph).
     */
    public Iterator<Statement> outEdges(final Value v) {
        final Vertex x = get(v, false/* create */);
        if (x == null)
            return emptyIterator();
        return x.outEdges();
    }

    /**
     * @param v
     *            The vertex.
     *
     * @return The property values of that vertex (empty if the vertex is
     *         not known to this graph).
     */
    public Iterator<Statement> attribs(final Value v) {
        final Vertex x = get(v, false/* create */);
        if (x == null)
            return emptyIterator();
        return x.attribs();
    }

} // class RAMGraph
static public class RAMGraphAccessor implements IGraphAccessor {
private final RAMGraph g;
public RAMGraphAccessor(final RAMGraph g) {
if (g == null)
throw new IllegalArgumentException();
this.g = g;
}
@Override
public void advanceView() {
// NOP
}
@Override
public long getEdgeCount(final IGASContext, ?, ?> ctx, final Value u,
final EdgesEnum edges) {
long n = 0L;
final Iterator itr = getEdges(ctx, u, edges);
while (itr.hasNext()) {
itr.next();
n++;
}
return n;
}
@SuppressWarnings("unchecked")
@Override
public Iterator getEdges(final IGASContext, ?, ?> ctx,
final Value u, final EdgesEnum edges) {
try {
switch (edges) {
case NoEdges:
return EmptyIterator.DEFAULT;
case InEdges:
return getEdges(true/* inEdges */, ctx, u);
case OutEdges:
return getEdges(false/* inEdges */, ctx, u);
case AllEdges: {
final IStriterator a = getEdges(true/* inEdges */, ctx, u);
final IStriterator b = getEdges(false/* outEdges */, ctx, u);
a.append(b);
return a;
}
default:
throw new UnsupportedOperationException(edges.name());
}
} catch (SailException ex) {
throw new RuntimeException(ex);
}
}
private IStriterator getEdges(final boolean inEdges,
final IGASContext, ?, ?> ctx, final Value u)
throws SailException {
final URI linkTypeIV = (URI) ctx.getLinkType();
if(linkTypeIV != null) {
/*
* FIXME RDR: We need to use a union of access paths for link
* attributes for the generic SAIL since it does not have the
* concept of statements about statements. This will require
* applying the access paths that will visit the appropriate
* reified triples. This needs to be done for both the standard
* path and the POS optimization code path.
*/
throw new UnsupportedOperationException();
}
final Striterator sitr;
if(inEdges) {
sitr = new Striterator(g.get(u, false/*create*/).inEdges());
} else {
sitr = new Striterator(g.get(u, false/*create*/).outEdges());
}
/*
* Optionally wrap the program specified filter.
*/
// return ctx.getConstrainEdgeFilter(sitr);
sitr.addFilter(new EdgeOnlyFilter(ctx));
return sitr;
}
@Override
public VertexDistribution getDistribution(final Random r) {
final VertexDistribution sample = new VertexDistribution(r);
for (RAMGraph.Vertex vertex : g.vertices.values()) {
final Value v = vertex.v;
if (v instanceof Resource) {
/*
* FIXME This is not ignoring self-loops. Realistically, we
* want to include them in the data since they are part of
* the data, but we do not want to consider them in samples
* since they do not actually go anywhere. The SAIL and BD
* implementations of this method filter out self-loops, but
* this implementation does not.
*/
if (vertex.getInEdgeCount() > 0)
sample.addInEdgeSample((Resource) v);
if (vertex.getOutEdgeCount() > 0)
sample.addOutEdgeSample((Resource) v);
}
}
return sample;
}
}
}