// org.cloudgraph.hbase.graph.ParallelSliceSubgraphTask Maven / Gradle / Ivy
/**
* CloudGraph Community Edition (CE) License
*
* This is a community release of CloudGraph, a dual-license suite of
* Service Data Object (SDO) 2.1 services designed for relational and
* big-table style "cloud" databases, such as HBase and others.
* This particular copy of the software is released under the
* version 2 of the GNU General Public License. CloudGraph was developed by
* TerraMeta Software, Inc.
*
* Copyright (c) 2013, TerraMeta Software, Inc. All rights reserved.
*
* General License information can be found below.
*
* This distribution may include materials developed by third
* parties. For license and attribution notices for these
* materials, please refer to the documentation that accompanies
* this distribution (see the "Licenses for Third-Party Components"
* appendix) or view the online documentation at
* .
*/
package org.cloudgraph.hbase.graph;
import java.io.IOException;
import java.sql.Timestamp;
import java.util.HashSet;
import java.util.Map;
import java.util.Set;
import java.util.UUID;
import java.util.concurrent.ThreadPoolExecutor;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.hadoop.hbase.client.Result;
import org.apache.hadoop.hbase.util.Bytes;
import org.cloudgraph.common.concurrent.ConfigProps;
import org.cloudgraph.common.concurrent.SubgraphTask;
import org.cloudgraph.config.TableConfig;
import org.cloudgraph.hbase.io.DistributedReader;
import org.cloudgraph.hbase.io.RowReader;
import org.cloudgraph.hbase.io.TableReader;
import org.cloudgraph.state.GraphState;
import org.cloudgraph.state.GraphState.Edge;
import org.plasma.query.collector.Selection;
import org.plasma.query.model.Where;
import org.plasma.sdo.PlasmaDataObject;
import org.plasma.sdo.PlasmaProperty;
import org.plasma.sdo.PlasmaType;
import commonj.sdo.Property;
/**
* A concurrent assembly task which assembles a sub-graph "sliced"
* using any number of path predicates
* starting with a given "sub root" based on the
* given "selection graph".
* Processing proceeds as a breadth-first
* traversal and additional tasks are dynamically spawned based on thread availability
* within a shared thread pool. If thread availability is exhausted, processing proceeds
* within the current thread.
*
* @see GraphSliceSupport
* @see DistributedReader
* @see RowReader
*
* @author Scott Cinnamond
* @since 0.6.2
*/
//package protection
class ParallelSliceSubgraphTask extends DefaultSubgraphTask implements SubgraphTask {
    private static final Log log = LogFactory.getLog(ParallelSliceSubgraphTask.class);

    /** Slice helper built from this task's selection and snapshot date. */
    private final GraphSliceSupport sliceSupport;

    /**
     * Creates a slice task. Every argument is forwarded unchanged to the
     * {@link DefaultSubgraphTask} constructor; in addition a
     * {@link GraphSliceSupport} is created from the given selection and
     * snapshot date.
     *
     * @param subroot the sub-graph root
     * @param selection the selection graph
     * @param snapshotDate the snapshot date
     * @param distributedReader the distributed reader
     * @param source the source data object
     * @param sourceProperty the source property
     * @param rowReader the row reader
     * @param level the assembly level
     * @param sequence the task sequence
     * @param executorService the shared thread pool
     * @param config the concurrency configuration properties
     */
    public ParallelSliceSubgraphTask(PlasmaDataObject subroot,
            Selection selection,
            Timestamp snapshotDate,
            DistributedReader distributedReader,
            PlasmaDataObject source,
            PlasmaProperty sourceProperty,
            RowReader rowReader,
            int level, int sequence,
            ThreadPoolExecutor executorService,
            ConfigProps config) {
        super(subroot, selection, snapshotDate, distributedReader, source, sourceProperty, rowReader,
                level, sequence, executorService, config);
        this.sliceSupport = new GraphSliceSupport(selection, snapshotDate);
    }

    /**
     * Factory hook returning another {@link ParallelSliceSubgraphTask}
     * constructed from exactly the given arguments.
     */
    @Override
    protected SubgraphTask newTask(PlasmaDataObject subroot,
            Selection selection, Timestamp snapshotDate,
            DistributedReader distributedReader, PlasmaDataObject source,
            PlasmaProperty sourceProperty, RowReader rowReader, int level,
            int sequence, ThreadPoolExecutor executorService, ConfigProps config) {
        return new ParallelSliceSubgraphTask(subroot, selection, snapshotDate, distributedReader,
                source, sourceProperty, rowReader, level, sequence, executorService, config);
    }

    /**
     * Assembles the given target: first its data via {@code assembleData},
     * then, for every non-datatype (reference) property that has edges,
     * either its local edges (same row) or its external edges (other rows),
     * applying the selection's path predicate for the property where one
     * exists and the property is many-valued. Finally invokes
     * {@code traverse(level)} for the traversals collected along the way.
     *
     * @param target the data object being assembled
     * @param source the source data object
     * @param sourceProperty the source property
     * @param rowReader the row reader for the target's row
     * @param level the assembly level
     * @throws IOException if an underlying read fails
     */
    @Override
    protected void assemble(PlasmaDataObject target, PlasmaDataObject source,
            PlasmaProperty sourceProperty, RowReader rowReader, int level)
            throws IOException {
        Set<Property> props = this.getProperties(target, source, sourceProperty, level);
        if (props.isEmpty()) {
            return;
        }
        if (log.isDebugEnabled()) {
            log.debug("assembling("+level+"): " + target.toString() + ": " + props.toString());
        }

        // synchronize on row-reader here rather than target because row-reader
        // uses shared column key factory
        synchronized (rowReader) {
            assembleData(target, props, rowReader);
        }

        TableReader tableReader = rowReader.getTableReader();
        TableConfig tableConfig = tableReader.getTableConfig();

        traversals.clear();

        // reference props
        for (Property p : props) {
            PlasmaProperty prop = (PlasmaProperty) p;
            if (prop.getType().isDataType()) {
                continue;
            }

            byte[] keyValue = getColumnValue(target, prop, tableConfig, rowReader);
            if (keyValue == null || keyValue.length == 0) {
                continue; // zero length can happen on modification or delete as we keep cell history
            }
            if (log.isDebugEnabled()) {
                log.debug(prop.getName() + ": " + Bytes.toString(keyValue));
            }

            Edge[] edges = rowReader.getGraphState().unmarshalEdges(keyValue);
            if (edges.length == 0) {
                continue; // zero length can happen on modification or delete as we keep cell history
            }

            PlasmaType childType = (PlasmaType) prop.getType();

            // NOTE: can we have predicates on singular props?
            Where where = this.selection.getPredicate(prop);
            boolean sliced = prop.isMany() && where != null;

            if (!isExternal(edges, rowReader)) {
                // NOTE(review): the element type of the sequence set is not
                // visible from this class, so it is left as a raw type.
                Set sequences = null;
                if (sliced) {
                    sequences = this.sliceSupport.fetchSequences(childType, where, rowReader);
                    // preload properties for the NEXT level into the current row so we have something to assemble
                    Set<Property> childProperties =
                            this.selection.getInheritedProperties(prop.getType(), level + 1);
                    this.sliceSupport.loadBySequenceList(sequences, childProperties,
                            childType, rowReader);
                } else {
                    // preload properties for the NEXT level into the current row so we have something to assemble
                    Set<Property> childProperties =
                            this.selection.getInheritedProperties(prop.getType(), level + 1);
                    this.sliceSupport.load(childProperties, childType, rowReader);
                }
                assembleEdges(target, prop, edges, sequences, rowReader,
                        rowReader.getTableReader(), rowReader, level);
            } else {
                String childTable = rowReader.getGraphState().getRowKeyTable(edges[0].getUuid());
                TableReader externalTableReader = distributedReader.getTableReader(childTable);

                if (log.isDebugEnabled()) {
                    if (!tableConfig.getName().equals(externalTableReader.getTableConfig().getName())) {
                        log.debug("switching row context from table: '"
                                + tableConfig.getName() + "' to table: '"
                                + externalTableReader.getTableConfig().getName() + "'");
                    }
                }

                // Null means "no predicate": every edge is assembled.
                Map resultRows = null;
                if (sliced) {
                    resultRows = this.sliceSupport.filter(childType, edges,
                            where, rowReader, externalTableReader);
                }
                assembleExternalEdges(target, prop, edges, rowReader,
                        resultRows, externalTableReader, level);
            }
        }

        traverse(level);
    }

    /**
     * Assembles edges whose children live in the current row. A child the
     * row reader already contains is only linked to the target. Otherwise
     * the edge is skipped when a sequence filter is given and does not
     * contain the edge id; else a child is created, registered with the
     * child row reader and the distributed reader, and queued as a
     * non-concurrent traversal for the next level.
     *
     * @param target the object to which children are linked
     * @param prop the edge property
     * @param edges the edges
     * @param sequences the edge ids accepted by the predicate, or
     *        {@code null} when no filtering applies
     * @param rowReader the row reader (not used by this method)
     * @param childTableReader the child table reader (not used by this method)
     * @param childRowReader the row reader holding the child objects
     * @param level the assembly level
     * @throws IOException declared for symmetry with the external variant
     */
    private void assembleEdges(PlasmaDataObject target, PlasmaProperty prop,
            Edge[] edges, Set sequences, RowReader rowReader,
            TableReader childTableReader, RowReader childRowReader,
            int level) throws IOException {
        for (Edge edge : edges) {
            UUID uuid = UUID.fromString(edge.getUuid());

            if (childRowReader.contains(uuid)) {
                // we've seen this child before so his data is complete, just link
                PlasmaDataObject existingChild =
                        (PlasmaDataObject) childRowReader.getDataObject(uuid);
                synchronized (existingChild) {
                    synchronized (target) {
                        link(existingChild, target, prop);
                    }
                }
                continue;
            }

            if (sequences != null && !sequences.contains(edge.getId())) {
                continue; // screen out edges
            }

            if (log.isDebugEnabled()) {
                log.debug("local edge: "
                        + target.getType().getURI() + "#" +target.getType().getName()
                        + "->" + prop.getName() + " (" + edge.getUuid() + ")");
            }

            // create a child object
            PlasmaDataObject child;
            synchronized (target) {
                child = createChild(target, prop, edge);
            }
            synchronized (childRowReader) {
                childRowReader.addDataObject(child);
            }
            synchronized (this.distributedReader) {
                this.distributedReader.mapRowReader(child, childRowReader);
            }

            // Local edges are queued as non-concurrent traversals: the child
            // is registered against the row reader that is already in hand.
            traversals.add(new Traversal(child,
                    target, prop, childRowReader,
                    false, // indicate a non-concurrent traversal
                    level + 1));
        }
    }

    /**
     * Assembles a given set of edges where the target is a different row, within this table or another.
     * Since we are assembling a graph, each edge requires
     * a new row reader. Each edge is a new root in the target table
     * so need a new row reader for each.
     * <p>
     * While a row is being fetched a lock object is registered under its row
     * key in {@code fetchLocks}; other threads reaching the same row wait on
     * it and then link the already assembled row root. The lock is always
     * unregistered and its waiters notified, including when the fetched row
     * is a tombstone or the fetch throws.
     *
     * @param target the object source to which we link edges
     * @param prop the edge property
     * @param edges the edges
     * @param rowReader the row reader
     * @param resultRows rows accepted by the predicate keyed by row key
     *        string, or {@code null} when no predicate applies
     * @param childTableReader the table reader for the child objects
     * @param level the assembly level
     * @throws IOException if fetching an external row fails
     */
    protected void assembleExternalEdges(PlasmaDataObject target, PlasmaProperty prop,
            Edge[] edges, RowReader rowReader, Map resultRows,
            TableReader childTableReader, int level) throws IOException {
        for (Edge edge : edges) {
            // need to look up an existing row reader based on the root UUID of the external graph
            // or the row key, and the row key is all we have in the local graph state. The edge UUID
            // is a local graph UUID.
            byte[] childRowKey = rowReader.getGraphState().getRowKey(edge.getUuid()); // use local edge UUID
            String childRowKeyStr = Bytes.toString(childRowKey);

            if (resultRows != null && resultRows.get(childRowKeyStr) == null) {
                continue; // not found in predicate
            }

            // see if this row is locked during fetch, and wait for it
            awaitFetch(childRowKeyStr);

            RowReader existingChildRowReader = childTableReader.getRowReader(childRowKey);
            if (existingChildRowReader != null) {
                // If assembled this row root before,
                // just link it. The data is already complete.
                PlasmaDataObject existingChild =
                        (PlasmaDataObject) existingChildRowReader.getRootDataObject();
                synchronized (existingChild) {
                    synchronized (target) {
                        link(existingChild, target, prop);
                    }
                }
                continue;
            }

            // While fetching this node, another thread can fail to find an existing row reader registered
            // above and fall through to this fetch, and therefore fetch the same row, in addition
            // to attempting to create the same row reader below, causing an error or warning
            // The second thread may be arriving at this node from another property/edge and
            // therefore need to link from another edge above.
            //
            // Keep a reference to the lock registered here: the map entry may
            // be replaced or removed by a racing thread, so the value returned
            // by remove() cannot be relied on for the notification.
            final Object fetchLock = new Object();
            fetchLocks.put(childRowKeyStr, fetchLock);
            try {
                if (log.isDebugEnabled()) {
                    log.debug("fetch external row: "
                            + prop.toString() + " (" + Bytes.toString(childRowKey) + ")");
                }

                Result childResult = fetchGraph(childRowKey, childTableReader, edge.getType());

                // NOTE(review): the column family is taken from the root
                // table reader rather than the child table reader - confirm
                // that both tables share the data column family.
                if (childResult.containsColumn(
                        rootTableReader.getTableConfig().getDataColumnFamilyNameBytes(),
                        GraphState.TOUMBSTONE_COLUMN_NAME_BYTES)) {
                    log.warn("ignoring toubstone result row '" +
                            childRowKeyStr + "'");
                    continue; // ignore toumbstone edge (lock is released in finally)
                }

                // need to reconstruct the original graph, so need original UUID
                UUID uuid = reconstituteUUID(childResult, childTableReader);
                if (log.isDebugEnabled()) {
                    log.debug("external edge: "
                            + target.getType().getURI() + "#" +target.getType().getName()
                            + "->" + prop.getName() + " (" + uuid.toString() + ")");
                }

                PlasmaDataObject child;
                synchronized (target) {
                    // create a child object using UUID from external row root
                    child = createChild(target, prop, edge, uuid);
                }

                RowReader childRowReader;
                synchronized (childTableReader) {
                    childRowReader = childTableReader.createRowReader(child, childResult);
                }
                synchronized (this.distributedReader) {
                    this.distributedReader.mapRowReader(child, childRowReader);
                }

                traversals.add(new Traversal(child,
                        target, prop, childRowReader,
                        true,
                        level + 1));
            } finally {
                // Always unregister and wake waiters, otherwise a tombstone
                // row or a failed fetch would block them indefinitely.
                fetchLocks.remove(childRowKeyStr);
                synchronized (fetchLock) {
                    fetchLock.notifyAll();
                }
            }
        }
    }

    /**
     * Blocks while a fetch lock is registered in {@code fetchLocks} for the
     * given row key. The registration is re-checked while holding the lock's
     * monitor, so a release that happens between the lookup and the wait is
     * not missed, and spurious wakeups simply re-enter the wait. If the
     * thread is interrupted, the error is logged, the interrupt status is
     * restored and waiting stops.
     * <p>
     * NOTE(review): assumes {@code fetchLocks} (declared outside this class)
     * is safe for concurrent access - confirm.
     *
     * @param rowKey the row key string under which fetch locks are registered
     */
    private void awaitFetch(String rowKey) {
        for (Object rowLock = fetchLocks.get(rowKey); rowLock != null;
                rowLock = fetchLocks.get(rowKey)) {
            synchronized (rowLock) {
                while (fetchLocks.get(rowKey) == rowLock) {
                    try {
                        rowLock.wait();
                    } catch (InterruptedException e) {
                        log.error(e.getMessage(), e);
                        Thread.currentThread().interrupt();
                        return;
                    }
                }
            }
        }
    }
}
// © 2015 - 2025 Weber Informatics LLC | Privacy Policy