// org.cloudgraph.hbase.graph.GraphAssembler (Maven / Gradle / Ivy)
/**
* CloudGraph Community Edition (CE) License
*
* This is a community release of CloudGraph, a dual-license suite of
* Service Data Object (SDO) 2.1 services designed for relational and
* big-table style "cloud" databases, such as HBase and others.
* This particular copy of the software is released under the
* version 2 of the GNU General Public License. CloudGraph was developed by
* TerraMeta Software, Inc.
*
* Copyright (c) 2013, TerraMeta Software, Inc. All rights reserved.
*
* General License information can be found below.
*
* This distribution may include materials developed by third
* parties. For license and attribution notices for these
* materials, please refer to the documentation that accompanies
* this distribution (see the "Licenses for Third-Party Components"
* appendix) or view the online documentation at
* .
*/
package org.cloudgraph.hbase.graph;
import java.io.IOException;
import java.sql.Timestamp;
import java.util.Set;
import java.util.UUID;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.hadoop.hbase.client.Result;
import org.apache.hadoop.hbase.util.Bytes;
import org.cloudgraph.config.TableConfig;
import org.cloudgraph.hbase.io.DistributedReader;
import org.cloudgraph.hbase.io.OperationException;
import org.cloudgraph.hbase.io.RowReader;
import org.cloudgraph.hbase.io.TableReader;
import org.cloudgraph.state.GraphState;
import org.cloudgraph.state.GraphState.Edge;
import org.cloudgraph.store.service.GraphServiceException;
import org.plasma.query.collector.Selection;
import org.plasma.sdo.PlasmaDataObject;
import org.plasma.sdo.PlasmaProperty;
import org.plasma.sdo.PlasmaType;
import commonj.sdo.Property;
/**
* Constructs a data graph starting with a given root SDO type based on
* a map of selected SDO properties, where properties are mapped by
* selected types required in the result graph.
*
* <p>The assembly is triggered by calling the
* {@link GraphAssembler#assemble(Result resultRow)} method which
* recursively reads HBase keys and values, re-constituting the
* data graph. The assembly traversal is driven by HBase column
* values representing the original edges or containment structure
* of the graph.
*
* <p>Since every column key in HBase must be unique, and a data graph
* may contain any number of nodes, a column key factory is used both
* to persist and to re-constitute a graph. A minimal amount of
* "state" information is therefore stored with each graph, which maps
* user-readable sequence numbers (which are used in column keys) to
* UUID values. The nodes of the resulting data graph are re-created with
* the original UUID values.
*
* @see org.cloudgraph.hbase.key.StatefullColumnKeyFactory
*
* @author Scott Cinnamond
* @since 0.5.1
*/
public class GraphAssembler extends DistributedAssembler
{
    private static final Log log = LogFactory.getLog(GraphAssembler.class);

    /**
     * Constructor.
     * @param rootType the SDO root type for the result data graph
     * @param selection selected SDO properties. Properties are mapped by
     * selected types required in the result graph.
     * @param distributedReader the reader handed to the superclass; this class
     * uses it to look up the table reader for edges that live in another table
     * @param snapshotDate the query snapshot date which is populated
     * into every data object in the result data graph.
     */
    public GraphAssembler(PlasmaType rootType,
            Selection selection,
            DistributedReader distributedReader,
            Timestamp snapshotDate)
    {
        super(rootType, selection, distributedReader, snapshotDate);
    }

    /**
     * Populates the given target data object from the given row and then
     * follows each of its selected reference properties, assembling the
     * edges found in the corresponding column value.
     *
     * @param target the data object being populated
     * @param source the data object from which the target was reached;
     * passed through to the property selection lookup
     * @param sourceProperty the property through which the target was reached;
     * passed through to the property selection lookup
     * @param rowReader the reader for the row holding the target
     * @param level the assembly level
     * @throws IOException if a nested read or assembly step fails
     */
    protected void assemble(PlasmaDataObject target,
            PlasmaDataObject source, PlasmaProperty sourceProperty,
            RowReader rowReader, int level) throws IOException
    {
        Set<Property> props = this.getProperties(target, source, sourceProperty, level);
        if (props.isEmpty()) {
            return;
        }
        if (log.isDebugEnabled()) {
            log.debug("assembling("+level+"): " + target.toString() + ": " + props.toString());
        }

        assembleData(target, props, rowReader);

        TableReader tableReader = rowReader.getTableReader();
        TableConfig tableConfig = tableReader.getTableConfig();

        // reference props
        for (Property p : props) {
            PlasmaProperty prop = (PlasmaProperty)p;
            if (prop.getType().isDataType()) {
                continue;
            }

            byte[] keyValue = getColumnValue(target, prop,
                    tableConfig, rowReader);
            if (keyValue == null || keyValue.length == 0) {
                continue; // zero length can happen on modification or delete as we keep cell history
            }
            if (log.isDebugEnabled()) {
                log.debug(prop.toString() + ": " + Bytes.toString(keyValue));
            }

            Edge[] edges = rowReader.getGraphState().unmarshalEdges(
                    keyValue);
            if (edges.length == 0) {
                continue; // zero length can happen on modification or delete as we keep cell history
            }

            boolean external = isExternal(edges, rowReader);
            if (!external) {
                // local edges are read through the same row reader as the target
                assembleEdges(target, prop, edges, rowReader,
                        tableReader, rowReader, level);
            }
            else {
                // the first edge is used to resolve the table for all edges of this property
                String childTable = rowReader.getGraphState().getRowKeyTable(edges[0].getUuid());
                if (childTable == null) {
                    throw new OperationException("no table found for type, " +
                            edges[0].getType());
                }
                TableReader externalTableReader = distributedReader.getTableReader(childTable);
                if (externalTableReader == null) {
                    throw new OperationException("no table reader found for type, " +
                            edges[0].getType());
                }
                assembleExternalEdges(target, prop, edges, rowReader,
                        externalTableReader, level);
            }
        }
    }

    /**
     * Assembles a given set of edges whose targets are read through the given
     * child row reader. A child already known to the child row reader is only
     * linked to the target; otherwise a new child is created, registered with
     * the child row reader and assembled.
     *
     * @param target the object source to which we link edges
     * @param prop the edge property
     * @param edges the edges
     * @param rowReader the row reader (not used by this implementation)
     * @param childTableReader the table reader for the child objects
     * (not used by this implementation)
     * @param childRowReader the row reader used to look up and register child objects
     * @param level the assembly level
     * @throws IOException if assembling a child fails
     */
    protected void assembleEdges(PlasmaDataObject target, PlasmaProperty prop,
            Edge[] edges, RowReader rowReader,
            TableReader childTableReader, RowReader childRowReader,
            int level) throws IOException
    {
        for (Edge edge : edges) {
            if (log.isDebugEnabled()) {
                log.debug("local edge: "
                    + target.getType().getURI() + "#" +target.getType().getName()
                    + "->" + prop.getName() + " (" + edge.toString() + ")");
            }

            UUID uuid = UUID.fromString(edge.getUuid());
            if (childRowReader.contains(uuid))
            {
                // we've seen this child before so his data is complete, just link
                if (log.isDebugEnabled()) {
                    log.debug("linking existing local edge");
                }
                PlasmaDataObject existingChild = (PlasmaDataObject)childRowReader.getDataObject(uuid);
                link(existingChild, target, prop);
                continue;
            }

            // create a child object
            PlasmaDataObject child = createChild(target, prop, edge);
            childRowReader.addDataObject(child);

            assembleEdge(target, prop, edge,
                    child, childRowReader, level);
        }
    }

    /**
     * Assembles a given set of edges where the target is a different row, within this table or another.
     * Since we are assembling a graph, each edge requires
     * a new row reader. Each edge is a new root in the target table
     * so need a new row reader for each.
     * Rows carrying the tombstone column are skipped with a warning.
     *
     * @param target the object source to which we link edges
     * @param prop the edge property
     * @param edges the edges
     * @param rowReader the row reader
     * @param childTableReader the table reader for the child objects
     * @param level the assembly level
     * @throws IOException if fetching or assembling a child row fails
     * @throws GraphServiceException if a fetched child row has no root UUID column
     */
    protected void assembleExternalEdges(PlasmaDataObject target, PlasmaProperty prop,
            Edge[] edges, RowReader rowReader, TableReader childTableReader, int level) throws IOException
    {
        // every child row is fetched through the child table reader, so all
        // column lookups on the fetched result use the child table's config
        TableConfig childTableConfig = childTableReader.getTableConfig();

        for (Edge edge : edges) {
            if (log.isDebugEnabled()) {
                log.debug("external edge: "
                    + target.getType().getURI() + "#" +target.getType().getName()
                    + "->" + prop.getName() + " (" + edge.toString() + ")");
            }

            // need to look up an existing row reader based on the root UUID of the external graph
            // or the row key, and the row key is all we have in the local graph state. The edge UUID
            // is a local graph UUID.
            byte[] childRowKey = rowReader.getGraphState().getRowKey(edge.getUuid()); // use local edge UUID
            RowReader existingChildRowReader = childTableReader.getRowReader(childRowKey);
            if (existingChildRowReader != null)
            {
                // If assembled this row root before,
                // just link it. The data is already complete.
                if (log.isDebugEnabled()) {
                    log.debug("linking existing external edge");
                }
                PlasmaDataObject existingChild = (PlasmaDataObject)existingChildRowReader.getRootDataObject();
                link(existingChild, target, prop);
                continue;
            }

            Result childResult = fetchGraph(childRowKey, childTableReader, edge.getType());
            // check the tombstone marker in the child table's data column family,
            // the same family the root UUID is read from below
            if (childResult.containsColumn(childTableConfig.getDataColumnFamilyNameBytes(),
                    GraphState.TOUMBSTONE_COLUMN_NAME_BYTES)) {
                String childRowKeyStr = Bytes.toString(childRowKey);
                log.warn("ignoring toubstone result row '" +
                        childRowKeyStr + "'");
                continue; // ignore tombstone edge
            }

            // need to reconstruct the original graph, so need original UUID
            byte[] rootUuid = childResult.getValue(Bytes.toBytes(
                    childTableConfig.getDataColumnFamilyName()),
                    Bytes.toBytes(GraphState.ROOT_UUID_COLUMN_NAME));
            if (rootUuid == null) {
                throw new GraphServiceException("expected column: "
                    + childTableConfig.getDataColumnFamilyName() + ":"
                    + GraphState.ROOT_UUID_COLUMN_NAME);
            }
            String uuidStr = new String(rootUuid,
                    childTableConfig.getCharset());
            UUID uuid = UUID.fromString(uuidStr);

            // create a child object using UUID from external row root
            PlasmaDataObject child = createChild(target, prop, edge, uuid);

            // create a row reader for every external edge
            RowReader childRowReader = childTableReader.createRowReader(
                    child, childResult);

            assembleEdge(target, prop, edge,
                    child, childRowReader, level);
        }
    }
}