// ![JAR search and dependency download from the Maven repository](/logo.png)
// org.cloudgraph.rdb.graph.ParallelGraphAssembler Maven / Gradle / Ivy
// Show all versions of cloudgraph-rdb Show documentation
/**
* CloudGraph Community Edition (CE) License
*
* This is a community release of CloudGraph, a dual-license suite of
* Service Data Object (SDO) 2.1 services designed for relational and
* big-table style "cloud" databases, such as HBase and others.
* This particular copy of the software is released under the
* version 2 of the GNU General Public License. CloudGraph was developed by
* TerraMeta Software, Inc.
*
* Copyright (c) 2013, TerraMeta Software, Inc. All rights reserved.
*
* General License information can be found below.
*
* This distribution may include materials developed by third
* parties. For license and attribution notices for these
* materials, please refer to the documentation that accompanies
* this distribution (see the "Licenses for Third-Party Components"
* appendix) or view the online documentation at
* <http://cloudgraph.org/licenses/>.
*/
package org.cloudgraph.rdb.graph;
import java.sql.Connection;
import java.sql.Timestamp;
import java.util.ArrayList;
import java.util.List;
import java.util.Set;
import java.util.concurrent.ConcurrentHashMap;
import java.util.concurrent.LinkedBlockingQueue;
import java.util.concurrent.SynchronousQueue;
import java.util.concurrent.ThreadPoolExecutor;
import java.util.concurrent.TimeUnit;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.cloudgraph.common.CloudGraphConstants;
import org.cloudgraph.common.concurrent.ConfigProps;
import org.cloudgraph.common.concurrent.GraphMetricVisitor;
import org.cloudgraph.common.concurrent.SubgraphTask;
import org.cloudgraph.common.concurrent.Traversal;
import org.cloudgraph.rdb.filter.RDBStatementExecutor;
import org.cloudgraph.rdb.filter.RDBStatementFactory;
import org.cloudgraph.store.lang.DefaultAssembler;
import org.cloudgraph.store.lang.LangStoreGraphAssembler;
import org.plasma.query.collector.SelectionCollector;
import org.plasma.sdo.PlasmaDataObject;
import org.plasma.sdo.PlasmaProperty;
import org.plasma.sdo.PlasmaType;
import org.plasma.sdo.access.provider.common.PropertyPair;
import org.plasma.sdo.core.CoreNode;
import commonj.sdo.DataGraph;
import commonj.sdo.Property;
/**
 * Constructs a data graph in parallel starting with a given root SDO type based on
 * a given "selection graph", where processing proceeds as a breadth-first traversal
 * and tasks/threads are dynamically added based on availability within a shared
 * thread pool.
 *
 * <p>While the result graph may be of any arbitrary size or depth, because the
 * traversal is breadth-first, many tasks are typically spawned at the "base" of the
 * graph, exhausting the available pool threads. Each subgraph task can spawn further
 * sub tasks based on thread availability, but typically this means each task will
 * traverse and process a healthy segment of the total graph. Since the actual size
 * or depth of the result graph is not known until discovered on traversal, a fixed
 * number of parallel tasks cannot be initially created, but must be dynamically
 * spawned during graph discovery.
 *
 * <p>The assembly is triggered by calling {@link #assemble(List)}, which initializes
 * the graph root and begins a breadth first traversal of the selection graph as
 * represented in the underlying data store.
 *
 * <p>Various metrics for the assembly are collected using {@link GraphMetricVisitor}
 * and are stored on the root node's value object under the
 * {@link CloudGraphConstants} keys {@code GRAPH_ASSEMBLY_TIME},
 * {@code GRAPH_NODE_COUNT}, {@code GRAPH_DEPTH} and {@code GRAPH_THREAD_COUNT}.
 *
 * <p>NOTE(review): nothing in this class shuts the thread pool down; it is only
 * exposed through {@link #getExecutorService()}. Confirm that the owner of the
 * assembler releases it.
 *
 * @see org.plasma.query.collector.Selection
 * @see ParallelSubgraphTask
 * @see GraphMetricVisitor
 *
 * @author Scott Cinnamond
 * @since 0.6.2
 */
public class ParallelGraphAssembler extends DefaultAssembler
    implements LangStoreGraphAssembler {

  private static final Log log = LogFactory.getLog(ParallelGraphAssembler.class);

  /** Pool shared by every subgraph task created under this assembler. */
  private final ThreadPoolExecutor executorService;

  /** Configuration the pool sizes were read from. */
  private final ConfigProps config;

  /**
   * Constructor.
   *
   * <p>The pool uses a {@link SynchronousQueue} (direct hand-off). With an
   * unbounded work queue a {@link ThreadPoolExecutor} never creates more than
   * its core number of threads, which would make the configured maximum and
   * the {@link ThreadPoolExecutor.CallerRunsPolicy} ineffective and would make
   * {@link #numThreadsAvailable()} over-report capacity. With direct hand-off
   * the pool grows up to the configured maximum, and a task submitted while the
   * pool is saturated is executed by the submitting thread instead of waiting.
   *
   * @param rootType
   *          the SDO root type for the result data graph
   * @param collector
   *          selected SDO properties. Properties are mapped by selected
   *          types required in the result graph.
   * @param snapshotDate
   *          the query snapshot date which is populated into every data
   *          object in the result data graph.
   * @param config
   *          supplies the minimum (core) and maximum size of the underlying
   *          thread pool used for all tasks executed under this assembler
   * @param con
   *          the JDBC connection handed to the {@link RDBStatementExecutor}
   *          used by this assembler
   */
  public ParallelGraphAssembler(PlasmaType rootType, SelectionCollector collector,
      Timestamp snapshotDate, ConfigProps config, Connection con) {
    // The map's type arguments are not visible from this class, so it is
    // deliberately left as declared rather than guessed.
    super(rootType, collector,
        new RDBStatementFactory(), new RDBStatementExecutor(con),
        new ConcurrentHashMap(),
        snapshotDate);
    this.executorService = new ThreadPoolExecutor(
        config.getMinThreadPoolSize(), config.getMaxThreadPoolSize(),
        0L, TimeUnit.MILLISECONDS,
        new SynchronousQueue<Runnable>(),
        new ThreadPoolExecutor.CallerRunsPolicy());
    this.config = config;
  }

  /**
   * Returns the thread pool used for all tasks executed under this assembler.
   *
   * @return the shared executor
   */
  public ThreadPoolExecutor getExecutorService() {
    return executorService;
  }

  /**
   * Returns the configuration this assembler was constructed with.
   *
   * @return the configuration properties
   */
  public ConfigProps getConfig() {
    return config;
  }

  /**
   * Delegates to the superclass link operation while holding the monitors of
   * both data objects, source first and then target.
   *
   * <p>NOTE(review): because the monitors are always taken source-then-target,
   * two threads linking the same pair of objects in opposite directions could
   * block each other. Confirm this cannot happen for a given traversal.
   */
  @Override
  protected void link(PlasmaDataObject target, PlasmaDataObject source,
      PlasmaProperty sourceProperty) {
    synchronized (source) {
      synchronized (target) {
        super.link(target, source, sourceProperty);
      }
    }
  }

  /**
   * Delegates to the superclass while holding this assembler's monitor, so
   * only one thread at a time creates a data object through this instance.
   */
  @Override
  protected PlasmaDataObject createDataObject(List row,
      PlasmaDataObject source, PlasmaProperty sourceProperty) {
    synchronized (this) {
      return super.createDataObject(row, source, sourceProperty);
    }
  }

  /**
   * Re-constitutes a data graph distributed across multiple tables and/or
   * rows, starting with the given result row.
   *
   * <p>One traversal is created per reference property of the root. As many
   * traversals as the pool currently has room for are started as concurrent
   * tasks and joined; any remaining traversals are then assembled on the
   * calling thread. Finally the assembly metrics are stored on the root node.
   *
   * @param results the result row.
   */
  @Override
  public void assemble(List<PropertyPair> results) {
    long before = System.currentTimeMillis();

    DataGraph dataGraph = initRoot(results);
    CoreNode rootNode = (CoreNode) dataGraph.getRootObject();

    List<Traversal> traversals = collectRootTraversals(results);

    // create concurrent tasks based on pool availability
    logPoolStatistics();
    int available = Math.min(numThreadsAvailable(), traversals.size());

    List<SubgraphTask> concurrentTasks = new ArrayList<SubgraphTask>(available);
    for (int i = 0; i < available; i++) {
      concurrentTasks.add(newRootSubgraphTask(traversals.get(i), i));
    }

    // start any asynchronous assemblers, then wait for all of them
    for (SubgraphTask task : concurrentTasks) {
      task.start();
    }
    for (SubgraphTask task : concurrentTasks) {
      task.join();
    }

    // remainder: traversals the pool had no room for run on this thread
    for (int i = available; i < traversals.size(); i++) {
      newRootSubgraphTask(traversals.get(i), traversals.size()).assemble();
    }

    if (log.isDebugEnabled()) {
      log.debug("completed root " + this.root);
    }

    long after = System.currentTimeMillis();
    storeAssemblyMetrics(rootNode, after - before);
  }

  /**
   * Builds the level-1 traversals for the root: one per singular reference
   * property found in the result row, followed by one per multi-valued
   * reference property selected for the root type.
   */
  private List<Traversal> collectRootTraversals(List<PropertyPair> results) {
    List<Traversal> traversals = new ArrayList<Traversal>();

    // singular reference props
    for (PropertyPair pair : results) {
      if (pair.getProp().isMany() || pair.getProp().getType().isDataType()) {
        continue;
      }
      List<PropertyPair> childKeyProps = this.getChildKeyPairs(pair);
      traversals.add(new Traversal((PlasmaType) pair.getProp().getType(),
          (PlasmaDataObject) this.root,
          pair.getProp(), childKeyProps, 1));
    }

    // multi reference props (not found in results)
    Set<Property> props = this.collector.getProperties(this.rootType);
    for (Property p : props) {
      PlasmaProperty prop = (PlasmaProperty) p;
      if (prop.isMany() && !prop.getType().isDataType()) {
        List<PropertyPair> childKeyProps = this.getChildKeyPairs(root, prop);
        traversals.add(new Traversal((PlasmaType) prop.getType(),
            (PlasmaDataObject) this.root,
            prop, childKeyProps, 1));
      }
    }
    return traversals;
  }

  /** Creates the subgraph task for the given traversal and sequence number. */
  private ParallelSubgraphTask newRootSubgraphTask(Traversal trav, int sequence) {
    return new ParallelSubgraphTask(
        trav.getSubrootType(),
        trav.getSource(),
        this.collector,
        this.getStatementFactory(), this.getStatementExecutor(),
        trav.getSourceProperty(), trav.getChildKeyPairs(),
        trav.getLevel(), sequence, this);
  }

  /** Stores elapsed time and the visitor-collected graph metrics on the root node. */
  private void storeAssemblyMetrics(CoreNode rootNode, long elapsedMillis) {
    rootNode.getValueObject().put(
        CloudGraphConstants.GRAPH_ASSEMBLY_TIME,
        Long.valueOf(elapsedMillis));

    GraphMetricVisitor visitor = new GraphMetricVisitor();
    this.root.accept(visitor);

    rootNode.getValueObject().put(
        CloudGraphConstants.GRAPH_NODE_COUNT,
        Long.valueOf(visitor.getCount()));
    rootNode.getValueObject().put(
        CloudGraphConstants.GRAPH_DEPTH,
        Long.valueOf(visitor.getDepth()));
    rootNode.getValueObject().put(
        CloudGraphConstants.GRAPH_THREAD_COUNT,
        Long.valueOf(visitor.getThreadCount()));
  }

  /**
   * Intentionally does nothing: in this assembler the traversal work is
   * carried out by the {@link ParallelSubgraphTask} instances created in
   * {@link #assemble(List)}.
   */
  @Override
  protected void assemble(PlasmaType targetType, PlasmaDataObject source,
      PlasmaProperty sourceProperty, List childKeyPairs,
      int level) {
    // noop
  }

  /** Logs the pool's active thread count and current size at debug level. */
  public void logPoolStatistics() {
    if (log.isDebugEnabled()) {
      log.debug("active: " + executorService.getActiveCount() + ", size: " + executorService.getPoolSize());
    }
  }

  /**
   * Tells whether the pool currently reports fewer active threads than its
   * maximum size. The active count is approximate, so the answer is a hint.
   *
   * @return true if at least one more thread appears to be available
   */
  public boolean threadsAvailable() {
    return executorService.getActiveCount() < executorService.getMaximumPoolSize();
  }

  /**
   * Returns the pool's maximum size minus its (approximate) active thread
   * count, never less than zero.
   *
   * @return the number of threads that appear to be available
   */
  public int numThreadsAvailable() {
    int result = executorService.getMaximumPoolSize() - executorService.getActiveCount();
    return Math.max(result, 0);
  }
}