All downloads are free. Search and download functionality uses the official Maven repository.

org.cloudgraph.rdb.graph.ParallelGraphAssembler Maven / Gradle / Ivy

Go to download

CloudGraph(tm) is a suite of Service Data Object (SDO) 2.1 services designed for relational and big-table style "cloud" databases, such as HBase and others.

The newest version!
/**
 * Copyright 2017 TerraMeta Software, Inc.
 * 
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 * 
 *     http://www.apache.org/licenses/LICENSE-2.0
 * 
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.cloudgraph.rdb.graph;

import java.sql.Connection;
import java.sql.Timestamp;
import java.util.ArrayList;
import java.util.List;
import java.util.Set;
import java.util.concurrent.ConcurrentHashMap;
import java.util.concurrent.LinkedBlockingQueue;
import java.util.concurrent.ThreadPoolExecutor;
import java.util.concurrent.TimeUnit;

import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.cloudgraph.common.CloudGraphConstants;
import org.cloudgraph.common.concurrent.ConfigProps;
import org.cloudgraph.common.concurrent.GraphMetricVisitor;
import org.cloudgraph.common.concurrent.SubgraphTask;
import org.cloudgraph.common.concurrent.Traversal;
import org.cloudgraph.rdb.filter.RDBStatementExecutor;
import org.cloudgraph.rdb.filter.RDBStatementFactory;
import org.cloudgraph.store.lang.DefaultAssembler;
import org.cloudgraph.store.lang.LangStoreGraphAssembler;
import org.plasma.query.collector.SelectionCollector;
import org.plasma.sdo.PlasmaDataObject;
import org.plasma.sdo.PlasmaProperty;
import org.plasma.sdo.PlasmaType;
import org.plasma.sdo.access.provider.common.PropertyPair;
import org.plasma.sdo.core.CoreNode;

import commonj.sdo.DataGraph;
import commonj.sdo.Property;

/**
 * Constructs a data graph in parallel starting with a given root SDO type based
 * on a given "selection graph", where processing proceeds as a breadth-first
 * traversal and tasks/threads are dynamically added based on availability
 * within a shared thread pool.
 * 

* While the result graph may be of any arbitrary size or depth, because the * traversal is breadth-first, many tasks are typically spawned at the "base" of * the graph, exhausting the available pool threads. Each subgraph task can * spawn further sub tasks based on thread availability, but typically this * means each task will traverse and process a healthy segment of the total * graph. Since the actual size or depth of the result graph is not known until * discovered on traversal, a fixed number of parallel tasks cannot be initially * created, but must be dynamically spawned during graph discovery. *

* The assembly is triggered by calling the {@link * GraphAssembler#assemble(List results)} method which initializes * the graph root and begins a breadth first traversal of the selection graph as * represented in the underlying data store. *

* Various metrics for the assembly are collected using * {@link GraphMetricVisitor} and are available as SDO instance properties. * * @see org.plasma.query.collector.Selection * @see ParallelSubgraphTask * @see GraphMetricVisitor * * @author Scott Cinnamond * @since 0.6.2 */ public class ParallelGraphAssembler extends DefaultAssembler implements LangStoreGraphAssembler { private static Log log = LogFactory.getLog(ParallelGraphAssembler.class); private ThreadPoolExecutor executorService; private ConfigProps config; /** * Constructor. * * @param rootType * the SDO root type for the result data graph * @param collector * selected SDO properties. Properties are mapped by selected types * required in the result graph. * @param snapshotDate * the query snapshot date which is populated into every data object * in the result data graph. * @param minPoolSize * the minimum or core size of the underlying thread pool used for * all tasks executed under this assembler * @param maxPoolSize * the maximum size of the underlying thread pool used for all tasks * executed under this assembler * @param con */ public ParallelGraphAssembler(PlasmaType rootType, SelectionCollector collector, Timestamp snapshotDate, ConfigProps config, Connection con) { super(rootType, collector, new RDBStatementFactory(), new RDBStatementExecutor(con), new ConcurrentHashMap(), snapshotDate); this.executorService = new ThreadPoolExecutor(config.getMinThreadPoolSize(), config.getMaxThreadPoolSize(), 0L, TimeUnit.MILLISECONDS, new LinkedBlockingQueue(), new ThreadPoolExecutor.CallerRunsPolicy()); this.config = config; } public ThreadPoolExecutor getExecutorService() { return executorService; } public ConfigProps getConfig() { return config; } @Override protected void link(PlasmaDataObject target, PlasmaDataObject source, PlasmaProperty sourceProperty) { synchronized (source) { synchronized (target) { super.link(target, source, sourceProperty); } } } @Override protected PlasmaDataObject createDataObject(List 
row, PlasmaDataObject source, PlasmaProperty sourceProperty) { synchronized (this) { return super.createDataObject(row, source, sourceProperty); } } /** * Recursively re-constitutes a data graph distributed across multiple tables * and/or rows, starting with the given result row. *

* To retrieve the graph use {@link GraphAssembler#getDataGraph()}. *

* * @param results * the result row. */ @Override public void assemble(List results) { long before = System.currentTimeMillis(); DataGraph dataGraph = initRoot(results); CoreNode rootNode = (CoreNode) dataGraph.getRootObject(); List traversals = new ArrayList(); // singular reference props for (PropertyPair pair : results) { if (pair.getProp().isMany() || pair.getProp().getType().isDataType()) continue; List childKeyProps = this.getChildKeyPairs(pair); Traversal trav = new Traversal((PlasmaType) pair.getProp().getType(), (PlasmaDataObject) this.root, pair.getProp(), childKeyProps, 1); traversals.add(trav); } // multi reference props (not found in results) Set props = this.collector.getProperties(this.rootType); for (Property p : props) { PlasmaProperty prop = (PlasmaProperty) p; if (prop.isMany() && !prop.getType().isDataType()) { List childKeyProps = this.getChildKeyPairs(root, prop); Traversal trav = new Traversal((PlasmaType) prop.getType(), (PlasmaDataObject) this.root, prop, childKeyProps, 1); traversals.add(trav); } } // create concurrent tasks based on pool availability logPoolStatistics(); int available = numThreadsAvailable(); if (available > traversals.size()) available = traversals.size(); List concurrentTasks = new ArrayList(); for (int i = 0; i < available; i++) { Traversal trav = traversals.get(i); SubgraphTask task = new ParallelSubgraphTask(trav.getSubrootType(), trav.getSource(), this.collector, this.getStatementFactory(), this.getStatementExecutor(), trav.getSourceProperty(), trav.getChildKeyPairs(), trav.getLevel(), i, this); concurrentTasks.add(task); } // start any asynchronous assemblers for (SubgraphTask task : concurrentTasks) task.start(); for (SubgraphTask task : concurrentTasks) task.join(); // add remainder // continue with traversals for this thread for (int i = available; i < traversals.size(); i++) { Traversal trav = traversals.get(i); ParallelSubgraphTask task = new ParallelSubgraphTask(trav.getSubrootType(), trav.getSource(), 
this.collector, this.getStatementFactory(), this.getStatementExecutor(), trav.getSourceProperty(), trav.getChildKeyPairs(), trav.getLevel(), traversals.size(), this); task.assemble(); // this thread } if (log.isDebugEnabled()) log.debug("completed root " + this.root); long after = System.currentTimeMillis(); rootNode.getValueObject().put(CloudGraphConstants.GRAPH_ASSEMBLY_TIME, Long.valueOf(after - before)); GraphMetricVisitor visitor = new GraphMetricVisitor(); this.root.accept(visitor); rootNode.getValueObject().put(CloudGraphConstants.GRAPH_NODE_COUNT, Long.valueOf(visitor.getCount())); rootNode.getValueObject() .put(CloudGraphConstants.GRAPH_DEPTH, Long.valueOf(visitor.getDepth())); rootNode.getValueObject().put(CloudGraphConstants.GRAPH_THREAD_COUNT, Long.valueOf(visitor.getThreadCount())); } @Override protected void assemble(PlasmaType targetType, PlasmaDataObject source, PlasmaProperty sourceProperty, List childKeyPairs, int level) { // noop - } public void logPoolStatistics() { if (log.isDebugEnabled()) log.debug("active: " + executorService.getActiveCount() + ", size: " + executorService.getPoolSize()); } public boolean threadsAvailable() { return executorService.getActiveCount() < executorService.getMaximumPoolSize(); } public int numThreadsAvailable() { int result = executorService.getMaximumPoolSize() - executorService.getActiveCount(); if (result < 0) result = 0; return result; } }




© 2015 - 2025 Weber Informatics LLC | Privacy Policy