/*
 *  streams library
 *
 *  Copyright (C) 2011-2014 by Christian Bockermann, Hendrik Blom
 * 
 *  streams is a library, API and runtime environment for processing high
 *  volume data streams. It is composed of three submodules "stream-api",
 *  "stream-core" and "stream-runtime".
 *
 *  The streams library (and its submodules) is free software: you can 
 *  redistribute it and/or modify it under the terms of the 
 *  GNU Affero General Public License as published by the Free Software 
 *  Foundation, either version 3 of the License, or (at your option) any 
 *  later version.
 *
 *  The streams library (and its submodules) is distributed in the hope
 *  that it will be useful, but WITHOUT ANY WARRANTY; without even the implied 
 *  warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 *  GNU Affero General Public License for more details.
 *
 *  You should have received a copy of the GNU Affero General Public License
 *  along with this program.  If not, see http://www.gnu.org/licenses/.
 */
package streams.application;

import java.util.ArrayList;
import java.util.Collection;
import java.util.Collections;
import java.util.Iterator;
import java.util.LinkedHashMap;
import java.util.LinkedHashSet;
import java.util.List;
import java.util.Map;
import java.util.Set;
import java.util.concurrent.LinkedBlockingQueue;

import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import stream.Process;
import stream.Processor;
import stream.io.Queue;
import stream.io.Sink;
import stream.io.Source;
import stream.io.Stream;
import stream.runtime.LifeCycle;
import stream.service.Service;

/**
 * <p>
 * The compute graph is the class that provides book-keeping about the data
 * flow of a process container. Essentially, the compute graph is used in
 * three different stages:
 * </p>
 * <ol>
 * <li>During parsing of the configuration, the graph is filled up with nodes
 * such as streams, processes, queues or services.</li>
 * <li>When preparing the startup of the container, the book-keeping
 * information is used for injecting the streams, queues and services.</li>
 * <li>While the container is running, the compute graph is used to check when
 * the container is finished.</li>
 * </ol>
 * 
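 * <p>
 * A minimal usage sketch (the {@code stream} and {@code process} instances
 * below are hypothetical placeholders, and the wiring shown is only an
 * illustration of this API, not the container's actual startup code):
 * elements are first registered as nodes and then connected via
 * {@link #add(Object, Object)}.
 * </p>
 * <pre>
 *   ComputeGraph graph = new ComputeGraph();
 *   graph.addStream("data", stream);      // stage 1: register the stream node
 *   graph.addProcess("p1", process);      // register the process node
 *   graph.add(stream, process);           // edge: data flows from stream to process
 *
 *   // streams that no other element feeds into are the root sources
 *   Set&lt;Source&gt; roots = graph.getRootSources();
 * </pre>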

 * 
 * @author Christian Bockermann
 * 
 */
public class ComputeGraph {

    static Logger log = LoggerFactory.getLogger(ComputeGraph.class);

    final Set<Object> nodes = new LinkedHashSet<Object>();
    final List<Edge> edges = new ArrayList<Edge>();

    /** The list of references from elements to queues */
    final List<SinkRef> queueRefs = new ArrayList<SinkRef>();

    final List<SourceRef> sourceRefs = new ArrayList<SourceRef>();

    /** The list of references from elements to services */
    final List<ServiceRef> serviceRefs = new ArrayList<ServiceRef>();

    /** The services defined in the graph */
    final Map<String, Service> services = new LinkedHashMap<String, Service>();

    /** The streams defined in the graph */
    final Map<String, Source> sources = new LinkedHashMap<String, Source>();

    /** The queues defined in the graph */
    final Map<String, Sink> sinks = new LinkedHashMap<String, Sink>();

    /** The process nodes of the graph */
    final Map<String, Process> processes = new LinkedHashMap<String, Process>();

    final Set<Object> finished = new LinkedHashSet<Object>();

    public synchronized void add(Object from, Object to) {
        add(from);
        add(to);
        edges.add(new Edge(from, to));
        this.notify();
    }

    public void add(Object node) {
        nodes.add(node);
    }

    /**
     * This method returns a set of sources that are referenced as inputs (e.g.
     * by processes) but are not referenced as outputs (e.g. outputs by
     * "enqueue").
     * 
     * @return
     */
    public synchronized Set<Source> getRootSources() {
        Set<Object> sources = getSources();
        Set<Source> srcs = new LinkedHashSet<Source>();
        Iterator<Object> it = sources.iterator();
        while (it.hasNext()) {
            Object source = it.next();
            if (finished.contains(source))
                continue;
            if (source instanceof Source) {
                if (getSourcesFor(source).isEmpty())
                    srcs.add((Source) source);
            }
        }
        return srcs;
    }

    public synchronized Set<Object> getNonRefQueues() {
        Set<Object> targets = getTargets();
        Set<Object> targs = new LinkedHashSet<Object>();
        Iterator<Object> it = targets.iterator();
        while (it.hasNext()) {
            Object target = it.next();
            if (finished.contains(target))
                continue;
            if (target instanceof Sink) {
                if (getTargets(target).isEmpty()) {
                    targs.add(target);
                }
            }
            it.remove();
        }
        return targs;
    }

    public synchronized Set<Object> getSources() {
        Set<Object> nodes = new LinkedHashSet<Object>();
        for (Edge edge : edges) {
            if (finished.contains(edge.getFrom())
                    || finished.contains(edge.getTo()))
                continue;
            nodes.add(edge.getFrom());
        }
        return nodes;
    }

    public synchronized Set<Object> getTargets() {
        Set<Object> nodes = new LinkedHashSet<Object>();
        for (Edge edge : edges) {
            if (finished.contains(edge.getFrom())
                    || finished.contains(edge.getTo()))
                continue;
            nodes.add(edge.getTo());
        }
        return nodes;
    }

    public synchronized Set<Object> getTargets(Object from) {
        Set<Object> nodes = new LinkedHashSet<Object>();
        for (Edge edge : edges) {
            if (finished.contains(edge.getFrom())
                    || finished.contains(edge.getTo()))
                continue;
            if (edge.getFrom() == from)
                nodes.add(edge.getTo());
        }
        return nodes;
    }

    public synchronized Set<Object> getReferencedObjects() {
        Set<Object> nodes = new LinkedHashSet<Object>();
        for (Edge edge : edges) {
            if (finished.contains(edge.getFrom())
                    || finished.contains(edge.getTo()))
                continue;
            nodes.add(edge.getTo());
        }
        return nodes;
    }

    public synchronized Set<Object> getSourcesFor(Object target) {
        Set<Object> nodes = new LinkedHashSet<Object>();
        for (Edge edge : edges) {
            if (finished.contains(edge.getFrom())
                    || finished.contains(edge.getTo()))
                continue;
            if (edge.getTo() == target)
                nodes.add(edge.getFrom());
        }
        return nodes;
    }

    public synchronized Set<Object> getIsolated() {
        Set<Object> nodes = new LinkedHashSet<Object>();
        for (Object node : this.nodes) {
            if (finished.contains(node))
                continue;
            if (getSourcesFor(node).isEmpty())
                nodes.add(node);
        }
        return nodes;
    }

    public Set<Object> allNodes() {
        return Collections.unmodifiableSet(nodes);
    }

    public Set<Object> nodes() {
        Set<Object> ns = new LinkedHashSet<Object>();
        for (Object node : nodes) {
            if (!finished.contains(node)) {
                ns.add(node);
            }
        }
        return Collections.unmodifiableSet(ns);
    }

    public synchronized void clear() {
        nodes.clear();
        finished.clear();
        edges.clear();
        this.notify();
    }

    public synchronized List<LifeCycle> remove(Object o) {
        if (!nodes.contains(o)) {
            return new ArrayList<LifeCycle>();
        }
        List<LifeCycle> objs = remove(o, false);
        this.notifyAll();
        return objs;
    }

    private synchronized List<LifeCycle> remove(Object o, boolean notify) {
        log.debug("Removing {} from dependency-graph...", o);
        log.debug("   {} references: {}", o, this.getTargets(o));

        List<LifeCycle> lifeObjects = new ArrayList<LifeCycle>();
        if (!nodes.contains(o)) {
            return lifeObjects;
        }

        if (finished.contains(o)) {
            log.debug("Object {} already finished.", o);
            return lifeObjects;
        }

        if (o instanceof LifeCycle) {
            lifeObjects.add((LifeCycle) o);
        }

        boolean closed = false;

        if (o instanceof Queue) {
            int refs = this.getSourcesFor(o).size();
            log.debug("Trying to remove queue {}, which is being fed by {} elements", o, refs);
            if (refs == 0) {
                try {
                    log.debug("Closing queue {}", o);
                    ((Queue) o).close();
                } catch (Exception e) {
                    if (log.isDebugEnabled())
                        e.printStackTrace();
                }
                finished.add(o);
            }
            closed = true;
        }

        if (o instanceof Source && !(o instanceof Queue)) {
            finished.add(o);
            try {
                log.debug("Removing and closing source {}", ((Source) o).getId());
                synchronized (o) {
                    if (!closed) {
                        // a queue is a source as well, it will have
                        // already been closed by the code above
                        ((Source) o).close();
                        closed = true;
                    }
                }
            } catch (Exception e) {
                log.error("Failed to close source '{}': {}", ((Source) o).getId(), e.getMessage());
                if (log.isDebugEnabled())
                    e.printStackTrace();
            }
        }

        // is this at all required? it does not hurt, though, but the
        // compute-graph should in theory only consist of sinks, sources and
        // processes.
        // CB: Yes, it IS required as it removes the edges of processors to
        // queues.
        //
        if (o instanceof Process) {
            List<Processor> processors = ((Process) o).getProcessors();
            log.debug("Removing {} nested processors of {}", processors.size(), o);
            for (Processor p : processors) {
                remove(p, notify);
            }
            finished.add(o);

            Source source = ((Process) o).getInput();
            Set<Object> refs = this.getTargets(source);
            log.debug("Source {} is referenced by {} nodes.", source.getId(), refs.size());
            if (refs.size() == 0) {
                log.debug("Removing source {}", source.getId());
                remove(source, notify);
            }

            Sink sink = ((Process) o).getOutput();
            if (sink != null) {
                refs = getTargets(sink);
                if (refs.size() == 0) {
                    log.debug("sink {} does not have any more feeders", sink.getId());
                    remove(sink, notify);
                }
            }
        }

        Iterator<Edge> it = (new ArrayList<Edge>(edges)).iterator();
        while (it.hasNext()) {
            Edge edge = it.next();
            if (edge.getFrom() == o) {
                log.debug("[graph-shutdown]   Removing edge ( {} => {} )", edge.getFrom(), edge.getTo());
                finished.add(o);
                // this.nodes.remove(o);
                // this.edges.remove(edge);
                Object target = edge.getTo();
                if (this.getSourcesFor(target).isEmpty()) {
                    log.debug("[graph-shutdown]   -> No more references to {}, adding to shutdown-queue", target);
                    lifeObjects.addAll(remove(target, notify));
                } else {
                    log.debug("target {} has {} references left", target, getSourcesFor(target).size());
                }
            }

            if (edge.getTo() == o) {
                log.debug("Removing edge {} => {} (this)", edge.getFrom(), o);
                it.remove();
                edges.remove(edge);
            }
        }

        // log.debug("[dep-graph] Reference counts: ");
        // for (Object node : this.nodes) {
        // log.debug("[dep-graph]  * {} is referenced by {} ", node,
        // this.getSourcesFor(node));
        // }
        return lifeObjects;
    }
    public synchronized void printShutdownStrategy() {
        List<Object> all = new ArrayList<Object>();
        all.addAll(this.nodes);

        Set<Object> finished = new LinkedHashSet<Object>();
        java.util.Queue<Object> waiting = new LinkedBlockingQueue<Object>();
        waiting.addAll(getIsolated());

        while (!waiting.isEmpty()) {
            Object next = waiting.poll();
            log.trace("[graph-shutdown]   Shutting down {}", next);
            finished.add(next);
            all.remove(next);
        }

        log.trace("[dep-graph] Reference counts: ");
        for (Object node : this.nodes) {
            log.trace("[dep-graph]  * {} is referenced by {} objects", node, this.getSourcesFor(node).size());
        }
    }

    public Collection<Object> getAll(Class<?> pattern) {
        List<Object> matching = new ArrayList<Object>();
        for (Object o : this.nodes) {
            if (pattern.isAssignableFrom(o.getClass())) {
                matching.add(o);
            }
        }
        return matching;
    }

    public void addReference(SinkRef qref) {
        this.queueRefs.add(qref);
    }

    public void addReference(SourceRef ref) {
        this.sourceRefs.add(ref);
    }

    public List<SourceRef> sourceRefs() {
        return sourceRefs;
    }

    public List<SinkRef> sinkRefs() {
        return queueRefs;
    }

    public void addReference(ServiceRef sref) {
        this.serviceRefs.add(sref);
    }

    public List<ServiceRef> serviceRefs() {
        return serviceRefs;
    }

    /**
     * Adds a service to the compute graph.
     * 
     * @param id
     *            The id of the service.
     * @param service
     *            The instance of the service.
     */
    public void addService(String id, Service service) {
        if (services.containsKey(id))
            throw new RuntimeException("A service with id '" + id + "' has already been defined!");
        this.services.put(id, service);
    }

    public Map<String, Service> services() {
        return Collections.unmodifiableMap(services);
    }

    /**
     * Adds a stream with the given ID to the compute graph.
     * 
     * @param id
     *            The id of the stream.
     * @param stream
     *            The instance of the stream.
     */
    public void addStream(String id, Stream stream) {
        if (sources.containsKey(id))
            throw new RuntimeException("A stream with id '" + id + "' has already been defined!");
        this.sources.put(id, stream);
        nodes.add(stream);
    }

    public Map<String, Source> sources() {
        return Collections.unmodifiableMap(sources);
    }

    /**
     * Adds a process with the given ID to the compute graph.
     * 
     * @param id
     *            The id of the process.
     * @param process
     *            The instance of the process.
     */
    public void addProcess(String id, Process process) {
        if (processes.containsKey(id))
            throw new RuntimeException("A process with id '" + id + "' has already been defined!");
        this.processes.put(id, process);

        // We add a reference from the process to each processor. Later these
        // references are used to remove the processors from the runtime graph
        // as soon as the process is being terminated.
        //
        for (Processor p : process.getProcessors()) {
            this.add(process, p);
        }
    }

    public Map<String, Process> processes() {
        return Collections.unmodifiableMap(processes);
    }

    public void addQueue(String id, stream.io.Queue queue) {
        addSource(id, queue);
        addSink(id, queue);
        nodes.add(queue);
    }

    public void addSink(String id, Sink sink) {
        if (sinks.containsKey(id))
            throw new RuntimeException("A queue with id '" + id + "' has already been defined!");
        sinks.put(id, sink);
    }

    public void addSource(String id, Source source) {
        if (sources.containsKey(id))
            throw new RuntimeException("A stream with id '" + id + "' has already been defined!");
        sources.put(id, source);
    }

    public Map<String, Sink> sinks() {
        return Collections.unmodifiableMap(sinks);
    }

    public static class Edge {
        static Integer lastId = 0;
        final Integer id = lastId++;
        final Object from;
        final Object to;

        public Edge(Object from, Object to) {
            this.from = from;
            this.to = to;
        }

        public Object getFrom() {
            return from;
        }

        public Object getTo() {
            return to;
        }
    }

    public boolean isFinished(Object n) {
        return finished.contains(n);
    }

    public final static class SinkRef extends Reference {
        public SinkRef(Object o, String property, String queueId) {
            super(o, property, queueId);
        }

        public SinkRef(Object o, String property, String[] refs) {
            super(o, property, refs);
        }
    }

    public final static class SourceRef extends Reference {
        public SourceRef(Object o, String property, String queueId) {
            super(o, property, queueId);
        }
    }

    public final static class ServiceRef extends Reference {
        final Class<? extends Service> type;

        public ServiceRef(Object o, String property, String[] queueId, Class<? extends Service> type) {
            super(o, property, queueId);
            this.type = type;
        }

        public Class<? extends Service> type() {
            return type;
        }
    }
}