Please wait. This can take some minutes ...
Many resources are needed to download a project. Please understand that we have to compensate our server costs. Thank you in advance.
Project price only 1 $
You can buy this project and then download or modify it as often as you want.
org.datacleaner.widgets.visualization.JobGraphNodeBuilder Maven / Gradle / Ivy
/**
* DataCleaner (community edition)
* Copyright (C) 2014 Free Software Foundation, Inc.
*
* This copyrighted material is made available to anyone wishing to use, modify,
* copy, or redistribute it subject to the terms and conditions of the GNU
* Lesser General Public License, as published by the Free Software Foundation.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
* or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License
* for more details.
*
* You should have received a copy of the GNU Lesser General Public License
* along with this distribution; if not, write to:
* Free Software Foundation, Inc.
* 51 Franklin Street, Fifth Floor
* Boston, MA 02110-1301 USA
*/
package org.datacleaner.widgets.visualization;
import java.util.ArrayList;
import java.util.Collection;
import java.util.Collections;
import java.util.HashSet;
import java.util.List;
import java.util.Set;
import org.apache.metamodel.schema.Table;
import org.datacleaner.api.InputColumn;
import org.datacleaner.api.OutputDataStream;
import org.datacleaner.job.ComponentRequirement;
import org.datacleaner.job.FilterOutcome;
import org.datacleaner.job.HasComponentRequirement;
import org.datacleaner.job.HasFilterOutcomes;
import org.datacleaner.job.InputColumnSinkJob;
import org.datacleaner.job.InputColumnSourceJob;
import org.datacleaner.job.builder.AnalysisJobBuilder;
import org.datacleaner.job.builder.ComponentBuilder;
import org.datacleaner.util.SourceColumnFinder;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import edu.uci.ics.jung.graph.DirectedGraph;
import edu.uci.ics.jung.graph.DirectedSparseGraph;
import edu.uci.ics.jung.graph.util.EdgeType;
/**
* Object responsible for building the nodes (vertices and edges) of a
* {@link JobGraph}.
*/
class JobGraphNodeBuilder {
private static class JobGraphElementFactory {
public JobGraphLink createLink(final Object from, final Object to, final ComponentRequirement requirement,
final FilterOutcome filterOutcome) {
return new JobGraphLink(from, to, requirement, filterOutcome, null);
}
public Object createVertex(final Object item) {
return item;
}
}
private static final Logger logger = LoggerFactory.getLogger(JobGraphNodeBuilder.class);
private final AnalysisJobBuilder _analysisJobBuilder;
public JobGraphNodeBuilder(final AnalysisJobBuilder analysisJobBuilder) {
_analysisJobBuilder = analysisJobBuilder;
}
public DirectedGraph buildGraph() {
final DirectedGraph graph = new DirectedSparseGraph<>();
final List sourceTables = _analysisJobBuilder.getSourceTables();
buildGraphInternal(graph, _analysisJobBuilder, sourceTables);
return graph;
}
private void buildGraphInternal(final DirectedGraph graph,
final AnalysisJobBuilder analysisJobBuilder, final List sourceTables) {
final JobGraphElementFactory linkFactory = new JobGraphElementFactory();
buildGraphInternal(graph, analysisJobBuilder, sourceTables, linkFactory);
}
private void buildGraphInternal(final DirectedGraph graph,
final AnalysisJobBuilder analysisJobBuilder, final List sourceTables,
final JobGraphElementFactory linkFactory) {
// note: currently SourceColumnFinder cannot cross links from
// OutputDataStreams to the main/parent AnalysisJobBuilder, so we create
// a new SourceColumnFinder for each AnalysisJobBuilder instead of
// reusing the instance.
final SourceColumnFinder sourceColumnFinder = new SourceColumnFinder();
sourceColumnFinder.addSources(analysisJobBuilder);
for (final Table table : sourceTables) {
addNodes(graph, sourceColumnFinder, linkFactory, table, -1);
}
final Collection componentBuilders = analysisJobBuilder.getComponentBuilders();
for (final ComponentBuilder componentBuilder : componentBuilders) {
addNodes(graph, sourceColumnFinder, linkFactory, componentBuilder, -1);
}
removeUnnecesaryEdges(graph, sourceColumnFinder);
}
/**
* Removes links/edges that are "unnecesary". The rationale here is that we
* do not want to show every possible link since it clutters readability.
*
* We remove links that represent a direct edge between nodes that are also
* indirectly linked. Not in "diamond" shaped scenarios, but only in
* scenarios where a path from A->Z is already represented via A->B->Z or
* A->B->C->Z etc.
*
* @param graph
* @param sourceColumnFinder
*/
private void removeUnnecesaryEdges(final DirectedGraph graph,
final SourceColumnFinder sourceColumnFinder) {
// This loop is not very pretty but it ensures that we don't prematurely
// stop looking for stuff to remove from the graph. The issue is that
// with the current design, the removeUnnecesaryEdges method may remove
// something which then should call for a re-evaluation of other edges
// to be removed.
boolean removedSomething = true;
while (removedSomething) {
removedSomething = removeUnnecesaryEdgesIfAny(graph, sourceColumnFinder);
}
}
/**
* Runs a single check through the graph to remove unnecesary edges (see
* {@link #removeUnnecesaryEdges(DirectedGraph, SourceColumnFinder)} if any
* are found.
*
* @param graph
* @param sourceColumnFinder
*
* @return whether or not any edges were removed
*/
private boolean removeUnnecesaryEdgesIfAny(final DirectedGraph graph,
final SourceColumnFinder sourceColumnFinder) {
final Collection allLinks = graph.getEdges();
final List linksToRemove = new ArrayList<>();
for (final JobGraphLink link : allLinks) {
boolean removeable = true;
if (link.getRequirement() != null) {
// only links without requirements are candidates for removal
removeable = false;
}
if (removeable) {
final Object toVertex = link.getTo();
final Collection edgesGoingIn = graph.getInEdges(toVertex);
if (edgesGoingIn.size() <= 1) {
// if this is the only edge going in, there is no
// special interest
removeable = false;
}
if (removeable) {
// check if these links represents a "shortcut" path that
// can be left out
for (final JobGraphLink edgeGoingIn : edgesGoingIn) {
if (edgeGoingIn != link) {
if (!isEdgeShortcutFor(graph, link, edgeGoingIn)) {
removeable = false;
break;
}
}
}
if (removeable) {
logger.debug("Removing unnecesary JobGraphLink: {}", link);
linksToRemove.add(link);
}
}
}
}
for (final JobGraphLink link : linksToRemove) {
graph.removeEdge(link);
}
return !linksToRemove.isEmpty();
}
private boolean isEdgeShortcutFor(final DirectedGraph graph,
final JobGraphLink potentialShortcut, final JobGraphLink otherEdge) {
return isEdgeShortcutFor(graph, potentialShortcut, otherEdge, new HashSet<>());
}
private boolean isEdgeShortcutFor(final DirectedGraph graph,
final JobGraphLink potentialShortcut, final JobGraphLink otherEdge, final Set checkedEdges) {
if (otherEdge == null) {
return false;
}
final Object from = potentialShortcut.getFrom();
final Object otherFrom = otherEdge.getFrom();
if (from == otherFrom) {
return true;
}
final Collection inEdges = graph.getInEdges(otherFrom);
if (inEdges.isEmpty()) {
// this could be improved since also scenarios with +1 inEdges
// could be analyzed
return false;
}
for (final JobGraphLink inEdge : inEdges) {
if (checkedEdges.contains(inEdge)) {
// skip
} else {
// prevent recursive nightmares - see issue #326
checkedEdges.add(inEdge);
if (!isEdgeShortcutFor(graph, potentialShortcut, inEdge, checkedEdges)) {
return false;
}
}
}
return true;
}
private void addNodes(final DirectedGraph graph, final SourceColumnFinder scf,
final JobGraphElementFactory linkFactory, final Object item, int recurseCount) {
if (item == null) {
throw new IllegalArgumentException("Node item cannot be null");
}
final Object vertex = linkFactory.createVertex(item);
if (vertex instanceof InputColumn) {
return;
} else if (vertex instanceof FilterOutcome) {
return;
}
if (!graph.containsVertex(vertex)) {
graph.addVertex(vertex);
if (recurseCount == 0) {
return;
}
// decrement recurseCount
recurseCount--;
if (vertex instanceof InputColumnSinkJob) {
final InputColumn>[] inputColumns = ((InputColumnSinkJob) vertex).getInput();
for (final InputColumn> inputColumn : inputColumns) {
// add the origin of the column
if (inputColumn.isVirtualColumn()) {
final InputColumnSourceJob source = scf.findInputColumnSource(inputColumn);
if (source != null) {
addNodes(graph, scf, linkFactory, source, recurseCount);
addEdge(graph, linkFactory, source, vertex);
}
}
if (inputColumn.isPhysicalColumn()) {
final Table table = inputColumn.getPhysicalColumn().getTable();
if (table != null) {
addNodes(graph, scf, linkFactory, table, recurseCount);
addEdge(graph, linkFactory, table, vertex);
}
}
}
}
if (vertex instanceof FilterOutcome) {
final HasFilterOutcomes source = scf.findOutcomeSource((FilterOutcome) vertex);
if (source != null) {
addNodes(graph, scf, linkFactory, source, recurseCount);
addEdge(graph, linkFactory, source, vertex);
}
}
if (vertex instanceof HasComponentRequirement) {
final HasComponentRequirement hasComponentRequirement = (HasComponentRequirement) vertex;
final Collection filterOutcomes =
getProcessingDependencyFilterOutcomes(hasComponentRequirement);
for (final FilterOutcome filterOutcome : filterOutcomes) {
// add the origin of the filter outcome
final HasFilterOutcomes source = scf.findOutcomeSource(filterOutcome);
if (source != null) {
addNodes(graph, scf, linkFactory, source, recurseCount);
addEdge(graph, linkFactory, source, vertex, hasComponentRequirement.getComponentRequirement(),
filterOutcome);
}
}
}
if (vertex instanceof InputColumn) {
final InputColumn> inputColumn = (InputColumn>) vertex;
if (inputColumn.isVirtualColumn()) {
final InputColumnSourceJob source = scf.findInputColumnSource(inputColumn);
if (source != null) {
addNodes(graph, scf, linkFactory, source, recurseCount);
addEdge(graph, linkFactory, source, vertex);
}
}
if (inputColumn.isPhysicalColumn()) {
final Table table = inputColumn.getPhysicalColumn().getTable();
if (table != null) {
addNodes(graph, scf, linkFactory, table, recurseCount);
addEdge(graph, linkFactory, table, vertex);
}
}
}
if (vertex instanceof ComponentBuilder) {
final ComponentBuilder componentBuilder = (ComponentBuilder) vertex;
for (final OutputDataStream outputDataStream : componentBuilder.getOutputDataStreams()) {
if (componentBuilder.isOutputDataStreamConsumed(outputDataStream)) {
final AnalysisJobBuilder outputDataStreamJobBuilder =
componentBuilder.getOutputDataStreamJobBuilder(outputDataStream);
final List sourceTables = outputDataStreamJobBuilder.getSourceTables();
final JobGraphElementFactory childLinkFactory = new JobGraphElementFactory() {
@Override
public JobGraphLink createLink(final Object from, final Object to,
final ComponentRequirement requirement, final FilterOutcome filterOutcome) {
if (sourceTables.contains(from)) {
// replace "from" with "vertex" and add the
// outputDataStream
return new JobGraphLink(vertex, to, requirement, filterOutcome, outputDataStream);
}
return new JobGraphLink(from, to, requirement, filterOutcome, null);
}
@Override
public Object createVertex(final Object item) {
if (sourceTables.contains(item)) {
// replace table with source vertex
return vertex;
}
return super.createVertex(item);
}
};
buildGraphInternal(graph, outputDataStreamJobBuilder, sourceTables, childLinkFactory);
}
}
}
}
}
private Collection getProcessingDependencyFilterOutcomes(final HasComponentRequirement item) {
final ComponentRequirement componentRequirement = item.getComponentRequirement();
if (componentRequirement == null) {
return Collections.emptyList();
}
return componentRequirement.getProcessingDependencies();
}
private void addEdge(final DirectedGraph graph, final JobGraphElementFactory linkFactory,
final Object from, final Object to) {
addEdge(graph, linkFactory, from, to, null, null, null);
}
private void addEdge(final DirectedGraph graph, final JobGraphElementFactory linkFactory,
final Object from, final Object to, final ComponentRequirement requirement,
final FilterOutcome filterOutcome) {
addEdge(graph, linkFactory, from, to, requirement, filterOutcome, null);
}
private void addEdge(final DirectedGraph graph, final JobGraphElementFactory linkFactory,
final Object from, final Object to, final ComponentRequirement requirement,
final FilterOutcome filterOutcome, final OutputDataStream outputDataStream) {
final JobGraphLink link = linkFactory.createLink(from, to, requirement, filterOutcome);
if (!graph.containsEdge(link)) {
graph.addEdge(link, link.getFrom(), link.getTo(), EdgeType.DIRECTED);
}
}
}