All Downloads are FREE. The search and download functionality uses the official Maven repository.

org.datacleaner.util.PreviewUtils Maven / Gradle / Ivy

/**
 * DataCleaner (community edition)
 * Copyright (C) 2014 Free Software Foundation, Inc.
 *
 * This copyrighted material is made available to anyone wishing to use, modify,
 * copy, or redistribute it subject to the terms and conditions of the GNU
 * Lesser General Public License, as published by the Free Software Foundation.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
 * or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public License
 * for more details.
 *
 * You should have received a copy of the GNU Lesser General Public License
 * along with this distribution; if not, write to:
 * Free Software Foundation, Inc.
 * 51 Franklin Street, Fifth Floor
 * Boston, MA  02110-1301  USA
 */
package org.datacleaner.util;

import java.util.Collection;
import java.util.LinkedList;
import java.util.List;

import org.apache.metamodel.schema.Table;
import org.datacleaner.api.InputColumn;
import org.datacleaner.api.OutputDataStream;
import org.datacleaner.components.maxrows.MaxRowsFilter;
import org.datacleaner.job.AnalysisJob;
import org.datacleaner.job.ComponentRequirement;
import org.datacleaner.job.CompoundComponentRequirement;
import org.datacleaner.job.HasFilterOutcomes;
import org.datacleaner.job.SimpleComponentRequirement;
import org.datacleaner.job.builder.AnalysisJobBuilder;
import org.datacleaner.job.builder.AnalyzerComponentBuilder;
import org.datacleaner.job.builder.ComponentBuilder;
import org.datacleaner.job.builder.FilterComponentBuilder;
import org.datacleaner.job.builder.TransformerComponentBuilder;

/**
 * Static helpers used when building a reduced "preview" variant of an
 * analysis job: limiting the rows read per source table, copying a job
 * builder, removing components that are not needed for the preview and
 * locating a job builder by its preview marker property.
 */
public class PreviewUtils {

    /**
     * Key of the job metadata property that
     * {@link #findAnalysisJobBuilder(AnalysisJobBuilder, String)} compares
     * against the requested identifier.
     */
    public static final String METADATA_PROPERTY_MARKER = "org.datacleaner.preview.targetcomponent";

    /**
     * Limits the rows processed by the given components by making them depend
     * on a {@link MaxRowsFilter}, using one filter per source table of the
     * job. The row budget is divided over the source tables, rounded up.
     *
     * <p>
     * For every source table a filter named
     * {@code <PreviewUtils class name>-<table name>-MaxRows} is looked up and,
     * only if it does not exist yet, added and configured. Every component
     * (except the filter itself) that has input and is either a multi-stream
     * component or has a first input column originating from that table is
     * then tied to the filter's {@code VALID} outcome:
     * <ul>
     * <li>a component without a requirement gets a
     * {@link SimpleComponentRequirement};</li>
     * <li>a multi-stream component that already has a requirement gets a
     * {@link CompoundComponentRequirement} built from the existing
     * requirement and the outcome;</li>
     * <li>any other component that already has a requirement is left
     * untouched.</li>
     * </ul>
     *
     * @param jobBuilder
     *            the job builder whose source tables are inspected and to
     *            which missing filters are added
     * @param componentBuilders
     *            the components to constrain
     * @param previewRows
     *            the total number of preview rows, shared by all source tables
     */
    public static void limitJobRows(final AnalysisJobBuilder jobBuilder,
            final Collection componentBuilders, final int previewRows) {
        final SourceColumnFinder sourceColumnFinder = new SourceColumnFinder();
        sourceColumnFinder.addSources(jobBuilder);
        final List sourceTables = jobBuilder.getSourceTables();

        // share of the preview rows per table, rounded up so that the total is
        // never below the requested amount
        final int maxRows = (int) Math.ceil(((double) previewRows) / sourceTables.size());

        for (final Table table : sourceTables) {
            final String filterName = PreviewUtils.class.getName() + "-" + table.getName() + "-MaxRows";

            // reuse a filter from an earlier invocation; it is only configured
            // when it is newly created
            final FilterComponentBuilder maxRowFilter =
                    jobBuilder.getFilterComponentBuilderByName(filterName).orElseGet(() -> {
                        final FilterComponentBuilder filter =
                                jobBuilder.addFilter(MaxRowsFilter.class);
                        filter.setName(filterName);
                        filter.getComponentInstance().setMaxRows(maxRows);
                        filter.getComponentInstance().setApplyOrdering(false);
                        filter.getComponentInstance()
                                .setOrderColumn(jobBuilder.getSourceColumnsOfTable(table).get(0));
                        return filter;
                    });

            componentBuilders.stream().filter(cb -> cb != maxRowFilter).forEach(componentBuilder -> {
                final InputColumn[] input = componentBuilder.getInput();
                if (input.length == 0) {
                    return;
                }

                final boolean multiStream = componentBuilder.getDescriptor().isMultiStreamComponent();
                if (!multiStream && sourceColumnFinder.findOriginatingTable(input[0]) != table) {
                    // the component reads from another table
                    return;
                }

                final ComponentRequirement existingRequirement = componentBuilder.getComponentRequirement();
                if (existingRequirement == null) {
                    componentBuilder.setComponentRequirement(new SimpleComponentRequirement(
                            maxRowFilter.getFilterOutcome(MaxRowsFilter.Category.VALID)));
                } else if (multiStream) {
                    componentBuilder.setComponentRequirement(
                            new CompoundComponentRequirement(existingRequirement,
                                    maxRowFilter.getFilterOutcome(MaxRowsFilter.Category.VALID)));
                }
                // single-stream components keep a requirement they already have
            });
        }
    }

    /**
     * Creates a new job builder from the job of the given builder's
     * <em>root</em> job builder. Note that the copy therefore always
     * represents the root job, also when {@code original} is a child job
     * builder.
     *
     * @param original
     *            the job builder to copy; its configuration is reused for the
     *            copy
     * @return a new {@link AnalysisJobBuilder} based on the root job, which is
     *         built from the root job builder without its listeners
     */
    public static AnalysisJobBuilder copy(final AnalysisJobBuilder original) {
        // NOTE(review): the 'false' argument presumably disables validation of
        // the job - confirm against AnalysisJobBuilder.toAnalysisJob(boolean)
        final AnalysisJob analysisJob = original.getRootJobBuilder().withoutListeners().toAnalysisJob(false);
        return new AnalysisJobBuilder(original.getConfiguration(), analysisJob);
    }

    /**
     * Removes components that are not needed to preview the given transformer.
     * The given job builder and all of its ancestors up to the root are
     * processed. For every component in those job builders:
     * <ul>
     * <li>consumed output data streams whose job builder is not one of the
     * processed job builders get all their components removed;</li>
     * <li>the component is kept if it is the previewed transformer or if one
     * of its consumed output data streams leads to a processed job
     * builder;</li>
     * <li>otherwise it is removed when it is an analyzer or when it is not
     * configured.</li>
     * </ul>
     *
     * @param ajb
     *            the job builder from which the chain of relevant job builders
     *            is determined
     * @param tjb
     *            the previewed transformer, which is never removed
     */
    public static void sanitizeIrrelevantComponents(final AnalysisJobBuilder ajb,
            final TransformerComponentBuilder tjb) {
        final List relevantAnalysisJobBuilders = createRelevantAnalysisJobBuildersList(ajb);

        for (final AnalysisJobBuilder relevantAnalysisJobBuilder : relevantAnalysisJobBuilders) {
            // NOTE(review): components are removed while this collection is
            // being iterated; this relies on getComponentBuilders() returning
            // a snapshot rather than a live view - confirm
            final Collection componentBuilders = relevantAnalysisJobBuilder.getComponentBuilders();
            for (final ComponentBuilder componentBuilder : componentBuilders) {

                // flag to indicate if this component is directly involved in
                // populating data for the previewed component
                boolean importantComponent = componentBuilder == tjb;

                final List streams = componentBuilder.getOutputDataStreams();
                for (final OutputDataStream stream : streams) {
                    if (!componentBuilder.isOutputDataStreamConsumed(stream)) {
                        continue;
                    }
                    final AnalysisJobBuilder childJobBuilder =
                            componentBuilder.getOutputDataStreamJobBuilder(stream);
                    if (relevantAnalysisJobBuilders.contains(childJobBuilder)) {
                        importantComponent = true;
                    } else {
                        // remove irrelevant output data stream job builder
                        childJobBuilder.removeAllComponents();
                    }
                }

                // Analyzers are removed because they are generally more
                // heavy-weight and they produce no dependencies for other
                // components; other components are removed only when they are
                // not configured. Each component is removed at most once.
                if (!importantComponent && (componentBuilder instanceof AnalyzerComponentBuilder
                        || !componentBuilder.isConfigured(false))) {
                    relevantAnalysisJobBuilder.removeComponent(componentBuilder);
                }
            }
        }
    }

    /**
     * Creates a list with _just_ the relevant {@link AnalysisJobBuilder}s to
     * include in the preview job: the given job builder followed by each of
     * its ancestors, ending with the root job builder.
     *
     * @param ajb
     *            the job builder to start from
     * @return a new list holding {@code ajb} and all its parent job builders,
     *         ordered from {@code ajb} up to the root
     */
    private static List createRelevantAnalysisJobBuildersList(AnalysisJobBuilder ajb) {
        final List relevantAnalysisJobBuilders = new LinkedList<>();
        relevantAnalysisJobBuilders.add(ajb);
        while (!ajb.isRootJobBuilder()) {
            ajb = ajb.getParentJobBuilder();
            // every ancestor feeds data into the previewed job builder
            relevantAnalysisJobBuilders.add(ajb);
        }
        return relevantAnalysisJobBuilders;
    }

    /**
     * Searches the given job builder and, depth-first, the job builders of its
     * consumed output data streams for the one whose
     * {@link #METADATA_PROPERTY_MARKER} metadata property equals the given
     * identifier.
     *
     * @param analysisJobBuilder
     *            the job builder at which the search starts
     * @param jobBuilderIdentifier
     *            the marker value to look for; must not be {@code null}
     * @return the first matching job builder, or {@code null} if neither the
     *         given job builder nor any of its descendants matches
     */
    public static AnalysisJobBuilder findAnalysisJobBuilder(final AnalysisJobBuilder analysisJobBuilder,
            final String jobBuilderIdentifier) {
        final Object marker = analysisJobBuilder.getAnalysisJobMetadata().getProperties()
                .get(METADATA_PROPERTY_MARKER);
        if (jobBuilderIdentifier.equals(marker)) {
            return analysisJobBuilder;
        }

        final List childJobBuilders = analysisJobBuilder.getConsumedOutputDataStreamsJobBuilders();
        for (final AnalysisJobBuilder childJobBuilder : childJobBuilders) {
            final AnalysisJobBuilder result = findAnalysisJobBuilder(childJobBuilder, jobBuilderIdentifier);
            if (result != null) {
                return result;
            }
        }

        return null;
    }

    /**
     * Tells whether any of the source jobs that the given
     * {@link SourceColumnFinder} finds for the component is a
     * {@link HasFilterOutcomes}.
     *
     * @param scf
     *            the finder used to resolve all source jobs of the component
     * @param acb
     *            the component whose source jobs are inspected
     * @return {@code true} if at least one source job implements
     *         {@link HasFilterOutcomes}, {@code false} otherwise
     */
    public static boolean hasFilterPresent(final SourceColumnFinder scf, final ComponentBuilder acb) {
        return scf.findAllSourceJobs(acb).stream().anyMatch(o -> o instanceof HasFilterOutcomes);
    }
}