All downloads are free. The search and download functionality uses the official Maven repository.

org.datacleaner.job.JaxbJobWriter Maven / Gradle / Ivy

/**
 * DataCleaner (community edition)
 * Copyright (C) 2014 Neopost - Customer Information Management
 *
 * This copyrighted material is made available to anyone wishing to use, modify,
 * copy, or redistribute it subject to the terms and conditions of the GNU
 * Lesser General Public License, as published by the Free Software Foundation.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
 * or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public License
 * for more details.
 *
 * You should have received a copy of the GNU Lesser General Public License
 * along with this distribution; if not, write to:
 * Free Software Foundation, Inc.
 * 51 Franklin Street, Fifth Floor
 * Boston, MA  02110-1301  USA
 */
package org.datacleaner.job;

import java.io.OutputStream;
import java.util.ArrayList;
import java.util.Collection;
import java.util.HashMap;
import java.util.LinkedHashMap;
import java.util.LinkedHashSet;
import java.util.List;
import java.util.Map;
import java.util.Map.Entry;
import java.util.Set;
import java.util.TreeSet;

import javax.xml.bind.JAXBContext;
import javax.xml.bind.JAXBException;
import javax.xml.bind.Marshaller;

import org.apache.metamodel.MetaModelHelper;
import org.apache.metamodel.schema.Column;
import org.apache.metamodel.schema.Schema;
import org.apache.metamodel.schema.TableType;
import org.datacleaner.api.ColumnProperty;
import org.datacleaner.api.ExpressionBasedInputColumn;
import org.datacleaner.api.InputColumn;
import org.datacleaner.configuration.DataCleanerConfiguration;
import org.datacleaner.connection.Datastore;
import org.datacleaner.connection.DatastoreConnection;
import org.datacleaner.connection.OutputDataStreamDatastore;
import org.datacleaner.connection.SchemaNavigator;
import org.datacleaner.data.MutableInputColumn;
import org.datacleaner.descriptors.ComponentDescriptor;
import org.datacleaner.descriptors.ConfiguredPropertyDescriptor;
import org.datacleaner.job.builder.AnalyzerComponentBuilder;
import org.datacleaner.job.jaxb.AnalysisType;
import org.datacleaner.job.jaxb.AnalyzerType;
import org.datacleaner.job.jaxb.ColumnType;
import org.datacleaner.job.jaxb.ColumnsType;
import org.datacleaner.job.jaxb.ComponentType;
import org.datacleaner.job.jaxb.ConfiguredPropertiesType;
import org.datacleaner.job.jaxb.ConfiguredPropertiesType.Property;
import org.datacleaner.job.jaxb.DataContextType;
import org.datacleaner.job.jaxb.DescriptorType;
import org.datacleaner.job.jaxb.FilterType;
import org.datacleaner.job.jaxb.InputType;
import org.datacleaner.job.jaxb.Job;
import org.datacleaner.job.jaxb.JobMetadataType;
import org.datacleaner.job.jaxb.JobType;
import org.datacleaner.job.jaxb.MetadataProperties;
import org.datacleaner.job.jaxb.ObjectFactory;
import org.datacleaner.job.jaxb.OutcomeType;
import org.datacleaner.job.jaxb.OutputDataStreamType;
import org.datacleaner.job.jaxb.OutputType;
import org.datacleaner.job.jaxb.SourceType;
import org.datacleaner.job.jaxb.TransformationType;
import org.datacleaner.job.jaxb.TransformerType;
import org.datacleaner.job.jaxb.VariableType;
import org.datacleaner.job.jaxb.VariablesType;
import org.datacleaner.metadata.TemplateMetadata;
import org.datacleaner.util.JaxbValidationEventHandler;
import org.datacleaner.util.convert.StringConverter;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import com.google.common.base.Strings;
import com.google.common.collect.BiMap;
import com.google.common.collect.HashBiMap;

public class JaxbJobWriter implements JobWriter {

    // Column path qualification modes, consumed by getColumnPath(...):
    // "full" writes the column's qualified label, "table" writes
    // "<table name>.<column name>" and "column" writes the column name only.
    private static final String COLUMN_PATH_QUALIFICATION_FULL = "full";
    private static final String COLUMN_PATH_QUALIFICATION_TABLE = "table";
    private static final String COLUMN_PATH_QUALIFICATION_COLUMN = "column";

    private static final Logger logger = LoggerFactory.getLogger(JaxbJobWriter.class);

    // configuration handed to the StringConverter that serializes property values
    private final DataCleanerConfiguration _configuration;
    // JAXB context for the generated job model package; created once in the constructor
    private final JAXBContext _jaxbContext;
    // factory used to produce the job-metadata element of a written job
    private final JaxbJobMetadataFactory _jobMetadataFactory;

    public JaxbJobWriter(final DataCleanerConfiguration configuration,
            final JaxbJobMetadataFactory jobMetadataFactory) {
        _configuration = configuration;
        _jobMetadataFactory = jobMetadataFactory;
        try {
            _jaxbContext = JAXBContext
                    .newInstance(ObjectFactory.class.getPackage().getName(), ObjectFactory.class.getClassLoader());
        } catch (final Exception e) {
            throw new IllegalStateException(e);
        }
    }

    /**
     * Creates a job writer that uses a new {@code JaxbJobMetadataFactoryImpl}
     * for job metadata.
     *
     * @param configuration
     *            the configuration passed on to the two-argument constructor
     */
    public JaxbJobWriter(final DataCleanerConfiguration configuration) {
        this(configuration, new JaxbJobMetadataFactoryImpl());
    }

    private static String getColumnId(final InputColumn inputColumn,
            final BiMap, String> columnMappings) {
        if (inputColumn == null) {
            throw new IllegalArgumentException("InputColumn cannot be null");
        }

        String id = columnMappings.get(inputColumn);
        if (id == null) {
            final String baseColumnId = getBaseColumnId(inputColumn);
            id = baseColumnId;
            int addition = 1;
            while (columnMappings.containsValue(id)) {
                addition++;
                id = baseColumnId + addition;
            }
            columnMappings.put(inputColumn, id);
        }
        return id;
    }

    /**
     * Builds the base ID for a column: {@code "col_"} followed by the column
     * name, lower-cased, with every character outside {@code [a-z0-9_]}
     * removed. A null name is treated as empty.
     *
     * @param inputColumn
     *            the column to derive an ID from
     * @return the cleansed base ID
     */
    private static String getBaseColumnId(final InputColumn<?> inputColumn) {
        final String rawId = "col_" + Strings.nullToEmpty(inputColumn.getName());
        // Lower-case with a fixed locale: with the default locale, e.g. Turkish,
        // 'I' would become a dotless 'ı' and then be stripped by the filter below.
        final String lowerCased = rawId.toLowerCase(java.util.Locale.ROOT).trim();
        return lowerCased.replaceAll("[^a-z0-9_]", "");
    }

    /**
     * Writes the given job as formatted XML to the output stream.
     *
     * @param analysisJob
     *            the job to serialize
     * @param outputStream
     *            the stream that receives the marshalled XML
     * @throws IllegalStateException
     *             if JAXB marshalling fails
     */
    @Override
    public void write(final AnalysisJob analysisJob, final OutputStream outputStream) {
        logger.debug("write({},{})", analysisJob, outputStream);

        final Job job = new Job();
        configureJobType(analysisJob, job, true);

        try {
            final Marshaller marshaller = _jaxbContext.createMarshaller();
            marshaller.setProperty(Marshaller.JAXB_FORMATTED_OUTPUT, Boolean.TRUE);
            marshaller.setEventHandler(new JaxbValidationEventHandler());
            marshaller.marshal(job, outputStream);
        } catch (final JAXBException e) {
            throw new IllegalStateException(e);
        }
    }

    private void configureJobType(final AnalysisJob analysisJob, final JobType jobType, final boolean includeMetadata) {
        final BiMap, String> columnMappings = HashBiMap.create(50);
        configureJobType(analysisJob, jobType, columnMappings, includeMetadata);
    }

    private void configureJobType(final AnalysisJob analysisJob, final JobType jobType,
            final BiMap, String> columnMappings, final boolean includeMetadata) {
        if (includeMetadata) {
            try {
                final JobMetadataType jobMetadata = _jobMetadataFactory.create(analysisJob);
                jobType.setJobMetadata(jobMetadata);
            } catch (final Exception e) {
                logger.warn("Exception occurred while creating job metadata", e);
            }
        }

        final VariablesType variablesType = new VariablesType();
        final Map variables = analysisJob.getMetadata().getVariables();
        if ((variables != null) && (variables.size() > 0)) {
            for (final Map.Entry variableEntry : variables.entrySet()) {
                final VariableType variableType = new VariableType();
                variableType.setId(variableEntry.getKey());
                variableType.setValue(variableEntry.getValue());
                variablesType.getVariable().add(variableType);
            }
        }

        final SourceType sourceType = new SourceType();
        sourceType.setColumns(new ColumnsType());
        if ((variables != null) && (variables.size() > 0)) {
            sourceType.setVariables(variablesType);
        }
        jobType.setSource(sourceType);

        final Datastore datastore = analysisJob.getDatastore();
        if (!(datastore instanceof OutputDataStreamDatastore)) {
            final DataContextType dataContextType = new DataContextType();
            if (datastore == null) {
                logger.warn("No datastore specified for analysis job: {}", analysisJob);
            } else {
                dataContextType.setRef(datastore.getName());
            }
            sourceType.setDataContext(dataContextType);
        }

        final Map outcomeMappings = new LinkedHashMap<>();

        // mappings for lookup of component's elements
        final Map transformerMappings = new LinkedHashMap<>();
        final Map filterMappings = new LinkedHashMap<>();
        final Map analyzerMappings = new LinkedHashMap<>();

        // register all source columns
        final Collection> sourceColumns = analysisJob.getSourceColumns();
        final String columnPathQualification = getColumnPathQualification(datastore, sourceColumns);
        for (final InputColumn inputColumn : sourceColumns) {
            final ColumnType jaxbColumn = new ColumnType();
            final Column physicalColumn = inputColumn.getPhysicalColumn();
            jaxbColumn.setPath(getColumnPath(physicalColumn, columnPathQualification));
            jaxbColumn.setId(getColumnId(inputColumn, columnMappings));

            final org.apache.metamodel.schema.ColumnType columnType = physicalColumn.getType();
            if (columnType != null) {
                jaxbColumn.setType(columnType.toString());
            }

            sourceType.getColumns().getColumn().add(jaxbColumn);
        }

        // adds all components to the job and their corresponding mappings
        addComponents(jobType, analysisJob, columnMappings, transformerMappings, filterMappings, analyzerMappings);

        // add all transformed columns to their originating components and the
        // mappings
        addTransformedColumns(columnMappings, transformerMappings);

        // register all requirements
        addRequirements(outcomeMappings, transformerMappings, filterMappings, analyzerMappings, columnMappings);

        addConfiguration(analysisJob, transformerMappings, filterMappings, analyzerMappings, columnMappings);
    }

    /**
     * Renders the path written for a source column, according to the given
     * qualification mode.
     *
     * @param column
     *            the physical column
     * @param columnPathQualification
     *            one of the {@code COLUMN_PATH_QUALIFICATION_*} constants; any
     *            other value is handled like "full"
     * @return the column name, the table-qualified name, or the qualified
     *         label of the column
     */
    private String getColumnPath(final Column column, final String columnPathQualification) {
        switch (columnPathQualification) {
        case COLUMN_PATH_QUALIFICATION_TABLE:
            return column.getTable().getName() + '.' + column.getName();
        case COLUMN_PATH_QUALIFICATION_COLUMN: {
            // columns without a name fall back to the table-qualified form
            final boolean unnamed = Strings.isNullOrEmpty(column.getName());
            return unnamed ? column.getTable().getName() + '.' + column.getName() : column.getName();
        }
        default:
            // covers COLUMN_PATH_QUALIFICATION_FULL as well as unknown values
            return column.getQualifiedLabel();
        }
    }

    private String getColumnPathQualification(final Datastore datastore,
            final Collection> sourceColumns) {
        if (datastore == null || sourceColumns == null || sourceColumns.isEmpty()) {
            return COLUMN_PATH_QUALIFICATION_FULL;
        }

        try (DatastoreConnection connection = datastore.openConnection()) {
            final SchemaNavigator schemaNavigator = connection.getSchemaNavigator();
            final Schema[] schemas = schemaNavigator.getSchemas();
            Schema singleSchema = null;
            int realSchemas = 0;
            for (final Schema schema : schemas) {
                if (!MetaModelHelper.isInformationSchema(schema)) {
                    realSchemas++;
                    singleSchema = schema;
                }
            }

            if (realSchemas == 1) {
                final long tableCount =
                        singleSchema.getTables().stream().filter(t -> t.getType() == TableType.TABLE).count();
                if (tableCount == 1L) {
                    return COLUMN_PATH_QUALIFICATION_COLUMN;
                }
                return COLUMN_PATH_QUALIFICATION_TABLE;
            }

            return COLUMN_PATH_QUALIFICATION_FULL;
        }
    }

    private void addConfiguration(final AnalysisJob analysisJob,
            final Map transformerMappings,
            final Map filterMappings, final Map analyzerMappings,
            final BiMap, String> columnMappings) {

        final StringConverter stringConverter = new StringConverter(_configuration, analysisJob);

        // configure transformers
        for (final Entry entry : transformerMappings.entrySet()) {
            final TransformerJob job = entry.getKey();
            final TransformerType elementType = entry.getValue();
            final ComponentConfiguration configuration = job.getConfiguration();

            Set configuredProperties =
                    job.getDescriptor().getConfiguredPropertiesForInput();
            elementType.getInput().addAll(createInputConfiguration(configuration, configuredProperties, columnMappings,
                    stringConverter));

            configuredProperties = job.getDescriptor().getConfiguredProperties();
            elementType.setProperties(createPropertyConfiguration(configuration, configuredProperties, stringConverter,
                    job.getMetadataProperties()));
            elementType.setMetadataProperties(createMetadataProperties(job.getMetadataProperties()));
        }

        // configure filters
        for (final Entry entry : filterMappings.entrySet()) {
            final FilterJob job = entry.getKey();
            final FilterType elementType = entry.getValue();
            final ComponentConfiguration configuration = job.getConfiguration();

            Set configuredProperties =
                    job.getDescriptor().getConfiguredPropertiesForInput();
            elementType.getInput().addAll(createInputConfiguration(configuration, configuredProperties, columnMappings,
                    stringConverter));

            configuredProperties = job.getDescriptor().getConfiguredProperties();
            elementType.setProperties(createPropertyConfiguration(configuration, configuredProperties, stringConverter,
                    job.getMetadataProperties()));
            elementType.setMetadataProperties(createMetadataProperties(job.getMetadataProperties()));
        }

        // configure analyzers
        for (final Entry entry : analyzerMappings.entrySet()) {
            final AnalyzerJob job = entry.getKey();
            final Map metadataProperties = job.getMetadataProperties();
            final String builderId = metadataProperties.get(AnalyzerComponentBuilder.METADATA_PROPERTY_BUILDER_ID);

            final AnalyzerType elementType = entry.getValue();
            final ComponentConfiguration configuration = job.getConfiguration();
            Set configuredProperties =
                    job.getDescriptor().getConfiguredPropertiesForInput();

            if (builderId != null && !elementType.getInput().isEmpty()) {
                // this is the continuation of an already created AnalyzerType.
                // Only add the input columns which have escalated to multiple
                // jobs.

                final Set escalatingInputProperties = new LinkedHashSet<>();
                for (final ConfiguredPropertyDescriptor propertyDescriptor : configuredProperties) {
                    final ColumnProperty columnProperty = propertyDescriptor.getAnnotation(ColumnProperty.class);
                    if (columnProperty != null && columnProperty.escalateToMultipleJobs()) {
                        escalatingInputProperties.add(propertyDescriptor);
                    }
                }

                elementType.getInput()
                        .addAll(createInputConfiguration(configuration, escalatingInputProperties, columnMappings,
                                stringConverter, true));
            } else {

                elementType.getInput()
                        .addAll(createInputConfiguration(configuration, configuredProperties, columnMappings,
                                stringConverter));

                configuredProperties = job.getDescriptor().getConfiguredProperties();
                elementType.setProperties(
                        createPropertyConfiguration(configuration, configuredProperties, stringConverter,
                                metadataProperties));
                elementType.setMetadataProperties(createMetadataProperties(metadataProperties));
            }
        }
    }

    private MetadataProperties createMetadataProperties(final Map metadataProperties) {
        if (metadataProperties == null || metadataProperties.isEmpty()) {
            return null;
        }
        final MetadataProperties result = new MetadataProperties();
        final Set> entries = metadataProperties.entrySet();
        for (final Entry entry : entries) {
            final String key = entry.getKey();
            if (key.startsWith(JaxbJobReader.DATACLEANER_JAXB_VARIABLE_PREFIX)) {
                continue;
            }
            if (key.equals(AnalyzerComponentBuilder.METADATA_PROPERTY_BUILDER_ID)) {
                continue;
            }
            if (key.equals(AnalyzerComponentBuilder.METADATA_PROPERTY_BUILDER_PARTITION_INDEX)) {
                continue;
            }
            final org.datacleaner.job.jaxb.MetadataProperties.Property property =
                    new org.datacleaner.job.jaxb.MetadataProperties.Property();
            property.setName(key);
            property.setValue(entry.getValue());
            result.getProperty().add(property);
        }

        if (result.getProperty().isEmpty()) {
            return null;
        } else {
            return result;
        }
    }

    private List createInputConfiguration(final ComponentConfiguration configuration,
            final Set configuredProperties,
            final BiMap, String> columnMappings, final StringConverter stringConverter) {
        return createInputConfiguration(configuration, configuredProperties, columnMappings, stringConverter,
                configuredProperties.size() > 1);
    }

    private List createInputConfiguration(final ComponentConfiguration configuration,
            Set configuredProperties, final BiMap, String> columnMappings,
            final StringConverter stringConverter, final boolean nameInputColumns) {

        // sort the properties in order to make the result deterministic
        configuredProperties = new TreeSet<>(configuredProperties);

        final List result = new ArrayList<>();
        for (final ConfiguredPropertyDescriptor property : configuredProperties) {
            if (property.isInputColumn()) {
                final Object value = configuration.getProperty(property);
                if (value != null) {
                    final InputColumn[] columns;
                    if (property.isArray()) {
                        columns = (InputColumn[]) value;
                    } else {
                        columns = new InputColumn[1];
                        columns[0] = (InputColumn) value;
                    }

                    for (final InputColumn inputColumn : columns) {
                        if (inputColumn != null) {
                            final InputType inputType = new InputType();
                            if (inputColumn instanceof ExpressionBasedInputColumn) {
                                final ExpressionBasedInputColumn expressionBasedInputColumn =
                                        (ExpressionBasedInputColumn) inputColumn;
                                final Object columnValue = expressionBasedInputColumn.getExpression();
                                inputType.setValue(
                                        stringConverter.serialize(columnValue, property.createCustomConverter()));
                            } else {
                                inputType.setRef(getColumnId(inputColumn, columnMappings));
                            }
                            if (nameInputColumns) {
                                inputType.setName(property.getName());
                            }
                            result.add(inputType);
                        }
                    }
                }
            }
        }
        return result;
    }

    /**
     * Creates the properties element for a component from all properties that
     * are not input columns.
     *
     * For each property, a matching variable entry in the component metadata
     * (variable prefix + property name) is written as a reference; otherwise a
     * matching template entry (template prefix + property name) is written as
     * a template; otherwise the serialized property value is written.
     *
     * @param configuration
     *            the component configuration holding the property values
     * @param configuredProperties
     *            the properties of the component
     * @param stringConverter
     *            the converter used to serialize property values
     * @param componentMetadataProperties
     *            the metadata properties of the component; must not be null
     * @return the properties element, with properties in sorted order
     */
    private ConfiguredPropertiesType createPropertyConfiguration(final ComponentConfiguration configuration,
            Set<ConfiguredPropertyDescriptor> configuredProperties, final StringConverter stringConverter,
            final Map<String, String> componentMetadataProperties) {

        // sort the properties in order to make the result deterministic
        final Set<ConfiguredPropertyDescriptor> sortedProperties = new TreeSet<>(configuredProperties);

        final List<Property> result = new ArrayList<>();
        for (final ConfiguredPropertyDescriptor property : sortedProperties) {
            if (property.isInputColumn()) {
                continue;
            }
            final Property propertyType = new Property();
            propertyType.setName(property.getName());

            final String variableNameWithPrefix = JaxbJobReader.DATACLEANER_JAXB_VARIABLE_PREFIX + property.getName();
            final String variableNameWithTemplatePrefix = TemplateMetadata.TEMPLATE_VALUE + property.getName();
            if (componentMetadataProperties.containsKey(variableNameWithPrefix)) {
                propertyType.setRef(componentMetadataProperties.get(variableNameWithPrefix));
            } else if (componentMetadataProperties.containsKey(variableNameWithTemplatePrefix)) {
                propertyType.setTemplate(componentMetadataProperties.get(variableNameWithTemplatePrefix));
            } else {
                final Object value = configuration.getProperty(property);
                final String stringValue = stringConverter.serialize(value, property.createCustomConverter());

                if (stringValue != null && stringValue.indexOf('\n') != -1) {
                    // multi-line values are put as simple content of the
                    // property
                    propertyType.setValue(stringValue);
                } else {
                    // single-line values are preferred as an attribute for
                    // backwards compatibility
                    propertyType.setValueAttribute(stringValue);
                }
            }
            result.add(propertyType);
        }

        final ConfiguredPropertiesType configuredPropertiesType = new ConfiguredPropertiesType();
        configuredPropertiesType.getProperty().addAll(result);
        return configuredPropertiesType;
    }

    private void addTransformedColumns(final BiMap, String> columnMappings,
            final Map transformerMappings) {
        // register all transformed columns
        for (final Entry entry : transformerMappings.entrySet()) {
            final TransformerJob transformerJob = entry.getKey();
            final TransformerType transformerType = entry.getValue();
            final InputColumn[] columns = transformerJob.getOutput();
            for (final InputColumn inputColumn : columns) {
                final String id = getColumnId(inputColumn, columnMappings);
                final OutputType outputType = new OutputType();
                outputType.setId(id);
                outputType.setName(inputColumn.getName());
                if (inputColumn instanceof MutableInputColumn) {
                    final boolean hidden = ((MutableInputColumn) inputColumn).isHidden();
                    if (hidden) {
                        outputType.setHidden(hidden);
                    }
                }

                transformerType.getOutput().add(outputType);
            }
        }
    }

    private void addRequirements(final Map outcomeMappings,
            final Map transformerMappings,
            final Map filterMappings, final Map analyzerMappings,
            final Map, String> columnMappings) {

        // add requirements based on all transformer requirements
        for (final Entry entry : transformerMappings.entrySet()) {
            final TransformerJob job = entry.getKey();
            final ComponentRequirement requirement = job.getComponentRequirement();
            if (requirement != null) {
                final String id = getId(requirement, outcomeMappings);
                entry.getValue().setRequires(id);
            }
        }

        // add requirements based on all filter requirements
        for (final Entry entry : filterMappings.entrySet()) {
            final FilterJob job = entry.getKey();
            final ComponentRequirement requirement = job.getComponentRequirement();
            if (requirement != null) {
                final String id = getId(requirement, outcomeMappings);
                entry.getValue().setRequires(id);
            }
        }

        // add requirements based on all analyzer requirements
        for (final Entry entry : analyzerMappings.entrySet()) {
            final AnalyzerJob job = entry.getKey();
            final ComponentRequirement requirement = job.getComponentRequirement();
            if (requirement != null) {
                final String id = getId(requirement, outcomeMappings);
                entry.getValue().setRequires(id);
            }
        }

        // add outcome elements only for those filter requirements that
        // have been mapped
        for (final Entry entry : filterMappings.entrySet()) {
            final FilterJob job = entry.getKey();
            final FilterType filterType = entry.getValue();
            final Collection outcomes = job.getFilterOutcomes();
            for (final FilterOutcome outcome : outcomes) {
                // note that we DONT use the getId(...) method here
                final String id = getId(outcome, outcomeMappings, false);
                // only the outcome element if it is being mapped
                if (id != null) {
                    final OutcomeType outcomeType = new OutcomeType();
                    outcomeType.setCategory(outcome.getCategory().name());
                    outcomeType.setId(id);
                    filterType.getOutcome().add(outcomeType);
                }
            }
        }
    }

    /**
     * Renders a component requirement as the string written to "requires".
     *
     * An {@code AnyComponentRequirement} yields its keyword, a
     * {@code SimpleComponentRequirement} yields the ID of its outcome, and a
     * {@code CompoundComponentRequirement} yields the IDs of its outcomes
     * joined by {@code " OR "}. Outcome IDs are created when missing.
     *
     * @param requirement
     *            the requirement to render
     * @param outcomeMappings
     *            the outcome-to-ID registry; extended when IDs are created
     * @return the rendered requirement
     * @throws UnsupportedOperationException
     *             if the requirement is of none of the types above
     */
    private String getId(final ComponentRequirement requirement, final Map<FilterOutcome, String> outcomeMappings) {
        if (requirement instanceof AnyComponentRequirement) {
            return AnyComponentRequirement.KEYWORD;
        }

        if (requirement instanceof SimpleComponentRequirement) {
            final FilterOutcome outcome = ((SimpleComponentRequirement) requirement).getOutcome();
            return getId(outcome, outcomeMappings, true);
        }

        if (requirement instanceof CompoundComponentRequirement) {
            final Set<FilterOutcome> outcomes = ((CompoundComponentRequirement) requirement).getOutcomes();
            final StringBuilder sb = new StringBuilder();
            for (final FilterOutcome outcome : outcomes) {
                if (sb.length() != 0) {
                    sb.append(" OR ");
                }
                sb.append(getId(outcome, outcomeMappings, true));
            }
            return sb.toString();
        }

        throw new UnsupportedOperationException("Unsupported ComponentRequirement type: " + requirement);
    }

    /**
     * Returns the ID registered for a filter outcome.
     *
     * @param outcome
     *            the outcome to look up
     * @param outcomeMappings
     *            the outcome-to-ID registry
     * @param create
     *            whether to register a new ID ({@code "outcome_"} followed by
     *            the current registry size) when the outcome has none
     * @return the ID, or null if the outcome has no ID and {@code create} is
     *         false
     */
    private String getId(final FilterOutcome outcome, final Map<FilterOutcome, String> outcomeMappings,
            final boolean create) {
        String id = outcomeMappings.get(outcome);
        if (id == null && create) {
            id = "outcome_" + outcomeMappings.size();
            outcomeMappings.put(outcome, id);
        }
        return id;
    }

    private void addComponents(final JobType jobType, final AnalysisJob analysisJob,
            final BiMap, String> columnMappings,
            final Map transformerMappings,
            final Map filterMappings, final Map analyzerMappings) {
        final TransformationType transformationType = new TransformationType();
        jobType.setTransformation(transformationType);

        final AnalysisType analysisType = new AnalysisType();
        jobType.setAnalysis(analysisType);

        // add all transformers to the transformation element
        final Collection transformerJobs = analysisJob.getTransformerJobs();
        for (final TransformerJob transformerJob : transformerJobs) {
            final TransformerType transformerType = new TransformerType();
            transformerType.setName(transformerJob.getName());
            setDescriptor(transformerType, transformerJob.getDescriptor());

            addOutputDataStreams(transformerType, transformerJob, columnMappings);

            transformationType.getTransformerOrFilter().add(transformerType);
            transformerMappings.put(transformerJob, transformerType);
        }

        // add all filters to the transformation element
        final Collection filterJobs = analysisJob.getFilterJobs();
        for (final FilterJob filterJob : filterJobs) {
            final FilterType filterType = new FilterType();
            filterType.setName(filterJob.getName());
            setDescriptor(filterType, filterJob.getDescriptor());

            addOutputDataStreams(filterType, filterJob, columnMappings);

            transformationType.getTransformerOrFilter().add(filterType);
            filterMappings.put(filterJob, filterType);
        }

        // add all analyzers to the analysis element
        final Collection analyzerJobs = analysisJob.getAnalyzerJobs();
        final Map analyzerTypesByBuilderId = new HashMap<>();

        for (final AnalyzerJob analyzerJob : analyzerJobs) {
            final String builderId =
                    analyzerJob.getMetadataProperties().get(AnalyzerComponentBuilder.METADATA_PROPERTY_BUILDER_ID);
            if (builderId != null && analyzerTypesByBuilderId.containsKey(builderId)) {
                // reuse the AnalyzerType to retain the single builder upon job
                // reading
                final AnalyzerType analyzerType = analyzerTypesByBuilderId.get(builderId);
                analyzerMappings.put(analyzerJob, analyzerType);
            } else {
                final AnalyzerType analyzerType = new AnalyzerType();
                analyzerType.setName(analyzerJob.getName());
                setDescriptor(analyzerType, analyzerJob.getDescriptor());

                addOutputDataStreams(analyzerType, analyzerJob, columnMappings);

                analysisType.getAnalyzer().add(analyzerType);
                analyzerMappings.put(analyzerJob, analyzerType);
                if (builderId != null) {
                    analyzerTypesByBuilderId.put(builderId, analyzerType);
                }
            }
        }
    }

    private void addOutputDataStreams(final ComponentType componentType, final ComponentJob componentJob,
            final BiMap, String> columnMappings) {
        final OutputDataStreamJob[] outputDataStreamJobs = componentJob.getOutputDataStreamJobs();

        for (final OutputDataStreamJob outputDataStreamJob : outputDataStreamJobs) {
            final OutputDataStreamType outputDataStreamType = new OutputDataStreamType();
            outputDataStreamType.setName(outputDataStreamJob.getOutputDataStream().getName());
            final JobType childJobType = new JobType();
            configureJobType(outputDataStreamJob.getJob(), childJobType, columnMappings, false);
            outputDataStreamType.setJob(childJobType);
            componentType.getOutputDataStream().add(outputDataStreamType);
        }
    }

    /**
     * Sets the descriptor element of a component, referencing the descriptor
     * by its display name.
     *
     * @param componentType
     *            the JAXB element of the component
     * @param descriptor
     *            the descriptor of the component
     */
    private void setDescriptor(final ComponentType componentType, final ComponentDescriptor<?> descriptor) {
        final DescriptorType descriptorType = new DescriptorType();
        descriptorType.setRef(descriptor.getDisplayName());
        componentType.setDescriptor(descriptorType);
    }
}




© 2015 - 2024 Weber Informatics LLC | Privacy Policy