All downloads are free. The search and download features use the official Maven repository.

org.datacleaner.beans.transform.StringPatternMatcherTransformer Maven / Gradle / Ivy

There is a newer version: 6.0.0
Show newest version
/**
 * DataCleaner (community edition)
 * Copyright (C) 2014 Neopost - Customer Information Management
 *
 * This copyrighted material is made available to anyone wishing to use, modify,
 * copy, or redistribute it subject to the terms and conditions of the GNU
 * Lesser General Public License, as published by the Free Software Foundation.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
 * or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public License
 * for more details.
 *
 * You should have received a copy of the GNU Lesser General Public License
 * along with this distribution; if not, write to:
 * Free Software Foundation, Inc.
 * 51 Franklin Street, Fifth Floor
 * Boston, MA  02110-1301  USA
 */
package org.datacleaner.beans.transform;

import javax.inject.Named;

import org.datacleaner.api.Categorized;
import org.datacleaner.api.Close;
import org.datacleaner.api.Configured;
import org.datacleaner.api.Description;
import org.datacleaner.api.Initialize;
import org.datacleaner.api.InputColumn;
import org.datacleaner.api.InputRow;
import org.datacleaner.api.OutputColumns;
import org.datacleaner.api.Provided;
import org.datacleaner.api.Transformer;
import org.datacleaner.components.categories.ImproveSuperCategory;
import org.datacleaner.components.categories.ReferenceDataCategory;
import org.datacleaner.components.convert.ConvertToStringTransformer;
import org.datacleaner.configuration.DataCleanerConfiguration;
import org.datacleaner.reference.StringPattern;
import org.datacleaner.reference.StringPatternConnection;

/**
 * Transformer that matches the values of a single input column against a set
 * of configured string patterns and emits one output value per pattern.
 *
 * <p>
 * Life cycle: {@link #init()} opens one {@link StringPatternConnection} per
 * configured pattern, {@link #transform(InputRow)} / {@link #doMatching(Object)}
 * use those connections, and {@link #close()} releases them again.
 */
@Named("String pattern matcher")
@Description("Matches string values against a set of string patterns, producing a corresponding set "
        + "of output columns specifying whether or not the values matched those string patterns")
@Categorized(superCategory = ImproveSuperCategory.class, value = ReferenceDataCategory.class)
public class StringPatternMatcherTransformer implements Transformer {

    /** The patterns to match against; one output column is produced per pattern. */
    @Configured
    StringPattern[] _stringPatterns;

    /** The column whose values are matched. */
    @Configured
    InputColumn<?> _column;

    /** Determines what is written to the output columns; defaults to {@code TRUE_FALSE}. */
    @Configured
    MatchOutputType _outputType = MatchOutputType.TRUE_FALSE;

    /** Configuration handed to each pattern when its connection is opened. */
    @Provided
    DataCleanerConfiguration _configuration;

    /** Open connections, index-aligned with {@link #_stringPatterns}; {@code null} while not initialized. */
    private StringPatternConnection[] stringPatternConnections;

    /**
     * Creates a transformer with its column, patterns and configuration set up
     * programmatically. The output type keeps its default.
     *
     * @param column the column whose values are matched
     * @param stringPatterns the patterns to match against
     * @param configuration the configuration used when opening pattern connections
     */
    public StringPatternMatcherTransformer(final InputColumn<?> column, final StringPattern[] stringPatterns,
            final DataCleanerConfiguration configuration) {
        this();
        _column = column;
        _stringPatterns = stringPatterns;
        _configuration = configuration;
    }

    /**
     * Creates an unconfigured transformer; the annotated fields are expected to
     * be populated before {@link #init()} is called.
     */
    public StringPatternMatcherTransformer() {
    }

    /**
     * Opens one connection per configured string pattern.
     *
     * <p>
     * The connections are only published to the instance once all of them have
     * been opened. If opening any pattern fails, the connections opened so far
     * are closed again before the failure is rethrown, so nothing is leaked.
     */
    @Initialize
    public void init() {
        final StringPatternConnection[] opened = new StringPatternConnection[_stringPatterns.length];
        try {
            for (int i = 0; i < opened.length; i++) {
                opened[i] = _stringPatterns[i].openConnection(_configuration);
            }
        } catch (final RuntimeException e) {
            // Roll back the partially opened set; keep the original failure as the primary one.
            try {
                closeAll(opened);
            } catch (final RuntimeException closeFailure) {
                e.addSuppressed(closeFailure);
            }
            throw e;
        }
        stringPatternConnections = opened;
    }

    /**
     * Closes all open pattern connections. Does nothing if no connections are
     * open. Every connection is attempted even if an earlier one fails to close.
     */
    @Close
    public void close() {
        final StringPatternConnection[] toClose = stringPatternConnections;
        // Clear the field first so the instance never keeps references to closed connections.
        stringPatternConnections = null;
        closeAll(toClose);
    }

    /**
     * Closes every non-null connection in the given array.
     *
     * @param connections the connections to close; may be {@code null} or contain {@code null} entries
     * @throws RuntimeException the first failure raised while closing, with any
     *         later failures attached as suppressed exceptions
     */
    private static void closeAll(final StringPatternConnection[] connections) {
        if (connections == null) {
            return;
        }
        RuntimeException firstFailure = null;
        for (final StringPatternConnection connection : connections) {
            if (connection == null) {
                continue;
            }
            try {
                connection.close();
            } catch (final RuntimeException e) {
                if (firstFailure == null) {
                    firstFailure = e;
                } else {
                    firstFailure.addSuppressed(e);
                }
            }
        }
        if (firstFailure != null) {
            throw firstFailure;
        }
    }

    /**
     * Describes the output columns: one per configured pattern, named
     * {@code <column name> '<pattern name>'} and typed according to the
     * configured output type.
     *
     * @return the output column names and types
     */
    @Override
    public OutputColumns getOutputColumns() {
        final String columnName = _column.getName();
        final int count = _stringPatterns.length;
        final String[] names = new String[count];
        final Class<?>[] types = new Class<?>[count];
        for (int i = 0; i < count; i++) {
            names[i] = columnName + " '" + _stringPatterns[i].getName() + "'";
            types[i] = _outputType.getOutputClass();
        }
        return new OutputColumns(names, types);
    }

    /**
     * Reads the configured column's value from the row and matches it against
     * all patterns.
     *
     * @param inputRow the row to read the value from
     * @return the match results, see {@link #doMatching(Object)}
     */
    @Override
    public Object[] transform(final InputRow inputRow) {
        final Object value = inputRow.getValue(_column);
        return doMatching(value);
    }

    /**
     * Matches a single value against every open pattern connection.
     *
     * <p>
     * The value is first converted with
     * {@link ConvertToStringTransformer#transformValue(Object)}. Requires that
     * {@link #init()} has been called.
     *
     * @param value the value to match
     * @return one entry per pattern connection: with {@code TRUE_FALSE} a
     *         {@link Boolean} telling whether the pattern matched; with
     *         {@code INPUT_OR_NULL} the converted string when it matched and
     *         {@code null} otherwise
     */
    public Object[] doMatching(final Object value) {
        final Object[] result = new Object[stringPatternConnections.length];
        final String stringValue = ConvertToStringTransformer.transformValue(value);

        for (int i = 0; i < result.length; i++) {
            final boolean matches = stringPatternConnections[i].matches(stringValue);
            if (_outputType == MatchOutputType.TRUE_FALSE) {
                result[i] = matches;
            } else if (_outputType == MatchOutputType.INPUT_OR_NULL) {
                result[i] = matches ? stringValue : null;
            }
        }
        return result;
    }

    /**
     * Replaces the patterns to match against. Connections that are already
     * open are not affected by this call.
     *
     * @param stringPatterns the new patterns
     */
    public void setStringPatterns(final StringPattern[] stringPatterns) {
        _stringPatterns = stringPatterns;
    }

    /**
     * Replaces the column whose values are matched.
     *
     * @param column the new input column
     */
    public void setColumn(final InputColumn<?> column) {
        _column = column;
    }
}




© 2015 - 2024 Weber Informatics LLC | Privacy Policy