All Downloads are FREE. Search and download functionalities are using the official Maven repository.

org.datacleaner.components.groovy.GroovyAdvancedTransformer Maven / Gradle / Ivy

/**
 * DataCleaner (community edition)
 * Copyright (C) 2014 Neopost - Customer Information Management
 *
 * This copyrighted material is made available to anyone wishing to use, modify,
 * copy, or redistribute it subject to the terms and conditions of the GNU
 * Lesser General Public License, as published by the Free Software Foundation.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
 * or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public License
 * for more details.
 *
 * You should have received a copy of the GNU Lesser General Public License
 * along with this distribution; if not, write to:
 * Free Software Foundation, Inc.
 * 51 Franklin Street, Fifth Floor
 * Boston, MA  02110-1301  USA
 */
package org.datacleaner.components.groovy;

import groovy.lang.GroovyClassLoader;
import groovy.lang.GroovyObject;

import java.util.LinkedHashMap;
import java.util.Map;

import javax.inject.Inject;
import javax.inject.Named;

import org.datacleaner.api.Categorized;
import org.datacleaner.api.Close;
import org.datacleaner.api.Configured;
import org.datacleaner.api.Description;
import org.datacleaner.api.Initialize;
import org.datacleaner.api.InputColumn;
import org.datacleaner.api.InputRow;
import org.datacleaner.api.NumberProperty;
import org.datacleaner.api.OutputColumns;
import org.datacleaner.api.OutputRowCollector;
import org.datacleaner.api.Provided;
import org.datacleaner.api.StringProperty;
import org.datacleaner.api.Transformer;
import org.datacleaner.components.categories.ScriptingCategory;
import org.datacleaner.util.ReflectionUtils;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

/**
 * Transformer that delegates the processing of each record to a user-supplied
 * Groovy class.
 *
 * <p>
 * The configured {@link #code} is compiled when the component is initialized
 * and a single instance of the resulting class is created. That instance is
 * called with three methods:
 * </p>
 * <ul>
 * <li>{@code initialize()} - invoked once from {@link #init()}</li>
 * <li>{@code transform(map, outputCollector)} - invoked for every input row
 * with a map of column name to value and the {@link OutputRowCollector}</li>
 * <li>{@code close()} - invoked once from {@link #close()}</li>
 * </ul>
 *
 * <p>
 * {@link #transform(InputRow)} always returns {@code null}; the script is
 * expected to emit its output rows through the collector it is handed.
 * </p>
 */
@Named("Groovy transformer (advanced)")
@Categorized(ScriptingCategory.class)
@Description("Perform almost any kind of data transformation with the use of the Groovy language. "
        + "This transformer includes advanced options to map records to multiple (or no) output records and more.")
public class GroovyAdvancedTransformer implements Transformer {

    private static final Logger logger = LoggerFactory.getLogger(GroovyAdvancedTransformer.class);

    /**
     * Empty argument list for the no-arg script callbacks. An empty array
     * cannot be modified, so sharing one instance is safe.
     */
    private static final Object[] NO_ARGS = new Object[0];

    /** Columns whose values are handed to the script, keyed by column name. */
    @Configured(order = 1)
    InputColumn<?>[] inputs;

    /**
     * When {@code false}, calls to the script's {@code transform} method are
     * serialized by synchronizing on the script instance.
     */
    @Configured(order = 2)
    @Description("Execute the transformation code in a concurrent manner?")
    boolean concurrent = true;

    /** Number of output columns declared by {@link #getOutputColumns()}. */
    @Configured(order = 3)
    @NumberProperty(negative = false)
    @Description("The number of field to expect in the output")
    int outputFields = 2;

    /** Groovy source of the class that performs the transformation. */
    @Configured(order = 4)
    @StringProperty(multiline = true, mimeType = { "application/x-groovy", "text/x-groovy", "text/groovy" })
    String code = "class Transformer {\n\tvoid initialize() {\n\t\t// Optional initializer\n\t}\n\n"
            + "\tvoid transform(map, outputCollector) {\n\t\t// Example: Makes an output record for each field+value in input\n"
            + "\t\tmap.each{\n\t\t\tk, v -> outputCollector.putValues(k, v)\n" + "\t\t};\n\t}\n\n\tvoid close() {\n"
            + "\t\t// Optional destroyer\n\t}\n}";

    /** Collector passed to the script so that it can emit output rows. */
    @Inject
    @Provided
    OutputRowCollector _outputRowCollector;

    private GroovyObject _groovyObject;
    private GroovyClassLoader _groovyClassLoader;

    /**
     * Compiles the configured Groovy code, instantiates the resulting class
     * and invokes its {@code initialize()} method.
     */
    @Initialize
    public void init() {
        final ClassLoader parent = getClass().getClassLoader();
        _groovyClassLoader = new GroovyClassLoader(parent);
        logger.debug("Compiling Groovy code:\n{}", code);
        final Class<?> groovyClass = _groovyClassLoader.parseClass(code);
        _groovyObject = (GroovyObject) ReflectionUtils.newInstance(groovyClass);
        _groovyObject.invokeMethod("initialize", NO_ARGS);
    }

    /**
     * Invokes the script's {@code close()} method and releases the compiled
     * classes.
     *
     * <p>
     * The references are cleared and the class loader cache is emptied even if
     * the script's {@code close()} throws; the exception still propagates to
     * the caller. The method is a no-op for whichever parts were never
     * initialized, so it is safe to call after a failed {@link #init()} or
     * more than once.
     * </p>
     */
    @Close
    public void close() {
        // Detach the state first so a failing script cannot leave stale references behind.
        final GroovyObject groovyObject = _groovyObject;
        final GroovyClassLoader groovyClassLoader = _groovyClassLoader;
        _groovyObject = null;
        _groovyClassLoader = null;

        try {
            if (groovyObject != null) {
                groovyObject.invokeMethod("close", NO_ARGS);
            }
        } finally {
            if (groovyClassLoader != null) {
                groovyClassLoader.clearCache();
            }
        }
    }

    /**
     * Declares {@link #outputFields} String columns named
     * {@code "Groovy output (1)"}, {@code "Groovy output (2)"}, and so on.
     *
     * @return the output column definition
     */
    public OutputColumns getOutputColumns() {
        final String[] names = new String[outputFields];
        for (int i = 0; i < outputFields; i++) {
            names[i] = "Groovy output (" + (i + 1) + ")";
        }
        return new OutputColumns(String.class, names);
    }

    /**
     * Passes the values of the configured input columns to the script's
     * {@code transform(map, outputCollector)} method.
     *
     * @param inputRow
     *            the row to read the input column values from
     * @return always {@code null}; output rows are produced by the script via
     *         the output row collector
     */
    public String[] transform(final InputRow inputRow) {
        // LinkedHashMap keeps the values in the order of the configured input columns.
        final Map<String, Object> map = new LinkedHashMap<String, Object>();
        for (InputColumn<?> input : inputs) {
            map.put(input.getName(), inputRow.getValue(input));
        }
        final Object[] args = new Object[] { map, _outputRowCollector };
        if (concurrent) {
            _groovyObject.invokeMethod("transform", args);
        } else {
            // Serialize script invocations when the user opted out of concurrent execution.
            synchronized (_groovyObject) {
                _groovyObject.invokeMethod("transform", args);
            }
        }
        return null;
    }

}




© 2015 - 2024 Weber Informatics LLC | Privacy Policy