All Downloads are FREE. Search and download functionalities are using the official Maven repository.

org.datacleaner.components.groovy.GroovyAdvancedTransformer Maven / Gradle / Ivy

The newest version!
package org.datacleaner.components.groovy;

import groovy.lang.GroovyClassLoader;
import groovy.lang.GroovyObject;

import java.util.LinkedHashMap;
import java.util.Map;

import javax.inject.Inject;
import javax.inject.Named;

import org.datacleaner.api.Categorized;
import org.datacleaner.api.Close;
import org.datacleaner.api.Configured;
import org.datacleaner.api.Description;
import org.datacleaner.api.Initialize;
import org.datacleaner.api.InputColumn;
import org.datacleaner.api.InputRow;
import org.datacleaner.api.NumberProperty;
import org.datacleaner.api.OutputColumns;
import org.datacleaner.api.OutputRowCollector;
import org.datacleaner.api.Provided;
import org.datacleaner.api.StringProperty;
import org.datacleaner.api.Transformer;
import org.datacleaner.components.categories.ScriptingCategory;
import org.datacleaner.util.ReflectionUtils;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

@Named("Groovy transformer (advanced)")
@Categorized(ScriptingCategory.class)
@Description("Perform almost any kind of data transformation with the use of the Groovy language. This transformer includes advanced options to map records to multiple (or no) output records and more.")
public class GroovyAdvancedTransformer implements Transformer {

    private static final Logger logger = LoggerFactory.getLogger(GroovyAdvancedTransformer.class);

    @Configured(order = 1)
    InputColumn[] inputs;

    @Configured(order = 2)
    @Description("Execute the transformation code in a concurrent manner?")
    boolean concurrent = true;

    @Configured(order = 3)
    @NumberProperty(negative = false)
    @Description("The number of field to expect in the output")
    int outputFields = 2;

    @Configured(order = 4)
    @StringProperty(multiline = true, mimeType = { "application/x-groovy", "text/x-groovy", "text/groovy" })
    String code = "class Transformer {\n\tvoid initialize() {\n\t\t// Optional initializer\n\t}\n\n\tvoid transform(map, outputCollector) {\n\t\t// Example: Makes an output record for each field+value in input\n\t\tmap.each{\n\t\t\tk, v -> outputCollector.putValues(k, v)\n\t\t};\n\t}\n\n\tvoid close() {\n\t\t// Optional destroyer\n\t}\n}";

    @Inject
    @Provided
    OutputRowCollector _outputRowCollector;

    private GroovyObject _groovyObject;
    private GroovyClassLoader _groovyClassLoader;

    @Initialize
    public void init() {
        ClassLoader parent = getClass().getClassLoader();
        _groovyClassLoader = new GroovyClassLoader(parent);
        logger.debug("Compiling Groovy code:\n{}", code);
        final Class groovyClass = _groovyClassLoader.parseClass(code);
        _groovyObject = (GroovyObject) ReflectionUtils.newInstance(groovyClass);
        _groovyObject.invokeMethod("initialize", new Object[] {});
    }

    @Close
    public void close() {
        _groovyObject.invokeMethod("close", new Object[] {});
        _groovyObject = null;
        _groovyClassLoader.clearCache();
        _groovyClassLoader = null;
    }

    public OutputColumns getOutputColumns() {
        String[] names = new String[outputFields];
        for (int i = 0; i < outputFields; i++) {
            names[i] = "Groovy output (" + (i + 1) + ")";
        }
        return new OutputColumns(String.class, names);
    }

    public String[] transform(InputRow inputRow) {
        final Map map = new LinkedHashMap();
        for (InputColumn input : inputs) {
            map.put(input.getName(), inputRow.getValue(input));
        }
        final Object[] args = new Object[] { map, _outputRowCollector };
        if (concurrent) {
            _groovyObject.invokeMethod("transform", args);
        } else {
            synchronized (_groovyObject) {
                _groovyObject.invokeMethod("transform", args);
            }
        }
        return null;
    }

}




© 2015 - 2025 Weber Informatics LLC | Privacy Policy