All Downloads are FREE. Search and download functionalities are using the official Maven repository.

hex.genmodel.mojopipeline.transformers.StringGrepTransform Maven / Gradle / Ivy

package hex.genmodel.mojopipeline.transformers;

import ai.h2o.mojos.runtime.api.backend.ReaderBackend;
import ai.h2o.mojos.runtime.frame.MojoFrame;
import ai.h2o.mojos.runtime.frame.MojoFrameMeta;
import ai.h2o.mojos.runtime.transforms.MojoTransform;
import ai.h2o.mojos.runtime.transforms.MojoTransformBuilderFactory;
import hex.genmodel.mojopipeline.parsing.ParameterParser;
import org.apache.commons.io.FileUtils;
import org.apache.commons.lang.StringUtils;

import java.io.File;
import java.io.IOException;
import java.util.HashMap;
import java.util.HashSet;
import java.util.Map;
import java.util.regex.Matcher;
import java.util.regex.Pattern;

public class StringGrepTransform extends MojoTransform {
    Pattern _pattern =  null;
    Boolean _invert = null;

    StringGrepTransform(int[] iindices, int[] oindices, Pattern pattern, Boolean invert) {
        super(iindices, oindices);
        _pattern = pattern;
        _invert = invert;
    }

    @Override
    public void transform(MojoFrame frame) {
        String[] a = (String[]) frame.getColumnData(iindices[0]);
        double[] o = (double[]) frame.getColumnData(oindices[0]);
        Matcher matcher = _pattern.matcher("");
        for (int i = 0, nrows = frame.getNrows(); i < nrows; i++) {
            if (a[i] == null) {
                o[i] = _invert ? 1 : 0;
            } else {
                matcher.reset(a[i]);
                o[i] = matcher.find() != _invert ? 1 : 0;
            }
        }
    }

    public static class Factory implements MojoTransformBuilderFactory {

        public static final String TRANSFORMER_ID = "hex.genmodel.mojopipeline.transformers.StringGrepTransform";
       
        public static boolean functionExists(String functionName) {
            return functionName.equals("grep");
        }

        @Override
        public String transformerName() {
            return TRANSFORMER_ID;
        }

        @Override
        public MojoTransform createBuilder(MojoFrameMeta meta,
                                           int[] iindcies, int[] oindices,
                                           Map params,
                                           ReaderBackend backend) {
            Object ignoreCaseObj = params.get("ignore_case");
            if (ignoreCaseObj == null) {
                throw new IllegalArgumentException("The 'ignore_case' param is not passed to 'grep' function!");
            }
            boolean ignoreCase = ParameterParser.paramValueToBoolean(ignoreCaseObj);
            int flags = ignoreCase ? Pattern.CASE_INSENSITIVE | Pattern.UNICODE_CASE : 0;

            Object invertObj = params.get("invert");
            if (invertObj == null) {
                throw new IllegalArgumentException("The 'invert' param is not passed to 'grep' function!");
            }
            boolean invert = ParameterParser.paramValueToBoolean(invertObj);

            Object outputLogicalObj = params.get("output_logical");
            if (outputLogicalObj == null) {
                throw new IllegalArgumentException("The 'output_logical' param is not passed to 'grep' function!");
            }
            boolean outputLogical = ParameterParser.paramValueToBoolean(outputLogicalObj);
            if (!outputLogical) {
                throw new IllegalArgumentException("The 'grep' operation in MOJO supports just logical output!");
            }

            Object patternObj = params.get("regex");
            if (patternObj == null) {
                throw new IllegalArgumentException("The 'pattern' param is not passed to 'grep' function!");
            }
            String stringPattern = (String)patternObj;
            Pattern pattern = Pattern.compile(stringPattern, flags);
            
            return new StringGrepTransform(
                iindcies,
                oindices,
                pattern,
                invert);
        }
    }
}




© 2015 - 2025 Weber Informatics LLC | Privacy Policy