All downloads are free. The search and download functionality uses the official Maven repository.

org.datavec.dataframe.mapping.StringMapUtils Maven / Gradle / Ivy

Go to download

High-performance Java Dataframe with integrated columnar storage (fork of tablesaw)

There is a newer version: 0.9.1
Show newest version
package org.datavec.dataframe.mapping;

import com.google.common.base.Strings;
import java.util.regex.Pattern;
import org.apache.commons.lang3.StringUtils;
import org.datavec.dataframe.api.CategoryColumn;
import org.datavec.dataframe.api.FloatColumn;
import org.datavec.dataframe.columns.Column;

/**
 * String utility functions for columns. Each function takes one or more String columns as input
 * and produces another Column as output. The resulting column need not be a string column.
 *
 * <p>Every function walks rows {@code 0 .. size() - 1}, reads each row with {@code getString(r)},
 * and writes the mapped value to the same row index of a newly created column. The new column is
 * named after the input column(s) plus a bracketed suffix such as {@code "[ucase]"}.
 *
 * <p>Null handling: the functions returning a {@link CategoryColumn} store {@code null} in the
 * result row when an input value is {@code null}, mirroring {@link #upperCase()}.
 * {@link #distance(Column)} is the exception and rejects {@code null} values.
 *
 * <p>NOTE(review): results are written with {@code set(r, ...)} into a column that was just
 * created from a name only. This assumes {@code set} can populate a row index on a freshly
 * created column -- confirm against {@code CategoryColumn} / {@code FloatColumn}.
 *
 * <p>NOTE(review): the two-column functions iterate over this column's {@code size()} only;
 * {@code column2} is presumably expected to have at least as many rows -- verify against callers.
 */
public interface StringMapUtils extends Column {

    /**
     * Upper-cases every value using the default locale ({@link String#toUpperCase()}).
     *
     * @return a new column named {@code name() + "[ucase]"}
     */
    default CategoryColumn upperCase() {
        CategoryColumn newColumn = CategoryColumn.create(name() + "[ucase]");

        for (int r = 0; r < size(); r++) {
            String value = getString(r);
            newColumn.set(r, value == null ? null : value.toUpperCase());
        }
        return newColumn;
    }

    /**
     * Lower-cases every value using the default locale ({@link String#toLowerCase()}).
     *
     * @return a new column named {@code name() + "[lcase]"}
     */
    default CategoryColumn lowerCase() {
        CategoryColumn newColumn = CategoryColumn.create(name() + "[lcase]");

        for (int r = 0; r < size(); r++) {
            String value = getString(r);
            newColumn.set(r, value == null ? null : value.toLowerCase());
        }
        return newColumn;
    }

    /**
     * Strips leading and trailing whitespace from every value ({@link String#trim()}).
     *
     * @return a new column named {@code name() + "[trim]"}
     */
    default CategoryColumn trim() {
        CategoryColumn newColumn = CategoryColumn.create(name() + "[trim]");

        for (int r = 0; r < size(); r++) {
            String value = getString(r);
            newColumn.set(r, value == null ? null : value.trim());
        }
        return newColumn;
    }

    /**
     * Replaces every match of {@code regex} in each value with {@code replacement}, with the same
     * semantics as {@link String#replaceAll(String, String)}.
     *
     * @param regex       the regular expression to match
     * @param replacement the replacement text (may contain group references such as {@code $1})
     * @return a new column named {@code name() + "[repl]"}
     * @throws java.util.regex.PatternSyntaxException if {@code regex} is not a valid pattern
     */
    default CategoryColumn replaceAll(String regex, String replacement) {
        CategoryColumn newColumn = CategoryColumn.create(name() + "[repl]");
        // Compile once up front; String.replaceAll would recompile the pattern for every row.
        Pattern pattern = Pattern.compile(regex);

        for (int r = 0; r < size(); r++) {
            String value = getString(r);
            newColumn.set(r, value == null ? null : pattern.matcher(value).replaceAll(replacement));
        }
        return newColumn;
    }

    /**
     * Replaces the first match of {@code regex} in each value with {@code replacement}, with the
     * same semantics as {@link String#replaceFirst(String, String)}.
     *
     * @param regex       the regular expression to match
     * @param replacement the replacement text (may contain group references such as {@code $1})
     * @return a new column named {@code name() + "[repl]"}
     * @throws java.util.regex.PatternSyntaxException if {@code regex} is not a valid pattern
     */
    default CategoryColumn replaceFirst(String regex, String replacement) {
        CategoryColumn newColumn = CategoryColumn.create(name() + "[repl]");
        // Compile once up front; String.replaceFirst would recompile the pattern for every row.
        Pattern pattern = Pattern.compile(regex);

        for (int r = 0; r < size(); r++) {
            String value = getString(r);
            newColumn.set(r, value == null ? null : pattern.matcher(value).replaceFirst(replacement));
        }
        return newColumn;
    }

    /**
     * Takes {@code value.substring(start, end)} of every value.
     *
     * @param start the beginning index, inclusive
     * @param end   the ending index, exclusive
     * @return a new column named {@code name() + "[sub]"}
     * @throws StringIndexOutOfBoundsException if the indices are out of range for any non-null
     *                                         value
     */
    default CategoryColumn substring(int start, int end) {
        CategoryColumn newColumn = CategoryColumn.create(name() + "[sub]");

        for (int r = 0; r < size(); r++) {
            String value = getString(r);
            newColumn.set(r, value == null ? null : value.substring(start, end));
        }
        return newColumn;
    }

    /**
     * Takes {@code value.substring(start)} of every value.
     *
     * @param start the beginning index, inclusive
     * @return a new column named {@code name() + "[sub]"}
     * @throws StringIndexOutOfBoundsException if {@code start} is out of range for any non-null
     *                                         value
     */
    default CategoryColumn substring(int start) {
        CategoryColumn newColumn = CategoryColumn.create(name() + "[sub]");

        for (int r = 0; r < size(); r++) {
            String value = getString(r);
            newColumn.set(r, value == null ? null : value.substring(start));
        }
        return newColumn;
    }

    /**
     * Abbreviates every value to at most {@code maxWidth} characters using
     * {@link StringUtils#abbreviate(String, int)}, which itself returns {@code null} for a
     * {@code null} input.
     *
     * @param maxWidth the maximum length of each result; see {@code StringUtils.abbreviate} for
     *                 the minimum accepted width
     * @return a new column named {@code name() + "[abbr]"}
     */
    default CategoryColumn abbreviate(int maxWidth) {
        CategoryColumn newColumn = CategoryColumn.create(name() + "[abbr]");

        for (int r = 0; r < size(); r++) {
            newColumn.set(r, StringUtils.abbreviate(getString(r), maxWidth));
        }
        return newColumn;
    }

    /**
     * Pads every value on the right with {@code padChar} until it is at least {@code minLength}
     * characters long ({@link Strings#padEnd(String, int, char)}).
     *
     * @param minLength the minimum length of each result
     * @param padChar   the character appended to reach {@code minLength}
     * @return a new column named {@code name() + "[pad]"}
     */
    default CategoryColumn padEnd(int minLength, char padChar) {
        CategoryColumn newColumn = CategoryColumn.create(name() + "[pad]");

        for (int r = 0; r < size(); r++) {
            String value = getString(r);
            // Strings.padEnd rejects null, so propagate null explicitly.
            newColumn.set(r, value == null ? null : Strings.padEnd(value, minLength, padChar));
        }
        return newColumn;
    }

    /**
     * Pads every value on the left with {@code padChar} until it is at least {@code minLength}
     * characters long ({@link Strings#padStart(String, int, char)}).
     *
     * @param minLength the minimum length of each result
     * @param padChar   the character prepended to reach {@code minLength}
     * @return a new column named {@code name() + "[pad]"}
     */
    default CategoryColumn padStart(int minLength, char padChar) {
        CategoryColumn newColumn = CategoryColumn.create(name() + "[pad]");

        for (int r = 0; r < size(); r++) {
            String value = getString(r);
            // Strings.padStart rejects null, so propagate null explicitly.
            newColumn.set(r, value == null ? null : Strings.padStart(value, minLength, padChar));
        }
        return newColumn;
    }

    /**
     * Computes, row by row, the longest common prefix of this column's value and
     * {@code column2}'s value ({@link Strings#commonPrefix(CharSequence, CharSequence)}).
     * A row where either value is {@code null} yields {@code null}.
     *
     * @param column2 the column to compare against
     * @return a new column named {@code name() + column2.name() + "[prefix]"}
     */
    default CategoryColumn commonPrefix(Column column2) {
        CategoryColumn newColumn = CategoryColumn.create(name() + column2.name() + "[prefix]");

        for (int r = 0; r < size(); r++) {
            String value1 = getString(r);
            String value2 = column2.getString(r);
            // Strings.commonPrefix rejects null arguments, so propagate null explicitly.
            boolean missing = value1 == null || value2 == null;
            newColumn.set(r, missing ? null : Strings.commonPrefix(value1, value2));
        }
        return newColumn;
    }

    /**
     * Computes, row by row, the longest common suffix of this column's value and
     * {@code column2}'s value ({@link Strings#commonSuffix(CharSequence, CharSequence)}).
     * A row where either value is {@code null} yields {@code null}.
     *
     * @param column2 the column to compare against
     * @return a new column named {@code name() + column2.name() + "[suffix]"}
     */
    default CategoryColumn commonSuffix(Column column2) {
        CategoryColumn newColumn = CategoryColumn.create(name() + column2.name() + "[suffix]");

        for (int r = 0; r < size(); r++) {
            String value1 = getString(r);
            String value2 = column2.getString(r);
            // Strings.commonSuffix rejects null arguments, so propagate null explicitly.
            boolean missing = value1 == null || value2 == null;
            newColumn.set(r, missing ? null : Strings.commonSuffix(value1, value2));
        }
        return newColumn;
    }

    /**
     * Returns a column containing the Levenshtein distance between the two given string columns,
     * computed row by row with {@code StringUtils.getLevenshteinDistance}.
     *
     * @param column2 the column to compare against
     * @return a new {@link FloatColumn} named {@code name() + column2.name() + "[distance]"}
     * @throws IllegalArgumentException if either value in a row is {@code null} (raised by
     *                                  {@code StringUtils.getLevenshteinDistance})
     */
    default Column distance(Column column2) {
        FloatColumn newColumn = FloatColumn.create(name() + column2.name() + "[distance]");

        for (int r = 0; r < size(); r++) {
            String value1 = getString(r);
            String value2 = column2.getString(r);
            newColumn.set(r, StringUtils.getLevenshteinDistance(value1, value2));
        }
        return newColumn;
    }

    /**
     * Joins, row by row, this column's value and {@code column2}'s value with {@code delimiter}
     * between them, using {@code StringUtils.join}. Per commons-lang, a {@code null} element is
     * rendered as an empty string.
     *
     * @param column2   the column whose values are appended after the delimiter
     * @param delimiter the separator placed between the two values
     * @return a new column named {@code name() + column2.name() + "[join]"}
     */
    default CategoryColumn join(Column column2, String delimiter) {
        CategoryColumn newColumn = CategoryColumn.create(name() + column2.name() + "[join]");

        for (int r = 0; r < size(); r++) {
            String[] values = {getString(r), column2.getString(r)};
            newColumn.set(r, StringUtils.join(values, delimiter));
        }
        return newColumn;
    }
}




© 2015 - 2025 Weber Informatics LLC | Privacy Policy