All downloads are free. Search and download functionality uses the official Maven repository.

com.mayabot.nlp.common.utils.CharNormUtils Maven / Gradle / Ivy

package com.mayabot.nlp.common.utils;

import com.mayabot.nlp.common.Guava;

import java.security.AccessController;
import java.security.PrivilegedAction;
import java.util.ArrayList;
import java.util.HashSet;
import java.util.List;
import java.util.Set;

/**
 * Character normalization driven by a lookup table that is loaded once, at
 * class-initialization time, from the {@code char_norm} resource resolved
 * relative to this class.
 *
 * <p>Each usable resource line has the form {@code X=Y}: the single UTF-16
 * code unit {@code X} is replaced by the single code unit {@code Y}. Code
 * units without an entry are left unchanged by every {@code convert}
 * overload.
 */
public class CharNormUtils {

    /**
     * Replacement for every UTF-16 code unit, indexed by the code unit itself.
     * A value of {@code 0} means "no mapping". The array must hold
     * {@code Character.MAX_VALUE + 1} entries so that {@code '\uFFFF'} is a
     * valid index as well.
     */
    private static final char[] table = new char[Character.MAX_VALUE + 1];

    static {
        AccessController.doPrivileged((PrivilegedAction<Void>) () -> {
            try {
                List<String> lines = Guava.readLines(
                        Guava.getResource(CharNormUtils.class, "char_norm"));

                for (String line : lines) {
                    int eq = line.indexOf('=');
                    if (eq < 0) {
                        // Blank lines and lines without '=' carry no mapping.
                        continue;
                    }
                    // The separator must directly follow a single source char and
                    // be followed by at least one target char. Rejecting anything
                    // else keeps one malformed line (e.g. "a=") from throwing and
                    // aborting the load of all remaining mappings.
                    if (eq != 1 || line.length() < 3) {
                        System.err.println("Error " + line);
                        continue;
                    }

                    // Only the char right after '=' is used; trailing text is ignored.
                    table[line.charAt(0)] = line.charAt(2);
                }
            } catch (Exception e) {
                // Best effort: if the resource cannot be read the table stays
                // (partially) empty and convert() returns its input unchanged.
                e.printStackTrace();
            }
            return null;
        });

    }

    /**
     * Normalizes a single character.
     *
     * @param input the character to normalize
     * @return the mapped character, or {@code input} itself when it has no mapping
     */
    public static char convert(char input) {
        char mapped = table[input];
        return mapped != 0 ? mapped : input;
    }

    /**
     * Normalizes every character of the array in place.
     *
     * @param chars the characters to normalize; modified directly
     * @throws NullPointerException if {@code chars} is {@code null}
     */
    public static void convert(char[] chars) {
        replaceFrom(chars, 0);
    }

    /**
     * Normalizes a string.
     *
     * @param input the text to normalize
     * @return {@code input} itself when no character has a mapping, otherwise
     *         a new string with every mapped character replaced
     * @throws NullPointerException if {@code input} is {@code null}
     */
    public static String convert(String input) {
        final int len = input.length();

        // Scan first so the common "nothing to change" case allocates nothing.
        int firstMapped = -1;
        for (int i = 0; i < len; i++) {
            if (table[input.charAt(i)] != 0) {
                firstMapped = i;
                break;
            }
        }
        if (firstMapped == -1) {
            return input;
        }

        char[] chars = input.toCharArray();
        replaceFrom(chars, firstMapped);
        return new String(chars);
    }

    /** Replaces, in place, every mapped character of {@code chars} at index {@code from} or later. */
    private static void replaceFrom(char[] chars, int from) {
        for (int i = from; i < chars.length; i++) {
            char mapped = table[chars[i]];
            if (mapped != 0) {
                chars[i] = mapped;
            }
        }
    }

}




© 2015 - 2024 Weber Informatics LLC | Privacy Policy