All Downloads are FREE. Search and download functionalities are using the official Maven repository.

com.optimaize.langdetect.cybozu.util.Util Maven / Gradle / Ivy

package com.optimaize.langdetect.cybozu.util;

import com.optimaize.langdetect.i18n.LdLocale;
import com.optimaize.langdetect.ngram.NgramExtractor;
import com.optimaize.langdetect.ngram.NgramExtractors;
import com.optimaize.langdetect.ngram.OldNgramExtractor;
import org.jetbrains.annotations.NotNull;

import java.util.Formatter;
import java.util.List;
import java.util.Map;

/**
 * A place for sharing code.
 *
 * @author Fabian Kessler
 */
public class Util {

    private static final NgramExtractor ngramExtractor = NgramExtractors.standard();

    public static void addCharSequence(LangProfile langProfile, CharSequence text) {
        //TODO replace with new code.
        // The n-grams still come from OldNgramExtractor (called with a null second
        // argument). The intended replacement is to iterate over
        // ngramExtractor.extractGrams(text) instead; that switch has not been made yet.
        List<String> grams = OldNgramExtractor.extractNGrams(text, null);
        for (String gram : grams) {
            // Every extracted n-gram is handed to the profile, one call per n-gram.
            langProfile.add(gram);
        }
    }



    /**
     * Unicode encoding (for verbose mode).
     *
     * <p>Chars below U+0080 are copied through unchanged. Every other char is replaced
     * by a backslash, the letter 'u' and the char's value as four lowercase hex digits.
     * The input is processed one {@code char} at a time, so a surrogate pair produces
     * two escapes.</p>
     *
     * @param s the text to encode
     * @return the encoded text
     */
    public static String unicodeEncode(String s) {
        StringBuilder out = new StringBuilder();
        for (char c : s.toCharArray()) {
            if (c < '\u0080') {
                out.append(c);
                continue;
            }
            // 0x10000 + c always prints as five hex digits ("1xxxx"); skipping the
            // leading digit leaves the value zero-padded to exactly four digits.
            String hex = Integer.toHexString(0x10000 + c);
            out.append("\\u").append(hex, 1, 5);
        }
        return out.toString();
    }


    /**
     * normalize probabilities and check convergence by the maximum probability
     *
     * NOTE(review): this listing is damaged. Text that originally sat between a
     * less-than sign and the next greater-than sign appears to have been stripped
     * (probably by an HTML extraction step), so the rest of this method's body and
     * the header of the method that follows it are missing. The code below does not
     * compile as shown - TODO restore it from the original sources.
     *
     * @param prob presumably the probabilities to normalize - confirm against the
     *             restored body
     * @return maximum of probabilities
     */
    public static double normalizeProb(double[] prob) {
        double maxp = 0, sump = 0;
        // NOTE(review): the loop header below is cut off after "i"; what follows on
        // the same line ("langlist) {") looks like the tail of a different method's
        // parameter list.
        for(int i=0;i langlist) {
        // Output is accumulated in a java.util.Formatter and returned as a String.
        Formatter formatter = new Formatter();
        // NOTE(review): this loop header is cut off as well; the surviving
        // ">=0.00001" suggests only values of at least 0.00001 are printed - confirm.
        for(int j=0;j=0.00001) {
                // Appends one " <entry>:<value>" item, the value with five decimals.
                formatter.format(" %s:%.5f", langlist.get(j), p);
            }
        }
        return formatter.toString();
    }


    /**
     */
    public static double[] makeInternalPrioMap(@NotNull Map langWeightingMap,
                                                @NotNull List langlist) {
        assert !langWeightingMap.isEmpty();
        double[] priorMap = new double[langlist.size()];
        double sump = 0;
        for (int i=0;i=0 : "Prior probability must be non-negative!";
                priorMap[i] = p;
                sump += p;
            }
        }
        assert sump > 0 : "Sum must be greater than zero!";
        for (int i=0;i




© 2015 - 2024 Weber Informatics LLC | Privacy Policy