// com.cybozu.labs.langdetect.util.Util, from the language-detector artifact (Maven / Gradle / Ivy).
// Language Detection Library for Java.
package com.cybozu.labs.langdetect.util;
import com.optimaize.langdetect.ngram.NgramExtractor;
import com.optimaize.langdetect.ngram.NgramExtractors;
import com.optimaize.langdetect.ngram.OldNgramExtractor;
import org.jetbrains.annotations.NotNull;
import java.util.*;
/**
* A place for sharing code.
*
* @author Fabian Kessler
*/
public class Util {
private static final NgramExtractor ngramExtractor = NgramExtractors.standard();
/**
 * Feeds every n-gram extracted from {@code text} into {@code langProfile}.
 *
 * @param langProfile profile that receives each extracted n-gram through {@code add}
 * @param text        character sequence to split into n-grams
 */
public static void addCharSequence(LangProfile langProfile, CharSequence text) {
    // TODO replace with new code: the intended successor is ngramExtractor.extractGrams(text).
    // An earlier experiment compared its output with the old extractor's before switching;
    // until that is settled the old extractor stays in use.
    final List<String> grams = OldNgramExtractor.extractNGrams(text, null);
    for (final String gram : grams) {
        langProfile.add(gram);
    }
}
/**
 * Escapes non-ASCII characters for verbose output.
 *
 * <p>Every char below 0x80 is copied unchanged; every other char is written as a backslash,
 * the letter u and exactly four lowercase hexadecimal digits. Surrogate halves are escaped
 * one char at a time.
 *
 * @param s text to encode
 * @return the encoded text
 */
public static String unicodeEncode(String s) {
    final int length = s.length();
    final StringBuilder encoded = new StringBuilder();
    int index = 0;
    while (index < length) {
        final char current = s.charAt(index++);
        if (current < 0x80) {
            encoded.append(current);
            continue;
        }
        encoded.append("\\u");
        // Emit the four nibbles from most to least significant; forDigit yields lowercase.
        for (int shift = 12; shift >= 0; shift -= 4) {
            encoded.append(Character.forDigit((current >> shift) & 0xF, 16));
        }
    }
    return encoded.toString();
}
/**
* normalize probabilities and check convergence by the maximum probability
* @return maximum of probabilities
*/
public static double normalizeProb(double[] prob) {
double maxp = 0, sump = 0;
for(int i=0;i langlist) {
Formatter formatter = new Formatter();
for(int j=0;j=0.00001) {
formatter.format(" %s:%.5f", langlist.get(j), p);
}
}
return formatter.toString();
}
/**
* Builds the internal array of prior probabilities for the given language list.
*
* <p>The array is allocated with one slot per entry of {@code langlist}. Each stored value
* {@code p} is asserted to be non-negative and is added to the running total {@code sump},
* which is asserted to be greater than zero once the first loop has finished.
*
* NOTE(review): this copy of the method is damaged. The text between a {@code <} and the
* next {@code >} was dropped (loop headers, the statement that defines {@code p}, the generic
* type arguments of both parameters), and everything after the start of the final
* {@code for} loop is missing, including the return statement. Presumably {@code p} is the
* weight found in {@code langWeightingMap} for {@code langlist.get(i)} and the final loop
* divides every entry by {@code sump} - restore from the upstream source and confirm.
*
* @param langWeightingMap weighting per language; asserted to be non-empty
* @param langlist languages; its size fixes the length of the prior array
* @return the prior array (per the declared return type; the return statement is not visible here)
*/
public static double[] makeInternalPrioMap(@NotNull Map langWeightingMap,
@NotNull List langlist) {
assert !langWeightingMap.isEmpty();
double[] priorMap = new double[langlist.size()];
double sump = 0;
for (int i=0;i=0 : "Prior probability must be non-negative!";
priorMap[i] = p;
sump += p;
}
}
assert sump > 0 : "Sum must be greater than zero!";
for (int i=0;i
// © 2015 - 2025 Weber Informatics LLC | Privacy Policy