com.google.refine.util.DetectLanguageUtils Maven / Gradle / Ivy
Go to download
Show more of this group Show more artifacts with this name
Show all versions of main Show documentation
Show all versions of main Show documentation
OpenRefine is a free, open-source power tool for working with messy data and improving it.
package com.google.refine.util;
import java.io.IOException;
import java.util.ArrayList;
import java.util.List;
import com.google.common.base.Optional;
import com.optimaize.langdetect.LanguageDetector;
import com.optimaize.langdetect.LanguageDetectorBuilder;
import com.optimaize.langdetect.i18n.LdLocale;
import com.optimaize.langdetect.ngram.NgramExtractors;
import com.optimaize.langdetect.profiles.LanguageProfile;
import com.optimaize.langdetect.profiles.LanguageProfileReader;
import com.optimaize.langdetect.text.CommonTextObjectFactories;
import com.optimaize.langdetect.text.TextObject;
import com.optimaize.langdetect.text.TextObjectFactory;
/**
 * Static helper that guesses the language of a piece of text using the
 * optimaize language-detector library.
 *
 * <p>The detector is assembled from the library's built-in language profiles
 * the first time {@link #detect(String)} is called and is then reused, so the
 * profiles are read and the detector is built only once instead of on every
 * call.
 */
public class DetectLanguageUtils {

    /**
     * Shared detector, created lazily by {@link #getLanguageDetector()}.
     * Stays {@code null} until the first successful build, so a failed load is
     * retried on the next call.
     */
    private static LanguageDetector languageDetector;

    /**
     * Detects the language of the given text.
     *
     * @param text
     *            the text to analyse
     * @return the {@code Optional} produced by the detector for this text
     * @throws IOException
     *             if the built-in language profiles cannot be loaded
     */
    // NOTE(review): the return type is raw here; the detector presumably yields
    // Optional<LdLocale> (LdLocale is imported but unused in this file) - confirm
    // and parameterize the signature.
    public static Optional detect(String text) throws IOException {
        LanguageDetector detector = getLanguageDetector();

        // The text factory is configured for detection on large texts; it is
        // created per call, exactly as before.
        TextObjectFactory textObjectFactory = CommonTextObjectFactories.forDetectingOnLargeText();
        TextObject textObject = textObjectFactory.forText(text);

        return detector.detect(textObject);
    }

    /**
     * Returns the shared detector, building it from all built-in language
     * profiles on first use.
     *
     * <p>Synchronized so that concurrent first calls build the detector only
     * once and every thread sees the fully constructed instance.
     * NOTE(review): sharing one instance assumes the library's detector is safe
     * for concurrent use - confirm against the library documentation.
     */
    private static synchronized LanguageDetector getLanguageDetector() throws IOException {
        if (languageDetector == null) {
            // Assign only after a successful build: if loading the profiles
            // throws, the field stays null and the next call tries again.
            languageDetector = LanguageDetectorBuilder.create(NgramExtractors.standard())
                    .withProfiles(new LanguageProfileReader().readAllBuiltIn())
                    .build();
        }
        return languageDetector;
    }
}