All downloads are free. The search and download functionality uses the official Maven repository.

com.google.refine.util.DetectLanguageUtils Maven / Gradle / Ivy

Go to download

OpenRefine is a free, open-source power tool for working with messy data and improving it.

There is a newer version: 3.8.2
Show newest version

package com.google.refine.util;

import java.io.IOException;
import java.util.ArrayList;
import java.util.List;

import com.google.common.base.Optional;
import com.optimaize.langdetect.LanguageDetector;
import com.optimaize.langdetect.LanguageDetectorBuilder;
import com.optimaize.langdetect.i18n.LdLocale;
import com.optimaize.langdetect.ngram.NgramExtractors;
import com.optimaize.langdetect.profiles.LanguageProfile;
import com.optimaize.langdetect.profiles.LanguageProfileReader;
import com.optimaize.langdetect.text.CommonTextObjectFactories;
import com.optimaize.langdetect.text.TextObject;
import com.optimaize.langdetect.text.TextObjectFactory;

/**
 * Static helper that guesses the language of a piece of text using the
 * optimaize language-detector library and its built-in language profiles.
 */
public class DetectLanguageUtils {

    /**
     * Detector shared by every call to {@link #detect(String)}. It is created
     * on first use by {@link #getLanguageDetector()} so that the built-in
     * profiles are read, and the detector assembled, only once.
     */
    private static LanguageDetector languageDetector;

    /**
     * Detects the language of the given text.
     *
     * @param text
     *            the text to analyse
     * @return whatever {@code LanguageDetector.detect} reports for the text: the detected locale, or an absent
     *         {@code Optional} when the detector does not commit to a language
     * @throws IOException
     *             if the built-in language profiles cannot be read on first use
     */
    public static Optional<LdLocale> detect(String text) throws IOException {
        // obtain the detector first, so a profile-loading failure surfaces before the text is touched
        LanguageDetector detector = getLanguageDetector();

        // same factory preset as before: the one the library provides for large texts
        TextObjectFactory textObjectFactory = CommonTextObjectFactories.forDetectingOnLargeText();
        TextObject textObject = textObjectFactory.forText(text);

        return detector.detect(textObject);
    }

    /**
     * Returns the shared detector, building it from all built-in profiles with the standard n-gram extractor the
     * first time it is requested. Synchronized so that concurrent first calls do not each build a detector.
     *
     * NOTE(review): the returned instance is shared between callers; this assumes the library's detector is safe
     * to use from several threads - confirm against the library version in use.
     *
     * @throws IOException
     *             if the built-in language profiles cannot be read
     */
    private static synchronized LanguageDetector getLanguageDetector() throws IOException {
        if (languageDetector == null) {
            List<LanguageProfile> languageProfiles = new LanguageProfileReader().readAllBuiltIn();
            languageDetector = LanguageDetectorBuilder.create(NgramExtractors.standard())
                    .withProfiles(languageProfiles)
                    .build();
        }
        return languageDetector;
    }
}




© 2015 - 2024 Weber Informatics LLC | Privacy Policy