All Downloads are FREE. Search and download functionalities are using the official Maven repository.

gr.iti.mklab.sfc.processors.LanguageDetector Maven / Gradle / Ivy

Go to download

Monitors a set of social streams (e.g. Twitter status updates) and collects the incoming content.

The newest version!
package gr.iti.mklab.sfc.processors;

import gr.iti.mklab.framework.common.domain.Item;
import gr.iti.mklab.framework.common.domain.config.Configuration;

import com.cybozu.labs.langdetect.Detector;
import com.cybozu.labs.langdetect.DetectorFactory;
import com.cybozu.labs.langdetect.LangDetectException;


public class LanguageDetector extends Processor {

	public LanguageDetector(Configuration configuration) {
		super(configuration);
		String profileDirectory = configuration.getParameter("profileDirectory", "profiles.sm");
		try {
			DetectorFactory.loadProfile(profileDirectory);
		} catch (LangDetectException e) {
			e.printStackTrace();
		}
	}

	@Override
	public void process(Item item) {
		String lang = item.getLanguage();
		if(lang == null) {
			// detect language if not exist
			String text = null;
			String title = item.getTitle();
			String description = item.getDescription();
			
			if(title != null) {
				text = title;
			}
			else if (description != null) {
				text = description;
			}
			else {
				return;
			}
			
			try {
				Detector detector = DetectorFactory.create();
				
				detector.append(text);
				lang = detector.detect();
				item.setLanguage(lang);
				
			} catch (Exception e) {
				
			}
		}
	}

}




© 2015 - 2025 Weber Informatics LLC | Privacy Policy