All downloads are free. Search and download functionality uses the official Maven repository.

eu.project.ttc.engines.Merger Maven / Gradle / Ivy

Go to download

A Java UIMA-based toolbox for multilingual and efficient terminology extraction and multilingual term alignment

There is a newer version: 3.0.10
Show newest version
package eu.project.ttc.engines;

import java.util.List;

import org.apache.uima.analysis_engine.AnalysisEngineProcessException;
import org.apache.uima.fit.component.JCasAnnotator_ImplBase;
import org.apache.uima.fit.descriptor.ConfigurationParameter;
import org.apache.uima.fit.descriptor.ExternalResource;
import org.apache.uima.jcas.JCas;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import com.google.common.collect.Lists;

import eu.project.ttc.metrics.DiacriticInsensitiveLevenshtein;
import eu.project.ttc.metrics.EditDistance;
import eu.project.ttc.models.Term;
import eu.project.ttc.models.TermIndex;
import eu.project.ttc.models.TermOccurrence;
import eu.project.ttc.models.TermVariation;
import eu.project.ttc.resources.ObserverResource;
import eu.project.ttc.resources.TermIndexResource;

/**
 * uimaFIT annotator for the "Merging variants" task.
 *
 * Per-document processing ({@code process}) is a no-op; the work is started
 * from {@code collectionProcessComplete()}, which walks the terms of the
 * shared term index and their variations.
 *
 * NOTE(review): the text of this class is truncated inside
 * {@code collectionProcessComplete()} (see the marker there), so the actual
 * merge criteria and the way merged terms are removed cannot be read from
 * this copy. Restore the file from the original repository before changing
 * behavior.
 */
public class Merger extends JCasAnnotator_ImplBase {
	private static final Logger logger = LoggerFactory.getLogger(Merger.class);
	/** Human-readable task name, used in the start-up log message. */
	public static final String TASK_NAME = "Merging variants";

	/**
	 * Mandatory external resource bound under {@code ObserverResource.OBSERVER}.
	 * Not referenced in the visible part of this class.
	 */
	@ExternalResource(key=ObserverResource.OBSERVER, mandatory=true)
	protected ObserverResource observerResource;
	
	/**
	 * Mandatory external resource bound under {@code TermIndexResource.TERM_INDEX};
	 * supplies the term index that is iterated at collection completion.
	 */
	@ExternalResource(key=TermIndexResource.TERM_INDEX, mandatory=true)
	private TermIndexResource termIndexResource;
	
	/** Name of the optional configuration parameter that sets {@code threshold}. */
	public static final String SIMILARITY_THRESHOLD = "SimilarityThreshold";
	/**
	 * Value of the {@code SimilarityThreshold} parameter; defaults to 0.9 when
	 * not configured. It is the right-hand side of a {@code >=} comparison in
	 * the truncated tail of this class — presumably a minimum similarity
	 * between two terms; TODO confirm against the full source.
	 */
	@ConfigurationParameter(name=SIMILARITY_THRESHOLD, mandatory=false, defaultValue="0.9")
	private float threshold;
	

	// Edit-distance implementation; its call site is not visible in this copy
	// of the file. Presumably used to compare variants — verify in full source.
	private EditDistance distance = new DiacriticInsensitiveLevenshtein();

	/**
	 * Intentionally does nothing for individual documents.
	 *
	 * @param aJCas the CAS of the current document (ignored)
	 * @throws AnalysisEngineProcessException declared by the overridden method; never thrown here
	 */
	@Override
	public void process(JCas aJCas) throws AnalysisEngineProcessException {
		/*
		 * Do nothing
		 */
	}
	
	/**
	 * Logs the start of the task, fetches the term index from
	 * {@code termIndexResource} and iterates over every term, taking a copy of
	 * each term's variations.
	 *
	 * NOTE(review): the remainder of the method is missing from this copy of
	 * the file; what is done with {@code nbMerged} and {@code rem} cannot be
	 * confirmed from here.
	 *
	 * @throws AnalysisEngineProcessException declared by the overridden method
	 */
	@Override
	public void collectionProcessComplete() throws AnalysisEngineProcessException {
		logger.info("Starting " + TASK_NAME);
		TermIndex termIndex = termIndexResource.getTermIndex();
		// Counter initialised to zero; its updates are in the truncated part.
		int nbMerged = 0;
		
		// NOTE(review): raw List — generic type arguments appear to have been
		// stripped from this copy of the file; confirm the element type upstream.
		List rem = Lists.newArrayList();
		for(Term t:termIndex.getTerms()) {
			// Copy of the term's variations, taken before the pairwise loop below.
			List variations = Lists.newArrayList(t.getVariations());
			TermVariation v1, v2;
			Term t1, t2;
			// NOTE(review): SOURCE IS TRUNCATED ON THE NEXT LINE. The loop header
			// is fused with the end of a later "... >= this.threshold;" statement;
			// everything in between is missing and this line does not compile as-is.
			for(int i=0; i= this.threshold;
	}
}




© 2015 - 2025 Weber Informatics LLC | Privacy Policy