![JAR search and dependency download from the Maven repository](/logo.png)
eu.project.ttc.engines.Merger Maven / Gradle / Ivy
Go to download
Show more of this group Show more artifacts with this name
Show all versions of termsuite-core Show documentation
Show all versions of termsuite-core Show documentation
A Java UIMA-based toolbox for multilingual and efficient terminology extraction and multilingual term alignment
package eu.project.ttc.engines;
import java.util.List;
import org.apache.uima.analysis_engine.AnalysisEngineProcessException;
import org.apache.uima.fit.component.JCasAnnotator_ImplBase;
import org.apache.uima.fit.descriptor.ConfigurationParameter;
import org.apache.uima.fit.descriptor.ExternalResource;
import org.apache.uima.jcas.JCas;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import com.google.common.collect.Lists;
import eu.project.ttc.metrics.DiacriticInsensitiveLevenshtein;
import eu.project.ttc.metrics.EditDistance;
import eu.project.ttc.models.Term;
import eu.project.ttc.models.TermIndex;
import eu.project.ttc.models.TermOccurrence;
import eu.project.ttc.models.TermVariation;
import eu.project.ttc.resources.ObserverResource;
import eu.project.ttc.resources.TermIndexResource;
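/**
* UIMA annotator for the "Merging variants" task.
*
* Per-document processing in {@link #process(JCas)} is a no-op; the work is
* done in {@link #collectionProcessComplete()}, which reads the term index
* supplied by the {@link TermIndexResource} external resource.
*/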
public class Merger extends JCasAnnotator_ImplBase {
/** Class logger. */
private static final Logger logger = LoggerFactory.getLogger(Merger.class);
/** Task label; included in the log message written when collectionProcessComplete() starts. */
public static final String TASK_NAME = "Merging variants";
/**
* Mandatory external resource bound under the key {@code ObserverResource.OBSERVER}.
* NOTE(review): not referenced in the visible part of this class - confirm where it is used.
*/
@ExternalResource(key=ObserverResource.OBSERVER, mandatory=true)
protected ObserverResource observerResource;
/**
* Mandatory external resource bound under the key {@code TermIndexResource.TERM_INDEX};
* collectionProcessComplete() obtains the term index from it.
*/
@ExternalResource(key=TermIndexResource.TERM_INDEX, mandatory=true)
private TermIndexResource termIndexResource;
/** Name of the optional configuration parameter that populates {@link #threshold}. */
public static final String SIMILARITY_THRESHOLD = "SimilarityThreshold";
/**
* Similarity threshold; the parameter is optional and its declared default is "0.9".
* NOTE(review): presumably the lower bound a similarity score must reach for two
* variations to be merged - confirm against the full collectionProcessComplete() body.
*/
@ConfigurationParameter(name=SIMILARITY_THRESHOLD, mandatory=false, defaultValue="0.9")
private float threshold;
/**
* Edit-distance implementation, fixed to {@link DiacriticInsensitiveLevenshtein}.
* NOTE(review): presumably used to score variation pairs against {@link #threshold} - confirm.
*/
private EditDistance distance = new DiacriticInsensitiveLevenshtein();
/**
* Per-document hook; intentionally does nothing.
*
* @param aJCas the CAS of the current document; not read by this method
* @throws AnalysisEngineProcessException declared by the overridden signature; never thrown here
*/
@Override
public void process(JCas aJCas) throws AnalysisEngineProcessException {
/*
* Intentionally empty: no per-document work is performed.
*/
}
@Override
public void collectionProcessComplete() throws AnalysisEngineProcessException {
logger.info("Starting " + TASK_NAME);
TermIndex termIndex = termIndexResource.getTermIndex();
int nbMerged = 0;
List rem = Lists.newArrayList();
for(Term t:termIndex.getTerms()) {
List variations = Lists.newArrayList(t.getVariations());
TermVariation v1, v2;
Term t1, t2;
for(int i=0; i= this.threshold;
}
}
© 2015 - 2025 Weber Informatics LLC | Privacy Policy