All Downloads are FREE. Search and download functionalities are using the official Maven repository.

fr.univnantes.termsuite.index.providers.AllComponentPairsProvider Maven / Gradle / Ivy

package fr.univnantes.termsuite.index.providers;

import java.util.Collection;
import java.util.HashSet;
import java.util.Set;

import com.google.common.collect.Sets;

import fr.univnantes.termsuite.engines.splitter.CompoundUtils;
import fr.univnantes.termsuite.index.TermIndexValueProvider;
import fr.univnantes.termsuite.model.Component;
import fr.univnantes.termsuite.model.Term;
import fr.univnantes.termsuite.model.TermWord;
import fr.univnantes.termsuite.model.Word;
import fr.univnantes.termsuite.utils.CollectionUtils;
import fr.univnantes.termsuite.utils.Pair;
import fr.univnantes.termsuite.utils.TermUtils;

/**
 * Provides all lemma-lemma pairs found in the term.
 * 
 * Ex1: offshore wind energy
 * 			--> {offshore+wind, energy+wind, offshore+energy}
 * 
 * Performs two iterations when there are compounds
 * 
 * Ex2: horizontal-axis wind turbine
 * 			it1 --> {horizontal-axis+turbine, horizontal-axis+turbine, wind+turbine}
 * 			it2 --> {axis+horizontal, axis+turbine, axis+wind, horizontal+turbine, horizontal+wind, wind+turbine}
 * 			total -->  it1 U it2
 * 
 * @author Damien Cram
 * 
 */
public class AllComponentPairsProvider implements TermIndexValueProvider {

	@Override
	public Collection getClasses(Term term) {
		Set> componentPairs = Sets.newHashSet();
		Set significantWords = Sets.newHashSetWithExpectedSize(term.getWords().size());
		
		/*
		 * 1- select significant words
		 */
		for(TermWord w:term.getWords()) {
			if(w.isSwt())
				significantWords.add(w.getWord());
		}
		
		/*
		 * 2- Adds intra-compound component pairs (for compound words only)
		 */
		significantWords.stream()
			.filter(Word::isCompound)
			.forEach(w -> componentPairs.addAll(CompoundUtils.innerComponentPairs(w)));
		
		/*
		 * 3- Add inter-word component pairs
		 */
		componentPairs.addAll(CollectionUtils.combineAndProduct(TermUtils.toComponentSets(significantWords)));
		
		
		/*
		 * 4- transform each component pair to class
		 */
		Set classes = new HashSet<>();
			
		componentPairs.stream()
			.forEach(pair -> classes.addAll(CompoundUtils.toIndexStrings(pair)));
		return classes;
	}

}




© 2015 - 2024 Weber Informatics LLC | Privacy Policy