// All downloads are free. Search and download functionality uses the official Maven repository.

// edu.cmu.lti.ws4j.util.Traverser (Maven / Gradle / Ivy)

package edu.cmu.lti.ws4j.util;

import java.util.ArrayList;
import java.util.LinkedHashSet;
import java.util.List;
import java.util.Set;
import java.util.concurrent.ConcurrentHashMap;
import java.util.concurrent.ConcurrentMap;

import edu.cmu.lti.jawjaw.db.SynlinkDAO;
import edu.cmu.lti.jawjaw.pobj.Link;
import edu.cmu.lti.jawjaw.pobj.Synlink;
import edu.cmu.lti.jawjaw.pobj.Word;
import edu.cmu.lti.jawjaw.util.WordNetUtil;
import edu.cmu.lti.lexical_db.data.Concept;

//TODO: separate util into synsetutil and misc util?
public class Traverser {

	/* Discussion: should we make it non-static?*/
	private static ConcurrentMap> horizonCache;
	private static ConcurrentMap> upwardCache;
	private static ConcurrentMap> downwardCache;
	public static int capacity;
	
	static {
		if ( WS4JConfiguration.getInstance().useCache() ) {
			capacity = WS4JConfiguration.getInstance().getMaxCacheSize();
			horizonCache = new ConcurrentHashMap>( capacity );
			upwardCache = new ConcurrentHashMap>( capacity );
			downwardCache = new ConcurrentHashMap>( capacity );
		}
	}
	
	/**
	 * Identify surface text level inclusion given two synsets.
	 * Original algorithm takes two original words whereas this
	 * implementation takes care of all surface forms related to the synsets.
	 * 
	 * @param synset1 synset
	 * @param synset2 another synset
	 * @return if haystack synset name is including needle synset name 
	 */
	public static boolean contained( Concept synset1, Concept synset2 ) {
		if ( synset1==null || synset2==null ) return false;
		List wordsH = WordNetUtil.synsetToWords( synset1.getSynset() );
		List wordsN = WordNetUtil.synsetToWords( synset2.getSynset() );
		
		for ( Word wordH : wordsH ) {
			for ( Word wordN : wordsN ) {
				if ( wordH.getLemma().indexOf( wordN.getLemma() ) != -1 || 
					 wordN.getLemma().indexOf( wordH.getLemma() ) != -1	) {
					return true;
				}
			}
		}
		
		return false;
	}
	
	/**
	 * All horizontal links specified  are --
	 * Also See, Antonymy, Attribute, Pertinence, Similarity.
	 */
	public static Set getHorizontalSynsets( String synset ) {
		String key = synset;
		if ( WS4JConfiguration.getInstance().useCache() ) {
			Set cachedObj = horizonCache.get(key);
			if ( cachedObj != null ) return cachedObj;
		}
		
		List links = new ArrayList();
		links.add(Link.ants);
		links.add(Link.attr);
		links.add(Link.sim);
		
		Set result = getGroupedSynsets( synset, links );
		if ( WS4JConfiguration.getInstance().useCache() ) {
//			synchronized ( horizonCache ) {
				if ( horizonCache.size() >= WS4JConfiguration.getInstance().getMaxCacheSize() ) {
					horizonCache.remove( horizonCache.keySet().iterator().next() );
				}
				if (result!=null) horizonCache.put(key, result); // CLONE!?
//			}
		}
		return result;
	}
	
	/**
	 * Upward link types -- Hypernymy, Meronymy
	 */
	public static Set getUpwardSynsets( String synset ) {
		String key = synset;
		if ( WS4JConfiguration.getInstance().useCache() ) {
			Set cachedObj = upwardCache.get(key);
			if ( cachedObj != null ) return cachedObj;
		}
		
		List links = new ArrayList();
		links.add(Link.hype);
		links.add(Link.mero);
		links.add(Link.mmem);
		links.add(Link.mprt);
		links.add(Link.msub);
		
		Set result = getGroupedSynsets( synset, links );
		if ( WS4JConfiguration.getInstance().useCache() ) {
//			synchronized ( upwardCache ) {
				if ( upwardCache.size() >= WS4JConfiguration.getInstance().getMaxCacheSize() ) {
					upwardCache.remove( upwardCache.keySet().iterator().next() );
				}
				if (result!=null) upwardCache.put(key, result); // CLONE!?
//			}
		}
		return result;
	}
	
	/**
	 * subroutine that returns all offsetPOSs that are linked
	 * to a given synset by downward links. Downward link types --
	 * Cause, Entailment, Holonymy, Hyponymy.
	 */
	public static Set getDownwardSynsets( String synset ) {
		String key = synset;
		if ( WS4JConfiguration.getInstance().useCache() ) {
			Set cachedObj = downwardCache.get(key);
			if ( cachedObj != null ) return cachedObj;
		}
		
		List links = new ArrayList();
		links.add(Link.caus);
		links.add(Link.enta);
		links.add(Link.holo);
		links.add(Link.hmem);
		links.add(Link.hsub);
		links.add(Link.hprt);
		links.add(Link.hypo);
		
		Set result = getGroupedSynsets( synset, links );
		if ( WS4JConfiguration.getInstance().useCache() ) {
//			synchronized ( downwardCache ) {
				if ( downwardCache.size() >= WS4JConfiguration.getInstance().getMaxCacheSize() ) {
					downwardCache.remove( downwardCache.keySet().iterator().next() );
				}
				if (result!=null) downwardCache.put(key, result); // CLONE!?
//			}
		}
		return result;
	}
	
	private static Set getGroupedSynsets( String synset, List links ) {
		List synlinks = new ArrayList(); 
		for ( Link link : links ) {
			synlinks.addAll( SynlinkDAO.findSynlinksBySynsetAndLink(synset, link) );
		}
		Set synsets = new LinkedHashSet( synlinks.size() );
		for ( Synlink synlink : synlinks ) {
			synsets.add( synlink.getSynset2() );
		}
		// in case original synset is included...
		//synsets.remove( synset );
		return synsets;
	}
}




// © 2015 - 2024 Weber Informatics LLC | Privacy Policy