edu.cmu.lti.lexical_db.NictWordNet Maven / Gradle / Ivy
package edu.cmu.lti.lexical_db;
import java.util.ArrayList;
import java.util.Collection;
import java.util.List;
import java.util.concurrent.ConcurrentHashMap;
import java.util.concurrent.ConcurrentMap;
import edu.cmu.lti.jawjaw.db.SynlinkDAO;
import edu.cmu.lti.jawjaw.db.SynsetDAO;
import edu.cmu.lti.jawjaw.db.SynsetDefDAO;
import edu.cmu.lti.jawjaw.pobj.Lang;
import edu.cmu.lti.jawjaw.pobj.Link;
import edu.cmu.lti.jawjaw.pobj.POS;
import edu.cmu.lti.jawjaw.pobj.Synlink;
import edu.cmu.lti.jawjaw.pobj.SynsetDef;
import edu.cmu.lti.jawjaw.util.WordNetUtil;
import edu.cmu.lti.lexical_db.data.Concept;
import edu.cmu.lti.ws4j.util.PorterStemmer;
import edu.cmu.lti.ws4j.util.WS4JConfiguration;
public class NictWordNet implements ILexicalDatabase {
private static ConcurrentMap> cache;
private static PorterStemmer stemmer;
static {
if ( WS4JConfiguration.getInstance().useCache() ) {
cache = new ConcurrentHashMap>( WS4JConfiguration.getInstance().getMaxCacheSize() );
}
if ( WS4JConfiguration.getInstance().useStem() ) {
stemmer = new PorterStemmer();
}
}
public Collection getAllConcepts(String word, String posText) {
POS pos = POS.valueOf(posText);
List synsets = WordNetUtil.wordToSynsets(word, pos);
List synsetStrings = new ArrayList(synsets.size());
for ( edu.cmu.lti.jawjaw.pobj.Synset synset : synsets ) {
synsetStrings.add(new Concept(synset.getSynset(), POS.valueOf(pos.toString())));
}
return synsetStrings;
}
public Collection getHypernyms(String synset) {
List links = SynlinkDAO.findSynlinksBySynsetAndLink(synset, Link.hype);
List hypernyms = new ArrayList();
for ( Synlink link : links ) {
hypernyms.add( link.getSynset2() );
}
return hypernyms;
}
public Concept getMostFrequentConcept(String word, String pos) {
Collection concepts = getAllConcepts(word,pos);
return concepts.size()>0 ? concepts.iterator().next():null;
}
public Concept findSynsetBySynset(String synset) {
// TODO Auto-generated method stub
return null;
}
// offset looks like "service#n#3"
public String conceptToString(String synset) {
// TODO Auto-generated method stub
return null;
}
public List getGloss(Concept synset, String linkString) {
String key = synset+" "+linkString;
if ( WS4JConfiguration.getInstance().useCache() ) {
List cachedObj = cache.get(key);
if ( cachedObj != null ) return clone(cachedObj);
}
List linkedSynsets = new ArrayList();
Link link = null;
try {
link = Link.valueOf( linkString );
if ( link.equals( Link.mero ) ) {
linkedSynsets.addAll( linkToSynsets( synset.getSynset(), Link.mmem ) );
linkedSynsets.addAll( linkToSynsets( synset.getSynset(), Link.msub ) );
linkedSynsets.addAll( linkToSynsets( synset.getSynset(), Link.mprt ) );
} else if ( link.equals( Link.holo ) ) {
linkedSynsets.addAll( linkToSynsets( synset.getSynset(), Link.hmem ) );
linkedSynsets.addAll( linkToSynsets( synset.getSynset(), Link.hsub ) );
linkedSynsets.addAll( linkToSynsets( synset.getSynset(), Link.hprt ) );
} else if ( link.equals( Link.syns ) ) {
linkedSynsets.add( synset.getSynset() );
} else {
linkedSynsets.addAll( linkToSynsets( synset.getSynset(), link ) );
}
} catch ( IllegalArgumentException e ) {
// I know it's not a good use of catching
// this is how normal gloss is obtained
linkedSynsets.add( synset.getSynset() );
}
List glosses = new ArrayList(linkedSynsets.size());
for ( String linkedSynset : linkedSynsets ) {
String gloss = null;
if ( Link.syns.equals( link ) ) {
// Special case when you want name assigned to the synset, not the gloss.
gloss = synset.getName();
if ( gloss==null ) {
gloss = SynsetDAO.findSynsetBySynset( linkedSynset ).getName();
}
} else {
// This path is the majority
SynsetDef synsetDef = SynsetDefDAO.findSynsetDefBySynsetAndLang( linkedSynset, Lang.eng );
/*
* Let's separate the gloss and example
*/
gloss = WordNetUtil.getGloss( synsetDef );
}
if ( gloss==null ) continue;
//postprocess
//gloss = gloss.replaceAll("[^a-zA-Z0-9]", " ");
gloss = gloss.replaceAll("[.;:,?!(){}\"`$%@<>]", " ");
gloss = gloss.replaceAll("&", " and ");
gloss = gloss.replaceAll("_", " ");
gloss = gloss.replaceAll("[ ]+", " ");
gloss = gloss.replaceAll("(?= WS4JConfiguration.getInstance().getMaxCacheSize() ) {
cache.remove( cache.keySet().iterator().next() );
}
if (glosses!=null) cache.put(key, clone(glosses));
// }
}
return glosses;
}
/**
* Create the copied instance
* @param original
* @return clone
*/
private List clone( List original ) {
return new ArrayList( original );
}
private List linkToSynsets(String synset, Link link) {
List linkedSynsets = new ArrayList();
List synlinks = SynlinkDAO.findSynlinksBySynsetAndLink( synset, link );
for ( Synlink synlink : synlinks ) {
linkedSynsets.add( synlink.getSynset2() );
}
return linkedSynsets;
}
}