// org.jwi.use.JWI Maven / Gradle / Ivy
// Go to download
// Show more of this group Show more artifacts with this name
// Show all versions of jwixuse Show documentation
// Show all versions of jwixuse Show documentation
// Java library utils for JWI
// The newest version!
package org.jwi.use;
import edu.mit.jwi.Config;
import edu.mit.jwi.Dictionary;
import edu.mit.jwi.IDictionary;
import edu.mit.jwi.item.*;
import java.io.File;
import java.io.IOException;
import java.io.PrintStream;
import java.net.URL;
import java.util.Iterator;
import java.util.List;
import java.util.Map;
import java.util.Set;
import java.util.function.Consumer;
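/**
* JWI helper: opens a WordNet dictionary through the JWI library and offers
* iteration helpers (lemmas, senses, synsets, sense entries, relations) as well
* as tree-walking methods that print what they visit to a stream.
*
* @author Bernard Bou
*/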
public class JWI
{
/** WordNet directory this instance was built from, as passed to the constructor. */
public final String wnHome;
/** Dictionary created and opened by the constructor; exposed through getDict(). */
private final IDictionary dict;
/**
 * Constructor that delegates to the two-argument constructor with a null config.
 *
 * @param wnHome path of the WordNet dictionary directory
 * @throws IOException propagated from the two-argument constructor
 */
public JWI(final String wnHome) throws IOException
{
// no explicit configuration: hand null to the full constructor
this(wnHome, null);
}
/**
 * Builds a dictionary over the given WordNet directory and opens it.
 * The directory is echoed to standard output before the dictionary is created.
 *
 * @param wnHome path of the WordNet dictionary directory
 * @param config configuration handed to the dictionary; the one-argument constructor passes null
 * @throws IOException if the directory cannot be turned into a URL or the dictionary cannot be opened
 */
public JWI(final String wnHome, final Config config) throws IOException
{
    this.wnHome = wnHome;
    System.out.printf("FROM %s%n", wnHome);

    // the dictionary is addressed through the URL of its directory
    final File home = new File(wnHome);
    this.dict = new Dictionary(home.toURI().toURL(), config);

    // the dictionary must be opened before it can be queried
    this.dict.open();
}
/**
 * Gives access to the underlying dictionary.
 *
 * @return the dictionary created and opened by the constructor
 */
public IDictionary getDict()
{
return dict;
}
// M A I N I T E R A T I O N S
/**
 * Visits the lemma of every index word, one part of speech after the other.
 *
 * @param f consumer applied to each lemma; when null the dictionary is still traversed but nothing is invoked
 */
public void forAllLemmas(final Consumer<String> f)
{
    for (final POS pos : POS.values())
    {
        // typed iterator: a raw Iterator cannot yield IIndexWord without a cast
        final Iterator<IIndexWord> it = this.dict.getIndexWordIterator(pos);
        while (it.hasNext())
        {
            final IIndexWord idx = it.next();
            final String lemma = idx.getLemma();
            if (f != null)
            {
                f.accept(lemma);
            }
        }
    }
}
/**
 * Visits the lemma of every index word like {@code forAllLemmas}, but catches any
 * exception raised while fetching or consuming an item, reports it on standard
 * error and carries on with the next item.
 *
 * @param f consumer applied to each lemma; when null the dictionary is still traversed but nothing is invoked
 */
public void tryForAllLemmas(final Consumer<String> f)
{
    for (final POS pos : POS.values())
    {
        // typed iterator: a raw Iterator cannot yield IIndexWord without a cast
        final Iterator<IIndexWord> it = this.dict.getIndexWordIterator(pos);
        // NOTE(review): if next() throws without advancing, this loop would not terminate — confirm iterator behavior
        while (it.hasNext())
        {
            try
            {
                final IIndexWord idx = it.next();
                final String lemma = idx.getLemma();
                if (f != null)
                {
                    f.accept(lemma);
                }
            }
            catch (Exception e)
            {
                System.err.println(it + " " + e.getMessage());
            }
        }
    }
}
/**
 * Visits every sense reachable from the index words of every part of speech.
 * Sense ids the dictionary resolves to null are reported on standard error and skipped.
 *
 * @param f consumer applied to each non-null sense; when null the dictionary is still traversed but nothing is invoked
 */
public void forAllSenses(final Consumer<IWord> f)
{
    for (final POS pos : POS.values())
    {
        final Iterator<IIndexWord> it = this.dict.getIndexWordIterator(pos);
        while (it.hasNext())
        {
            final IIndexWord idx = it.next();
            final List<IWordID> senseids = idx.getWordIDs();
            for (final IWordID senseid : senseids) // synset id, sense number, and lemma
            {
                final IWord sense = this.dict.getWord(senseid);
                if (sense == null)
                {
                    // %n terminates the line so consecutive warnings do not run together
                    System.err.printf("⚠ senseid: %s ➜ null sense%n", senseid.toString());
                    continue;
                }
                if (f != null)
                {
                    f.accept(sense);
                }
            }
        }
    }
}
/**
 * Visits every sense like {@code forAllSenses(Consumer)}, but catches any exception
 * raised while handling an index word, reports it on standard error and moves on to
 * the next index word. An exception therefore abandons the remaining senses of the
 * index word being processed.
 *
 * @param f consumer applied to each non-null sense; when null the dictionary is still traversed but nothing is invoked
 */
public void tryForAllSenses(final Consumer<IWord> f)
{
    for (final POS pos : POS.values())
    {
        final Iterator<IIndexWord> it = this.dict.getIndexWordIterator(pos);
        // NOTE(review): if next() throws without advancing, this loop would not terminate — confirm iterator behavior
        while (it.hasNext())
        {
            try
            {
                final IIndexWord idx = it.next();
                final List<IWordID> senseids = idx.getWordIDs();
                for (final IWordID senseid : senseids) // synset id, sense number, and lemma
                {
                    final IWord sense = this.dict.getWord(senseid);
                    if (sense == null)
                    {
                        // %n terminates the line so consecutive warnings do not run together
                        System.err.printf("⚠ senseid: %s ➜ null sense%n", senseid.toString());
                        continue;
                    }
                    if (f != null)
                    {
                        f.accept(sense);
                    }
                }
            }
            catch (Exception e)
            {
                System.err.println(it + " " + e.getMessage());
            }
        }
    }
}
/**
 * Visits every synset, one part of speech after the other.
 *
 * @param f consumer applied to each synset; when null the dictionary is still traversed but nothing is invoked
 */
public void forAllSynsets(final Consumer<ISynset> f)
{
    for (final POS pos : POS.values())
    {
        // typed iterator: a raw Iterator cannot yield ISynset without a cast
        final Iterator<ISynset> it = this.dict.getSynsetIterator(pos);
        while (it.hasNext())
        {
            final ISynset synset = it.next();
            if (f != null)
            {
                f.accept(synset);
            }
        }
    }
}
/**
 * Visits every synset like {@code forAllSynsets}, but catches any exception raised
 * while fetching or consuming a synset, reports it on standard error and carries on.
 *
 * @param f consumer applied to each synset; when null the dictionary is still traversed but nothing is invoked
 */
public void tryForAllSynsets(final Consumer<ISynset> f)
{
    for (final POS pos : POS.values())
    {
        // typed iterator: a raw Iterator cannot yield ISynset without a cast
        final Iterator<ISynset> it = this.dict.getSynsetIterator(pos);
        // NOTE(review): if next() throws without advancing, this loop would not terminate — confirm iterator behavior
        while (it.hasNext())
        {
            try
            {
                final ISynset synset = it.next();
                if (f != null)
                {
                    f.accept(synset);
                }
            }
            catch (Exception e)
            {
                System.err.println(it + " " + e.getMessage());
            }
        }
    }
}
/**
 * Visits every sense entry supplied by the dictionary's sense entry iterator.
 *
 * @param f consumer applied to each sense entry; when null the entries are still traversed but nothing is invoked
 */
public void forAllSenseEntries(final Consumer<ISenseEntry> f)
{
    // typed iterator: a raw Iterator cannot yield ISenseEntry without a cast
    final Iterator<ISenseEntry> it = this.dict.getSenseEntryIterator();
    while (it.hasNext())
    {
        final ISenseEntry entry = it.next();
        if (f != null)
        {
            f.accept(entry);
        }
    }
}
/**
 * Visits every sense entry like {@code forAllSenseEntries}, but catches any exception
 * raised while fetching or consuming an entry, reports it on standard error and carries on.
 *
 * @param f consumer applied to each sense entry; when null the entries are still traversed but nothing is invoked
 */
public void tryForAllSenseEntries(final Consumer<ISenseEntry> f)
{
    // typed iterator: a raw Iterator cannot yield ISenseEntry without a cast
    final Iterator<ISenseEntry> it = this.dict.getSenseEntryIterator();
    // NOTE(review): if next() throws without advancing, this loop would not terminate — confirm iterator behavior
    while (it.hasNext())
    {
        try
        {
            final ISenseEntry entry = it.next();
            if (f != null)
            {
                f.accept(entry);
            }
        }
        catch (Exception e)
        {
            System.err.println(it + " " + e.getMessage());
        }
    }
}
// S P E C I F I C I T E R A T I O N S
/**
 * For every synset of every part of speech, resolves each related synset id and
 * hands the resolved synset to the consumer.
 *
 * @param f consumer applied to each related synset exactly as the dictionary returns it (no null check is made here);
 *          when null the relations are still resolved but nothing is invoked
 */
public void forAllSynsetRelations(final Consumer<ISynset> f)
{
    for (final POS pos : POS.values())
    {
        final Iterator<ISynset> it = this.dict.getSynsetIterator(pos);
        while (it.hasNext())
        {
            final ISynset synset = it.next();
            final List<ISynsetID> relatedIds = synset.getRelatedSynsets();
            for (final ISynsetID relatedId : relatedIds)
            {
                final ISynset related = this.dict.getSynset(relatedId);
                if (f != null)
                {
                    f.accept(related);
                }
            }
        }
    }
}
/**
 * Visits related synsets like {@code forAllSynsetRelations}, with two levels of
 * protection: an exception on one related synset is reported (with its id) and the
 * next related id is tried; an exception on the source synset itself is reported
 * and the next synset is tried.
 *
 * @param f consumer applied to each related synset exactly as the dictionary returns it (no null check is made here);
 *          when null the relations are still resolved but nothing is invoked
 */
public void tryForAllSynsetRelations(final Consumer<ISynset> f)
{
    for (final POS pos : POS.values())
    {
        final Iterator<ISynset> it = this.dict.getSynsetIterator(pos);
        // NOTE(review): if next() throws without advancing, this loop would not terminate — confirm iterator behavior
        while (it.hasNext())
        {
            try
            {
                final ISynset synset = it.next();
                final List<ISynsetID> relatedIds = synset.getRelatedSynsets();
                for (final ISynsetID relatedId : relatedIds)
                {
                    try
                    {
                        final ISynset related = this.dict.getSynset(relatedId);
                        if (f != null)
                        {
                            f.accept(related);
                        }
                    }
                    catch (Exception e)
                    {
                        System.err.println(relatedId + " " + e.getMessage());
                    }
                }
            }
            catch (Exception e)
            {
                System.err.println(it + " " + e.getMessage());
            }
        }
    }
}
/**
 * For every sense of every index word, resolves each related word id and hands the
 * resolved sense to the consumer. Source senses the dictionary resolves to null are
 * reported on standard error and skipped.
 *
 * @param f consumer applied to each related sense exactly as the dictionary returns it (the related sense
 *          itself is not null-checked here); when null the relations are still resolved but nothing is invoked
 */
public void forAllSenseRelations(final Consumer<IWord> f)
{
    for (final POS pos : POS.values())
    {
        final Iterator<IIndexWord> it = this.dict.getIndexWordIterator(pos);
        while (it.hasNext())
        {
            final IIndexWord idx = it.next();
            final List<IWordID> senseids = idx.getWordIDs();
            for (final IWordID senseid : senseids) // synset id, sense number, and lemma
            {
                final IWord sense = this.dict.getWord(senseid);
                if (sense == null)
                {
                    // %n terminates the line so consecutive warnings do not run together
                    System.err.printf("⚠ senseid: %s ➜ null sense%n", senseid.toString());
                    continue;
                }
                final List<IWordID> relatedIds = sense.getRelatedWords();
                for (final IWordID relatedId : relatedIds)
                {
                    final IWord related = this.dict.getWord(relatedId);
                    if (f != null)
                    {
                        f.accept(related);
                    }
                }
            }
        }
    }
}
/**
 * Visits related senses like {@code forAllSenseRelations}, but catches any exception
 * raised while handling one sense id, reports it (with the sense id) on standard
 * error and continues with the next sense id. Fetching the index word itself is
 * not protected.
 *
 * @param f consumer applied to each related sense exactly as the dictionary returns it (the related sense
 *          itself is not null-checked here); when null the relations are still resolved but nothing is invoked
 */
public void tryForAllSenseRelations(final Consumer<IWord> f)
{
    for (final POS pos : POS.values())
    {
        final Iterator<IIndexWord> it = this.dict.getIndexWordIterator(pos);
        while (it.hasNext())
        {
            final IIndexWord idx = it.next();
            final List<IWordID> senseids = idx.getWordIDs();
            for (final IWordID senseid : senseids) // synset id, sense number, and lemma
            {
                try
                {
                    final IWord sense = this.dict.getWord(senseid);
                    if (sense == null)
                    {
                        // %n terminates the line so consecutive warnings do not run together
                        System.err.printf("⚠ senseid: %s ➜ null sense%n", senseid.toString());
                        continue;
                    }
                    final List<IWordID> relatedIds = sense.getRelatedWords();
                    for (final IWordID relatedId : relatedIds)
                    {
                        final IWord related = this.dict.getWord(relatedId);
                        if (f != null)
                        {
                            f.accept(related);
                        }
                    }
                }
                catch (Exception e)
                {
                    System.err.println(senseid + " " + e.getMessage());
                }
            }
        }
    }
}
// S E N S E E X P L O R A T I O N
/**
 * Visits the sense ids of a lemma, looking the lemma up under each part of speech
 * and skipping the parts of speech where no index word is found.
 *
 * @param lemma lemma to look up
 * @param f     consumer applied to each sense id; when null the lookups are still made but nothing is invoked
 */
public void forAllSenseIDs(final String lemma, final Consumer<IWordID> f)
{
    for (final POS pos : POS.values())
    {
        final IIndexWord idx = this.dict.getIndexWord(lemma, pos);
        if (idx != null)
        {
            // typed list: a raw List cannot be iterated as IWordID
            final List<IWordID> senseids = idx.getWordIDs();
            for (final IWordID senseid : senseids) // synset id, sense number, and lemma
            {
                if (f != null)
                {
                    f.accept(senseid);
                }
            }
        }
    }
}
/**
 * Visits the senses of a lemma, looking the lemma up under each part of speech
 * and skipping the parts of speech where no index word is found.
 * Sense ids the dictionary resolves to null are reported on standard error and
 * skipped, in line with the dictionary-wide sense iterations of this class.
 *
 * @param lemma lemma to look up
 * @param f     consumer applied to each non-null sense; when null the lookups are still made but nothing is invoked
 */
public void forAllSenses(final String lemma, final Consumer<IWord> f)
{
    for (final POS pos : POS.values())
    {
        final IIndexWord idx = this.dict.getIndexWord(lemma, pos);
        if (idx != null)
        {
            final List<IWordID> senseids = idx.getWordIDs();
            for (final IWordID senseid : senseids) // synset id, sense number, and lemma
            {
                final IWord sense = this.dict.getWord(senseid);
                if (sense == null)
                {
                    // never hand a null sense to the consumer: warn and move on
                    System.err.printf("⚠ senseid: %s ➜ null sense%n", senseid.toString());
                    continue;
                }
                if (f != null)
                {
                    f.accept(sense);
                }
            }
        }
    }
}
/**
 * Visits the sense key of every sense reachable from the index words of every part
 * of speech. Sense ids the dictionary resolves to null are reported on standard
 * error and skipped.
 *
 * @param f consumer applied to each sense key; when null the dictionary is still traversed but nothing is invoked
 */
public void forAllSensekeys(final Consumer<ISenseKey> f)
{
    for (final POS pos : POS.values())
    {
        final Iterator<IIndexWord> it = this.dict.getIndexWordIterator(pos);
        while (it.hasNext())
        {
            final IIndexWord idx = it.next();
            final List<IWordID> senseids = idx.getWordIDs();
            for (final IWordID senseid : senseids) // synset id, sense number, and lemma
            {
                final IWord sense = this.dict.getWord(senseid);
                if (sense == null)
                {
                    // %n terminates the line so consecutive warnings do not run together
                    System.err.printf("⚠ senseid: %s ➜ null sense%n", senseid.toString());
                    continue;
                }
                final ISenseKey sensekey = sense.getSenseKey();
                if (f != null)
                {
                    f.accept(sensekey);
                }
            }
        }
    }
}
/**
 * Visits sense keys like {@code forAllSensekeys}, but catches any exception raised
 * while handling one sense id, reports it (with the sense id) on standard error and
 * continues with the next sense id. Fetching the index word itself is not protected.
 *
 * @param f consumer applied to each sense key; when null the dictionary is still traversed but nothing is invoked
 */
public void tryForAllSensekeys(final Consumer<ISenseKey> f)
{
    for (final POS pos : POS.values())
    {
        final Iterator<IIndexWord> it = this.dict.getIndexWordIterator(pos);
        while (it.hasNext())
        {
            final IIndexWord idx = it.next();
            final List<IWordID> senseids = idx.getWordIDs();
            for (final IWordID senseid : senseids) // synset id, sense number, and lemma
            {
                try
                {
                    final IWord sense = this.dict.getWord(senseid);
                    if (sense == null)
                    {
                        // %n terminates the line so consecutive warnings do not run together
                        System.err.printf("⚠ senseid: %s ➜ null sense%n", senseid.toString());
                        continue;
                    }
                    final ISenseKey sensekey = sense.getSenseKey();
                    if (f != null)
                    {
                        f.accept(sensekey);
                    }
                }
                catch (Exception e)
                {
                    System.err.println(senseid + " " + e.getMessage());
                }
            }
        }
    }
}
// T R E E E X P L O R A T I O N S
/**
 * Walks a lemma under each part of speech in turn.
 *
 * @param lemma lemma to walk
 * @param ps    stream the walk is printed to
 */
public void walk(final String lemma, final PrintStream ps)
{
    final POS[] allPos = POS.values();
    for (int i = 0; i < allPos.length; i++)
    {
        walk(lemma, allPos[i], ps);
    }
}
/**
 * Walks a lemma under one part of speech. Nothing is printed when the dictionary
 * has no index word for that lemma and part of speech.
 *
 * @param lemma lemma to walk
 * @param pos   part of speech to look the lemma up under
 * @param ps    stream the walk is printed to
 */
public void walk(final String lemma, final POS pos, final PrintStream ps)
{
    // an index word corresponds to a line in an index file
    final IIndexWord indexWord = this.dict.getIndexWord(lemma, pos);
    if (indexWord == null)
    {
        return;
    }

    // banner announcing the part of speech, then the index word itself
    ps.println();
    ps.println("================================================================================");
    ps.println("■ pos = " + pos.name());
    walk(indexWord, ps);
}
/**
 * Walks an index word: prints each of its pointers, then walks each of its sense ids.
 *
 * @param idx index word to walk
 * @param ps  stream the walk is printed to
 */
public void walk(final IIndexWord idx, final PrintStream ps)
{
    // typed set: a raw Set cannot be iterated as IPointer
    final Set<IPointer> pointers = idx.getPointers();
    for (final IPointer ptr : pointers)
    {
        ps.println("has relation = " + ptr.toString());
    }

    // senseid=(lemma, synsetid, sensenum)
    final List<IWordID> senseids = idx.getWordIDs();
    for (final IWordID senseid : senseids) // synset id, sense number, and lemma
    {
        walk(senseid, ps);
    }
}
/**
 * Walks a sense id: prints a separator, walks the sense it resolves to, then prints
 * and walks the synset it belongs to. A sense or synset the dictionary cannot
 * resolve is reported on standard error instead of causing a NullPointerException.
 *
 * @param senseid sense id to walk
 * @param ps      stream the walk is printed to
 */
public void walk(final IWordID senseid, final PrintStream ps)
{
    ps.println("--------------------------------------------------------------------------------");

    // sense=(senseid, lexid, sensekey, synset)
    final IWord sense = this.dict.getWord(senseid);
    if (sense == null)
    {
        // other methods of this class already treat a null result from getWord as possible
        System.err.printf("⚠ senseid: %s ➜ null sense%n", senseid);
    }
    else
    {
        walk(sense, ps);
    }

    // synset
    final ISynsetID synsetid = senseid.getSynsetID();
    final ISynset synset = this.dict.getSynset(synsetid);
    if (synset == null)
    {
        // defensive: presumably getSynset can fail to resolve an id just like getWord
        System.err.printf("⚠ synsetid: %s ➜ null synset%n", synsetid);
        return;
    }
    ps.printf("● synset = %s%n", toString(synset));
    walk(synset, 1, ps);
}
/**
 * Walks a sense: prints its identity (sense, lexical id, sense key), its adjective
 * marker when it has one, its lexical relations, its verb frames, and finally the
 * sense number and tag count taken from its sense entry. When the dictionary has
 * no sense entry for the sense key, a warning goes to standard error and the last
 * line is printed with empty values.
 *
 * @param sense sense to walk
 * @param ps    stream the walk is printed to
 */
public void walk(final IWord sense, final PrintStream ps)
{
    final ISenseKey senseKey = sense.getSenseKey();
    ps.printf("● sense: %s lexid: %d sensekey: %s%n", sense.toString(), sense.getLexicalID(), senseKey);

    // adj marker
    final AdjMarker marker = sense.getAdjectiveMarker();
    if (marker != null)
    {
        ps.println(" marker = " + marker);
    }

    // sensekey
    final ISenseEntry senseEntry = this.dict.getSenseEntry(senseKey);
    if (senseEntry == null)
    {
        System.err.printf("⚠ Missing sensekey %s for sense at offset %d with pos %s%n", senseKey.toString(), sense.getSynset().getOffset(), sense.getPOS().toString());
    }

    // lexical relations
    final Map<IPointer, List<IWordID>> relatedMap = sense.getRelatedMap();
    walk(relatedMap, ps);

    // verb frames
    final List<IVerbFrame> verbFrames = sense.getVerbFrames();
    walk(verbFrames, sense.getLemma(), ps);

    ps.printf(" sensenum: %s tag cnt:%s%n", senseEntry == null ? "" : senseEntry.getSenseNumber(), senseEntry == null ? "" : senseEntry.getTagCount());
}
public void walk(final Map> relatedMap, final PrintStream ps)
{
if (relatedMap != null)
{
for (Map.Entry> entry : relatedMap.entrySet())
{
IPointer pointer = entry.getKey();
for (IWordID relatedId : entry.getValue())
{
IWord related = this.dict.getWord(relatedId);
ps.printf(" related %s lemma:%s synset:%s%n", pointer, related.getLemma(), related.getSynset().toString());
}
}
}
}
/**
 * Prints verb frames: one line per frame, giving its template and the template
 * instantiated with the lemma.
 *
 * @param verbFrames verb frames to print; nothing is printed when null
 * @param lemma      lemma used to instantiate each template
 * @param ps         stream the frames are printed to
 */
public void walk(final List<IVerbFrame> verbFrames, final String lemma, final PrintStream ps)
{
    if (verbFrames != null)
    {
        // typed list: a raw List cannot be iterated as IVerbFrame
        for (final IVerbFrame verbFrame : verbFrames)
        {
            ps.printf(" verb frame: %s : %s%n", verbFrame.getTemplate(), verbFrame.instantiateTemplate(lemma));
        }
    }
}
public void walk(final ISynset synset, final int level, final PrintStream ps)
{
final String indentSpace = new String(new char[level]).replace('\0', '\t');
final Map> links = synset.getRelatedMap();
for (final IPointer p : links.keySet())
{
ps.printf("%s🡆 %s%n", indentSpace, p.getName());
final List relations2 = links.get(p);
walk(relations2, p, level, ps);
}
}
/**
 * Walks a list of related synset ids: resolves and prints each synset at the given
 * indentation, then follows the same pointer from that synset one level deeper.
 *
 * @param relations2 ids of the related synsets
 * @param p          pointer through which the synsets are related
 * @param level      indentation level, rendered as that many tab characters
 * @param ps         stream the walk is printed to
 */
public void walk(final List<ISynsetID> relations2, final IPointer p, final int level, final PrintStream ps)
{
    // a fresh char array holds '\0' characters; turning them into tabs yields 'level' tabs
    final String indentSpace = new String(new char[level]).replace('\0', '\t');
    // typed list: a raw List cannot be iterated as ISynsetID
    for (final ISynsetID synsetid2 : relations2)
    {
        // NOTE(review): the resolved synset is used without a null check — confirm ids here always resolve
        final ISynset synset2 = this.dict.getSynset(synsetid2);
        ps.printf("%s%s%n", indentSpace, toString(synset2));
        walk(synset2, p, level + 1, ps);
    }
}
/**
 * Follows one pointer from a synset: resolves and prints each synset related
 * through that pointer, and recurses one level deeper when canRecurse accepts
 * the pointer.
 *
 * @param synset synset to start from
 * @param p      pointer to follow
 * @param level  indentation level, rendered as that many tab characters
 * @param ps     stream the walk is printed to
 */
public void walk(final ISynset synset, final IPointer p, final int level, final PrintStream ps)
{
    // a fresh char array holds '\0' characters; turning them into tabs yields 'level' tabs
    final String indentSpace = new String(new char[level]).replace('\0', '\t');
    // typed list: a raw List cannot be iterated as ISynsetID
    final List<ISynsetID> relations2 = synset.getRelatedSynsets(p);
    for (final ISynsetID synsetid2 : relations2)
    {
        // NOTE(review): the resolved synset is used without a null check — confirm ids here always resolve
        final ISynset synset2 = this.dict.getSynset(synsetid2);
        ps.printf("%s%s%n", indentSpace, toString(synset2));
        // NOTE(review): recursion has no visited set; it relies on the followed relation being acyclic
        if (canRecurse(p))
        {
            walk(synset2, p, level + 1, ps);
        }
    }
}
// H E L P E R S
/**
 * Renders a synset as its member list followed by its gloss.
 *
 * @param synset synset to render
 * @return the string built by getMembers for the synset, immediately followed by the gloss
 */
public static String toString(final ISynset synset)
{
    final String members = getMembers(synset);
    final String gloss = synset.getGloss();
    return members + gloss;
}
/**
 * Renders the members of a synset as their lemmas, separated by single spaces,
 * wrapped in braces and followed by one trailing space.
 *
 * @param synset synset whose words are listed
 * @return the brace-enclosed lemma list with a trailing space
 */
public static String getMembers(final ISynset synset)
{
    final StringBuilder members = new StringBuilder("{");
    // empty before the first lemma, a single space before every later one
    String separator = "";
    for (final IWord word : synset.getWords())
    {
        members.append(separator).append(word.getLemma());
        separator = " ";
    }
    return members.append("} ").toString();
}
/**
 * Tells whether a pointer may be followed recursively, based on its symbol.
 *
 * @param p pointer to test
 * @return true for the symbols listed below, false for any other symbol
 */
private static boolean canRecurse(IPointer p)
{
    switch (p.getSymbol())
    {
        case "@": // hypernym
        case "~": // hyponym
        case "%p": // part holonym
        case "#p": // part meronym
        case "%m": // member holonym
        case "#m": // member meronym
        case "%s": // substance holonym
        case "#s": // substance meronym
        case "*": // entail
        case ">": // cause
            return true;
        default:
            return false;
    }
}
/**
 * Main: opens the dictionary found in the first argument and walks the lemma given
 * as second argument, printing to standard output. When fewer than two arguments
 * are supplied, a usage line is printed to standard error and nothing else is done.
 *
 * @param args arguments: WordNet dictionary directory, then lemma
 * @throws IOException io exception
 */
public static void main(final String[] args) throws IOException
{
    // guard against ArrayIndexOutOfBoundsException on missing arguments
    if (args == null || args.length < 2)
    {
        System.err.println("Usage: JWI <wnHome> <lemma>");
        return;
    }
    final String wnHome = args[0];
    final String lemma = args[1];
    new JWI(wnHome, null).walk(lemma, System.out);
}
}
// © 2015 - 2025 Weber Informatics LLC | Privacy Policy