com.articulate.sigma.wordNet.WNdiagnostics Maven / Gradle / Ivy
Go to download
Show more of this group Show more artifacts with this name
Show all versions of sigma-component Show documentation
Show all versions of sigma-component Show documentation
The Sigma knowledge engineering system is a system for developing, viewing and debugging theories in first-order
logic. It works with the Knowledge Interchange Format (KIF) and is optimized for the Suggested Upper Merged
Ontology (SUMO), www.ontologyportal.org.
package com.articulate.sigma.wordNet;
/** This code is copyright Articulate Software (c) 2003.
This software is released under the GNU Public License .
Users of this code also consent, by use of this code, to credit Articulate Software
and Teknowledge in any writings, briefings, publications, presentations, or
other representations of any software which incorporates, builds on, or uses this
code. Please cite the following article in any publication with references:
Pease, A., (2003). The Sigma Ontology Development Environment,
in Working Notes of the IJCAI-2003 Workshop on Ontology and Distributed Systems,
August 9, Acapulco, Mexico.
*/
import java.util.ArrayList;
import java.util.HashSet;
import java.util.Iterator;
import com.articulate.sigma.*;
/** *****************************************************************
* A class that finds problems in WordNet to KB mappings. It is not meant
* to be instantiated.
*/
public class WNdiagnostics {
/** *****************************************************************
 * Lists WordNet synsets that have no SUMO mapping.
 *
 * Each key of WordNet.wn.synsetsToWords is read as a one-character
 * part-of-speech prefix followed by the bare synset id. The prefix
 * selects the mapping table that is consulted: '1' nounSUMOHash,
 * '2' verbSUMOHash, '3' adjectiveSUMOHash, '4' adverbSUMOHash. Keys
 * with any other prefix, and null or empty keys, are ignored.
 *
 * @return an ArrayList of Strings which are prefixed WordNet synsets
 * that don't have a corresponding term in the knowledge base. At most
 * 50 synsets are returned; when that cap is reached the extra entry
 * "limited to 50 results." is appended as the last element.
 */
public static ArrayList synsetsWithoutTerms() {
    final int maxResults = 50;
    ArrayList<String> result = new ArrayList<String>();
    for (Object key : WordNet.wn.synsetsToWords.keySet()) {
        String prefixedSynset = (String) key;
        if (prefixedSynset == null || prefixedSynset.length() == 0)
            continue; // nothing to split into prefix + id

        String synset = prefixedSynset.substring(1);
        boolean unmapped;
        switch (prefixedSynset.charAt(0)) {
            case '1':
                unmapped = WordNet.wn.nounSUMOHash.get(synset) == null;
                break;
            case '2':
                unmapped = WordNet.wn.verbSUMOHash.get(synset) == null;
                break;
            case '3':
                unmapped = WordNet.wn.adjectiveSUMOHash.get(synset) == null;
                break;
            case '4':
                unmapped = WordNet.wn.adverbSUMOHash.get(synset) == null;
                break;
            default:
                // Unrecognized part of speech: there is no table to check.
                unmapped = false;
                break;
        }
        if (!unmapped)
            continue;

        result.add(prefixedSynset);
        // Stop at exactly maxResults synsets so the notice matches the
        // number of entries actually returned.
        if (result.size() >= maxResults) {
            result.add("limited to 50 results.");
            return result;
        }
    }
    return result;
}
/** *****************************************************************
 * Lists terms of the knowledge base named "SUMO" that have no WordNet
 * synset mapped to them.
 *
 * A term is reported when it is not a key of WordNet.wn.SUMOHash, is
 * not a function according to Formula.isFunction, and starts with an
 * upper case character (the test this method uses to leave relations
 * out). Null and empty terms are skipped.
 *
 * @return an ArrayList of Strings which are terms that don't
 * have a corresponding synset; empty when no KB named "SUMO" is
 * available
 */
public static ArrayList nonRelationTermsWithoutSynsets() {
    ArrayList<String> result = new ArrayList<String>();
    KB kb = KBmanager.getMgr().getKB("SUMO");
    if (kb == null)
        return result; // nothing loaded, so nothing to report

    for (Iterator it = kb.terms.iterator(); it.hasNext(); ) {
        String term = (String) it.next();
        if (term == null || term.length() == 0)
            continue; // charAt(0) below needs at least one character

        // Conditional-and throughout: later checks are only evaluated
        // while the term is still a candidate.
        if (!WordNet.wn.SUMOHash.containsKey(term)
                && !Formula.isFunction(term)
                && Character.isUpperCase(term.charAt(0)))
            result.add(term);
    }
    return result;
}
/** *****************************************************************
 * Lists WordNet synsets whose SUMO mapping names a term that is absent
 * from the given knowledge base.
 *
 * Each key of WordNet.wn.synsetsToWords is read as a one-character
 * part-of-speech prefix ('1' noun, '2' verb, '3' adjective, '4' adverb)
 * followed by the bare synset id. The mapping found in the matching
 * table is expanded with WordNetUtilities.convertTermList, and every
 * resulting term that does not start with '(' is looked up in
 * kb.getTerms(). A synset is reported once, as soon as one of its terms
 * is missing. Synsets with no mapping, an empty mapping or an
 * unrecognized prefix are skipped.
 *
 * @param kb the knowledge base whose terms are checked; must not be null
 * @return an ArrayList of Strings which are prefixed WordNet synsets
 * that have an identified term but that doesn't exist in the currently
 * loaded knowledge base. At most 50 synsets are returned; when that cap
 * is reached the extra entry "limited to 50 results." is appended as
 * the last element.
 */
public static ArrayList synsetsWithoutFoundTerms(KB kb) {
    final int maxResults = 50;
    ArrayList<String> result = new ArrayList<String>();
    for (Object key : WordNet.wn.synsetsToWords.keySet()) {
        String prefixedSynset = (String) key;
        if (prefixedSynset == null || prefixedSynset.length() == 0)
            continue;

        String synset = prefixedSynset.substring(1);
        String mapping;
        switch (prefixedSynset.charAt(0)) {
            case '1':
                mapping = (String) WordNet.wn.nounSUMOHash.get(synset);
                break;
            case '2':
                mapping = (String) WordNet.wn.verbSUMOHash.get(synset);
                break;
            case '3':
                mapping = (String) WordNet.wn.adjectiveSUMOHash.get(synset);
                break;
            case '4':
                mapping = (String) WordNet.wn.adverbSUMOHash.get(synset);
                break;
            default:
                // Unrecognized part of speech: no mapping to verify.
                mapping = null;
                break;
        }
        if (mapping == null || mapping.length() == 0)
            continue;

        // Expand the mapping before taking the lock so the lock is only
        // held for the membership checks.
        ArrayList termList = WordNetUtilities.convertTermList(mapping);
        if (termList == null)
            continue;

        boolean termMissing = false;
        synchronized (kb.getTerms()) {
            for (int i = 0; i < termList.size() && !termMissing; i++) {
                String candidate = (String) termList.get(i);
                // Entries starting with '(' are not looked up
                // (presumably compound expressions rather than plain
                // term names - confirm against convertTermList).
                if (candidate == null || candidate.length() == 0
                        || candidate.charAt(0) == '(')
                    continue;
                termMissing = !kb.getTerms().contains(candidate);
            }
        }
        if (!termMissing)
            continue;

        // One entry per synset, however many of its terms are missing.
        result.add(prefixedSynset);
        // Stop at exactly maxResults synsets so the notice matches the
        // number of entries actually returned.
        if (result.size() >= maxResults) {
            result.add("limited to 50 results.");
            return result;
        }
    }
    return result;
}
/** *****************************************************************
 * Reports noun synsets whose WordNet hypernym or hyponym links are not
 * mirrored by the class hierarchy of their mapped SUMO terms.
 *
 * For every key of WordNet.wn.nounSUMOHash that has a non-null mapping,
 * each relation of the synset (looked up under the key prefixed with 1)
 * whose attribute is hypernym or hyponym is examined. The bare SUMO term
 * of the source synset is used to fetch a set from kb.kbCache of the KB
 * named SUMO: parent classes for a hypernym link, child classes for a
 * hyponym link. An entry is produced when that set is non-null, the
 * target synset has a SUMO mapping, the bare target term is not in the
 * set, and the bare target term differs from the bare source term.
 * Each entry links the source and target words and the two SUMO terms.
 * Collection stops once the list holds more than 50 entries, after
 * appending the notice: limited to 50 results.
 *
 * Currently, this just examines nouns and needs to be expanded
 * to examine verbs too.
 *
 * NOTE(review): the first statement of the body and the last append
 * call look truncated in this copy, as if markup inside the string
 * literals had been stripped. termHTML and result are used without a
 * visible declaration, and it.next() is assigned to a String although
 * the iterator is declared without a type parameter. Restore these
 * lines from the upstream source before compiling - TODO confirm.
 *
 * NOTE(review): words.get(0) and targetWords.get(0) are dereferenced
 * without a null check, and getBareSUMOTerm is called on targetSUMO
 * before targetSUMO is tested for null - verify these cannot be null.
 *
 * @param kbName not referenced in the visible body, which always
 * fetches the KB named SUMO; possibly used in the text missing from
 * the first statement - TODO confirm
 * @param language not referenced in the visible body; possibly used in
 * the text missing from the first statement - TODO confirm
 * @return an ArrayList of Strings which are HTML-formatted presentations
 * of SUMO terms and WordNet synsets that do not have a matching
 * taxonomic structure with their corresponding SUMO terms
 */
public static ArrayList nonMatchingTaxonomy(String kbName, String language) {
String synsetHTML = " result = new ArrayList();
Iterator it = WordNet.wn.nounSUMOHash.keySet().iterator();
while (it.hasNext()) {
//System.out.println();
String synset = it.next(); // not a prefixed synset
if (WordNet.wn.nounSUMOHash.get(synset) != null) {
ArrayList words = WordNet.wn.synsetsToWords.get("1"+synset);
String sumoTerm = (String) WordNet.wn.nounSUMOHash.get(synset);
String word = (String) words.get(0);
//System.out.println("Source word: " + word);
ArrayList rels = WordNet.wn.relations.get("1"+synset); // relations requires prefixes
if (rels != null) {
Iterator it2 = rels.iterator();
while (it2.hasNext()) {
AVPair avp = (AVPair) it2.next();
if (avp.attribute.equals("hypernym") || avp.attribute.equals("hyponym")) {
String targetSynset = avp.value;
ArrayList targetWords = WordNet.wn.synsetsToWords.get(targetSynset);
String targetWord = (String) targetWords.get(0);
//System.out.println("Target word: " + targetWord);
String targetBareSynset = avp.value.substring(1);
String targetSUMO = (String) WordNet.wn.nounSUMOHash.get(targetBareSynset);
//System.out.println("SUMO source: " + sumoTerm);
//System.out.println("SUMO target: " + targetSUMO);
String bareSUMOterm = WordNetUtilities.getBareSUMOTerm(sumoTerm);
String bareTargetSUMO = WordNetUtilities.getBareSUMOTerm(targetSUMO);
if (sumoTerm != null) {
KB kb = KBmanager.getMgr().getKB("SUMO");
HashSet SUMOtaxonomy = new HashSet();
String arrow = "->";
if (avp.attribute.equals("hypernym"))
SUMOtaxonomy = kb.kbCache.getParentClasses(bareSUMOterm);
if (avp.attribute.equals("hyponym")) {
SUMOtaxonomy = kb.kbCache.getChildClasses(bareSUMOterm);
arrow = "<-";
}
//System.out.println("taxonomy: " + SUMOtaxonomy);
if (SUMOtaxonomy != null && targetSUMO != null && !SUMOtaxonomy.contains(bareTargetSUMO) &&
!bareSUMOterm.equals(bareTargetSUMO)) {
StringBuffer resultString = new StringBuffer();
resultString.append("(" + synsetHTML + "synset=1" + synset + "\">" + word + " " + arrow);
resultString.append(synsetHTML + "synset=" + targetSynset + "\">" + targetWord + ") ");
resultString.append("(" + termHTML + "term=" + bareSUMOterm + "\">" + bareSUMOterm + "!" + arrow);
resultString.append(termHTML + "term=" + bareTargetSUMO + "\">" + bareTargetSUMO + ")
\n");
result.add(resultString.toString());
if (result.size() > 50) {
result.add("limited to 50 results.");
return result;
}
}
}
}
}
}
}
}
return result;
}
/** ***************************************************************
 * Create an HTML-formatted table that counts WordNet-SUMO mapping
 * types.
 *
 * The values of the noun, verb, adjective and adverb SUMO hashes of
 * WordNet.wn are scanned and tallied by their last character. By
 * position in the returned text the tallies are labelled: equivalent
 * (=), subsuming (+), instance (@), anti-subsuming ([), anti-instance
 * (]) and anti-equivalent (:). The counter named ampersand holds the
 * tally for the @ suffix. Values ending in any other character are not
 * counted. The text also reports the number of keys in each of the
 * four hashes.
 *
 * NOTE(review): the string literals that build the result contain raw
 * line breaks and no table markup in this copy, so the markup was
 * probably stripped during extraction. Restore the literal from the
 * upstream source before compiling - TODO confirm.
 *
 * NOTE(review): a null or empty mapping value would make the charAt
 * call in each loop throw - verify the hashes never hold one.
 *
 * @return the counts formatted as text (documented as an HTML table)
 */
public static String countMappings() {
int equals = 0;
int plus = 0;
int ampersand = 0;
int leftbr = 0;
int rightbr = 0;
int colon = 0;
// Pass 1 of 4: noun mappings.
Iterator it = WordNet.wn.nounSUMOHash.keySet().iterator(); // Keys are synset Strings, values are SUMO
// terms with the &% prefix and =, +, @ or [ suffix.
while (it.hasNext()) {
String key = (String) it.next();
String mapping = (String) WordNet.wn.nounSUMOHash.get(key);
switch (mapping.charAt(mapping.length()-1)) {
case '=': equals++; break;
case '+': plus++; break;
case '@': ampersand++; break;
case '[': leftbr++; break;
case ']': rightbr++; break;
case ':': colon++; break;
}
}
// Pass 2 of 4: verb mappings, added to the same counters.
it = WordNet.wn.verbSUMOHash.keySet().iterator(); // Keys are synset Strings, values are SUMO
// terms with the &% prefix and =, +, @ or [ suffix.
while (it.hasNext()) {
String key = (String) it.next();
String mapping = (String) WordNet.wn.verbSUMOHash.get(key);
switch (mapping.charAt(mapping.length()-1)) {
case '=': equals++; break;
case '+': plus++; break;
case '@': ampersand++; break;
case '[': leftbr++; break;
case ']': rightbr++; break;
case ':': colon++; break;
}
}
// Pass 3 of 4: adjective mappings, added to the same counters.
it = WordNet.wn.adjectiveSUMOHash.keySet().iterator(); // Keys are synset Strings, values are SUMO
// terms with the &% prefix and =, +, @ or [ suffix.
while (it.hasNext()) {
String key = (String) it.next();
String mapping = (String) WordNet.wn.adjectiveSUMOHash.get(key);
switch (mapping.charAt(mapping.length()-1)) {
case '=': equals++; break;
case '+': plus++; break;
case '@': ampersand++; break;
case '[': leftbr++; break;
case ']': rightbr++; break;
case ':': colon++; break;
}
}
// Pass 4 of 4: adverb mappings, added to the same counters.
it = WordNet.wn.adverbSUMOHash.keySet().iterator(); // Keys are synset Strings, values are SUMO
// terms with the &% prefix and =, +, @ or [ suffix.
while (it.hasNext()) {
String key = (String) it.next();
String mapping = (String) WordNet.wn.adverbSUMOHash.get(key);
switch (mapping.charAt(mapping.length()-1)) {
case '=': equals++; break;
case '+': plus++; break;
case '@': ampersand++; break;
case '[': leftbr++; break;
case ']': rightbr++; break;
case ':': colon++; break;
}
}
String result = "equivalent subsuming instance " +
"anti-subsuming anti-instance anti-equivalent \n" +
"" + equals + " " + plus + " " + ampersand + " " +
"" + leftbr + " " + rightbr + " " + colon + "
\n" +
"nouns verbs " +
"adjectives adverbs \n" +
"" + WordNet.wn.nounSUMOHash.keySet().size() +
" " + WordNet.wn.verbSUMOHash.keySet().size() +
" " + WordNet.wn.adjectiveSUMOHash.keySet().size() +
" " + WordNet.wn.adverbSUMOHash.keySet().size() +
"
\n";
return result;
}
/** ***************************************************************
 * Test-only entry point; it should not be called during normal
 * operation. Initializes the KB manager and WordNet, then prints the
 * list returned by nonRelationTermsWithoutSynsets() to standard output.
 *
 * @param args command-line arguments (not used)
 */
public static void main (String[] args) {
    try {
        KBmanager.getMgr().initializeOnce();
    }
    catch (Exception initFailure) {
        // A failed KB initialization is reported but does not stop the run.
        System.out.println(initFailure.getMessage());
    }
    WordNet.wn.initOnce();

    ArrayList termsLackingSynsets = nonRelationTermsWithoutSynsets();
    System.out.println(termsLackingSynsets);
}
}