All Downloads are FREE. Search and download functionalities are using the official Maven repository.

com.articulate.sigma.OMWordnet Maven / Gradle / Ivy

Go to download

The Sigma knowledge engineering system is a system for developing, viewing and debugging theories in first-order logic. It works with the Knowledge Interchange Format (KIF) and is optimized for the Suggested Upper Merged Ontology (SUMO), www.ontologyportal.org.

The newest version!
package com.articulate.sigma;

import java.io.File;
import java.io.FileReader;
import java.io.FileWriter;
import java.io.IOException;
import java.io.LineNumberReader;
import java.io.PrintWriter;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.HashMap;
import java.util.Iterator;

import com.articulate.sigma.KB;

public class OMWordnet {

/** This code is copyright Articulate Software (c) 2003.  Some portions
copyright Teknowledge (c) 2003 and reused under the terms of the GNU license.
This software is released under the GNU Public License .
Users of this code also consent, by use of this code, to credit Articulate Software
and Teknowledge in any writings, briefings, publications, presentations, or
other representations of any software which incorporates, builds on, or uses this
code.  Please cite the following article in any publication with references:

Pease, A., (2003). The Sigma Ontology Development Environment,
in Working Notes of the IJCAI-2003 Workshop on Ontology and Distributed Systems,
August 9, Acapulco, Mexico.
 */
    // String key of language name
    // Interior key of a 9-digit WordNet synset and value of and ArrayList of 
    // non-English synset Strings
    public static HashMap>> wordnets = 
            new HashMap>>();
    public static HashMap>> glosses = 
            new HashMap>>();
    public static HashMap>> examples = 
            new HashMap>>();
    
    /** *************************************************************
     * Translate the suffix character that
     * WordNetUtilities.getSUMOMappingSuffix() reports for a SUMO mapping
     * string into the symbol written by generateOMWformat():
     * '=' stays '=', '+' becomes '⊂', '@' becomes '∈', ':' becomes '≠'
     * and '[' becomes '⊃'.
     * @param SUMOmapping the SUMO mapping string to inspect
     * @return the translated symbol, or a single space for any other suffix
     */
    private static char getOMWMappingSuffix(String SUMOmapping) {

        char omwSuffix;
        switch (WordNetUtilities.getSUMOMappingSuffix(SUMOmapping)) {
            case '=':
                omwSuffix = '=';
                break;
            case '+':
                omwSuffix = '⊂';
                break;
            case '@':
                omwSuffix = '∈';
                break;
            case ':':
                omwSuffix = '≠';
                break;
            case '[':
                omwSuffix = '⊃';
                break;
            default:
                omwSuffix = ' ';
                break;
        }
        return omwSuffix;
    }
    
    /** *************************************************************
     * Write the WordNet-to-SUMO mappings held in WordNet.wn to a file.
     * After a one-line "# SUMO ..." header, each mapping is written as
     * four tab-separated fields: key-POS, "sumo:xref", the bare SUMO
     * term and the symbol from getOMWMappingSuffix().  Each part of speech
     * gets its own suffix letter (n, v, a, r) so that verb, adjective and
     * adverb rows are not labelled as nouns.  Mappings whose SUMO string
     * contains a space are skipped.
     *
     * NOTE(review): FileWriter uses the platform default charset; confirm
     * it can encode the non-ASCII mapping symbols.
     *
     * @param fileWithPath path of the file to create or overwrite
     */
    private static void generateOMWformat(String fileWithPath) {

        //System.out.println("INFO in WordNetUtilities.generateOMWformat(): writing file " + fileWithPath);
        PrintWriter pw = null;
        try {
            pw = new PrintWriter(new FileWriter(new File(fileWithPath)));
            pw.println("# SUMO http://www.ontologyportal.org");
            for (String key : WordNet.wn.nounSUMOHash.keySet())
                writeOMWmapping(pw, key, 'n', WordNet.wn.nounSUMOHash.get(key));
            for (String key : WordNet.wn.verbSUMOHash.keySet())
                writeOMWmapping(pw, key, 'v', WordNet.wn.verbSUMOHash.get(key));
            for (String key : WordNet.wn.adjectiveSUMOHash.keySet())
                writeOMWmapping(pw, key, 'a', WordNet.wn.adjectiveSUMOHash.get(key));
            for (String key : WordNet.wn.adverbSUMOHash.keySet())
                writeOMWmapping(pw, key, 'r', WordNet.wn.adverbSUMOHash.get(key));
        }
        catch (IOException ioe) {
            System.out.println(ioe.getMessage());
            ioe.printStackTrace();
        }
        finally {
            // Closing flushes the buffered output; without it the file can
            // be left empty or truncated.
            if (pw != null)
                pw.close();
        }
    }

    /** *************************************************************
     * Write one mapping row for generateOMWformat(), unless the SUMO
     * mapping string contains a space.
     */
    private static void writeOMWmapping(PrintWriter pw, String key, char pos, String SUMOterm) {

        String mappingSuffix = Character.toString(getOMWMappingSuffix(SUMOterm));
        if (SUMOterm.indexOf(" ") < 0)
            pw.println(key + "-" + pos + "\tsumo:xref\t" +
                    WordNetUtilities.getBareSUMOTerm(SUMOterm) + "\t" + mappingSuffix);
    }

    /** *************************************************************
     */
    private static void readOMWformat(String inputFileWithPath, String langName) {
        
        //System.out.println("INFO in WordNetUtilities.readOMWformat(): creating table entry for " + langName);
        HashMap> wordnet = new HashMap>();
        wordnets.put(langName,wordnet);
        HashMap> gloss = new HashMap>();
        glosses.put(langName,gloss);
        HashMap> example = new HashMap>();
        examples.put(langName,example);
        File inputf = new File(inputFileWithPath);
        if (!inputf.exists()) return;
        String line = "";
        //System.out.println("INFO in WordNetUtilities.readOMWformat(): read file " + inputFileWithPath);
        try {
            FileReader fr = new FileReader(inputf);
            LineNumberReader lr = new LineNumberReader(fr);
            while ((line = lr.readLine()) != null) {
                if (line.startsWith("#")) continue;
                //System.out.println(line);
                int tabIndex = line.indexOf("\t");
                if (tabIndex > -1) {
                    String id = line.substring(0,tabIndex);
                    int tab2index = line.indexOf("\t",tabIndex+1);
                    if (tab2index > -1) { 
                        //System.out.println(tabIndex + " " + tab2index);
                        String type = line.substring(tabIndex+1,tab2index);  
                        if (type.endsWith("lemma")) {
                            int end = line.length();
                            String value = line.substring(tab2index+1,end);  
                            ArrayList val = wordnet.get(id);
                            if (val == null)
                                val = new ArrayList();
                            val.add(value);
                            wordnet.put(id,val);
                        }
                        if (type.contains(":def ")) {
                            int end = line.length();
                            String value = line.substring(tab2index+1,end);  
                            ArrayList val = gloss.get(id);
                            if (val == null)
                                val = new ArrayList();
                            val.add(value);
                            gloss.put(id,val);
                        }
                        if (type.contains(":exe ")) {
                            int end = line.length();
                            String value = line.substring(tab2index+1,end);  
                            ArrayList val = example.get(id);
                            if (val == null)
                                val = new ArrayList();
                            val.add(value);
                            example.put(id,val);
                        }
                    }
                }            
            }
        }
        catch (IOException ioe) {
            System.out.println(ioe.getMessage());
            ioe.printStackTrace();
        }
    }

    /** *************************************************************
     * Parallel lists: lcodes holds the language codes used in the OMW
     * directory and file names (see readOMWfiles()), and lnames holds the
     * SUMO language term at the same index.  "fra" and "fre" both map to
     * FrenchLanguage, so languageToCode("FrenchLanguage") yields "fra",
     * the first match.
     */
    public static ArrayList<String> lcodes = new ArrayList<String>(Arrays.asList("als","arb",
            "cat","cmn","dan","eng","eus","fas","fin",
            "fra","fre","glg","heb","ita","ind","jpn","nno","nob","pol","por","spa","tha","zsm"));
    public static ArrayList<String> lnames = new ArrayList<String>(Arrays.asList("AlbanianLanguage",
            "ArabicLanguage","CatalanLanguage","ChineseLanguage",
            "DanishLanguage","EnglishLanguage","BasqueLanguage","FarsiLanguage","FinnishLanguage",
            "FrenchLanguage","FrenchLanguage","GalicianLanguage","HebrewLanguage","ItalianLanguage",
            "IndonesianLanguage","JapaneseLanguage","NorwegianNorskLanguage","NorwegianBokmalLanguage",
            "PolishLanguage","PortugueseLanguage","SpanishLanguage","ThaiLanguage","MalayLanguage"));
    
    /** *************************************************************
     * Look up the SUMO language term for a language code.
     * @param code a language code as listed in lcodes
     * @return the entry of lnames at the same index, or "" when the code
     * is not in lcodes
     */
    public static String codeToLanguage(String code) {

        int position = lcodes.indexOf(code);
        if (position < 0)
            return "";
        return lnames.get(position);
    }

    /** *************************************************************
     * Look up the language code for a SUMO language term.
     * @param lang a language term as listed in lnames
     * @return the entry of lcodes at the index of the first match, or ""
     * when the term is not in lnames
     */
    public static String languageToCode(String lang) {

        int position = lnames.indexOf(lang);
        if (position < 0)
            return "";
        return lcodes.get(position);
    }
    
    /** *************************************************************
     * Convert a 9-digit, POS-prefixed WordNet synset to a POS-suffix
     * OMW synset: the first character is mapped to a letter with
     * WordNetUtilities.posNumberToLetter() and appended, after a "-",
     * to the remaining eight characters.
     * @param synset the POS-prefixed synset id
     * @return the OMW form, or the input unchanged (after printing an
     * error) when its length is not 9
     */
    public static String toOMWsynset(String synset) {

        if (synset.length() == 9) {
            char posLetter = WordNetUtilities.posNumberToLetter(synset.charAt(0));
            StringBuilder omw = new StringBuilder(synset.substring(1));
            omw.append("-").append(posLetter);
            return omw.toString();
        }
        System.out.println("Error in OMWordnet.toOMWsynset(): synset not 9 digits: " + synset);
        return synset;
    }
    
    /** *************************************************************
     * Convert a POS-suffix OMW synset to an 8-digit WordNet synset by
     * dropping the last two characters (the "-" and the POS letter).
     * @param synset a 10-character OMW synset id
     * @return the id without its two-character suffix, or the input
     * unchanged (after printing an error) when its length is not 10
     */
    public static String fromOMWsynset(String synset) {

        //System.out.println("INFO in OMWordnet.fromOMWsynset(): " + synset);
        if (synset.length() != 10) {
            // The message states the length that is actually checked.
            System.out.println("Error in OMWordnet.fromOMWsynset(): synset not 10 characters: " + synset);
            return synset;
        }
        return synset.substring(0,synset.length()-2);
    }
    
    /** *************************************************************
     * Load every language listed in lcodes with readOMWformat().
     * Assumes a fixed layout under the "kbDir" preference directory:
     * OMW/CODE/wn-data-CODE.tab for each code.  The last 15 characters of
     * each file name are echoed to standard output as progress.
     */
    public static void readOMWfiles() {

        String omwRoot = KBmanager.getMgr().getPref("kbDir") + File.separator + "OMW";
        System.out.println("INFO in OMWordnet.readOMWfiles(): reading files: ");
        for (String code : lcodes) {
            String filename = omwRoot + File.separator + code + File.separator +
                    "wn-data-" + code + ".tab";
            int tailStart = filename.length() - 15;
            System.out.print(" " + filename.substring(tailStart));
            readOMWformat(filename,code);
        }
        System.out.println();
    }
    
    /** *************************************************************
     * Write header text to OMW/OMW.owl under the "kbDir" preference
     * directory.  Only header lines are written here, and the kb
     * parameter is not used by this method.
     *
     * NOTE(review): the empty println() calls look like header lines
     * whose markup was lost from this copy of the source, and the
     * "Produced on date" text is a fixed string; confirm both against the
     * original file.
     *
     * @param kb currently unused
     */
    public static void generateOMWOWLformat(KB kb) {

        //System.out.println("INFO in WordNetUtilities.generateOMWformat(): writing file ");
        PrintWriter pw = null;
        try {
            String kbDir = KBmanager.getMgr().getPref("kbDir");
            File f = new File(kbDir + File.separator + "OMW" + 
                    File.separator + "OMW.owl");
            pw = new PrintWriter(new FileWriter(f));
            pw.println("");
            pw.println("");
            pw.println("A provisional and necessarily lossy translation to OWL.  Please see");
            pw.println("www.ontologyportal.org for the original KIF, which is the authoritative");
            pw.println("source.  This software is released under the GNU Public License");
            pw.println("www.gnu.org.Produced on date: Tue Sep 03 11:07:34 PDT 2013");
            pw.println("");
            pw.println("");
            pw.println("");            
        }
        catch (IOException ioe) {
            System.out.println(ioe.getMessage());
            ioe.printStackTrace();
        }
        finally {
            // Closing flushes the buffered text; without it the file can
            // be left empty.
            if (pw != null)
                pw.close();
        }
    }
    
    /** ***************************************************************
     * Format a list of the words, in the given language, for the synsets
     * that WordNet.wn.SUMOHash associates with a SUMO term.  At most the
     * first 50 synsets are examined; "..." is appended when there are
     * more.  Words are separated by ", ".
     *
     * NOTE(review): the empty strings appended around each word look like
     * markup that was lost from this copy of the source, which would
     * explain why kbName and href are unused here; confirm against the
     * original file.
     *
     * @param term is the SUMO term
     * @param kbName is not used by the code as it stands
     * @param lang is the SUMO term for a language (EnglishLanguage, FrenchLanguage etc)
     * @param href is not used by the code as it stands
     * @return the formatted words, or "" when no wordnet is loaded for
     * the language or the term has no synsets
     */
    public static String formatWords(String term, String kbName, String lang, String href) {

        //System.out.println("INFO in OMWordnet.formatWords(): " + term + " " + lang);
        HashMap<String,ArrayList<String>> wordnet = wordnets.get(languageToCode(lang));
        if (wordnet == null || wordnet.size() == 0)
            return "";
        ArrayList<String> synsets = WordNet.wn.SUMOHash.get(term);
        // A term without any synset mapping has nothing to show.
        if (synsets == null)
            return "";
        StringBuilder result = new StringBuilder();
        boolean wroteWord = false;
        int limit = Math.min(synsets.size(), 50);
        for (int i = 0; i < limit; i++) {
            ArrayList<String> words = wordnet.get(toOMWsynset(synsets.get(i)));
            if (words == null)
                continue;
            for (int j = 0; j < words.size(); j++) {
                // Separate from the previous word only when there is one,
                // so synsets without words never leave a dangling ", ".
                if (wroteWord)
                    result.append(", ");
                result.append("");
                result.append(words.get(j));
                result.append("");
                wroteWord = true;
            }
        }
        if (synsets.size() > 50)
            result.append("...");
        return result.toString();
    }
    
    /** *************************************************************
     * NOTE(review): this region appears to have been damaged when the
     * source was extracted.  The text between "if (i" and the following
     * quote is missing, so the end of formatArrayList() and the signature
     * of the method that follows it are not recoverable from this file.
     * That following method loops over lnames/lcodes and appends, for
     * each language with data for a synset, the language name (minus its
     * last 8 characters) and the formatted words, definitions and
     * examples taken from wordnets, glosses and examples.  Several string
     * literals below are empty or split across lines, and the final
     * lines, including main(), are collapsed onto one line so that the
     * "//" in front of readOMWfiles() comments out the rest of that line.
     * Restore this region from the original OMWordnet.java before
     * compiling.
     *
     * As far as it is visible, formatArrayList() returns "" for a null
     * list and otherwise appends each element, in order, to a buffer.
     */
    private static String formatArrayList(ArrayList al) {
        
        if (al == null) return "";
        StringBuffer sb = new StringBuffer();
        for (int i = 0; i < al.size(); i++) {
            sb.append(al.get(i));
            if (i"); 
        String name = "";
        String id = "";
        ArrayList words = null;
        ArrayList exams = null;
        ArrayList defs = null;
        for (int i = 0; i < lnames.size(); i++) {
            name = lnames.get(i);
            id = lcodes.get(i);
            words = wordnets.get(id).get(synset);
            exams = examples.get(id).get(synset);
            defs = glosses.get(id).get(synset);
            if (words != null || exams != null || defs != null) {
                sb.append("" + name.substring(0,name.length()-8) + "\n");
                sb.append("");
                if (words != null)
                    sb.append(formatArrayList(words) + "
\n"); if (defs != null) sb.append("" + formatArrayList(defs) + "
\n"); if (exams != null) sb.append("" + formatArrayList(exams) + "\n"); sb.append("\n"); } } sb.append("\n"); return sb.toString(); } /** *************************************************************** * A main method, used only for testing. It should not be called * during normal operation. */ public static void main (String[] args) { try { KBmanager.getMgr().initializeOnce(); //readOMWfiles(); System.out.println(formatWords("Table","","FrenchLanguage","")); } catch (Exception e) { System.out.println("Error in OMWordnet.main(): Exception: " + e.getMessage()); e.printStackTrace(); } } }




© 2015 - 2024 Weber Informatics LLC | Privacy Policy