All Downloads are FREE. Search and download functionalities are using the official Maven repository.

semRewrite.CommonCNFUtil Maven / Gradle / Ivy

Go to download

Natural language processing toolbox using Sigma knowledge engineering system.

There is a newer version: 1.1
Show newest version
package semRewrite;/*
Copyright 2014-2015 IPsoft

Author: Peigen You [email protected]

This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 2 of the License, or
(at your option) any later version.

This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
GNU General Public License for more details.

You should have received a copy of the GNU General Public License
along with this program ; if not, write to the Free Software
Foundation, Inc., 59 Temple Place, Suite 330, Boston,
MA  02111-1307 USA
*/

import com.articulate.sigma.KB;
import com.articulate.sigma.KBmanager;
import com.articulate.sigma.StringUtil;
import semRewrite.CNF;
import semRewrite.Clause;
import com.google.common.collect.Lists;
import org.json.simple.JSONArray;
import org.json.simple.JSONObject;
import org.json.simple.parser.JSONParser;
import org.json.simple.parser.ParseException;

import java.io.*;
import java.util.*;

/***********************************************************
 * A set of static functions that help in dealing with CNF
 *
 * findPath     will find all paths between two literals in the CNF string
 * findOneCommonCNF    will try to find the intersection CNF among a list of CNFs
 * getCommonCNF    will try to find common CNF pair between sentences
 */
public class CommonCNFUtil {

    /** Predicates whose clauses preProcessCNF() drops outright ("root" and "names" are intentionally not listed). */
    private static final List<String> ignorePreds = Arrays.asList("number", "tense");

    // NOTE(review): the KB handle is captured once, when this class is loaded, which is
    // before any initializeOnce() call made by the methods below -- confirm that
    // getKB("SUMO") already returns a usable KB at that point.
    private static final KBmanager kbm;
    private static final KB kb;

    static {
        kbm = KBmanager.getMgr();
        kb = kbm.getKB("SUMO");
    }

    /**
     * Orders clauses by comparing the pred of their first disjunct with compareTo().
     * Only disjunct 0 is inspected, so every compared clause needs at least one disjunct.
     */
    private static final Comparator<semRewrite.Clause> clauseComparator = new Comparator<semRewrite.Clause>() {

        @Override
        public int compare(semRewrite.Clause o1, semRewrite.Clause o2) {

            return o1.disjuncts.get(0).pred.compareTo(o2.disjuncts.get(0).pred);
        }
    };

    /***********************************************************
     * Private constructor: prevents instantiation, since every
     * member of this class is accessed statically.
     */
    private CommonCNFUtil() {

    }

    /***********************************************************
     * Save the intermediate parsing result to a JSON file.
     *
     * Every QAPair becomes one JSON object with the string fields
     * "file", "index", "query", "queryCNF", "answer" and "answerCNF".
     * The objects are written as one JSON array, which is also echoed
     * to stdout.
     *
     * @param list the QA pairs to serialize; queryCNF and answerCNF must
     *             be non-null because toString() is called on them
     * @param path location of the output file (created, or overwritten)
     * @return the absolute path of the output file; it is returned even
     *         when the file could not be opened (the error is printed)
     */
    @SuppressWarnings("unchecked") // JSONObject/JSONArray are used through their raw put/add methods
    public static String saveCNFMaptoFile(List<QAPair> list, String path) {

        File f = new File(path);
        System.out.println(f.getAbsolutePath());
        // No separate createNewFile() step: PrintWriter(File) creates a missing
        // file and truncates an existing one.
        try (PrintWriter pw = new PrintWriter(f)) {
            JSONArray arr = new JSONArray();
            for (QAPair k : list) {
                JSONObject obj = new JSONObject();
                obj.put("file", k.file);
                obj.put("index", "" + k.index);
                obj.put("query", k.query);
                obj.put("queryCNF", k.queryCNF.toString());
                obj.put("answer", k.answer);
                obj.put("answerCNF", k.answerCNF.toString());
                arr.add(obj);
            }
            String json = arr.toJSONString();
            System.out.println(json);
            pw.print(json);
            pw.flush();
        }
        catch (FileNotFoundException e) {
            e.printStackTrace();
        }
        return f.getAbsolutePath();
    }

    /***********************************************************
     * Load the intermediate parsing result to save time.
     *
     * Reads a JSON array of objects carrying the string fields "index",
     * "file", "query", "answer", "queryCNF" and "answerCNF" (the layout
     * written by saveCNFMaptoFile) and rebuilds one QAPair per object,
     * re-parsing both CNF strings with CNF.parseSimple().
     *
     * @param path the JSON file to read
     * @return the QA pairs read so far; I/O and JSON parse errors are
     *         printed and end the load, so the list may be empty or partial
     */
    public static List<QAPair> loadCNFMapfromFile(String path) {

        JSONParser jp = new JSONParser();
        List<QAPair> res = new ArrayList<QAPair>();
        // try-with-resources so the reader is closed on every exit path
        try (Reader in = new FileReader(path)) {
            JSONArray arr = (JSONArray) jp.parse(in);
            for (Object element : arr) {
                JSONObject obj = (JSONObject) element;
                Integer index = Integer.parseInt((String) obj.get("index"));
                String query = (String) obj.get("query");
                String answer = (String) obj.get("answer");
                String file = (String) obj.get("file");
                QAPair item = new QAPair(index, file, query, answer);
                item.queryCNF = CNF.parseSimple(new Lexer((String) obj.get("queryCNF")));
                item.answerCNF = CNF.parseSimple(new Lexer((String) obj.get("answerCNF")));
                res.add(item);
            }
        }
        catch (IOException | ParseException e) {
            e.printStackTrace();
        }
        return res;
    }

    /***********************************************************
     * Load sentences from a text file, one sentence per line.
     * Lines for which StringUtil.emptyString() is true are skipped.
     *
     * @param path the text file to read
     * @return the kept lines in file order; an empty array when the
     *         file cannot be found (the error is printed)
     */
    public static String[] loadSentencesFromTxt(String path) {

        List<String> res = new ArrayList<String>();
        try (Scanner in = new Scanner(new FileReader(path))) {
            while (in.hasNextLine()) {
                String line = in.nextLine();
                if (StringUtil.emptyString(line))
                    continue;
                res.add(line);
            }
        }
        catch (FileNotFoundException e) {
            e.printStackTrace();
        }
        return res.toArray(new String[0]);
    }

    /***********************************************************
     * Load sentences from a text file, one sentence per line, and
     * return them keyed by their zero-based line number.
     *
     * Unlike loadSentencesFromTxt, empty lines are kept, so the keys
     * always match the line positions in the file.
     *
     * @param path the text file to read
     * @return line number to line text; an empty map when the file
     *         cannot be found (the error is printed)
     */
    public static Map<Integer, String> loadSentencesMap(String path) {

        Map<Integer, String> res = new HashMap<Integer, String>();
        try (Scanner in = new Scanner(new FileReader(path))) {
            int index = 0;
            while (in.hasNextLine()) {
                res.put(index++, in.nextLine());
            }
        }
        catch (FileNotFoundException e) {
            e.printStackTrace();
        }
        return res;
    }

    /***********************************************************
     * Load sentences from an "IRtest.json"-like QA pair file.
     *
     * The file must hold a JSON array of objects with the string fields
     * "file", "query" and "answer". Each object becomes a QAPair whose
     * index is its zero-based position in the array; the CNF fields are
     * left unset.
     *
     * @param path the JSON file to read
     * @return the QA pairs read so far; I/O and JSON parse errors are
     *         printed and end the load
     */
    public static List<QAPair> loadSentencesFormJsonFile(String path) {

        JSONParser jp = new JSONParser();
        List<QAPair> res = new ArrayList<QAPair>();
        // try-with-resources so the reader is closed on every exit path
        try (Reader in = new FileReader(path)) {
            JSONArray arr = (JSONArray) jp.parse(in);
            int i = 0;
            for (Object element : arr) {
                JSONObject obj = (JSONObject) element;
                String filename = (String) obj.get("file");
                String query = (String) obj.get("query");
                String answer = (String) obj.get("answer");
                res.add(new QAPair(i++, filename, query, answer));
            }
        }
        catch (IOException | ParseException e) {
            // FileNotFoundException is an IOException, so it is covered here as well
            e.printStackTrace();
        }
        return res;
    }

    /***********************************************************
     * Get rid of "sumo(X,Y)" and other terms defined in ignorePreds.
     *
     * Clauses whose first disjunct has a predicate in ignorePreds are
     * removed. For every clause sumo(term, word), each occurrence of
     * word as arg1 or arg2 of any clause's first disjunct is replaced
     * by term, and then the sumo clause itself is removed.
     *
     * Only the first disjunct of a clause is ever examined.
     *
     * @param cnf the CNF to clean up; it is modified in place
     * @return the same cnf instance
     */
    private static CNF preProcessCNF(CNF cnf) {

        Iterator<semRewrite.Clause> iterator = cnf.clauses.iterator();
        while (iterator.hasNext()) {
            semRewrite.Clause c = iterator.next();
            if (ignorePreds.contains(c.disjuncts.get(0).pred)) {
                iterator.remove();
                continue;
            }
            if (c.disjuncts.get(0).pred.equals("sumo")) {
                String sumoTerm = c.disjuncts.get(0).arg1;
                String word = c.disjuncts.get(0).arg2;
                // Only fields are rewritten in this inner walk, so iterating
                // cnf.clauses while the outer iterator is open is not a
                // structural modification.
                for (semRewrite.Clause m : cnf.clauses) {
                    if (m == null || m.disjuncts == null || m.disjuncts.get(0) == null)
                        continue;
                    if (m.disjuncts.get(0).arg1 != null && m.disjuncts.get(0).arg1.equals(word))
                        m.disjuncts.get(0).arg1 = sumoTerm;
                    if (m.disjuncts.get(0).arg2 != null && m.disjuncts.get(0).arg2.equals(word))
                        m.disjuncts.get(0).arg2 = sumoTerm;
                }
                // remove sumo() clauses
                iterator.remove();
            }
        }
        return cnf;
    }

    /***********************************************************
     * Interpret the query and the answer of every QA pair into a CNF,
     * clean each CNF with preProcessCNF(), print it, and store it back
     * into the pair (queryCNF / answerCNF).
     *
     * @param list the QA pairs to fill in; modified in place
     * @throws IOException declared for the interpreter/KB set-up calls
     */
    private static void generateCNFForQAPairs(List<QAPair> list) throws IOException {

        Interpreter inter = new Interpreter();
        KBmanager.getMgr().initializeOnce();
        inter.initialize();
        for (QAPair q : list) {
            CNF queryCnf = preProcessCNF(inter.interpretGenCNF(q.query));
            System.out.println(queryCnf);
            q.queryCNF = queryCnf;
            CNF answerCnf = preProcessCNF(inter.interpretGenCNF(q.answer));
            System.out.println(answerCnf);
            q.answerCNF = answerCnf;
        }
    }

    /***********************************************************
     * Interpret every sentence into a CNF, clean it with
     * preProcessCNF() and print it.
     *
     * A sentence whose interpretation throws is reported on stdout and
     * left out of the result, so the returned map can have fewer keys
     * than the input.
     *
     * @param sentences sentence index to sentence text
     * @return sentence index to cleaned CNF, for the sentences that
     *         were interpreted successfully
     * @throws IOException declared for the interpreter/KB set-up calls
     */
    public static Map<Integer, CNF> generateCNFForStringSet(Map<Integer, String> sentences) throws IOException {

        Map<Integer, CNF> res = new HashMap<Integer, CNF>();
        Interpreter inter = new Interpreter();
        KBmanager.getMgr().initializeOnce();
        inter.initialize();
        for (Map.Entry<Integer, String> entry : sentences.entrySet()) {
            String q = entry.getValue();
            try {
                CNF cnf = inter.interpretGenCNF(q);
                cnf = preProcessCNF(cnf);
                System.out.println(cnf);
                res.put(entry.getKey(), cnf);
            }
            catch (Exception e) {
                System.out.println("Exception occurs in " + q);
                e.printStackTrace();
            }
        }
        return res;
    }

    /***********************************************************
     * Fold a collection of CNFs into one common CNF by repeatedly
     * applying unification(), in the iteration order of the input.
     * Each input CNF and the final result are printed to stdout.
     *
     * Note that unification() sorts the clause lists of its arguments
     * in place, so the input CNFs may come back reordered.
     *
     * @param input the CNFs to intersect
     * @return a new empty CNF for empty input, the single element
     *         itself for one-element input, otherwise the folded result
     */
    public static CNF findOneCommonCNF(Collection<CNF> input) {

        CNF res = new CNF();
        boolean isFirst = true;
        System.out.println("Among the following CNF: \n");
        for (CNF c : input) {
            System.out.println(c);
            if (isFirst) {
                // the first CNF seeds the fold
                res = c;
                isFirst = false;
            }
            else {
                res = unification(res, c);
            }
        }
        System.out.println("\n The common CNF is " + res);
        return res;
    }

    /***********************************************************
     * Build the CNF of clauses shared by two CNFs.
     *
     * Both clause lists are first sorted in place with
     * clauseComparator. Then, for every clause of unifier, the clauses
     * of unified are tried in order and the first pairing for which
     * isRelated() yields a clause contributes that clause to the result.
     * isRelated() is always handed deep copies.
     *
     * @return a new CNF, possibly without clauses
     */
    private static CNF unification(CNF unifier, CNF unified) {

        CNF common = new CNF();
        unifier.clauses.sort(clauseComparator);
        unified.clauses.sort(clauseComparator);
        for (semRewrite.Clause left : unifier.clauses) {
            for (semRewrite.Clause right : unified.clauses) {
                semRewrite.Clause merged = isRelated(left.deepCopy(), right.deepCopy());
                if (merged == null)
                    continue;
                common.clauses.add(merged);
                break;
            }
        }
        return common;
    }

    /***********************************************************
     * Try to generalize two clauses into one.
     *
     * Only the first disjunct of each clause is compared. The
     * predicates must be equal, and findCommonAncesstor() must give a
     * non-null result for the arg1 pair as well as for the arg2 pair.
     *
     * @return a new single-literal clause pred(ancestor1, ancestor2),
     *         or null when the clauses cannot be generalized
     */
    private static semRewrite.Clause isRelated(semRewrite.Clause m, semRewrite.Clause n) {

        if (!m.disjuncts.get(0).pred.equals(n.disjuncts.get(0).pred))
            return null;
        String leftArg1 = m.disjuncts.get(0).arg1;
        String rightArg1 = n.disjuncts.get(0).arg1;
        String leftArg2 = m.disjuncts.get(0).arg2;
        String rightArg2 = n.disjuncts.get(0).arg2;
        // both lookups are always performed, in this order
        String ancestor1 = findCommonAncesstor(leftArg1, rightArg1);
        String ancestor2 = findCommonAncesstor(leftArg2, rightArg2);
        if (ancestor1 == null || ancestor2 == null)
            return null;
        Literal generalized = new Literal();
        generalized.pred = m.disjuncts.get(0).pred;
        generalized.arg1 = ancestor1;
        generalized.arg2 = ancestor2;
        semRewrite.Clause result = new semRewrite.Clause();
        result.disjuncts = Lists.newArrayList(generalized);
        return result;
    }

    /***********************************************************
     * Flatten a list of QA pairs into two index-aligned maps.
     *
     * Pair number p (zero-based) puts its query under key 2p and its
     * answer under key 2p + 1, in both maps.
     *
     * @param list      the QA pairs to flatten
     * @param sentences output: index to sentence text
     * @param cnfs      output: index to the CNF of that sentence
     */
    private static void transformQAPairListtoCNFSet(List<QAPair> list, Map<Integer, String> sentences,
                                                    Map<Integer, CNF> cnfs) {

        int index = 0;
        for (QAPair q : list) {
            sentences.put(index, q.query);
            cnfs.put(index, q.queryCNF);
            index++;
            sentences.put(index, q.answer);
            cnfs.put(index, q.answerCNF);
            index++;
        }
    }

    /***********************************************************
     */
    private static Map> getCommonCNF(Map map) {

        Map> res = new HashMap>();
        HashMap bindmap;
        for (Integer i = 0; i < map.keySet().size(); i++) {
            CNF cnfOut = map.get(i);
            Map mapfori = new HashMap();
            for (Integer j = i + 1; j < map.keySet().size(); ++j) {
                CNF cnfIn = map.get(j);
                CNF cnfnew = unification(cnfIn, cnfOut);
                if (cnfnew.clauses.size() > 0)
                    mapfori.put(j, cnfnew);
            }
            res.put(i, mapfori);
        }
        Iterator>> iterator = res.entrySet().iterator();
        while (iterator.hasNext()) {
            Map.Entry e = iterator.next();
            if (e.getValue() == null)
                iterator.remove();
        }
        return res;
    }

    /***********************************************************
     * Find a closest common ancestor of two SUMO terms in the SUMO
     * term family tree.
     *
     * A term's ancestors are the union of its "subclass",
     * "subrelation" and "subAttribute" parents in kb.kbCache, plus the
     * term itself. A common ancestor is returned when none of the other
     * common ancestors is listed among its children for those three
     * relations. If several candidates qualify, which one is returned
     * depends on hash-set iteration order.
     *
     * All sets are built as fresh copies. The original added to, and
     * removed from, the sets handed out by kb.kbCache, which altered
     * the cache on every call.
     *
     * @param s1 first term
     * @param s2 second term
     * @return "?X" when either argument contains '"' or '-' or starts
     *         with '?'; null when an argument is null or no common
     *         ancestor exists; otherwise the chosen ancestor
     */
    private static String findCommonAncesstor(String s1, String s2) {

        if (s1 == null || s2 == null)
            return null;
        if (s1.contains("\"") || s1.contains("-") || s2.contains("-") || s2.contains("\"")
                || s1.startsWith("?") || s2.startsWith("?"))
            return "?X";
        String[] relations = {"subclass", "subrelation", "subAttribute"};
        Set<String> common = new HashSet<String>();
        Set<String> p2 = new HashSet<String>();
        for (String rel : relations) {
            HashSet<String> m = kb.kbCache.parents.get(rel).get(s1);
            if (m != null)
                common.addAll(m);
            m = kb.kbCache.parents.get(rel).get(s2);
            if (m != null)
                p2.addAll(m);
        }
        common.add(s1);
        p2.add(s2);
        common.retainAll(p2);
        if (common.isEmpty())
            return null;
        for (String k : common) {
            Set<String> children = new HashSet<String>();
            for (String rel : relations) {
                HashSet<String> m = kb.kbCache.children.get(rel).get(k);
                if (m != null)
                    children.addAll(m);
            }
            // k is closest when no other common ancestor sits below it
            if (Collections.disjoint(children, common))
                return k;
        }
        return null;
    }

    /***********************************************************
     * Keep in c1 only the elements that c2 also contains.
     *
     * @param c1 the collection to filter; it is modified in place
     * @param c2 the collection to test membership against
     * @return c1 itself, after filtering
     */
    @SuppressWarnings("unchecked")
    private static Collection getCommon(Collection c1, Collection c2) {

        c1.removeIf(candidate -> !c2.contains(candidate));
        return c1;
    }

    /***********************************************************
     * reverse the map to get the reversed result for further analysis
     */
    private static Map>> reverseMap(Map> input) {

        Map>> res = new HashMap>>();
        for (Integer i : input.keySet()) {
            Map m = input.get(i);
            for (Integer j : m.keySet()) {
                CNF cnf = m.get(j);
                Set> mid = res.get(cnf);
                if (mid == null)
                    mid = new HashSet>();
                mid.add(new Pair(i, j));
                res.put(cnf, mid);
            }
        }
        return res;
    }

    /***********************************************************
     */
    public static class Pair {

        F first;
        S second;

        public Pair(F f, S s) {

            first = f;
            second = s;
        }

        @Override
        public boolean equals(Object o) {

            if (!(o instanceof Pair))
                return false;
            Pair p = (Pair) o;
            return (p.first.equals(this.first) && p.second.equals(this.second)) ||
                    (p.first.equals(this.second) && p.second.equals(this.first));
        }

        @Override
        public int hashCode() {

            return first.hashCode() + second.hashCode();
        }

        @Override
        public String toString() {

            return '[' + first.toString() + ',' + second.toString() + ']';
        }
    }

    /***********************************************************
     * DTO class for json input and output: one question/answer pair
     * together with its source file, its index, and the CNFs of both
     * sentences. The CNF fields are not set by the constructor.
     */
    public static class QAPair {

        Integer index;
        String file;
        String query;
        String answer;
        CNF queryCNF;
        CNF answerCNF;

        public QAPair(Integer index, String file, String query, String answer) {

            this.index = index;
            this.file = file;
            this.query = query;
            this.answer = answer;
        }

        public String getQuery() {

            return query;
        }

        public String getAnswer() {

            return answer;
        }

        public Integer getIndex() {

            return index;
        }

        public String getFile() {

            return file;
        }

        public CNF getQueryCNF() {

            return queryCNF;
        }

        public CNF getAnswerCNF() {

            return answerCNF;
        }

        /**
         * Renders the pair as a JSON-looking block, one field per line.
         * Values are inserted as-is, without any escaping.
         */
        @Override
        public String toString() {

            return new StringBuilder()
                    .append("{\n")
                    .append("  \"index\":\"").append(index).append("\",\n")
                    .append("  \"file\":\"").append(file).append("\",\n")
                    .append("  \"query\":\"").append(query).append("\",\n")
                    .append("  \"queryCNF\":\"").append(queryCNF).append("\",\n")
                    .append("  \"answer\":\"").append(answer).append("\",\n")
                    .append("  \"answerCNF\":\"").append(answerCNF).append("\"\n")
                    .append("}\n")
                    .toString();
        }
    }

    /***********************************************************
     * Load a text file (one sentence per line) and generate one
     * common CNF for all its sentences.
     *
     * Every sentence is printed together with its CNF; a sentence
     * whose interpretation failed prints "null" for its CNF.
     *
     * @param path the text file to read
     * @return the CNF produced by findOneCommonCNF() over all
     *         successfully interpreted sentences
     * @throws IOException propagated from generateCNFForStringSet()
     */
    public static CNF loadFileAndFindCommonCNF(String path) throws IOException {

        Map<Integer, String> strs = loadSentencesMap(path);
        Map<Integer, CNF> cnfMap = semRewrite.CommonCNFUtil.generateCNFForStringSet(strs);
        System.out.println("\nSentences are:\n");
        for (Map.Entry<Integer, String> sentence : strs.entrySet()) {
            System.out.println(sentence.getValue());
            System.out.println(cnfMap.get(sentence.getKey()));
        }
        return semRewrite.CommonCNFUtil.findOneCommonCNF(cnfMap.values());
    }

    /***********************************************************
     * Find all paths between two literals in a CNF.
     *
     * Delegates to findPathBFS() with an empty starting path and a
     * depth budget of 10.
     *
     * @param cnf the CNF whose clauses are searched
     * @param s1  the argument to start from
     * @param s2  the argument to reach
     * @return one CNF per path found; empty when there is none
     */
    private static ArrayList<CNF> findAllPathBetweenLiterals(CNF cnf, String s1, String s2) {

        ArrayList<CNF> res = new ArrayList<CNF>();
        findPathBFS(cnf.clauses, s1, s2, new ArrayList<>(), 10, res);
        return res;
    }

    /***********************************************************
     * Recursive path search between two arguments. Despite the name
     * this is a depth-first search with backtracking.
     *
     * A path is a chain of clauses. From the current argument s1, every
     * clause that mentions s1, is not ignored by isIgnore() and is not
     * already on the path is appended, and the search continues from
     * that clause's other argument. When some clause on the path
     * mentions s2, a copy of the path is recorded as a CNF.
     *
     * @param clauses all clauses that may be used
     * @param s1      the argument the search currently stands on
     * @param s2      the argument to reach
     * @param path    clauses chosen so far; restored before returning
     * @param max     remaining depth budget; the call returns at once
     *                when it is negative
     * @param res     output list receiving one CNF per path found
     */
    private static void findPathBFS(ArrayList<semRewrite.Clause> clauses, String s1, String s2,
                                    ArrayList<semRewrite.Clause> path, int max, ArrayList<CNF> res) {

        if (max < 0) return;
        if (isContained(path, s2)) {
            CNF cnf = new CNF();
            // copy: path keeps changing while the search backtracks
            cnf.clauses = Lists.newArrayList(path);
            res.add(cnf);
            return;
        }
        for (semRewrite.Clause c : clauses) {
            if (!isContained(c, s1))
                continue;
            if (isIgnore(c) || path.contains(c))
                continue;
            path.add(c);
            findPathBFS(clauses, getOtherArgument(c, s1), s2, path, max - 1, res);
            path.remove(c);
        }
    }

    /***********************************************************
     * The ignore list for finding paths between literals: a clause is
     * ignored when the predicate of its first disjunct is "number",
     * "sumo" or "tense".
     */
    private static boolean isIgnore(semRewrite.Clause c) {

        return Arrays.asList("number", "sumo", "tense").contains(c.disjuncts.get(0).pred);
    }

    /***********************************************************
     * Tell whether s occurs as arg1 or arg2 of any disjunct of the
     * clause.
     */
    private static boolean isContained(semRewrite.Clause c, String s) {

        boolean found = false;
        Iterator<Literal> literals = c.disjuncts.iterator();
        while (!found && literals.hasNext()) {
            Literal candidate = literals.next();
            found = s.equals(candidate.arg1) || s.equals(candidate.arg2);
        }
        return found;
    }

    /***********************************************************
     * Tell whether s occurs as an argument of any clause in the list
     * (see the single-clause overload for the per-clause test).
     *
     * @param arr the clauses to inspect
     * @param s   the argument to look for
     * @return true as soon as one clause mentions s; false for an
     *         empty list
     */
    private static boolean isContained(ArrayList<semRewrite.Clause> arr, String s) {

        for (semRewrite.Clause c : arr) {
            if (isContained(c, s))
                return true;
        }
        return false;
    }

    /***********************************************************
     * Get the other argument of the clause's first disjunct: arg2 when
     * arg1 equals the given argument, arg1 in every other case.
     */
    private static String getOtherArgument(Clause c, String arg) {

        return c.disjuncts.get(0).arg1.equals(arg)
                ? c.disjuncts.get(0).arg2
                : c.disjuncts.get(0).arg1;
    }

    /***********************************************************
     * Find all the paths between two literals and print each one.
     *
     * Every null argument is replaced by a built-in sample value, so
     * findPath(null, null, null) runs a self-contained demo.
     *
     * @param cnfStr the CNF in the textual form accepted by
     *               CNF.parseSimple(), or null for the sample CNF
     * @param s1     the argument to start from, or null for "biodiesel-8"
     * @param s2     the argument to reach, or null for "country-2"
     */
    public static void findPath(String cnfStr, String s1, String s2) {

        if (cnfStr == null)
            cnfStr = "sumo(Nation,country-2), number(SINGULAR,country-2), det(country-2,which-1), root(ROOT-0,be-3), tense(PRESENT,be-3), dep(be-3,country-2), sumo(Object,world-5), number(SINGULAR,world-5), det(world-5,the-4), number(SINGULAR,biodiesel-8), sumo(Position,producer-9), number(SINGULAR,producer-9), nsubj(be-3,producer-9), poss(producer-9,world-5), amod(producer-9,largest-7), nn(producer-9,biodiesel-8)";
        if (s1 == null)
            s1 = "biodiesel-8";
        if (s2 == null)
            s2 = "country-2";
        CNF cnf = CNF.parseSimple(new Lexer(cnfStr));
        ArrayList<CNF> res = findAllPathBetweenLiterals(cnf, s1, s2);
        for (CNF c : res) {
            System.out.println(c);
        }
    }

    /***********************************************************
     */
//    public static void testJSONQAPair() {
//
//        List list = loadSentencesFormJsonFile("test/corpus/java/resources/IRtests.json");
//        generateCNFForQAPairs(list);
//        String path = saveCNFMaptoFile(list, "cache.json");
//        list = loadCNFMapfromFile("cache.json");
//        for (QAPair e : list)
//            System.out.println(e);
//        Map cnfs = new HashMap();
//        Map sentences = new HashMap();
//        transformQAPairListtoCNFSet(list, sentences, cnfs);
//        Map> rr = getCommonCNF(cnfs);
//        for (Map.Entry e : rr.entrySet()) {
//            System.out.println(e);
//        }
//        Map>> re = reverseMap(rr);
//        for (CNF cnf : re.keySet()) {
//            System.out.println(cnf.toString() + re.get(cnf));
//        }
//    }

//    public static void testFile() {
//
//        String path = "/Users/peigenyou/workspace/test.txt";
//        CNF cnf = loadFileAndFindCommonCNF(path);
//    }

    /***********************************************************
     * Demo entry point: runs findPath() with all-null arguments, which
     * makes it fall back to its built-in sample CNF and literals.
     * The command-line arguments are not used.
     */
    public static void main(String[] args) {

        // Earlier experiments, kept disabled:
//        String[] strings = new String[]{"Amelia flies.", "John walks."};
//        testJSONQAPair();
//        testFile();
        findPath(null, null, null);
    }
}




© 2015 - 2024 Weber Informatics LLC | Privacy Policy