semRewrite.CommonCNFUtil Maven / Gradle / Ivy
Go to download
Show more of this group Show more artifacts with this name
Show all versions of sigma-nlp Show documentation
Show all versions of sigma-nlp Show documentation
Natural language processing toolbox using Sigma knowledge engineering system.
package semRewrite;/*
Copyright 2014-2015 IPsoft
Author: Peigen You [email protected]
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 2 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program ; if not, write to the Free Software
Foundation, Inc., 59 Temple Place, Suite 330, Boston,
MA 02111-1307 USA
*/
import com.articulate.sigma.KB;
import com.articulate.sigma.KBmanager;
import com.articulate.sigma.StringUtil;
import semRewrite.CNF;
import semRewrite.Clause;
import com.google.common.collect.Lists;
import org.json.simple.JSONArray;
import org.json.simple.JSONObject;
import org.json.simple.parser.JSONParser;
import org.json.simple.parser.ParseException;
import java.io.*;
import java.util.*;
/***********************************************************
* A set of static functions that heal dealing with CNF
*
* findPath will find all paths between two literals in the CNF string
* findOneCommonCNF will try to find the intersection CNF among a list of CNFs
* getCommonCNF will try to find common CNF pair between sentences
*/
public class CommonCNFUtil {
private static List ignorePreds = Arrays.asList(new String[]{"number", "tense"/**, "root", "names"**/});
private static KBmanager kbm;
private static KB kb;
static {
kbm = KBmanager.getMgr();
kb = kbm.getKB("SUMO");
}
private static Comparator clauseComparator = new Comparator() {
@Override
public int compare(semRewrite.Clause o1, semRewrite.Clause o2) {
return o1.disjuncts.get(0).pred.compareTo(o2.disjuncts.get(0).pred);
}
};
/***********************************************************
* prevent instantiation
*/
private CommonCNFUtil() {
}
/***********************************************************
* save the intermediate parsing result to JSON file
*/
public static String saveCNFMaptoFile(List list, String path) {
File f = new File(path);
if (!f.exists())
try {
f.createNewFile();
}
catch (IOException e) {
e.printStackTrace();
}
System.out.println(f.getAbsolutePath());
try (PrintWriter pw = new PrintWriter(f)) {
JSONArray arr = new JSONArray();
for (QAPair k : list) {
JSONObject obj = new JSONObject();
obj.put("file", k.file);
obj.put("index", "" + k.index);
obj.put("query", k.query);
obj.put("queryCNF", k.queryCNF.toString());
obj.put("answer", k.answer);
obj.put("answerCNF", k.answerCNF.toString());
arr.add(obj);
}
System.out.println(arr.toJSONString());
pw.print(arr.toJSONString());
pw.flush();
}
catch (FileNotFoundException e) {
e.printStackTrace();
}
return f.getAbsolutePath();
}
/***********************************************************
* load the intermediate parsing result to save time
*/
public static List loadCNFMapfromFile(String path) {
JSONParser jp = new JSONParser();
List res = new ArrayList();
try {
JSONArray arr = (JSONArray) jp.parse(new FileReader(path));
Iterator iterator = arr.iterator();
while (iterator.hasNext()) {
JSONObject obj = iterator.next();
Integer index = Integer.parseInt((String) obj.get("index"));
String query = (String) obj.get("query");
String answer = (String) obj.get("answer");
String file = (String) obj.get("file");
QAPair item = new QAPair(index, file, query, answer);
String k = (String) obj.get("queryCNF");
CNF cnf = CNF.parseSimple(new Lexer(k));
k = (String) obj.get("answerCNF");
CNF cnf2 = CNF.parseSimple(new Lexer(k));
item.queryCNF = cnf;
item.answerCNF = cnf2;
res.add(item);
}
}
catch (IOException e) {
e.printStackTrace();
}
catch (ParseException e) {
e.printStackTrace();
}
return res;
}
/***********************************************************
* load sentences from file, one line one sentence
*/
public static String[] loadSentencesFromTxt(String path) {
ArrayList res = new ArrayList();
try (Scanner in = new Scanner(new FileReader(path))) {
while (in.hasNextLine()) {
String line = in.nextLine();
if (StringUtil.emptyString(line))
continue;
res.add(line);
}
}
catch (FileNotFoundException e) {
e.printStackTrace();
}
return res.toArray(new String[res.size()]);
}
/***********************************************************
* load sentences from file, one line one sentence
* return map
*/
public static Map loadSentencesMap(String path) {
Map res = new HashMap();
try (Scanner in = new Scanner(new FileReader(path))) {
int index = 0;
while (in.hasNextLine()) {
String line = in.nextLine();
res.put(index++, line);
}
}
catch (FileNotFoundException e) {
e.printStackTrace();
}
return res;
}
/***********************************************************
* load sentences from "IRtest.json" like QA pair file
*/
public static List loadSentencesFormJsonFile(String path) {
JSONParser jp = new JSONParser();
List res = new ArrayList();
try {
JSONArray arr = (JSONArray) jp.parse(new FileReader(path));
Iterator iterator = arr.iterator();
int i = 0;
while (iterator.hasNext()) {
JSONObject obj = iterator.next();
String filename = (String) obj.get("file");
String query = (String) obj.get("query");
String answer = (String) obj.get("answer");
QAPair item = new QAPair(i++, filename, query, answer);
res.add(item);
}
}
catch (FileNotFoundException e) {
e.printStackTrace();
}
catch (ParseException e) {
e.printStackTrace();
}
catch (IOException e) {
e.printStackTrace();
}
return res;
}
/***********************************************************
* get rid of "sumo(X,Y)" and other terms defined in ignorePreds
*/
private static CNF preProcessCNF(CNF cnf) {
Iterator iterator = cnf.clauses.iterator();
while (iterator.hasNext()) {
semRewrite.Clause c = iterator.next();
if (ignorePreds.contains(c.disjuncts.get(0).pred)) {
iterator.remove();
continue;
}
if (c.disjuncts.get(0).pred.equals("sumo")) {
String sumoTerm = c.disjuncts.get(0).arg1;
String word = c.disjuncts.get(0).arg2;
for (semRewrite.Clause m : cnf.clauses) {
if (m != null && m.disjuncts != null && m.disjuncts.get(0)
!= null && m.disjuncts.get(0).arg1 != null && m.disjuncts.get(0).arg1.equals(word)) {
m.disjuncts.get(0).arg1 = sumoTerm;
}
if (m != null && m.disjuncts != null && m.disjuncts.get(0)
!= null && m.disjuncts.get(0).arg2 != null && m.disjuncts.get(0).arg2.equals(word)) {
m.disjuncts.get(0).arg2 = sumoTerm;
}
}
// remove sumo() clauses
iterator.remove();
}
}
return cnf;
}
/***********************************************************
*/
private static void generateCNFForQAPairs(List list) throws IOException {
Map res = new HashMap();
Interpreter inter = new Interpreter();
KBmanager.getMgr().initializeOnce();
inter.initialize();
for (QAPair q : list) {
CNF cnf = inter.interpretGenCNF(q.query);
cnf = preProcessCNF(cnf);
System.out.println(cnf);
q.queryCNF = cnf;
cnf = inter.interpretGenCNF(q.answer);
cnf = preProcessCNF(cnf);
System.out.println(cnf);
q.answerCNF = cnf;
}
}
/***********************************************************
*/
public static Map generateCNFForStringSet(Map sentences) throws IOException {
Map res = new HashMap();
Interpreter inter = new Interpreter();
KBmanager.getMgr().initializeOnce();
inter.initialize();
for (Integer index : sentences.keySet()) {
String q = sentences.get(index);
try {
CNF cnf = inter.interpretGenCNF(q);
cnf = preProcessCNF(cnf);
System.out.println(cnf);
res.put(index, cnf);
}
catch (Exception e) {
System.out.println("Exception occurs in " + q);
e.printStackTrace();
}
}
return res;
}
/***********************************************************
*/
public static CNF findOneCommonCNF(Collection input) {
CNF res = new CNF();
boolean isFirst = true;
System.out.println("Among the following CNF: \n");
for (CNF c : input) {
System.out.println(c);
if (!isFirst) {
res = unification(res, c);
}
else {
res = c;
isFirst = false;
}
}
System.out.println("\n The common CNF is " + res);
return res;
}
/***********************************************************
*/
private static CNF unification(CNF unifier, CNF unified) {
CNF rescnf = new CNF();
unifier.clauses.sort(clauseComparator);
unified.clauses.sort(clauseComparator);
for (semRewrite.Clause m : unifier.clauses) {
for (semRewrite.Clause n : unified.clauses) {
semRewrite.Clause h = m.deepCopy();
n = n.deepCopy();
semRewrite.Clause c = isRelated(h, n);
if (c != null) {
rescnf.clauses.add(c);
break;
}
}
}
return rescnf;
}
/***********************************************************
*/
private static semRewrite.Clause isRelated(semRewrite.Clause m, semRewrite.Clause n) {
if (!m.disjuncts.get(0).pred.equals(n.disjuncts.get(0).pred))
return null;
String marg1 = m.disjuncts.get(0).arg1;
String narg1 = n.disjuncts.get(0).arg1;
String marg2 = m.disjuncts.get(0).arg2;
String narg2 = n.disjuncts.get(0).arg2;
String ca = findCommonAncesstor(marg1, narg1);
if (ca != null) {
marg1 = ca;
narg1 = ca;
}
String ca1 = findCommonAncesstor(marg2, narg2);
if (ca1 != null) {
marg2 = ca1;
narg2 = ca1;
}
if (ca != null && ca1 != null) {
Literal l = new Literal();
l.pred = m.disjuncts.get(0).pred;
l.arg1 = ca;
l.arg2 = ca1;
semRewrite.Clause res = new semRewrite.Clause();
res.disjuncts = Lists.newArrayList(l);
return res;
}
return null;
}
/***********************************************************
*/
private static void transformQAPairListtoCNFSet(List list, Map sentences, Map cnfs) {
int index = 0;
for (QAPair q : list) {
sentences.put(index, q.query);
cnfs.put(index, q.queryCNF);
index++;
sentences.put(index, q.answer);
cnfs.put(index, q.answerCNF);
index++;
}
}
/***********************************************************
*/
private static Map> getCommonCNF(Map map) {
Map> res = new HashMap>();
HashMap bindmap;
for (Integer i = 0; i < map.keySet().size(); i++) {
CNF cnfOut = map.get(i);
Map mapfori = new HashMap();
for (Integer j = i + 1; j < map.keySet().size(); ++j) {
CNF cnfIn = map.get(j);
CNF cnfnew = unification(cnfIn, cnfOut);
if (cnfnew.clauses.size() > 0)
mapfori.put(j, cnfnew);
}
res.put(i, mapfori);
}
Iterator>> iterator = res.entrySet().iterator();
while (iterator.hasNext()) {
Map.Entry e = iterator.next();
if (e.getValue() == null)
iterator.remove();
}
return res;
}
/***********************************************************
* find common ancesstor of two sumo terms in the sumo term family tree
*
*/
private static String findCommonAncesstor(String s1, String s2) {
if ((s1.contains("\"") || s1.contains("-")) || (s2.contains("-") || s2.contains("\"")) || (s1.startsWith("?") || (s2.startsWith("?"))))
return "?X";
HashSet p1 = kb.kbCache.parents.get("subclass").get(s1);
if (p1 == null) p1 = new HashSet();
HashSet m = kb.kbCache.parents.get("subrelation").get(s1);
if (m != null)
p1.addAll(m);
m = kb.kbCache.parents.get("subAttribute").get(s1);
if (m != null)
p1.addAll(m);
p1.add(s1);
HashSet p2 = kb.kbCache.parents.get("subclass").get(s2);
if (p2 == null) p2 = new HashSet();
m = kb.kbCache.parents.get("subrelation").get(s2);
if (m != null)
p2.addAll(m);
p2.add(s2);
m = kb.kbCache.parents.get("subAttribute").get(s2);
if (m != null)
p2.addAll(m);
Collection common = getCommon(p1, p2);
if (common.size() < 1)
return null;
for (String k : common) {
HashSet children = kb.kbCache.children.get("subrelation").get(k);
if (children == null) children = new HashSet<>();
m = kb.kbCache.children.get("subAttribute").get(k);
if (m != null)
children.addAll(m);
m = kb.kbCache.children.get("subclass").get(k);
if (m != null)
children.addAll(m);
boolean isClosest = true;
for (String n : common) {
if (children.contains(n)) {
isClosest = false;
break;
}
}
if (isClosest) return k;
}
return null;
}
/***********************************************************
* get Common objects between two collections
*/
private static Collection getCommon(Collection c1, Collection c2) {
Iterator iterator = c1.iterator();
while (iterator.hasNext()) {
Object o1 = iterator.next();
if (!c2.contains(o1))
iterator.remove();
}
return c1;
}
/***********************************************************
* reverse the map to get the reversed result for further analysis
*/
private static Map>> reverseMap(Map> input) {
Map>> res = new HashMap>>();
for (Integer i : input.keySet()) {
Map m = input.get(i);
for (Integer j : m.keySet()) {
CNF cnf = m.get(j);
Set> mid = res.get(cnf);
if (mid == null)
mid = new HashSet>();
mid.add(new Pair(i, j));
res.put(cnf, mid);
}
}
return res;
}
/***********************************************************
*/
public static class Pair {
F first;
S second;
public Pair(F f, S s) {
first = f;
second = s;
}
@Override
public boolean equals(Object o) {
if (!(o instanceof Pair))
return false;
Pair p = (Pair) o;
return (p.first.equals(this.first) && p.second.equals(this.second)) ||
(p.first.equals(this.second) && p.second.equals(this.first));
}
@Override
public int hashCode() {
return first.hashCode() + second.hashCode();
}
@Override
public String toString() {
return '[' + first.toString() + ',' + second.toString() + ']';
}
}
/***********************************************************
* DTO class for json input and output
*/
public static class QAPair {
Integer index;
String file;
String query;
String answer;
CNF queryCNF;
CNF answerCNF;
public QAPair(Integer index, String file, String query, String answer) {
this.index = index;
this.file = file;
this.query = query;
this.answer = answer;
}
public String getQuery() {
return query;
}
public String getAnswer() {
return answer;
}
public Integer getIndex() {
return index;
}
public String getFile() {
return file;
}
public CNF getQueryCNF() {
return queryCNF;
}
public CNF getAnswerCNF() {
return answerCNF;
}
public String toString() {
StringBuilder sb = new StringBuilder();
sb.append("{\n");
sb.append(" \"index\":\"" + index + "\",\n");
sb.append(" \"file\":\"" + file + "\",\n");
sb.append(" \"query\":\"" + query + "\",\n");
sb.append(" \"queryCNF\":\"" + queryCNF + "\",\n");
sb.append(" \"answer\":\"" + answer + "\",\n");
sb.append(" \"answerCNF\":\"" + answerCNF + "\"\n");
sb.append("}\n");
return sb.toString();
}
}
/***********************************************************
* function to load text file and generate one common CNF for all sentences,
* one sentence one line.
*/
public static CNF loadFileAndFindCommonCNF(String path) throws IOException {
Map strs = loadSentencesMap(path);
Map cnfMap = semRewrite.CommonCNFUtil.generateCNFForStringSet(strs);
System.out.println("\nSentences are:\n");
for (Integer i : strs.keySet()) {
System.out.println(strs.get(i));
System.out.println(cnfMap.get(i));
}
CNF cnf = semRewrite.CommonCNFUtil.findOneCommonCNF(cnfMap.values());
return cnf;
}
/***********************************************************
* function to find all path between two literals in CNF.
*/
private static ArrayList findAllPathBetweenLiterals(CNF cnf, String s1, String s2) {
ArrayList res = new ArrayList();
findPathBFS(cnf.clauses, s1, s2, new ArrayList(), 10, res);
return res;
}
private static void findPathBFS(ArrayList clauses, String s1, String s2, ArrayList path, int max, ArrayList res) {
if (max < 0) return;
if (isContained(path, s2)) {
CNF cnf = new CNF();
cnf.clauses = Lists.newArrayList(path);
res.add(cnf);
return;
}
for (semRewrite.Clause c : clauses) {
if (isContained(c, s1)) {
if (isIgnore(c) || path.contains(c))
continue;
path.add(c);
findPathBFS(clauses, getOtherArgument(c, s1), s2, path, max - 1, res);
path.remove(c);
}
}
}
/***********************************************************
* the ignore list for finding path between literals
*/
private static boolean isIgnore(semRewrite.Clause c) {
List ignoreList = Arrays.asList(new String[]{"number", "sumo", "tense"});
if (ignoreList.contains(c.disjuncts.get(0).pred))
return true;
return false;
}
/***********************************************************
* find if a literal is already in the clause
*/
private static boolean isContained(semRewrite.Clause c, String s) {
for (Literal l : c.disjuncts) {
if (s.equals(l.arg1) || s.equals(l.arg2))
return true;
}
return false;
}
/***********************************************************
* find if a literal is already in a list of clause
*/
private static boolean isContained(ArrayList arr, String s) {
for (semRewrite.Clause c : arr) {
if (isContained(c, s))
return true;
}
return false;
}
/***********************************************************
* get the other argument in the clause
*/
private static String getOtherArgument(Clause c, String arg) {
if (c.disjuncts.get(0).arg1.equals(arg))
return c.disjuncts.get(0).arg2;
else
return c.disjuncts.get(0).arg1;
}
/***********************************************************
* find all the paths between two literals
*/
public static void findPath(String cnfStr, String s1, String s2) {
if (cnfStr == null)
cnfStr = "sumo(Nation,country-2), number(SINGULAR,country-2), det(country-2,which-1), root(ROOT-0,be-3), tense(PRESENT,be-3), dep(be-3,country-2), sumo(Object,world-5), number(SINGULAR,world-5), det(world-5,the-4), number(SINGULAR,biodiesel-8), sumo(Position,producer-9), number(SINGULAR,producer-9), nsubj(be-3,producer-9), poss(producer-9,world-5), amod(producer-9,largest-7), nn(producer-9,biodiesel-8)";
if (s1 == null)
s1 = "biodiesel-8";
if (s2 == null)
s2 = "country-2";
CNF cnf = CNF.parseSimple(new Lexer(cnfStr));
ArrayList res = findAllPathBetweenLiterals(cnf, s1, s2);
for (CNF c : res) {
System.out.println(c);
}
}
/***********************************************************
*/
// public static void testJSONQAPair() {
//
// List list = loadSentencesFormJsonFile("test/corpus/java/resources/IRtests.json");
// generateCNFForQAPairs(list);
// String path = saveCNFMaptoFile(list, "cache.json");
// list = loadCNFMapfromFile("cache.json");
// for (QAPair e : list)
// System.out.println(e);
// Map cnfs = new HashMap();
// Map sentences = new HashMap();
// transformQAPairListtoCNFSet(list, sentences, cnfs);
// Map> rr = getCommonCNF(cnfs);
// for (Map.Entry e : rr.entrySet()) {
// System.out.println(e);
// }
// Map>> re = reverseMap(rr);
// for (CNF cnf : re.keySet()) {
// System.out.println(cnf.toString() + re.get(cnf));
// }
// }
// public static void testFile() {
//
// String path = "/Users/peigenyou/workspace/test.txt";
// CNF cnf = loadFileAndFindCommonCNF(path);
// }
/***********************************************************
*/
public static void main(String[] args) {
// String[] strings = new String[]{"Amelia flies.", "John walks."};
// testJSONQAPair();
// testFile();
findPath(null, null, null);
}
}