semRewrite.Literal Maven / Gradle / Ivy

Go to download
Show more of this group Show more artifacts with this name
Show all versions of sigma-nlp Show documentation
Natural language processing toolbox using Sigma knowledge engineering system.
There is a newer version: 1.1
package semRewrite;

/*
Copyright 2014-2015 IPsoft

Author: Adam Pease [email protected]

This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 2 of the License, or
(at your option) any later version.

This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
GNU General Public License for more details.

You should have received a copy of the GNU General Public License
along with this program ; if not, write to the Free Software
Foundation, Inc., 59 Temple Place, Suite 330, Boston,
MA  02111-1307 USA 
*/

import com.articulate.sigma.StringUtil;
import semRewrite.RuleSet;

import java.text.ParseException;
import java.util.HashMap;
import java.util.List;

/** *************************************************************
 * pred(arg1,arg2).  
 * arg1 and/or arg2 can be variables which are denoted by a 
 * leading '?'
 * The literal can be negated.  It also has flags for whether it
 * needs to be preserved, rather than consumed, during unification,
 * and whether it has already been successfully bound to another
 * literal during unification.
 */
public class Literal {

    public boolean negated = false;
    public boolean preserve = false;
    public boolean bound = false; // bound clauses in a delete rule get deleted
    public String pred;
    public String arg1;
    public String arg2;
    
    /** ***************************************************************
     */
    public String toString() {
        
        StringBuffer sb = new StringBuffer();
        if (bound)
            sb.append("X");
        if (negated)
            sb.append("~");
        if (preserve)
            sb.append("+");
        sb.append(pred + "(" + arg1 + "," + arg2 + ")");
        return sb.toString();
    }
    
    /** ***************************************************************
     */
    public semRewrite.Literal deepCopy() {
        
        semRewrite.Literal newc = new semRewrite.Literal();
        newc.negated = negated;
        newc.preserve = preserve;
        newc.bound = bound;
        newc.pred = pred;
        newc.arg1 = arg1;
        newc.arg2 = arg2;
        return newc;
    }
    
    /** ***************************************************************
     */
    @Override
    public boolean equals(Object o) {
    
        if (!(o instanceof semRewrite.Literal))
            return false;
        semRewrite.Literal c = (semRewrite.Literal) o;
        if (negated != c.negated)
            return false;
        if (!pred.equals(c.pred))
            return false;
        if (!arg1.equals(c.arg1))
            return false;
        if (!arg2.equals(c.arg2))
            return false;
        return true;
    }
    
    /** ***************************************************************
     * @return true if the clause does not contain any variables
     */
    public boolean isGround() {
        
        if (!arg1.startsWith("?") && !arg2.startsWith("?"))
            return true;
        else
            return false;
    }
    
    /** *************************************************************
     * If the tokens in the literal are derived from words parsed
     * from the Stanford dependency parser, and therefore in the form
     * word-xx, where xx are digits, prepend a '?' to signify that
     * it's a variable.
     */
    public void preProcessQuestionWords(List qwords) {
        
        for (String s: qwords) {
            System.out.println("INFO in Clause.preProcessQuestionWords(): " + s + " " + arg1 + " " + arg2);
            if (arg1.toLowerCase().matches(s.toLowerCase() + "-\\d+"))
                arg1 = "?" + arg1;
            if (arg2.toLowerCase().matches(s.toLowerCase() + "-\\d+"))
                arg2 = "?" + arg2;
        }
    }
    
    /** ***************************************************************
     * Apply variable substitutions to a literal  
     */
    public void applyBindingSelf(HashMap bindings) {
        
        if (arg1.startsWith("?")) {
            if (bindings.containsKey(arg1))
                arg1 = bindings.get(arg1);
        }
        if (arg2.startsWith("?")) {
            if (bindings.containsKey(arg2))
                arg2 = bindings.get(arg2);
        }
    }
    
    /** ***************************************************************
     * @return a literal after applying variable substitutions to a literal  
     */
    public semRewrite.Literal applyBindings(HashMap bindings) {
        
        //System.out.println("INFO in Clause.applyBindings(): this: " + this);
        //System.out.println("INFO in Clause.applyBindings(): bindings: " + bindings);
        semRewrite.Literal c = new semRewrite.Literal();
        c.pred = pred;
        c.negated = negated;
        c.preserve = preserve;
        if (StringUtil.emptyString(arg1) || StringUtil.emptyString(arg2)) {
            System.out.println("Error in Clause.applyBindings(): Empty argument(s): " + this);
            c.arg1 = arg1;
            c.arg2 = arg2;
            return c;
        }
        if (arg1.startsWith("?")) {
            if (bindings.containsKey(arg1))
                c.arg1 = bindings.get(arg1);
            else
                c.arg1 = arg1;
        }
        else
            c.arg1 = arg1;
        if (arg2.startsWith("?")) {
            if (bindings.containsKey(arg2))
                c.arg2 = bindings.get(arg2);
            else
                c.arg2 = arg2;
        }
        else
            c.arg2 = arg2;
        //System.out.println("INFO in Clause.applyBindings(): returning this: " + c);
        return c;
    }
    
    /** ***************************************************************
     * @return a boolean indicating whether a variable occurs in a literal.
     * This is a degenerate case of general case of occurs check during
     * unification, since we have no functions and argument lists are 
     * always of length 2.
     */
    private static boolean occursCheck(String t, semRewrite.Literal c) {
        
        if (t.equals(c.arg1) || t.equals(c.arg2))
            return true;
        else
            return false;
    }
    
    /** ***************************************************************
     * @return false if there are wildcards and they don't match (or 
     * there's an error) and true if there are no wildcards.  Match
     * is case-insensitive.  Wildcards only allow for ignoring the
     * word-number suffix as in wildcard-5 would match wildcard*.
     */
    private static boolean wildcardMatch(String t1, String t2) {
        
        //System.out.println("INFO in Clause.wildcardMatch(): attempting to match: " + t1 + " " + t2);
        String s1 = t1;
        String s2 = t2;
        if (!t1.contains("*") && !t2.contains("*")) // no wildcards case should fall through
            return true;
        if (t1.contains("*") && t2.contains("*")) {
            System.out.println("Error in Clause.wildcardMatch(): both arguments have wildcards: " + t1 + " " + t2);
            return false;
        }
        if (t2.contains("*")) {
            s1 = t2;
            s2 = t1;
        }
        if (s1.indexOf('*') > -1 && s2.indexOf('-') > -1) {  // when wildcard, both have to be matching variables
                                                             // except for suffix
            if (!s1.substring(0,s1.lastIndexOf('*')).equalsIgnoreCase(s2.substring(0,s2.lastIndexOf('-'))))
                return false;
        }
        return true;
    }
        
    /** ***************************************************************
     * Unify all terms in term1 with the corresponding terms in term2 with a
     * common substitution. Note that unlike general unification, we have
     * a fixed argument list of 2.   
     * @return the set of substitutions with the variable as the key and
     * the binding as the value in the HashMap.
     */
    public HashMap mguTermList(semRewrite.Literal l2) {

        //System.out.println("INFO in Clause.mguTermList(): attempting to unify " + this + " and " + l2);
        HashMap subst = new HashMap();
        
        if (!pred.equals(l2.pred)) 
            return null;        
        for (int arg = 1; arg < 3; arg++) {           
            String t1 = arg1; // Pop the first term pair to unify off the lists            
            String t2 = l2.arg1; // (removes and returns the denoted elements).
            if (arg == 2) {
                t1 = arg2;            
                t2 = l2.arg2;
            }
            //System.out.println("INFO in Clause.mguTermList(): attempting to unify arguments " + t1 + " and " + t2); 
            if (t1.startsWith("?")) {
                //System.out.println("INFO in Clause.mguTermList(): here 1");
                if (t1.equals(t2))
                    // We could always test this upfront, but that would
                    // require an expensive check every time. 
                    // We descend recursively anyway, so we only check this on
                    // the terminal case.  
                    continue;
                if (occursCheck(t1,l2))
                    return null;
                // We now create a new substitution that binds t2 to t1, and
                // apply it to the remaining unification problem. We know
                // that every variable will only ever be bound once, because
                // we eliminate all occurrences of it in this step - remember
                // that by the failed occurs-check, t2 cannot contain t1.
                HashMap newBinding = new HashMap();
                if (!wildcardMatch(t1,t2)) 
                    return null;
                newBinding.put(t1,t2);                
                applyBindingSelf(newBinding);
                l2 = l2.applyBindings(newBinding);
                subst.put(t1, t2);
            }
            else if (t2.startsWith("?")) {
                //System.out.println("INFO in Clause.mguTermList(): here 2");
                // Symmetric case - We know that t1!=t2, so we can drop this check
                if (occursCheck(t2, this))
                    return null;
                HashMap newBinding = new HashMap();
                if (!wildcardMatch(t1,t2)) 
                    return null;
                newBinding.put(t2, t1);          
                applyBindingSelf(newBinding);
                l2 = l2.applyBindings(newBinding);
                subst.put(t2, t1);
            }
            else {
                //System.out.println("INFO in Clause.mguTermList(): t1 " + t1 + " t2 " + t2);
                if (!t1.equals(t2)) {
                    if (t1.indexOf('*') > -1 && t2.indexOf('-') > -1) {
                        if (!t1.substring(0,t1.lastIndexOf('*')).equalsIgnoreCase(t2.substring(0,t2.lastIndexOf('-'))))
                            return null;
                    }
                    else if (t2.indexOf('*') > -1 && t1.indexOf('-') > -1) {
                        if (!t2.substring(0,t2.lastIndexOf('*')).equalsIgnoreCase(t1.substring(0,t1.lastIndexOf('-'))))
                            return null;
                    }
                    else
                        return null;
                }
            }
        }
        //System.out.println("INFO in Clause.mguTermList(): subst on exit: " + subst);
        return subst;
    }
    
    /** ***************************************************************
     * @param lex is a Lexer which has been initialized with the 
     * textual version of the Literal
     * @param startLine is the line in the text file at which the
     * literal appears.  If it is in a large rule the start line
     * could be different than the actual line of text for the literal.
     * If the literal is just from a string rather than directly from
     * a text file then the startLine will be 0.
     * @return a Literal corresponding to the input text passed to the
     * Lexer.  Note that the predicate in this literal must already 
     * have been read
     */
    public static semRewrite.Literal parse(Lexer lex, int startLine) {

        String errStart = "Parsing error in " + RuleSet.filename;
        String errStr;
        semRewrite.Literal cl = new semRewrite.Literal();
        try {
            //System.out.println("INFO in Clause.parse(): " + lex.look());
            if (lex.testTok(Lexer.Plus)) {
                cl.preserve = true;
                lex.next();
            }
            //System.out.println("INFO in Clause.parse(): " + lex.look());
            cl.pred = lex.next();
            //System.out.println("INFO in Clause.parse(): " + lex.look());
            if (!lex.testTok(Lexer.OpenPar)) {
                errStr = (errStart + ": Invalid token '" + lex.look() + "' near line " + startLine + " on input " + lex.line);
                throw new ParseException(errStr, startLine);
            }
            lex.next();
            //System.out.println("INFO in Clause.parse(): " + lex.look());
            cl.arg1 = lex.next();
            //System.out.println("INFO in Clause.parse(): " + lex.look());
            if (!lex.testTok(Lexer.Comma)) {
                errStr = (errStart + ": Invalid token '" + lex.look() + "' near line " + startLine + " on input " + lex.line);
                throw new ParseException(errStr, startLine);
            }
            lex.next();
            //System.out.println("INFO in Clause.parse(): " + lex.look());
            cl.arg2 = lex.next();
            //System.out.println("INFO in Clause.parse(): " + lex.look());
            if (!lex.testTok(Lexer.ClosePar)) {
                errStr = (errStart + ": Invalid token '" + lex.look() + "' near line " + startLine + " on input " + lex.line);
                throw new ParseException(errStr, startLine);
            } 
            lex.next();
        }
        catch (Exception ex) {
            String message = ex.getMessage();
            System.out.println("Error in Clause.parse() " + message);
            ex.printStackTrace();
        }    
        //System.out.println("INFO in Clause.parse(): returning " + cl);
        return cl;
    }
    
    /** *************************************************************
     * A test method for unification
     */
    public static void testUnify() {
        
        String s1 = "sumo(Human,Mary-1)";
        String s2 = "sumo(?O,Mary-1)";
        semRewrite.Literal c1 = null;
        semRewrite.Literal c2 = null;
        try {
            Lexer lex = new Lexer(s1);
            lex.look();
            c1 = semRewrite.Literal.parse(lex, 0);
            lex.look();
            lex = new Lexer(s2);
            c2 = semRewrite.Literal.parse(lex, 0);
        }
        catch (Exception ex) {
            String message = ex.getMessage();
            System.out.println("Error in Clause.parse() " + message);
            ex.printStackTrace();
        }   
        System.out.println("INFO in Clause.testUnify(): " + c1.mguTermList(c2));
        System.out.println("INFO in Clause.testUnify(): " + c2.mguTermList(c1));
    }
    
    /** *************************************************************
     * A test method for wildcard unification
     */
    public static void testRegexUnify() {
        
        String s1 = "pobj(at-1,Mary-1).";
        String s2 = "pobj(at*,?M).";
        String s3 = "pobj(boo-3,?M).";
        System.out.println("INFO in Clause.testRegexUnify(): attempting parses ");
        semRewrite.Literal c1 = null;
        semRewrite.Literal c2 = null;
        semRewrite.Literal c3 = null;
        try {
            Lexer lex = new Lexer(s1);
            lex.look();
            c1 = semRewrite.Literal.parse(lex, 0);
            System.out.println("INFO in Clause.testRegexUnify(): parsed " + c1);
            lex.look();
            lex = new Lexer(s2);
            c2 = semRewrite.Literal.parse(lex, 0);
            System.out.println("INFO in Clause.testRegexUnify(): parsed " + c2);
            lex = new Lexer(s3);
            c3 = semRewrite.Literal.parse(lex, 0);
            System.out.println("INFO in Clause.testRegexUnify(): parsed " + c3);
        }
        catch (Exception ex) {
            String message = ex.getMessage();
            System.out.println("Error in Clause.parse() " + message);
            ex.printStackTrace();
        }   
        System.out.println("INFO in Clause.testRegexUnify(): " + c1.mguTermList(c2));
        System.out.println("INFO in Clause.testRegexUnify(): " + c2.mguTermList(c1));
        System.out.println("INFO in Clause.testRegexUnify(): should fail: " + c2.mguTermList(c3));
    }
    
    /** *************************************************************
     * A test method for parsing a Literal
     */
    public static void testParse() {
        
        try {
            String input = "+det(bank-2, The-1).";
            Lexer lex = new Lexer(input);
            lex.look();
            System.out.println(semRewrite.Literal.parse(lex, 0));
        }
        catch (Exception ex) {
            String message = ex.getMessage();
            System.out.println("Error in Clause.parse() " + message);
            ex.printStackTrace();
        }   
    }
    
    /** *************************************************************
     * A test method
     */
    public static void main (String args[]) {
        
        testParse();
        //testUnify();
        //testRegexUnify();
    }
}