// All downloads are free. The search and download functionality uses the official Maven repository.

// edu.washington.cs.knowitall.logic.LogicExpressionParser (Maven / Gradle / Ivy)

package edu.washington.cs.knowitall.logic;

import java.util.ArrayList;
import java.util.List;
import java.util.Stack;
import java.util.regex.Matcher;
import java.util.regex.Pattern;

import com.google.common.base.Function;
import com.google.common.collect.Lists;

import edu.washington.cs.knowitall.logic.Expression.Arg;
import edu.washington.cs.knowitall.logic.Expression.Op;
import edu.washington.cs.knowitall.logic.Expression.Paren;
import edu.washington.cs.knowitall.logic.LogicException.TokenizeLogicException;

/**
 * A logic expression engine that operates over user specified objects.
 *
 * @author Michael Schmitz 
 *
 * @param    the type of the base expressions
 */
abstract public class LogicExpressionParser implements Function> {
    /***
     * Create a LogicExpression object from the supplied string.
     * @param string
     * @return
     */
    public LogicExpression parse(String string) {
        List> expressions = this.tokenize(string);
        return new LogicExpression(expressions);
    }

    @Override
    public LogicExpression apply(String string) {
        return this.parse(string);
    }

    /***
     * The factory method creates an argument from the supplied token string.
     * @param  argument  a string representation of a token
     * @return  an evaluatable representation of a token
     */
    public abstract Arg factory(String argument);

    public final static Pattern doubleQuoteStringLiteralRegex =
            Pattern.compile("\"" + "([^\"\\p{Cntrl}\\\\]*+(?:\\\\[\\\\'\"bfnrt])*+(?:\\\\u[a-fA-F0-9]{4})*+)*+" + "\"");
    public final static Pattern singleQuoteStringLiteralRegex =
            Pattern.compile("'" + "(?:[^']*+)" + "'");
    public final static Pattern regexLiteralRegex =
            Pattern.compile("/" + "(?:(?:[^/\\\\]*+(?:\\\\)*+(?:\\\\/)*+)*+)" + "/");
    private final static List literalPatterns = Lists.newArrayList(
            doubleQuoteStringLiteralRegex, singleQuoteStringLiteralRegex,
            regexLiteralRegex);

    /***
     * The readToken method reads a token from the remaining LogicExpression string.
     *
     * A token may contain a string.  If it contains parentheses, the token
     * will last until the parentheses are balanced.  And &, |, or unbalanced )
     * will mark the end of a token.
     *
     * This is a default implementation that may be overriden.
     * @param  remainder  the remaining text to tokenize
     * @return  a token from the beginning on `remaining`
     */
    public String readToken(String remainder) {
        final String token;
        try {
            Stack parens = new Stack();

            int nextExpression;
            for (nextExpression = 0; nextExpression < remainder.length(); nextExpression++) {
                char c = remainder.charAt(nextExpression);

                // check for quotation
                String match = null;
                for (Pattern pattern : literalPatterns) {
                    Matcher matcher = pattern.matcher(remainder).region(
                            nextExpression, remainder.length());
                    if (matcher.lookingAt()) {
                        match = matcher.group(0);
                        break;
                    }
                }

                if (match != null) {
                    // we found and can consume a quotation
                    nextExpression += match.length() - 1;
                } else if (c == '(') {
                    parens.push(c);
                } else if (c == ')') {
                    if (parens.isEmpty()) {
                        break;
                    } else {
                        parens.pop();
                    }
                } else if (c == '&' || c == '|') {
                    break;
                }
            }

            token = remainder.substring(0, nextExpression).trim();
        } catch (Exception e) {
            throw new TokenizeLogicException("Error parsing token: "
                    + remainder, e);
        }

        if (token.isEmpty()) {
            throw new TokenizeLogicException("zero-length token found.");
        }

        return token;
    }

    /***
     * Convert an infix string logic representation to an infix list of tokens.
     * @param input an infix string logic representation.
     * @param factory a delegate that converts a string representation of an
     * argument into a token object.  @return
     *
     * @throws TokenizeLogicException
     */
    public List> tokenize(String input)
    throws TokenizeLogicException {
        List> tokens = new ArrayList>();

        int i = 0;
        while (i < input.length()) {
            String substring = input.substring(i);
            char firstChar = substring.charAt(0);

            if (firstChar == ' ') {
                i += 1;
                continue;
            }
            else if (firstChar == '(') {
                tokens.add(new Paren.L());
                i += 1;
            } else if (firstChar == ')') {
                tokens.add(new Paren.R());
                i += 1;
            } else if (firstChar == '!') {
                tokens.add(new Op.Mon.Not());
                i += 1;
            } else if (firstChar == '&') {
                tokens.add(new Op.Bin.And());
                i += 1;
            } else if (firstChar == '|') {
                tokens.add(new Op.Bin.Or());
                i += 1;
            } else {
                // parse out the token
                String token = this.readToken(substring);

                tokens.add(factory(token));
                i += token.length();
            }
        }

        return tokens;
    }
}




// © 2015 - 2024 Weber Informatics LLC | Privacy Policy