// All downloads are free. Search and download functionality uses the official Maven repository.

// edu.washington.cs.knowitall.regex.RegularExpression (Maven / Gradle / Ivy)

package edu.washington.cs.knowitall.regex;

import java.util.ArrayList;
import java.util.Arrays;
import java.util.List;
import java.util.Scanner;

import com.google.common.base.Predicate;
import com.google.common.base.Joiner;
import com.google.common.base.Function;

import edu.washington.cs.knowitall.regex.Expression.BaseExpression;
import edu.washington.cs.knowitall.regex.FiniteAutomaton.Automaton;

/**
 * A regular expression engine that operates over sequences of user-specified
 * objects.
 *
 * @author Michael Schmitz 
 *
 * @param    the type of the sequence elements
 */
public class RegularExpression implements Predicate> {
    public final List> expressions;
    public final Automaton auto;

    public RegularExpression(List> expressions) {
        this.expressions = expressions;
        this.auto = RegularExpression.build(this.expressions);
    }

    /***
     * Create a regular expression without tokenization support.
     * @param expressions
     * @return
     */
    public static  RegularExpression compile(List> expressions) {
        return new RegularExpression(expressions);
    }

    /***
     * Create a regular expression from the specified string.
     * @param expression
     * @param factoryDelegate
     * @return
     */
    public static  RegularExpression compile(final String expression,
            final Function> factoryDelegate) {
        return new RegularExpressionParser() {
            @Override
            public BaseExpression factory(String token) {
                return factoryDelegate.apply(token);
            }
        }.parse(expression);
    }

    @Override
    public boolean equals(Object other) {
        if (! (other instanceof RegularExpression)) {
            return false;
        }

        RegularExpression expression = (RegularExpression) other;
        return this.toString().equals(expression.toString());
    }

    @Override
    public int hashCode() {
        return this.toString().hashCode();
    }

    @Override
    public String toString() {
        List expressions = new ArrayList(
                this.expressions.size());
        for (Expression expr : this.expressions) {
            expressions.add(expr.toString());
        }

        return Joiner.on(" ").join(expressions);
    }

    /**
     * Build an NFA from the list of expressions.
     * @param exprs
     * @return
     */
    public static  Automaton build(List> exprs) {
        Expression.MatchingGroup group = new Expression.MatchingGroup(exprs);
        return group.build();
    }

    /**
     * Apply the expression against a list of tokens.
     *
     * @return true iff the expression if found within the tokens.
     */
    @Override
    public boolean apply(List tokens) {
        if (this.find(tokens) != null) {
            return true;
        } else {
            return false;
        }
    }

    /**
     * Apply the expression against a list of tokens.
     *
     * @return true iff the expression matches all of the tokens.
     */
    public boolean matches(List tokens) {
        Match match = this.lookingAt(tokens, 0);
        return match != null && match.endIndex() == tokens.size();
    }

    /**
     * Find the first match of the regular expression against tokens. This
     * method is slightly slower due to additional memory allocations. However,
     * the response has much greater detail and is very useful for
     * writing/debugging regular expressions.
     *
     * @param tokens
     * @return an object representing the match, or null if no match is found.
     */
    public Match find(List tokens) {
        return this.find(tokens, 0);
    }

    /**
     * Find the first match of the regular expression against tokens, starting
     * at the specified index.
     *
     * @param tokens tokens to match against.
     * @param start index to start looking for a match.
     * @return an object representing the match, or null if no match is found.
     */
    public Match find(List tokens, int start) {
        Match match;
        for (int i = start; i <= tokens.size() - auto.minMatchingLength(); i++) {
            match = this.lookingAt(tokens, i);
            if (match != null) {
                return match;
            }
        }

        return null;
    }

    /**
     * Determine if the regular expression matches the beginning of the
     * supplied tokens.
     *
     * @param tokens the list of tokens to match.
     * @return an object representing the match, or null if no match is found.
     */
    public Match lookingAt(List tokens) {
        return this.lookingAt(tokens, 0);
    }

    /**
     * Determine if the regular expression matches the supplied tokens,
     * starting at the specified index.
     *
     * @param tokens the list of tokens to match.
     * @param start the index where the match should begin.
     * @return an object representing the match, or null if no match is found.
     */
    public Match lookingAt(List tokens, int start) {
        return auto.lookingAt(tokens, start);
    }

    public Match match(List tokens) {
        Match match = this.lookingAt(tokens);
        if (match != null && match.endIndex() == tokens.size()) {
            return match;
        }
        else {
            return null;
        }
    }

    /**
     * Find all non-overlapping matches of the regular expression against tokens.
     *
     * @param tokens
     * @return an list of objects representing the match.
     */
    public List> findAll(List tokens) {
        List> results = new ArrayList>();

        int start = 0;
        Match match;
        do {
            match = this.find(tokens, start);

            if (match != null) {
                start = match.endIndex();

                // match may be empty query string has all optional parts
                if (!match.isEmpty()) {
                    results.add(match);
                }
            }
        } while (match != null);

        return results;
    }

    /**
     * An interactive program that compiles a word-based regular expression
     * specified in arg1 and then reads strings from stdin, evaluating them
     * against the regular expression.
     * @param args
     */
    public static void main(String[] args) {
        Scanner scan = new Scanner(System.in);

        RegularExpression regex = RegularExpressionParsers.word.parse(args[0]);
        System.out.println("regex: " + regex);
        System.out.println();

        while (scan.hasNextLine()) {
            String line = scan.nextLine();

            System.out.println("contains: " + regex.apply(Arrays.asList(line.split("\\s+"))));
            System.out.println("matches:  " + regex.matches(Arrays.asList(line.split("\\s+"))));
            System.out.println();
        }

        scan.close();
    }
}




// © 2015 - 2024 Weber Informatics LLC | Privacy Policy