All Downloads are FREE. Search and download functionalities are using the official Maven repository.

edu.washington.cs.knowitall.nlp.ChunkedSentencePattern Maven / Gradle / Ivy

There is a newer version: 1.4.3
Show newest version
package edu.washington.cs.knowitall.nlp;

import java.io.IOException;
import java.util.Scanner;
import java.util.regex.Matcher;
import java.util.regex.Pattern;

import edu.washington.cs.knowitall.logic.ArgFactory;
import edu.washington.cs.knowitall.logic.LogicExpression;
import edu.washington.cs.knowitall.logic.Expression.Arg;
import edu.washington.cs.knowitall.regex.Expression;
import edu.washington.cs.knowitall.regex.ExpressionFactory;
import edu.washington.cs.knowitall.regex.Match;
import edu.washington.cs.knowitall.regex.RegularExpression;

public class ChunkedSentencePattern {
    /***
     * This class compiles regular expressions over the ChunkedSentenceTokens in
     * a sentence into an NFA. There is a lot of redundancy in their
     * expressiveness. This is largely because it supports pattern matching on
     * the fields This is not necessary but is an optimization and a shorthand
     * (i.e. {@code  is equivalent to "}
     * and {@code (?: | )}.
     * 

* Here are some equivalent examples: *

    *
  1. {@code * +} *
  2. {@code * +} *
  3. {@code * +} *
  4. {@code * (?: | )+} *
* Note that (3) and (4) are not preferred for efficiency reasons. Regex OR * (in example (4)) should only be used on multi-ChunkedSentenceToken * sequences. *

* The Regular Expressions support named groups (: ... ), unnamed * groups (?: ... ), and capturing groups ( ... ). The operators allowed are * +, ?, *, and |. The Logic Expressions (that describe each * ChunkedSentenceToken) allow grouping "( ... )", not '!', or '|', and and * '&'. * * @param regex * @return */ public static RegularExpression compile(String regex) { return RegularExpression.compile(regex, new ExpressionFactory() { @Override public Expression.BaseExpression create( final String expression) { final Pattern valuePattern = Pattern .compile("([\"'])(.*)\\1"); return new Expression.BaseExpression( expression) { private final LogicExpression logic; { this.logic = LogicExpression.compile( expression, new ArgFactory() { @Override public Arg create( final String argument) { return new Arg() { private final ChunkedSentenceToken.Expression expression; { String[] parts = argument .split("="); String base = parts[0]; Matcher matcher = valuePattern .matcher(parts[1]); if (!matcher.matches()) { throw new IllegalArgumentException( "Value not enclosed in quotes (\") or ('): " + argument); } String string = matcher .group(2); if (base.equalsIgnoreCase("stringCS")) { this.expression = new ChunkedSentenceToken.StringExpression( string, 0); } else if (base .equalsIgnoreCase("string")) { this.expression = new ChunkedSentenceToken.StringExpression( string); } else if (base .equalsIgnoreCase("pos")) { this.expression = new ChunkedSentenceToken.PosTagExpression( string); } else if (base .equalsIgnoreCase("chunk")) { this.expression = new ChunkedSentenceToken.ChunkTagExpression( string); } else { throw new IllegalStateException( "unknown argument specified: " + base); } } @Override public boolean apply( ChunkedSentenceToken entity) { return this.expression .apply(entity); } }; } }); } @Override public boolean apply(ChunkedSentenceToken entity) { return logic.apply(entity); } }; } }); } public static void main(String[] args) throws ChunkerException, IOException { System.out.println("Compiling the expression... "); RegularExpression expression = ChunkedSentencePattern .compile(args[0]); System.out.println(expression); OpenNlpSentenceChunker chunker = new OpenNlpSentenceChunker(); System.out .println("Please enter a sentence to match with the above expression."); Scanner scan = new Scanner(System.in); while (scan.hasNextLine()) { String line = scan.nextLine(); ChunkedSentence chunked = chunker.chunkSentence(line); Match match = expression .match(ChunkedSentenceToken.tokenize(chunked)); if (match != null) { System.out.println(match.groups().get(0)); } } } }





© 2015 - 2024 Weber Informatics LLC | Privacy Policy