edu.washington.cs.knowitall.nlp.ChunkedSentenceToken Maven / Gradle / Ivy

Go to download
Show more of this group Show more artifacts with this name
Show all versions of reverb-core Show documentation
A regular-expression based Open IE relation extractor.
There is a newer version: 1.4.3
package edu.washington.cs.knowitall.nlp;

import java.util.ArrayList;
import java.util.List;
import java.util.regex.Pattern;

import com.google.common.base.Function;
import com.google.common.base.Predicate;

import edu.washington.cs.knowitall.commonlib.Range;

/**
 * A representation of a single token in a {@link ChunkedSentence}.
 *
 * <p>A token is a lightweight view: it stores only a reference to the owning
 * sentence and the index of the token within it. The token string, POS tag
 * and chunk tag are looked up in the sentence on demand.
 *
 * @author schmmd
 */
public class ChunkedSentenceToken {
    /** The sentence this token belongs to. */
    public final ChunkedSentence ChunkedSentence;

    /** The index of this token within {@link #ChunkedSentence}. */
    public final int index;

    /**
     * Creates a view of the token at {@code index} in the given sentence.
     *
     * @param ChunkedSentence the sentence that contains the token
     * @param index the index of the token within the sentence
     */
    public ChunkedSentenceToken(ChunkedSentence ChunkedSentence, int index) {
        this.ChunkedSentence = ChunkedSentence;
        this.index = index;
    }

    /**
     * Builds one token for every index in the sentence's own range.
     *
     * @param sentence the sentence to tokenize
     * @return the tokens covering {@code sentence.getRange()}, in index order
     */
    public static List<ChunkedSentenceToken> tokenize(ChunkedSentence sentence) {
        return ChunkedSentenceToken.tokenize(sentence, sentence.getRange());
    }

    /**
     * Builds one token for every index in {@code range}.
     *
     * @param sentence the sentence to tokenize
     * @param range the indices to cover, from {@code range.getStart()}
     *        (inclusive) to {@code range.getEnd()} (exclusive)
     * @return the tokens for the requested indices, in index order; empty
     *         when the range start is not below its end
     */
    public static List<ChunkedSentenceToken> tokenize(ChunkedSentence sentence,
            Range range) {
        final int start = range.getStart();
        final int end = range.getEnd();

        // Presize to the number of tokens actually produced; clamp at zero
        // because ArrayList rejects a negative initial capacity.
        List<ChunkedSentenceToken> tokens =
                new ArrayList<ChunkedSentenceToken>(Math.max(0, end - start));
        for (int i = start; i < end; i++) {
            tokens.add(new ChunkedSentenceToken(sentence, i));
        }

        return tokens;
    }

    /**
     * The string of this token.
     *
     * @return the entry at {@link #index} in the sentence's token list
     */
    public String string() {
        return this.ChunkedSentence.getTokens().get(this.index);
    }

    /**
     * The part of speech tag of this token.
     *
     * @return the sentence's POS tag at {@link #index}
     */
    public String pos() {
        return this.ChunkedSentence.getPosTag(this.index);
    }

    /**
     * The chunk tag of this token.
     *
     * @return the sentence's chunk tag at {@link #index}
     */
    public String chunk() {
        return this.ChunkedSentence.getChunkTag(this.index);
    }

    /**
     * Returns the sentence's token at {@link #index}.
     *
     * @return the token string
     */
    @Override
    public String toString() {
        return this.ChunkedSentence.getToken(index);
    }

    /**
     * A function that maps a token to its token string, for use with
     * Guava-style collection transforms.
     */
    public static final Function<ChunkedSentenceToken, String> toStringFunction =
            new Function<ChunkedSentenceToken, String>() {
        @Override
        public String apply(ChunkedSentenceToken token) {
            return token.ChunkedSentence.getToken(token.index);
        }
    };

    /**
     * An expression that is evaluated against a token.
     *
     * @author schmmd
     */
    protected static abstract class Expression implements
            Predicate<ChunkedSentenceToken> {
    }

    /**
     * A regular expression that is evaluated against the string portion of a
     * token.
     *
     * @author schmmd
     */
    protected static class StringExpression extends Expression {
        final Pattern pattern;

        /**
         * @param string the regular expression source
         * @param flags match flags, as accepted by
         *        {@link Pattern#compile(String, int)}
         */
        public StringExpression(String string, int flags) {
            pattern = Pattern.compile(string, flags);
        }

        /**
         * Compiles {@code string} with {@link Pattern#CASE_INSENSITIVE}.
         *
         * @param string the regular expression source
         */
        public StringExpression(String string) {
            this(string, Pattern.CASE_INSENSITIVE);
        }

        /**
         * @param token the token to test
         * @return true if the pattern matches the entire token string
         */
        @Override
        public boolean apply(ChunkedSentenceToken token) {
            return pattern.matcher(token.string()).matches();
        }
    }

    /**
     * A regular expression that is evaluated against the POS tag portion of a
     * token.
     *
     * @author schmmd
     */
    protected static class PosTagExpression extends Expression {
        final Pattern pattern;

        /**
         * @param string the regular expression source
         * @param flags match flags, as accepted by
         *        {@link Pattern#compile(String, int)}
         */
        public PosTagExpression(String string, int flags) {
            pattern = Pattern.compile(string, flags);
        }

        /**
         * Compiles {@code string} with {@link Pattern#CASE_INSENSITIVE}.
         *
         * @param string the regular expression source
         */
        public PosTagExpression(String string) {
            this(string, Pattern.CASE_INSENSITIVE);
        }

        /**
         * @param token the token to test
         * @return true if the pattern matches the entire POS tag
         */
        @Override
        public boolean apply(ChunkedSentenceToken token) {
            return pattern.matcher(token.pos()).matches();
        }
    }

    /**
     * A regular expression that is evaluated against the chunk tag portion of a
     * token.
     *
     * @author schmmd
     */
    protected static class ChunkTagExpression extends Expression {
        final Pattern pattern;

        /**
         * @param string the regular expression source
         * @param flags match flags, as accepted by
         *        {@link Pattern#compile(String, int)}
         */
        public ChunkTagExpression(String string, int flags) {
            pattern = Pattern.compile(string, flags);
        }

        /**
         * Compiles {@code string} with {@link Pattern#CASE_INSENSITIVE}.
         *
         * @param string the regular expression source
         */
        public ChunkTagExpression(String string) {
            this(string, Pattern.CASE_INSENSITIVE);
        }

        /**
         * @param token the token to test
         * @return true if the pattern matches the entire chunk tag
         */
        @Override
        public boolean apply(ChunkedSentenceToken token) {
            return pattern.matcher(token.chunk()).matches();
        }
    }
}