All Downloads are FREE. Search and download functionalities are using the official Maven repository.

edu.stanford.nlp.parser.shiftreduce.State Maven / Gradle / Ivy

Go to download

Stanford Parser processes raw text in English, Chinese, German, Arabic, and French, and extracts constituency parse trees.

There is a newer version: 3.9.2
Show newest version
package edu.stanford.nlp.parser.shiftreduce;

import java.util.List;
import java.util.Map;
import java.util.TreeMap;
import java.util.regex.Pattern;

import edu.stanford.nlp.trees.Tree;
import edu.stanford.nlp.util.Generics;
import edu.stanford.nlp.util.Scored;
import edu.stanford.nlp.util.TreeShapedStack;

/**
 * A class which encodes the current state of the parsing.  This can
 * be used either for direct search or for beam search.
 * 
* Important information which needs to be encoded: *
    * *
  • A stack. This needs to be updatable in O(1) time to keep the * parser's run time linear. This is done by using a linked-list type * stack in which new states are created by the push * operation. * *
  • A queue. This also needs to be updatable in O(1) time. This * is accomplished by having all the states share the same list of * queued items, with different states only changing an index into the * queue. * *
  • The score of the current state. This is useful in beam searches. * *
  • Whether or not the current state is "finalized". If so, the * only thing that can be done from now on is to idle. * *
*/ public class State implements Scored { /** * Expects a list of preterminals. The preterminals should be built * with CoreLabels and have HeadWord and HeadTag annotations set. */ public State(List sentence) { this(new TreeShapedStack<>(), new TreeShapedStack<>(), findSeparators(sentence), sentence, 0, 0.0, false); } State(TreeShapedStack stack, TreeShapedStack transitions, TreeMap separators, List sentence, int tokenPosition, double score, boolean finished) { this.stack = stack; this.transitions = transitions; this.separators = separators; this.sentence = sentence; this.tokenPosition = tokenPosition; this.score = score; this.finished = finished; } /** * The stack of Tree pieces we have already assembled. */ final TreeShapedStack stack; /** * The transition sequence used to get to the current position */ final TreeShapedStack transitions; /** * Used to describe the relative location of separators to the head of a subtree */ public enum HeadPosition { NONE, LEFT, RIGHT, BOTH, HEAD }; /** * A description of where the separators such as ,;:- are in a * subtree, relative to the head of the subtree */ final TreeMap separators; Tree getStackNode(int depth) { if (depth >= stack.size()) { return null; } TreeShapedStack node = stack; for (int i = 0; i < depth; ++i) { node = node.pop(); } return node.peek(); } Tree getQueueNode(int depth) { if (tokenPosition + depth >= sentence.size()) { return null; } return sentence.get(tokenPosition + depth); } /** * Returns the first separator between two nodes or returns null if * such a thing does not exist */ String getSeparatorBetween(int right, int left) { if (right >= left) { throw new AssertionError("Expected right < left"); } return getSeparatorBetween(getStackNode(right), getStackNode(left)); } String getSeparatorBetween(Tree right, Tree left) { if (right == null || left == null) { return null; } int leftHead = ShiftReduceUtils.headIndex(left); int rightHead = ShiftReduceUtils.headIndex(right); Map.Entry nextSeparator = separators.ceilingEntry(leftHead); if (nextSeparator == null || nextSeparator.getKey() > rightHead) { return null; } return nextSeparator.getValue().substring(0, 1); } /** * Returns the separator count between two nodes * (0 if any of the nodes don't exist) */ int getSeparatorCount(int right, int left) { if (right >= left) { throw new AssertionError("Expected right < left"); } return getSeparatorCount(getStackNode(right), getStackNode(left)); } int getSeparatorCount(Tree right, Tree left) { if (right == null || left == null) { return 0; } int leftHead = ShiftReduceUtils.headIndex(left); int rightHead = ShiftReduceUtils.headIndex(right); Integer nextSeparator = separators.higherKey(leftHead); int count = 0; while (nextSeparator != null && nextSeparator < rightHead) { ++count; nextSeparator = separators.higherKey(nextSeparator); } return count; } HeadPosition getSeparator(int nodeNum) { if (nodeNum >= stack.size()) { return null; } TreeShapedStack stack = this.stack; for (int i = 0; i < nodeNum; ++i) { stack = stack.pop(); } Tree node = stack.peek(); int head = ShiftReduceUtils.headIndex(node); if (separators.get(head) != null) { return HeadPosition.HEAD; } int left = ShiftReduceUtils.leftIndex(node); Integer nextLeft = separators.floorKey(head); boolean hasLeft = (nextLeft != null && nextLeft >= left); int right = ShiftReduceUtils.rightIndex(node); Integer nextRight = separators.ceilingKey(head); boolean hasRight = (nextRight != null && nextRight <= right); if (hasLeft && hasRight) { return HeadPosition.BOTH; } else if (hasLeft) { return HeadPosition.LEFT; } else if (hasRight) { return HeadPosition.RIGHT; } else { return HeadPosition.NONE; } } static final Pattern separatorRegex = Pattern.compile("^[,;:-]+$"); static final char[][] equivalentSeparators = { { ',', ',' }, { ';', ';' }, { ':', ':' } }; static TreeMap findSeparators(List sentence) { TreeMap separators = Generics.newTreeMap(); for (int index = 0; index < sentence.size(); ++index) { Tree leaf = sentence.get(index).children()[0]; String value = leaf.value(); for (char[] equivalentSeparator : equivalentSeparators) { value = value.replace(equivalentSeparator[0], equivalentSeparator[1]); } if (separatorRegex.matcher(value).matches()) { // TODO: put "value" instead? Perhaps do this next time we rebuild all models separators.put(index, leaf.value()); } } return separators; } /** * The words we are parsing. They need to be tagged before we can * parse. The words are stored as preterminal Trees whose only * nodes are the tag node and the word node. */ final List sentence; /** * Essentially, the position in the queue part of the state. * 0 represents that we are at the start of the queue and nothing * has been shifted yet. */ final int tokenPosition; /** * The score of the current state based on the transitions that were * used to create it. */ final double score; @Override public double score() { return score; } /** * Whether or not processing has finished. Once that is true, only * idle transitions are allowed. */ final boolean finished; public boolean isFinished() { return finished; } public boolean endOfQueue() { return tokenPosition == sentence.size(); } @Override public String toString() { StringBuilder result = new StringBuilder(); result.append("State summary\n"); result.append(" Tokens: " + sentence + "\n"); result.append(" Token position: " + tokenPosition + "\n"); result.append(" Current stack contents: " + stack.toString("\n") + "\n"); result.append(" Component transitions: " + transitions + "\n"); result.append(" Score: " + score + "\n"); result.append(" " + ((finished) ? "" : "not ") + "finished\n"); return result.toString(); } /** * Whether or not the transitions that built the two states are * equal. Doesn't check anything else. Useful for training using * an agenda, for example, when you know the underlying information * such as the words are the same and all you care about checking is * the transition sequence */ public boolean areTransitionsEqual(State other) { return transitions.equals(other.transitions); } }




© 2015 - 2024 Weber Informatics LLC | Privacy Policy