/*
 * edu.stanford.nlp.parser.shiftreduce.BaseModel (Maven / Gradle / Ivy artifact listing)
 *
 * Page links: Go to download; Show more of this group; Show more artifacts with this name;
 * Show all versions of stanford-parser; Show documentation.
 *
 * Stanford Parser processes raw text in English, Chinese, German, Arabic, and French, and
 * extracts constituency parse trees.
 */
package edu.stanford.nlp.parser.shiftreduce;
import java.io.Serializable;
import java.util.Collection;
import java.util.Collections;
import java.util.List;
import java.util.Random;
import java.util.Set;
import edu.stanford.nlp.parser.common.ParserConstraint;
import edu.stanford.nlp.tagger.common.Tagger;
import edu.stanford.nlp.trees.Tree;
import edu.stanford.nlp.trees.Treebank;
import edu.stanford.nlp.util.Index;
import edu.stanford.nlp.util.ScoredObject;
public abstract class BaseModel implements Serializable {
final ShiftReduceOptions op;
// This is shared with the owning ShiftReduceParser (for now, at least)
final Index transitionIndex;
final Set knownStates; // the set of goal categories of a reduce = the set of phrasal categories in a grammar
final Set rootStates;
final Set rootOnlyStates;
public BaseModel(ShiftReduceOptions op, Index transitionIndex,
Set knownStates, Set rootStates, Set rootOnlyStates) {
this.transitionIndex = transitionIndex;
this.op = op;
this.knownStates = knownStates;
this.rootStates = rootStates;
this.rootOnlyStates = rootOnlyStates;
}
public BaseModel(BaseModel other) {
this.op = other.op;
this.transitionIndex = other.transitionIndex;
this.knownStates = other.knownStates;
this.rootStates = other.rootStates;
this.rootOnlyStates = other.rootOnlyStates;
}
/**
* Returns a transition which might not even be part of the model,
* but will hopefully allow progress in an otherwise stuck parse
*
* TODO: perhaps we want to create an EmergencyTransition class
* which indicates that something has gone wrong
*/
public Transition findEmergencyTransition(State state, List constraints) {
if (state.stack.size() == 0) {
return null;
}
// See if there is a constraint whose boundaries match the end
// points of the top node on the stack. If so, we can apply a
// UnaryTransition / CompoundUnaryTransition if that would solve
// the constraint
if (constraints != null) {
final Tree top = state.stack.peek();
for (ParserConstraint constraint : constraints) {
if (ShiftReduceUtils.leftIndex(top) != constraint.start || ShiftReduceUtils.rightIndex(top) != constraint.end - 1) {
continue;
}
if (ShiftReduceUtils.constraintMatchesTreeTop(top, constraint)) {
continue;
}
// found an unmatched constraint that can be fixed with a unary transition
// now we need to find a matching state for the transition
for (String label : knownStates) {
if (constraint.state.matcher(label).matches()) {
return ((op.compoundUnaries) ?
new CompoundUnaryTransition(Collections.singletonList(label), false) :
new UnaryTransition(label, false));
}
}
}
}
if (ShiftReduceUtils.isTemporary(state.stack.peek()) &&
(state.stack.size() == 1 || ShiftReduceUtils.isTemporary(state.stack.pop().peek()))) {
return ((op.compoundUnaries) ?
new CompoundUnaryTransition(Collections.singletonList(state.stack.peek().value().substring(1)), false) :
new UnaryTransition(state.stack.peek().value().substring(1), false));
}
if (state.stack.size() == 1 && state.tokenPosition >= state.sentence.size()) {
// either need to finalize or transition to a root state
if (!rootStates.contains(state.stack.peek().value())) {
String root = rootStates.iterator().next();
return ((op.compoundUnaries) ?
new CompoundUnaryTransition(Collections.singletonList(root), false) :
new UnaryTransition(root, false));
}
}
if (state.stack.size() == 1) {
return null;
}
if (ShiftReduceUtils.isTemporary(state.stack.peek())) {
return new BinaryTransition(state.stack.peek().value().substring(1), BinaryTransition.Side.RIGHT);
}
if (ShiftReduceUtils.isTemporary(state.stack.pop().peek())) {
return new BinaryTransition(state.stack.pop().peek().value().substring(1), BinaryTransition.Side.LEFT);
}
return null;
}
public abstract Collection> findHighestScoringTransitions(State state, boolean requireLegal, int numTransitions, List constraints);
/**
* Train a new model. This is the method to override for new models
* such that the ShiftReduceParser will fill in the model. Given a
* collection of training trees and some other various information,
* this should train a new model. The model is expected to already
* know about the possible transitions and which states are eligible
* to be root states via the BaseModel constructor.
*
* @param serializedPath Where serialized models go. If the appropriate options are set, the method can use this to save intermediate models.
* @param tagger The tagger to use when evaluating devTreebank. TODO: it would make more sense for ShiftReduceParser to retag the trees first
* @param random A random number generator to use for any random numbers. Useful to make sure results can be reproduced.
* @param binarizedTrainTrees The treebank to train from.
* @param transitionLists binarizedTrainTrees converted into lists of transitions that will reproduce the same tree.
* @param devTreebank a set of trees which can be used for dev testing (assuming the user provided a dev treebank)
* @param nThreads how many threads the model can use for training
*/
public abstract void trainModel(String serializedPath, Tagger tagger, Random random, List binarizedTrainTrees, List> transitionLists, Treebank devTreebank, int nThreads);
abstract Set tagSet();
private static final long serialVersionUID = -175375535849840611L;
}