All downloads are free. The search and download functionality uses the official Maven repository.

edu.stanford.nlp.parser.shiftreduce.ReorderingOracle Maven / Gradle / Ivy

Go to download

Stanford CoreNLP provides a set of natural language analysis tools which can take raw English language text input and give the base forms of words, their parts of speech, whether they are names of companies, people, etc., normalize dates, times, and numeric quantities, mark up the structure of sentences in terms of phrases and word dependencies, and indicate which noun phrases refer to the same entities. It provides the foundational building blocks for higher level text understanding applications.

There is a newer version: 4.5.7
Show newest version
package edu.stanford.nlp.parser.shiftreduce;

import java.util.List;
import java.util.ListIterator;

import edu.stanford.nlp.util.Generics;

/**
 * A second attempt at making an oracle.  Instead of always trying to
 * return the best transition, it simply rearranges the transition
 * lists after an incorrect transition.  If this is not possible,
 * training will be halted as in the case of early update.
 *
 * @author John Bauer
 */
public class ReorderingOracle {
  ShiftReduceOptions op;

  public ReorderingOracle(ShiftReduceOptions op) {
    this.op = op;
  }

  /**
   * Given a predicted transition and a state, this method rearranges
   * the list of transitions and returns whether or not training can
   * continue.
   */
  boolean reorder(State state, Transition chosenTransition, List transitions) {
    if (transitions.size() == 0) {
      throw new AssertionError();
    }

    Transition goldTransition = transitions.get(0);

    // If the transition is gold, we are already satisfied.
    if (chosenTransition.equals(goldTransition)) {
      transitions.remove(0);
      return true;
    }

    // If the transition should have been a Unary/CompoundUnary
    // transition and it was something else or a different Unary
    // transition, see if the transition sequence can be continued
    // after skipping past the unary
    if ((goldTransition instanceof UnaryTransition) || (goldTransition instanceof CompoundUnaryTransition)) {
      transitions.remove(0);
      return reorder(state, chosenTransition, transitions);
    }

    // If the chosen transition was an incorrect Unary/CompoundUnary
    // transition, skip past it and hope to continue the gold
    // transition sequence.  However, if we have Unary/CompoundUnary
    // in a row, we have to return false to prevent loops.
    // Also, if the state stack size is 0, can't keep going
    if ((chosenTransition instanceof UnaryTransition) || (chosenTransition instanceof CompoundUnaryTransition)) {
      if (state.transitions.size() > 0) {
        Transition previous = state.transitions.peek();
        if ((previous instanceof UnaryTransition) || (previous instanceof CompoundUnaryTransition)) {
          return false;
        }
      }
      if (state.stack.size() == 0) {
        return false;
      }
      return true;
    }

    if (chosenTransition instanceof BinaryTransition) {
      if (state.stack.size() < 2) {
        return false;
      }

      if (goldTransition instanceof ShiftTransition) {
        // Helps, but adds quite a bit of size to the model and only helps a tiny bit
        return op.trainOptions().oracleBinaryToShift && reorderIncorrectBinaryTransition(transitions);
      }

      if (!(goldTransition instanceof BinaryTransition)) {
        return false;
      }

      BinaryTransition chosenBinary = (BinaryTransition) chosenTransition;
      BinaryTransition goldBinary = (BinaryTransition) goldTransition;
      if (chosenBinary.isBinarized()) {
        // Binarized labels only work (for now, at least) if the side
        // is wrong but the label itself is correct
        if (goldBinary.isBinarized() && chosenBinary.label.equals(goldBinary.label)) {
          transitions.remove(0);
          return true;
        } else {
          return false;
        }
      }

      // In all other binarized situations, essentially what has
      // happened is we added a bracket error, but future brackets can
      // still wind up being correct
      transitions.remove(0);
      return true;
    }

    if ((chosenTransition instanceof ShiftTransition) && (goldTransition instanceof BinaryTransition)) {
      // can't shift at the end of the queue
      if (state.endOfQueue()) {
        return false;
      }

      // doesn't help, sadly
      BinaryTransition goldBinary = (BinaryTransition) goldTransition;
      if (!goldBinary.isBinarized()) {
        return op.trainOptions().oracleShiftToBinary && reorderIncorrectShiftTransition(transitions);
      }
    }

    return false;
  }

  static boolean reorderIncorrectBinaryTransition(List transitions) {
    int shiftCount = 0;
    ListIterator cursor = transitions.listIterator();
    do {
      if (!cursor.hasNext()) {
        return false;
      }
      Transition next = cursor.next();
      if (next instanceof ShiftTransition) {
        ++shiftCount;
      } else if (next instanceof BinaryTransition) {
        --shiftCount;
        if (shiftCount <= 0) {
          cursor.remove();
        }
      }
    } while (shiftCount > 0);

    if (!cursor.hasNext()) {
      return false;
    }
    Transition next = cursor.next();
    while ((next instanceof UnaryTransition) || (next instanceof CompoundUnaryTransition)) {
      cursor.remove();
      if (!cursor.hasNext()) {
        return false;
      }
      next = cursor.next();
    }

    // At this point, the rest of the transition sequence should suffice
    return true;
  }

  /**
   * In this case, we are starting to build a new subtree when instead
   * we should have been combining existing trees.  What we can do is
   * find the transitions that build up the next subtree in the gold
   * transition list, figure out how it gets applied to a
   * BinaryTransition, and make that the next BinaryTransition we
   * perform after finishing the subtree.  If there are multiple
   * BinaryTransitions in a row, we ignore any associated
   * UnaryTransitions (unfixable) and try to transition to the final
   * state.  The assumption is that we can't do anything about the
   * incorrect subtrees any more, so we skip them all.
   *
* Sadly, this does not seem to help - the parser gets worse when it * learns these states */ static boolean reorderIncorrectShiftTransition(List transitions) { List leftoverBinary = Generics.newArrayList(); while (transitions.size() > 0) { Transition head = transitions.remove(0); if (head instanceof ShiftTransition) { break; } if (head instanceof BinaryTransition) { leftoverBinary.add((BinaryTransition) head); } } if (transitions.size() == 0 || leftoverBinary.size() == 0) { // honestly this is an error we should probably just throw return false; } int shiftCount = 0; ListIterator cursor = transitions.listIterator(); BinaryTransition lastBinary = null; while (cursor.hasNext() && shiftCount >= 0) { Transition next = cursor.next(); if (next instanceof ShiftTransition) { ++shiftCount; } else if (next instanceof BinaryTransition) { --shiftCount; if (shiftCount < 0) { lastBinary = (BinaryTransition) next; cursor.remove(); } } } if (!cursor.hasNext() || lastBinary == null) { // once again, an error. even if the sequence of tree altering // gold transitions ends with a BinaryTransition, there should // be a FinalizeTransition after that return false; } String label = lastBinary.label; if (lastBinary.isBinarized()) { label = label.substring(1); } if (lastBinary.side == BinaryTransition.Side.RIGHT) { // When we finally transition all the binary transitions, we // will want to have the new node be the right head. 
Therefore, // we add a bunch of temporary binary transitions with a right // head, ending up with a binary transition with a right head for (int i = 0; i < leftoverBinary.size(); ++i) { cursor.add(new BinaryTransition("@" + label, BinaryTransition.Side.RIGHT)); } // use lastBinary.label in case the last transition is temporary cursor.add(new BinaryTransition(lastBinary.label, BinaryTransition.Side.RIGHT)); } else { cursor.add(new BinaryTransition("@" + label, BinaryTransition.Side.LEFT)); for (int i = 0; i < leftoverBinary.size() - 1; ++i) { cursor.add(new BinaryTransition("@" + label, leftoverBinary.get(i).side)); } cursor.add(new BinaryTransition(lastBinary.label, leftoverBinary.get(leftoverBinary.size() - 1).side)); } return true; } }




© 2015 - 2024 Weber Informatics LLC | Privacy Policy