All downloads are free. The search and download functionality uses the official Maven repository.

edu.stanford.nlp.parser.lexparser.AbstractDependencyGrammar Maven / Gradle / Ivy

Go to download

Stanford Parser processes raw text in English, Chinese, German, Arabic, and French, and extracts constituency parse trees.

There is a newer version: 3.9.2
Show newest version
package edu.stanford.nlp.parser.lexparser;

import edu.stanford.nlp.trees.Tree;
import edu.stanford.nlp.trees.TreebankLanguagePack;
import edu.stanford.nlp.util.Generics;
import edu.stanford.nlp.util.HashIndex;
import edu.stanford.nlp.util.Index;
import edu.stanford.nlp.util.Interner;

import static edu.stanford.nlp.parser.lexparser.IntTaggedWord.ANY_WORD_INT;
import static edu.stanford.nlp.parser.lexparser.IntTaggedWord.ANY_TAG_INT;
import static edu.stanford.nlp.parser.lexparser.IntTaggedWord.STOP_WORD_INT;
import static edu.stanford.nlp.parser.lexparser.IntTaggedWord.STOP_TAG_INT;

import java.io.BufferedReader;
import java.io.IOException;
import java.io.PrintWriter;
import java.io.ObjectInputStream;
import java.util.Collection;
import java.util.Map;

/**
 * An abstract base class for dependency grammars.  The only thing you have
 * to implement in a subclass is scoreTB (score a "tag binned" dependency
 * in the tagProjection space).  A subclass also has to either call
 * super() in its constructor, or otherwise initialize the tagBin array.
 * The call to initTagBins() (in the constructor) must be made after all
 * keys have been entered into tagIndex.
 *
 * @author Galen Andrew
 */
public abstract class AbstractDependencyGrammar implements DependencyGrammar {

  protected TagProjection tagProjection;
  protected final Index tagIndex;
  protected final Index wordIndex;
  protected int numTagBins;
  protected int[] tagBin;
  protected TreebankLanguagePack tlp;
  protected boolean directional;
  protected boolean useDistance;
  protected boolean useCoarseDistance;

  protected Lexicon lex;

  protected final IntTaggedWord stopTW;
  protected final IntTaggedWord wildTW;

  protected transient Map expandDependencyMap = Generics.newHashMap();

  private static final boolean DEBUG = false;

  protected int[] coarseDistanceBins = {0, 2, 5};
  protected int[] regDistanceBins = {0, 1, 5, 10};

  protected final Options op;

  transient protected Interner itwInterner =
    new Interner();

  public AbstractDependencyGrammar(TreebankLanguagePack tlp, TagProjection tagProjection, boolean directional, boolean useDistance, boolean useCoarseDistance, Options op, Index wordIndex, Index tagIndex) {
    this.tlp = tlp;
    this.tagProjection = tagProjection;
    this.directional = directional;
    this.useDistance = useDistance;
    this.useCoarseDistance = useCoarseDistance;
    this.op = op;
    this.wordIndex = wordIndex;
    this.tagIndex = tagIndex;
    stopTW = new IntTaggedWord(STOP_WORD_INT, STOP_TAG_INT);
    wildTW = new IntTaggedWord(ANY_WORD_INT, ANY_TAG_INT);

    initTagBins();
  }

  public void setLexicon(Lexicon lexicon) {
    lex = lexicon;
  }

  /**
   * Default is no-op.
   */
  public void tune(Collection trees) {
  }

  public int numTagBins() {
    return numTagBins;
  }

  public int tagBin(int tag) {
    if (tag < 0) {
      return tag;
    } else {
      return tagBin[tag];
    }
  }

  public boolean rootTW(IntTaggedWord rTW) {
    // System.out.println("rootTW: checking if " + rTW.toString("verbose") +
    // " == " + Lexicon.BOUNDARY_TAG + "[" +
    // tagIndex.indexOf(Lexicon.BOUNDARY_TAG) + "]" + ": " +
    // (rTW.tag == tagIndex.indexOf(Lexicon.BOUNDARY_TAG)));
    return rTW.tag == tagIndex.indexOf(Lexicon.BOUNDARY_TAG);
  }

  protected short valenceBin(int distance) {
    if (!useDistance) {
      return 0;
    }
    if (distance < 0) {
      return -1;
    }
    if (distance == 0) {
      return 0;
    }
    return 1;
  }

  public int numDistBins() {
    return useCoarseDistance ? 4 : 5;
  }

  public short distanceBin(int distance) {
    if (!useDistance) {
      return 0;
    } else if (useCoarseDistance) {
      return coarseDistanceBin(distance);
    } else {
      return regDistanceBin(distance);
    }
  }

  public short regDistanceBin(int distance) {
    for(short i=0; i tagBinIndex = new HashIndex();
    if (DEBUG) {
      System.err.println();
      System.err.println("There are " + tagIndex.size() + " tags.");
    }
    tagBin = new int[tagIndex.size()];
    for (int t = 0; t < tagBin.length; t++) {
      String tagStr = tagIndex.get(t);
      String binStr;
      if (tagProjection == null) {
        binStr = tagStr;
      } else {
        binStr = tagProjection.project(tagStr);
      }
      tagBin[t] = tagBinIndex.addToIndex(binStr);
      if (DEBUG) {
        System.err.println("initTagBins: Mapped " + tagStr + " (" + t +
                           ") to " + binStr + " (" + tagBin[t] + ")");
      }
    }
    numTagBins = tagBinIndex.size();
    if (DEBUG) {
      System.err.println("initTagBins: tags " + tagBin.length + " bins " +
                         numTagBins);
      System.err.println("tagBins: " + tagBinIndex);
    }
  }

  public double score(IntDependency dependency) {
    return scoreTB(dependency.head.word, tagBin(dependency.head.tag), dependency.arg.word, tagBin(dependency.arg.tag), dependency.leftHeaded, dependency.distance);
  }

  // currently unused
  public double score(int headWord, int headTag, int argWord, int argTag, boolean leftHeaded, int dist) {
    IntDependency tempDependency = new IntDependency(headWord, headTag, argWord, argTag, leftHeaded, dist);
    return score(tempDependency); // this method tag bins
  }

  public double scoreTB(int headWord, int headTag, int argWord, int argTag, boolean leftHeaded, int dist) {
    IntDependency tempDependency = new IntDependency(headWord, headTag, argWord, argTag, leftHeaded, dist);
    return scoreTB(tempDependency);
  }

  private void readObject(ObjectInputStream ois)
    throws IOException, ClassNotFoundException
  {
    ois.defaultReadObject();
    // reinitialize the transient objects
    itwInterner = new Interner();
  }

  /**
   * Default is to throw exception.
   * @throws IOException
   */
  public void readData(BufferedReader in) throws IOException {
    throw new UnsupportedOperationException();
  }

  /**
   * Default is to throw exception.
   * @throws IOException
   */
  public void writeData(PrintWriter out) throws IOException {
    throw new UnsupportedOperationException();
  }

  /**
   * This is a custom interner that simultaneously creates and interns
   * an IntDependency.
   *
   * @return An interned IntDependency
   */
  protected IntDependency intern(IntTaggedWord headTW, IntTaggedWord argTW, boolean leftHeaded, short dist) {
    Map map = expandDependencyMap;
    IntDependency internTempDependency = new IntDependency(itwInterner.intern(headTW), itwInterner.intern(argTW), leftHeaded, dist);
    IntDependency returnDependency = internTempDependency;
    if (map != null) {
      returnDependency = map.get(internTempDependency);
      if (returnDependency == null) {
        map.put(internTempDependency, internTempDependency);
        returnDependency = internTempDependency;
      }
    }
    return returnDependency;
  }

  private static final long serialVersionUID = 3L;

}




© 2015 - 2024 Weber Informatics LLC | Privacy Policy