All downloads are free. The search and download functionality uses the official Maven repository.

edu.stanford.nlp.parser.lexparser.AbstractUnknownWordModelTrainer Maven / Gradle / Ivy

Go to download

Stanford Parser processes raw text in English, Chinese, German, Arabic, and French, and extracts constituency parse trees.

There is a newer version: 3.9.2
Show newest version
package edu.stanford.nlp.parser.lexparser;

import java.util.Collection;
import java.util.List;

import edu.stanford.nlp.ling.TaggedWord;
import edu.stanford.nlp.trees.Tree;
import edu.stanford.nlp.util.Index;


/**
 * Skeleton implementation of {@link UnknownWordModelTrainer}.
 *
 * <p>It stores the objects handed to {@link #initializeTraining}, keeps a
 * running (weighted) count of the trees seen, and reduces every
 * collection-level and tree-level {@code train} call to a sequence of
 * per-word {@code train(TaggedWord, int, double)} calls. That per-word
 * method is not defined in this class; concrete subclasses are expected
 * to supply it.
 */
public abstract class AbstractUnknownWordModelTrainer
  implements UnknownWordModelTrainer
{
  /** Sum of the weights of all trees passed through training so far. */
  double treesRead;
  /** Value supplied to {@link #initializeTraining} as {@code totalTrees}. */
  double totalTrees;

  /** Indices supplied to {@link #initializeTraining}. */
  Index<String> wordIndex, tagIndex;

  /** Options supplied to {@link #initializeTraining}. */
  Options op;
  /** Lexicon supplied to {@link #initializeTraining}. */
  Lexicon lex;

  /**
   * Records the training configuration and resets the tree counter to zero.
   *
   * @param op         options object to keep for later use
   * @param lex        lexicon to keep for later use
   * @param wordIndex  index stored in {@link #wordIndex}
   * @param tagIndex   index stored in {@link #tagIndex}
   * @param totalTrees value stored in {@link #totalTrees}
   */
  @Override
  public void initializeTraining(Options op, Lexicon lex,
                                 Index<String> wordIndex,
                                 Index<String> tagIndex, double totalTrees) {
    this.totalTrees = totalTrees;
    this.treesRead = 0;

    this.wordIndex = wordIndex;
    this.tagIndex = tagIndex;
    this.op = op;
    this.lex = lex;
  }

  /**
   * Trains on every tree in {@code trees} with a weight of {@code 1.0}.
   *
   * @param trees the trees to train on
   */
  @Override
  public final void train(Collection<Tree> trees) {
    train(trees, 1.0);
  }

  /**
   * Trains on every tree in {@code trees}, applying the same weight to each.
   *
   * @param trees  the trees to train on
   * @param weight weight passed along with each tree
   */
  @Override
  public final void train(Collection<Tree> trees, double weight) {
    for (Tree tree : trees) {
      train(tree, weight);
    }
  }

  /**
   * Trains on a single tree: bumps the tree counter by {@code weight}, then
   * feeds each tagged word of the tree's yield, together with its
   * zero-based position in that yield, to the per-word {@code train} method.
   *
   * @param tree   the tree whose tagged yield is trained on
   * @param weight weight added to the tree counter and passed with each word
   */
  @Override
  public final void train(Tree tree, double weight) {
    incrementTreesRead(weight);
    int loc = 0;
    List<TaggedWord> yield = tree.taggedYield();
    for (TaggedWord tw : yield) {
      train(tw, loc, weight);
      ++loc;
    }
  }

  /**
   * Adds {@code weight} to the running count of trees read.
   *
   * @param weight amount to add to {@link #treesRead}
   */
  @Override
  public void incrementTreesRead(double weight) {
    treesRead += weight;
  }

}




© 2015 - 2024 Weber Informatics LLC | Privacy Policy