All downloads are FREE. Search and download functionality uses the official Maven repository.

opennlp.tools.parser.treeinsert.ParserEventStream Maven / Gradle / Ivy

There is a newer version: 2.5.0
Show newest version
/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements.  See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License. You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */


package opennlp.tools.parser.treeinsert;

import java.io.File;
import java.io.FileInputStream;
import java.nio.charset.Charset;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import opennlp.tools.cmdline.SystemInputStreamFactory;
import opennlp.tools.dictionary.Dictionary;
import opennlp.tools.ml.maxent.io.SuffixSensitiveGISModelReader;
import opennlp.tools.ml.model.AbstractModel;
import opennlp.tools.ml.model.Event;
import opennlp.tools.parser.AbstractBottomUpParser;
import opennlp.tools.parser.AbstractParserEventStream;
import opennlp.tools.parser.HeadRules;
import opennlp.tools.parser.Parse;
import opennlp.tools.parser.ParseSampleStream;
import opennlp.tools.parser.ParserEventTypeEnum;
import opennlp.tools.util.ObjectStream;
import opennlp.tools.util.PlainTextByLineStream;
import opennlp.tools.util.Span;

public class ParserEventStream extends AbstractParserEventStream {

  // Context generators owned by this stream; all three are instantiated in init().
  // attachContextGenerator supplies the context for the attach events added to
  // the event list when the event type is ParserEventTypeEnum.ATTACH.
  protected AttachContextGenerator attachContextGenerator;
  protected BuildContextGenerator buildContextGenerator;
  protected CheckContextGenerator checkContextGenerator;

  // Compile-time switch guarding the diagnostic System.err output in this class.
  private static final boolean debug = false;

  /**
   * Creates an event stream over the given parse samples with a dictionary.
   * All arguments are forwarded unchanged to the superclass constructor.
   *
   * @param d The stream of {@link Parse} objects that events are generated from.
   * @param rules The head rules handed to the superclass.
   * @param etype The type of events this stream is asked to produce.
   * @param dict The dictionary handed to the superclass.
   */
  public ParserEventStream(ObjectStream<Parse> d, HeadRules rules, ParserEventTypeEnum etype,
      Dictionary dict) {
    super(d, rules, etype, dict);
  }

  /**
   * Instantiates the three context generators held by this stream.
   *
   * <p>The build generator takes no arguments; the attach and check generators
   * are both given {@code punctSet}, which is not declared in this class
   * (presumably inherited from the superclass -- confirm).
   */
  @Override
  public void init() {
    this.buildContextGenerator = new BuildContextGenerator();
    this.attachContextGenerator = new AttachContextGenerator(punctSet);
    this.checkContextGenerator = new CheckContextGenerator(punctSet);
  }

  /**
   * Creates an event stream over the given parse samples without a dictionary.
   * All arguments are forwarded unchanged to the superclass constructor.
   *
   * @param d The stream of {@link Parse} objects that events are generated from.
   * @param rules The head rules handed to the superclass.
   * @param etype The type of events this stream is asked to produce.
   */
  public ParserEventStream(ObjectStream<Parse> d, HeadRules rules, ParserEventTypeEnum etype) {
    super(d, rules, etype);
  }

  /**
   * Collects the immediate parent of the specified node and, for as long as the
   * current parent has the same syntactic type as the node directly beneath it,
   * each next ancestor up the tree.
   *
   * <p>Every collected ancestor is mapped to the value returned by
   * {@code indexOf(child, ancestor)}, where {@code child} is the node through
   * which that ancestor was reached.
   *
   * <p>The node must have a parent, and the upward walk must reach an ancestor
   * whose type differs from its child's before running out of parents;
   * otherwise a {@link NullPointerException} is thrown.
   *
   * @param node The node whose parents are to be returned.
   * @return a map from each collected parent node to the index of the child
   *     through which it was reached.
   */
  private Map<Parse, Integer> getNonAdjoinedParent(Parse node) {
    Map<Parse, Integer> parents = new HashMap<>();
    // Walk upwards with a local cursor instead of reassigning the parameter.
    Parse child = node;
    Parse parent = child.getParent();
    parents.put(parent, indexOf(child, parent));
    while (parent.getType().equals(child.getType())) {
      child = parent;
      parent = parent.getParent();
      parents.put(parent, indexOf(child, parent));
    }
    return parents;
  }

  private int indexOf(Parse child, Parse parent) {
    Parse[] kids = Parser.collapsePunctuation(parent.getChildren(),punctSet);
    for (int ki=0;ki parseEvents, Parse[] chunks) {
    /* Frontier nodes built from node in a completed parse.  Specifically,
     * they have all their children regardless of the stage of parsing.*/
    List rightFrontier = new ArrayList<>();
    List builtNodes = new ArrayList<>();
    /* Nodes which characterize what the parse looks like to the parser as it is being built.
     * Specifically, these nodes don't have all their children attached like the parents of
     * the chunk nodes do.*/
    Parse[] currentChunks = new Parse[chunks.length];
    for (int ci=0;ci currentRightFrontier = Parser.getRightFrontier(currentChunks[0],punctSet);
        if (currentRightFrontier.size() != rightFrontier.size()) {
          System.err.println("fontiers mis-aligned: "+currentRightFrontier.size()+" != "+rightFrontier.size()+" "+currentRightFrontier+" "+rightFrontier);
          System.exit(1);
        }
        Map parents = getNonAdjoinedParent(chunks[ci]);
        //try daughters first.
        for (int cfi=0;cfi "+parents);
            if (attachNode == null &&  i != null && i == nonPunctChildCount(cfn)) {
              attachType = Parser.ATTACH_DAUGHTER;
              attachNodeIndex = cfi;
              attachNode = cfn;
              if (etype == ParserEventTypeEnum.ATTACH) {
                parseEvents.add(new Event(attachType, attachContextGenerator.getContext(currentChunks, ci, currentRightFrontier, attachNodeIndex)));
              }
              //System.err.println("daughter attach "+attachNode+" at "+fi);
            }
          }
          else {
            if (debug) System.err.println("Skipping ("+cfi+"): "+cfn.getType()+","+cfn.getPreviousPunctuationSet()+" "+cfn+" :for "+currentChunks[ci].getType()+" "+currentChunks[ci]+" -> "+parents);
          }
          // Can't attach past first incomplete node.
          if (Parser.checkComplete && cfn.getLabel().equals(Parser.INCOMPLETE)) {
            if (debug) System.err.println("breaking on incomplete:"+cfn.getType()+" "+cfn);
            break;
          }
        }
        //try sisters, and generate non-attach events.
        for (int cfi=0;cfi es = new ParserEventStream(
        new ParseSampleStream(new PlainTextByLineStream(
            new SystemInputStreamFactory(), Charset.defaultCharset())),
        rules, etype, dict)) {
      Event e;
      while ((e = es.read()) != null) {
        if (model != null) {
          System.out.print(
              model.eval(e.getContext())[model.getIndex(e.getOutcome())] + " ");
        }
        System.out.println(e);
      }
    }
  }
}




© 2015 - 2024 Weber Informatics LLC | Privacy Policy