/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package opennlp.tools.parser.treeinsert;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import opennlp.tools.dictionary.Dictionary;
import opennlp.tools.ml.model.Event;
import opennlp.tools.parser.AbstractBottomUpParser;
import opennlp.tools.parser.AbstractParserEventStream;
import opennlp.tools.parser.HeadRules;
import opennlp.tools.parser.Parse;
import opennlp.tools.parser.ParserEventTypeEnum;
import opennlp.tools.util.ObjectStream;
import opennlp.tools.util.Span;
/**
 * Wrapper class for one of four {@link Parser build-attach parser} event streams.
 * The particular {@link ParserEventTypeEnum event type} is specified at construction.
 */
public class ParserEventStream extends AbstractParserEventStream {
private static final Logger logger = LoggerFactory.getLogger(ParserEventStream.class);
protected AttachContextGenerator attachContextGenerator;
protected BuildContextGenerator buildContextGenerator;
protected CheckContextGenerator checkContextGenerator;
/**
* Instantiates a {@link ParserEventStream} based on the specified data stream
* of the {@link ParserEventTypeEnum type} using {@link HeadRules head rules}.
*
* @param d A 1-parse-per-line Penn Treebank Style parse.
* @param rules The {@link HeadRules head rules} to use.
* @param etype The {@link ParserEventTypeEnum type} of events desired.
* @param dict A tri-gram {@link Dictionary} to reduce feature generation.
*
* @see ParserEventTypeEnum
*/
public ParserEventStream(ObjectStream d, HeadRules rules,
ParserEventTypeEnum etype, Dictionary dict) {
super(d, rules, etype, dict);
}
/**
* Instantiates a {@link ParserEventStream} based on the specified data stream
* of the {@link ParserEventTypeEnum type} using {@link HeadRules head rules}.
*
* @param d A 1-parse-per-line Penn Treebank Style parse.
* @param rules The {@link HeadRules head rules} to use.
* @param etype The {@link ParserEventTypeEnum type} of events desired.
*
* @see ParserEventTypeEnum
*/
public ParserEventStream(ObjectStream d, HeadRules rules, ParserEventTypeEnum etype) {
super(d, rules, etype);
}
@Override
public void init() {
buildContextGenerator = new BuildContextGenerator();
attachContextGenerator = new AttachContextGenerator(punctSet);
checkContextGenerator = new CheckContextGenerator(punctSet);
}
/**
* Returns a map of parent nodes which consist of the immediate
* parent of the specified {@link Parse node} and any of its parent which
* share the same syntactic type.
*
* @param node The {@link Parse node} whose parents are to be returned.
* @return A {@link Map} of parent {@link Parse nodes}.
*/
private Map getNonAdjoinedParent(Parse node) {
Map parents = new HashMap<>();
Parse parent = node.getParent();
int index = indexOf(node,parent);
parents.put(parent, index);
while (parent.getType().equals(node.getType())) {
node = parent;
parent = parent.getParent();
index = indexOf(node,parent);
parents.put(parent, index);
}
return parents;
}
private int indexOf(Parse child, Parse parent) {
Parse[] kids = Parser.collapsePunctuation(parent.getChildren(),punctSet);
for (int ki = 0; ki < kids.length; ki++) {
if (child == kids[ki]) {
return ki;
}
}
return -1;
}
private int nonPunctChildCount(Parse node) {
return Parser.collapsePunctuation(node.getChildren(),punctSet).length;
}
@Override
protected boolean lastChild(Parse child, Parse parent) {
boolean lc = super.lastChild(child, parent);
while (!lc) {
Parse cp = child.getParent();
if (cp != parent && cp.getType().equals(child.getType())) {
lc = super.lastChild(cp,parent);
child = cp;
}
else {
break;
}
}
return lc;
}
@Override
protected void addParseEvents(List parseEvents, Parse[] chunks) {
/* Frontier nodes built from node in a completed parse. Specifically,
* they have all their children regardless of the stage of parsing.*/
List rightFrontier = new ArrayList<>();
List builtNodes = new ArrayList<>();
/* Nodes which characterize what the parse looks like to the parser as its being built.
* Specifically, these nodes don't have all their children attached like the parents of
* the chunk nodes do.*/
Parse[] currentChunks = new Parse[chunks.length];
for (int ci = 0; ci < chunks.length; ci++) {
currentChunks[ci] = (Parse) chunks[ci].clone();
currentChunks[ci].setPrevPunctuation(chunks[ci].getPreviousPunctuationSet());
currentChunks[ci].setNextPunctuation(chunks[ci].getNextPunctuationSet());
currentChunks[ci].setLabel(Parser.COMPLETE);
chunks[ci].setLabel(Parser.COMPLETE);
}
for (int ci = 0; ci < chunks.length; ci++) {
Parse parent = chunks[ci].getParent();
Parse prevParent = chunks[ci];
int off = 0;
//build un-built parents
if (!chunks[ci].isPosTag()) {
builtNodes.add(off++,chunks[ci]);
}
//perform build stages
while (!parent.getType().equals(AbstractBottomUpParser.TOP_NODE) && parent.getLabel() == null) {
if (!prevParent.getType().equals(parent.getType())) {
//build level
if (logger.isDebugEnabled()) {
logger.debug("Build: {} for: {}", parent.getType(), currentChunks[ci]);
}
if (etype == ParserEventTypeEnum.BUILD) {
parseEvents.add(new Event(parent.getType(),
buildContextGenerator.getContext(currentChunks, ci)));
}
builtNodes.add(off++,parent);
Parse newParent = new Parse(currentChunks[ci].getText(),
currentChunks[ci].getSpan(),parent.getType(),1,0);
newParent.add(currentChunks[ci],rules);
newParent.setPrevPunctuation(currentChunks[ci].getPreviousPunctuationSet());
newParent.setNextPunctuation(currentChunks[ci].getNextPunctuationSet());
currentChunks[ci].setParent(newParent);
currentChunks[ci] = newParent;
newParent.setLabel(Parser.BUILT);
//see if chunk is complete
if (lastChild(chunks[ci], parent)) {
if (etype == ParserEventTypeEnum.CHECK) {
parseEvents.add(new Event(Parser.COMPLETE,
checkContextGenerator.getContext(currentChunks[ci],currentChunks, ci,false)));
}
currentChunks[ci].setLabel(Parser.COMPLETE);
parent.setLabel(Parser.COMPLETE);
}
else {
if (etype == ParserEventTypeEnum.CHECK) {
parseEvents.add(new Event(Parser.INCOMPLETE,
checkContextGenerator.getContext(currentChunks[ci],currentChunks,ci,false)));
}
currentChunks[ci].setLabel(Parser.INCOMPLETE);
parent.setLabel(Parser.COMPLETE);
}
chunks[ci] = parent;
}
//TODO: Consider whether we need to set this label or train parses at all.
parent.setLabel(Parser.BUILT);
prevParent = parent;
parent = parent.getParent();
}
//decide to attach
if (etype == ParserEventTypeEnum.BUILD) {
parseEvents.add(new Event(Parser.DONE, buildContextGenerator.getContext(currentChunks, ci)));
}
//attach node
String attachType = null;
/* Node selected for attachment. */
Parse attachNode = null;
int attachNodeIndex = -1;
if (ci == 0) {
Parse top = new Parse(currentChunks[ci].getText(),
new Span(0,currentChunks[ci].getText().length()),AbstractBottomUpParser.TOP_NODE,1,0);
top.insert(currentChunks[ci]);
}
else {
/* Right frontier consisting of partially-built nodes based on current state of the parse.*/
List currentRightFrontier = Parser.getRightFrontier(currentChunks[0],punctSet);
if (currentRightFrontier.size() != rightFrontier.size()) {
logger.error("Frontiers mis-aligned: {} != {} {} {}", currentRightFrontier.size(),
rightFrontier.size(), currentRightFrontier, rightFrontier);
System.exit(1);
}
Map parents = getNonAdjoinedParent(chunks[ci]);
//try daughters first.
for (int cfi = 0; cfi < currentRightFrontier.size(); cfi++) {
Parse frontierNode = rightFrontier.get(cfi);
Parse cfn = currentRightFrontier.get(cfi);
if (!Parser.checkComplete || !Parser.COMPLETE.equals(cfn.getLabel())) {
Integer i = parents.get(frontierNode);
if (logger.isDebugEnabled())
logger.debug("Looking at attachment site ({}): {} ci={} cs={}, {} :for {} {} -> {}",
cfi, cfn.getType(), i, nonPunctChildCount(cfn), cfn, currentChunks[ci].getType(),
currentChunks[ci], parents
);
if (attachNode == null && i != null && i == nonPunctChildCount(cfn)) {
attachType = Parser.ATTACH_DAUGHTER;
attachNodeIndex = cfi;
attachNode = cfn;
if (etype == ParserEventTypeEnum.ATTACH) {
parseEvents.add(new Event(attachType, attachContextGenerator.getContext(currentChunks,
ci, currentRightFrontier, attachNodeIndex)));
}
}
}
else {
if (logger.isDebugEnabled())
logger.debug("Skipping ({}): {},{} {} :for {} {} -> {}",
cfi, cfn.getType(), cfn.getPreviousPunctuationSet(), cfn,
currentChunks[ci].getType(), currentChunks[ci], parents);
}
// Can't attach past first incomplete node.
if (Parser.checkComplete && cfn.getLabel().equals(Parser.INCOMPLETE)) {
if (logger.isDebugEnabled()) logger.debug("breaking on incomplete: {} {}", cfn.getType(), cfn);
break;
}
}
//try sisters, and generate non-attach events.
for (int cfi = 0; cfi < currentRightFrontier.size(); cfi++) {
Parse frontierNode = rightFrontier.get(cfi);
Parse cfn = currentRightFrontier.get(cfi);
if (attachNode == null && parents.containsKey(frontierNode.getParent())
&& frontierNode.getType().equals(frontierNode.getParent().getType())
) { //&& frontierNode.getParent().getLabel() == null) {
attachType = Parser.ATTACH_SISTER;
attachNode = cfn;
attachNodeIndex = cfi;
if (etype == ParserEventTypeEnum.ATTACH) {
parseEvents.add(new Event(Parser.ATTACH_SISTER,
attachContextGenerator.getContext(currentChunks, ci, currentRightFrontier, cfi)));
}
chunks[ci].getParent().setLabel(Parser.BUILT);
}
else if (cfi == attachNodeIndex) {
//skip over previously attached daughter.
}
else {
if (etype == ParserEventTypeEnum.ATTACH) {
parseEvents.add(new Event(Parser.NON_ATTACH,
attachContextGenerator.getContext(currentChunks, ci, currentRightFrontier, cfi)));
}
}
//Can't attach past first incomplete node.
if (Parser.checkComplete && cfn.getLabel().equals(Parser.INCOMPLETE)) {
if (logger.isDebugEnabled()) logger.debug("breaking on incomplete: {} {}", cfn.getType(), cfn);
break;
}
}
//attach Node
if (attachNode != null) {
if (Parser.ATTACH_DAUGHTER.equals(attachType)) {
Parse daughter = currentChunks[ci];
if (logger.isDebugEnabled())
logger.debug("daughter attach a={}:{} d={} com={}", attachNode.getType(),
attachNode, daughter, lastChild(chunks[ci], rightFrontier.get(attachNodeIndex)));
attachNode.add(daughter,rules);
daughter.setParent(attachNode);
if (lastChild(chunks[ci], rightFrontier.get(attachNodeIndex))) {
if (etype == ParserEventTypeEnum.CHECK) {
parseEvents.add(new Event(Parser.COMPLETE,
checkContextGenerator.getContext(attachNode,currentChunks,ci,true)));
}
attachNode.setLabel(Parser.COMPLETE);
}
else {
if (etype == ParserEventTypeEnum.CHECK) {
parseEvents.add(new Event(Parser.INCOMPLETE,
checkContextGenerator.getContext(attachNode,currentChunks,ci,true)));
}
}
}
else if (Parser.ATTACH_SISTER.equals(attachType)) {
Parse frontierNode = rightFrontier.get(attachNodeIndex);
rightFrontier.set(attachNodeIndex,frontierNode.getParent());
Parse sister = currentChunks[ci];
if (logger.isDebugEnabled())
logger.debug("sister attach a={}:{} s={} ap={} com={}", attachNode.getType(),
attachNode, sister, attachNode.getParent(),
lastChild(chunks[ci], rightFrontier.get(attachNodeIndex)));
Parse newParent = attachNode.getParent().adjoin(sister,rules);
newParent.setParent(attachNode.getParent());
attachNode.setParent(newParent);
sister.setParent(newParent);
if (attachNode == currentChunks[0]) {
currentChunks[0] = newParent;
}
if (lastChild(chunks[ci], rightFrontier.get(attachNodeIndex))) {
if (etype == ParserEventTypeEnum.CHECK) {
parseEvents.add(new Event(Parser.COMPLETE,
checkContextGenerator.getContext(newParent,currentChunks,ci,true)));
}
newParent.setLabel(Parser.COMPLETE);
}
else {
if (etype == ParserEventTypeEnum.CHECK) {
parseEvents.add(new Event(Parser.INCOMPLETE,
checkContextGenerator.getContext(newParent,currentChunks,ci,true)));
}
newParent.setLabel(Parser.INCOMPLETE);
}
}
//update right frontier
for (int ni = 0; ni < attachNodeIndex; ni++) {
rightFrontier.remove(0);
}
}
else {
throw new RuntimeException("No Attachment: " + chunks[ci]);
}
}
rightFrontier.addAll(0,builtNodes);
builtNodes.clear();
}
}
}