opennlp.tools.coref.mention.DefaultParse Maven / Gradle / Ivy
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package opennlp.tools.coref.mention;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.HashSet;
import java.util.LinkedList;
import java.util.List;
import java.util.Set;
import opennlp.tools.parser.Parse;
import opennlp.tools.parser.chunking.Parser;
import opennlp.tools.util.Span;
/**
* This class is a wrapper for {@link opennlp.tools.parser.Parse} mapping it to the API specified in {@link opennlp.tools.coref.mention.Parse}.
* This allows coreference to be done on the output of the parser.
*/
public class DefaultParse extends AbstractParse {
/**
 * Node type labels that this class treats as named-entity types.
 * They seed {@link #entitySet} once, when the class is initialized.
 */
public static String[] NAME_TYPES = {"person", "organization", "location", "date", "time", "percentage", "money"};

/** The wrapped parser node that all methods of this class delegate to. */
private Parse parse;

/** The sentence number supplied at construction time. */
private int sentenceNumber;

/**
 * Lookup set built from {@link #NAME_TYPES}; used to test whether a node's
 * type is one of the named-entity labels. It is a snapshot taken at class
 * initialization, so later changes to the array are not reflected here.
 */
private static Set<String> entitySet = new HashSet<String>(Arrays.asList(NAME_TYPES));
/**
 * Creates a wrapper around the given parser node.
 *
 * @param parse the parser node to wrap; it is stored as-is and returned by
 *     {@link #getParse()}
 * @param sentenceNumber the value later returned by {@link #getSentenceNumber()}
 */
public DefaultParse(Parse parse, int sentenceNumber) {
  this.sentenceNumber = sentenceNumber;
  this.parse = parse;
}
/**
 * Returns the sentence number this instance was constructed with.
 *
 * @return the stored sentence number
 */
public int getSentenceNumber() {
  return this.sentenceNumber;
}
/**
 * Collects the named-entity nodes below the wrapped parse.
 *
 * <p>The subtree is walked breadth-first, starting from the direct children.
 * A node whose type is contained in {@code entitySet} is collected and is not
 * descended into; any other node has its children appended to the work queue.
 *
 * @return the result of {@code createParses} applied to the collected nodes,
 *     in the order they were encountered
 */
public List getNamedEntities() {
  // Typed locals: with raw collections the element reads below and the
  // toArray(...) result would not be assignable to Parse / Parse[].
  List<Parse> names = new ArrayList<Parse>();
  LinkedList<Parse> pending = new LinkedList<Parse>(Arrays.asList(parse.getChildren()));

  while (!pending.isEmpty()) {
    Parse candidate = pending.removeFirst();
    if (entitySet.contains(candidate.getType())) {
      names.add(candidate);
    }
    else {
      // Appending to the tail keeps the traversal breadth-first.
      pending.addAll(Arrays.asList(candidate.getChildren()));
    }
  }

  return createParses(names.toArray(new Parse[names.size()]));
}
/**
 * Returns the direct children of the wrapped parse.
 *
 * @return the result of {@code createParses} applied to the wrapped node's
 *     child array
 */
public List getChildren() {
  Parse[] children = parse.getChildren();
  return createParses(children);
}
/**
 * Returns the children of the wrapped parse with named-entity nodes spliced out.
 *
 * <p>Every child whose type is contained in {@code entitySet} is replaced, in
 * place, by its own children. The replacements are examined again, so nested
 * entity nodes are flattened as well.
 *
 * @return the result of {@code createParses} applied to the flattened child list
 */
public List getSyntacticChildren() {
  // Typed local: with a raw list, kids.get(...) and kids.toArray(...) would
  // not be assignable to Parse / Parse[].
  List<Parse> kids = new ArrayList<Parse>(Arrays.asList(parse.getChildren()));

  int i = 0;
  while (i < kids.size()) {
    Parse kid = kids.get(i);
    if (entitySet.contains(kid.getType())) {
      // Swap the entity node for its children and stay on the same index so
      // the first spliced-in child is checked next.
      kids.remove(i);
      kids.addAll(i, Arrays.asList(kid.getChildren()));
    }
    else {
      i++;
    }
  }

  return createParses(kids.toArray(new Parse[kids.size()]));
}
/**
 * Collects the POS-tag nodes below the wrapped parse.
 *
 * <p>The subtree is walked depth-first, left to right, starting from the
 * direct children. A node for which {@code isPosTag()} is true is collected
 * and is not descended into; any other node has its children pushed onto the
 * front of the work list.
 *
 * @return the result of {@code createParses} applied to the collected nodes,
 *     in the order they were encountered
 */
public List getTokens() {
  // Typed locals: with raw collections the element reads below and the
  // toArray(...) result would not be assignable to Parse / Parse[].
  List<Parse> tokens = new ArrayList<Parse>();
  LinkedList<Parse> pending = new LinkedList<Parse>(Arrays.asList(parse.getChildren()));

  while (!pending.isEmpty()) {
    Parse node = pending.removeFirst();
    if (node.isPosTag()) {
      tokens.add(node);
    }
    else {
      // Inserting at the head expands this node before its right siblings.
      pending.addAll(0, Arrays.asList(node.getChildren()));
    }
  }

  return createParses(tokens.toArray(new Parse[tokens.size()]));
}
/**
 * Returns the type of the wrapped parse unless that type is one of the
 * named-entity labels in {@code entitySet}.
 *
 * @return the wrapped node's type, or {@code null} when the type is a
 *     named-entity label
 */
public String getSyntacticType() {
  return entitySet.contains(parse.getType()) ? null : parse.getType();
}
// NOTE(review): this region is damaged and is not valid Java as it stands.
// It opens as createParses(Parse[]), which allocates a list sized to the
// input array, but the loop header below stops after "pi" and continues
// with the tail of what looks like a comparison that orders by sentence
// number and then by span. Presumably the text between a '<' and a later
// '>' was stripped when the file was extracted; everything in between,
// including the rest of createParses and possibly further methods, is
// missing here. Restore it from the upstream source; do not guess.
private List createParses(Parse[] parses) {
List newParses =
new ArrayList(parses.length);
// NOTE(review): truncated line; see the note at the top of this region.
for (int pi=0,pn=parses.length;pi p.getSentenceNumber()) {
return 1;
}
else {
// Falls back to comparing the spans of the two wrapped parses.
return parse.getSpan().compareTo(p.getSpan());
}
}
/**
 * Returns the string form of the wrapped parse.
 *
 * @return whatever the wrapped node's {@code toString()} returns
 */
@Override
public String toString() {
  final String text = parse.toString();
  return text;
}
/**
 * Finds the POS-tag node that lies immediately to the left of the wrapped parse.
 *
 * <p>The method climbs towards the root until it reaches an ancestor that has
 * a child to the left of the path taken, then descends from that child along
 * right-most children until a POS-tag node is reached.
 *
 * @return a new {@code DefaultParse} for that node with this instance's
 *     sentence number, or {@code null} if no ancestor has a left sibling
 *     on the path
 */
public opennlp.tools.coref.mention.Parse getPreviousToken() {
  Parse current = parse;
  Parse ancestor = parse.getParent();
  int leftIndex = -1;

  // Climb until some ancestor has a child to the left of where we came from.
  while (ancestor != null) {
    leftIndex = ancestor.indexOf(current) - 1;
    if (leftIndex >= 0) {
      break;
    }
    current = ancestor;
    ancestor = ancestor.getParent();
  }

  // Running off the top of the tree means there is nothing to the left.
  if (ancestor == null) {
    return null;
  }

  // Walk down the right edge of the left sibling to its last POS-tag node.
  Parse token = ancestor.getChildren()[leftIndex];
  while (!token.isPosTag()) {
    Parse[] children = token.getChildren();
    token = children[children.length - 1];
  }
  return new DefaultParse(token, sentenceNumber);
}
/**
 * Finds the POS-tag node that lies immediately to the right of the wrapped parse.
 *
 * <p>The method climbs towards the root until it reaches an ancestor in which
 * the path taken is not the last child, then descends from the following
 * child along left-most children until a POS-tag node is reached.
 *
 * @return a new {@code DefaultParse} for that node with this instance's
 *     sentence number, or {@code null} if the climb reaches the root without
 *     finding a following sibling
 */
public opennlp.tools.coref.mention.Parse getNextToken() {
  Parse current = parse;
  Parse ancestor = parse.getParent();
  int rightIndex = -1;

  // Climb while the node we came from is the last child of its parent.
  for (; ancestor != null; current = ancestor, ancestor = ancestor.getParent()) {
    rightIndex = ancestor.indexOf(current) + 1;
    if (rightIndex != ancestor.getChildCount()) {
      break;
    }
  }

  if (ancestor == null) {
    return null;
  }

  // Walk down the left edge of the right sibling to its first POS-tag node.
  Parse token = ancestor.getChildren()[rightIndex];
  while (!token.isPosTag()) {
    token = token.getChildren()[0];
  }
  return new DefaultParse(token, sentenceNumber);
}
/**
 * Compares by identity of the wrapped parse: two {@code DefaultParse}
 * instances are equal when they wrap the very same node object. The
 * sentence number is not part of the comparison.
 *
 * @param o the object to compare with
 * @return {@code true} if {@code o} is this instance or a {@code DefaultParse}
 *     wrapping the same node instance; {@code false} otherwise
 */
@Override
public boolean equals(Object o) {
  if (o == this) {
    return true;
  }
  if (!(o instanceof DefaultParse)) {
    return false;
  }
  DefaultParse other = (DefaultParse) o;
  return parse == other.parse;
}
/**
 * Returns the hash code of the wrapped parse.
 *
 * @return the wrapped node's {@code hashCode()}
 */
@Override
public int hashCode() {
  final int hash = parse.hashCode();
  return hash;
}
/**
 * Gives access to the wrapped parser node.
 *
 * @return the {@link Parse} passed to the constructor
 */
public Parse getParse() {
  return this.parse;
}
}