opennlp.tools.parser.chunking.CheckContextGenerator Maven / Gradle / Ivy
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package opennlp.tools.parser.chunking;
import java.util.ArrayList;
import java.util.Collection;
import java.util.Iterator;
import java.util.List;
import opennlp.tools.parser.AbstractContextGenerator;
import opennlp.tools.parser.Parse;
/**
* Class for generating predictive context for deciding when a constituent is complete.
*/
public class CheckContextGenerator extends AbstractContextGenerator {
/**
* Creates a new context generator for generating predictive context for deciding
* when a constituent is complete.
*/
public CheckContextGenerator() {
super();
}
public String[] getContext(Object o) {
Object[] params = (Object[]) o;
return getContext((Parse[]) params[0], (String) params[1], (Integer) params[2], (Integer) params[3]);
}
/**
* Returns predictive context for deciding whether the specified constituents between the
* specified start and end index can be combined to form a new constituent of the specified type.
*
* @param constituents The constituents which have yet to be combined into new constituents.
* @param type The type of the new constituent proposed.
* @param start The first constituent of the proposed constituent.
* @param end The last constituent of the proposed constituent.
* @return The predictive context for deciding whether a new constituent should be created.
*/
public String[] getContext(Parse[] constituents, String type, int start, int end) {
int ps = constituents.length;
List features = new ArrayList<>(100);
//default
features.add("default");
//first constituent label
features.add("fl=" + constituents[0].getLabel());
Parse pstart = constituents[start];
Parse pend = constituents[end];
checkcons(pstart, "begin", type, features);
checkcons(pend, "last", type, features);
StringBuilder production = new StringBuilder(20);
StringBuilder punctProduction = new StringBuilder(20);
production.append("p=").append(type).append("->");
punctProduction.append("pp=").append(type).append("->");
for (int pi = start; pi < end; pi++) {
Parse p = constituents[pi];
checkcons(p, pend, type, features);
production.append(p.getType()).append(",");
punctProduction.append(p.getType()).append(",");
Collection nextPunct = p.getNextPunctuationSet();
if (nextPunct != null) {
for (Iterator pit = nextPunct.iterator(); pit.hasNext();) {
Parse punct = pit.next();
punctProduction.append(punct.getType()).append(",");
}
}
}
production.append(pend.getType());
punctProduction.append(pend.getType());
features.add(production.toString());
features.add(punctProduction.toString());
Parse p_2 = null;
Parse p_1 = null;
Parse p1 = null;
Parse p2 = null;
Collection p1s = constituents[end].getNextPunctuationSet();
Collection p2s = null;
Collection p_1s = constituents[start].getPreviousPunctuationSet();
Collection p_2s = null;
if (start - 2 >= 0) {
p_2 = constituents[start - 2];
}
if (start - 1 >= 0) {
p_1 = constituents[start - 1];
p_2s = p_1.getPreviousPunctuationSet();
}
if (end + 1 < ps) {
p1 = constituents[end + 1];
p2s = p1.getNextPunctuationSet();
}
if (end + 2 < ps) {
p2 = constituents[end + 2];
}
surround(p_1, -1, type, p_1s, features);
surround(p_2, -2, type, p_2s, features);
surround(p1, 1, type, p1s, features);
surround(p2, 2, type, p2s, features);
return features.toArray(new String[features.size()]);
}
}
© 2015 - 2025 Weber Informatics LLC | Privacy Policy