opennlp.tools.parser.chunking.CheckContextGenerator Maven / Gradle / Ivy

Go to download
Show more of this group Show more artifacts with this name
Show all versions of opennlp-tools Show documentation
There is a newer version: 2.5.0
/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements.  See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License. You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package opennlp.tools.parser.chunking;

import java.util.ArrayList;
import java.util.Collection;
import java.util.List;

import opennlp.tools.parser.AbstractContextGenerator;
import opennlp.tools.parser.Parse;

/**
 * Generates predictive context for deciding when a constituent is complete.
 *
 * @see AbstractContextGenerator
 */
public class CheckContextGenerator extends AbstractContextGenerator {

  /**
   * Instantiates a {@link CheckContextGenerator} for generating predictive context for deciding
   * when a constituent is complete.
   */
  public CheckContextGenerator() {
    super();
  }

  /**
   * Untyped entry point: unpacks the arguments from an array and delegates to
   * {@link #getContext(Parse[], String, int, int)}.
   *
   * @param o An {@code Object[]} holding, in this order: the {@code Parse[]} constituents,
   *          the {@code String} type of the proposed constituent, the {@code Integer} start index
   *          and the {@code Integer} end index.
   * @return The predictive context for deciding whether a new constituent should be created.
   * @throws ClassCastException if {@code o} or one of its elements does not have the type listed above.
   */
  public String[] getContext(Object o) {
    Object[] params = (Object[]) o;
    return getContext((Parse[]) params[0], (String) params[1], (Integer) params[2], (Integer) params[3]);
  }

  /**
   * Finds predictive context for deciding whether the specified constituents between the
   * specified {@code start} and {@code end} index can be combined to form a
   * new constituent of the specified {@code type}.
   * <p>
   * Note that {@code constituents[0]} is always read, so an empty array results in an
   * {@link ArrayIndexOutOfBoundsException}, as do {@code start} / {@code end} values outside
   * of the array.
   *
   * @param constituents The {@link Parse constituents} which have yet to be combined into new constituents.
   * @param type The type of the new constituent proposed.
   * @param start The first constituent of the proposed constituent.
   * @param end The last constituent of the proposed constituent.
   * @return The predictive context for deciding whether a new constituent should be created.
   */
  public String[] getContext(Parse[] constituents, String type, int start, int end) {
    int ps = constituents.length;
    // Typed as List<String> so that toArray(new String[0]) below yields a String[].
    List<String> features = new ArrayList<>(100);

    //default
    features.add("default");
    //first constituent label
    features.add("fl=" + constituents[0].getLabel());
    Parse pstart = constituents[start];
    Parse pend = constituents[end];
    // checkcons / surround are not defined in this class; presumably inherited from
    // AbstractContextGenerator. They append their features to the list passed in.
    checkcons(pstart, "begin", type, features);
    checkcons(pend, "last", type, features);

    // Two production strings are built side by side: "p=" lists only the constituent types,
    // "pp=" additionally interleaves the punctuation following each constituent.
    StringBuilder production = new StringBuilder(20);
    StringBuilder punctProduction = new StringBuilder(20);
    production.append("p=").append(type).append("->");
    punctProduction.append("pp=").append(type).append("->");
    // The loop stops before 'end': the last constituent is appended afterwards
    // without a trailing comma and without its following punctuation.
    for (int pi = start; pi < end; pi++) {
      Parse p = constituents[pi];
      checkcons(p, pend, type, features);
      production.append(p.getType()).append(",");
      punctProduction.append(p.getType()).append(",");
      Collection<Parse> nextPunct = p.getNextPunctuationSet();
      if (nextPunct != null) {
        for (Parse punct : nextPunct) {
          punctProduction.append(punct.getType()).append(",");
        }
      }
    }
    production.append(pend.getType());
    punctProduction.append(pend.getType());
    features.add(production.toString());
    features.add(punctProduction.toString());

    // Up to two neighbours on each side of the proposed span; a neighbour (and the
    // punctuation set taken from it) stays null when it would fall outside the array.
    Parse p_2 = null;
    Parse p_1 = null;
    Parse p1 = null;
    Parse p2 = null;
    Collection<Parse> p1s = constituents[end].getNextPunctuationSet();
    Collection<Parse> p2s = null;
    Collection<Parse> p_1s = constituents[start].getPreviousPunctuationSet();
    Collection<Parse> p_2s = null;
    if (start - 2 >= 0) {
      p_2 = constituents[start - 2];
    }
    if (start - 1 >= 0) {
      p_1 = constituents[start - 1];
      p_2s = p_1.getPreviousPunctuationSet();
    }
    if (end + 1 < ps) {
      p1 = constituents[end + 1];
      p2s = p1.getNextPunctuationSet();
    }
    if (end + 2 < ps) {
      p2 = constituents[end + 2];
    }
    surround(p_1, -1, type, p_1s, features);
    surround(p_2, -2, type, p_2s, features);
    surround(p1, 1, type, p1s, features);
    surround(p2, 2, type, p2s, features);

    return features.toArray(new String[0]);
  }
}