All Downloads are FREE. Search and download functionalities are using the official Maven repository.

eus.ixa.ixa.pipe.nerc.features.WindowFeatureGenerator Maven / Gradle / Ivy

There is a newer version: 1.6.1
Show newest version
/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements.  See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License. You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package eus.ixa.ixa.pipe.nerc.features;

import java.util.ArrayList;
import java.util.List;

import opennlp.tools.util.featuregen.AdaptiveFeatureGenerator;
import opennlp.tools.util.featuregen.AggregatedFeatureGenerator;

/**
 * Generates previous and next features for a given {@link AdaptiveFeatureGenerator}.
 * The window size can be specified.
 *
 * Features:
 * Current token is always included unchanged
 * Previous tokens are prefixed with p distance
 * Next tokens are prefix with n distance
 */
public class WindowFeatureGenerator implements AdaptiveFeatureGenerator {

  public static final String PREV_PREFIX = "p";
  public static final String NEXT_PREFIX = "n";

  private final AdaptiveFeatureGenerator generator;

  private final int prevWindowSize;
  private final int nextWindowSize;

  /**
   * Initializes the current instance with the given parameters.
   *
   * @param generator feature generator to apply to the window.
   * @param prevWindowSize size of the window to the left of the current token.
   * @param nextWindowSize size of the window to the right of the current token.
   */
  public WindowFeatureGenerator(AdaptiveFeatureGenerator generator, int prevWindowSize,  int nextWindowSize) {
    if (prevWindowSize < 1 || nextWindowSize < 1) {
      throw new IllegalArgumentException("window parameter must be at least 1. " +
          "minLength=" + prevWindowSize + ", maxLength= " + nextWindowSize);
    }
    this.generator = generator;
    this.prevWindowSize = prevWindowSize;
    this.nextWindowSize = nextWindowSize;
  }
  
  /**
   * Initializes the current instance with the given parameters.
   * 
   * @param prevWindowSize size of the window to the left of the current token
   * @param nextWindowSize size of the window to the right of the current token
   * @param generators the array of feature generators
   */
  public WindowFeatureGenerator(int prevWindowSize, int nextWindowSize, AdaptiveFeatureGenerator... generators) {
    this(new AggregatedFeatureGenerator(generators), prevWindowSize, nextWindowSize);
  }
  
  /**
   * Initializes the current instance. The previous and next window size is 5.
   *
   * @param generator feature generator
   */
  public WindowFeatureGenerator(AdaptiveFeatureGenerator generator) {
    this(generator, 6, 6);
  }
  
  /**
   * Initializes the current instance with the given parameters.
   * 
   * @param generators array of feature generators
   */
  public WindowFeatureGenerator(AdaptiveFeatureGenerator... generators) {
    this(new AggregatedFeatureGenerator(generators), 5, 5);
  }
  
  public void createFeatures(List features, String[] tokens, int index, String[] preds) {
    // current features
    generator.createFeatures(features, tokens, index, preds);

    // previous features
    for (int i = 1; i < prevWindowSize + 1; i++) {
      if (index - i >= 0) {

        List prevFeatures = new ArrayList();

        generator.createFeatures(prevFeatures, tokens, index - i, preds);

        for (String prevFeature : prevFeatures) {
          features.add(PREV_PREFIX + i + prevFeature);
        }
      }
    }

    // next features
    for (int i = 1; i < nextWindowSize + 1; i++) {
      if (i + index < tokens.length) {

        List nextFeatures = new ArrayList();

        generator.createFeatures(nextFeatures, tokens, index + i, preds);

        for (String nextFeature : nextFeatures) {
          features.add(NEXT_PREFIX + i + nextFeature);
        }
      }
    }
  }

  public void updateAdaptiveData(String[] tokens, String[] outcomes) {
    generator.updateAdaptiveData(tokens, outcomes);
  }

  public void clearAdaptiveData() {
      generator.clearAdaptiveData();
  }

  @Override
  public String toString() {
    return super.toString()+": Prev window size: " + prevWindowSize +", Next window size: " + nextWindowSize;
  }
}




© 2015 - 2024 Weber Informatics LLC | Privacy Policy