All Downloads are FREE. Search and download functionalities are using the official Maven repository.

opennlp.tools.parser.ChunkContextGenerator Maven / Gradle / Ivy

There is a newer version: 2.5.0
Show newest version
/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements.  See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License. You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package opennlp.tools.parser;

import java.util.ArrayList;
import java.util.List;

import opennlp.tools.chunker.ChunkerContextGenerator;
import opennlp.tools.util.Cache;

/**
 * Creates predictive context for the pre-chunking phases of parsing.
 *
 * <p>For a token position {@code i} the generator emits 19 string features built
 * from the words and tags in a five-token window ({@code i-2 .. i+2}) and from
 * the previous decisions at {@code i-2} and {@code i-1}. Window positions that
 * fall outside the sentence are represented by the marker {@code "eos"}.
 *
 * <p>When constructed with a positive cache size, generated feature arrays are
 * cached for as long as the same {@code words} array instance is passed in; the
 * cache is cleared as soon as a different {@code words} array is seen.
 */
public class ChunkContextGenerator implements ChunkerContextGenerator {

  /** Marker used for window positions before the first or after the last token. */
  private static final String EOS = "eos";

  /** Separates the parts of a cache key so that adjacent values cannot run together. */
  private static final String KEY_SEPARATOR = "|";

  /** Feature cache; {@code null} when caching is disabled. */
  private Cache contextsCache;

  /** The {@code words} array (compared by identity) the cache contents belong to. */
  private Object wordsKey;

  /**
   * Creates a generator without a feature cache.
   */
  public ChunkContextGenerator() {
    this(0);
  }

  /**
   * Creates a generator, optionally backed by a feature cache.
   *
   * @param cacheSize size passed to the {@link Cache} constructor; a value of
   *     zero or less disables caching.
   */
  public ChunkContextGenerator(int cacheSize) {
    if (cacheSize > 0) {
      contextsCache = new Cache(cacheSize);
    }
  }

  /**
   * Generates the context from a packed argument array.
   *
   * @param o an {@code Object[]} holding, in order: the token index as an
   *     {@code Integer}, the words, the tags and the previous decisions (each a
   *     {@code String[]}).
   * @return the features for the given position.
   */
  public String[] getContext(Object o) {
    Object[] data = (Object[]) o;
    int index = ((Integer) data[0]).intValue();
    return getContext(index, (String[]) data[1], (String[]) data[2], (String[]) data[3]);
  }

  /**
   * Generates the context, taking the tags from the additional context.
   *
   * @param i index of the token to generate features for.
   * @param words the tokens of the sentence.
   * @param prevDecisions the decisions made for the preceding tokens.
   * @param ac additional context; its first element must be the {@code String[]}
   *     of tags.
   * @return the features for the given position.
   */
  public String[] getContext(int i, String[] words, String[] prevDecisions, Object[] ac) {
    return getContext(i, words, (String[]) ac[0], prevDecisions);
  }

  /**
   * Generates the features for the token at index {@code i}.
   *
   * <p>If caching is enabled the returned array may be the cached instance,
   * which is handed out again on later calls with the same key; callers should
   * therefore not modify it.
   *
   * @param i index of the token to generate features for.
   * @param words the tokens of the sentence.
   * @param tags the tags of the tokens; its length bounds the look-ahead window.
   * @param preds the decisions made for the preceding tokens.
   * @return the 19 features for the given position.
   */
  public String[] getContext(int i, String[] words, String[] tags, String[] preds) {
    final int length = tags.length;

    // Two tokens back.
    final String tagM2 = valueAt(tags, i - 2, length);
    final String predM2 = valueAt(preds, i - 2, length);
    final String wordM2 = valueAt(words, i - 2, length);

    // One token back.
    final String tagM1 = valueAt(tags, i - 1, length);
    final String predM1 = valueAt(preds, i - 1, length);
    final String wordM1 = valueAt(words, i - 1, length);

    // Current token.
    final String tag0 = tags[i];
    final String word0 = words[i];

    // One and two tokens ahead.
    final String tagP1 = valueAt(tags, i + 1, length);
    final String wordP1 = valueAt(words, i + 1, length);
    final String tagP2 = valueAt(tags, i + 2, length);
    final String wordP2 = valueAt(words, i + 2, length);

    // The key covers every tag and previous decision the features depend on.
    // The words are not part of it because the cache is only valid for a
    // single words array (see wordsKey).
    final String cacheKey = i
        + KEY_SEPARATOR + tagM2
        + KEY_SEPARATOR + tagM1
        + KEY_SEPARATOR + tag0
        + KEY_SEPARATOR + tagP1
        + KEY_SEPARATOR + tagP2
        + KEY_SEPARATOR + predM2
        + KEY_SEPARATOR + predM1;

    if (contextsCache != null) {
      if (wordsKey == words) {
        String[] cached = (String[]) contextsCache.get(cacheKey);
        if (cached != null) {
          return cached;
        }
      }
      else {
        // A different sentence: everything cached so far is stale.
        contextsCache.clear();
        wordsKey = words;
      }
    }

    final String ctM2 = chunkandpostag(-2, wordM2, tagM2, predM2);
    final String ctboM2 = chunkandpostagbo(-2, tagM2, predM2);
    final String ctM1 = chunkandpostag(-1, wordM1, tagM1, predM1);
    final String ctboM1 = chunkandpostagbo(-1, tagM1, predM1);
    final String ct0 = chunkandpostag(0, word0, tag0, null);
    final String ctbo0 = chunkandpostagbo(0, tag0, null);
    final String ctP1 = chunkandpostag(1, wordP1, tagP1, null);
    final String ctboP1 = chunkandpostagbo(1, tagP1, null);
    final String ctP2 = chunkandpostag(2, wordP2, tagP2, null);
    final String ctboP2 = chunkandpostagbo(2, tagP2, null);

    final List<String> features = new ArrayList<String>(19);
    features.add("default");
    features.add(ctM2);
    features.add(ctboM2);
    features.add(ctM1);
    features.add(ctboM1);
    features.add(ct0);
    features.add(ctbo0);
    features.add(ctP1);
    features.add(ctboP1);
    features.add(ctP2);
    features.add(ctboP2);

    // Combinations of positions -1 and 0.
    features.add(ctM1 + "," + ct0);
    features.add(ctboM1 + "," + ct0);
    features.add(ctM1 + "," + ctbo0);
    features.add(ctboM1 + "," + ctbo0);

    // Combinations of positions 0 and 1.
    features.add(ct0 + "," + ctP1);
    features.add(ctbo0 + "," + ctP1);
    features.add(ct0 + "," + ctboP1);
    features.add(ctbo0 + "," + ctboP1);

    final String[] contexts = features.toArray(new String[features.size()]);
    if (contextsCache != null) {
      contextsCache.put(cacheKey, contexts);
    }
    return contexts;
  }

  /**
   * Returns {@code values[index]}, or the end-of-sentence marker when the index
   * lies outside {@code [0, length)}.
   */
  private static String valueAt(String[] values, int index, int length) {
    return (index >= 0 && index < length) ? values[index] : EOS;
  }

  /**
   * Builds the feature {@code i=tok|tag}, with {@code |chunk} appended for
   * negative offsets.
   */
  private static String chunkandpostag(int i, String tok, String tag, String chunk) {
    StringBuilder feat = new StringBuilder(20);
    feat.append(i).append("=").append(tok).append("|").append(tag);
    if (i < 0) {
      feat.append("|").append(chunk);
    }
    return feat.toString();
  }

  /**
   * Builds the back-off feature {@code i*=tag} (no word), with {@code |chunk}
   * appended for negative offsets.
   */
  private static String chunkandpostagbo(int i, String tag, String chunk) {
    StringBuilder feat = new StringBuilder(20);
    feat.append(i).append("*=").append(tag);
    if (i < 0) {
      feat.append("|").append(chunk);
    }
    return feat.toString();
  }
}




© 2015 - 2024 Weber Informatics LLC | Privacy Policy