All downloads are free. The search and download functionality uses the official Maven repository.

opennlp.tools.coref.sim.NumberModel Maven / Gradle / Ivy

There is a newer version: 2.5.0
Show newest version
/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements.  See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License. You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package opennlp.tools.coref.sim;

import java.io.File;
import java.io.IOException;
import java.util.ArrayList;
import java.util.Iterator;
import java.util.List;

import opennlp.maxent.GIS;
import opennlp.maxent.io.SuffixSensitiveGISModelReader;
import opennlp.maxent.io.SuffixSensitiveGISModelWriter;
import opennlp.model.Event;
import opennlp.model.MaxentModel;
import opennlp.tools.coref.resolver.ResolverUtils;
import opennlp.tools.util.CollectionEventStream;
import opennlp.tools.util.HashList;

/**
 * Class which models the number of particular mentions and the entities made up of mentions.
 * <p>
 * Instances are obtained through one of the two static factories:
 * {@link #testModel(String)} loads a previously persisted model from disk, while
 * {@link #trainModel(String)} creates an instance that collects training events via
 * {@link #setExtents(Context[])} and writes a model with {@link #trainModel()}.
 * The model file is always {@code modelName + ".bin.gz"}.
 */
public class NumberModel implements TestNumberModel, TrainSimilarityModel {

  private final String modelName;
  private final String modelExtension = ".bin.gz";
  /** Loaded model; only set when the instance was created for testing. */
  private MaxentModel testModel;
  /** Collected training events; only set when the instance was created for training. */
  private List<Event> events;

  /** Index of the {@link NumberEnum#SINGULAR} outcome as reported by the loaded model. */
  private int singularIndex;
  /** Index of the {@link NumberEnum#PLURAL} outcome as reported by the loaded model. */
  private int pluralIndex;

  /**
   * Creates a number model for testing by loading the model stored under the given name.
   *
   * @param name path of the model file without the {@code .bin.gz} extension.
   * @return a model which can be queried with {@link #numberDist(Context)}.
   * @throws IOException propagated from reading the model file.
   */
  public static TestNumberModel testModel(String name) throws IOException {
    return new NumberModel(name, false);
  }

  /**
   * Creates a number model for training which will be written under the given name.
   *
   * @param modelName path of the model file without the {@code .bin.gz} extension.
   * @return a model which collects events and can be trained with {@link #trainModel()}.
   * @throws IOException declared by the shared constructor; the training branch itself
   *     performs no I/O.
   */
  public static TrainSimilarityModel trainModel(String modelName) throws IOException {
    return new NumberModel(modelName, true);
  }

  /**
   * Initializes the instance either for training (empty event list) or for testing
   * (model loaded from {@code modelName + modelExtension}).
   *
   * @param modelName path of the model file without extension.
   * @param train {@code true} to collect training events, {@code false} to load a model.
   * @throws IOException propagated from reading the model file in the testing branch.
   */
  private NumberModel(String modelName, boolean train) throws IOException {
    this.modelName = modelName;
    if (train) {
      events = new ArrayList<Event>();
    }
    else {
      testModel = (new SuffixSensitiveGISModelReader(new File(modelName + modelExtension))).getModel();
      singularIndex = testModel.getIndex(NumberEnum.SINGULAR.toString());
      pluralIndex = testModel.getIndex(NumberEnum.PLURAL.toString());
    }
  }

  /**
   * Builds the feature list for a mention: a constant {@code default} feature, one
   * {@code mw=} feature for every token except the last one, and the lower-cased head
   * token text ({@code hw=}) and head token tag ({@code ht=}).
   *
   * @param np1 the mention to describe.
   * @return the features in the order described above.
   */
  private List<String> getFeatures(Context np1) {
    List<String> features = new ArrayList<String>();
    features.add("default");
    Object[] npTokens = np1.getTokens();
    // The last token is deliberately excluded from the "mw=" features.
    for (int ti = 0, tl = npTokens.length - 1; ti < tl; ti++) {
      features.add("mw=" + npTokens[ti].toString());
    }
    features.add("hw=" + np1.getHeadTokenText().toLowerCase());
    features.add("ht=" + np1.getHeadTokenTag());
    return features;
  }

  /**
   * Records a training event with the given outcome for the given mention.
   * Requires an instance created for training, since {@code events} is otherwise unset.
   */
  private void addEvent(String outcome, Context np1) {
    List<String> feats = getFeatures(np1);
    events.add(new Event(outcome, feats.toArray(new String[feats.size()])));
  }

  /**
   * Determines the number of a mention from its head token using the singular and
   * plural pronoun patterns of {@link ResolverUtils}.
   *
   * @param ec the mention to inspect.
   * @return {@link NumberEnum#SINGULAR} or {@link NumberEnum#PLURAL} when the head token
   *     matches the respective pattern, {@link NumberEnum#UNKNOWN} otherwise.
   */
  public NumberEnum getNumber(Context ec) {
    String headText = ec.getHeadTokenText();
    if (ResolverUtils.singularPronounPattern.matcher(headText).matches()) {
      return NumberEnum.SINGULAR;
    }
    else if (ResolverUtils.pluralPronounPattern.matcher(headText).matches()) {
      return NumberEnum.PLURAL;
    }
    else {
      return NumberEnum.UNKNOWN;
    }
  }

  /**
   * Determines the number of an entity as the number of its first mention whose
   * number is not {@link NumberEnum#UNKNOWN}.
   *
   * @param entity the mentions making up the entity.
   * @return the first known number, or {@link NumberEnum#UNKNOWN} if no mention has one.
   */
  private NumberEnum getNumber(List<Context> entity) {
    for (Context ec : entity) {
      NumberEnum ne = getNumber(ec);
      if (ne != NumberEnum.UNKNOWN) {
        return ne;
      }
    }
    return NumberEnum.UNKNOWN;
  }

  /**
   * Collects training events from the given mentions.
   * <p>
   * Mentions whose id is not {@code -1} are grouped by id into entities; the remaining
   * mentions are treated individually. For every entity or individual mention whose number
   * resolves to singular or plural, one event per mention is recorded. Mentions with an
   * unknown number produce no event. Singular events are added before plural events.
   *
   * @param extentContexts the mentions to derive events from.
   */
  // HashList is used as a raw map here, so reading the grouped lists back needs an unchecked cast.
  @SuppressWarnings("unchecked")
  public void setExtents(Context[] extentContexts) {
    HashList entities = new HashList();
    List<Context> singletons = new ArrayList<Context>();
    for (Context ec : extentContexts) {
      if (ec.getId() != -1) {
        entities.put(ec.getId(), ec);
      }
      else {
        singletons.add(ec);
      }
    }
    List<Context> singles = new ArrayList<Context>();
    List<Context> plurals = new ArrayList<Context>();
    // coref entities
    for (Object key : entities.keySet()) {
      List<Context> entityContexts = (List<Context>) entities.get(key);
      NumberEnum number = getNumber(entityContexts);
      if (number == NumberEnum.SINGULAR) {
        singles.addAll(entityContexts);
      }
      else if (number == NumberEnum.PLURAL) {
        plurals.addAll(entityContexts);
      }
    }
    // non-coref entities.
    for (Context ec : singletons) {
      NumberEnum number = getNumber(ec);
      if (number == NumberEnum.SINGULAR) {
        singles.add(ec);
      }
      else if (number == NumberEnum.PLURAL) {
        plurals.add(ec);
      }
    }

    for (Context ec : singles) {
      addEvent(NumberEnum.SINGULAR.toString(), ec);
    }
    for (Context ec : plurals) {
      addEvent(NumberEnum.PLURAL.toString(), ec);
    }
  }

  /**
   * Evaluates the loaded model on the features of the given mention.
   * Requires an instance created for testing, since the model is otherwise unset.
   *
   * @param c the mention to evaluate.
   * @return the values returned by the model; {@link #getSingularIndex()} and
   *     {@link #getPluralIndex()} give the positions the model reported for the
   *     singular and plural outcomes.
   */
  public double[] numberDist(Context c) {
    List<String> feats = getFeatures(c);
    return testModel.eval(feats.toArray(new String[feats.size()]));
  }

  /**
   * @return the index the loaded model reported for the singular outcome
   *     ({@code 0} if no model was loaded).
   */
  public int getSingularIndex() {
    return singularIndex;
  }

  /**
   * @return the index the loaded model reported for the plural outcome
   *     ({@code 0} if no model was loaded).
   */
  public int getPluralIndex() {
    return pluralIndex;
  }

  /**
   * Trains a GIS model on the collected events and persists it to
   * {@code modelName + ".bin.gz"}.
   *
   * @throws IOException propagated from training or from writing the model file.
   */
  public void trainModel() throws IOException {
    File modelFile = new File(modelName + modelExtension);
    // NOTE(review): 100 and 10 are presumably the iteration count and feature cutoff -
    // confirm against the GIS.trainModel signature.
    (new SuffixSensitiveGISModelWriter(
        GIS.trainModel(new CollectionEventStream(events), 100, 10), modelFile)).persist();
  }

}




© 2015 - 2024 Weber Informatics LLC | Privacy Policy