All Downloads are FREE. Search and download functionalities are using the official Maven repository.

opennlp.tools.util.model.ModelUtil Maven / Gradle / Ivy

/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements.  See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License. You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package opennlp.tools.util.model;

import java.io.ByteArrayOutputStream;
import java.io.IOException;
import java.io.InputStream;
import java.io.OutputStream;
import java.util.Arrays;
import java.util.HashSet;
import java.util.Map;
import java.util.Objects;
import java.util.Set;

import ai.idylnlp.opennlp.custom.EncryptedDataOutputStream;

import opennlp.tools.ml.maxent.GISTrainer;
import opennlp.tools.ml.model.AbstractModel;
import opennlp.tools.ml.model.GenericModelWriter;
import opennlp.tools.ml.model.MaxentModel;
import opennlp.tools.util.TrainingParameters;

/**
 * Utility class for handling of {@link MaxentModel}s.
 */
public final class ModelUtil {

  /** Size in bytes of the buffer {@link #read(InputStream)} uses to copy stream contents. */
  private static final int READ_BUFFER_SIZE = 1024;

  /** Iteration count set by {@link #createDefaultTrainingParameters()}. */
  private static final int DEFAULT_ITERATIONS = 100;

  /** Cutoff set by {@link #createDefaultTrainingParameters()}. */
  private static final int DEFAULT_CUTOFF = 5;

  private ModelUtil() {
    // not intended to be instantiated
  }

  /**
   * Writes the given model to the given {@link OutputStream}.
   * <p>
   * The model is persisted by a {@link GenericModelWriter} that writes through an
   * {@link EncryptedDataOutputStream}. This method does not close the provided stream:
   * the writer only ever sees a forwarding wrapper whose {@code flush()} and
   * {@code close()} do nothing, so the caller remains responsible for flushing and
   * closing {@code out}.
   *
   * @param model the model to be written; it is cast to {@link AbstractModel}
   * @param out the stream the model should be written to
   *
   * @throws IOException if the model writer or the target stream reports an I/O error
   * @throws NullPointerException in case one of the parameters is null
   * @throws ClassCastException if {@code model} is not an {@link AbstractModel}
   * @throws IllegalArgumentException retained in the signature for source compatibility;
   *     this method itself reports null arguments with a {@link NullPointerException}
   */
  public static void writeModel(MaxentModel model, final OutputStream out)
          throws IOException, IllegalArgumentException {

    Objects.requireNonNull(model, "model parameter must not be null");
    Objects.requireNonNull(out, "out parameter must not be null");

    GenericModelWriter modelWriter = new GenericModelWriter((AbstractModel) model,
        new EncryptedDataOutputStream(new ForwardingOutputStream(out)));

    modelWriter.persist();
  }

  /**
   * Checks that the outcomes of the given model are exactly the expected outcomes.
   * <p>
   * The check passes when the number of expected outcomes equals
   * {@link MaxentModel#getNumOutcomes()} and every outcome of the model is contained in
   * {@code expectedOutcomes}. The count comparison uses the raw array length, so
   * duplicate entries in {@code expectedOutcomes} are counted individually.
   *
   * @param model the model whose outcomes are inspected
   * @param expectedOutcomes the outcome names the model is expected to have
   *
   * @return true if the counts match and each model outcome is an expected outcome,
   *     false otherwise
   */
  public static boolean validateOutcomes(MaxentModel model, String... expectedOutcomes) {

    final int numOutcomes = model.getNumOutcomes();

    if (expectedOutcomes.length != numOutcomes) {
      return false;
    }

    Set<String> expectedOutcomesSet = new HashSet<>(Arrays.asList(expectedOutcomes));

    for (int i = 0; i < numOutcomes; i++) {
      if (!expectedOutcomesSet.contains(model.getOutcome(i))) {
        return false;
      }
    }

    return true;
  }

  /**
   * Reads the provided {@link InputStream} until its end and returns
   * everything that was read as a byte array.
   * <p>
   * The provided stream is not closed by this method.
   *
   * @param in stream to read data for the byte array from
   * @return byte array with the contents of the stream
   *
   * @throws IOException if an exception is thrown while reading
   *     from the provided {@link InputStream}
   * @throws NullPointerException if {@code in} is null
   */
  public static byte[] read(InputStream in) throws IOException {
    Objects.requireNonNull(in, "in parameter must not be null");

    ByteArrayOutputStream byteArrayOut = new ByteArrayOutputStream();
    byte[] buffer = new byte[READ_BUFFER_SIZE];

    // End of stream is signalled by -1. Stopping on 0 as well would silently
    // truncate the result if a stream ever returned an empty read before its end.
    int length;
    while ((length = in.read(buffer)) != -1) {
      byteArrayOut.write(buffer, 0, length);
    }

    // No close() needed: closing a ByteArrayOutputStream has no effect.
    return byteArrayOut.toByteArray();
  }

  /**
   * Stores the training cutoff and the number of training iterations in the given
   * manifest entries.
   * <p>
   * The values are stored as decimal strings under the keys
   * {@code BaseModel.TRAINING_CUTOFF_PROPERTY} and
   * {@code BaseModel.TRAINING_ITERATIONS_PROPERTY}; entries already present under
   * these keys are replaced.
   *
   * @param manifestInfoEntries the map the two entries are put into
   * @param cutoff the cutoff value to record
   * @param iterations the iterations value to record
   *
   * @throws NullPointerException if {@code manifestInfoEntries} is null
   */
  public static void addCutoffAndIterations(
      Map<? super String, ? super String> manifestInfoEntries,
      int cutoff, int iterations) {
    manifestInfoEntries.put(BaseModel.TRAINING_CUTOFF_PROPERTY, Integer.toString(cutoff));
    manifestInfoEntries.put(BaseModel.TRAINING_ITERATIONS_PROPERTY, Integer.toString(iterations));
  }

  /**
   * Creates the default training parameters in case they are not provided.
   * <p>
   * The returned instance has the algorithm set to {@code GISTrainer.MAXENT_VALUE},
   * the iterations set to 100 and the cutoff set to 5.
   * <p>
   * Note: Do not use this method, internal use only!
   *
   * @return a new training parameters instance holding the defaults
   */
  public static TrainingParameters createDefaultTrainingParameters() {
    TrainingParameters mlParams = new TrainingParameters();
    mlParams.put(TrainingParameters.ALGORITHM_PARAM, GISTrainer.MAXENT_VALUE);
    mlParams.put(TrainingParameters.ITERATIONS_PARAM, DEFAULT_ITERATIONS);
    mlParams.put(TrainingParameters.CUTOFF_PARAM, DEFAULT_CUTOFF);

    return mlParams;
  }

  /**
   * Forwards written bytes to a target stream.
   * <p>
   * {@code flush()} and {@code close()} are deliberately not overridden: the versions
   * inherited from {@link OutputStream} do nothing, so the target stream is never
   * flushed or closed through this wrapper.
   */
  private static final class ForwardingOutputStream extends OutputStream {

    private final OutputStream target;

    ForwardingOutputStream(OutputStream target) {
      this.target = target;
    }

    @Override
    public void write(int b) throws IOException {
      target.write(b);
    }

    @Override
    public void write(byte[] b, int off, int len) throws IOException {
      // Hand whole chunks to the target; the inherited implementation would
      // fall back to one write(int) call per byte.
      target.write(b, off, len);
    }
  }
}




© 2015 - 2025 Weber Informatics LLC | Privacy Policy