All Downloads are FREE. Search and download functionalities are using the official Maven repository.

ai.idylnlp.opennlp.custom.nlp.lemmatization.DefaultLemmatizer Maven / Gradle / Ivy

/*******************************************************************************
 * Copyright 2018 Mountain Fog, Inc.
 *
 * Licensed under the Apache License, Version 2.0 (the "License"); you may not
 * use this file except in compliance with the License.  You may obtain a copy
 * of the License at
 *
 *   http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
 * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.  See the
 * License for the specific language governing permissions and limitations under
 * the License.
 ******************************************************************************/

package ai.idylnlp.opennlp.custom.nlp.lemmatization;

import java.io.FileInputStream;
import java.io.IOException;
import java.io.InputStream;

import org.apache.logging.log4j.LogManager;
import org.apache.logging.log4j.Logger;

import ai.idylnlp.model.ModelValidator;
import ai.idylnlp.model.exceptions.ModelLoaderException;
import ai.idylnlp.model.manifest.StandardModelManifest;
import ai.idylnlp.model.nlp.lemma.Lemmatizer;
import ai.idylnlp.opennlp.custom.modelloader.LocalModelLoader;

import opennlp.tools.lemmatizer.DictionaryLemmatizer;
import opennlp.tools.lemmatizer.LemmatizerME;
import opennlp.tools.lemmatizer.LemmatizerModel;

/**
 * Default implementation of {@link Lemmatizer} that uses OpenNLP's
 * lemmatizing capabilities to provide dictionary-based and
 * model-based lemmatization.
 *
 * @author Mountain Fog, Inc.
 *
 */
public class DefaultLemmatizer implements Lemmatizer {

  private static final Logger LOGGER = LogManager.getLogger(DefaultLemmatizer.class);

  private opennlp.tools.lemmatizer.Lemmatizer lemmatizer;
  private boolean isModelBased;

  /**
   * Creates a new lemmatizer that uses a dictionary.
   * @param dictionary The full path to the dictionary file.
   * @throws IOException Thrown if the dictionary cannot be opened.
   */
  public DefaultLemmatizer(String dictionary) throws IOException {

    isModelBased = false;

    InputStream dictLemmatizer = new FileInputStream(dictionary);

    lemmatizer = new DictionaryLemmatizer(dictLemmatizer);

    dictLemmatizer.close();

  }

  /**
   * Creates a new model-based lemmatizer.
   * @param modelPath The full path to the directory containing the model.
   * @param modelManifest The {@link StandardModelManifest manifest} of the lemmatizer model.
   * @param validator The {@link ModelValidator} used to validate the model.
   * @throws ModelLoaderException Thrown if the model cannot be loaded.
   */
  public DefaultLemmatizer(String modelPath, StandardModelManifest modelManifest, ModelValidator validator) throws ModelLoaderException {

    isModelBased = true;

    LocalModelLoader lemmaModelLoader = new LocalModelLoader(validator, modelPath);

    LemmatizerModel model = lemmaModelLoader.getModel(modelManifest, LemmatizerModel.class);

    lemmatizer = new LemmatizerME(model);

  }

  /**
   * {@inheritDoc}
   * 

* How the lemmatization is performed depends on which constructor * was used to create the class. The lemmatization could be * dictionary-based or model-based. */ @Override public String[] lemmatize(String[] tokens, String[] posTags) { String[] lemmas = lemmatizer.lemmatize(tokens, posTags); if(isModelBased) { // Must call decodeLemmas for model-based lemmatization. lemmas = ((LemmatizerME) lemmatizer).decodeLemmas(tokens, lemmas); } return lemmas; } }





© 2015 - 2025 Weber Informatics LLC | Privacy Policy