All downloads are free. Search and download functionality uses the official Maven repository.

opennlp.tools.stemmer.snowball.SnowballStemmer Maven / Gradle / Ivy

/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements.  See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License. You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package opennlp.tools.stemmer.snowball;

import opennlp.tools.stemmer.Stemmer;

/**
 * A {@link Stemmer} that delegates to one of the bundled Snowball stemmer
 * implementations, selected via {@link ALGORITHM}.
 *
 * <p>NOTE(review): every call to {@link #stem(CharSequence)} writes to and reads
 * from the single wrapped stemmer instance, so an instance is presumably not
 * safe for concurrent use by multiple threads - confirm before sharing one.
 */
public class SnowballStemmer implements Stemmer {

  /**
   * The stemming algorithms that can be selected, each associated with a
   * language code that can be used to look it up.
   */
  public enum ALGORITHM {
    DANISH("dan"),
    DUTCH("nld"),
    ENGLISH("eng"),
    FINNISH("fin"),
    FRENCH("fra"),
    GERMAN("deu"),
    HUNGARIAN("hun"),
    IRISH("gle"),
    ITALIAN("ita"),
    NORWEGIAN("nor"),
    PORTER("porter"),
    PORTUGUESE("por"),
    ROMANIAN("ron"),
    RUSSIAN("rus"),
    SPANISH("spa"),
    SWEDISH("swe"),
    TURKISH("tur");

    private final String languageCode;

    private ALGORITHM(String languageCode) {
      this.languageCode = languageCode;
    }

    /**
     * @return the language code associated with this algorithm.
     */
    public String getLanguageCode() {
      return languageCode;
    }

    /**
     * Looks up an algorithm by its language code, ignoring case.
     *
     * @param languageCode the code to look up, e.g. {@code "eng"}.
     * @return the algorithm whose language code matches.
     * @throws IllegalArgumentException if no algorithm has the given code,
     *         including when {@code languageCode} is {@code null}.
     */
    public static ALGORITHM getByLanguageCode(String languageCode) {
      for (ALGORITHM algorithm : ALGORITHM.values()) {
        // Call equalsIgnoreCase on the (never null) enum code so that a null
        // argument ends up in the IllegalArgumentException below instead of
        // failing with a NullPointerException here.
        if (algorithm.getLanguageCode().equalsIgnoreCase(languageCode)) {
          return algorithm;
        }
      }
      throw new IllegalArgumentException("No stemmer for language code " + languageCode);
    }
  }

  private final AbstractSnowballStemmer stemmer;
  private final int repeat;

  /**
   * Creates a stemmer for the given algorithm.
   *
   * @param algorithm the stemming algorithm to use.
   * @param repeat how many times the underlying stemmer is run on each word;
   *        for values less than one the stemmer is not run at all.
   * @throws IllegalStateException if {@code algorithm} is {@code null} or has
   *         no associated stemmer implementation.
   */
  public SnowballStemmer(ALGORITHM algorithm, int repeat) {
    this.repeat = repeat;
    this.stemmer = createStemmer(algorithm);
  }

  /**
   * Creates a stemmer for the given algorithm that runs the underlying
   * stemmer once per word.
   *
   * @param algorithm the stemming algorithm to use.
   * @throws IllegalStateException if {@code algorithm} is {@code null} or has
   *         no associated stemmer implementation.
   */
  public SnowballStemmer(ALGORITHM algorithm) {
    this(algorithm, 1);
  }

  /**
   * Stems a word by loading it into the wrapped stemmer, running the stemmer
   * {@code repeat} times, and reading back the result.
   *
   * @param word the word to stem.
   * @return the content of the wrapped stemmer after stemming.
   */
  public CharSequence stem(CharSequence word) {

    stemmer.setCurrent(word.toString());

    for (int i = 0; i < repeat; i++) {
      stemmer.stem();
    }

    return stemmer.getCurrent();
  }

  /** Instantiates the stemmer implementation that belongs to the given algorithm. */
  private static AbstractSnowballStemmer createStemmer(ALGORITHM algorithm) {
    // A switch on a null enum would throw NullPointerException; report null
    // the same way as any other unsupported value instead.
    if (algorithm == null) {
      throw new IllegalStateException("Unexpected stemmer algorithm: " + algorithm);
    }

    switch (algorithm) {
      case DANISH:
        return new danishStemmer();
      case DUTCH:
        return new dutchStemmer();
      case ENGLISH:
        return new englishStemmer();
      case FINNISH:
        return new finnishStemmer();
      case FRENCH:
        return new frenchStemmer();
      case GERMAN:
        return new germanStemmer();
      case HUNGARIAN:
        return new hungarianStemmer();
      case IRISH:
        return new irishStemmer();
      case ITALIAN:
        return new italianStemmer();
      case NORWEGIAN:
        return new norwegianStemmer();
      case PORTER:
        return new porterStemmer();
      case PORTUGUESE:
        return new portugueseStemmer();
      case ROMANIAN:
        return new romanianStemmer();
      case RUSSIAN:
        return new russianStemmer();
      case SPANISH:
        return new spanishStemmer();
      case SWEDISH:
        return new swedishStemmer();
      case TURKISH:
        return new turkishStemmer();
      default:
        throw new IllegalStateException("Unexpected stemmer algorithm: " + algorithm);
    }
  }
}




© 2015 - 2025 Weber Informatics LLC | Privacy Policy