All Downloads are FREE. Search and download functionalities are using the official Maven repository.

org.apache.lucene.analysis.br.BrazilianStemmer Maven / Gradle / Ivy

There is a newer version: 10.1.0
Show newest version
/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements.  See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.apache.lucene.analysis.br;

import java.util.Locale;

/**
 * A stemmer for Brazilian Portuguese words.
 *
 * <p>The term is first normalized (lower-cased, accents folded, one leading and one trailing
 * punctuation character dropped) and then run through five suffix-stripping steps that operate on
 * the regions R1, R2 and RV of the normalized term.
 *
 * <p>The working state of a call to {@link #stem(String)} is kept in instance fields, so a single
 * instance must not be used by several threads at the same time.
 */
public class BrazilianStemmer {
  private static final Locale LOCALE = Locale.forLanguageTag("pt-BR");

  /** Characters removed by {@link #createCT(String)} at either end of the term. */
  private static final String STRIPPABLE_PUNCTUATION = "\"'-,;.?!";

  /** Selects the region a step-1 suffix has to lie in. */
  private enum Region {
    IN_R1,
    IN_R2,
    IN_RV
  }

  /** One entry of the step-1 rule table: a suffix, where it must lie, and what replaces it. */
  private static final class SuffixRule {
    final String suffix;
    final Region region;
    final String replacement;
    /** Text that must directly precede the suffix, or null when there is no such condition. */
    final String precededBy;

    private SuffixRule(String suffix, Region region, String replacement, String precededBy) {
      this.suffix = suffix;
      this.region = region;
      this.replacement = replacement;
      this.precededBy = precededBy;
    }

    static SuffixRule delete(String suffix, Region region) {
      return new SuffixRule(suffix, region, "", null);
    }

    static SuffixRule replace(String suffix, Region region, String replacement) {
      return new SuffixRule(suffix, region, replacement, null);
    }

    static SuffixRule replaceAfter(
        String suffix, Region region, String precededBy, String replacement) {
      return new SuffixRule(suffix, region, replacement, precededBy);
    }
  }

  /**
   * Step-1 rules in the order they are tried. Order matters wherever one suffix is the tail of
   * another (for example "amente" before "mente"), so longer suffixes come first.
   */
  private static final SuffixRule[] STEP1_RULES = {
    SuffixRule.replace("uciones", Region.IN_R2, "u"),
    SuffixRule.delete("imentos", Region.IN_R2),
    SuffixRule.delete("amentos", Region.IN_R2),
    SuffixRule.delete("adores", Region.IN_R2),
    SuffixRule.delete("adoras", Region.IN_R2),
    SuffixRule.replace("logias", Region.IN_R2, "log"),
    SuffixRule.replace("encias", Region.IN_R2, "ente"),
    SuffixRule.delete("amente", Region.IN_R1),
    SuffixRule.delete("idades", Region.IN_R2),
    SuffixRule.delete("acoes", Region.IN_R2),
    SuffixRule.delete("imento", Region.IN_R2),
    SuffixRule.delete("amento", Region.IN_R2),
    SuffixRule.delete("adora", Region.IN_R2),
    SuffixRule.delete("ismos", Region.IN_R2),
    SuffixRule.delete("istas", Region.IN_R2),
    SuffixRule.replace("logia", Region.IN_R2, "log"),
    SuffixRule.replace("ucion", Region.IN_R2, "u"),
    SuffixRule.replace("encia", Region.IN_R2, "ente"),
    SuffixRule.delete("mente", Region.IN_R2),
    SuffixRule.delete("idade", Region.IN_R2),
    SuffixRule.delete("acao", Region.IN_R2),
    SuffixRule.delete("ezas", Region.IN_R2),
    SuffixRule.delete("icos", Region.IN_R2),
    SuffixRule.delete("icas", Region.IN_R2),
    SuffixRule.delete("ismo", Region.IN_R2),
    SuffixRule.delete("avel", Region.IN_R2),
    SuffixRule.delete("ivel", Region.IN_R2),
    SuffixRule.delete("ista", Region.IN_R2),
    SuffixRule.delete("osos", Region.IN_R2),
    SuffixRule.delete("osas", Region.IN_R2),
    SuffixRule.delete("ador", Region.IN_R2),
    SuffixRule.delete("ivas", Region.IN_R2),
    SuffixRule.delete("ivos", Region.IN_R2),
    SuffixRule.replaceAfter("iras", Region.IN_RV, "e", "ir"),
    SuffixRule.delete("eza", Region.IN_R2),
    SuffixRule.delete("ico", Region.IN_R2),
    SuffixRule.delete("ica", Region.IN_R2),
    SuffixRule.delete("oso", Region.IN_R2),
    SuffixRule.delete("osa", Region.IN_R2),
    SuffixRule.delete("iva", Region.IN_R2),
    SuffixRule.delete("ivo", Region.IN_R2),
    SuffixRule.replaceAfter("ira", Region.IN_RV, "e", "ir"),
  };

  /** Step-2 verb suffixes, longest first; the first one that ends RV is removed from CT. */
  private static final String[] STEP2_SUFFIXES = {
    // length 7
    "issemos", "essemos", "assemos", "ariamos", "eriamos", "iriamos",
    // length 6
    "iremos", "eremos", "aremos", "avamos", "iramos", "eramos", "aramos",
    "asseis", "esseis", "isseis", "arieis", "erieis", "irieis",
    // length 5
    "irmos", "iamos", "armos", "ermos", "areis", "ereis", "ireis",
    "asses", "esses", "isses", "astes", "assem", "essem", "issem",
    "ardes", "erdes", "irdes", "ariam", "eriam", "iriam",
    "arias", "erias", "irias", "estes", "istes", "aveis",
    // length 4
    "aria", "eria", "iria", "asse", "esse", "isse", "aste", "este", "iste",
    "arei", "erei", "irei", "aram", "eram", "iram", "avam", "arem", "erem", "irem",
    "ando", "endo", "indo", "arao", "erao", "irao", "adas", "idas",
    "aras", "eras", "iras", "avas", "ares", "eres", "ires",
    "ados", "idos", "amos", "emos", "imos", "ieis",
    // length 3
    "ada", "ida", "ara", "era", "ava", "iam", "ado", "ido", "ias", "ais", "eis", "ira", "ear",
    // length 2
    "ia", "ei", "am", "em", "ar", "er", "ir", "as", "es", "is", "eu", "iu", "ou",
  };

  /** Original term and normalized term joined by ';', kept for {@link #log()}. */
  private String TERM;

  /** Changed term: the normalized term that the steps progressively shorten. */
  private String CT;

  private String R1;
  private String R2;
  private String RV;

  public BrazilianStemmer() {}

  /**
   * Stems the given term to an unique discriminator.
   *
   * @param term The term that should be stemmed.
   * @return the stem; the normalized term itself when it contains a non-letter character; or
   *     null when the term is null or its normalized form is shorter than 3 or longer than 29
   *     characters
   */
  protected String stem(String term) {
    createCT(term);

    if (CT == null || !isIndexable(CT)) {
      return null;
    }
    if (!isStemmable(CT)) {
      return CT;
    }

    // The regions are computed once, from the normalized term, and are not recomputed after a
    // step shortens CT.
    R1 = getR1(CT);
    R2 = getR1(R1);
    RV = getRV(CT);
    TERM = term + ";" + CT;

    // Step 2 only runs when step 1 removed nothing.
    boolean altered = step1() || step2();

    if (altered) {
      step3();
    } else {
      step4();
    }

    step5();

    return CT;
  }

  /**
   * Checks a term if it can be processed correctly.
   *
   * @return true if, and only if, the given term consists in letters.
   */
  private boolean isStemmable(String term) {
    for (int c = 0; c < term.length(); c++) {
      // Discard terms that contain non-letter characters.
      if (!Character.isLetter(term.charAt(c))) {
        return false;
      }
    }
    return true;
  }

  /**
   * Checks whether a term has an indexable length.
   *
   * @return true if the term is between 3 and 29 characters long (inclusive)
   */
  private boolean isIndexable(String term) {
    int length = term.length();
    return (length < 30) && (length > 2);
  }

  /**
   * See if the character is one of 'a', 'e', 'i', 'o', 'u'.
   *
   * @return true if is vowel
   */
  private boolean isVowel(char value) {
    return "aeiou".indexOf(value) >= 0;
  }

  /**
   * Gets R1.
   *
   * <p>R1 is the region after the first non-vowel following a vowel. The last character of the
   * value is never considered as that non-vowel, so the returned region is never empty.
   *
   * @return null when there is no such region, otherwise a string representing R1
   */
  private String getR1(String value) {
    if (value == null) {
      return null;
    }

    int last = value.length() - 1;
    int j;

    // find 1st vowel
    for (j = 0; j < last; j++) {
      if (isVowel(value.charAt(j))) {
        break;
      }
    }
    if (j >= last) {
      return null;
    }

    // find 1st non-vowel
    for (; j < last; j++) {
      if (!isVowel(value.charAt(j))) {
        break;
      }
    }
    if (j >= last) {
      return null;
    }

    return value.substring(j + 1);
  }

  /**
   * Gets RV.
   *
   * <p>If the second letter is a consonant, RV is the region after the next following vowel;
   *
   * <p>or if the first two letters are vowels, RV is the region after the next consonant;
   *
   * <p>and otherwise RV is the region after the third letter.
   *
   * @return null when the value is too short for any of the cases, otherwise a string
   *     representing RV
   */
  private String getRV(String value) {
    if (value == null) {
      return null;
    }

    int last = value.length() - 1;
    int j;

    // Second letter is a consonant: RV starts after the next vowel.
    if ((last > 0) && !isVowel(value.charAt(1))) {
      for (j = 2; j < last; j++) {
        if (isVowel(value.charAt(j))) {
          break;
        }
      }
      if (j < last) {
        return value.substring(j + 1);
      }
    }

    // First two letters are vowels: RV starts after the next consonant.
    if ((last > 1) && isVowel(value.charAt(0)) && isVowel(value.charAt(1))) {
      for (j = 2; j < last; j++) {
        if (!isVowel(value.charAt(j))) {
          break;
        }
      }
      if (j < last) {
        return value.substring(j + 1);
      }
    }

    // Otherwise RV is the region after the third letter.
    if (last > 2) {
      return value.substring(3);
    }

    return null;
  }

  /**
   * Lower-cases the value and folds the accented letters handled by {@link #foldAccent(char)}
   * (including a-tilde, o-tilde, c-cedilla and n-tilde) to their plain ASCII letters.
   *
   * @return null if the value is null, otherwise the transformed string
   */
  private String changeTerm(String value) {
    if (value == null) {
      return null;
    }

    String lower = value.toLowerCase(LOCALE);
    StringBuilder folded = new StringBuilder(lower.length());
    for (int j = 0; j < lower.length(); j++) {
      folded.append(foldAccent(lower.charAt(j)));
    }
    return folded.toString();
  }

  /**
   * Maps one lower-case accented letter to its unaccented form; any other character is returned
   * unchanged. The letters are written as Unicode escapes so the result does not depend on the
   * encoding the source file is compiled with.
   */
  private static char foldAccent(char c) {
    switch (c) {
      case '\u00e1': // a acute
      case '\u00e2': // a circumflex
      case '\u00e3': // a tilde
        return 'a';
      case '\u00e9': // e acute
      case '\u00ea': // e circumflex
        return 'e';
      case '\u00ed': // i acute
        return 'i';
      case '\u00f3': // o acute
      case '\u00f4': // o circumflex
      case '\u00f5': // o tilde
        return 'o';
      case '\u00fa': // u acute
      case '\u00fc': // u diaeresis
        return 'u';
      case '\u00e7': // c cedilla
        return 'c';
      case '\u00f1': // n tilde
        return 'n';
      default:
        return c;
    }
  }

  /**
   * Check if a string ends with a suffix
   *
   * @return true if the string ends with the specified suffix; false if either argument is null
   */
  private boolean suffix(String value, String suffix) {
    if ((value == null) || (suffix == null)) {
      return false;
    }
    return value.endsWith(suffix);
  }

  /**
   * Replace a string suffix by another
   *
   * @return the value with toReplace swapped for changeTo, or the value unchanged when it does
   *     not end with toReplace or any argument is null
   */
  private String replaceSuffix(String value, String toReplace, String changeTo) {
    if ((value == null) || (toReplace == null) || (changeTo == null)) {
      return value;
    }

    String stripped = removeSuffix(value, toReplace);
    if (value.equals(stripped)) {
      return value;
    }
    return stripped + changeTo;
  }

  /**
   * Remove a string suffix
   *
   * @return the String without the suffix, or the value unchanged when it does not end with it
   */
  private String removeSuffix(String value, String toRemove) {
    if ((value == null) || (toRemove == null) || !suffix(value, toRemove)) {
      return value;
    }
    return value.substring(0, value.length() - toRemove.length());
  }

  /**
   * See if a suffix is preceded by a String
   *
   * @return true if the value ends with the suffix and the text before it ends with preceded
   */
  private boolean suffixPreceded(String value, String suffix, String preceded) {
    if ((value == null) || (suffix == null) || (preceded == null) || !suffix(value, suffix)) {
      return false;
    }
    return suffix(removeSuffix(value, suffix), preceded);
  }

  /**
   * Creates CT (changed term): the term normalized by {@link #changeTerm(String)} with at most
   * one leading and one trailing punctuation character removed. CT is null when the term is null.
   */
  private void createCT(String term) {
    CT = changeTerm(term);

    if (CT == null || CT.length() < 2) {
      return;
    }

    // if the first character is punctuation, remove it
    if (STRIPPABLE_PUNCTUATION.indexOf(CT.charAt(0)) >= 0) {
      CT = CT.substring(1);
    }

    if (CT.length() < 2) {
      return;
    }

    // if the last character is punctuation, remove it
    if (STRIPPABLE_PUNCTUATION.indexOf(CT.charAt(CT.length() - 1)) >= 0) {
      CT = CT.substring(0, CT.length() - 1);
    }
  }

  /** Returns the current content of the region a step-1 rule refers to. */
  private String regionText(Region region) {
    switch (region) {
      case IN_R1:
        return R1;
      case IN_R2:
        return R2;
      default:
        return RV;
    }
  }

  /**
   * Standard suffix removal.
   *
   * <p>Tries the entries of {@link #STEP1_RULES} in order and applies the first one whose suffix
   * ends both CT and the rule's region (and, where the rule asks for it, is preceded by the
   * required text).
   *
   * @return false if no ending was removed
   */
  private boolean step1() {
    if (CT == null) {
      return false;
    }

    for (SuffixRule rule : STEP1_RULES) {
      if (!suffix(CT, rule.suffix) || !suffix(regionText(rule.region), rule.suffix)) {
        continue;
      }
      if ((rule.precededBy != null) && !suffixPreceded(CT, rule.suffix, rule.precededBy)) {
        continue;
      }
      CT = replaceSuffix(CT, rule.suffix, rule.replacement);
      return true;
    }

    // no ending was removed by step1
    return false;
  }

  /**
   * Verb suffixes.
   *
   * <p>Search for the longest among {@link #STEP2_SUFFIXES} in RV, and if found, delete it from
   * CT.
   *
   * @return false if no ending was removed
   */
  private boolean step2() {
    if (RV == null) {
      return false;
    }

    for (String verbSuffix : STEP2_SUFFIXES) {
      if (suffix(RV, verbSuffix)) {
        CT = removeSuffix(CT, verbSuffix);
        return true;
      }
    }

    // no ending was removed by step2
    return false;
  }

  /** Delete suffix 'i' if in RV and preceded by 'c' */
  private void step3() {
    if (RV == null) {
      return;
    }

    if (suffix(RV, "i") && suffixPreceded(RV, "i", "c")) {
      CT = removeSuffix(CT, "i");
    }
  }

  /**
   * Residual suffix
   *
   * <p>If RV ends with one of the suffixes (os a i o), delete that suffix from CT. Only the first
   * matching suffix is handled.
   */
  private void step4() {
    if (RV == null) {
      return;
    }

    for (String residual : new String[] {"os", "a", "i", "o"}) {
      if (suffix(RV, residual)) {
        CT = removeSuffix(CT, residual);
        return;
      }
    }
  }

  /**
   * If RV ends with 'e', delete a trailing 'e' from CT, and if that 'e' is preceded in RV by
   * 'gu' (or 'ci'), also delete a trailing 'u' (or 'i') from CT.
   */
  private void step5() {
    if (RV == null || !suffix(RV, "e")) {
      return;
    }

    CT = removeSuffix(CT, "e");

    if (suffixPreceded(RV, "e", "gu")) {
      CT = removeSuffix(CT, "u");
    } else if (suffixPreceded(RV, "e", "ci")) {
      CT = removeSuffix(CT, "i");
    }
  }

  /**
   * For log and debug purpose
   *
   * @return TERM, CT, RV, R1 and R2
   */
  public String log() {
    return " (TERM = "
        + TERM
        + ")"
        + " (CT = "
        + CT
        + ")"
        + " (RV = "
        + RV
        + ")"
        + " (R1 = "
        + R1
        + ")"
        + " (R2 = "
        + R2
        + ")";
  }
}





© 2015 - 2025 Weber Informatics LLC | Privacy Policy