org.apache.lucene.analysis.br.BrazilianStemmer Maven / Gradle / Ivy
Show all versions of lucene-analysis-common Show documentation
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.lucene.analysis.br;
import java.util.Locale;
/** A stemmer for Brazilian Portuguese words. */
class BrazilianStemmer {
private static final Locale locale = new Locale.Builder().setLanguageTag("pt-BR").build();
/** Changed term */
private String TERM;
private String CT;
private String R1;
private String R2;
private String RV;
public BrazilianStemmer() {}
/**
* Stems the given term to an unique discriminator
.
*
* @param term The term that should be stemmed.
* @return Discriminator for term
*/
protected String stem(String term) {
boolean altered = false; // altered the term
// creates CT
createCT(term);
if (!isIndexable(CT)) {
return null;
}
if (!isStemmable(CT)) {
return CT;
}
R1 = getR1(CT);
R2 = getR1(R1);
RV = getRV(CT);
TERM = term + ";" + CT;
altered = step1();
if (!altered) {
altered = step2();
}
if (altered) {
step3();
} else {
step4();
}
step5();
return CT;
}
/**
* Checks a term if it can be processed correctly.
*
* @return true if, and only if, the given term consists in letters.
*/
private boolean isStemmable(String term) {
for (int c = 0; c < term.length(); c++) {
// Discard terms that contain non-letter characters.
if (!Character.isLetter(term.charAt(c))) {
return false;
}
}
return true;
}
/**
* Checks a term if it can be processed indexed.
*
* @return true if it can be indexed
*/
private boolean isIndexable(String term) {
return (term.length() < 30) && (term.length() > 2);
}
/**
* See if string is 'a','e','i','o','u'
*
* @return true if is vowel
*/
private boolean isVowel(char value) {
return (value == 'a') || (value == 'e') || (value == 'i') || (value == 'o') || (value == 'u');
}
/**
* Gets R1
*
* R1 - is the region after the first non-vowel following a vowel, or is the null region at the
* end of the word if there is no such non-vowel.
*
* @return null or a string representing R1
*/
private String getR1(String value) {
int i;
int j;
// be-safe !!!
if (value == null) {
return null;
}
// find 1st vowel
i = value.length() - 1;
for (j = 0; j < i; j++) {
if (isVowel(value.charAt(j))) {
break;
}
}
if (!(j < i)) {
return null;
}
// find 1st non-vowel
for (; j < i; j++) {
if (!(isVowel(value.charAt(j)))) {
break;
}
}
if (!(j < i)) {
return null;
}
return value.substring(j + 1);
}
/**
* Gets RV
*
*
RV - IF the second letter is a consonant, RV is the region after the next following vowel,
*
*
OR if the first two letters are vowels, RV is the region after the next consonant,
*
*
AND otherwise (consonant-vowel case) RV is the region after the third letter.
*
*
BUT RV is the end of the word if this positions cannot be found.
*
* @return null or a string representing RV
*/
private String getRV(String value) {
int i;
int j;
// be-safe !!!
if (value == null) {
return null;
}
i = value.length() - 1;
// RV - IF the second letter is a consonant, RV is the region after
// the next following vowel,
if ((i > 0) && !isVowel(value.charAt(1))) {
// find 1st vowel
for (j = 2; j < i; j++) {
if (isVowel(value.charAt(j))) {
break;
}
}
if (j < i) {
return value.substring(j + 1);
}
}
// RV - OR if the first two letters are vowels, RV is the region
// after the next consonant,
if ((i > 1) && isVowel(value.charAt(0)) && isVowel(value.charAt(1))) {
// find 1st consoant
for (j = 2; j < i; j++) {
if (!isVowel(value.charAt(j))) {
break;
}
}
if (j < i) {
return value.substring(j + 1);
}
}
// RV - AND otherwise (consonant-vowel case) RV is the region after
// the third letter.
if (i > 2) {
return value.substring(3);
}
return null;
}
/**
* 1) Turn to lowercase 2) Remove accents 3) ã -> a ; õ -> o 4) ç -> c
*
* @return null or a string transformed
*/
private String changeTerm(String value) {
int j;
String r = "";
// be-safe !!!
if (value == null) {
return null;
}
value = value.toLowerCase(locale);
for (j = 0; j < value.length(); j++) {
if ((value.charAt(j) == 'á') || (value.charAt(j) == 'â') || (value.charAt(j) == 'ã')) {
r = r + "a";
continue;
}
if ((value.charAt(j) == 'é') || (value.charAt(j) == 'ê')) {
r = r + "e";
continue;
}
if (value.charAt(j) == 'í') {
r = r + "i";
continue;
}
if ((value.charAt(j) == 'ó') || (value.charAt(j) == 'ô') || (value.charAt(j) == 'õ')) {
r = r + "o";
continue;
}
if ((value.charAt(j) == 'ú') || (value.charAt(j) == 'ü')) {
r = r + "u";
continue;
}
if (value.charAt(j) == 'ç') {
r = r + "c";
continue;
}
if (value.charAt(j) == 'ñ') {
r = r + "n";
continue;
}
r = r + value.charAt(j);
}
return r;
}
/**
* Check if a string ends with a suffix
*
* @return true if the string ends with the specified suffix
*/
private boolean suffix(String value, String suffix) {
// be-safe !!!
if ((value == null) || (suffix == null)) {
return false;
}
if (suffix.length() > value.length()) {
return false;
}
return value.endsWith(suffix);
}
/**
* Replace a string suffix by another
*
* @return the replaced String
*/
private String replaceSuffix(String value, String toReplace, String changeTo) {
String vvalue;
// be-safe !!!
if ((value == null) || (toReplace == null) || (changeTo == null)) {
return value;
}
vvalue = removeSuffix(value, toReplace);
if (value.equals(vvalue)) {
return value;
} else {
return vvalue + changeTo;
}
}
/**
* Remove a string suffix
*
* @return the String without the suffix
*/
private String removeSuffix(String value, String toRemove) {
// be-safe !!!
if ((value == null) || (toRemove == null) || !suffix(value, toRemove)) {
return value;
}
return value.substring(0, value.length() - toRemove.length());
}
/**
* See if a suffix is preceded by a String
*
* @return true if the suffix is preceded
*/
private boolean suffixPreceded(String value, String suffix, String preceded) {
// be-safe !!!
if ((value == null) || (suffix == null) || (preceded == null) || !suffix(value, suffix)) {
return false;
}
return suffix(removeSuffix(value, suffix), preceded);
}
/** Creates CT (changed term) , substituting * 'ã' and 'õ' for 'a~' and 'o~'. */
private void createCT(String term) {
CT = changeTerm(term);
if (CT.length() < 2) return;
// if the first character is ... , remove it
if ((CT.charAt(0) == '"')
|| (CT.charAt(0) == '\'')
|| (CT.charAt(0) == '-')
|| (CT.charAt(0) == ',')
|| (CT.charAt(0) == ';')
|| (CT.charAt(0) == '.')
|| (CT.charAt(0) == '?')
|| (CT.charAt(0) == '!')) {
CT = CT.substring(1);
}
if (CT.length() < 2) return;
// if the last character is ... , remove it
if ((CT.charAt(CT.length() - 1) == '-')
|| (CT.charAt(CT.length() - 1) == ',')
|| (CT.charAt(CT.length() - 1) == ';')
|| (CT.charAt(CT.length() - 1) == '.')
|| (CT.charAt(CT.length() - 1) == '?')
|| (CT.charAt(CT.length() - 1) == '!')
|| (CT.charAt(CT.length() - 1) == '\'')
|| (CT.charAt(CT.length() - 1) == '"')) {
CT = CT.substring(0, CT.length() - 1);
}
}
/**
* Standard suffix removal. Search for the longest among the following suffixes, and perform the
* following actions:
*
* @return false if no ending was removed
*/
private boolean step1() {
if (CT == null) return false;
// suffix length = 7
if (suffix(CT, "uciones") && suffix(R2, "uciones")) {
CT = replaceSuffix(CT, "uciones", "u");
return true;
}
// suffix length = 6
if (CT.length() >= 6) {
if (suffix(CT, "imentos") && suffix(R2, "imentos")) {
CT = removeSuffix(CT, "imentos");
return true;
}
if (suffix(CT, "amentos") && suffix(R2, "amentos")) {
CT = removeSuffix(CT, "amentos");
return true;
}
if (suffix(CT, "adores") && suffix(R2, "adores")) {
CT = removeSuffix(CT, "adores");
return true;
}
if (suffix(CT, "adoras") && suffix(R2, "adoras")) {
CT = removeSuffix(CT, "adoras");
return true;
}
if (suffix(CT, "logias") && suffix(R2, "logias")) {
replaceSuffix(CT, "logias", "log");
return true;
}
if (suffix(CT, "encias") && suffix(R2, "encias")) {
CT = replaceSuffix(CT, "encias", "ente");
return true;
}
if (suffix(CT, "amente") && suffix(R1, "amente")) {
CT = removeSuffix(CT, "amente");
return true;
}
if (suffix(CT, "idades") && suffix(R2, "idades")) {
CT = removeSuffix(CT, "idades");
return true;
}
}
// suffix length = 5
if (CT.length() >= 5) {
if (suffix(CT, "acoes") && suffix(R2, "acoes")) {
CT = removeSuffix(CT, "acoes");
return true;
}
if (suffix(CT, "imento") && suffix(R2, "imento")) {
CT = removeSuffix(CT, "imento");
return true;
}
if (suffix(CT, "amento") && suffix(R2, "amento")) {
CT = removeSuffix(CT, "amento");
return true;
}
if (suffix(CT, "adora") && suffix(R2, "adora")) {
CT = removeSuffix(CT, "adora");
return true;
}
if (suffix(CT, "ismos") && suffix(R2, "ismos")) {
CT = removeSuffix(CT, "ismos");
return true;
}
if (suffix(CT, "istas") && suffix(R2, "istas")) {
CT = removeSuffix(CT, "istas");
return true;
}
if (suffix(CT, "logia") && suffix(R2, "logia")) {
CT = replaceSuffix(CT, "logia", "log");
return true;
}
if (suffix(CT, "ucion") && suffix(R2, "ucion")) {
CT = replaceSuffix(CT, "ucion", "u");
return true;
}
if (suffix(CT, "encia") && suffix(R2, "encia")) {
CT = replaceSuffix(CT, "encia", "ente");
return true;
}
if (suffix(CT, "mente") && suffix(R2, "mente")) {
CT = removeSuffix(CT, "mente");
return true;
}
if (suffix(CT, "idade") && suffix(R2, "idade")) {
CT = removeSuffix(CT, "idade");
return true;
}
}
// suffix length = 4
if (CT.length() >= 4) {
if (suffix(CT, "acao") && suffix(R2, "acao")) {
CT = removeSuffix(CT, "acao");
return true;
}
if (suffix(CT, "ezas") && suffix(R2, "ezas")) {
CT = removeSuffix(CT, "ezas");
return true;
}
if (suffix(CT, "icos") && suffix(R2, "icos")) {
CT = removeSuffix(CT, "icos");
return true;
}
if (suffix(CT, "icas") && suffix(R2, "icas")) {
CT = removeSuffix(CT, "icas");
return true;
}
if (suffix(CT, "ismo") && suffix(R2, "ismo")) {
CT = removeSuffix(CT, "ismo");
return true;
}
if (suffix(CT, "avel") && suffix(R2, "avel")) {
CT = removeSuffix(CT, "avel");
return true;
}
if (suffix(CT, "ivel") && suffix(R2, "ivel")) {
CT = removeSuffix(CT, "ivel");
return true;
}
if (suffix(CT, "ista") && suffix(R2, "ista")) {
CT = removeSuffix(CT, "ista");
return true;
}
if (suffix(CT, "osos") && suffix(R2, "osos")) {
CT = removeSuffix(CT, "osos");
return true;
}
if (suffix(CT, "osas") && suffix(R2, "osas")) {
CT = removeSuffix(CT, "osas");
return true;
}
if (suffix(CT, "ador") && suffix(R2, "ador")) {
CT = removeSuffix(CT, "ador");
return true;
}
if (suffix(CT, "ivas") && suffix(R2, "ivas")) {
CT = removeSuffix(CT, "ivas");
return true;
}
if (suffix(CT, "ivos") && suffix(R2, "ivos")) {
CT = removeSuffix(CT, "ivos");
return true;
}
if (suffix(CT, "iras") && suffix(RV, "iras") && suffixPreceded(CT, "iras", "e")) {
CT = replaceSuffix(CT, "iras", "ir");
return true;
}
}
// suffix length = 3
if (CT.length() >= 3) {
if (suffix(CT, "eza") && suffix(R2, "eza")) {
CT = removeSuffix(CT, "eza");
return true;
}
if (suffix(CT, "ico") && suffix(R2, "ico")) {
CT = removeSuffix(CT, "ico");
return true;
}
if (suffix(CT, "ica") && suffix(R2, "ica")) {
CT = removeSuffix(CT, "ica");
return true;
}
if (suffix(CT, "oso") && suffix(R2, "oso")) {
CT = removeSuffix(CT, "oso");
return true;
}
if (suffix(CT, "osa") && suffix(R2, "osa")) {
CT = removeSuffix(CT, "osa");
return true;
}
if (suffix(CT, "iva") && suffix(R2, "iva")) {
CT = removeSuffix(CT, "iva");
return true;
}
if (suffix(CT, "ivo") && suffix(R2, "ivo")) {
CT = removeSuffix(CT, "ivo");
return true;
}
if (suffix(CT, "ira") && suffix(RV, "ira") && suffixPreceded(CT, "ira", "e")) {
CT = replaceSuffix(CT, "ira", "ir");
return true;
}
}
// no ending was removed by step1
return false;
}
/**
* Verb suffixes.
*
*
Search for the longest among the following suffixes in RV, and if found, delete.
*
* @return false if no ending was removed
*/
private boolean step2() {
if (RV == null) return false;
// suffix lenght = 7
if (RV.length() >= 7) {
if (suffix(RV, "issemos")) {
CT = removeSuffix(CT, "issemos");
return true;
}
if (suffix(RV, "essemos")) {
CT = removeSuffix(CT, "essemos");
return true;
}
if (suffix(RV, "assemos")) {
CT = removeSuffix(CT, "assemos");
return true;
}
if (suffix(RV, "ariamos")) {
CT = removeSuffix(CT, "ariamos");
return true;
}
if (suffix(RV, "eriamos")) {
CT = removeSuffix(CT, "eriamos");
return true;
}
if (suffix(RV, "iriamos")) {
CT = removeSuffix(CT, "iriamos");
return true;
}
}
// suffix length = 6
if (RV.length() >= 6) {
if (suffix(RV, "iremos")) {
CT = removeSuffix(CT, "iremos");
return true;
}
if (suffix(RV, "eremos")) {
CT = removeSuffix(CT, "eremos");
return true;
}
if (suffix(RV, "aremos")) {
CT = removeSuffix(CT, "aremos");
return true;
}
if (suffix(RV, "avamos")) {
CT = removeSuffix(CT, "avamos");
return true;
}
if (suffix(RV, "iramos")) {
CT = removeSuffix(CT, "iramos");
return true;
}
if (suffix(RV, "eramos")) {
CT = removeSuffix(CT, "eramos");
return true;
}
if (suffix(RV, "aramos")) {
CT = removeSuffix(CT, "aramos");
return true;
}
if (suffix(RV, "asseis")) {
CT = removeSuffix(CT, "asseis");
return true;
}
if (suffix(RV, "esseis")) {
CT = removeSuffix(CT, "esseis");
return true;
}
if (suffix(RV, "isseis")) {
CT = removeSuffix(CT, "isseis");
return true;
}
if (suffix(RV, "arieis")) {
CT = removeSuffix(CT, "arieis");
return true;
}
if (suffix(RV, "erieis")) {
CT = removeSuffix(CT, "erieis");
return true;
}
if (suffix(RV, "irieis")) {
CT = removeSuffix(CT, "irieis");
return true;
}
}
// suffix length = 5
if (RV.length() >= 5) {
if (suffix(RV, "irmos")) {
CT = removeSuffix(CT, "irmos");
return true;
}
if (suffix(RV, "iamos")) {
CT = removeSuffix(CT, "iamos");
return true;
}
if (suffix(RV, "armos")) {
CT = removeSuffix(CT, "armos");
return true;
}
if (suffix(RV, "ermos")) {
CT = removeSuffix(CT, "ermos");
return true;
}
if (suffix(RV, "areis")) {
CT = removeSuffix(CT, "areis");
return true;
}
if (suffix(RV, "ereis")) {
CT = removeSuffix(CT, "ereis");
return true;
}
if (suffix(RV, "ireis")) {
CT = removeSuffix(CT, "ireis");
return true;
}
if (suffix(RV, "asses")) {
CT = removeSuffix(CT, "asses");
return true;
}
if (suffix(RV, "esses")) {
CT = removeSuffix(CT, "esses");
return true;
}
if (suffix(RV, "isses")) {
CT = removeSuffix(CT, "isses");
return true;
}
if (suffix(RV, "astes")) {
CT = removeSuffix(CT, "astes");
return true;
}
if (suffix(RV, "assem")) {
CT = removeSuffix(CT, "assem");
return true;
}
if (suffix(RV, "essem")) {
CT = removeSuffix(CT, "essem");
return true;
}
if (suffix(RV, "issem")) {
CT = removeSuffix(CT, "issem");
return true;
}
if (suffix(RV, "ardes")) {
CT = removeSuffix(CT, "ardes");
return true;
}
if (suffix(RV, "erdes")) {
CT = removeSuffix(CT, "erdes");
return true;
}
if (suffix(RV, "irdes")) {
CT = removeSuffix(CT, "irdes");
return true;
}
if (suffix(RV, "ariam")) {
CT = removeSuffix(CT, "ariam");
return true;
}
if (suffix(RV, "eriam")) {
CT = removeSuffix(CT, "eriam");
return true;
}
if (suffix(RV, "iriam")) {
CT = removeSuffix(CT, "iriam");
return true;
}
if (suffix(RV, "arias")) {
CT = removeSuffix(CT, "arias");
return true;
}
if (suffix(RV, "erias")) {
CT = removeSuffix(CT, "erias");
return true;
}
if (suffix(RV, "irias")) {
CT = removeSuffix(CT, "irias");
return true;
}
if (suffix(RV, "estes")) {
CT = removeSuffix(CT, "estes");
return true;
}
if (suffix(RV, "istes")) {
CT = removeSuffix(CT, "istes");
return true;
}
if (suffix(RV, "areis")) {
CT = removeSuffix(CT, "areis");
return true;
}
if (suffix(RV, "aveis")) {
CT = removeSuffix(CT, "aveis");
return true;
}
}
// suffix length = 4
if (RV.length() >= 4) {
if (suffix(RV, "aria")) {
CT = removeSuffix(CT, "aria");
return true;
}
if (suffix(RV, "eria")) {
CT = removeSuffix(CT, "eria");
return true;
}
if (suffix(RV, "iria")) {
CT = removeSuffix(CT, "iria");
return true;
}
if (suffix(RV, "asse")) {
CT = removeSuffix(CT, "asse");
return true;
}
if (suffix(RV, "esse")) {
CT = removeSuffix(CT, "esse");
return true;
}
if (suffix(RV, "isse")) {
CT = removeSuffix(CT, "isse");
return true;
}
if (suffix(RV, "aste")) {
CT = removeSuffix(CT, "aste");
return true;
}
if (suffix(RV, "este")) {
CT = removeSuffix(CT, "este");
return true;
}
if (suffix(RV, "iste")) {
CT = removeSuffix(CT, "iste");
return true;
}
if (suffix(RV, "arei")) {
CT = removeSuffix(CT, "arei");
return true;
}
if (suffix(RV, "erei")) {
CT = removeSuffix(CT, "erei");
return true;
}
if (suffix(RV, "irei")) {
CT = removeSuffix(CT, "irei");
return true;
}
if (suffix(RV, "aram")) {
CT = removeSuffix(CT, "aram");
return true;
}
if (suffix(RV, "eram")) {
CT = removeSuffix(CT, "eram");
return true;
}
if (suffix(RV, "iram")) {
CT = removeSuffix(CT, "iram");
return true;
}
if (suffix(RV, "avam")) {
CT = removeSuffix(CT, "avam");
return true;
}
if (suffix(RV, "arem")) {
CT = removeSuffix(CT, "arem");
return true;
}
if (suffix(RV, "erem")) {
CT = removeSuffix(CT, "erem");
return true;
}
if (suffix(RV, "irem")) {
CT = removeSuffix(CT, "irem");
return true;
}
if (suffix(RV, "ando")) {
CT = removeSuffix(CT, "ando");
return true;
}
if (suffix(RV, "endo")) {
CT = removeSuffix(CT, "endo");
return true;
}
if (suffix(RV, "indo")) {
CT = removeSuffix(CT, "indo");
return true;
}
if (suffix(RV, "arao")) {
CT = removeSuffix(CT, "arao");
return true;
}
if (suffix(RV, "erao")) {
CT = removeSuffix(CT, "erao");
return true;
}
if (suffix(RV, "irao")) {
CT = removeSuffix(CT, "irao");
return true;
}
if (suffix(RV, "adas")) {
CT = removeSuffix(CT, "adas");
return true;
}
if (suffix(RV, "idas")) {
CT = removeSuffix(CT, "idas");
return true;
}
if (suffix(RV, "aras")) {
CT = removeSuffix(CT, "aras");
return true;
}
if (suffix(RV, "eras")) {
CT = removeSuffix(CT, "eras");
return true;
}
if (suffix(RV, "iras")) {
CT = removeSuffix(CT, "iras");
return true;
}
if (suffix(RV, "avas")) {
CT = removeSuffix(CT, "avas");
return true;
}
if (suffix(RV, "ares")) {
CT = removeSuffix(CT, "ares");
return true;
}
if (suffix(RV, "eres")) {
CT = removeSuffix(CT, "eres");
return true;
}
if (suffix(RV, "ires")) {
CT = removeSuffix(CT, "ires");
return true;
}
if (suffix(RV, "ados")) {
CT = removeSuffix(CT, "ados");
return true;
}
if (suffix(RV, "idos")) {
CT = removeSuffix(CT, "idos");
return true;
}
if (suffix(RV, "amos")) {
CT = removeSuffix(CT, "amos");
return true;
}
if (suffix(RV, "emos")) {
CT = removeSuffix(CT, "emos");
return true;
}
if (suffix(RV, "imos")) {
CT = removeSuffix(CT, "imos");
return true;
}
if (suffix(RV, "iras")) {
CT = removeSuffix(CT, "iras");
return true;
}
if (suffix(RV, "ieis")) {
CT = removeSuffix(CT, "ieis");
return true;
}
}
// suffix length = 3
if (RV.length() >= 3) {
if (suffix(RV, "ada")) {
CT = removeSuffix(CT, "ada");
return true;
}
if (suffix(RV, "ida")) {
CT = removeSuffix(CT, "ida");
return true;
}
if (suffix(RV, "ara")) {
CT = removeSuffix(CT, "ara");
return true;
}
if (suffix(RV, "era")) {
CT = removeSuffix(CT, "era");
return true;
}
if (suffix(RV, "ira")) {
CT = removeSuffix(CT, "ava");
return true;
}
if (suffix(RV, "iam")) {
CT = removeSuffix(CT, "iam");
return true;
}
if (suffix(RV, "ado")) {
CT = removeSuffix(CT, "ado");
return true;
}
if (suffix(RV, "ido")) {
CT = removeSuffix(CT, "ido");
return true;
}
if (suffix(RV, "ias")) {
CT = removeSuffix(CT, "ias");
return true;
}
if (suffix(RV, "ais")) {
CT = removeSuffix(CT, "ais");
return true;
}
if (suffix(RV, "eis")) {
CT = removeSuffix(CT, "eis");
return true;
}
if (suffix(RV, "ira")) {
CT = removeSuffix(CT, "ira");
return true;
}
if (suffix(RV, "ear")) {
CT = removeSuffix(CT, "ear");
return true;
}
}
// suffix length = 2
if (RV.length() >= 2) {
if (suffix(RV, "ia")) {
CT = removeSuffix(CT, "ia");
return true;
}
if (suffix(RV, "ei")) {
CT = removeSuffix(CT, "ei");
return true;
}
if (suffix(RV, "am")) {
CT = removeSuffix(CT, "am");
return true;
}
if (suffix(RV, "em")) {
CT = removeSuffix(CT, "em");
return true;
}
if (suffix(RV, "ar")) {
CT = removeSuffix(CT, "ar");
return true;
}
if (suffix(RV, "er")) {
CT = removeSuffix(CT, "er");
return true;
}
if (suffix(RV, "ir")) {
CT = removeSuffix(CT, "ir");
return true;
}
if (suffix(RV, "as")) {
CT = removeSuffix(CT, "as");
return true;
}
if (suffix(RV, "es")) {
CT = removeSuffix(CT, "es");
return true;
}
if (suffix(RV, "is")) {
CT = removeSuffix(CT, "is");
return true;
}
if (suffix(RV, "eu")) {
CT = removeSuffix(CT, "eu");
return true;
}
if (suffix(RV, "iu")) {
CT = removeSuffix(CT, "iu");
return true;
}
if (suffix(RV, "iu")) {
CT = removeSuffix(CT, "iu");
return true;
}
if (suffix(RV, "ou")) {
CT = removeSuffix(CT, "ou");
return true;
}
}
// no ending was removed by step2
return false;
}
/** Delete suffix 'i' if in RV and preceded by 'c' */
private void step3() {
if (RV == null) return;
if (suffix(RV, "i") && suffixPreceded(RV, "i", "c")) {
CT = removeSuffix(CT, "i");
}
}
/**
* Residual suffix
*
*
If the word ends with one of the suffixes (os a i o á í ó) in RV, delete it
*/
private void step4() {
if (RV == null) return;
if (suffix(RV, "os")) {
CT = removeSuffix(CT, "os");
return;
}
if (suffix(RV, "a")) {
CT = removeSuffix(CT, "a");
return;
}
if (suffix(RV, "i")) {
CT = removeSuffix(CT, "i");
return;
}
if (suffix(RV, "o")) {
CT = removeSuffix(CT, "o");
return;
}
}
/**
* If the word ends with one of ( e é ê) in RV,delete it, and if preceded by 'gu' (or 'ci') with
* the 'u' (or 'i') in RV, delete the 'u' (or 'i')
*
*
Or if the word ends ç remove the cedilha
*/
private void step5() {
if (RV == null) return;
if (suffix(RV, "e")) {
if (suffixPreceded(RV, "e", "gu")) {
CT = removeSuffix(CT, "e");
CT = removeSuffix(CT, "u");
return;
}
if (suffixPreceded(RV, "e", "ci")) {
CT = removeSuffix(CT, "e");
CT = removeSuffix(CT, "i");
return;
}
CT = removeSuffix(CT, "e");
return;
}
}
/**
* For log and debug purpose
*
* @return TERM, CT, RV, R1 and R2
*/
String log() {
return " (TERM = "
+ TERM
+ ")"
+ " (CT = "
+ CT
+ ")"
+ " (RV = "
+ RV
+ ")"
+ " (R1 = "
+ R1
+ ")"
+ " (R2 = "
+ R2
+ ")";
}
}