All Downloads are FREE. Search and download functionalities are using the official Maven repository.

org.languagetool.rules.pt.PostReformPortugueseCompoundRule Maven / Gradle / Ivy

/* LanguageTool, a natural language style checker 
 * Copyright (C) 2006 Daniel Naber (http://www.danielnaber.de)
 * 
 * This library is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2.1 of the License, or (at your option) any later version.
 *
 * This library is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with this library; if not, write to the Free Software
 * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301
 * USA
 */
package org.languagetool.rules.pt;

import org.apache.commons.lang3.StringUtils;
import org.languagetool.Language;
import org.languagetool.UserConfig;
import org.languagetool.rules.*;
import org.languagetool.tools.Tools;

import java.io.IOException;
import java.net.URL;
import java.util.ResourceBundle;
import java.util.regex.Pattern;

import static java.util.regex.Pattern.compile;

/**
 * Checks that compounds (if in the list) are not written as separate words.
 *
 * <p>The compound list is read from {@code /pt/post-reform-compounds.txt}. Merged forms are built by
 * {@link #mergeCompound(String, boolean)}, which doubles a leading {@code r}/{@code s} after a
 * vowel-final part (ultra + som =&gt; ultrassom).
 *
 * @since 2.6
 */
public class PostReformPortugueseCompoundRule extends AbstractCompoundRule {

  private static final Pattern HYPHEN = compile("-");
  // Matches a part of two or more characters that ends in an unaccented vowel (case-insensitive).
  private static final Pattern VOWEL = compile("(?i).+[aeiou]$");
  // Matches a part of two or more characters that starts with "r" or "s" (case-insensitive).
  private static final Pattern RS = compile("(?i)^[rs].+");

  // Created on first use by getCompoundRuleData() and then shared by every instance of this rule.
  private static volatile CompoundRuleData compoundData;

  /**
   * Creates the rule, handing the four Portuguese messages below to the parent constructor, and
   * assigns it to the compounding category with a grammar issue type.
   *
   * @param messages resource bundle passed to the parent rule and used to resolve the category
   * @param lang language passed to the parent rule
   * @param userConfig user configuration passed to the parent rule
   * @throws IOException declared because the parent constructor is invoked here
   */
  public PostReformPortugueseCompoundRule(ResourceBundle messages, Language lang, UserConfig userConfig) throws IOException {
    super(messages, lang, userConfig,
            "Esta palavra é hifenizada.",
            "Esta palavra é composta por justaposição.",
            "Esta palavra pode ser composta por justaposição ou hifenizada.",
            "Este conjunto forma uma palavra composta.");
    super.setCategory(Categories.COMPOUNDING.getCategory(messages));
    setLocQualityIssueType(ITSIssueType.Grammar);
    useSubRuleSpecificIds();
  }

  /** Returns the constant identifier of this rule. */
  @Override
  public String getId() {
    return "PT_COMPOUNDS_POST_REFORM";
  }

  /** Returns the Portuguese description; it contains the literal placeholder {@code $match}. */
  @Override
  public String getDescription() {
    return "Erro na formação da palavra composta \"$match\"";
  }

  /** Returns the URL of the Portuguese Wikipedia list of changes made by the 1990 orthographic agreement. */
  @Override
  public URL getUrl() {
    return Tools.getUrl("https://pt.wikipedia.org/wiki/Lista_das_alterações_previstas_pelo_acordo_ortográfico_de_1990");
  }

  /**
   * Returns the compound data read from {@code /pt/post-reform-compounds.txt}.
   *
   * <p>The data is created on the first call and cached in a static volatile field; the field is
   * re-checked inside a class-level lock so that concurrent first calls build it only once.
   */
  @Override
  public CompoundRuleData getCompoundRuleData() {
    CompoundRuleData data = compoundData;
    if (data == null) {
      synchronized (PostReformPortugueseCompoundRule.class) {
        data = compoundData;
        if (data == null) {
          data = new CompoundRuleData("/pt/post-reform-compounds.txt");
          compoundData = data;
        }
      }
    }
    return data;
  }

  /**
   * Joins the space- or hyphen-separated parts of {@code str} into a single word.
   *
   * <p>This override accounts for a transformation required by the latest Portuguese orthography:
   * ultra + som =&gt; ultrassom (with {@code s} turned into {@code ss} to keep the sound). Whenever a
   * part ends in a vowel and the next part starts with {@code r} or {@code s}, that letter is
   * doubled in the merged word.
   *
   * @param str the words to merge, separated by spaces and/or hyphens
   * @param uncapitalizeMidWords whether the first letter of every part after the first is lowercased
   * @return the merged word
   */
  @Override
  public String mergeCompound(String str, boolean uncapitalizeMidWords) {
    String[] parts = HYPHEN.matcher(str).replaceAll(" ").split(" ");
    StringBuilder merged = new StringBuilder(str.length() + parts.length);
    for (int k = 0; k < parts.length; k++) {
      if (k == 0) {
        merged.append(parts[0]);
      } else {
        // Uncapitalize BEFORE doubling: uncapitalize only lowercases the first character, so doing
        // it afterwards would turn "Som" into "sSom" instead of "ssom".
        String part = uncapitalizeMidWords ? StringUtils.uncapitalize(parts[k]) : parts[k];
        // If the previous part ends in a vowel and this one starts with "r" or "s", the letter is
        // doubled into the digraph ("rr"/"ss") that keeps the sound we want.
        if (VOWEL.matcher(parts[k - 1]).matches() && RS.matcher(part).matches()) {
          merged.append(part.charAt(0));
        }
        merged.append(part);
      }
    }
    return merged.toString();
  }
}