org.languagetool.rules.ca.SimpleReplaceVerbsRule Maven / Gradle / Ivy
/* LanguageTool, a natural language style checker
* Copyright (C) 2005 Daniel Naber (http://www.danielnaber.de)
*
* This library is free software; you can redistribute it and/or
* modify it under the terms of the GNU Lesser General Public
* License as published by the Free Software Foundation; either
* version 2.1 of the License, or (at your option) any later version.
*
* This library is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public
* License along with this library; if not, write to the Free Software
* Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301
* USA
*/
package org.languagetool.rules.ca;
import java.io.IOException;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.List;
import java.util.Locale;
import java.util.Map;
import java.util.ResourceBundle;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import org.languagetool.AnalyzedSentence;
import org.languagetool.AnalyzedToken;
import org.languagetool.AnalyzedTokenReadings;
import org.languagetool.Language;
import org.languagetool.rules.*;
import org.languagetool.synthesis.ca.CatalanSynthesizer;
import org.languagetool.tagging.ca.CatalanTagger;
import org.languagetool.tools.StringTools;
/**
* A rule that matches incorrect verbs (including all inflected forms) and
* suggests correct ones instead.
* Loads the relevant words from rules/ca/replace_verbs.txt
.
*
* @author Jaume Ortolà
*/
public class SimpleReplaceVerbsRule extends AbstractSimpleReplaceRule {
private static final Map> wrongWords = loadFromPath("/ca/replace_verbs.txt");
private static final Locale CA_LOCALE = new Locale("CA");
@Override
public Map> getWrongWords() {
return wrongWords;
}
// totes les terminacions possibles dels verbs: cantar, servir, conèixer, desmerèixer
private static final String endings = "a|ada|ades|am|ant|ar|ara|aran|arem|aren|ares|areu|aria|arien|aries|arà|aràs|aré|aríem|aríeu|assen|asses|assin|assis|at|ats|au|ava|aven|aves|e|ec|ega|eguda|egudes|eguem|eguen|eguera|egueren|egueres|egues|eguessen|eguesses|eguessin|eguessis|egueu|egui|eguin|eguis|egut|eguts|egué|eguérem|eguéreu|egués|eguéssem|eguésseu|eguéssim|eguéssiu|eguí|eix|eixem|eixen|eixent|eixeran|eixerem|eixeren|eixeres|eixereu|eixeria|eixerien|eixeries|eixerà|eixeràs|eixeré|eixeríem|eixeríeu|eixes|eixessen|eixesses|eixessin|eixessis|eixeu|eixi|eixia|eixien|eixies|eixin|eixis|eixo|eixé|eixérem|eixéreu|eixés|eixéssem|eixésseu|eixéssim|eixéssiu|eixí|eixíem|eixíeu|em|en|es|esc|esca|escuda|escudes|escut|escuts|esquem|esquen|esquera|esqueren|esqueres|esques|esquessen|esquesses|esquessin|esquessis|esqueu|esqui|esquin|esquis|esqué|esquérem|esquéreu|esqués|esquéssem|esquésseu|esquéssim|esquéssiu|esquí|essen|esses|essin|essis|eu|i|ia|ida|ides|ien|ies|iguem|igueu|im|in|int|ir|ira|iran|irem|iren|ires|ireu|iria|irien|iries|irà|iràs|iré|iríem|iríeu|is|isc|isca|isquen|isques|issen|isses|issin|issis|it|its|iu|ix|ixen|ixes|o|à|àrem|àreu|às|àssem|àsseu|àssim|àssiu|àvem|àveu|éixer|és|éssem|ésseu|éssim|éssiu|í|íem|íeu|írem|íreu|ís|íssem|ísseu|íssim|íssiu|ïs";
private static final Pattern desinencies_1conj_0 = Pattern.compile("(.+?)(" + endings + ")");
private static final Pattern desinencies_1conj_1 = Pattern.compile("(.+)(" + endings + ")");
private final CatalanTagger tagger;
private final CatalanSynthesizer synth;
public SimpleReplaceVerbsRule(final ResourceBundle messages, Language language) {
super(messages, language);
super.setCategory(Categories.TYPOS.getCategory(messages));
super.setLocQualityIssueType(ITSIssueType.Misspelling);
super.setIgnoreTaggedWords();
tagger = (CatalanTagger) language.getTagger();
synth = (CatalanSynthesizer) language.getSynthesizer();
super.useSubRuleSpecificIds();
}
@Override
public final String getId() {
return "CA_SIMPLE_REPLACE_VERBS";
}
@Override
public String getDescription() {
return "Verb incorrecte: $match";
}
@Override
public String getShort() {
return "Verb incorrecte";
}
@Override
public String getMessage(String tokenStr, List replacements) {
return "Verb incorrecte.";
}
@Override
public Locale getLocale() {
return CA_LOCALE;
}
@Override
public final RuleMatch[] match(final AnalyzedSentence sentence) {
List ruleMatches = new ArrayList<>();
AnalyzedTokenReadings[] tokens = sentence.getTokensWithoutWhitespace();
for (AnalyzedTokenReadings tokenReadings : tokens) {
String originalTokenStr = tokenReadings.getToken();
if (ignoreTaggedWords && tokenReadings.isTagged()) {
continue;
}
String tokenString = originalTokenStr.toLowerCase(getLocale());
AnalyzedTokenReadings analyzedTokenReadings = null;
String infinitive = null;
int i = 0;
while (i < 2 && analyzedTokenReadings == null) {
Matcher m;
if (i == 0) {
m = desinencies_1conj_0.matcher(tokenString);
} else {
m = desinencies_1conj_1.matcher(tokenString);
}
if (m.matches()) {
String lexeme = m.group(1);
String desinence = m.group(2);
if (desinence.startsWith("e") || desinence.startsWith("é") || desinence.startsWith("i")
|| desinence.startsWith("ï")) {
if (lexeme.endsWith("c")) {
lexeme = lexeme.substring(0, lexeme.length() - 1).concat("ç");
} else if (lexeme.endsWith("qu")) {
lexeme = lexeme.substring(0, lexeme.length() - 2).concat("c");
} else if (lexeme.endsWith("g")) {
lexeme = lexeme.substring(0, lexeme.length() - 1).concat("j");
} else if (lexeme.endsWith("gü")) {
lexeme = lexeme.substring(0, lexeme.length() - 2).concat("gu");
} else if (lexeme.endsWith("gu")) {
lexeme = lexeme.substring(0, lexeme.length() - 2).concat("g");
}
}
if (desinence.startsWith("ï")) {
desinence = "i" + desinence.substring(1);
}
infinitive = lexeme.concat("ar");
if (wrongWords.containsKey(infinitive)) {
List wordAsArray = Arrays.asList("cant".concat(desinence)); // cantar
List analyzedTokenReadingsList = tagger.tag(wordAsArray);
if (analyzedTokenReadingsList.get(0).getAnalyzedToken(0).getPOSTag() != null) {
analyzedTokenReadings = analyzedTokenReadingsList.get(0);
}
}
if (analyzedTokenReadings == null) {
infinitive = lexeme.concat("ir");
if (wrongWords.containsKey(infinitive)) {
List wordAsArray = Arrays.asList("serv".concat(desinence)); // servir
List analyzedTokenReadingsList = tagger.tag(wordAsArray);
if (analyzedTokenReadingsList.get(0).getAnalyzedToken(0).getPOSTag() != null) {
analyzedTokenReadings = analyzedTokenReadingsList.get(0);
}
}
}
if (analyzedTokenReadings == null && lexeme.endsWith("g")) {
infinitive = lexeme.concat("uir");
if (wrongWords.containsKey(infinitive)) {
List wordAsArray = Arrays.asList("serv".concat(desinence)); // servir
List analyzedTokenReadingsList = tagger.tag(wordAsArray);
if (analyzedTokenReadingsList.get(0).getAnalyzedToken(0).getPOSTag() != null) {
analyzedTokenReadings = analyzedTokenReadingsList.get(0);
}
}
}
if (analyzedTokenReadings == null) {
infinitive = lexeme.concat("èixer");
if (wrongWords.containsKey(infinitive)) {
List wordAsArray = Arrays.asList("con".concat(desinence)); // conèixer
List analyzedTokenReadingsList = tagger.tag(wordAsArray);
if (analyzedTokenReadingsList.get(0).getAnalyzedToken(0).getPOSTag() != null) {
analyzedTokenReadings = analyzedTokenReadingsList.get(0);
} else {
List wordAsArray2 = Arrays.asList("desmer".concat(desinence)); // desmerèixer
List analyzedTokenReadingsList2 = tagger.tag(wordAsArray2);
if (analyzedTokenReadingsList2.get(0).getAnalyzedToken(0).getPOSTag() != null) {
analyzedTokenReadings = analyzedTokenReadingsList2.get(0);
}
}
}
}
}
i++;
}
// synthesize replacements
if (analyzedTokenReadings != null) {
List possibleReplacements = new ArrayList<>();
String[] synthesized;
List replacementInfinitives = wrongWords.get(infinitive);
for (String replacementInfinitive : replacementInfinitives) {
if (replacementInfinitive.startsWith("(")) {
possibleReplacements.add(StringTools.preserveCase(replacementInfinitive, originalTokenStr));
} else {
// the first part is the verb
String[] parts = replacementInfinitive.split(" ");
AnalyzedToken infinitiveAsAnTkn = new AnalyzedToken(parts[0], "V.*", parts[0]);
for (AnalyzedToken analyzedToken : analyzedTokenReadings) {
try {
String posTag = analyzedToken.getPOSTag();
if (infinitiveAsAnTkn.getLemma().equals("haver")) {
posTag = "VA" + posTag.substring(2);
}
synthesized = synth.synthesize(infinitiveAsAnTkn, posTag);
} catch (IOException e) {
throw new RuntimeException(
"Could not synthesize: " + infinitiveAsAnTkn + " with tag " + analyzedToken.getPOSTag(), e);
}
for (String s : synthesized) {
for (int j = 1; j < parts.length; j++) {
s = s.concat(" ").concat(parts[j]);
}
String ps = StringTools.preserveCase(s, originalTokenStr);
if (!possibleReplacements.contains(ps)) {
possibleReplacements.add(ps);
}
}
}
}
}
if (possibleReplacements.size() > 0) {
RuleMatch potentialRuleMatch = createRuleMatch(tokenReadings, possibleReplacements, sentence, infinitive);
ruleMatches.add(potentialRuleMatch);
}
}
}
return toRuleMatchArray(ruleMatches);
}
}
© 2015 - 2025 Weber Informatics LLC | Privacy Policy