All downloads are free. The search and download functionality uses the official Maven repository.

org.languagetool.rules.ru.RussianVerbConjugationRule Maven / Gradle / Ivy

The newest version!
/* LanguageTool, a natural language style checker
 * Copyright (C) 2010 Daniel Naber (http://www.languagetool.org)
 *
 * This library is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2.1 of the License, or (at your option) any later version.
 *
 * This library is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with this library; if not, write to the Free Software
 * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301
 * USA
 */
package org.languagetool.rules.ru;

import org.apache.commons.lang3.tuple.ImmutablePair;
import org.apache.commons.lang3.tuple.Pair;
import org.languagetool.AnalyzedSentence;
import org.languagetool.AnalyzedToken;
import org.languagetool.AnalyzedTokenReadings;
import org.languagetool.rules.Rule;
import org.languagetool.rules.RuleMatch;
import org.languagetool.rules.Categories;
import org.languagetool.rules.Example;

import java.io.IOException;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.List;
import java.util.ResourceBundle;
import java.util.regex.Matcher;
import java.util.regex.Pattern;

/**
 * Checks that a nominative personal pronoun agrees with the verb that directly follows it.
 *
 * <p>The rule walks the sentence token by token. Whenever the first reading of a token carries
 * a POS tag matched by {@link #PRONOUN} and the first reading of the next token carries a verb
 * tag matched by {@link #FUT_REAL_VERB} or {@link #PAST_VERB}, the captured tag fields are
 * compared and a {@link RuleMatch} spanning both tokens is reported on disagreement.
 *
 * <p>Only the first reading of each token is inspected; alternative readings are ignored.
 * The meaning of the individual tag fields is inferred from the patterns below and from the
 * literal values compared in this class — confirm against the Russian tagger dictionary.
 */
public class RussianVerbConjugationRule extends Rule {

    /** Pronoun tag; group 1 and group 2 are the fields before and after {@code Nom}. */
    private static final Pattern PRONOUN = Pattern.compile("PNN:(.*):Nom:(.*)");
    /** Future or "Real" verb tag; groups 4 and 5 are compared against the pronoun. */
    private static final Pattern FUT_REAL_VERB = Pattern.compile("VB:(Fut|Real):(.*):(.*):(.*):(.*)");
    /** Past-tense verb tag; group 3 is compared against pronoun group 1. */
    private static final Pattern PAST_VERB = Pattern.compile("VB:Past:(.*):(.*):(.*)");
    /** Pronoun group-1 values that get the relaxed present/future check (presumably genders). */
    private static final List<String> GENDER_VALUES = Arrays.asList("Masc", "Fem", "Neut");

    /**
     * Creates the rule, assigns it to the grammar category and registers an example pair.
     *
     * @param messages resource bundle passed to the superclass and used to resolve the category
     */
    public RussianVerbConjugationRule(ResourceBundle messages) {
        super(messages);
        super.setCategory(Categories.GRAMMAR.getCategory(messages));
        addExamplePair(Example.wrong("Я идёт."),
                Example.fixed("Я иду."));
    }

    /** {@inheritDoc} */
    @Override
    public String getId() {
        return "RU_VERB_CONJUGATION";
    }

    /** {@inheritDoc} */
    @Override
    public String getDescription() {
        return "Согласование личных местоимений с глаголами";
    }

    /**
     * Scans the sentence for pronoun + verb pairs whose tags disagree.
     *
     * @param sentence the analyzed sentence to check
     * @return one match per disagreeing pronoun/verb pair, in sentence order
     * @throws IOException declared by the overridden method; not thrown by this implementation
     */
    @Override
    public RuleMatch[] match(AnalyzedSentence sentence) throws IOException {
        List<RuleMatch> ruleMatches = new ArrayList<>();
        AnalyzedTokenReadings[] tokenReadings = sentence.getTokensWithoutWhitespace();
        // Start at 1 and stop one short of the end: every candidate needs a previous and a next token.
        for (int i = 1; i < tokenReadings.length - 1; i++) {
            AnalyzedTokenReadings currentReading = tokenReadings[i];
            AnalyzedTokenReadings nextReading = tokenReadings[i + 1];

            AnalyzedToken currentLemmaTok = firstReading(currentReading);
            String currentPosTag = currentLemmaTok.getPOSTag();
            if (isNullOrEmpty(currentLemmaTok.getToken()) || isNullOrEmpty(currentPosTag)) {
                continue;
            }
            Matcher pronounMatcher = PRONOUN.matcher(currentPosTag);
            if (!pronounMatcher.find()) {
                continue;
            }
            // A pronoun directly preceded by "и" is skipped.
            if ("и".equals(firstReading(tokenReadings[i - 1]).getToken())) {
                continue;
            }

            AnalyzedToken nextLemmaTok = firstReading(nextReading);
            String nextToken = nextLemmaTok.getToken();
            String nextPosTag = nextLemmaTok.getPOSTag();
            if (isNullOrEmpty(nextPosTag)) {
                continue;
            }
            // The token after the verb, or "" when the verb is the last token.
            String next2Token = i < tokenReadings.length - 2
                    ? firstReading(tokenReadings[i + 2]).getToken()
                    : "";
            // "может быть" following the pronoun is never reported.
            if ("может".equals(nextToken) && "быть".equals(next2Token)) {
                continue;
            }
            // The word form "целую" is excluded explicitly.
            if ("целую".equals(nextToken)) {
                continue;
            }

            if (isAgreementWrong(pronounMatcher, nextPosTag)) {
                addRuleMatch(ruleMatches, currentReading, nextReading, sentence);
            }
        }
        return toRuleMatchArray(ruleMatches);
    }

    /** Returns the first reading of the given token; other readings are not considered. */
    private static AnalyzedToken firstReading(AnalyzedTokenReadings readings) {
        return readings.getReadings().get(0);
    }

    /** Returns {@code true} when the string is {@code null} or has length zero. */
    private static boolean isNullOrEmpty(String value) {
        return value == null || value.isEmpty();
    }

    /**
     * Compares an already-matched pronoun tag with a verb POS tag.
     *
     * @param pronounMatcher matcher on which {@link #PRONOUN} has already been found
     * @param verbPosTag     POS tag of the token following the pronoun
     * @return {@code true} if the tag is a verb tag that disagrees with the pronoun;
     *         {@code false} if it agrees or is not matched by either verb pattern
     */
    private boolean isAgreementWrong(Matcher pronounMatcher, String verbPosTag) {
        Matcher verbMatcher = FUT_REAL_VERB.matcher(verbPosTag);
        if (verbMatcher.find()) {
            Pair<String, String> pronounPair =
                    new ImmutablePair<>(pronounMatcher.group(1), pronounMatcher.group(2));
            Pair<String, String> verbPair =
                    new ImmutablePair<>(verbMatcher.group(4), verbMatcher.group(5));
            return isConjugationInPresentOrFutureWrong(pronounPair, verbPair);
        }
        verbMatcher = PAST_VERB.matcher(verbPosTag);
        return verbMatcher.find()
                && isConjugationInPastWrong(pronounMatcher.group(1), verbMatcher.group(3));
    }

    /**
     * Checks agreement for verbs matched by {@link #FUT_REAL_VERB}.
     *
     * <p>The right-hand values must be equal. For the left-hand values: when the pronoun's is
     * one of {@code Masc}/{@code Fem}/{@code Neut}, only a {@code PL} verb counts as wrong;
     * otherwise the two left-hand values must be equal.
     *
     * @param pronoun groups 1 (left) and 2 (right) of the pronoun tag
     * @param verb    groups 4 (left) and 5 (right) of the verb tag
     * @return {@code true} if the pair disagrees
     */
    private boolean isConjugationInPresentOrFutureWrong(Pair<String, String> pronoun, Pair<String, String> verb) {
        if (!pronoun.getRight().equals(verb.getRight())) {
            return true;
        }
        if (GENDER_VALUES.contains(pronoun.getLeft())) {
            return "PL".equals(verb.getLeft());
        }
        return !pronoun.getLeft().equals(verb.getLeft());
    }

    /**
     * Checks agreement for verbs matched by {@link #PAST_VERB}.
     *
     * <p>A {@code Sin} pronoun is wrong only with a {@code PL} or {@code Neut} verb value;
     * any other pronoun value must equal the verb value exactly.
     *
     * @param pronoun group 1 of the pronoun tag
     * @param verb    group 3 of the past-tense verb tag
     * @return {@code true} if the pair disagrees
     */
    private boolean isConjugationInPastWrong(String pronoun, String verb) {
        if ("Sin".equals(pronoun)) {
            return "PL".equals(verb) || "Neut".equals(verb);
        }
        return !pronoun.equals(verb);
    }

    /**
     * Appends a match covering the pronoun through the end of the verb.
     *
     * @param ruleMatches    list that receives the new match
     * @param currentReading the pronoun token; its start position begins the match
     * @param nextReading    the verb token; its end position ends the match
     * @param sentence       the sentence both tokens belong to
     */
    private void addRuleMatch(List<RuleMatch> ruleMatches, AnalyzedTokenReadings currentReading, AnalyzedTokenReadings nextReading, AnalyzedSentence sentence) {
        RuleMatch ruleMatch = new RuleMatch(this, sentence, currentReading.getStartPos(), nextReading.getEndPos(), "Неверное спряжение глагола или неверное местоимение", getShort());
        ruleMatches.add(ruleMatch);
    }

    /**
     * Returns the short message attached to every match created by this rule.
     *
     * @return the short message text
     */
    protected String getShort() {
        return "Неверное спряжение глагола";
    }
}





© 2015 - 2024 Weber Informatics LLC | Privacy Policy