Many resources are needed to download a project. Please understand that we have to compensate our server costs. Thank you in advance. Project price only 1 $
You can buy this project and download/modify it how often you want.
/* LanguageTool, a natural language style checker
* Copyright (C) 2013 Andriy Rysin
*
* This library is free software; you can redistribute it and/or
* modify it under the terms of the GNU Lesser General Public
* License as published by the Free Software Foundation; either
* version 2.1 of the License, or (at your option) any later version.
*
* This library is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public
* License along with this library; if not, write to the Free Software
* Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301
* USA
*/
package org.languagetool.rules.uk;
import java.io.IOException;
import java.text.MessageFormat;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collections;
import java.util.LinkedHashSet;
import java.util.List;
import java.util.ResourceBundle;
import java.util.regex.Pattern;
import org.apache.commons.lang3.StringUtils;
import org.languagetool.AnalyzedSentence;
import org.languagetool.AnalyzedToken;
import org.languagetool.AnalyzedTokenReadings;
import org.languagetool.JLanguageTool;
import org.languagetool.rules.Categories;
import org.languagetool.rules.Rule;
import org.languagetool.rules.RuleMatch;
import org.languagetool.tagging.uk.PosTagHelper;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
/**
* A rule that checks if noun and verb agree
*
* @author Andriy Rysin
* @since 3.6
*/
public class TokenAgreementNounVerbRule extends Rule {
private static final Logger logger = LoggerFactory.getLogger(TokenAgreementNounVerbRule.class);
private static final Pattern NOUN_V_NAZ_PATTERN = Pattern.compile("noun.*:v_naz.*");
public TokenAgreementNounVerbRule(ResourceBundle messages) throws IOException {
super.setCategory(Categories.MISC.getCategory(messages));
// setDefaultOff();
}
@Override
public final String getId() {
return "UK_NOUN_VERB_INFLECTION_AGREEMENT";
}
@Override
public String getDescription() {
return "Узгодження іменника та дієслова за родом, числом та особою";
}
public String getShort() {
return "Узгодження іменника з дієсловом";
}
/**
* Indicates if the rule is case-sensitive.
* @return true if the rule is case-sensitive, false otherwise.
*/
public boolean isCaseSensitive() {
return false;
}
private static class State {
int nounPos;
List nounTokenReadings = new ArrayList<>();
AnalyzedTokenReadings nounAnalyzedTokenReadings = null;
List adjTokenReadings = new ArrayList<>();
}
@Override
public final RuleMatch[] match(AnalyzedSentence sentence) {
List ruleMatches = new ArrayList<>();
AnalyzedTokenReadings[] tokens = sentence.getTokensWithoutWhitespace();
State state = null;
for (int i = 1; i < tokens.length; i++) {
AnalyzedTokenReadings tokenReadings = tokens[i];
String posTag0 = tokenReadings.getAnalyzedToken(0).getPOSTag();
if( posTag0 == null ) {
state = null;
continue;
}
if( state == null ) {
// no need to start checking on last token or if no noun
if( i == tokens.length - 1 )
continue;
}
// if( LemmaHelper.hasLemma(tokenReadings, Arrays.asList("як")) ) {
// state = null;
// continue;
// }
if( PosTagHelper.hasPosTag(tokenReadings, NOUN_V_NAZ_PATTERN)
|| Arrays.asList("яка").contains(tokenReadings.getToken()) ) {
state = new State();
for (AnalyzedToken token: tokenReadings) {
String nounPosTag = token.getPOSTag();
if( nounPosTag == null ) { // can happen for words with \u0301 or \u00AD
continue;
}
// if( nounPosTag.startsWith("<") ) {
// state = null;
// break;
// }
if( "який".equals(token.getLemma()) && token.getPOSTag().contains(":f:v_naz") ) {
state.nounPos = i;
state.nounTokenReadings.add(token);
state.nounAnalyzedTokenReadings = tokenReadings;
}
else if( nounPosTag.startsWith("noun") && nounPosTag.contains("v_naz") ) {
state.nounPos = i;
state.nounTokenReadings.add(token);
state.nounAnalyzedTokenReadings = tokenReadings;
}
else if( nounPosTag.startsWith("noun") && nounPosTag.contains("v_kly") ) {
// ignore
}
else if( token.getPOSTag().matches("adj:.:(v_naz|v_kly).*")
|| (token.getPOSTag().startsWith("adj:m:v_zna:rinanim")
&& ! PosTagHelper.hasPosTagStart(tokens[i-1], "prep"))
&& ! Arrays.asList("кожен", "інший", "старий", "черговий").contains(token.getToken().toLowerCase()) ) {
state.adjTokenReadings.add(token);
}
else {
state = null;
break;
}
}
continue;
}
if( state == null )
continue;
if( Arrays.asList("не", "б", "би", "бодай").contains(tokenReadings.getToken()) )
continue;
if( PosTagHelper.hasPosTagPartAll(tokenReadings, "adv") )
continue;
// see if we get a following verb
// System.err.println("Check for verb: " + tokenReadings);
List verbTokenReadings = new ArrayList<>();
for (AnalyzedToken token: tokenReadings) {
String verbPosTag = token.getPOSTag();
if( verbPosTag == null // can happen for words with \u0301 or \u00AD
|| verbPosTag.equals(JLanguageTool.SENTENCE_END_TAGNAME)
|| verbPosTag.equals(JLanguageTool.PARAGRAPH_END_TAGNAME)) {
continue;
}
if( verbPosTag.startsWith("<") ) {
verbTokenReadings.clear();
break;
}
if( verbPosTag.startsWith("verb") ) {
verbTokenReadings.add(token);
}
else {
verbTokenReadings.clear();
break;
}
}
// no slave token - restart
if( verbTokenReadings.isEmpty() ) {
state = null;
continue;
}
logger.debug("=== Checking\n\t{}\n\t{}", state.nounTokenReadings, verbTokenReadings);
// perform the check
List masterInflections = VerbInflectionHelper.getNounInflections(state.nounTokenReadings);
List slaveInflections = VerbInflectionHelper.getVerbInflections(verbTokenReadings);
logger.debug("\t\t{}\n\t{}", masterInflections, slaveInflections);
if( Collections.disjoint(masterInflections, slaveInflections) ) {
if( TokenAgreementNounVerbExceptionHelper.isException(tokens, state.nounPos, i, masterInflections, slaveInflections, state.nounTokenReadings, verbTokenReadings)) {
state.nounTokenReadings.clear();
break;
}
if( logger.isDebugEnabled() ) {
logger.debug(MessageFormat.format("=== Found noun/verb mismatch\n\t{0}\n\t{1}",
state.nounAnalyzedTokenReadings.getToken() + ": " + masterInflections + " // " + state.nounAnalyzedTokenReadings,
verbTokenReadings.get(0).getToken() + ": " + slaveInflections+ " // " + verbTokenReadings));
}
String msg = String.format("Не узгоджено %s з дієсловом: \"%s\" (%s) і \"%s\" (%s)",
LemmaHelper.hasLemma(state.nounTokenReadings, Arrays.asList("який")) ? "займенник" : "іменник",
state.nounTokenReadings.get(0).getToken(), formatInflections(masterInflections, true),
verbTokenReadings.get(0).getToken(), formatInflections(slaveInflections, false));
RuleMatch potentialRuleMatch = new RuleMatch(this, sentence, state.nounAnalyzedTokenReadings.getStartPos(), tokenReadings.getEndPos(), msg, getShort());
ruleMatches.add(potentialRuleMatch);
}
state = null;
}
return toRuleMatchArray(ruleMatches);
}
private static String formatInflections(List inflections, boolean noun) {
Collections.sort(inflections);
List list = new ArrayList<>();
for (VerbInflectionHelper.Inflection inflection : inflections) {
String str = "";
if (inflection.gender != null) {
str = PosTagHelper.GENDER_MAP.get(inflection.gender);
}
else {
if( inflection.person != null ) {
str = PosTagHelper.PERSON_MAP.get(inflection.person);
}
if( inflection.plural != null ) {
if( str.length() > 0 ) {
str += " ";
}
str += PosTagHelper.GENDER_MAP.get(inflection.plural);
}
}
list.add(str);
}
LinkedHashSet uniqeList = new LinkedHashSet<>(list);
return StringUtils.join(uniqeList, ", ");
}
}