org.cogroo.tools.checker.checkers.RepetitionChecker Maven / Gradle / Ivy
Go to download
Show more of this group Show more artifacts with this name
Show all versions of cogroo-gc Show documentation
Show all versions of cogroo-gc Show documentation
Annotators specialized in grammar checking.
/**
* Copyright (C) 2012 cogroo
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.cogroo.tools.checker.checkers;
import java.util.ArrayList;
import java.util.LinkedList;
import java.util.List;
import org.cogroo.entities.Mistake;
import org.cogroo.entities.Sentence;
import org.cogroo.entities.Token;
import org.cogroo.tools.checker.AbstractTypedChecker;
import org.cogroo.tools.checker.JavaRuleDefinition;
import org.cogroo.tools.checker.RuleDefinition;
import org.cogroo.tools.checker.rules.model.Example;
import org.cogroo.tools.checker.rules.model.TagMask.Class;
/**
*
* The RepetitionChecker class looks in a sentence for consecutive repeated
* words, comparing them case-insensitively. A repetition is not reported when
* the first word is "se" used as a subordinating conjunction followed by "se"
* used as a personal pronoun, nor when the first word is "a" used as a
* preposition followed by "a" used as an article.
*
*
* Known limitation: it may report false errors when dealing with contractions, for example: "em no (em + o)", "por pelo (por + o)"
*
*
*/
public class RepetitionChecker extends AbstractTypedChecker {

  /** Prefix shared by every rule identifier produced by this checker. */
  private static final String ID_PREFIX = "repetition:";

  /** Identifier of the single rule this checker registers. */
  static final String ID = ID_PREFIX + "DUPLICATED_TOKEN";
  // The texts below are passed to the rule definition as-is; they are runtime
  // strings in Portuguese and must not be translated.
  static final String CATEGORY = "Erros mecânicos";
  static final String GROUP = "Repetição de palavras";
  static final String DESCRIPTION = "Procura por palavras consecutivas repetidas.";
  static final String MESSAGE = "Verifique a repetição de palavras.";
  static final String SHORT = "Repetição de palavras.";

  /**
   * Creates the checker and registers its rule definition together with one
   * incorrect/correct example pair.
   */
  public RepetitionChecker() {
    List<Example> examples = new ArrayList<Example>();
    examples.add(createExample("Ele ele foi ao mercado.",
        "Ele foi ao mercado."));
    RuleDefinition repetition = new JavaRuleDefinition(ID, CATEGORY,
        GROUP, DESCRIPTION, MESSAGE, SHORT, examples);
    add(repetition);
  }

  /**
   * @return the prefix used by the identifiers of this checker's rules
   */
  public String getIdPrefix() {
    return ID_PREFIX;
  }

  /**
   * Looks for pairs of adjacent tokens whose lexemes are equal ignoring case
   * and reports each pair that is not a known exception.
   *
   * @param sentence
   *          the sentence to inspect
   * @return one mistake per offending pair, spanning from the start of the
   *         first token to the end of the second one; an empty list when the
   *         sentence has no tokens or no repetition
   */
  public List<Mistake> check(Sentence sentence) {
    List<Mistake> mistakes = new LinkedList<Mistake>();
    List<Token> tokens = sentence.getTokens();
    // A sentence without tokens cannot contain a repetition; bail out before
    // touching the first element.
    if (tokens == null || tokens.isEmpty()) {
      return mistakes;
    }

    // Token spans are added to the sentence start to build the mistake span.
    int offset = sentence.getSpan().getStart();
    String previous = tokens.get(0).getLexeme().toLowerCase();
    for (int i = 1; i < tokens.size(); i++) {
      String current = tokens.get(i).getLexeme().toLowerCase();
      if (previous.equals(current) && !isException(tokens, i)) {
        Token first = tokens.get(i - 1);
        int start = first.getSpan().getStart() + offset;
        int end = tokens.get(i).getSpan().getEnd() + offset;
        // The suggestion keeps a single occurrence, with its original casing.
        mistakes.add(createMistake(ID, createSuggestion(first.getLexeme()),
            start, end, sentence.getSentence()));
      }
      previous = current;
    }
    return mistakes;
  }

  /**
   * Tells whether the repeated pair ending at index {@code i} is a legitimate
   * repetition that must not be reported.
   *
   * @param tokens
   *          the tokens of the sentence
   * @param i
   *          index of the second token of the pair; must be at least 1
   * @return {@code true} if the pair should be ignored
   */
  private boolean isException(List<Token> tokens, int i) {
    Token previous = tokens.get(i - 1);
    Token current = tokens.get(i);

    // Two tokens starting at the same position come from one contraction.
    if (previous.getSpan().getStart() == current.getSpan().getStart()) {
      return true;
    }

    String word = current.getLexeme().toLowerCase();
    // Tags are only consulted where they matter, and compared constant-first
    // so that a token without a tag is simply not treated as an exception.
    if (word.equals("se")) {
      return Class.SUBORDINATING_CONJUNCTION.equals(tagClass(previous))
          && Class.PERSONAL_PRONOUN.equals(tagClass(current));
    }
    if (word.equals("a")) {
      return Class.PREPOSITION.equals(tagClass(previous))
          && Class.ARTICLE.equals(tagClass(current));
    }

    // TODO Exception: "em" should probably be accepted, as in
    // "Os livros devem ser devolvidos em no máximo dez dias."
    // TODO Exception: "e" should probably be accepted, as in
    // "As linhas E e F do metrô serão interditadas."

    // TODO: Remove this exception.
    return word.equals("1");
  }

  /**
   * Returns the class of the token's morphological tag, or {@code null} when
   * the token carries no tag.
   */
  private static Class tagClass(Token token) {
    return token.getMorphologicalTag() == null ? null
        : token.getMorphologicalTag().getClazzE();
  }

  /**
   * Wraps the given text in a one-element suggestion array.
   *
   * @param error
   *          the text to suggest
   * @return an array containing only {@code error}
   */
  private String[] createSuggestion(String error) {
    return new String[] { error };
  }

  /**
   * @return the priority value of this checker, fixed at 190
   */
  public int getPriority() {
    return 190;
  }
}