All Downloads are FREE. Search and download functionalities are using the official Maven repository.

org.languagetool.rules.Rule Maven / Gradle / Ivy

Go to download

LanguageTool is an Open Source proofreading software for English, French, German, Polish, Romanian, and more than 20 other languages. It finds many errors that a simple spell checker cannot detect like mixing up there/their and it detects some grammar problems.

There is a newer version: 6.5
Show newest version
/* LanguageTool, a natural language style checker 
 * Copyright (C) 2005 Daniel Naber (http://www.danielnaber.de)
 * 
 * This library is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2.1 of the License, or (at your option) any later version.
 *
 * This library is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with this library; if not, write to the Free Software
 * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301
 * USA
 */
package org.languagetool.rules;

import java.io.IOException;
import java.net.URL;
import java.util.*;

import org.jetbrains.annotations.Nullable;
import org.languagetool.AnalyzedSentence;
import org.languagetool.AnalyzedTokenReadings;
import org.languagetool.JLanguageTool;
import org.languagetool.Language;
import org.languagetool.rules.patterns.PatternToken;
import org.languagetool.tagging.disambiguation.rules.DisambiguationPatternRule;

/**
 * Abstract rule class. A Rule describes a language error and can test whether a
 * given pre-analyzed text contains that error using the {@link Rule#match(AnalyzedSentence)}
 * method.
 *
 * 

Rules are created whenever a {@link JLanguageTool} or * a {@link org.languagetool.MultiThreadedJLanguageTool} object is created. * As these objects are not thread-safe, this can happen often. Rules should thus * make sure that their initialization works fast. For example, if a rule needs * to load data from disk, it should store it in a static variable to make sure * the loading happens only once. * * @author Daniel Naber */ public abstract class Rule { protected final ResourceBundle messages; private List correctExamples = new ArrayList<>(); private List incorrectExamples = new ArrayList<>(); private List errorTriggeringExamples = new ArrayList<>(); private ITSIssueType locQualityIssueType = ITSIssueType.Uncategorized; private Category category; private URL url; private boolean defaultOff; private boolean officeDefaultOn = false; private boolean officeDefaultOff = false; public Rule() { this(null); } /** * Called by rules that require a translation of their messages. */ public Rule(ResourceBundle messages) { this.messages = messages; if (messages != null) { setCategory(Categories.MISC.getCategory(messages)); // the default, sub classes may overwrite this } else { setCategory(new Category(CategoryIds.MISC, "Miscellaneous")); } } /** * A string used to identify the rule in e.g. configuration files. * This string is supposed to be unique and to stay the same in all upcoming * versions of LanguageTool. It's supposed to contain only the characters {@code A-Z} * and the underscore. */ public abstract String getId(); /** * A short description of the error this rule can detect, usually in the language of the text * that is checked. */ public abstract String getDescription(); /** * Check whether the given sentence matches this error rule, i.e. whether it * contains the error detected by this rule. Note that the order in which * this method is called is not always guaranteed, i.e. the sentence order in the * text may be different than the order in which you get the sentences (this may be the * case when LanguageTool is used as a LibreOffice/OpenOffice add-on, for example). * * @param sentence a pre-analyzed sentence * @return an array of {@link RuleMatch} objects */ public abstract RuleMatch[] match(AnalyzedSentence sentence) throws IOException; /** * Overwrite this to avoid false alarms by ignoring these patterns - * note that your {@link #match(AnalyzedSentence)} method needs to * call {@link #getSentenceWithImmunization} for this to be used * and you need to check {@link AnalyzedTokenReadings#isImmunized()} * @since 3.1 */ public List getAntiPatterns() { return Collections.emptyList(); } /** * Overwrite this to set a default Integer value by option panel * @since 4.1 */ public void setDefaultValue(int num) { } /** * Overwrite this to get a default Integer value by option panel * @since 4.1 */ public int getDefaultValue() { return 0; } /** * To be called from {@link #match(AnalyzedSentence)} for rules that want * {@link #getAntiPatterns()} to be considered. * @since 3.1 */ protected AnalyzedSentence getSentenceWithImmunization(AnalyzedSentence sentence) { if (!getAntiPatterns().isEmpty()) { //we need a copy of the sentence, not reference to the old one AnalyzedSentence immunizedSentence = sentence.copy(sentence); for (DisambiguationPatternRule patternRule : getAntiPatterns()) { try { immunizedSentence = patternRule.replace(immunizedSentence); } catch (IOException e) { throw new RuntimeException(e); } } return immunizedSentence; } return sentence; } /** * Helper for implementing {@link #getAntiPatterns()}. * @since 3.1 */ protected List makeAntiPatterns(List> patternList, Language language) { List rules = new ArrayList<>(); for (List patternTokens : patternList) { rules.add(new DisambiguationPatternRule("INTERNAL_ANTIPATTERN", "(no description)", language, patternTokens, null, null, DisambiguationPatternRule.DisambiguatorAction.IMMUNIZE)); } return Collections.unmodifiableList(rules); } /** * Whether this rule can be used for text in the given language. * Since LanguageTool 2.6, this also works {@link org.languagetool.rules.patterns.PatternRule}s * (before, it used to always return {@code false} for those). */ public boolean supportsLanguage(Language language) { try { List> relevantRuleClasses = new ArrayList<>(); List relevantRules = language.getRelevantRules(JLanguageTool.getMessageBundle()); for (Rule relevantRule : relevantRules) { relevantRuleClasses.add(relevantRule.getClass()); } return relevantRuleClasses.contains(this.getClass()); } catch (IOException e) { throw new RuntimeException(e); } } /** * Whether this is a spelling rule that uses a dictionary. * Rules that return {@code true} here are basically rules that work like * a simple hunspell-like spellchecker: they check words without considering * the words' context. * @since 2.5 */ public boolean isDictionaryBasedSpellingRule() { return false; } /** * Whether this rule should be forced to be used in LO/OO extension. * Rules that return {@code true} will be enabled always in LO/OO extension * regardless of other options like isDictionaryBasedSpellingRule(). * @since 2.6 */ public boolean useInOffice() { return false; } /** * Set the examples that are correct and thus do not trigger the rule. */ public final void setCorrectExamples(List correctExamples) { this.correctExamples = Objects.requireNonNull(correctExamples); } /** * Get example sentences that are correct and thus will not match this rule. */ public final List getCorrectExamples() { return Collections.unmodifiableList(correctExamples); } /** * Set the examples that are incorrect and thus do trigger the rule. */ public final void setIncorrectExamples(List incorrectExamples) { this.incorrectExamples = Objects.requireNonNull(incorrectExamples); } /** * Get example sentences that are incorrect and thus will match this rule. */ public final List getIncorrectExamples() { return Collections.unmodifiableList(incorrectExamples); } /** * Set the examples that are correct but still trigger the rule due to an issue with the rule. * @since 3.5 */ public final void setErrorTriggeringExamples(List examples) { this.errorTriggeringExamples = Objects.requireNonNull(examples); } /** * Get the examples that are correct but still trigger the rule due to an issue with the rule. * @since 3.5 */ public final List getErrorTriggeringExamples() { return Collections.unmodifiableList(this.errorTriggeringExamples); } /** * @return a category (never null since LT 3.4) */ public final Category getCategory() { return category; } public final void setCategory(Category category) { this.category = Objects.requireNonNull(category, "category cannot be null"); } protected final RuleMatch[] toRuleMatchArray(List ruleMatches) { return ruleMatches.toArray(new RuleMatch[ruleMatches.size()]); } /** * Checks whether the rule has been turned off by default by the rule author. * @return True if the rule is turned off by default. */ public final boolean isDefaultOff() { return defaultOff; } /** * Turns the rule off by default. */ public final void setDefaultOff() { defaultOff = true; } /** * Turns the rule on by default. */ public final void setDefaultOn() { defaultOff = false; } /** * Checks whether the rule has been turned off by default for Office Extension by the rule author. * @return True if the rule is turned off. Overrides the default for LO/OO. * @since 4.0 */ public final boolean isOfficeDefaultOff() { return officeDefaultOff; } /** * Checks whether the rule has been turned on by default for Office Extension by the rule author. * @return True if the rule is turned on. Overrides the default for LO/OO. * @since 4.0 */ public final boolean isOfficeDefaultOn() { return officeDefaultOn; } /** * Turns the rule off for Office Extension by default. * @since 4.0 */ public final void setOfficeDefaultOff() { officeDefaultOff = true; } /** * Turns the rule on for Office Extension by default. * @since 4.0 */ public final void setOfficeDefaultOn() { officeDefaultOn = true; } /** * An optional URL describing the rule match in more detail. Typically points to a dictionary or grammar website * with explanations and examples. Will return {@code null} for rules that have no URL. * @since 1.8 */ @Nullable public URL getUrl() { return url; } /** * @since 1.8 * @see #getUrl() */ public void setUrl(URL url) { this.url = url; } /** * Returns the Localization Quality Issue Type, as defined * at http://www.w3.org/International/multilingualweb/lt/drafts/its20/its20.html#lqissue-typevalues. * *

Note that not all languages nor all rules actually map yet to a type yet. In those * cases, uncategorized is returned. * * @return the Localization Quality Issue Type - uncategorized if no type has been assigned * @since 2.5 */ public ITSIssueType getLocQualityIssueType() { return locQualityIssueType; } /** * Set the Localization Quality Issue Type. * @see #getLocQualityIssueType() * @since 2.5 */ public void setLocQualityIssueType(ITSIssueType locQualityIssueType) { this.locQualityIssueType = Objects.requireNonNull(locQualityIssueType); } /** * Convenience method to add a pair of sentences: an incorrect sentence and the same sentence * with the error corrected. * @since 2.5 */ protected void addExamplePair(IncorrectExample incorrectSentence, CorrectExample correctSentence) { incorrectExamples.add(incorrectSentence); correctExamples.add(correctSentence); } }





© 2015 - 2024 Weber Informatics LLC | Privacy Policy