All downloads are FREE. The search and download features use the official Maven repository.

org.languagetool.rules.Rule Maven / Gradle / Ivy

Go to download

LanguageTool is an Open Source proofreading software for English, French, German, Polish, Romanian, and more than 20 other languages. It finds many errors that a simple spell checker cannot detect, such as mixing up there/their, and it also detects some grammar problems.

There is a newer version: 6.5
Show newest version
/* LanguageTool, a natural language style checker 
 * Copyright (C) 2005 Daniel Naber (http://www.danielnaber.de)
 * 
 * This library is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2.1 of the License, or (at your option) any later version.
 *
 * This library is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with this library; if not, write to the Free Software
 * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301
 * USA
 */
package org.languagetool.rules;

import com.google.common.base.Suppliers;
import org.jetbrains.annotations.NotNull;
import org.jetbrains.annotations.Nullable;
import org.languagetool.*;
import org.languagetool.rules.patterns.PatternToken;
import org.languagetool.tagging.disambiguation.rules.DisambiguationPatternRule;

import java.io.IOException;
import java.net.URL;
import java.util.*;
import java.util.function.Supplier;

/**
 * Abstract rule class. A Rule describes a language error and can test whether a
 * given pre-analyzed text contains that error using the {@link Rule#match(AnalyzedSentence)}
 * method.
 *
 * 

Rules are created whenever a {@link JLanguageTool} or * a {@link org.languagetool.MultiThreadedJLanguageTool} object is created. * As these objects are not thread-safe, this can happen often. Rules should thus * make sure that their initialization works fast. For example, if a rule needs * to load data from disk, it should store it in a static variable to make sure * the loading happens only once. * * Rules also need to make sure their {@code match()} code is stateless, i.e. that * its results are not influenced by previous calls to {@code match()} (this is relevant * if pipeline caching is used). * * @author Daniel Naber */ public abstract class Rule { private static final Category MISC = new Category(CategoryIds.MISC, "Miscellaneous"); protected final ResourceBundle messages; @Nullable private List tags; @Nullable private List toneTags; private boolean isGoalSpecific = false; private List correctExamples; private List incorrectExamples; private List errorTriggeringExamples; private ITSIssueType locQualityIssueType = ITSIssueType.Uncategorized; private Category category; private URL url; private boolean isPremium; private boolean defaultOff; private boolean defaultTempOff; private boolean officeDefaultOn = false; private boolean officeDefaultOff = false; private int minPrevMatches = 0; // minimum number of previous matches to show the rule private int distanceTokens = -1; // distance (number of tokens) between matches to consider a repetition public Rule() { this(null); } /** * Called by rules that require a translation of their messages. */ public Rule(ResourceBundle messages) { this.messages = messages; if (messages != null) { setCategory(Categories.MISC.getCategory(messages)); // the default, sub classes may overwrite this } else { setCategory(MISC); } } /** * A string used to identify the rule in e.g. configuration files. * This string is supposed to be unique and to stay the same in all upcoming * versions of LanguageTool. 
It's supposed to contain only the characters {@code A-Z} * and the underscore. */ public abstract String getId(); /** * Same as {@link #getId()} for Java rules. For XML rules, this can contain a number * that identifies the subrule of a rule group. * @since 4.9 */ public String getFullId() { if (getSubId() != null) { return getId() + "[" + getSubId() + "]"; } else { return getId(); } } @Nullable /** * Optional, mostly used for XML rules (pulled from there to all rules for uniformity) * @since 5.7 */ public String getSubId() { return null; } @Nullable /** * Optional, mostly used for XML rules (pulled from there to all rules for uniformity) * For XML rules, this returns the file that this rule was loaded from */ public String getSourceFile() { return null; } /** * A short description of the error this rule can detect, usually in the language of the text * that is checked. */ public abstract String getDescription(); /** * Check whether the given sentence matches this error rule, i.e. whether it * contains the error detected by this rule. Note that the order in which * this method is called is not always guaranteed, i.e. the sentence order in the * text may be different from the order in which you get the sentences (this may be the * case when LanguageTool is used as a LibreOffice/OpenOffice add-on, for example). * In other words, implementations must be stateless, so that a previous call to * this method has no influence on later calls. * * @param sentence a pre-analyzed sentence * @return an array of {@link RuleMatch} objects */ public abstract RuleMatch[] match(AnalyzedSentence sentence) throws IOException; /** * A number that estimates how many words there must be after a match before we * can be (relatively) sure the match is valid. This is useful for check-as-you-type, * where a match might occur and the word that gets typed next makes the match * disappear (something one would obviously like to avoid). * Note: this may over-estimate the real context size. 
* Returns {@code -1} when the sentence needs to end to be sure there's a match. * @since 4.5 */ public int estimateContextForSureMatch() { return 0; } /** * Overwrite this to avoid false alarms by ignoring these patterns - * note that your {@link #match(AnalyzedSentence)} method needs to * call {@link #getSentenceWithImmunization} for this to be used * and you need to check {@link AnalyzedTokenReadings#isImmunized()} * @since 3.1 */ public List getAntiPatterns() { return Collections.emptyList(); } /** * Overwrite this to return true, if a value may be configured by option panel * @since 4.2 */ public boolean hasConfigurableValue() { return false; } /** * Overwrite this to get a default Integer value by option panel * @since 4.1 */ public int getDefaultValue() { return 0; } /** * Overwrite this to define the minimum of a configurable value * @since 4.2 */ public int getMinConfigurableValue() { return 0; } /** * Overwrite this to define the maximum of a configurable value * @since 4.2 */ public int getMaxConfigurableValue() { return 100; } /** * Overwrite this to define the Text in the option panel for the configurable value * @since 4.2 */ public String getConfigureText() { return ""; } /** * To be called from {@link #match(AnalyzedSentence)} for rules that want * {@link #getAntiPatterns()} to be considered. * @since 3.1 */ protected AnalyzedSentence getSentenceWithImmunization(AnalyzedSentence sentence) { if (!getAntiPatterns().isEmpty()) { //we need a copy of the sentence, not reference to the old one AnalyzedSentence immunizedSentence = sentence.copy(sentence); for (DisambiguationPatternRule patternRule : getAntiPatterns()) { try { immunizedSentence = patternRule.replace(immunizedSentence); } catch (IOException e) { throw new RuntimeException(e); } } return immunizedSentence; } return sentence; } /** * Helper for implementing {@link #getAntiPatterns()}. The result of this method should better be cached, please see * {@link #cacheAntiPatterns} which does that. 
* @since 3.1 */ protected static List makeAntiPatterns(List> patternList, Language language) { List rules = new ArrayList<>(); for (List patternTokens : patternList) { rules.add(new DisambiguationPatternRule("INTERNAL_ANTIPATTERN", "(no description)", language, patternTokens, null, null, DisambiguationPatternRule.DisambiguatorAction.IMMUNIZE)); } return rules; } /** * @return a memoizing supplier that caches the result of {@link #makeAntiPatterns}. It makes sense * to store the returned value, e.g. in a field. * @since 5.2 */ protected static Supplier> cacheAntiPatterns(Language language, List> antiPatterns) { return Suppliers.memoize(() -> makeAntiPatterns(antiPatterns, language)); } /** * Whether this rule can be used for text in the given language. * Since LanguageTool 2.6, this also works {@link org.languagetool.rules.patterns.PatternRule}s * (before, it used to always return {@code false} for those). */ public boolean supportsLanguage(Language language) { try { List> relevantRuleClasses = new ArrayList<>(); UserConfig config = new UserConfig(); List relevantRules = new ArrayList<>(language.getRelevantRules(JLanguageTool.getMessageBundle(), config, null, Collections.emptyList())); // empty UserConfig has to be added to prevent null pointer exception relevantRules.addAll(language.getRelevantLanguageModelCapableRules(JLanguageTool.getMessageBundle(), null, null, config, null, Collections.emptyList())); for (Rule relevantRule : relevantRules) { relevantRuleClasses.add(relevantRule.getClass()); } return relevantRuleClasses.contains(this.getClass()); } catch (IOException e) { throw new RuntimeException(e); } } /** * Whether this is a spelling rule that uses a dictionary. * Rules that return {@code true} here are basically rules that work like * a simple hunspell-like spellchecker: they check words without considering * the words' context. 
* @since 2.5 */ public boolean isDictionaryBasedSpellingRule() { return false; } /** * Whether this rule should be forced to be used in LO/OO extension. * Rules that return {@code true} will be enabled always in LO/OO extension * regardless of other options like isDictionaryBasedSpellingRule(). * @since 2.6 */ public boolean useInOffice() { return false; } /** * Set the examples that are correct and thus do not trigger the rule. */ public final void setCorrectExamples(List correctExamples) { this.correctExamples = correctExamples.isEmpty() ? null : correctExamples; } /** * Get example sentences that are correct and thus will not match this rule. */ public final List getCorrectExamples() { return correctExamples == null ? Collections.emptyList() : Collections.unmodifiableList(correctExamples); } /** * Set the examples that are incorrect and thus do trigger the rule. */ public final void setIncorrectExamples(List incorrectExamples) { this.incorrectExamples = incorrectExamples.isEmpty() ? null : incorrectExamples; } /** * Get example sentences that are incorrect and thus will match this rule. */ public final List getIncorrectExamples() { return incorrectExamples == null ? Collections.emptyList() : Collections.unmodifiableList(incorrectExamples); } /** * Set the examples that are correct but still trigger the rule due to an issue with the rule. * @since 3.5 */ public final void setErrorTriggeringExamples(List examples) { this.errorTriggeringExamples = examples.isEmpty() ? null : examples; } /** * Get the examples that are correct but still trigger the rule due to an issue with the rule. * @since 3.5 */ public final List getErrorTriggeringExamples() { return errorTriggeringExamples == null ? 
Collections.emptyList() : Collections.unmodifiableList(this.errorTriggeringExamples); } /** * @return a category (never null since LT 3.4) */ @NotNull public Category getCategory() { return category; } public final void setCategory(Category category) { this.category = Objects.requireNonNull(category, "category cannot be null"); } protected final RuleMatch[] toRuleMatchArray(List ruleMatches) { return ruleMatches.toArray(new RuleMatch[0]); } /** * Checks whether the rule has been turned off by default by the rule author. * @return True if the rule is turned off by default. */ public final boolean isDefaultOff() { return defaultOff; } /** * Checks whether the rule has been turned off using "default='temp_off'" by default by the rule author. * This is a special case where the rule is off for users but active for nightly regression checks. */ public final boolean isDefaultTempOff() { return defaultTempOff; } /** * Turns the rule off by default. */ public final void setDefaultOff() { defaultOff = true; } /** * Turns the pattern rule off by default, expect for internal regression tests. */ public final void setDefaultTempOff() { defaultOff = true; defaultTempOff = true; } /** * Turns the rule on by default. */ public final void setDefaultOn() { defaultOff = false; } /** * Checks whether the rule has been turned off by default for Office Extension by the rule author. * @return True if the rule is turned off. Overrides the default for LO/OO. * @since 4.0 */ public final boolean isOfficeDefaultOff() { return officeDefaultOff; } /** * Checks whether the rule has been turned on by default for Office Extension by the rule author. * @return True if the rule is turned on. Overrides the default for LO/OO. * @since 4.0 */ public final boolean isOfficeDefaultOn() { return officeDefaultOn; } /** * Turns the rule off for Office Extension by default. 
* @since 4.0 */ public final void setOfficeDefaultOff() { officeDefaultOff = true; } /** * Turns the rule on for Office Extension by default. * @since 4.0 */ public final void setOfficeDefaultOn() { officeDefaultOn = true; } /** * An optional URL describing the rule match in more detail. Typically points to a dictionary or grammar website * with explanations and examples. Will return {@code null} for rules that have no URL. * @since 1.8 */ @Nullable public URL getUrl() { return url; } /** * @since 1.8 * @see #getUrl() */ public void setUrl(URL url) { this.url = url; } /** * Returns the Localization Quality Issue Type, as defined * at http://www.w3.org/International/multilingualweb/lt/drafts/its20/its20.html#lqissue-typevalues. * *

Note that not all languages nor all rules actually map yet to a type yet. In those * cases, uncategorized is returned. * * @return the Localization Quality Issue Type - uncategorized if no type has been assigned * @since 2.5 */ public ITSIssueType getLocQualityIssueType() { return locQualityIssueType; } /** * Set the Localization Quality Issue Type. * @see #getLocQualityIssueType() * @since 2.5 */ public void setLocQualityIssueType(ITSIssueType locQualityIssueType) { this.locQualityIssueType = Objects.requireNonNull(locQualityIssueType); } /** * Convenience method to add a pair of sentences: an incorrect sentence and the same sentence * with the error corrected. * @since 2.5 */ protected void addExamplePair(IncorrectExample incorrectSentence, CorrectExample correctSentence) { if (correctExamples == null) { correctExamples = new ArrayList<>(0); } if (incorrectExamples == null) { incorrectExamples = new ArrayList<>(0); } String correctExample = correctSentence.getExample(); int markerStart= correctExample.indexOf(""); int markerEnd = correctExample.indexOf(""); if (markerStart != -1 && markerEnd != -1) { List correction = Collections.singletonList(correctExample.substring(markerStart + "".length(), markerEnd)); incorrectExamples.add(new IncorrectExample(incorrectSentence.getExample(), correction)); } else { incorrectExamples.add(incorrectSentence); } correctExamples.add(correctSentence); } /** * Convenience method to set a pair of sentences: an incorrect sentence and the same sentence * with the error corrected. 
* @since 4.9 */ protected void setExamplePair(IncorrectExample incorrectSentence, CorrectExample correctSentence) { if (incorrectExamples != null) { incorrectExamples.clear(); } if (correctSentence != null) { correctExamples.clear(); } addExamplePair(incorrectSentence, correctSentence); } /** * @since 5.1 */ public void addTags(List tags) { if (tags.isEmpty()) return; List myTags = this.tags; if (myTags == null) { this.tags = myTags = new ArrayList<>(); } for (String tag : tags) { if (myTags.stream().noneMatch(k -> k.name().equals(tag))) { myTags.add(Tag.valueOf(tag)); } } } /** * @since 5.1 */ public void setTags(List tags) { this.tags = tags.isEmpty() ? null : Objects.requireNonNull(tags); } /** @since 5.1 */ @NotNull public List getTags() { return tags == null ? Collections.emptyList() : tags; } /** @since 5.1 */ public boolean hasTag(Tag tag) { return tags != null && tags.contains(tag); } /** * @since 6.2 */ public void addToneTags(List toneTags) { if (toneTags == null || toneTags.isEmpty()) { return; } List tags = this.toneTags; if (tags == null) { this.toneTags = tags = new ArrayList<>(); } for (String toneTag : toneTags) { if (tags.stream().noneMatch(k -> k.name().equals(toneTag))) { tags.add(ToneTag.valueOf(toneTag)); } } } /** * @since 6.2 */ public void setToneTags(List toneTags) { this.toneTags = toneTags.isEmpty() ? null : Objects.requireNonNull(toneTags); } /** * @since 6.2 */ @NotNull public List getToneTags() { return this.toneTags == null ? 
Collections.emptyList() : this.toneTags; } /** * @since 6.2 */ public boolean hasToneTag(ToneTag toneTag) { return this.toneTags != null && this.toneTags.contains(toneTag); } public boolean isPremium() { return isPremium; } public void setPremium(boolean premium) { isPremium = premium; } public void setMinPrevMatches(int i) { minPrevMatches = i; } public int getMinPrevMatches() { return minPrevMatches; } public void setDistanceTokens(int i) { distanceTokens = i; } public int getDistanceTokens() { return distanceTokens; } public boolean isGoalSpecific() { return isGoalSpecific; } public void setGoalSpecific(boolean goalSpecific) { isGoalSpecific = goalSpecific; } }





© 2015 - 2024 Weber Informatics LLC | Privacy Policy