org.languagetool.rules.Rule Maven / Gradle / Ivy

Go to download
Show more of this group Show more artifacts with this name
Show all versions of languagetool-core Show documentation
LanguageTool is open-source proofreading software for English, French, German, Polish, Romanian, and more than 20 other languages. It finds many errors that a simple spell checker cannot detect, such as mixing up there/their, and it also detects some grammar problems.
There is a newer version: 6.4
Show newest version
/* LanguageTool, a natural language style checker 
 * Copyright (C) 2005 Daniel Naber (http://www.danielnaber.de)
 * 
 * This library is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2.1 of the License, or (at your option) any later version.
 *
 * This library is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with this library; if not, write to the Free Software
 * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301
 * USA
 */
package org.languagetool.rules;

import com.google.common.base.Suppliers;
import org.jetbrains.annotations.NotNull;
import org.jetbrains.annotations.Nullable;
import org.languagetool.*;
import org.languagetool.rules.patterns.PatternToken;
import org.languagetool.tagging.disambiguation.rules.DisambiguationPatternRule;

import java.io.IOException;
import java.net.URL;
import java.util.*;
import java.util.function.Supplier;

/**
 * Abstract rule class. A Rule describes a language error and can test whether a
 * given pre-analyzed text contains that error using the {@link Rule#match(AnalyzedSentence)}
 * method.
 *
 * Rules are created whenever a {@link JLanguageTool} or
 * a {@link org.languagetool.MultiThreadedJLanguageTool} object is created.
 * As these objects are not thread-safe, this can happen often. Rules should thus
 * make sure that their initialization works fast. For example, if a rule needs
 * to load data from disk, it should store it in a static variable to make sure
 * the loading happens only once.
 *
 * Rules also need to make sure their {@code match()} code is stateless, i.e. that
 * its results are not influenced by previous calls to {@code match()} (this is relevant
 * if pipeline caching is used).
 *
 * @author Daniel Naber
 */
public abstract class Rule {

  private static final Category MISC = new Category(CategoryIds.MISC, "Miscellaneous");

  protected final ResourceBundle messages;

  @Nullable
  private List tags;

  private List correctExamples;
  private List incorrectExamples;
  private List errorTriggeringExamples;
  private ITSIssueType locQualityIssueType = ITSIssueType.Uncategorized;
  private Category category;
  private URL url;
  private boolean isPremium;
  private boolean defaultOff;
  private boolean defaultTempOff;
  private boolean officeDefaultOn = false;
  private boolean officeDefaultOff = false;
  private int minPrevMatches = 0; // minimum number of previous matches to show the rule
  private int distanceTokens = -1; // distance (number of tokens) between matches to consider a repetition

  public Rule() {
    this(null);
  }

  /**
   * Called by rules that require a translation of their messages.
   */
  public Rule(ResourceBundle messages) {
    this.messages = messages;
    if (messages != null) {
      setCategory(Categories.MISC.getCategory(messages));  // the default, sub classes may overwrite this
    } else {
      setCategory(MISC);
    }
  }

  /**
   * A string used to identify the rule in e.g. configuration files.
   * This string is supposed to be unique and to stay the same in all upcoming
   * versions of LanguageTool. It's supposed to contain only the characters {@code A-Z} 
   * and the underscore.
   */
  public abstract String getId();

  /**
   * Same as {@link #getId()} for Java rules. For XML rules, this can contain a number
   * that identifies the subrule of a rule group.
   * @since 4.9
   */
  public String getFullId() {
    if (getSubId() != null) {
      return getId() + "[" + getSubId() + "]";
    } else {
      return getId();
    }
  }

  @Nullable
  /**
   * Optional, mostly used for XML rules (pulled from there to all rules for uniformity)
   * @since 5.7
   */
  public String getSubId() {
    return null;
  }

  @Nullable
  /**
   * Optional, mostly used for XML rules (pulled from there to all rules for uniformity)
   * For XML rules, this returns the file that this rule was loaded from
   */
  public String getSourceFile() {
    return null;
  }

  /**
   * A short description of the error this rule can detect, usually in the language of the text
   * that is checked.
   */
  public abstract String getDescription();

  /**
   * Check whether the given sentence matches this error rule, i.e. whether it
   * contains the error detected by this rule. Note that the order in which
   * this method is called is not always guaranteed, i.e. the sentence order in the
   * text may be different from the order in which you get the sentences (this may be the
   * case when LanguageTool is used as a LibreOffice/OpenOffice add-on, for example).
   * In other words, implementations must be stateless, so that a previous call to
   * this method has no influence on later calls.
   *
   * @param sentence a pre-analyzed sentence
   * @return an array of {@link RuleMatch} objects
   */
  public abstract RuleMatch[] match(AnalyzedSentence sentence) throws IOException;

  /**
   * A number that estimates how many words there must be after a match before we
   * can be (relatively) sure the match is valid. This is useful for check-as-you-type,
   * where a match might occur and the word that gets typed next makes the match
   * disappear (something one would obviously like to avoid).
   * Note: this may over-estimate the real context size.
   * Returns {@code -1} when the sentence needs to end to be sure there's a match.
   * @since 4.5
   */
  public int estimateContextForSureMatch() {
    return 0;
  }
    
  /**
   * Overwrite this to avoid false alarms by ignoring these patterns -
   * note that your {@link #match(AnalyzedSentence)} method needs to
   * call {@link #getSentenceWithImmunization} for this to be used
   * and you need to check {@link AnalyzedTokenReadings#isImmunized()}
   * @since 3.1
   */
  public List getAntiPatterns() {
    return Collections.emptyList();
  }

  /**
   * Overwrite this to return true, if a value may be configured by option panel
   * @since 4.2
   */
  public boolean hasConfigurableValue() {
    return false;
  }

  /**
   * Overwrite this to get a default Integer value by option panel
   * @since 4.1
   */
  public int getDefaultValue() {
    return 0;
  }

  /**
   * Overwrite this to define the minimum of a configurable value
   * @since 4.2
   */
  public int getMinConfigurableValue() {
    return 0;
  }

  /**
   * Overwrite this to define the maximum of a configurable value
   * @since 4.2
   */
  public int getMaxConfigurableValue() {
    return 100;
  }

  /**
   * Overwrite this to define the Text in the option panel for the configurable value
   * @since 4.2
   */
  public String getConfigureText() {
    return "";
  }

  /**
   * To be called from {@link #match(AnalyzedSentence)} for rules that want
   * {@link #getAntiPatterns()} to be considered.
   * @since 3.1
   */
  protected AnalyzedSentence getSentenceWithImmunization(AnalyzedSentence sentence) {
    if (!getAntiPatterns().isEmpty()) {
      //we need a copy of the sentence, not reference to the old one
      AnalyzedSentence immunizedSentence = sentence.copy(sentence);
      for (DisambiguationPatternRule patternRule : getAntiPatterns()) {
        try {
          immunizedSentence = patternRule.replace(immunizedSentence);
        } catch (IOException e) {
          throw new RuntimeException(e);
        }
      }
      return immunizedSentence;
    }
    return sentence;
  }

  /**
   * Helper for implementing {@link #getAntiPatterns()}. The result of this method should better be cached, please see
   * {@link #cacheAntiPatterns} which does that.
   * @since 3.1
   */
  protected static List makeAntiPatterns(List> patternList, Language language) {
    List rules = new ArrayList<>();
    for (List patternTokens : patternList) {
      rules.add(new DisambiguationPatternRule("INTERNAL_ANTIPATTERN", "(no description)", language,
              patternTokens, null, null, DisambiguationPatternRule.DisambiguatorAction.IMMUNIZE));
    }
    return rules;
  }

  /**
   * @return a memoizing supplier that caches the result of {@link #makeAntiPatterns}. It makes sense
   * to store the returned value, e.g. in a field.
   * @since 5.2
   */
  protected static Supplier> cacheAntiPatterns(Language language, List> antiPatterns) {
    return Suppliers.memoize(() -> makeAntiPatterns(antiPatterns, language));
  }

  /**
   * Whether this rule can be used for text in the given language.
   * Since LanguageTool 2.6, this also works {@link org.languagetool.rules.patterns.PatternRule}s
   * (before, it used to always return {@code false} for those).
   */
  public boolean supportsLanguage(Language language) {
    try {
      List> relevantRuleClasses = new ArrayList<>();
      UserConfig config = new UserConfig();
      List relevantRules = new ArrayList<>(language.getRelevantRules(JLanguageTool.getMessageBundle(),
          config, null, Collections.emptyList()));  //  empty UserConfig has to be added to prevent null pointer exception
      relevantRules.addAll(language.getRelevantLanguageModelCapableRules(JLanguageTool.getMessageBundle(), null,
        null, config, null, Collections.emptyList()));
      for (Rule relevantRule : relevantRules) {
        relevantRuleClasses.add(relevantRule.getClass());
      }
      return relevantRuleClasses.contains(this.getClass());
    } catch (IOException e) {
      throw new RuntimeException(e);
    }
  }

  /**
   * Whether this is a spelling rule that uses a dictionary.
   * Rules that return {@code true} here are basically rules that work like
   * a simple hunspell-like spellchecker: they check words without considering
   * the words' context.
   * @since 2.5
   */
  public boolean isDictionaryBasedSpellingRule() {
    return false;
  }
  
  /**
   * Whether this rule should be forced to be used in LO/OO extension.
   * Rules that return {@code true} will be enabled always in LO/OO extension
   * regardless of other options like isDictionaryBasedSpellingRule().
   * @since 2.6
   */
  public boolean useInOffice() {
    return false;
  }

  /**
   * Set the examples that are correct and thus do not trigger the rule.
   */
  public final void setCorrectExamples(List correctExamples) {
    this.correctExamples = correctExamples.isEmpty() ? null : correctExamples;
  }

  /**
   * Get example sentences that are correct and thus will not match this rule.
   */
  public final List getCorrectExamples() {
    return correctExamples == null ? Collections.emptyList() : Collections.unmodifiableList(correctExamples);
  }

  /**
   * Set the examples that are incorrect and thus do trigger the rule.
   */
  public final void setIncorrectExamples(List incorrectExamples) {
    this.incorrectExamples = incorrectExamples.isEmpty() ? null : incorrectExamples;
  }

  /**
   * Get example sentences that are incorrect and thus will match this rule.
   */
  public final List getIncorrectExamples() {
    return incorrectExamples == null ? Collections.emptyList() : Collections.unmodifiableList(incorrectExamples);
  }

  /**
   * Set the examples that are correct but still trigger the rule due to an issue with the rule.
   * @since 3.5
   */
  public final void setErrorTriggeringExamples(List examples) {
    this.errorTriggeringExamples = examples.isEmpty() ? null : examples;
  }

  /**
   * Get the examples that are correct but still trigger the rule due to an issue with the rule.
   * @since 3.5
   */
  public final List getErrorTriggeringExamples() {
    return errorTriggeringExamples == null ? Collections.emptyList() : Collections.unmodifiableList(this.errorTriggeringExamples);
  }

  /**
   * @return a category (never null since LT 3.4)
   */
  @NotNull
  public Category getCategory() {
    return category;
  }

  public final void setCategory(Category category) {
    this.category = Objects.requireNonNull(category, "category cannot be null");
  }

  protected final RuleMatch[] toRuleMatchArray(List ruleMatches) {
    return ruleMatches.toArray(new RuleMatch[0]);
  }

  /**
   * Checks whether the rule has been turned off by default by the rule author.
   * @return True if the rule is turned off by default.
   */
  public final boolean isDefaultOff() {
    return defaultOff;
  }

  /**
   * Checks whether the rule has been turned off using "default='temp_off'" by default by the rule author.
   * This is a special case where the rule is off for users but active for nightly regression checks.
   */
  public final boolean isDefaultTempOff() {
    return defaultTempOff;
  }

  /**
   * Turns the rule off by default.
   */
  public final void setDefaultOff() {
    defaultOff = true;
  }

  /**
   * Turns the pattern rule off by default, expect for internal regression tests.
   */
  public final void setDefaultTempOff() {
    defaultOff = true;
    defaultTempOff = true;
  }

  /**
   * Turns the rule on by default.
   */
  public final void setDefaultOn() {
    defaultOff = false;
  }
  
  /**
   * Checks whether the rule has been turned off by default for Office Extension by the rule author.
   * @return True if the rule is turned off. Overrides the default for LO/OO.
   * @since 4.0
  */
  public final boolean isOfficeDefaultOff() {
    return officeDefaultOff;
  }

  /**
   * Checks whether the rule has been turned on by default for Office Extension by the rule author.
   * @return True if the rule is turned on. Overrides the default for LO/OO.
   * @since 4.0
   */
  public final boolean isOfficeDefaultOn() {
    return officeDefaultOn;
  }

  /**
   * Turns the rule off for Office Extension by default.
   * @since 4.0
   */
  public final void setOfficeDefaultOff() {
    officeDefaultOff = true;
  }

  /**
   * Turns the rule on for Office Extension by default.
   * @since 4.0
   */
  public final void setOfficeDefaultOn() {
    officeDefaultOn = true;
  }
  
  /**
   * An optional URL describing the rule match in more detail. Typically points to a dictionary or grammar website
   * with explanations and examples. Will return {@code null} for rules that have no URL.
   * @since 1.8
   */
  @Nullable
  public URL getUrl() {
    return url;
  }

  /**
   * @since 1.8
   * @see #getUrl()
   */
  public void setUrl(URL url) {
    this.url = url;
  }

  /**
   * Returns the Localization Quality Issue Type, as defined
   * at http://www.w3.org/International/multilingualweb/lt/drafts/its20/its20.html#lqissue-typevalues.
   *
   * Note that not all languages nor all rules actually map yet to a type yet. In those
   * cases, uncategorized is returned.
   *
   * @return the Localization Quality Issue Type - uncategorized if no type has been assigned
   * @since 2.5
   */
  public ITSIssueType getLocQualityIssueType() {
    return locQualityIssueType;
  }

  /**
   * Set the Localization Quality Issue Type.
   * @see #getLocQualityIssueType()
   * @since 2.5
   */
  public void setLocQualityIssueType(ITSIssueType locQualityIssueType) {
    this.locQualityIssueType = Objects.requireNonNull(locQualityIssueType);
  }

  /**
   * Convenience method to add a pair of sentences: an incorrect sentence and the same sentence
   * with the error corrected.
   * @since 2.5
   */
  protected void addExamplePair(IncorrectExample incorrectSentence, CorrectExample correctSentence) {
    if (correctExamples == null) {
      correctExamples = new ArrayList<>(0);
    }
    if (incorrectExamples == null) {
      incorrectExamples = new ArrayList<>(0);
    }
    String correctExample = correctSentence.getExample();
    int markerStart= correctExample.indexOf("");
    int markerEnd = correctExample.indexOf("");
    if (markerStart != -1 && markerEnd != -1) {
      List correction = Collections.singletonList(correctExample.substring(markerStart + "".length(), markerEnd));
      incorrectExamples.add(new IncorrectExample(incorrectSentence.getExample(), correction));
    } else {
      incorrectExamples.add(incorrectSentence);
    }
    correctExamples.add(correctSentence);
  }

  /**
   * Convenience method to set a pair of sentences: an incorrect sentence and the same sentence
   * with the error corrected.
   * @since 4.9
   */
  protected void setExamplePair(IncorrectExample incorrectSentence, CorrectExample correctSentence) {
    if (incorrectExamples != null) {
      incorrectExamples.clear();
    }
    if (correctSentence != null) {
      correctExamples.clear();
    }
    addExamplePair(incorrectSentence, correctSentence);
  }

  /**
   * @since 5.1
   */
  public void addTags(List tags) {
    if (tags.isEmpty()) return;

    List myTags = this.tags;
    if (myTags == null) {
      this.tags = myTags = new ArrayList<>();
    }
    for (String tag : tags) {
      if (myTags.stream().noneMatch(k -> k.name().equals(tag))) {
        myTags.add(Tag.valueOf(tag));
      }
    }
  }

  /**
   * @since 5.1
   */
  public void setTags(List tags) {
    this.tags = tags.isEmpty() ? null : Objects.requireNonNull(tags);
  }

  /** @since 5.1 */
  @NotNull
  public List getTags() {
    return tags == null ? Collections.emptyList() : tags;
  }

  /** @since 5.1 */
  public boolean hasTag(Tag tag) {
    return tags != null && tags.contains(tag);
  }

  public boolean isPremium() {
    return isPremium;
  }

  public void setPremium(boolean premium) {
    isPremium = premium;
  }
  
  public void setMinPrevMatches(int i) {
    minPrevMatches = i;
  }
  
  public int getMinPrevMatches() {
    return minPrevMatches;
  }
  
  public void setDistanceTokens(int i) {
    distanceTokens = i;
  }
  
  public int getDistanceTokens() {
    return distanceTokens;
  }
}