All Downloads are FREE. Search and download functionalities are using the official Maven repository.

org.languagetool.rules.en.EnglishRepeatedWordsRule Maven / Gradle / Ivy

/* LanguageTool, a natural language style checker 
 * Copyright (C) 2021 Daniel Naber (http://www.danielnaber.de)
 * 
 * This library is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2.1 of the License, or (at your option) any later version.
 *
 * This library is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with this library; if not, write to the Free Software
 * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301
 * USA
 */
package org.languagetool.rules.en;

import org.languagetool.AnalyzedTokenReadings;
import org.languagetool.Tag;
import org.languagetool.language.AmericanEnglish;
import org.languagetool.rules.AbstractRepeatedWordsRule;
import org.languagetool.rules.SynonymsData;
import org.languagetool.rules.patterns.PatternToken;
import org.languagetool.rules.patterns.PatternTokenBuilder;
import org.languagetool.synthesis.Synthesizer;
import org.languagetool.synthesis.en.EnglishSynthesizer;
import org.languagetool.tagging.disambiguation.rules.DisambiguationPatternRule;
import org.languagetool.tools.Tools;

import java.util.*;
import java.util.function.Supplier;

import static org.languagetool.rules.patterns.PatternRuleBuilderHelper.*;

public class EnglishRepeatedWordsRule extends AbstractRepeatedWordsRule{
  
  private final Supplier> antiPatterns;

  private static final List> ANTI_PATTERNS = Arrays.asList(
    Arrays.asList(
      new PatternTokenBuilder().csToken("need").matchInflectedForms().build(),   // "I still need -> require to sign in"
      token("to")
    ),

    Arrays.asList(
      new PatternTokenBuilder().tokenRegex("solve(s|d|ing)?").setSkip(3).build(),   // "solve the problem" is a unique collocation
      tokenRegex("problems?")                                                    // "solve the issue|concern|difficulty" sounds bizarre
    ),                                                                             // lots of disables in Matomo

    Arrays.asList(
      posRegex("SENT_START|PCT"),       // "No problem, I'm not in a rush."
      token("no"),
      token("problem"),
      pos("PCT")
    ),

    Arrays.asList(
      tokenRegex("math|word"),       // "math/word problem"
      tokenRegex("problems?")
    ),

    Arrays.asList(
      tokenRegex("as"),       // "doesn't apply to the group as a whole"
      tokenRegex("a"),
      tokenRegex("whole")
    ),

    Arrays.asList(
      token("more"),
      token("often"),
      token("than"),
      token("not")
    ),

    Arrays.asList(
      token("often"),
      token("times")
    ),

    Arrays.asList(
      tokenRegex("details?|facts?|it|journals?|questions?|research|results?|study|studies|this|these|those|which"),
      new PatternTokenBuilder().pos("RB").min(0).build(),
      new PatternTokenBuilder().csToken("suggest").matchInflectedForms().build()
    ),

    Arrays.asList(
      new PatternTokenBuilder().csToken("form").matchInflectedForms().build(),   // "form in the bloodstream"
      posRegex("IN|PCT|RP|TO|SENT_END")
    ),

    Arrays.asList(
      new PatternTokenBuilder().tokenRegex("bonds?|crystals?|ions?|rocks?|.*valence").setSkip(10).build(),
      new PatternTokenBuilder().csToken("form").matchInflectedForms().build()
    ),

    Arrays.asList(
      new PatternTokenBuilder().tokenRegex("form(s|ed|ing)?").setSkip(10).build(),
      tokenRegex("bonds?|crystals?|ions?|rocks?|.*valence")
    ),

    Arrays.asList(
      token("interesting"),
      tokenRegex("facts?|things?")
    ),

    Arrays.asList(
      token("several"),
      tokenRegex("hundreds?|thousands?|millions?")
    ),

    Arrays.asList(
      token("must"),
      token("be"),
      token("nice")
    ),

    Arrays.asList(
      token("nice"),
      token("day")
    ),

    Arrays.asList(
      token("nice"),
      token("to"),
      new PatternTokenBuilder().token("meet").min(0).build(),
      posRegex("PRP_O.*")
    ),

    Arrays.asList(
      new PatternTokenBuilder().csToken("be").matchInflectedForms().build(),  // nice and plump
      token("nice"),
      token("and"),
      pos("JJ"),
      posRegex("PCT|SENT_END")
    ),

    Arrays.asList(
      posRegex("P?DT|PRP$.*"),  // the proposed agreement
      token("proposed"),
      posRegex("N.*")
    ),

    Arrays.asList(
      new PatternTokenBuilder().csToken("propose").matchInflectedForms().build(),
      tokenRegex("to|marriage")
    ),

    Arrays.asList(
      token("too"),
      token("literally")
    ),

    Arrays.asList(
      token("literally"),
      token("and"),
      token("figuratively")
    ),

    Arrays.asList(
      token("literally"),
      token("everything")
    ),

    Arrays.asList(
      token("literally"),
      posRegex("PCT|SENT_END")
    ),

    Arrays.asList(
      posRegex("CC"),       // "Or maybe it's because I have eyes that see!"
      token("maybe")
    )

  );

  @Override
  public List getAntiPatterns() {
    return antiPatterns.get();
  }

  public EnglishRepeatedWordsRule(ResourceBundle messages) {
    super(messages, new AmericanEnglish());
    setTags(Collections.singletonList(Tag.picky));
    antiPatterns = cacheAntiPatterns(new AmericanEnglish(), ANTI_PATTERNS);
    String id = this.getId();
    if (id.equals("EN_REPEATEDWORDS_DEFINITELY")){
      this.setUrl(Tools.getUrl("https://languagetool.org/insights/post/i-agree-synonyms/"));
    }
    if (id.equals("EN_REPEATEDWORDS_CHOOSE")){
      this.setUrl(Tools.getUrl("https://languagetool.org/insights/post/choose-vs-chose/"));
    }
    //super.setDefaultTempOff();
  }
  
  private static final Map wordsToCheck = loadWords("/en/synonyms.txt");
  
  @Override
  protected String getMessage() {
    return "This word has been used in one of the immediately preceding sentences. Using a synonym could make your text more interesting to read, unless the repetition is intentional.";
  }

  @Override
  public String getDescription() {
    return ("Suggest synonyms for repeated words.");
  }

  @Override
  protected Map getWordsToCheck() {
    return wordsToCheck;
  }

  @Override
  protected String getShortMessage() {
    return "Style: repeated word";
  }

  @Override
  protected Synthesizer getSynthesizer() {
    return EnglishSynthesizer.INSTANCE;
  }

  @Override
  protected boolean isException(AnalyzedTokenReadings[] tokens, int i, boolean sentStart, boolean isCapitalized,
      boolean isAllUppercase) {
    if (isAllUppercase || (isCapitalized && !sentStart)) {
      return true;
    }
    if (tokens[i].hasPosTagStartingWith("NNP")) {
      return true;
    }
    return false;
  }

}




© 2015 - 2025 Weber Informatics LLC | Privacy Policy