All downloads are free. Search and download functionality uses the official Maven repository.

org.apache.lucene.analysis.hunspell.ModifyingSuggester Maven / Gradle / Ivy

The newest version!
/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements.  See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.apache.lucene.analysis.hunspell;

import java.util.ArrayList;
import java.util.Arrays;
import java.util.HashSet;
import java.util.LinkedHashSet;
import java.util.List;
import java.util.Locale;
import java.util.Set;

/**
 * A class that modifies the given misspelled word in various ways (case changes, REP and MAP table
 * substitutions, character swaps, insertions, removals, replacements and splits) and keeps the
 * variants that the speller accepts as suggestions.
 */
class ModifyingSuggester {
  /** The largest index distance between two characters considered by long swaps and moves. */
  private static final int MAX_CHAR_DISTANCE = 4;

  /** The caller-provided, insertion-ordered collection that accepted suggestions are added to. */
  private final LinkedHashSet<Suggestion> result;

  private final String misspelled;
  private final WordCase wordCase;
  private final FragmentChecker fragmentChecker;

  /** Whether to continue with other modifications after a REP substitution gave a correct word. */
  private final boolean proceedPastRep;

  /** The characters of the dictionary's {@code tryChars}, used for insertion and replacement. */
  private final char[] tryChars;

  private final Hunspell speller;

  /** Candidates already passed to {@link #trySuggestion}, so none is checked a second time. */
  private final Set<String> tried = new HashSet<>();

  /**
   * @param speller the speller used to check candidates; its dictionary supplies the tables
   * @param result the collection to add suggestions to
   * @param misspelled the word to produce suggestions for
   * @param wordCase the case of {@code misspelled}
   * @param checker used to skip candidates containing impossible fragments around a modification
   * @param proceedPastRep whether to try other modifications after a successful REP substitution
   */
  ModifyingSuggester(
      Hunspell speller,
      LinkedHashSet<Suggestion> result,
      String misspelled,
      WordCase wordCase,
      FragmentChecker checker,
      boolean proceedPastRep) {
    this.speller = speller;
    tryChars = speller.dictionary.tryChars.toCharArray();
    this.result = result;
    this.misspelled = misspelled;
    this.wordCase = wordCase;
    fragmentChecker = checker;
    this.proceedPastRep = proceedPastRep;
  }

  /**
   * Tries variations of the misspelled word and of its differently-cased forms, adding accepted
   * ones to the result collection.
   *
   * @return whether any of the added suggestions are considered "good"
   */
  boolean suggest() {
    String low =
        wordCase != WordCase.LOWER ? speller.dictionary.toLowerCase(misspelled) : misspelled;
    if (wordCase == WordCase.UPPER || wordCase == WordCase.MIXED) {
      trySuggestion(low);
    }

    boolean hasGoodSuggestions = tryVariationsOf(misspelled);

    if (wordCase == WordCase.TITLE) {
      hasGoodSuggestions |= tryVariationsOf(low);
    } else if (wordCase == WordCase.UPPER) {
      hasGoodSuggestions |= tryVariationsOf(low);
      hasGoodSuggestions |= tryVariationsOf(speller.dictionary.toTitleCase(misspelled));
    } else if (wordCase == WordCase.MIXED) {
      // "Foo.Bar" -> "Foo. Bar": a title-cased part after an inner dot suggests a missing space
      int dot = misspelled.indexOf('.');
      if (dot > 0 && dot < misspelled.length() - 1) {
        String afterDot = misspelled.substring(dot + 1);
        if (WordCase.caseOf(afterDot) == WordCase.TITLE) {
          result.add(createSuggestion(misspelled.substring(0, dot + 1) + " " + afterDot));
        }
      }

      char first = misspelled.charAt(0);
      boolean capitalized = Character.isUpperCase(first);
      if (capitalized) {
        hasGoodSuggestions |=
            tryVariationsOf(speller.dictionary.caseFold(first) + misspelled.substring(1));
      }

      hasGoodSuggestions |= tryVariationsOf(low);

      if (capitalized) {
        hasGoodSuggestions |= tryVariationsOf(speller.dictionary.toTitleCase(low));
      }

      // Rebuild the result so that suggestions re-capitalized after a space come first;
      // each of them is inserted at the head, the remaining ones keep their relative order.
      List<Suggestion> reordered = new ArrayList<>();
      for (Suggestion candidate : result) {
        Suggestion changed = capitalizeAfterSpace(candidate.raw);
        if (changed == null) {
          reordered.add(candidate);
        } else {
          reordered.add(0, changed);
        }
      }

      result.clear();
      result.addAll(reordered);
    }
    return hasGoodSuggestions;
  }

  /** Wraps the candidate text into a {@link Suggestion} for the current misspelled word. */
  private Suggestion createSuggestion(String candidate) {
    return new Suggestion(candidate, misspelled, wordCase, speller);
  }

  /**
   * Upper-cases the character following the first space of the candidate when the part after that
   * space differs from the corresponding end of the misspelled word: aNew -> "a New" (instead of
   * "a new").
   *
   * @return the re-capitalized suggestion, or {@code null} if the candidate has no space after its
   *     first character or its tail already matches the end of the misspelled word
   */
  private Suggestion capitalizeAfterSpace(String candidate) {
    int space = candidate.indexOf(' ');
    int tail = candidate.length() - space - 1;
    if (space > 0
        && !misspelled.regionMatches(misspelled.length() - tail, candidate, space + 1, tail)) {
      return createSuggestion(
          candidate.substring(0, space + 1)
              + Character.toUpperCase(candidate.charAt(space + 1))
              + candidate.substring(space + 2));
    }
    return null;
  }

  /**
   * Applies every kind of modification to the given word, adding accepted candidates to the
   * result.
   *
   * @return whether "good" suggestions were found: the upper-cased word, a REP-based suggestion or
   *     a split accepted by the dictionary as a whole
   */
  private boolean tryVariationsOf(String word) {
    boolean hasGoodSuggestions = trySuggestion(word.toUpperCase(Locale.ROOT));

    GradedSuggestions repResult = tryRep(word);
    // a correct word produced by REP ends the search unless asked to go on
    if (repResult == GradedSuggestions.Best && !proceedPastRep) return true;

    hasGoodSuggestions |= repResult != GradedSuggestions.None;

    if (!speller.dictionary.mapTable.isEmpty()) {
      enumerateMapReplacements(word, "", 0);
    }

    trySwappingChars(word);
    tryLongSwap(word);
    tryNeighborKeys(word);
    tryRemovingChar(word);
    tryAddingChar(word);
    tryMovingChar(word);
    tryReplacingChar(word);
    tryTwoDuplicateChars(word);

    List<Suggestion> goodSplit = checkDictionaryForSplitSuggestions(word);
    if (!goodSplit.isEmpty()) {
      // Splits accepted as a whole go first; what was collected before is kept after them
      // only if it included good suggestions, otherwise it is dropped.
      List<Suggestion> copy = new ArrayList<>(result);
      result.clear();
      result.addAll(goodSplit);
      if (hasGoodSuggestions) {
        result.addAll(copy);
      }
      hasGoodSuggestions = true;
    }

    if (!hasGoodSuggestions && speller.dictionary.enableSplitSuggestions) {
      trySplitting(word);
    }
    return hasGoodSuggestions;
  }

  /** The outcome of {@link #tryRep}. */
  private enum GradedSuggestions {
    /** Nothing was added to the result. */
    None,
    /** Only multi-word candidates consisting of simple words were added. */
    Normal,
    /** At least one substitution was accepted by the speller as is. */
    Best
  }

  /**
   * Tries all substitutions of the dictionary's REP table entries in the given word. A trimmed
   * candidate that the speller rejects as a whole is still added if it contains spaces and each
   * of its space-separated parts passes the simple word check.
   */
  private GradedSuggestions tryRep(String word) {
    boolean hasBest = false;
    int before = result.size();
    for (RepEntry entry : speller.dictionary.repTable) {
      for (String candidate : entry.substitute(word)) {
        candidate = candidate.trim();
        if (trySuggestion(candidate)) {
          hasBest = true;
          continue;
        }

        if (candidate.contains(" ")
            && Arrays.stream(candidate.split(" ")).allMatch(this::checkSimpleWord)) {
          result.add(createSuggestion(candidate));
        }
      }
    }
    if (hasBest) return GradedSuggestions.Best;
    return result.size() > before ? GradedSuggestions.Normal : GradedSuggestions.None;
  }

  /**
   * Recursively builds candidates by replacing fragments of the word with the other members of
   * their MAP table group, or by keeping the original character, and tries each complete
   * candidate. Branches for which the fragment checker reports an impossible fragment around the
   * last appended piece are not explored.
   *
   * @param word the word being modified
   * @param accumulated the candidate text built so far, corresponding to {@code word} up to
   *     {@code offset}
   * @param offset the index in {@code word} of the first character not yet processed
   */
  private void enumerateMapReplacements(String word, String accumulated, int offset) {
    if (offset == word.length()) {
      trySuggestion(accumulated);
      return;
    }

    int length = accumulated.length();

    for (List<String> entries : speller.dictionary.mapTable) {
      for (String entry : entries) {
        if (word.regionMatches(offset, entry, 0, entry.length())) {
          for (String replacement : entries) {
            if (!entry.equals(replacement)) {
              String next = accumulated + replacement;
              int end = length + replacement.length();
              if (!fragmentChecker.hasImpossibleFragmentAround(next, length, end)) {
                enumerateMapReplacements(word, next, offset + entry.length());
              }
            }
          }
        }
      }
    }

    // also continue with the current character left unchanged
    String next = accumulated + word.charAt(offset);
    if (!fragmentChecker.hasImpossibleFragmentAround(next, length, length + 1)) {
      enumerateMapReplacements(word, next, offset + 1);
    }
  }

  /**
   * @return whether the speller's simple word check returns {@link Boolean#TRUE} for the given
   *     text; any other outcome (including {@code null}) counts as {@code false}
   */
  private boolean checkSimpleWord(String part) {
    return Boolean.TRUE.equals(speller.checkSimpleWord(part.toCharArray(), part.length(), null));
  }

  /** Tries swapping each pair of adjacent characters, plus double swaps for 4-5 letter words. */
  private void trySwappingChars(String word) {
    int length = word.length();
    for (int i = 0; i < length - 1; i++) {
      char c1 = word.charAt(i);
      char c2 = word.charAt(i + 1);
      trySuggestion(word.substring(0, i) + c2 + c1 + word.substring(i + 2));
    }

    if (length == 4 || length == 5) {
      tryDoubleSwapForShortWords(word, length);
    }
  }

  /**
   * Tries swapping both the first two and the last two characters at once: ahev -> have, owudl ->
   * would. For five-letter words, additionally tries swapping the second and third characters
   * together with the last two.
   */
  private void tryDoubleSwapForShortWords(String word, int length) {
    char[] candidate = word.toCharArray();
    candidate[0] = word.charAt(1);
    candidate[1] = word.charAt(0);
    candidate[length - 1] = word.charAt(length - 2);
    candidate[length - 2] = word.charAt(length - 1);
    trySuggestion(new String(candidate));

    if (candidate.length == 5) {
      // restore the first character and swap the 2nd and 3rd instead; the tail stays swapped
      candidate[0] = word.charAt(0);
      candidate[1] = word.charAt(2);
      candidate[2] = word.charAt(1);
      trySuggestion(new String(candidate));
    }
  }

  /**
   * For each position, tries the upper-cased character (if different) and every other character
   * from the dictionary's neighbor key groups that contain the original one.
   */
  private void tryNeighborKeys(String word) {
    for (int i = 0; i < word.length(); i++) {
      char c = word.charAt(i);
      char up = Character.toUpperCase(c);
      if (up != c) {
        trySuggestion(word.substring(0, i) + up + word.substring(i + 1));
      }

      // check neighbor characters in keyboard string
      for (String group : speller.dictionary.neighborKeyGroups) {
        if (group.indexOf(c) >= 0) {
          for (int j = 0; j < group.length(); j++) {
            if (group.charAt(j) != c) {
              tryModifiedSuggestions(
                  i, word.substring(0, i) + group.charAt(j) + word.substring(i + 1));
            }
          }
        }
      }
    }
  }

  /**
   * Tries a candidate with a single character inserted or replaced at {@code modOffset}, unless
   * the fragment checker reports an impossible fragment around that character.
   */
  private void tryModifiedSuggestions(int modOffset, String candidate) {
    if (!fragmentChecker.hasImpossibleFragmentAround(candidate, modOffset, modOffset + 1)) {
      trySuggestion(candidate);
    }
  }

  /**
   * Tries swapping pairs of non-adjacent characters that are from 2 to {@link #MAX_CHAR_DISTANCE}
   * positions apart.
   */
  private void tryLongSwap(String word) {
    for (int i = 0; i < word.length(); i++) {
      char c1 = word.charAt(i);
      String prefix = word.substring(0, i);
      for (int j = i + 2; j < word.length() && j <= i + MAX_CHAR_DISTANCE; j++) {
        char c2 = word.charAt(j);
        String suffix = word.substring(j + 1);
        trySuggestion(prefix + c2 + word.substring(i + 1, j) + c1 + suffix);
      }
    }
  }

  /** Tries removing each single character; one-character words are left alone. */
  private void tryRemovingChar(String word) {
    if (word.length() == 1) return;

    for (int i = 0; i < word.length(); i++) {
      trySuggestion(word.substring(0, i) + word.substring(i + 1));
    }
  }

  /** Tries inserting each of the try-characters at every position, including both ends. */
  private void tryAddingChar(String word) {
    for (int i = 0; i <= word.length(); i++) {
      String prefix = word.substring(0, i);
      String suffix = word.substring(i);
      for (char toInsert : tryChars) {
        tryModifiedSuggestions(i, prefix + toInsert + suffix);
      }
    }
  }

  /**
   * Tries moving a single character forward or backward by up to {@link #MAX_CHAR_DISTANCE}
   * positions, and moving each character except the last one to the end of the word.
   */
  private void tryMovingChar(String word) {
    for (int i = 0; i < word.length(); i++) {
      String prefix = word.substring(0, i);
      for (int j = i + 2; j < word.length() && j <= i + MAX_CHAR_DISTANCE; j++) {
        // the character at i goes right before position j
        trySuggestion(prefix + word.substring(i + 1, j) + word.charAt(i) + word.substring(j));
        // the character at j goes to position i
        trySuggestion(prefix + word.charAt(j) + word.substring(i, j) + word.substring(j + 1));
      }
      if (i < word.length() - 1) {
        trySuggestion(prefix + word.substring(i + 1) + word.charAt(i));
      }
    }
  }

  /** Tries replacing the character at each position with every different try-character. */
  private void tryReplacingChar(String word) {
    for (int i = 0; i < word.length(); i++) {
      String prefix = word.substring(0, i);
      String suffix = word.substring(i + 1);
      for (char toInsert : tryChars) {
        if (toInsert != word.charAt(i)) {
          tryModifiedSuggestions(i, prefix + toInsert + suffix);
        }
      }
    }
  }

  // perhaps we doubled two characters
  // (for example vacation -> vacacation)
  private void tryTwoDuplicateChars(String word) {
    // the number of consecutive positions whose character equals the one two positions earlier
    int dupLen = 0;
    for (int i = 2; i < word.length(); i++) {
      if (word.charAt(i) == word.charAt(i - 2)) {
        dupLen++;
        if (dupLen == 3 || (dupLen == 2 && i >= 4)) {
          // drop the two characters at i - 1 and i
          trySuggestion(word.substring(0, i - 1) + word.substring(i + 1));
          dupLen = 0;
        }
      } else {
        dupLen = 0;
      }
    }
  }

  /**
   * Collects the ways of splitting the word in two with a space (and with a dash, if {@link
   * #shouldSplitByDash()}) for which the speller accepts the joined text as a whole. Split
   * positions run from after the first character to before the second-to-last one.
   *
   * @return the accepted split suggestions, possibly empty; the result field is not modified
   */
  private List<Suggestion> checkDictionaryForSplitSuggestions(String word) {
    List<Suggestion> splits = new ArrayList<>();
    for (int i = 1; i < word.length() - 1; i++) {
      String w1 = word.substring(0, i);
      String w2 = word.substring(i);
      String spaced = w1 + " " + w2;
      if (speller.checkWord(spaced)) {
        splits.add(createSuggestion(spaced));
      }
      if (shouldSplitByDash()) {
        String dashed = w1 + "-" + w2;
        if (speller.checkWord(dashed)) {
          splits.add(createSuggestion(dashed));
        }
      }
    }
    return splits;
  }

  /**
   * Adds a space-separated suggestion for every split position where both parts pass the simple
   * word check, and a dash-separated one as well when both parts are longer than one character
   * and {@link #shouldSplitByDash()} holds.
   */
  private void trySplitting(String word) {
    for (int i = 1; i < word.length(); i++) {
      String w1 = word.substring(0, i);
      String w2 = word.substring(i);
      if (checkSimpleWord(w1) && checkSimpleWord(w2)) {
        result.add(createSuggestion(w1 + " " + w2));
        if (w1.length() > 1 && w2.length() > 1 && shouldSplitByDash()) {
          result.add(createSuggestion(w1 + "-" + w2));
        }
      }
    }
  }

  /**
   * @return whether the dictionary's try-characters contain a dash or the letter "a"
   */
  private boolean shouldSplitByDash() {
    return speller.dictionary.tryChars.contains("-") || speller.dictionary.tryChars.contains("a");
  }

  /**
   * Checks a candidate unless it has been tried already, and adds it to the result if the speller
   * accepts it.
   *
   * @return {@code true} only if the candidate was not tried before, the speller accepts it, and
   *     the result did not already contain the corresponding suggestion
   */
  private boolean trySuggestion(String candidate) {
    return tried.add(candidate)
        && speller.checkWord(candidate)
        && result.add(createSuggestion(candidate));
  }
}




© 2015 - 2025 Weber Informatics LLC | Privacy Policy