All Downloads are FREE. Search and download functionalities are using the official Maven repository.

org.apache.lucene.analysis.hunspell.ModifyingSuggester Maven / Gradle / Ivy

The newest version!
/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements.  See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.apache.lucene.analysis.hunspell;

import java.util.ArrayList;
import java.util.Arrays;
import java.util.LinkedHashSet;
import java.util.List;
import java.util.Locale;

/** A class that modifies the given misspelled word in various ways to get correct suggestions */
class ModifyingSuggester {
  private static final int MAX_CHAR_DISTANCE = 4;
  private final LinkedHashSet result;
  private final char[] tryChars;
  private final Hunspell speller;

  ModifyingSuggester(Hunspell speller, LinkedHashSet result) {
    this.speller = speller;
    tryChars = speller.dictionary.tryChars.toCharArray();
    this.result = result;
  }

  /** @return whether any of the added suggestions are considered "good" */
  boolean suggest(String word, WordCase wordCase) {
    String low = wordCase != WordCase.LOWER ? speller.dictionary.toLowerCase(word) : word;
    if (wordCase == WordCase.UPPER || wordCase == WordCase.MIXED) {
      trySuggestion(low);
    }

    boolean hasGoodSuggestions = tryVariationsOf(word);

    if (wordCase == WordCase.TITLE) {
      hasGoodSuggestions |= tryVariationsOf(low);
    } else if (wordCase == WordCase.UPPER) {
      hasGoodSuggestions |= tryVariationsOf(low);
      hasGoodSuggestions |= tryVariationsOf(speller.dictionary.toTitleCase(word));
    } else if (wordCase == WordCase.MIXED) {
      int dot = word.indexOf('.');
      if (dot > 0
          && dot < word.length() - 1
          && WordCase.caseOf(word.substring(dot + 1)) == WordCase.TITLE) {
        result.add(word.substring(0, dot + 1) + " " + word.substring(dot + 1));
      }

      boolean capitalized = Character.isUpperCase(word.charAt(0));
      if (capitalized) {
        hasGoodSuggestions |=
            tryVariationsOf(speller.dictionary.caseFold(word.charAt(0)) + word.substring(1));
      }

      hasGoodSuggestions |= tryVariationsOf(low);

      if (capitalized) {
        hasGoodSuggestions |= tryVariationsOf(speller.dictionary.toTitleCase(low));
      }

      List adjusted = new ArrayList<>();
      for (String candidate : result) {
        String s = capitalizeAfterSpace(word, candidate);
        adjusted.add(s.equals(candidate) ? adjusted.size() : 0, s);
      }

      result.clear();
      result.addAll(adjusted);
    }
    return hasGoodSuggestions;
  }

  // aNew -> "a New" (instead of "a new")
  private String capitalizeAfterSpace(String misspelled, String candidate) {
    int space = candidate.indexOf(' ');
    int tail = candidate.length() - space - 1;
    if (space > 0
        && !misspelled.regionMatches(misspelled.length() - tail, candidate, space + 1, tail)) {
      return candidate.substring(0, space + 1)
          + Character.toUpperCase(candidate.charAt(space + 1))
          + candidate.substring(space + 2);
    }
    return candidate;
  }

  private boolean tryVariationsOf(String word) {
    boolean hasGoodSuggestions = trySuggestion(word.toUpperCase(Locale.ROOT));
    hasGoodSuggestions |= tryRep(word);

    if (!speller.dictionary.mapTable.isEmpty()) {
      enumerateMapReplacements(word, "", 0);
    }

    trySwappingChars(word);
    tryLongSwap(word);
    tryNeighborKeys(word);
    tryRemovingChar(word);
    tryAddingChar(word);
    tryMovingChar(word);
    tryReplacingChar(word);
    tryTwoDuplicateChars(word);

    List goodSplit = checkDictionaryForSplitSuggestions(word);
    if (!goodSplit.isEmpty()) {
      List copy = new ArrayList<>(result);
      result.clear();
      result.addAll(goodSplit);
      if (hasGoodSuggestions) {
        result.addAll(copy);
      }
      hasGoodSuggestions = true;
    }

    if (!hasGoodSuggestions && speller.dictionary.enableSplitSuggestions) {
      trySplitting(word);
    }
    return hasGoodSuggestions;
  }

  private boolean tryRep(String word) {
    int before = result.size();
    for (RepEntry entry : speller.dictionary.repTable) {
      for (String candidate : entry.substitute(word)) {
        candidate = candidate.trim();
        if (trySuggestion(candidate)) {
          continue;
        }

        if (candidate.contains(" ")
            && Arrays.stream(candidate.split(" ")).allMatch(this::checkSimpleWord)) {
          result.add(candidate);
        }
      }
    }
    return result.size() > before;
  }

  private void enumerateMapReplacements(String word, String accumulated, int offset) {
    if (offset == word.length()) {
      trySuggestion(accumulated);
      return;
    }

    for (List entries : speller.dictionary.mapTable) {
      for (String entry : entries) {
        if (word.regionMatches(offset, entry, 0, entry.length())) {
          for (String replacement : entries) {
            if (!entry.equals(replacement)) {
              enumerateMapReplacements(word, accumulated + replacement, offset + entry.length());
            }
          }
        }
      }
    }

    enumerateMapReplacements(word, accumulated + word.charAt(offset), offset + 1);
  }

  private boolean checkSimpleWord(String part) {
    return Boolean.TRUE.equals(speller.checkSimpleWord(part.toCharArray(), part.length(), null));
  }

  private void trySwappingChars(String word) {
    int length = word.length();
    for (int i = 0; i < length - 1; i++) {
      char c1 = word.charAt(i);
      char c2 = word.charAt(i + 1);
      trySuggestion(word.substring(0, i) + c2 + c1 + word.substring(i + 2));
    }

    if (length == 4 || length == 5) {
      tryDoubleSwapForShortWords(word, length);
    }
  }

  // ahev -> have, owudl -> would
  private void tryDoubleSwapForShortWords(String word, int length) {
    char[] candidate = word.toCharArray();
    candidate[0] = word.charAt(1);
    candidate[1] = word.charAt(0);
    candidate[length - 1] = word.charAt(length - 2);
    candidate[length - 2] = word.charAt(length - 1);
    trySuggestion(new String(candidate));

    if (candidate.length == 5) {
      candidate[0] = word.charAt(0);
      candidate[1] = word.charAt(2);
      candidate[2] = word.charAt(1);
      trySuggestion(new String(candidate));
    }
  }

  private void tryNeighborKeys(String word) {
    for (int i = 0; i < word.length(); i++) {
      char c = word.charAt(i);
      char up = Character.toUpperCase(c);
      if (up != c) {
        trySuggestion(word.substring(0, i) + up + word.substring(i + 1));
      }

      // check neighbor characters in keyboard string
      for (String group : speller.dictionary.neighborKeyGroups) {
        if (group.indexOf(c) >= 0) {
          for (int j = 0; j < group.length(); j++) {
            if (group.charAt(j) != c) {
              trySuggestion(word.substring(0, i) + group.charAt(j) + word.substring(i + 1));
            }
          }
        }
      }
    }
  }

  private void tryLongSwap(String word) {
    for (int i = 0; i < word.length(); i++) {
      for (int j = i + 2; j < word.length() && j <= i + MAX_CHAR_DISTANCE; j++) {
        char c1 = word.charAt(i);
        char c2 = word.charAt(j);
        String prefix = word.substring(0, i);
        String suffix = word.substring(j + 1);
        trySuggestion(prefix + c2 + word.substring(i + 1, j) + c1 + suffix);
      }
    }
  }

  private void tryRemovingChar(String word) {
    if (word.length() == 1) return;

    for (int i = 0; i < word.length(); i++) {
      trySuggestion(word.substring(0, i) + word.substring(i + 1));
    }
  }

  private void tryAddingChar(String word) {
    for (int i = 0; i <= word.length(); i++) {
      String prefix = word.substring(0, i);
      String suffix = word.substring(i);
      for (char toInsert : tryChars) {
        trySuggestion(prefix + toInsert + suffix);
      }
    }
  }

  private void tryMovingChar(String word) {
    for (int i = 0; i < word.length(); i++) {
      String prefix = word.substring(0, i);
      for (int j = i + 2; j < word.length() && j <= i + MAX_CHAR_DISTANCE; j++) {
        trySuggestion(prefix + word.substring(i + 1, j) + word.charAt(i) + word.substring(j));
        trySuggestion(prefix + word.charAt(j) + word.substring(i, j) + word.substring(j + 1));
      }
      if (i < word.length() - 1) {
        trySuggestion(prefix + word.substring(i + 1) + word.charAt(i));
      }
    }
  }

  private void tryReplacingChar(String word) {
    for (int i = 0; i < word.length(); i++) {
      String prefix = word.substring(0, i);
      String suffix = word.substring(i + 1);
      for (char toInsert : tryChars) {
        if (toInsert != word.charAt(i)) {
          trySuggestion(prefix + toInsert + suffix);
        }
      }
    }
  }

  // perhaps we doubled two characters
  // (for example vacation -> vacacation)
  private void tryTwoDuplicateChars(String word) {
    int dupLen = 0;
    for (int i = 2; i < word.length(); i++) {
      if (word.charAt(i) == word.charAt(i - 2)) {
        dupLen++;
        if (dupLen == 3 || dupLen == 2 && i >= 4) {
          trySuggestion(word.substring(0, i - 1) + word.substring(i + 1));
          dupLen = 0;
        }
      } else {
        dupLen = 0;
      }
    }
  }

  private List checkDictionaryForSplitSuggestions(String word) {
    List result = new ArrayList<>();
    for (int i = 1; i < word.length() - 1; i++) {
      String w1 = word.substring(0, i);
      String w2 = word.substring(i);
      String spaced = w1 + " " + w2;
      if (speller.checkWord(spaced)) {
        result.add(spaced);
      }
      if (shouldSplitByDash()) {
        String dashed = w1 + "-" + w2;
        if (speller.checkWord(dashed)) {
          result.add(dashed);
        }
      }
    }
    return result;
  }

  private void trySplitting(String word) {
    for (int i = 1; i < word.length(); i++) {
      String w1 = word.substring(0, i);
      String w2 = word.substring(i);
      if (checkSimpleWord(w1) && checkSimpleWord(w2)) {
        result.add(w1 + " " + w2);
        if (w1.length() > 1 && w2.length() > 1 && shouldSplitByDash()) {
          result.add(w1 + "-" + w2);
        }
      }
    }
  }

  private boolean shouldSplitByDash() {
    return speller.dictionary.tryChars.contains("-") || speller.dictionary.tryChars.contains("a");
  }

  private boolean trySuggestion(String candidate) {
    return speller.checkWord(candidate) && result.add(candidate);
  }
}




© 2015 - 2024 Weber Informatics LLC | Privacy Policy