// Listing of org.apache.lucene.analysis.hunspell.ModifyingSuggester
// Artifact: lucene-analysis-common (Maven / Gradle / Ivy) - Apache Lucene (module: common)
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.lucene.analysis.hunspell;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.HashSet;
import java.util.LinkedHashSet;
import java.util.List;
import java.util.Locale;
import java.util.Set;
/**
 * A class that modifies the given misspelled word in various ways to get correct suggestions.
 *
 * <p>Each candidate string is tried at most once (see {@link #trySuggestion}); accepted candidates
 * are appended to the caller-supplied {@code result} set, which preserves insertion order.
 */
class ModifyingSuggester {
  /** Maximum distance between two positions considered by the long-swap and move edits. */
  private static final int MAX_CHAR_DISTANCE = 4;

  /** Caller-owned, insertion-ordered collection that receives the suggestions. */
  private final LinkedHashSet<Suggestion> result;

  private final String misspelled;
  private final WordCase wordCase;
  private final FragmentChecker fragmentChecker;

  /** Characters of the dictionary's {@code tryChars}, used for insertions and replacements. */
  private final char[] tryChars;

  private final Hunspell speller;

  /** Candidates that have already been passed to {@link #trySuggestion}, accepted or not. */
  private final Set<String> tried = new HashSet<>();

  /**
   * @param speller the speller used to check candidates; its dictionary supplies the try
   *     characters and the REP/MAP/neighbor-key tables read by this class
   * @param result the set that accumulates suggestions; it is mutated (and in some cases cleared
   *     and refilled) by {@link #suggest()}
   * @param misspelled the word to produce suggestions for
   * @param wordCase the case of {@code misspelled}, as determined by the caller
   * @param checker used to skip candidates containing fragments it reports as impossible
   */
  ModifyingSuggester(
      Hunspell speller,
      LinkedHashSet<Suggestion> result,
      String misspelled,
      WordCase wordCase,
      FragmentChecker checker) {
    this.speller = speller;
    tryChars = speller.dictionary.tryChars.toCharArray();
    this.result = result;
    this.misspelled = misspelled;
    this.wordCase = wordCase;
    fragmentChecker = checker;
  }

  /**
   * Tries variations of the misspelled word and of its case-changed forms, depending on the word
   * case passed to the constructor.
   *
   * @return whether any of the added suggestions are considered "good"
   */
  boolean suggest() {
    String low =
        wordCase != WordCase.LOWER ? speller.dictionary.toLowerCase(misspelled) : misspelled;
    if (wordCase == WordCase.UPPER || wordCase == WordCase.MIXED) {
      trySuggestion(low);
    }

    boolean hasGoodSuggestions = tryVariationsOf(misspelled);

    if (wordCase == WordCase.TITLE) {
      hasGoodSuggestions |= tryVariationsOf(low);
    } else if (wordCase == WordCase.UPPER) {
      hasGoodSuggestions |= tryVariationsOf(low);
      hasGoodSuggestions |= tryVariationsOf(speller.dictionary.toTitleCase(misspelled));
    } else if (wordCase == WordCase.MIXED) {
      // "foo.Bar" -> "foo. Bar": a dot inside the word followed by a title-case tail
      int dot = misspelled.indexOf('.');
      if (dot > 0 && dot < misspelled.length() - 1) {
        String afterDot = misspelled.substring(dot + 1);
        if (WordCase.caseOf(afterDot) == WordCase.TITLE) {
          result.add(createSuggestion(misspelled.substring(0, dot + 1) + " " + afterDot));
        }
      }

      char first = misspelled.charAt(0);
      boolean capitalized = Character.isUpperCase(first);
      if (capitalized) {
        hasGoodSuggestions |=
            tryVariationsOf(speller.dictionary.caseFold(first) + misspelled.substring(1));
      }

      hasGoodSuggestions |= tryVariationsOf(low);

      if (capitalized) {
        hasGoodSuggestions |= tryVariationsOf(speller.dictionary.toTitleCase(low));
      }

      // Rebuild the result: suggestions that get re-capitalized after a space are moved to the
      // front (each one is inserted at index 0), the others keep their relative order.
      List<Suggestion> reordered = new ArrayList<>();
      for (Suggestion candidate : result) {
        Suggestion changed = capitalizeAfterSpace(candidate.raw);
        if (changed == null) {
          reordered.add(candidate);
        } else {
          reordered.add(0, changed);
        }
      }
      result.clear();
      result.addAll(reordered);
    }
    return hasGoodSuggestions;
  }

  /** Wraps a candidate string into a {@link Suggestion} for the current misspelled word. */
  private Suggestion createSuggestion(String candidate) {
    return new Suggestion(candidate, misspelled, wordCase, speller);
  }

  /**
   * aNew -> "a New" (instead of "a new").
   *
   * @return a suggestion with the character after the first space upper-cased, or {@code null}
   *     if the candidate has no space past index 0 or its part after the space already matches
   *     the end of the misspelled word
   */
  private Suggestion capitalizeAfterSpace(String candidate) {
    int space = candidate.indexOf(' ');
    int tail = candidate.length() - space - 1;
    if (space > 0
        && !misspelled.regionMatches(misspelled.length() - tail, candidate, space + 1, tail)) {
      return createSuggestion(
          candidate.substring(0, space + 1)
              + Character.toUpperCase(candidate.charAt(space + 1))
              + candidate.substring(space + 2));
    }
    return null;
  }

  /**
   * Runs all modification strategies on the given word, in a fixed order, adding accepted
   * candidates to {@code result}.
   *
   * @return {@code true} if the upper-cased word or a REP-table substitution was accepted, or if
   *     the dictionary check for split words found anything
   */
  private boolean tryVariationsOf(String word) {
    boolean hasGoodSuggestions = trySuggestion(word.toUpperCase(Locale.ROOT));
    hasGoodSuggestions |= tryRep(word);

    if (!speller.dictionary.mapTable.isEmpty()) {
      enumerateMapReplacements(word, "", 0);
    }

    trySwappingChars(word);
    tryLongSwap(word);
    tryNeighborKeys(word);
    tryRemovingChar(word);
    tryAddingChar(word);
    tryMovingChar(word);
    tryReplacingChar(word);
    tryTwoDuplicateChars(word);

    List<Suggestion> goodSplit = checkDictionaryForSplitSuggestions(word);
    if (!goodSplit.isEmpty()) {
      // Split suggestions go first; everything collected so far is kept only if some of it
      // was already considered good.
      List<Suggestion> copy = new ArrayList<>(result);
      result.clear();
      result.addAll(goodSplit);
      if (hasGoodSuggestions) {
        result.addAll(copy);
      }
      hasGoodSuggestions = true;
    }

    if (!hasGoodSuggestions && speller.dictionary.enableSplitSuggestions) {
      trySplitting(word);
    }
    return hasGoodSuggestions;
  }

  /**
   * Applies every entry of the dictionary's REP table to the word. A substitution is added when
   * it is accepted as a whole, or when it contains spaces and each space-separated part passes
   * {@link #checkSimpleWord}.
   *
   * @return whether the result grew during this call
   */
  private boolean tryRep(String word) {
    int before = result.size();
    for (RepEntry entry : speller.dictionary.repTable) {
      for (String candidate : entry.substitute(word)) {
        candidate = candidate.trim();
        if (trySuggestion(candidate)) {
          continue;
        }

        if (candidate.contains(" ")
            && Arrays.stream(candidate.split(" ")).allMatch(this::checkSimpleWord)) {
          result.add(createSuggestion(candidate));
        }
      }
    }
    return result.size() > before;
  }

  /**
   * Recursively builds candidates by replacing, at each offset, any MAP-table entry found there
   * with the other members of its group, or by copying the original character unchanged.
   * Branches whose newly appended part is reported as an impossible fragment are abandoned.
   *
   * @param word the word being rewritten
   * @param accumulated the candidate prefix built so far
   * @param offset the position in {@code word} still to be processed
   */
  private void enumerateMapReplacements(String word, String accumulated, int offset) {
    if (offset == word.length()) {
      trySuggestion(accumulated);
      return;
    }

    int length = accumulated.length();

    for (List<String> entries : speller.dictionary.mapTable) {
      for (String entry : entries) {
        if (word.regionMatches(offset, entry, 0, entry.length())) {
          for (String replacement : entries) {
            if (!entry.equals(replacement)) {
              String next = accumulated + replacement;
              int end = length + replacement.length();
              if (!fragmentChecker.hasImpossibleFragmentAround(next, length, end)) {
                enumerateMapReplacements(word, next, offset + entry.length());
              }
            }
          }
        }
      }
    }

    // Also continue with the original character left as is.
    String next = accumulated + word.charAt(offset);
    if (!fragmentChecker.hasImpossibleFragmentAround(next, length, length + 1)) {
      enumerateMapReplacements(word, next, offset + 1);
    }
  }

  /** Returns whether the speller's simple-word check yields {@code Boolean.TRUE} for the part. */
  private boolean checkSimpleWord(String part) {
    return Boolean.TRUE.equals(speller.checkSimpleWord(part.toCharArray(), part.length(), null));
  }

  /** Tries swapping each pair of adjacent characters; 4- and 5-letter words get extra swaps. */
  private void trySwappingChars(String word) {
    int length = word.length();
    for (int i = 0; i < length - 1; i++) {
      char c1 = word.charAt(i);
      char c2 = word.charAt(i + 1);
      trySuggestion(word.substring(0, i) + c2 + c1 + word.substring(i + 2));
    }

    if (length == 4 || length == 5) {
      tryDoubleSwapForShortWords(word, length);
    }
  }

  /**
   * ahev -> have, owudl -> would.
   *
   * <p>Swaps the first two and the last two characters at once; for 5-letter words additionally
   * tries swapping the second and third characters together with the last two.
   */
  private void tryDoubleSwapForShortWords(String word, int length) {
    char[] candidate = word.toCharArray();
    candidate[0] = word.charAt(1);
    candidate[1] = word.charAt(0);
    candidate[length - 1] = word.charAt(length - 2);
    candidate[length - 2] = word.charAt(length - 1);
    trySuggestion(new String(candidate));

    if (candidate.length == 5) {
      // restore the first character and swap positions 1 and 2 instead
      candidate[0] = word.charAt(0);
      candidate[1] = word.charAt(2);
      candidate[2] = word.charAt(1);
      trySuggestion(new String(candidate));
    }
  }

  /**
   * For each position, tries the upper-case form of the character (when it differs) and every
   * other character from the dictionary's neighbor key groups that contain it.
   */
  private void tryNeighborKeys(String word) {
    for (int i = 0; i < word.length(); i++) {
      char c = word.charAt(i);
      char up = Character.toUpperCase(c);
      if (up != c) {
        trySuggestion(word.substring(0, i) + up + word.substring(i + 1));
      }

      // check neighbor characters in keyboard string
      for (String group : speller.dictionary.neighborKeyGroups) {
        if (group.indexOf(c) >= 0) {
          for (int j = 0; j < group.length(); j++) {
            if (group.charAt(j) != c) {
              tryModifiedSuggestions(
                  i, word.substring(0, i) + group.charAt(j) + word.substring(i + 1));
            }
          }
        }
      }
    }
  }

  /**
   * Tries a candidate unless the fragment checker reports an impossible fragment around the
   * single modified character at {@code modOffset}.
   */
  private void tryModifiedSuggestions(int modOffset, String candidate) {
    if (!fragmentChecker.hasImpossibleFragmentAround(candidate, modOffset, modOffset + 1)) {
      trySuggestion(candidate);
    }
  }

  /** Tries swapping characters that are 2 to {@link #MAX_CHAR_DISTANCE} positions apart. */
  private void tryLongSwap(String word) {
    for (int i = 0; i < word.length(); i++) {
      for (int j = i + 2; j < word.length() && j <= i + MAX_CHAR_DISTANCE; j++) {
        char c1 = word.charAt(i);
        char c2 = word.charAt(j);
        String prefix = word.substring(0, i);
        String suffix = word.substring(j + 1);
        trySuggestion(prefix + c2 + word.substring(i + 1, j) + c1 + suffix);
      }
    }
  }

  /** Tries deleting each character in turn; single-character words are left alone. */
  private void tryRemovingChar(String word) {
    if (word.length() == 1) {
      return;
    }

    for (int i = 0; i < word.length(); i++) {
      trySuggestion(word.substring(0, i) + word.substring(i + 1));
    }
  }

  /** Tries inserting each try character at every position, including both ends. */
  private void tryAddingChar(String word) {
    for (int i = 0; i <= word.length(); i++) {
      String prefix = word.substring(0, i);
      String suffix = word.substring(i);
      for (char toInsert : tryChars) {
        tryModifiedSuggestions(prefix.length(), prefix + toInsert + suffix);
      }
    }
  }

  /**
   * Tries moving a character forward or backward by up to {@link #MAX_CHAR_DISTANCE} positions,
   * and moving each character except the last to the end of the word.
   */
  private void tryMovingChar(String word) {
    for (int i = 0; i < word.length(); i++) {
      String prefix = word.substring(0, i);
      for (int j = i + 2; j < word.length() && j <= i + MAX_CHAR_DISTANCE; j++) {
        // character i moved forward to just before position j
        trySuggestion(prefix + word.substring(i + 1, j) + word.charAt(i) + word.substring(j));
        // character j moved backward to position i
        trySuggestion(prefix + word.charAt(j) + word.substring(i, j) + word.substring(j + 1));
      }
      if (i < word.length() - 1) {
        trySuggestion(prefix + word.substring(i + 1) + word.charAt(i));
      }
    }
  }

  /** Tries replacing each character with every different try character. */
  private void tryReplacingChar(String word) {
    for (int i = 0; i < word.length(); i++) {
      String prefix = word.substring(0, i);
      String suffix = word.substring(i + 1);
      for (char toInsert : tryChars) {
        if (toInsert != word.charAt(i)) {
          tryModifiedSuggestions(prefix.length(), prefix + toInsert + suffix);
        }
      }
    }
  }

  /**
   * Perhaps we doubled two characters (for example vacation -> vacacation).
   *
   * <p>Counts consecutive positions whose character equals the one two places earlier; once the
   * run is long enough, tries the word with one two-character chunk removed.
   */
  private void tryTwoDuplicateChars(String word) {
    int dupLen = 0;
    for (int i = 2; i < word.length(); i++) {
      if (word.charAt(i) == word.charAt(i - 2)) {
        dupLen++;
        if (dupLen == 3 || dupLen == 2 && i >= 4) {
          trySuggestion(word.substring(0, i - 1) + word.substring(i + 1));
          dupLen = 0;
        }
      } else {
        dupLen = 0;
      }
    }
  }

  /**
   * Splits the word at each inner position (leaving at least one character at the start and two
   * at the end) and asks the speller whether the space-separated, and optionally dash-separated,
   * combination is accepted as a whole.
   *
   * @return the accepted split suggestions, possibly empty; the shared result set is not touched
   */
  private List<Suggestion> checkDictionaryForSplitSuggestions(String word) {
    List<Suggestion> splits = new ArrayList<>();
    boolean splitByDash = shouldSplitByDash();
    for (int i = 1; i < word.length() - 1; i++) {
      String w1 = word.substring(0, i);
      String w2 = word.substring(i);
      String spaced = w1 + " " + w2;
      if (speller.checkWord(spaced)) {
        splits.add(createSuggestion(spaced));
      }
      if (splitByDash) {
        String dashed = w1 + "-" + w2;
        if (speller.checkWord(dashed)) {
          splits.add(createSuggestion(dashed));
        }
      }
    }
    return splits;
  }

  /**
   * Splits the word at every position and, when both halves pass {@link #checkSimpleWord}, adds
   * the space-separated form; the dash-separated form is added too when both halves are longer
   * than one character and dash splitting is enabled.
   */
  private void trySplitting(String word) {
    for (int i = 1; i < word.length(); i++) {
      String w1 = word.substring(0, i);
      String w2 = word.substring(i);
      if (checkSimpleWord(w1) && checkSimpleWord(w2)) {
        result.add(createSuggestion(w1 + " " + w2));
        if (w1.length() > 1 && w2.length() > 1 && shouldSplitByDash()) {
          result.add(createSuggestion(w1 + "-" + w2));
        }
      }
    }
  }

  /** Dash splitting is attempted when the dictionary's try characters contain "-" or "a". */
  private boolean shouldSplitByDash() {
    return speller.dictionary.tryChars.contains("-") || speller.dictionary.tryChars.contains("a");
  }

  /**
   * Checks a candidate that has not been tried before and adds it to the result if accepted.
   *
   * @return {@code true} only if the candidate is new, accepted by the speller, and its
   *     suggestion was not already present in the result
   */
  private boolean trySuggestion(String candidate) {
    return tried.add(candidate)
        && speller.checkWord(candidate)
        && result.add(createSuggestion(candidate));
  }
}
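// (c) 2015 - 2025 Weber Informatics LLC | Privacy Policy -- appears to be the footer of the page this listing was copied from, not part of the source file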