org.apache.lucene.analysis.hunspell.CheckCompoundPattern Maven / Gradle / Ivy
Go to download
Show more of this group Show more artifacts with this name
Show all versions of lucene-analysis-common Show documentation
Show all versions of lucene-analysis-common Show documentation
Apache Lucene (module: common)
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.lucene.analysis.hunspell;
import org.apache.lucene.util.CharsRef;
class CheckCompoundPattern {
private final String endChars;
private final String beginChars;
private final String replacement;
private final char[] endFlags;
private final char[] beginFlags;
private final Dictionary dictionary;
CheckCompoundPattern(
String unparsed, Dictionary.FlagParsingStrategy strategy, Dictionary dictionary) {
this.dictionary = dictionary;
String[] parts = unparsed.split("\\s+");
if (parts.length < 3) {
throw new IllegalArgumentException("Invalid pattern: " + unparsed);
}
int flagSep = parts[1].indexOf("/");
endChars = flagSep < 0 ? parts[1] : parts[1].substring(0, flagSep);
endFlags = flagSep < 0 ? new char[0] : strategy.parseFlags(parts[1].substring(flagSep + 1));
flagSep = parts[2].indexOf("/");
beginChars = flagSep < 0 ? parts[2] : parts[2].substring(0, flagSep);
beginFlags = flagSep < 0 ? new char[0] : strategy.parseFlags(parts[2].substring(flagSep + 1));
replacement = parts.length == 3 ? null : parts[3];
}
@Override
public String toString() {
return endChars + " " + beginChars + (replacement == null ? "" : " -> " + replacement);
}
boolean prohibitsCompounding(CharsRef word, int breakPos, Root> rootBefore, Root> rootAfter) {
if (isNonAffixedPattern(endChars)) {
if (!charsMatch(word, breakPos - rootBefore.word.length(), rootBefore.word)) {
return false;
}
} else if (!charsMatch(word, breakPos - endChars.length(), endChars)) {
return false;
}
if (isNonAffixedPattern(beginChars)) {
if (!charsMatch(word, breakPos, rootAfter.word)) {
return false;
}
} else if (!charsMatch(word, breakPos, beginChars)) {
return false;
}
if (endFlags.length > 0 && !hasAllFlags(rootBefore, endFlags)) {
return false;
}
//noinspection RedundantIfStatement
if (beginFlags.length > 0 && !hasAllFlags(rootAfter, beginFlags)) {
return false;
}
return true;
}
private static boolean isNonAffixedPattern(String pattern) {
return pattern.length() == 1 && pattern.charAt(0) == '0';
}
private boolean hasAllFlags(Root> root, char[] flags) {
for (char flag : flags) {
if (!dictionary.hasFlag(root.entryId, flag)) {
return false;
}
}
return true;
}
CharsRef expandReplacement(CharsRef word, int breakPos) {
if (replacement != null && charsMatch(word, breakPos, replacement)) {
return new CharsRef(
new String(word.chars, 0, word.offset + breakPos)
+ endChars
+ beginChars
+ word.subSequence(breakPos + replacement.length(), word.length));
}
return null;
}
int endLength() {
return endChars.length();
}
private static boolean charsMatch(CharsRef word, int offset, CharSequence pattern) {
int len = pattern.length();
if (word.length - offset < len || offset < 0 || offset > word.length) {
return false;
}
for (int i = 0; i < len; i++) {
if (word.chars[word.offset + offset + i] != pattern.charAt(i)) {
return false;
}
}
return true;
}
}
© 2015 - 2025 Weber Informatics LLC | Privacy Policy