All downloads are FREE. Search and download functionality uses the official Maven repository.

org.apache.lucene.analysis.hunspell.CheckCompoundPattern Maven / Gradle / Ivy

/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements.  See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.apache.lucene.analysis.hunspell;

import org.apache.lucene.util.CharsRef;

/**
 * A single parsed compound-pattern rule: a pair of character sequences (each with optional flags)
 * that are matched around a compound break position, plus an optional replacement string.
 *
 * <p>The rule is parsed from a whitespace-separated line of the form {@code <keyword>
 * endChars[/flags] beginChars[/flags] [replacement]}. A pattern consisting of the single character
 * {@code 0} is special: instead of literal characters, the text of the corresponding root word is
 * matched.
 */
class CheckCompoundPattern {
  /** Characters expected immediately before the break position ({@code "0"} = the root's text). */
  private final String endChars;

  /** Characters expected starting at the break position ({@code "0"} = the root's text). */
  private final String beginChars;

  /** Optional fourth token of the rule; {@code null} when the rule has only three tokens. */
  private final String replacement;

  /** Flags the root before the break must all have; empty when none were given. */
  private final char[] endFlags;

  /** Flags the root after the break must all have; empty when none were given. */
  private final char[] beginFlags;

  private final Dictionary dictionary;

  /**
   * Parses a rule line.
   *
   * @param unparsed the whole rule line; it is split on whitespace and the first token is ignored
   *     (presumably the directive keyword; confirm against the caller)
   * @param strategy used to parse the flag part that follows a {@code /} in the second and third
   *     tokens; not consulted when a token has no {@code /}
   * @param dictionary the dictionary later queried for root flags
   * @throws IllegalArgumentException if the line has fewer than three whitespace-separated tokens
   */
  CheckCompoundPattern(
      String unparsed, Dictionary.FlagParsingStrategy strategy, Dictionary dictionary) {
    this.dictionary = dictionary;
    String[] parts = unparsed.split("\\s+");
    if (parts.length < 3) {
      throw new IllegalArgumentException("Invalid pattern: " + unparsed);
    }

    // Second token: "chars" or "chars/flags".
    int flagSep = parts[1].indexOf('/');
    endChars = flagSep < 0 ? parts[1] : parts[1].substring(0, flagSep);
    endFlags = flagSep < 0 ? new char[0] : strategy.parseFlags(parts[1].substring(flagSep + 1));

    // Third token: same layout as the second one.
    flagSep = parts[2].indexOf('/');
    beginChars = flagSep < 0 ? parts[2] : parts[2].substring(0, flagSep);
    beginFlags = flagSep < 0 ? new char[0] : strategy.parseFlags(parts[2].substring(flagSep + 1));

    // Tokens beyond the fourth are ignored.
    replacement = parts.length == 3 ? null : parts[3];
  }

  /** Renders the rule as {@code "endChars beginChars"} followed by {@code " -> replacement"} if set. */
  @Override
  public String toString() {
    return endChars + " " + beginChars + (replacement == null ? "" : " -> " + replacement);
  }

  /**
   * Tells whether this rule matches at the given break position.
   *
   * @param word the word being split
   * @param breakPos break position, relative to the start of {@code word}
   * @param rootBefore root of the part that ends at {@code breakPos}
   * @param rootAfter root of the part that starts at {@code breakPos}
   * @return {@code true} only if the text before the break matches the end pattern, the text after
   *     it matches the begin pattern, and both roots carry all of the respective required flags
   */
  boolean prohibitsCompounding(CharsRef word, int breakPos, Root rootBefore, Root rootAfter) {
    if (isNonAffixedPattern(endChars)) {
      // "0" pattern: the text right before the break must be exactly the root's own text.
      if (!charsMatch(word, breakPos - rootBefore.word.length(), rootBefore.word)) {
        return false;
      }
    } else if (!charsMatch(word, breakPos - endChars.length(), endChars)) {
      return false;
    }

    if (isNonAffixedPattern(beginChars)) {
      // "0" pattern: the text right after the break must start with the root's own text.
      if (!charsMatch(word, breakPos, rootAfter.word)) {
        return false;
      }
    } else if (!charsMatch(word, breakPos, beginChars)) {
      return false;
    }

    if (endFlags.length > 0 && !hasAllFlags(rootBefore, endFlags)) {
      return false;
    }
    //noinspection RedundantIfStatement
    if (beginFlags.length > 0 && !hasAllFlags(rootAfter, beginFlags)) {
      return false;
    }

    return true;
  }

  /** Returns whether the pattern is the single character {@code 0}. */
  private static boolean isNonAffixedPattern(String pattern) {
    return pattern.length() == 1 && pattern.charAt(0) == '0';
  }

  /** Returns whether the dictionary reports every one of {@code flags} for the root's entry. */
  private boolean hasAllFlags(Root root, char[] flags) {
    for (char flag : flags) {
      if (!dictionary.hasFlag(root.entryId, flag)) {
        return false;
      }
    }
    return true;
  }

  /**
   * If this rule has a replacement and {@code word} contains it at {@code breakPos}, returns a new
   * word in which that occurrence is substituted by {@code endChars + beginChars}.
   *
   * @param word the word to expand
   * @param breakPos position of the replacement, relative to the start of {@code word}
   * @return the expanded word, or {@code null} if there is no replacement or it does not match
   */
  CharsRef expandReplacement(CharsRef word, int breakPos) {
    if (replacement != null && charsMatch(word, breakPos, replacement)) {
      return new CharsRef(
          // The prefix must start at word.offset, not at index 0 of the backing array; otherwise
          // characters that precede the word in the array would leak into the result.
          new String(word.chars, word.offset, breakPos)
              + endChars
              + beginChars
              + word.subSequence(breakPos + replacement.length(), word.length));
    }
    return null;
  }

  /** Returns the length of the end pattern string. */
  int endLength() {
    return endChars.length();
  }

  /**
   * Checks whether {@code pattern} occurs in {@code word} at {@code offset} (relative to the start
   * of the word). Returns {@code false}, rather than throwing, when the pattern would not fit
   * inside the word or {@code offset} is out of range.
   */
  private static boolean charsMatch(CharsRef word, int offset, CharSequence pattern) {
    int len = pattern.length();
    if (word.length - offset < len || offset < 0 || offset > word.length) {
      return false;
    }

    for (int i = 0; i < len; i++) {
      if (word.chars[word.offset + offset + i] != pattern.charAt(i)) {
        return false;
      }
    }
    return true;
  }
}




© 2015 - 2024 Weber Informatics LLC | Privacy Policy