org.apache.solr.common.util.GlobPatternUtil Maven / Gradle / Ivy

Go to download
Show more of this group Show more artifacts with this name
Show all versions of solr-solrj Show documentation
Apache Solr Solrj
The newest version!
/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements.  See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.apache.solr.common.util;

import java.util.ArrayDeque;
import java.util.ArrayList;
import java.util.Deque;

/**
 * Provides methods for matching glob patterns against input strings.
 *
 * <p>Two wildcards are supported: {@code ?} matches exactly one character and {@code *} matches
 * zero or more characters. Every other character matches itself, case-sensitively.
 */
public class GlobPatternUtil {

  /**
   * Matches an input string against a provided glob pattern. The algorithm is derived from
   * {@code FilenameUtils.wildcardMatch} in Apache Commons IO; it is re-implemented here instead of
   * bringing in commons-io as a dependency.
   *
   * <p>Unlike the upstream code, a {@code *} that directly precedes a {@code ?} is handled
   * correctly (for example {@code "*?x"} matches {@code "abx"}), because the pattern tokens are
   * normalized before matching.
   *
   * @see <a
   *     href="https://github.com/apache/commons-io/blob/master/src/main/java/org/apache/commons/io/FilenameUtils.java">This
   *     uses code from Apache Commons IO</a>
   * @param pattern the glob pattern to match against, may be null
   * @param input the input string to match against a glob pattern, may be null
   * @return true if the input string matches the glob pattern or both arguments are null, false
   *     otherwise (including when exactly one argument is null)
   */
  public static boolean matches(String pattern, String input) {
    if (input == null && pattern == null) {
      return true;
    }
    if (input == null || pattern == null) {
      return false;
    }
    final String[] wcs = splitOnTokens(pattern);
    boolean anyChars = false;
    int textIdx = 0;
    int wcsIdx = 0;
    // each entry is {token index, text index}: an alternative position at which the text token
    // following a '*' also occurs, to be tried if the current attempt fails
    final Deque<int[]> backtrack = new ArrayDeque<>(wcs.length);

    // loop around a backtrack stack, to handle complex * matching
    do {
      if (!backtrack.isEmpty()) {
        final int[] array = backtrack.pop();
        wcsIdx = array[0];
        textIdx = array[1];
        // saved positions always belong to a text token that was preceded by '*'
        anyChars = true;
      }

      // loop whilst tokens and text left to process
      while (wcsIdx < wcs.length) {
        final String token = wcs[wcsIdx];

        if (token.equals("?")) {
          // ? so move to next text char
          textIdx++;
          if (textIdx > input.length()) {
            // no character left for '?' to consume
            break;
          }
          // splitOnTokens never emits '*' directly before '?', so no pending '*' is lost here
          anyChars = false;

        } else if (token.equals("*")) {
          // set any chars status
          anyChars = true;
          if (wcsIdx == wcs.length - 1) {
            // trailing '*' swallows the rest of the input
            textIdx = input.length();
          }

        } else {
          // matching text token
          if (anyChars) {
            // any chars then try to locate text token
            textIdx = checkIndexOf(input, textIdx, token);
            if (textIdx == -1) {
              // token not found
              break;
            }
            // remember a later occurrence so it can be tried if this one leads to a dead end
            final int repeat = checkIndexOf(input, textIdx + 1, token);
            if (repeat >= 0) {
              backtrack.push(new int[] {wcsIdx, repeat});
            }
          } else if (!input.regionMatches(false, textIdx, token, 0, token.length())) {
            // matching from current position
            // couldn't match token
            break;
          }

          // matched text token, move text index to end of matched token
          textIdx += token.length();
          anyChars = false;
        }

        wcsIdx++;
      }

      // full match: every token consumed and every input character accounted for
      if (wcsIdx == wcs.length && textIdx == input.length()) {
        return true;
      }

    } while (!backtrack.isEmpty());

    return false;
  }

  /**
   * Splits a string into a number of tokens. The text is split by '?' and '*'; each '?' becomes
   * its own token and each run of literal characters becomes one token.
   *
   * <p>Every uninterrupted run of wildcards is normalized to all of its '?' tokens followed by at
   * most one '*' token. This collapses consecutive '*' and guarantees that '*' is never directly
   * followed by '?', which the matcher relies on. The normalization does not change which strings
   * the pattern accepts, because "zero or more characters, then one character" accepts the same
   * strings as "one character, then zero or more characters".
   *
   * @see <a
   *     href="https://github.com/apache/commons-io/blob/master/src/main/java/org/apache/commons/io/FilenameUtils.java">This
   *     uses code from Apache Commons IO</a>
   * @param text the text to split, not null
   * @return the array of tokens, never null; a text without wildcards yields a single token equal
   *     to the text itself
   */
  private static String[] splitOnTokens(final String text) {
    if (text.indexOf('?') == -1 && text.indexOf('*') == -1) {
      return new String[] {text};
    }

    final ArrayList<String> list = new ArrayList<>();
    final StringBuilder buffer = new StringBuilder();
    // true while the current wildcard run contains a '*' that has not been emitted yet
    boolean pendingStar = false;
    for (int i = 0; i < text.length(); i++) {
      final char ch = text.charAt(i);
      if (ch == '?' || ch == '*') {
        if (buffer.length() != 0) {
          list.add(buffer.toString());
          buffer.setLength(0);
        }
        if (ch == '?') {
          list.add("?");
        } else {
          // defer the '*' until the wildcard run ends so it lands after every '?' of the run
          pendingStar = true;
        }
      } else {
        if (pendingStar) {
          list.add("*");
          pendingStar = false;
        }
        buffer.append(ch);
      }
    }
    // at most one of the following is pending: the text ends in either a wildcard or a literal
    if (pendingStar) {
      list.add("*");
    }
    if (buffer.length() != 0) {
      list.add(buffer.toString());
    }

    return list.toArray(new String[0]);
  }

  /**
   * Finds the first case-sensitive occurrence of one string inside another, starting at a
   * specific index.
   *
   * @see <a
   *     href="https://github.com/apache/commons-io/blob/master/src/main/java/org/apache/commons/io/FilenameUtils.java">This
   *     uses code from Apache Commons IO</a>
   * @param str the string to search in, not null
   * @param strStartIndex the index to start at in str
   * @param search the string to search for, not null
   * @return the first index at or after {@code strStartIndex} where {@code search} occurs, or -1 if
   *     there is no such occurrence
   * @throws NullPointerException if either string is null
   */
  private static int checkIndexOf(final String str, final int strStartIndex, final String search) {
    // last index at which search could still fit inside str
    final int endIndex = str.length() - search.length();
    if (endIndex >= strStartIndex) {
      for (int i = strStartIndex; i <= endIndex; i++) {
        if (str.regionMatches(false, i, search, 0, search.length())) {
          return i;
        }
      }
    }
    return -1;
  }
}