All Downloads are FREE. Search and download functionalities are using the official Maven repository.

com.github.arturopala.gitignore.Glob.scala Maven / Gradle / Ivy

/*
 * Copyright 2021 Artur Opala
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package com.github.arturopala.gitignore

/** Globbing pathnames.
  *
  * Wildcard pattern matcher implementing the same rules as
  * https://www.man7.org/linux/man-pages/man7/glob.7.html
  *
  * Note that wildcard patterns are not regular expressions, although
  * they are a bit similar.  First of all, they match filenames,
  * rather than text, and secondly, the conventions are not the same:
  * for example, in a regular expression '*' means zero or more
  * copies of the preceding thing.
  *
  * Pattern syntax:
  *
  * A string is a wildcard pattern if it contains one of the
  * characters '?', '*' or '['.  Globbing is the operation that
  * expands a wildcard pattern into the list of pathnames matching
  * the pattern. Matching is defined by:
  *
  * A '?' (not between brackets) matches any single character.
  *
  * A '*' (not between brackets) matches any string, including the
  *       empty string.
  *
  * A '/' in a pathname cannot be matched by a '?' or '*' wildcard,
  *       or by a range like "[.-0]". A range containing an explicit
  *       '/' character is syntactically incorrect.
  *
  * An expression "[...]" where the first character after the leading
  * '[' is not an '!' matches a single character, namely any of the
  * characters enclosed by the brackets.  The string enclosed by the
  * brackets cannot be empty; therefore ']' can be allowed between
  * the brackets, provided that it is the first character.  (Thus,
  * "[][!]" matches the three characters '[', ']' and '!'.)
  *
  * There is one special convention: two characters separated by '-'
  * denote a range.  (Thus, "[A-Fa-f0-9]" is equivalent to
  * "[ABCDEFabcdef0123456789]".)  One may include '-' in its literal
  * meaning by making it the first or last character between the
  * brackets.  (Thus, "[]-]" matches just the two characters ']' and
  * '-', and "[--0]" matches the three characters '-', '.', '0',
  * since '/' cannot be matched.)
  *
  * Now that regular expressions have bracket expressions where the
  * negation is indicated by a '^', POSIX has declared the effect of
  * a wildcard pattern "[^...]" to be undefined.
  *
  * An expression "[!...]" matches a single character, namely any
  * character that is not matched by the expression obtained by
  * removing the first '!' from it.  (Thus, "[!]a-]" matches any
  * single character except ']', 'a' and '-'.)
  *
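  * One can remove the special meaning of '?', '*' and '[' by
  * preceding them by a backslash, or, in case this is part of a
  * shell command line, enclosing them in quotes.  Between brackets
  * these characters stand for themselves.  Thus, "[[?*\]" matches
  * the four characters '[', '?', '*' and '\'.
  *
  * Implementation note: `Glob.compile` treats a backslash as an escape
  * character between brackets as well, so in this implementation the
  * trailing backslash of the example above escapes the closing ']'
  * and the bracket expression is left unterminated.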
  */
object Glob {

  /** Checks whether the given string contains any glob wildcard character.
    *
    * The check is purely character-based: it returns `true` as soon as one of
    * '?', '*' or '[' is found, and it does not take a preceding backslash
    * into account.
    *
    * @param pattern the string to inspect
    * @return `true` if the string contains '?', '*' or '[', `false` otherwise
    */
  final def isWildcardPattern(pattern: String): Boolean =
    // `exists` stops at the first wildcard instead of folding over the whole string
    pattern.exists {
      case '?' | '*' | '[' => true
      case _               => false
    }

  /** Compiles a glob expression into a re-usable [[Pattern]] instance.
    *
    * The expression is scanned once, left to right, while tracking the pending
    * literal text, whether the current character is escaped by a preceding
    * backslash, and whether the scan is currently outside of a bracket expression.
    * Two adjacent unescaped '*' characters collapse into a single [[AnythingPattern]].
    * An unterminated bracket expression is kept as literal text.
    *
    * @param pattern the glob expression to compile
    * @return the single compiled pattern, or a [[CompositePattern]] when the
    *         expression consists of more than one part
    * @throws Exception if nothing could be compiled, which happens for an empty
    *                   expression, or if a bracket expression is rejected by
    *                   [[CharacterCheck.compile]]
    */
  final def compile(pattern: String): Pattern = {

    // Prepends the pending literal text, if there is any, to the (reversed) list of patterns.
    def flush(literal: String, compiled: List[Pattern]): List[Pattern] =
      if (literal.isEmpty) compiled else LiteralPattern(literal) :: compiled

    // state: (patterns in reverse order, pending literal, not escaped, outside bracket)
    val initial: (List[Pattern], String, Boolean, Boolean) =
      (List.empty[Pattern], "", true, true)

    val (compiled, pending, _, bracketClosed) =
      pattern.foldLeft(initial) { (state, c) =>
        val (ps, acc, notEscaped, outsideBracket) = state
        // wildcards are only meaningful when unescaped and outside of brackets
        val special = notEscaped && outsideBracket
        c match {
          case '?' if special =>
            (AnySingleCharacterPattern :: flush(acc, ps), "", true, true)

          case '*' if special =>
            val next: List[Pattern] = flush(acc, ps) match {
              // a second star directly after the first one upgrades it to "anything"
              case AnyStringPattern :: rest => AnythingPattern :: rest
              case other                    => AnyStringPattern :: other
            }
            (next, "", true, true)

          case '[' if special =>
            (flush(acc, ps), "", true, false)

          // an empty bracket content means ']' is the first, literal character
          case ']' if notEscaped && !outsideBracket && acc.nonEmpty =>
            (new BracketPattern(acc) :: ps, "", true, true)

          case _ if notEscaped =>
            (ps, acc + c, c != '\\', outsideBracket)

          case _ =>
            // previous character was the escaping backslash, replace it
            (ps, acc.dropRight(1) + c, true, outsideBracket)
        }
      }

    // an unterminated bracket expression is a literal, opening bracket included
    val trailing = if (bracketClosed) pending else "[" + pending

    flush(trailing, compiled) match {
      case Nil           => throw new Exception(s"Glob compilation failed for pattern: $pattern")
      case single :: Nil => single
      case several       => CompositePattern(several.reverse)
    }

  }

  /** A compiled representation of a glob pattern.
    *
    * The hierarchy is sealed; instances are produced by [[Glob.compile]].
    */
  sealed trait Pattern {

    /** Creates a matcher that will match the given input against this pattern.
      *
      * @param input the character sequence to be searched
      * @return a new [[Matcher]] bound to the input and this pattern
      */
    final def matcher(input: CharSequence): Matcher =
      Matcher(input, this)

    /** The minimum number of characters an input must have to be considered a match for this pattern. */
    def minWidth: Int

    /** The textual glob expression (not a regular expression) this pattern stands for.
      *
      * Note that [[Glob.compile]] passes only the text between the brackets
      * to a [[BracketPattern]], so for bracket expressions the enclosing
      * '[' and ']' are not part of this value.
      */
    val pattern: String

  }

  /** Character check defined between brackets, either class or range.
    *
    * Instances are created from the bracket content by `CharacterCheck.compile`.
    */
  sealed trait CharacterCheck {

    /** Check if given character can be accepted.
      *
      * @param c the character to test
      * @return `true` if the character is accepted by this check
      */
    def check(c: Char): Boolean
  }

  /** A type of pattern with variable match length,
    * which can possibly consume nothing or all the remaining input.
    *
    * The matcher's `findB` treats a leading pattern of this type specially:
    * the remaining patterns are located first and the wildcard is matched afterwards.
    */
  sealed trait WildcardPattern extends Pattern {
    // a wildcard may match the empty string; single-character patterns override this with 1
    override val minWidth: Int = 0
  }

  /** A type of pattern matching single character only.
    *
    * Although it always consumes exactly one character, it is also
    * a [[WildcardPattern]], so the matcher handles it on the wildcard path.
    */
  sealed trait SingleCharacterPattern extends Pattern with WildcardPattern {
    // exactly one character is required
    override val minWidth: Int = 1

    /** Checks whether the given character is accepted by this pattern.
      *
      * @param c the character to test
      * @return `true` if the character matches
      */
    def matches(c: Char): Boolean
  }

  /** A pattern made of several nested patterns which have to match one after another.
    *
    * @param patterns the nested patterns, in matching order
    */
  final case class CompositePattern(patterns: List[Pattern]) extends Pattern {

    /** Sum of the minimum widths of all nested patterns. */
    override val minWidth: Int = patterns.map(_.minWidth).sum

    /** Concatenation of the textual forms of all nested patterns. */
    override val pattern: String = patterns.map(_.pattern).mkString
  }

  /** A pattern without any wildcards, matched character by character.
    *
    * @param pattern the literal text to be found in the input
    */
  final case class LiteralPattern(pattern: String) extends Pattern {

    /** A literal needs at least as many characters as it has itself. */
    override def minWidth: Int = pattern.length
  }

  /** A wildcard pattern matching anything but path separator '/' character.
    *
    * Produced by [[Glob.compile]] for a single unescaped '*' outside of brackets.
    */
  final case object AnyStringPattern extends Pattern with WildcardPattern {
    override val pattern: String = "*"
  }

  /** A wildcard pattern matching anything, including path separator.
    *
    * Produced by [[Glob.compile]] when an unescaped '*' directly follows another one.
    */
  final case object AnythingPattern extends Pattern with WildcardPattern {
    override val pattern: String = "**"
  }

  /** A wildcard pattern matching any single character except path separator '/' character.
    *
    * Declared as a `case object`, like [[AnyStringPattern]] and [[AnythingPattern]],
    * so that it prints by its name (for example inside the debug output of
    * a [[CompositePattern]]) instead of by an identity-based object reference.
    */
  final case object AnySingleCharacterPattern extends Pattern with SingleCharacterPattern {

    /** Accepts every character but the path separator. */
    override def matches(c: Char): Boolean = c != '/'

    override val pattern: String = "?"
  }

  /** A wildcard pattern matching either class or range of characters.
    *
    * Construction compiles the expression eagerly, so it fails with the
    * exception thrown by `CharacterCheck.compile` when the expression is
    * empty or contains an explicit '/'.
    *
    * @param pattern the text between the brackets, without the enclosing '[' and ']'
    */
  final case class BracketPattern(pattern: String) extends Pattern with SingleCharacterPattern {

    /** The check compiled from the bracket content. */
    val characterCheck: CharacterCheck = CharacterCheck.compile(pattern)

    /** Delegates to the compiled character check. */
    override def matches(c: Char): Boolean = characterCheck.check(c)
  }

  /** Factory of [[CharacterCheck]] instances for character classes and ranges. */
  final object CharacterCheck {

    /** Turns the text found between the brackets into a [[CharacterCheck]].
      *
      * A leading '!' negates the whole expression. A non-negated expression
      * consisting of a single check is returned as is, otherwise the checks
      * are wrapped in a [[CompositeCharacterCheck]].
      *
      * @param pattern the bracket content, without the enclosing brackets
      * @throws Exception if the content is empty or contains an explicit '/'
      */
    def compile(pattern: String): CharacterCheck =
      pattern.headOption match {
        case None =>
          throw new Exception("A character check pattern cannot be empty.")

        case Some('!') =>
          NegatedCompositeCharacterCheck(CharacterCheck.compileInternal(pattern.tail))

        case Some(_) =>
          compileInternal(pattern) match {
            case single :: Nil => single
            case several       => CompositeCharacterCheck(several)
          }
      }

    /** Splits the bracket content into a list of class and range checks.
      *
      * The resulting list is in reverse order of appearance.
      *
      * @param pattern the bracket content with any leading '!' already removed
      * @throws Exception if the content contains an explicit '/'
      */
    def compileInternal(pattern: String): List[CharacterCheck] = {
      // state: (checks collected so far, pending characters, range just opened by '-')
      val start: (List[CharacterCheck], String, Boolean) =
        (List.empty[CharacterCheck], "", false)

      val (collected, pending, _) =
        pattern.foldLeft(start) { (state, c) =>
          val (checks, chars, rangeOpen) = state
          c match {
            case '/' =>
              throw new Exception(
                "A character check range containing an explicit '/' character is syntactically incorrect."
              )

            // a dash after some character opens a range starting at that character,
            // everything before it becomes a plain character class
            case '-' if chars.nonEmpty =>
              val flushed =
                if (chars.length > 1) CharacterClassCheck(chars.init) :: checks
                else checks
              (flushed, s"${chars.last}-", true)

            // the character following the dash closes the range
            case upper if rangeOpen =>
              (CharacterRangeCheck(chars.head, upper) :: checks, "", false)

            case other =>
              (checks, chars + other, false)
          }
        }

      // whatever is left, including a trailing dash, is a plain character class
      if (pending.isEmpty) collected
      else CharacterClassCheck(pending) :: collected
    }
  }

  /** A check accepting a character when at least one of the nested checks accepts it.
    *
    * The path separator '/' is never accepted.
    *
    * @param checks the nested positive checks
    */
  final case class CompositeCharacterCheck(checks: List[CharacterCheck]) extends CharacterCheck {
    override def check(c: Char): Boolean =
      c != '/' && checks.exists(_.check(c))
  }

  /** A check accepting a character when none of the nested checks accepts it.
    *
    * The path separator '/' is never accepted.
    *
    * @param checks the nested checks to be negated
    */
  final case class NegatedCompositeCharacterCheck(checks: List[CharacterCheck]) extends CharacterCheck {
    override def check(c: Char): Boolean =
      c != '/' && !checks.exists(_.check(c))
  }

  /** Accepts a character if it is one of the listed characters.
    *
    * The string cannot be empty; therefore ']' can be allowed,
    * provided that it is the first character.
    * (Thus, "][!" matches the three characters '[', ']' and '!'.)
    * The path separator '/' is never accepted.
    *
    * @param characters the accepted characters
    */
  final case class CharacterClassCheck(characters: String) extends CharacterCheck {
    override def check(c: Char): Boolean =
      c != '/' && characters.exists(_ == c)
  }

  /** Accepts a character lying within the given inclusive range.
    *
    * There is one special convention: two characters separated by '-'
    * denote a range.  (Thus, "[A-Fa-f0-9]" is equivalent to
    * "[ABCDEFabcdef0123456789]".)  One may include '-' in its literal
    * meaning by making it the first or last character between the
    * brackets.  (Thus, "[]-]" matches just the two characters ']' and
    * '-', and "[--0]" matches the three characters '-', '.', '0',
    * since '/' cannot be matched.)
    *
    * @param from the first accepted character
    * @param to the last accepted character
    */
  final case class CharacterRangeCheck(from: Char, to: Char) extends CharacterCheck {
    override def check(c: Char): Boolean =
      c != '/' && from <= c && c <= to
  }

  /** An engine that performs match operations on a character sequence by interpreting a Pattern.
    *
    * Instances are created with `Matcher.apply` or [[Pattern.matcher]].
    */
  sealed trait Matcher {

    /** Attempts to find the next subsequence of the input sequence that matches the pattern.
      *
      * @return `true` if a match has been found
      */
    def find(): Boolean

    /** Returns the start index of the previous match.
      * @throws IllegalStateException - If no match has yet been attempted, or if the previous match operation failed
      */
    def start(): Int

    /** Returns the offset after the last character matched.
      * @throws IllegalStateException - If no match has yet been attempted, or if the previous match operation failed
      */
    def end(): Int
  }

  final object Matcher {

    /** Creates a stateful [[Matcher]] searching the given input for the given pattern.
      *
      * The first `find()` starts at position 0, every following one starts
      * one position after the start reported by the previous attempt.
      *
      * @param value the input to search
      * @param pattern the compiled pattern to look for
      */
    final def apply(value: CharSequence, pattern: Pattern): Matcher =
      new Matcher {

        // outcome of the most recent find(); the sentinels mean "no attempt yet"
        private var matched = false
        private var matchStart = Int.MaxValue
        private var matchEnd = Int.MinValue

        override def find(): Boolean = {
          val from = if (matchStart == Int.MaxValue) 0 else matchStart + 1
          val (found, foundStart, foundEnd) = Matcher.find(value, pattern, from)
          matched = found
          matchStart = foundStart
          matchEnd = foundEnd
          matched
        }

        override def start(): Int = {
          if (!matched || matchStart == Int.MaxValue) throw new IllegalStateException
          matchStart
        }

        override def end(): Int = {
          if (!matched || matchEnd == Int.MinValue) throw new IllegalStateException
          matchEnd
        }
      }

    /** Searches the input for the first match of the pattern, starting at the given position.
      *
      * A [[Zoom]] over the input is created, then the contour of the pattern is
      * computed to decide whether a match is possible at all. If the first
      * search attempt fails, further frames are computed with
      * [[computeNextFrame]] and searched until a match is found or no frame is left.
      *
      * @param value the input to search
      * @param pattern the compiled pattern to look for
      * @param startPosition the position the [[Zoom]] is created with, 0 by default
      * @return a triple of the match result and the values of `zoom.start()` and
      *         `zoom.end()`; the positions are returned for a failed match as well
      */
    final def find(value: CharSequence, pattern: Pattern, startPosition: Int = 0): (Boolean, Int, Int) = {
      Debug.debug(s"Finding ${pattern.pattern} as $pattern in $value\n----------------")
      val zoom = Zoom(value, startPosition)
      val possible = computeContour(zoom, pattern) &&
        zoom.closeUpFrameAndResetContour
      // remember the initial frame, its upper bound limits all following frames
      val (minFrom, maxTo) = zoom.frame
      var result = possible && findA(zoom, pattern, leftToRight = true, adjacent = false, level = 0)
      var continue = possible && !result
      // the frame is set back to the initial one before the next frame is computed
      // NOTE(review): presumably because findA moves the frame of the zoom; confirm against Zoom
      if (continue && computeNextFrame(zoom.setFrame(minFrom, maxTo), pattern, maxTo)) {
        Debug.debug(s"Max to $maxTo\n   $zoom")
        while (continue) {
          Debug.debug("-" * 32)
          // keep the frame of this attempt to derive the next one from it
          val (prevFrom, prevTo) = zoom.frame
          result = findA(zoom, pattern, leftToRight = true, adjacent = false, level = 0)
          continue = !result && computeNextFrame(zoom.setFrame(prevFrom, prevTo), pattern, maxTo)
        }
      }
      Debug.debug(result, s"${if (result) "Found" else "Not found"} at (${zoom.start()},${zoom.end()})")
      (result, zoom.start(), zoom.end())
    }

    /** Moves the zoom to the next frame in which a match of the pattern is possible.
      *
      * The frame is squeezed repeatedly by `zoom.squeezeRightOrLeft` and the
      * contour is recomputed after each step, until either squeezing reports
      * that no further frame exists or a frame with a possible match is found.
      *
      * @param zoom the zoom to modify in place
      * @param pattern the pattern whose contour is computed
      * @param maxTo the upper bound passed on to `zoom.squeezeRightOrLeft`
      * @return `true` if a frame exists in which the match is possible
      */
    final def computeNextFrame(zoom: Zoom, pattern: Pattern, maxTo: Int): Boolean = {
      var exists = true
      var possible = false
      while (exists && !possible) {
        exists = zoom.squeezeRightOrLeft(1, pattern.minWidth, maxTo)
        // the contour is recomputed even when squeezing has just reported no further frame
        possible = computeContour(zoom, pattern) &&
          zoom.closeUpFrameAndResetContour
      }
      exists && possible
    }

    /** Computes the contour of the pattern within the zoom and reports whether a match is possible.
      *
      * The zoom is modified in place. For a [[CompositePattern]] the contour of
      * the first nested pattern is united with the contours of the remaining
      * ones, and the united contour has to be at least as long as the minimum
      * width of the composite.
      *
      * @param zoom the zoom to compute the contour in
      * @param pattern the pattern to compute the contour for
      * @return `true` if the match is possible, `false` otherwise
      */
    final def computeContour(zoom: Zoom, pattern: Pattern): Boolean = {
      val r = pattern match {
        case CompositePattern(Nil) =>
          false

        case cp @ CompositePattern(pattern :: patterns) =>
          // builds the united contour of the given patterns, each one computed on a copy of the zoom;
          // the Boolean outcomes of the nested computations are discarded, only the copies are used
          def next(p: List[Pattern]): Zoom = p match {
            case Nil =>
              zoom
            case p :: Nil =>
              val z = zoom.copy
              computeContour(z, p)
              z
            case p :: ps =>
              val z = zoom.copy
              computeContour(z, p) &&
              z.unionContour(next(ps))
              z
          }
          computeContour(zoom, pattern) &&
          zoom.unionContour(next(patterns)) &&
          zoom.contourLength >= cp.minWidth

        case LiteralPattern(literal) =>
          zoom.lookupFor(literal)
        case p: SingleCharacterPattern =>
          zoom.lookupWhile(p.matches, maxSteps = 1)
        // both string wildcards are handled the same way here, '/' is not considered at this stage
        case AnyStringPattern =>
          zoom.takeAll()
        case AnythingPattern =>
          zoom.takeAll()
      }
      Debug.debug(r, s"Match ${pattern.pattern} is ${if (r) "possible" else "not possible"} in\n   $zoom")
      r
    }

    /** Tries to match a single pattern within the zoom by dispatching on the kind of the pattern.
      *
      * Composite patterns are delegated to [[findB]]; every other kind is mapped
      * onto the corresponding lookup operation of the zoom, in the requested direction.
      *
      * @param zoom the zoom to search in, modified in place by the lookup
      * @param pattern the pattern to match
      * @param leftToRight `true` to use the right-going lookups, `false` to use the left-going ones
      * @param adjacent for a literal, selects 0 instead of `Int.MaxValue` as the
      *                 second lookup argument; it has no effect on the wildcard kinds
      * @param level the nesting level, used for the debug output
      * @return the result of the lookup
      */
    private def findA(zoom: Zoom, pattern: Pattern, leftToRight: Boolean, adjacent: Boolean, level: Int): Boolean = {
      Debug.debug(
        level,
        s"findA ${pattern.pattern} ${if (leftToRight) ">>>" else "<<<"} ${if (adjacent) "adjacent" else ""} in\n   $zoom"
      )
      pattern match {
        case CompositePattern(patterns) =>
          findB(zoom, patterns, leftToRight, adjacent, level)

        case LiteralPattern(literal) =>
          // NOTE(review): the second argument is presumably the maximum allowed distance
          // of the literal from the current position; confirm against Zoom
          if (leftToRight) zoom.lookupRightFor(literal, if (adjacent) 0 else Int.MaxValue)
          else zoom.lookupLeftFor(literal, if (adjacent) 0 else Int.MaxValue)

        case p: SingleCharacterPattern =>
          if (leftToRight) zoom.lookupRightWhile(p.matches, 1)
          else zoom.lookupLeftWhile(p.matches, 1)

        case AnyStringPattern =>
          // a single star stops at the path separator
          if (leftToRight) zoom.lookupRightUntil(_ == '/', minSteps = 0)
          else zoom.lookupLeftUntil(_ == '/', minSteps = 0)

        case AnythingPattern =>
          if (leftToRight) zoom.takeAllFromLeft()
          else zoom.takeAllFromRight()
      }
    }

    /** Tries to match a sequence of patterns within the zoom.
      *
      * An empty sequence never matches and a single pattern is delegated to
      * [[findA]]. When the sequence starts with a [[WildcardPattern]] (which
      * includes the single character patterns), the remaining patterns are
      * searched first, in reverse order and in the opposite direction, on
      * a copy of the zoom; the wildcard is matched afterwards and the copy is
      * merged back. Any other leading pattern is matched first and the rest
      * of the sequence has to follow it adjacently.
      *
      * @param zoom the zoom to search in, modified in place
      * @param patterns the patterns to match, in the order given by the direction
      * @param leftToRight the direction of the search
      * @param adjacent whether the first pattern has to match adjacently
      * @param level the nesting level, used for the debug output
      * @return `true` if the whole sequence has been matched
      */
    private def findB(
      zoom: Zoom,
      patterns: List[Pattern],
      leftToRight: Boolean,
      adjacent: Boolean,
      level: Int
    ): Boolean = {
      Debug.debug(
        level,
        s"findB P${patterns.size} L$level ${if (leftToRight) ">>>" else "<<<"} ${if (adjacent) "adjacent"
        else ""} in\n   $zoom"
      )
      val r = patterns match {
        case Nil => false

        case p :: Nil =>
          findA(zoom, p, leftToRight, adjacent, level)

        case (g: WildcardPattern) :: ps =>
          Debug.debug(s"L$level wildcard ${g.pattern}")
          val zoom1 = zoom.copyFrameAndResetContour
          // the frame is resized by the minimum width of the wildcard
          // NOTE(review): presumably this reserves room for the wildcard; confirm against Zoom
          zoom1.resizeFrame(g.minWidth, leftToRight)
          // find the rest from the opposite end, then the wildcard next to it
          findB(zoom1, ps.reverse, !leftToRight, false, level + 1) &&
          zoom1.flipFrame(zoom, leftToRight) &&
          findA(zoom1, g, !leftToRight, true, level + 1) &&
          zoom.merge(zoom1)

        case p :: ps =>
          if (findA(zoom, p, leftToRight, adjacent, level)) {
            // everything after the first pattern has to follow adjacently
            findB(zoom, ps, leftToRight, true, level)
          } else false
      }
      Debug.debug(level, r, s"findB ${patterns.size} L$level ${if (r) "found" else "not found"} in\n   $zoom")
      r
    }
  }

}




© 2015 - 2025 Weber Informatics LLC | Privacy Policy