// Many resources are needed to download a project. Please understand that we have to compensate our server costs. Thank you in advance. Project price only 1 $
// You can buy this project and download/modify it how often you want.
/*
* Copyright 2012-2020 the original author or authors.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package laika.parse.text
import cats.data.NonEmptySet
import laika.parse.{ Failure, Message, Parser, Success }
import laika.parse.combinator.Parsers
/** Base text parsers that provide optimized low-level parsers for typical requirements
  * of text markup parsers. In particular they are meant as an efficient replacement
  * for scenarios where regex parsers are usually used. In cases where different parsers
  * need to be tried for relatively short input sequences, regex parsers tend to be less
  * efficient. Furthermore, these base parsers may also improve readability, as they
  * allow combining simple low-level parsers into higher-level parsers based on the
  * Laika combinator API, instead of producing long regexes which may be hard to read.
  *
  * @author Jens Halm
  */
trait TextParsers extends Parsers {

  /** Creates a NonEmptySet from a Character range.
    * Both bounds are inclusive and may be passed in either order.
    * This set can then be passed to parsers like `anyOf` or `oneOf`
    * which expect a NonEmptySet as a parameter.
    */
  def range(fromChar: Char, toChar: Char): NonEmptySet[Char] = {
    // Named `chars` rather than `range` so the local does not shadow this method.
    val chars = if (fromChar > toChar) toChar to fromChar else fromChar to toChar
    NonEmptySet.of(chars.head, chars.tail: _*)
  }

  /** A parser that matches only the specified literal string.
    *
    * Note that this method itself is not implicit.
    */
  def literal(expected: String): PrefixedParser[String] = new Literal(expected)

  /** Parses horizontal whitespace (space and tab).
    * Always succeeds, consuming all whitespace found.
    */
  lazy val ws: Characters[String] = anyOf(' ', '\t')

  /** Succeeds at the end of a line, including the end of the input.
    * Produces no result (`Unit`) and consumes the line break if present,
    * which is either a line feed or a carriage return immediately followed by a line feed.
    * A carriage return on its own is not treated as the end of a line.
    */
  val eol: Parser[Unit] = Parser { in =>
    if (in.atEnd) Success((), in)
    else if (in.char == '\n') Success((), in.consume(1))
    else if (in.char == '\r' && in.remaining > 1 && in.charAt(1) == '\n')
      Success((), in.consume(2))
    else Failure(Message.ExpectedEOL, in)
  }

  /** Parses any number of horizontal whitespace characters followed
    * by the end of the line (a line break or the end of the input).
    */
  val wsEol: Parser[Unit] = ws.void ~> eol

  /** Succeeds at the end of the input, producing an empty string.
    */
  val eof: Parser[String] = Parser { in =>
    if (in.atEnd) Success("", in)
    else Failure(Message.ExpectedEOF, in)
  }

  /** Succeeds at the start of the input.
    */
  val atStart: Parser[Unit] = Parser { in =>
    if (in.offset == 0) Success((), in)
    else Failure(Message.ExpectedStart, in)
  }

  /** Parses a blank line from the current input offset (which may not be at the
    * start of the line). Fails for lines that contain any non-whitespace character.
    * Always produces an empty string as the result, discarding any whitespace
    * characters found in the line.
    *
    * Since it also succeeds at the end of the input
    * it should never be used in the form of `(blankLine *)` or `(blankLine +)`. Use
    * the `blankLines` parser instead in these cases.
    */
  val blankLine: Parser[String] = wsEol.as("")

  /** Parses one or more blank lines, producing a list of empty strings corresponding
    * to the number of blank lines consumed.
    */
  val blankLines: Parser[List[String]] = (not(eof) ~> blankLine).rep.min(1)

  /** Parses the rest of the line from the current input offset no matter whether
    * it consists of whitespace only or some text. Does not include the eol character(s).
    */
  val restOfLine: Parser[String] = anyNot('\n', '\r') <~ eol

  /** Parses a single text line from the current input offset (which may not be at the
    * start of the line). Fails for blank lines. Does not include the eol character(s).
    */
  val textLine: Parser[String] = not(blankLine) ~> restOfLine

  /** Verifies that the previous character is not one of those specified.
    * Succeeds at the start of the input and does not consume any input
    * or produce a result when it succeeds.
    */
  def prevNot(char: Char, chars: Char*): Parser[Unit] = prevNot(NonEmptySet.of(char, chars: _*))

  /** Verifies that the previous character is not one of those specified.
    * Succeeds at the start of the input and does not consume any input
    * or produce a result when it succeeds.
    */
  def prevNot(chars: NonEmptySet[Char]): Parser[Unit] = prevNot(chars.contains(_))

  /** Verifies that the previous character does not satisfy the specified predicate.
    * Succeeds at the start of the input and does not consume any input
    * or produce a result when it succeeds.
    */
  def prevNot(predicate: Char => Boolean): Parser[Unit] = {
    // This parser fails when the predicate DOES hold, so the message states the violated rule.
    val errMsg: Char => Message = Message.forRuntimeValue[Char] { found =>
      s"previous character '$found' must not satisfy the specified predicate"
    }
    Parser { in =>
      if (in.offset == 0) Success((), in)
      else {
        val previous = in.charAt(-1)
        if (predicate(previous)) Failure(errMsg(previous), in)
        else Success((), in)
      }
    }
  }

  /** Verifies that the next character is not one of those specified.
    * Succeeds at the end of the input and does not consume any input
    * or produce a result when it succeeds.
    */
  def nextNot(char: Char, chars: Char*): Parser[Unit] = nextNot(NonEmptySet.of(char, chars: _*))

  /** Verifies that the next character is not one of those specified.
    * Succeeds at the end of the input and does not consume any input
    * or produce a result when it succeeds.
    */
  def nextNot(chars: NonEmptySet[Char]): Parser[Unit] = nextNot(chars.contains(_))

  /** Verifies that the next character does not satisfy the specified predicate.
    * Succeeds at the end of the input and does not consume any input
    * or produce a result when it succeeds.
    */
  def nextNot(predicate: Char => Boolean): Parser[Unit] = {
    // This parser fails when the predicate DOES hold, so the message states the violated rule.
    val errMsg: Char => Message = Message.forRuntimeValue[Char] { found =>
      s"next character '$found' must not satisfy the specified predicate"
    }
    Parser { in =>
      if (in.remaining == 0) Success((), in)
      else {
        val next = in.char
        if (predicate(next)) Failure(errMsg(next), in)
        else Success((), in)
      }
    }
  }

  /** Verifies that the previous character is one of those specified.
    * Fails at the start of the input and does not consume any input
    * or produce a result when it succeeds.
    */
  def prevIn(char: Char, chars: Char*): Parser[Unit] = prevIn(NonEmptySet.of(char, chars: _*))

  /** Verifies that the previous character is one of those specified.
    * Fails at the start of the input and does not consume any input
    * or produce a result when it succeeds.
    */
  def prevIn(chars: NonEmptySet[Char]): Parser[Unit] = prevIs(chars.contains(_))

  /** Verifies that the previous character satisfies the specified predicate.
    * Fails at the start of the input and does not consume any input
    * or produce a result when it succeeds.
    */
  def prevIs(predicate: Char => Boolean): Parser[Unit] = {
    val errMsg: Char => Message = Message.forRuntimeValue[Char] { found =>
      s"previous character '$found' does not satisfy the specified predicate"
    }
    // Distinct name so the local does not shadow the `atStart` parser of this trait.
    val startOfInput: Message = Message.fixed("unable to check predicate on start of input")
    Parser { in =>
      if (in.offset == 0) Failure(startOfInput, in)
      else {
        val previous = in.charAt(-1)
        if (predicate(previous)) Success((), in)
        else Failure(errMsg(previous), in)
      }
    }
  }

  /** Verifies that the next character is one of those specified.
    * Fails at the end of the input and does not consume any input
    * or produce a result when it succeeds.
    */
  def nextIn(char: Char, chars: Char*): Parser[Unit] = nextIn(NonEmptySet.of(char, chars: _*))

  /** Verifies that the next character is one of those specified.
    * Fails at the end of the input and does not consume any input
    * or produce a result when it succeeds.
    */
  def nextIn(chars: NonEmptySet[Char]): Parser[Unit] = nextIs(chars.contains(_))

  /** Verifies that the next character satisfies the specified predicate.
    * Fails at the end of the input and does not consume any input
    * or produce a result when it succeeds.
    */
  def nextIs(predicate: Char => Boolean): Parser[Unit] = {
    val errMsg: Char => Message = Message.forRuntimeValue[Char] { found =>
      s"next character '$found' does not satisfy the specified predicate"
    }
    val endOfInput: Message = Message.fixed("unable to check predicate on end of input")
    Parser { in =>
      if (in.remaining == 0) Failure(endOfInput, in)
      else {
        val next = in.char
        if (predicate(next)) Success((), in)
        else Failure(errMsg(next), in)
      }
    }
  }

  /** Consumes any kind of input, always succeeds.
    * This parser would consume the entire input unless a `max` constraint
    * is specified.
    */
  val anyChars: Characters[String] = Characters.anyWhile(_ => true)

  /** Consumes any number of consecutive occurrences of the specified characters.
    * Always succeeds unless a minimum number of required matches is specified.
    */
  def anyOf(char: Char, chars: Char*): Characters[String] = Characters.include(char +: chars)

  /** Consumes any number of consecutive occurrences of the specified characters.
    * Always succeeds unless a minimum number of required matches is specified.
    */
  def anyOf(chars: NonEmptySet[Char]): Characters[String] =
    Characters.include(chars.toSortedSet.toSeq)

  /** Consumes any number of consecutive characters that are not one of the specified characters.
    * Always succeeds unless a minimum number of required matches is specified.
    */
  def anyNot(char: Char, chars: Char*): Characters[String] = Characters.exclude(char +: chars)

  /** Consumes any number of consecutive characters that are not one of the specified characters.
    * Always succeeds unless a minimum number of required matches is specified.
    */
  def anyNot(chars: NonEmptySet[Char]): Characters[String] =
    Characters.exclude(chars.toSortedSet.toSeq)

  /** Consumes any number of consecutive characters which satisfy the specified predicate.
    * Always succeeds unless a minimum number of required matches is specified.
    */
  def anyWhile(p: Char => Boolean): Characters[String] = Characters.anyWhile(p)

  /** Consumes one character if it matches one of the specified characters, fails otherwise.
    */
  def oneOf(char: Char, chars: Char*): PrefixedParser[String] =
    oneOf(NonEmptySet.of(char, chars: _*))

  /** Consumes one character if it matches one of the specified characters, fails otherwise.
    */
  def oneOf(chars: NonEmptySet[Char]): PrefixedParser[String] =
    new PrefixCharacters(anyOf(chars).take(1), chars)

  /** Consumes one character if it is not one of the specified characters.
    */
  def oneNot(char: Char, chars: Char*): Parser[String] = Characters.exclude(char +: chars).take(1)

  /** Consumes one character if it is not one of the specified characters.
    */
  def oneNot(chars: NonEmptySet[Char]): Parser[String] =
    Characters.exclude(chars.toSortedSet.toSeq).take(1)

  /** Consumes one character if it satisfies the specified predicate, fails otherwise.
    */
  def oneIf(p: Char => Boolean): Parser[String] = Characters.anyWhile(p).take(1)

  /** Parses exactly one character from the input, fails only at the end of the input.
    */
  val oneChar: Parser[String] = anyChars.take(1)

  /** Consumes one or more characters if they match one of the specified characters,
    * fails if the first character does not match.
    */
  def someOf(char: Char, chars: Char*): PrefixCharacters[String] =
    someOf(NonEmptySet.of(char, chars: _*))

  /** Consumes one or more characters if they match one of the specified characters,
    * fails if the first character does not match.
    */
  def someOf(chars: NonEmptySet[Char]): PrefixCharacters[String] =
    new PrefixCharacters(anyOf(chars).min(1), chars)

  /** Consumes one or more characters that are not one of the specified characters,
    * fails for empty results.
    */
  def someNot(char: Char, chars: Char*): Characters[String] =
    Characters.exclude(char +: chars).min(1)

  /** Consumes one or more characters that are not one of the specified characters,
    * fails for empty results.
    */
  def someNot(chars: NonEmptySet[Char]): Characters[String] =
    Characters.exclude(chars.toSortedSet.toSeq).min(1)

  /** Consumes one or more characters which satisfy the specified predicate,
    * fails for empty results.
    */
  def someWhile(p: Char => Boolean): Characters[String] = Characters.anyWhile(p).min(1)

  /** Consumes any number of consecutive characters until one of the specified characters
    * is encountered on the input string.
    */
  def delimitedBy(char: Char, chars: Char*): DelimitedText = new DelimitedText(
    TextDelimiter(oneOf(char, chars: _*))
  )

  /** Consumes any number of consecutive characters until one of the specified characters
    * is encountered on the input string.
    */
  def delimitedBy(chars: NonEmptySet[Char]): DelimitedText = new DelimitedText(
    TextDelimiter(oneOf(chars))
  )

  /** Consumes any number of consecutive characters until the specified string delimiter
    * is encountered on the input string.
    * An empty delimiter string yields `DelimitedText.Undelimited`.
    */
  def delimitedBy(str: String): DelimitedText =
    if (str.isEmpty) DelimitedText.Undelimited
    else delimitedBy(literal(str))

  /** Consumes any number of consecutive characters until the specified delimiter parser
    * succeeds on the input.
    *
    * This constructor is limited to the sub-trait `PrefixedParser`
    * as only those can be optimized for an assertion that needs to be performed on each
    * character. Most parsers for non-empty text implement this trait, e.g. `oneOf`, `someOf`,
    * `delimiter` or the literal parsers for a character or string.
    */
  def delimitedBy(delimiter: PrefixedParser[String]): DelimitedText = new DelimitedText(
    TextDelimiter(delimiter)
  )

  /** Creates a parser for a delimiter based on the given set of delimiter characters
    * with an API that allows to specify predicates for the characters immediately
    * preceding or following the delimiter, a common task in markup parsing.
    */
  def delimiter(char: Char, chars: Char*): DelimiterParser = new DelimiterParser(
    oneOf(char, chars: _*)
  )

  /** Creates a parser for a delimiter based on a literal string with an API that
    * allows to specify predicates for the characters immediately
    * preceding or following the delimiter, a common task in markup parsing.
    */
  def delimiter(delim: String): DelimiterParser = new DelimiterParser(literal(delim))

  /** Creates a parser for a delimiter with an API that allows to specify
    * predicates for the characters immediately preceding or following
    * the delimiter, a common task in markup parsing.
    *
    * The specified underlying parser needs to implement the sub-trait `PrefixedParser`
    * as only those can be optimized for an assertion that needs to be performed on each
    * character. Most parsers for non-empty text implement this trait, e.g. `oneOf`, `someOf`,
    * `delimiter` or the literal parsers for a character or string.
    */
  def delimiter(parser: PrefixedParser[String]): DelimiterParser = new DelimiterParser(parser)

}
/** Singleton instance of the `TextParsers` trait that allows importing
  * all text parsers in isolation, e.g. via `import TextParsers._`.
  *
  * Usually it is more convenient to import laika.parse.api._
  * to get all parser builders with one import.
  */
object TextParsers extends TextParsers