// Many resources are needed to download a project. Please understand that we have to compensate our server costs. Thank you in advance. Project price only 1 $
// You can buy this project and download/modify it how often you want.
/*
* Copyright 2012-2020 the original author or authors.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package laika.internal.markdown
import laika.api.bundle.{ BlockParserBuilder, BlockPosition }
import laika.ast.*
import laika.parse.{ BlockSource, LineSource, Parser }
import laika.parse.markup.RecursiveParsers
import laika.parse.builders.*
import laika.parse.syntax.*
import laika.parse.text.{ PrefixedParser, WhitespacePreprocessor }
/** Provides all block parsers for Markdown text except for lists
* which are factored out into a separate parser object
* and those blocks dealing with verbatim HTML markup
* which this library treats as an optional feature that has to be explicitly mixed in.
*
* Block parsers are only concerned with splitting the document into (potentially nested) blocks.
* They are used in the first phase of parsing, while delegating to inline parsers for the 2nd phase.
*
* @author Jens Halm
*/
private[laika] object BlockParsers {
/** Consumes exactly one character, which must be either a space or a tab.
 * The matched character is discarded.
 */
val tabOrSpace: Parser[Unit] = {
  val spaceOrTabChar = oneOf(' ', '\t')
  spaceOrTabChar.void
}
/** Consumes at most three space characters and discards them.
 *
 * Markdown allows an optional indentation of up to three spaces
 * that has no effect on how the following content is parsed.
 */
val insignificantSpaces: Parser[Unit] = {
  val maxOptionalIndent = 3
  anyOf(' ').max(maxOptionalIndent).void
}
/** Recognizes the underline of a setext header:
 * a run of `=` characters or a run of `-` characters, terminated by `wsEol`.
 * The result is the run of decoration characters.
 */
private val setextDecoration: Parser[String] = {
  val underline = someOf('=') | someOf('-')
  underline <~ wsEol
}
/** Parses a single Markdown block by delegating to the generic `block` parser.
 *
 * The only difference to the generic parser is that the prefix for subsequent lines
 * is preceded by `insignificantSpaces`, so that up to three optional space characters
 * at the start of those lines are consumed and ignored.
 * The prefix parsers for the first line and for the next block are passed on unchanged.
 *
 * @param firstLinePrefix parser that recognizes the start of the first line of this block
 * @param linePrefix      parser that recognizes the start of subsequent lines that still belong to the same block
 * @param nextBlockPrefix parser that recognizes whether a line after one or more blank lines still belongs to the same block
 */
def mdBlock(
    firstLinePrefix: Parser[Any],
    linePrefix: Parser[Any],
    nextBlockPrefix: Parser[Any]
): Parser[BlockSource] = {
  // declared as a def so the combined prefix is built where `block` asks for it,
  // exactly like the inline expression it replaces
  def subsequentLinePrefix: Parser[Any] = insignificantSpaces ~ linePrefix
  block(firstLinePrefix, subsequentLinePrefix, nextBlockPrefix)
}
/** Parses a single Markdown block like `mdBlock`, but only if the line following
 * the first line is not a setext header decoration.
 *
 * The check is a pure look-ahead performed before the first line prefix is applied:
 * it skips one line and then requires `setextDecoration` to fail. No input is consumed by it.
 *
 * @param firstLinePrefix parser that recognizes the start of the first line of this block
 * @param linePrefix      parser that recognizes the start of subsequent lines that still belong to the same block
 * @param nextBlockPrefix parser that recognizes whether a line after one or more blank lines still belongs to the same block
 */
private def decoratedBlock(
    firstLinePrefix: Parser[Any],
    linePrefix: Parser[Any],
    nextBlockPrefix: Parser[Any]
): Parser[BlockSource] = {
  // everything up to and including the end of the current line
  val currentLine = anyNot('\n', '\r').void <~ eol
  // succeeds only when the second line is not a header underline
  val notFollowedByDecoration = lookAhead(currentLine ~ not(setextDecoration))
  val guardedFirstLinePrefix  = notFollowedByDecoration ~ firstLinePrefix
  mdBlock(guardedFirstLinePrefix, linePrefix, nextBlockPrefix)
}
/** Builder for the combined setext-header/paragraph parser in root position:
 * created with `BlockPosition.RootOnly` and registered as a root-only block parser.
 */
lazy val rootHeaderOrParagraph: BlockParserBuilder =
  BlockParserBuilder
    .recursive(recParsers => headerOrParagraph(recParsers, BlockPosition.RootOnly))
    .rootOnly
/** Builder for the combined setext-header/paragraph parser in nested position:
 * created with `BlockPosition.NestedOnly` and registered as a nested-only block parser.
 */
lazy val nestedHeaderOrParagraph: BlockParserBuilder =
  BlockParserBuilder
    .recursive(recParsers => headerOrParagraph(recParsers, BlockPosition.NestedOnly))
    .nestedOnly
/** Parses either a setext header or a paragraph, both of which start with a plain text line.
 *
 * If the first line is directly followed by a setext decoration, the result is a `Header`
 * (level 1 for `=`, level 2 for `-`). Otherwise the first line and all following non-blank lines
 * form a `Paragraph`. The paragraph also ends where one of the paragraph interruptions
 * provided by `recParsers` for the given position matches; in that case the paragraph and
 * the interrupting block are returned together as a `BlockSequence`.
 *
 * @param recParsers provides the recursive span parser and the paragraph interruptions
 * @param pos        the block position (root or nested) used to select the paragraph interruptions
 */
private def headerOrParagraph(recParsers: RecursiveParsers, pos: BlockPosition): Parser[Block] = {
  val interruptions = recParsers.paragraphInterruptions(pos)
  val line          = not(blankLine) ~> restOfLine.line
  // either an interruption right after the first line (no further paragraph lines),
  // or further non-blank lines up to an optional interruption
  val lineAndCond = interruptions.map(res => (Nil, Some(res))) | line.repUntil(interruptions)

  val decorationOrLines: Parser[Either[String, (Seq[LineSource], Option[Block])]] =
    setextDecoration.map { Left(_) } | lineAndCond.map { Right(_) }

  // `startsWith` instead of `head` so that an empty string can never throw here
  def decoratedHeaderLevel(decoration: String) = if (decoration.startsWith("=")) 1 else 2

  /* Marks a hard line break on a single line.
   * A line ending with two spaces (sadly) stands for a hard line break in Markdown.
   */
  def processLineBreaks(line: LineSource): LineSource =
    /* Replace the two trailing spaces with a special sequence for hard line breaks so that the
     * inline parser does not have to stop at each space character.
     * The check must look for exactly the two characters that are dropped afterwards,
     * otherwise a line ending with a single space would lose its last non-space character. */
    if (line.input.endsWith("  ")) LineSource(line.input.dropRight(2) ++ "\\\r", line.parent)
    else line

  def paragraph(firstLine: LineSource, restLines: Seq[LineSource]): Paragraph =
    Paragraph(
      recParsers.recursiveSpans.parseAndRecover(
        BlockSource(processLineBreaks(firstLine), restLines.map(processLineBreaks): _*)
      )
    )

  (textLine.line ~ decorationOrLines).map {
    case firstLine ~ Right((restLines, None)) => paragraph(firstLine, restLines)
    case firstLine ~ Right((restLines, Some(interruption))) =>
      BlockSequence(paragraph(firstLine, restLines), interruption)
    case text ~ Left(decoration) =>
      Header(decoratedHeaderLevel(decoration), recParsers.recursiveSpans.parseAndRecover(text))
  }
}
/** Builder for the parser of link definitions, registered for root position only.
 *
 * A definition consists of an id in square brackets followed by a colon (the id is converted
 * to lower case), a URL that is either enclosed in angle brackets or ends at the next space
 * or newline, and an optional title. The title may be placed on the following line and is
 * enclosed in double quotes, single quotes or parentheses.
 * The parsed parts are passed to `LinkDefinition.create`.
 */
val linkTarget: BlockParserBuilder = BlockParserBuilder.withEscapedText { escapedParsers =>
  import escapedParsers._

  // text between `start` and an `end` that is only followed by whitespace up to the end of the line;
  // fails when a line break occurs before the closing delimiter
  def enclosedBy(start: Char, end: Char) =
    start.toString ~> delimitedBy(end.toString <~ lookAhead(wsEol)).failOn('\r', '\n')

  val id = "[" ~> escapedUntil(']').map(_.toLowerCase) <~ ":" <~ ws.void

  val bracketedUrl = "<" ~> escapedUntil('>')
  val plainUrl     = escapedText(delimitedBy(' ', '\n').acceptEOF.keepDelimiter)
  val url          = bracketedUrl | plainUrl

  val titleText = enclosedBy('"', '"') | enclosedBy('\'', '\'') | enclosedBy('(', ')')
  val title     = (ws.void ~ opt(eol) ~ ws.void) ~> titleText

  (id ~ url ~ opt(title) <~ wsEol).mapN(LinkDefinition.create)
}.rootOnly
/** Builder for the parser of atx headers: a run of up to six `#` characters
 * followed by the header text on the same line.
 *
 * The number of `#` characters becomes the header level. The text must not be blank;
 * surrounding whitespace and any trailing `#` characters are removed from it
 * before it is parsed as spans.
 */
val atxHeader: BlockParserBuilder = BlockParserBuilder.recursive { recParsers =>
  // trims the text, cuts off the closing run of `#` characters and trims again
  def stripDecoration(text: String) = {
    val trimmed = text.trim
    val lastContentIndex = trimmed.lastIndexWhere(_ != '#')
    trimmed.take(lastContentIndex + 1).trim
  }

  val level      = someOf('#').max(6).count
  val text       = recParsers.recursiveSpans(restOfLine.map(stripDecoration).line)
  val headerText = not(blankLine) ~ ws ~> text

  (level ~ headerText).mapN(Header(_, _))
}
/** Builder for the parser of horizontal rules: at least three decoration characters
 * (`*`, `-` or `_`), each optionally preceded by spaces, with nothing else on the line
 * apart from what `wsEol` accepts.
 *
 * Each decoration character is matched independently, so this parser does not require
 * all of them to be the same character.
 */
val rules: BlockParserBuilder = BlockParserBuilder.standalone {
  val ruleChar = oneOf('*', '-', '_')
  // one decoration character plus at least two more, each optionally preceded by spaces
  val moreRuleChars = (anyOf(' ').void ~ ruleChar).rep.min(2)
  val ruleLine      = (ruleChar ~ moreRuleChars).as(Rule())
  ruleLine <~ wsEol
}
/** Builder for the parser of literal blocks: blocks where the first line, subsequent lines
 * and lines following blank lines all start with a space or tab character.
 *
 * The block is rejected when its second line is a setext header decoration.
 * The text of the block is passed through the string-based `WhitespacePreprocessor`
 * before it is wrapped in a `LiteralBlock`.
 */
val literalBlocks: BlockParserBuilder = BlockParserBuilder.standalone {
  val normalizeWhitespace = WhitespacePreprocessor.forString
  PrefixedParser(' ', '\t') {
    val indentedBlock = decoratedBlock(tabOrSpace, tabOrSpace, tabOrSpace)
    indentedBlock.map(source => LiteralBlock(normalizeWhitespace(source.input)))
  }
}
/** Builder for the parser of quoted blocks, which start with a `>` character.
 *
 * Subsequent lines belong to the block when they either start with the `>` marker
 * or are simply not blank. After one or more blank lines the block only continues
 * when the next line starts with `>`.
 * The content is parsed recursively as nested blocks and wrapped in a `QuotedBlock`
 * without attribution.
 */
val quotedBlock: BlockParserBuilder = BlockParserBuilder.recursive { recParsers =>
  PrefixedParser('>') {
    // the `>` marker followed by at most one character matched by `ws`
    val quoteMarker = ">" ~ ws.max(1).void
    val quotedSource =
      decoratedBlock(quoteMarker, quoteMarker | not(blankLine), literal(">"))
    recParsers
      .recursiveBlocks(quotedSource)
      .map(blocks => QuotedBlock(blocks, Nil))
  }
}
/** Builder for a plain paragraph parser used once the maximum nest level has been reached.
 *
 * A separate parser is needed for this case because the default Markdown paragraph parser
 * is combined with potentially nested lists, which makes that parser recursive.
 * It is registered for nested position only and with low precedence.
 */
val fallbackParagraph: BlockParserBuilder = BlockParserBuilder.withSpans { spanParsers =>
  // one or more text lines, joined into a single string.
  // NOTE(review): `mkString` is called without a separator; whether line breaks between
  // the lines are retained depends on what `textLine` returns - confirm.
  val paragraphText = textLine.rep.min(1).map(_.mkString).line
  spanParsers.recursiveSpans(paragraphText).map(spans => Paragraph(spans))
}.nestedOnly.withLowPrecedence
}