All Downloads are FREE. Search and download functionalities are using the official Maven repository.

eu.henkelmann.actuarius.InlineParsers.scala Maven / Gradle / Ivy

The newest version!
package eu.henkelmann.actuarius



/**
 * A parser for inline markdown, markdown escapes and XML escapes.
 * This is used by the result classes of the block parsers to handle
 * Markdown within a block.
 */

trait InlineParsers extends BaseParsers {

    /**
     * Defines how the output is formatted and whether inline xml elements are allowed.
     *
     * Returns the `Decorator` object by default. The parsers in this trait call the
     * returned decorator for the output they generate (e.g. `decorateBreak`,
     * `decorateCode`, `decorateLink`, `decorateImg`) and read its `allowVerbatimXml`
     * flag to decide whether inline XML tags are parsed.
     *
     * @return the decorator used to render parsed inline elements
     */
    def deco():Decorator = Decorator

    /////////////////////////////////////
    // Types we use for inline parsing //
    /////////////////////////////////////

    /**
     * Defines a lookup map for link definitions.
     * NOTE(review): `idReference` looks ids up after trimming and lower-casing them,
     * so keys are presumably stored in that normalized form - confirm where the map is built.
     */
    type LinkMap       = Map[String, LinkDefinition]

    /**
     * A set of tags we have already created. Used to prevent nesting a link in a link or an emphasis in an emphasis.
     * The tag names recorded by the parsers in this trait are "a", "em" and "strong".
     */
    type VisitedTags   = Set[String]

    /**
     * Immutable state threaded through the inline parsers: the link definitions
     * available for reference lookups plus the set of tags already opened, which
     * is used to reject nested links, emphasis and strong spans.
     *
     * @param map  lookup for link definitions by id
     * @param tags names of the tags that are currently open
     */
    case class InlineContext(map:LinkMap, tags:VisitedTags) {
        /** Creates a context with the given link definitions and no open tags. */
        def this(m:LinkMap) = this(m, Set.empty[String])

        /** Creates a context without link definitions and without open tags. */
        def this() = this(Map.empty[String, LinkDefinition])

        /** Returns a copy of this context in which `tag` is additionally marked as open. */
        def addTag(tag:String):InlineContext = copy(tags = tags + tag)
    }

    /** Lookup table that maps a char which may follow a backslash in markdown
     * to the text it produces (if necessary already escaped for XML).
     * The table is indexed by char code; entries for chars that cannot be
     * escaped stay `null`. Only codes below 127 have a slot.
     * Disgusting, I know, but this is such an often called operation
     * that this is the fastest way to do it, even in the year 2010.
     */
    private val escapableMarkdownChars = new Array[String](127)
    // These chars need no XML escaping, so an escaped occurrence yields the char itself.
    for (c <- "\\`*_{}[]()#+-=.!") escapableMarkdownChars(c) = c.toString
    // '>' is the only escapable char with an XML entity; store it pre-escaped
    // so the value can be appended to the output verbatim.
    escapableMarkdownChars('>')  = "&gt;"

    /**
     * Parses markdown text up to any of the chars defined in the given map.
     * used to quickly escape any text between special inline markdown like
     * emphasis.
     */
    def markdownText(special:Set[Char], markdownEscapes:Boolean) = Parser{ in =>
        if (in.atEnd) {
            Failure("End of input.", in)
        } else {
            var start = in.offset
            var i = in.offset
            val s = in.source
            val end = s.length
            val result = new StringBuffer()
            //process chars until we hit a special char or the end
            while (i
        //markdownEscape | br | code | xmlTag | //simple inline
        //a(ctx) | strong(ctx) | em(ctx) | fastA(ctx) | refA(ctx) | img(ctx) //recursive inline

    //}
    /* explicit match is faster than the map lookup
    private val elementParserLookup:Map[Char,(InlineContext=>Parser[String])] = Map(
        '\\' -> (ctx => aChar), ' ' -> (ctx => br), '`' -> (ctx => code), '<' -> (ctx => xmlTag | fastA(ctx)),
        '[' -> (ctx => a(ctx) | refA(ctx)), '*' -> (ctx => spanAsterisk(ctx)), '_' -> (ctx => spanUnderscore(ctx)),
        '!' -> (ctx => img(ctx))
    )
    */

    //TODO:better handling of "  \n" here. Stopping at every space costs us 20% time!
    /** The chars at which plain text scanning stops because they may open a
     * special Markdown inline sequence (break, code, tag/fast link, link,
     * emphasis/strong, image).
     */
    val specialInlineChars:Set[Char] = Set(' ', '`', '<', '[', '*', '_', '!')

    /** Same as [[specialInlineChars]] plus the closing square bracket, which
     * additionally marks the end of a link text.
     */
    val specialLinkInlineChars:Set[Char] = specialInlineChars + ']'

    /** Hand rolled dispatcher for special inline markdown (breaks, code, tags,
     * links, emphasis, images). It looks at the next char only, picks the
     * parser responsible for that char and runs it on the unchanged input.
     * Fails without consuming anything at the end of the input or when the
     * next char cannot start an inline element.
     */
    def elementParsers(ctx:InlineContext):Parser[String] = Parser{ in =>
        if (!in.atEnd) {
            // one char lookahead decides which element parser gets a try
            val candidate:Parser[String] = in.first match {
                case ' ' => br
                case '`' => code
                case '<' => xmlTag | fastLink(ctx)
                case '[' => link(ctx)
                case '*' => spanAsterisk(ctx)
                case '_' => spanUnderscore(ctx)
                case '!' => img(ctx)
                case _   => failure("Lookahead does not start inline element.")
            }
            candidate(in)
        } else {
            Failure("End of Input Reached", in)
        }
    }

    /** Parses exactly one inline token. Tries, in this order: a chunk of plain
     * text (with markdown escapes processed), a special inline element, and
     * finally a single char as fallback.
     */
    def oneInline(ctx:InlineContext):Parser[String] = {
        val textChunk = markdownText(specialInlineChars, markdownEscapes = true)
        textChunk | elementParsers(ctx) | aChar
    }

    /** Parser for inline markdown: applies [[oneInline]] repeatedly, starting
     * with a fresh context for the given link definitions, and concatenates
     * the resulting HTML fragments.
     */
    def inline(m:LinkMap):Parser[String] = {
        val token = oneInline(new InlineContext(m))
        rep(token) ^^ { fragments => fragments.mkString }
    }



    ///////////////////////////////////////////////////////////
    //   Inline Elements:                                    //
    //   br,code,xml tag,fast link,link,image,emphasis,strong, text chunk  //
    ///////////////////////////////////////////////////////////

    /** Parses two spaces at the end of a line to a manual line break.
     * The literal must contain exactly two spaces before the newline.
     */
    val br:Parser[String] =
        ("  \n") ^^^ {deco.decorateBreak() + "\n"}

    /** Parses an inline code element.
     * An inline code element is surrounded by single backticks ("`")
     * or double backticks ("``").
     */
    val code:Parser[String] = ((("``" ~> ((not("``")~> aChar)+) <~ "``")^^{_.mkString}) |
                              ('`' ~> markdownText(Set('`'), false) <~ '`') ) ^^ {
        c => deco.decorateCode(c.mkString)
    }

    /** Parses any xml tag and escapes attribute values.
     * Always fails when the decorator does not allow verbatim XML.
     */
    val xmlTag:Parser[String] = if (deco.allowVerbatimXml) (xmlEndTag | xmlStartOrEmptyTag)
                                else failure("Inline XML processing disabled.")

    /** A shortcut markdown link: a URL wrapped in angle brackets.
     * The URL is used both as link target and as link text.
     * Fails inside another link.
     */
    def fastLink(ctx:InlineContext):Parser[String] =
        if (ctx.tags.contains("a")){
            failure("Cannot nest a link in a link.")
        } else {
            elem('<') ~> markdownText(Set('>',' ', '<', '\n'), true) <~ '>' ^^ { u => deco.decorateLink(u, u, None) }
        }

    /** A link started by square brackets, either a reference or a link with the full URL.
     */
    def link(ctx:InlineContext):Parser[String] = fullLink(ctx) | referenceLink(ctx)

    /** A markdown link with the full url given: link text in square brackets
     * followed by URL and optional title in parentheses.
     * Fails inside another link.
     */
    def fullLink(ctx:InlineContext):Parser[String] =
        if (ctx.tags.contains("a")){
            failure("Cannot nest a link in a link.")
        } else {
            '[' ~> linkInline(ctx.addTag("a")) ~ ("](" ~ ows) ~ url ~ ows ~ title <~ (ows ~ ')') ^^ {
                case txt ~ _ ~ u ~ _ ~ ttl => deco.decorateLink(txt, u, ttl)
            }
        }

    /** A markdown link which references an url by id.
     * Fails inside another link.
     */
    def referenceLink(ctx:InlineContext):Parser[String] =
        if (ctx.tags.contains("a")){
            failure("Cannot nest a link in a link.")
        } else {
            ref(ctx.addTag("a")) ^^ {
                case (LinkDefinition(_, u, ttl), txt) => deco.decorateLink(txt, u, ttl)
            }
        }

    /** Inline markdown in a link. Like normal inline stuff but stops when it reaches a closing square bracket.
     */
    def linkInline(ctx:InlineContext):Parser[String] =
        //( (not(']') ~> oneInline(ctx.addTag("a")))* ) ^^ {_.mkString}
        ((markdownText(specialLinkInlineChars, true) | elementParsers(ctx) | ((not(']') ~> aChar)))*) ^^ {_.mkString}

    /** We parse everything as a link/img url until we hit a space, a tab or a closing parenthesis.
     */
    val url:Parser[String] = markdownText(Set(')', ' ', '\t'), true)

    /** A title is everything in quotation marks. We allow even quotation marks in quotation marks.
     * We look ahead if we hit the closing parenthesis after the quotation marks to detect if the title
     * ends or not. Yields None when there is no title.
     */
    val title:Parser[Option[String]] = opt('"' ~> ((markdownText(Set('"'),true) ~ opt(not('"'~ows~')') ~> aChar))*) <~ '"') ^^ {
        case None => None
        case Some(chunks) => {
            // glue the text chunks and the quotation marks found between them back together
            val result = new StringBuilder()
            for (chunk <- chunks) { chunk match {
                case (text) ~ None    => result.append(text)
                case (text) ~ Some(s) => result.append(text).append(s)
            } }
            Some(result.toString)
        }
    }

    /** Plaintext variant to refInline. Escapable text until a square bracket is hit.
     */
    val refText:Parser[String] = markdownText(Set(']'), true)

    /** Parses an id reference. (Any text that is not a square bracket)
     * Succeeds only if the parsed id, trimmed and lower-cased, is found in the given lookup.
     * Returns the matched text (as written) first and the found link definition second.
     */
    def idReference(ctx:InlineContext):Parser[(String, LinkDefinition)] =
        guard(acceptMatch(ctx.map)(refText ^^ (_.trim.toLowerCase))) ~ refText ^^ {case ld ~ t => (t, ld)}

    /**
     * A markdown reference of the form [text][id], [idText][] or [idText]
     * Parser returns a tuple with the link definition first and the text to display second.
     */
    def ref(ctx:InlineContext):Parser[(LinkDefinition, String)] =
        ('[' ~> linkInline(ctx) ~ (']' ~ opt(' ') ~ '[') ~ idReference(ctx) <~ ']' ^^ {
            case t ~ dummy ~ pair => (pair._2, t)} ) |
        ('[' ~> idReference(ctx) <~ (']' ~ opt(opt(' ') ~ '[' ~ ows ~ ']')) ^^ {
            case (t, ld) => (ld, t)} )

    /**
     * Parses either a referenced or a directly defined image.
     */
    def img(ctx:InlineContext):Parser[String] = elem('!') ~> (directImg | refImg(ctx))

    /** An image with an explicit path: alt text in square brackets followed by
     * path and optional title in parentheses.
     */
    val directImg:Parser[String] =
        elem('[') ~> refText ~ ("](" ~ ows) ~ url ~ ows ~ title <~ (ows ~ ')') ^^ {
            case altText ~ _ ~ path ~ _ ~ ttl => deco.decorateImg(altText, path, ttl)
        }

    /**
     * Parses a referenced image.
     */
    def refImg(ctx:InlineContext):Parser[String] = ref(ctx) ^^ {
        case (LinkDefinition(_, u, ttl), alt) => deco.decorateImg(alt, u, ttl)
    }

    /** Parses inline in a span element like bold or emphasis or link up until the given end marker
     */
    def spanInline(end:Parser[Any], ctx:InlineContext):Parser[String] =
        (markdownText(specialInlineChars, true) | elementParsers(ctx) | (not(end) ~> aChar)) ^^ {_.mkString}

    /** Parses a span element like __foo__ or *bar*
     * The opening limiter must not be followed by whitespace.
     */
    def span(limiter:String, ctx:InlineContext):Parser[String] =
        (limiter~not(ws))~> (spanInline( (not(lookbehind(Set(' ', '\t', '\n'))) ~ limiter), ctx)+) <~ limiter ^^ { _.mkString }

    /** Either an emphasis or a strong text wrapped in asterisks.
     */
    def spanAsterisk  (ctx:InlineContext) = strongAsterisk(ctx) | emAsterisk(ctx)

    /** Either an emphasis or strong text wrapped in underscores.
     */
    def spanUnderscore(ctx:InlineContext) = strongUnderscore(ctx) | emUnderscore(ctx)

    /**Parses emphasized text wrapped in asterisks: *foo*
     * Fails inside another emphasis.
     */
    def emAsterisk(ctx:InlineContext):Parser[String] =
        if (ctx.tags.contains("em")) {
            failure("Cannot nest emphasis.")
        } else {
            span("*", ctx.addTag("em")) ^^ { deco.decorateEmphasis(_) }
        }

    /**Parses emphasized text wrapped in underscores: _foo_
     * Fails inside another emphasis.
     */
    def emUnderscore(ctx:InlineContext):Parser[String] =
        if (ctx.tags.contains("em")) {
            failure("Cannot nest emphasis.")
        } else {
            span("_", ctx.addTag("em")) ^^ { deco.decorateEmphasis(_) }
        }

    /**Parses strong text in asterisks: **foo**
     * Fails inside another strong span.
     */
    def strongAsterisk(ctx:InlineContext):Parser[String] =
        if (ctx.tags.contains("strong")) {
            failure("Cannot nest strong text.")
        } else {
            span("**", ctx.addTag("strong")) ^^ { deco.decorateStrong(_) }
        }

    /**Parses strong text in underscores: __foo__
     * Fails inside another strong span.
     */
    def strongUnderscore(ctx:InlineContext):Parser[String] =
        if (ctx.tags.contains("strong")) {
            failure("Cannot nest strong text.")
        } else {
            span("__", ctx.addTag("strong")) ^^ { deco.decorateStrong(_) }
        }

    /**
     * Runs the inline parser on the given input and returns the result
     */
    def applyInline(s:String, m:LinkMap) = apply(inline(m), s)

    /**
     * Escapes the given string so it can be embedded in xml.
     * Markdown escapes are not processed.
     * NOTE(review): the body of this method is cut off in this copy of the file;
     * restore the remainder from the upstream source.
     */
    def escapeXml(s:String) = {
        var i = 0
        val end = s.length
        val result = new StringBuffer()
        //process chars until we hit a special char or the end
        while (i




© 2015 - 2025 Weber Informatics LLC | Privacy Policy