All downloads are free. The search and download functionality uses the official Maven repository.

.circumflex-markeven.2.3.source-code.inline.scala Maven / Gradle / Ivy

package ru.circumflex
package markeven

import core._
import java.io.{StringWriter, Writer}

class InlineProcessor(val out: Writer, val conf: MarkevenConf = EmptyMarkevenConf)
    extends Processor {

  /*! Drives inline processing: keeps delegating to `inline` for as long as
  the walker reports that it has a current character. */
  def run(walk: Walker): Unit = {
    // Tail-recursive local loop; each step lets `inline` consume some input.
    @scala.annotation.tailrec
    def consume(): Unit =
      if (walk.hasCurrent) {
        inline(walk)
        consume()
      }
    consume()
  }

  /*! Handles whatever sits at the walker's current position. Recognizers are
  tried in a fixed priority order and evaluation stops at the first one that
  reports success. If none of them does, the span obtained from
  `conf.scrambler` is written and the current character is flushed as a
  generic one. */
  def inline(walk: Walker): Unit = {
    // Short-circuit `||` preserves the "first match wins" order of the chain.
    val recognized =
      tryBackslashEscape(walk) ||                      // escapes
        tryTypographics(walk) ||                       // typographics
        tryAmp(walk) || tryLt(walk) || tryGt(walk) ||  // special chars
        // bracing elements, from most special to least special
        tryTripleCodeSpan(walk) ||
        tryCodeSpan(walk) ||
        tryFormula(walk) ||
        tryEm(walk) ||
        tryStrong(walk) ||
        // link, media and fragments
        tryHeadlessLink(walk) ||
        tryFragment(walk) ||
        tryMedia(walk) ||
        tryLink(walk)
    if (!recognized) {
      // generic character
      out.write(conf.scrambler.getSpan)
      flushGeneric(walk)
    }
  }

  /*! Generic characters are spit to the output "as is". */
  def flushGeneric(walk: Walker): Unit = {
    val c = walk.current
    // The character is dropped only when stripping is enabled in `conf`
    // AND the character is classified as invalid by `isInvalidXmlChar`.
    val mustStrip = conf.stripInvalidXmlChars && isInvalidXmlChar(c)
    if (!mustStrip) {
      out.write(c)
    }
    // The walker advances by one position whether or not anything was written.
    walk.skip()
  }

  /*! Tells whether `char` belongs to one of the code ranges this processor
  treats as invalid for XML output: 0x1-0x8, 0xB-0xC, 0xE-0x1F, 0x7F-0x84,
  0x86-0x9F, 0xFDD0-0xFDDF, or a code whose low 16 bits are 0xFFFE or 0xFFFF. */
  def isInvalidXmlChar(char: Char): Boolean = {
    val cp = char.toInt
    // Inclusive range membership test for the code of `char`.
    def within(lo: Int, hi: Int): Boolean = lo <= cp && cp <= hi
    val low16 = cp % 0x10000
    within(0x1, 0x8) ||
      within(0xB, 0xC) ||
      within(0xE, 0x1F) ||
      within(0x7F, 0x84) ||
      within(0x86, 0x9F) ||
      within(0xFDD0, 0xFDDF) ||
      low16 == 0xFFFE ||
      low16 == 0xFFFF
  }

  /*! Certain chars, usually markers, can be backslash-escaped.
  We should respect them, too */
  def tryBackslashEscape(walk: Walker): Boolean =
    if (!walk.at("\\")) false
    else {
      // A backslash is present, so this method consumes input either way.
      walk.at(const.backslashEscape) match {
        case Some(escape) =>
          // Recognized escape: emit capture group 1 and step over the whole match.
          out.write(escape.group(1))
          walk.skip(escape.group(0).length)
        case _ =>
          // Not a recognized escape: the backslash is written literally.
          out.write("\\")
          walk.skip()
      }
      true
    }

  /*! Ampersands should be escaped as SGML entities as long as they do not
  represent SGML entities themselves. */
  def tryAmp(walk: Walker): Boolean = {
    if (walk.at("&")) {
      // assume entity reference
      walk.at(const.entityRefefence) match {
        case Some(m) =>
          // Already an entity reference: pass the matched text through verbatim.
          val s = m.group(0)
          walk.skip(s.length)
          out.write(s)
        case _ =>
          // Bare ampersand: it must be escaped, otherwise the output would
          // contain a raw `&` that does not start an entity reference.
          out.write("&amp;")
          walk.skip()
      }
      true
    } else false
  }

  /*! The same logic applies to `<`, we escape it as SGML entity as long as
  it is not part of inline HTML tag or comment. Note that ampersands should be escaped
  even inside HTML tags. */
  def tryLt(walk: Walker): Boolean = {
    if (walk.at("<")) {
      // assume html tag
      walk.at(const.htmlTag) match {
        case Some(m) =>
          // Inline HTML tag: copy it through character by character so that
          // ampersands inside the tag still go through `tryAmp`.
          val s = m.group(0)
          walk.skip(s.length)
          val w = new SubSeqWalker(s)
          while (w.hasCurrent)
            flushHtmlTag(w)
        case _ =>
          // assume inline HTML comments
          walk.at(const.htmlComment) match {
            case Some(m) =>
              // Comments are written out exactly as matched.
              out.write(m.group(0))
              walk.startFrom(m.end)
            case _ =>
              // Neither a tag nor a comment: a stray `<` must be escaped so
              // that it cannot open markup in the output.
              out.write("&lt;")
              walk.skip()
          }
      }
      true
    } else false
  }

  /*! Emits one step of an inline HTML tag: an ampersand is handled by `tryAmp`,
  anything else is flushed as a generic character. */
  def flushHtmlTag(walk: Walker): Unit =
    if (!tryAmp(walk)) flushGeneric(walk)

  /*! Emits one step of "plain" content: ampersands and fragments are handled
  by `tryAmp` and `tryFragment` (in that order), anything else is flushed as a
  generic character. */
  def flushPlain(walk: Walker): Unit = {
    val consumed = tryAmp(walk) || tryFragment(walk)
    if (!consumed) flushGeneric(walk)
  }

  /*! This one does not recognize HTML tags, so is not called in the main `run`.
  It should be called whenever nested HTML markup should be escaped, e.g. in code spans.*/
  def tryEscapeLt(walk: Walker): Boolean = {
    if (walk.at("<")) {
      // Unconditionally escape: no attempt is made to recognize a tag here,
      // so the `<` must reach the output as an entity, never as raw markup.
      out.write("&lt;")
      walk.skip()
      true
    } else false
  }

  /*! The `>` symbol is always escaped; therefore this method must be called after
  `tryLt` to preserve inline HTML integrity. */
  def tryGt(walk: Walker): Boolean = {
    if (walk.at(">")) {
      // Always written as an entity; a raw `>` would defeat the escaping
      // this method exists to perform.
      out.write("&gt;")
      walk.skip()
      true
    } else false
  }

  /*! Scans forward to find specified marker, respecting backslashes. */
  def findMarker(walk: Walker, marker: String) = {
    // A marker preceded by a backslash does not count as a match.
    val escapedMarker = "\\" + marker
    walk.lookahead { cursor =>
      var hit = false
      while (!hit && cursor.hasCurrent) {
        if (cursor.at(escapedMarker)) {
          // Step over the backslash together with the marker it escapes.
          cursor.skip(escapedMarker.length)
        } else if (cursor.at(marker)) {
          hit = true
        } else {
          cursor.skip()
        }
      }
      // The position of the marker's first character, when one was found.
      if (hit) Some(cursor.position)
      else None
    }
  }

  /*! Looks for a logical block enclosed in `marker` and executes `inside`,
  passing the block contents to it. Also repositions `walk` just past the end
  of such a block if the end marker exists; otherwise it flushes the marker
  and leaves `walk` positioned right after the opening `marker`. */
  def tryBracing(walk: Walker,
                 marker: String,
                 inside: Walker => Unit): Boolean =
    if (!walk.at(marker)) false
    else {
      val markerLen = marker.length
      // Step over the opening marker before searching for the closing one.
      walk.skip(markerLen)
      findMarker(walk, marker) match {
        case Some(closeIdx) =>
          // Hand the enclosed region to the callback, then jump past the
          // closing marker.
          val enclosed = new SubSeqWalker(walk, walk.position, closeIdx)
          inside(enclosed)
          walk.startFrom(closeIdx + markerLen)
        case _ =>
          // No closing marker: the opening one is emitted as literal text.
          out.write(marker)
      }
      true
    }

  /*! Triple code spans are for _hardcore freaks_: their contents are left unprocessed,
  apart from ampersand escaping and fragment substitution. We only respect the \``` case
  to leave three consecutive backticks inside. */
  def tryTripleCodeSpan(walk: Walker): Boolean =
    tryBracing(walk, "```", { w =>
      // Wrap the span contents in a <code> element; writing empty strings
      // here would make the span indistinguishable from surrounding text.
      out.write("<code>")
      while (w.hasCurrent)
        flushPlain(w)
      out.write("</code>")
    })

  /*! The contents of regular code spans is left "as is", preserving escaping
  SGML entity references, `<`, `>` and backslash-escapes. Fragments are also
  rendered inside code spans. */
  def tryCodeSpan(walk: Walker): Boolean =
    tryBracing(walk, "`", { w =>
      // Wrap the span contents in a <code> element; writing empty strings
      // here would make the span indistinguishable from surrounding text.
      out.write("<code>")
      while (w.hasCurrent)
        flushCode(w)
      out.write("</code>")
    })

  /*! Emits one step of code-span content. Backslash escapes, ampersands, `<`,
  `>` and fragments are tried in that order; if none applies, the current
  character is flushed as a generic one. */
  def flushCode(walk: Walker): Unit = {
    val consumed =
      tryBackslashEscape(walk) ||
        tryAmp(walk) ||
        tryEscapeLt(walk) ||
        tryGt(walk) ||
        tryFragment(walk)
    if (!consumed) flushGeneric(walk)
  }

  /*! The contents enclosed in a pair of `%%` markers or a pair of `$$` markers are
  interpreted just like in regular code spans, except that backslash escapes inside
  are ignored, and the markers are flushed, too. This is to create MathJax-friendly
  formulas. */

  def tryFormula(walk: Walker): Boolean = {
    // Both formula styles behave the same; only the marker differs. The marker
    // itself is written before and after the formula contents.
    def bracedBy(marker: String): Boolean =
      tryBracing(walk, marker, { w =>
        out.write(marker)
        while (w.hasCurrent)
          flushFormula(w)
        out.write(marker)
      })
    // `%%` is tried first; `$$` only when `%%` does not apply.
    bracedBy("%%") || bracedBy("$$")
  }

  /*! Emits one step of formula content. Ampersands, `<`, `>` and fragments are
  tried in that order (backslash escapes are not); if none applies, the
  current character is flushed as a generic one. */
  def flushFormula(walk: Walker): Unit = {
    val consumed =
      tryAmp(walk) ||
        tryEscapeLt(walk) ||
        tryGt(walk) ||
        tryFragment(walk)
    if (!consumed) flushGeneric(walk)
  }


  /*! Em and strong are matched reluctantly up to their closing character
  (`_` for em, `*` for strong). */

  def tryEm(walk: Walker): Boolean =
    tryBracing(walk, "_", { w =>
      // The enclosed text is processed as regular inline markup and wrapped
      // in an <em> element; without the tags the emphasis would be lost.
      out.write("<em>")
      run(w)
      out.write("</em>")
    })
  def tryStrong(walk: Walker): Boolean =
    tryBracing(walk, "*", { w =>
      // The enclosed text is processed as regular inline markup and wrapped
      // in a <strong> element; without the tags the emphasis would be lost.
      out.write("<strong>")
      run(w)
      out.write("</strong>")
    })

  /*! Fragments go in a pair of double braces like `{{id}}`. They are resolved
  using Renderer's configuration and processed like regular inlines. */
  def tryFragment(walk: Walker): Boolean =
    if (!walk.at("{{")) false
    else {
      walk.at(const.fragment) match {
        case Some(found) =>
          val whole = found.group(0)
          // Capture group 1 is the key looked up in the configuration.
          conf.resolveFragment(found.group(1)) match {
            case Some(fragDef) =>
              flushFragment(fragDef)
            case _ =>
              // Unknown fragment: the matched text is written unchanged.
              out.write(whole)
          }
          walk.skip(whole.length)
        case _ =>
          // Does not match the fragment pattern: emit the opening braces
          // literally and move past them.
          out.write("{{")
          walk.skip(2)
      }
      true
    }

  /*! Renders the body of a resolved fragment according to its processing
  mode: `PLAIN` goes through `flushPlain`, `CODE` through `flushCode`, and any
  other mode through the regular `run` loop. */
  def flushFragment(fragDef: FragmentDef): Unit = {
    // Builds a fresh walker over the fragment body; evaluated only inside the
    // selected branch, i.e. after the mode has been inspected.
    def bodyWalker = new SubSeqWalker(fragDef.body)
    fragDef.mode match {
      case ProcessingMode.PLAIN => // like triple code span
        val plain = bodyWalker
        while (plain.hasCurrent)
          flushPlain(plain)
      case ProcessingMode.CODE => // like regular code span
        val code = bodyWalker
        while (code.hasCurrent)
          flushCode(code)
      case _ => // like regular inline
        run(bodyWalker)
    }
  }

  /*! Two styles of links and media are supported: inline like `[text](url)` and
  reference-style like `[text][id]`. The latter are resolved using the Renderer's
  configuration. */
  def tryLink(walk: Walker): Boolean =
    if (!walk.at("[")) false
    else {
      resolveLinkDef(walk, media = false) match {
        case Some((text, linkDef)) =>
          // The link text is itself inline markup: render it into a buffer
          // with a separate processor sharing the same configuration.
          val buffer = new StringWriter
          new InlineProcessor(buffer, conf).process(text)
          linkDef.writeLink(out, buffer.toString)
        case _ =>
          // Not a resolvable link: emit the bracket literally.
          out.write("[")
          walk.skip()
      }
      true
    }

  /*! Media counterpart of `tryLink`, triggered by `![`. The text between the
  brackets is passed through `escapeHtml` and handed to the link definition's
  `writeMedia`. */
  def tryMedia(walk: Walker): Boolean =
    if (!walk.at("![")) false
    else {
      // Step over "!" so that the walker sits on "[", which is what
      // `resolveLinkDef` asserts on entry.
      walk.skip()
      resolveLinkDef(walk, media = true) match {
        case Some((alt, linkDef)) =>
          linkDef.writeMedia(out, escapeHtml(alt))
        case _ =>
          // Not resolvable: emit the opening marker literally and advance
          // one more position.
          out.write("![")
          walk.skip()
      }
      true
    }

  /*! Handles headless links of the form `[[id]]`. When a closing `]]` exists
  and `conf.resolveLink` knows the enclosed id, the link is written with the
  definition's own title as its text. Otherwise `[[` is emitted literally and
  the walker is placed right after it. */
  def tryHeadlessLink(walk: Walker): Boolean =
    if (!walk.at("[[")) false
    else {
      walk.skip(2)
      val idStart = walk.position
      // Advance until the closing "]]" or the end of input, whichever comes first.
      while (walk.hasCurrent && !walk.at("]]"))
        walk.skip()
      val closed = walk.hasCurrent && walk.at("]]")
      // The id is looked up only when a closing marker was actually found.
      val target =
        if (closed) conf.resolveLink(walk.subSequence(idStart, walk.position).toString)
        else None
      target match {
        case Some(linkDef) =>
          linkDef.writeLink(out, linkDef.title)
          walk.skip(2)  // skip closing ]]
        case _ =>
          out.write("[[")
          walk.startFrom(idStart)
      }
      true
    }

  // Resolves link definition and moves walker to the end of that definition
  def resolveLinkDef(walk: Walker, media: Boolean): Option[(String, LinkDef)] = {
    // Precondition: the walker must be positioned at the opening "[".
    assert(walk.at("["))
    walk.skip()
    // Position of the first character after "["; `startIdx - 1` is the "[" itself.
    val startIdx = walk.position
    // Scan to the closing "]", stepping over backslash-escaped "\]" pairs.
    while (walk.hasCurrent && !walk.at("]"))
      if (walk.at("\\]")) walk.skip(2)
      else walk.skip()
    if (!walk.hasCurrent) {
      // Input ended without a closing "]": rewind to the "[" and give up.
      walk.startFrom(startIdx - 1)
      return None
    }
    assert(walk.at("]"))
    // Text between the brackets (any backslashes are kept as they appear).
    val text = walk.subSequence(startIdx, walk.position).toString
    walk.skip()
    // First attempt: inline form. The URL is capture group 1 of the match and
    // the walker is moved past the whole matched text.
    walk.at(const.inlineLink) flatMap { m =>
      val s = m.group(0)
      walk.skip(s.length)
      val url = m.group(1)
      Some(text -> new LinkDef(url))
    } orElse {
      // Second attempt: reference form. Capture group 1 is the id, looked up as
      // media or as a link depending on the `media` flag. The walker is advanced
      // only when the id resolves.
      walk.at(const.refLink) flatMap { m =>
        val s = m.group(0)
        val id = m.group(1)
        val ld = if (media) conf.resolveMedia(id) else conf.resolveLink(id)
        ld match {
          case Some(linkDef) =>
            walk.skip(s.length)
            Some(text -> linkDef)
          case _ => None
        }
      }
    } orElse {
      // reset the original walker to point to initial "[" in case of any failure
      walk.startFrom(startIdx - 1)
      None
    }
  }

  /*! Replaces typographic shortcuts found at the current position: `--`,
  `(r)`, `(c)`, `(tm)` (either case), `...`, left and right arrows, and
  finally the left/right quote patterns from `const`, which are replaced by
  the quote strings configured in `conf`. Returns `true` when a replacement
  was written and the walker advanced, `false` otherwise.

  Arrows are recognized both with a raw angle bracket and with the bracket
  entity-encoded (`&lt;-`, `-&gt;`). Testing the raw form twice would leave
  the second branch, which skips five characters, unreachable. */
  def tryTypographics(walk: Walker): Boolean = {
    // Consumes `token` and writes `glyph` when the walker is positioned at `token`.
    def swap(token: String, glyph: String): Boolean =
      if (walk.at(token)) {
        walk.skip(token.length)
        out.write(glyph)
        true
      } else false

    // Short-circuit `||` keeps the first-match-wins order of the shortcuts.
    val literalReplaced =
      swap("--", "—") ||
        swap("(r)", "®") || swap("(R)", "®") ||
        swap("(c)", "©") || swap("(C)", "©") ||
        swap("(tm)", "™") || swap("(TM)", "™") ||
        swap("...", "…") ||
        swap("<-", "←") || swap("&lt;-", "←") ||
        swap("->", "→") || swap("-&gt;", "→")

    if (literalReplaced) true
    else {
      // Quotes are matched by pattern; plain matches replace the non-local
      // `return` that was previously issued from inside `map` lambdas.
      walk.at(const.ty_leftQuote) match {
        case Some(m) =>
          walk.startFrom(m.end)
          out.write(conf.leftQuote)
          true
        case _ =>
          walk.at(const.ty_rightQuote) match {
            case Some(m) =>
              walk.startFrom(m.end)
              out.write(conf.rightQuote)
              true
            case _ =>
              false
          }
      }
    }
  }


}




© 2015 - 2025 Weber Informatics LLC | Privacy Policy