
e.scalamd.scalamd.1.2.source-code.md.scala Maven / Gradle / Ivy
Go to download
Show more of this group Show more artifacts with this name
Show all versions of scalamd Show documentation
Show all versions of scalamd Show documentation
ScalaMD: A Scala Markdown Processor
/** * Copyright (C) 2009-2010 the original author or authors. */ package org.fusesource.scalamd // import ru.circumflex.core._ import java.util.regex._ import java.util.Random import java.lang.StringBuilder import collection.mutable.ListBuffer // # The Markdown Processor /** * This utility converts a plain text written in [Markdown][1] into HTML fragment. * The typical usage is: * * val md = Markdown(myMarkdownText) * * [1]: http://daringfireball.net/projects/markdown/syntax "Markdown Syntax" */ object Markdown { // ## SmartyPants chars val leftQuote = "“" val rightQuote = "”" val dash = "—" val copy ="©" val reg = "®" val trademark = "™" val ellipsis = "…" val leftArrow = "←" val rightArrow = "→" // ## Commons val keySize = 20 val chars = "0123456789abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ" val rnd = new Random val blockTags = "p" :: "div" :: "h1" :: "h2" :: "h3" :: "h4" :: "h5" :: "h6" :: "blockquote" :: "pre" :: "table" :: "dl" :: "ol" :: "ul" :: "script" :: "noscript" :: "form" :: "fieldset" :: "iframe" :: "math" :: "ins" :: "del" :: "article" :: "aside" :: "footer" :: "header" :: "hgroup" :: "nav" :: "section" :: "figure" :: "video" :: "audio" :: "embed" :: "canvas" :: "address" :: "details" :: "object" :: Nil val htmlNameTokenExpr = "[a-z_:][a-z0-9\\-_:.]*" // ## Regex patterns // We use precompile several regular expressions that are used for typical // transformations. 
// Outdent val rOutdent = Pattern.compile("^ {1,4}", Pattern.MULTILINE) // Standardize line endings val rLineEnds = Pattern.compile("\\r\\n|\\r") // Strip out whitespaces in blank lines val rBlankLines = Pattern.compile("^ +$", Pattern.MULTILINE) // Tabs val rTabs = Pattern.compile("\\t") // Trailing whitespace val rTrailingWS = Pattern.compile("\\s+$") // Start of inline HTML block val rInlineHtmlStart = Pattern.compile("^<(" + blockTags.mkString("|") + ")\\b[^/>]*?>", Pattern.MULTILINE | Pattern.CASE_INSENSITIVE) // HTML comments val rHtmlComment = Pattern.compile("^ {0,3}()\\s*?(?=\\n+|\\Z)", Pattern.MULTILINE | Pattern.DOTALL) // Link definitions val rLinkDefinition = Pattern.compile("^ {0,3}\\[(.+)\\]:" + " *\\n? *(\\S+)>? *\\n? *" + "(?:[\"('](.+?)[\")'])?" + "(?=\\n+|\\Z)", Pattern.MULTILINE) // Character escaping val rEscAmp = Pattern.compile("&(?!#?[xX]?(?:[0-9a-fA-F]+|\\w+);)") val rEscLt = Pattern.compile("<(?![a-z/?\\$!])") val rInsideTags = Pattern.compile("<(/?" + htmlNameTokenExpr + "(?:\\s+(?:" + "(?:" + htmlNameTokenExpr + "\\s*=\\s*\"[^\"]*\")|" + "(?:" + htmlNameTokenExpr + "\\s*=\\s*'[^']*')|" + "(?:" + htmlNameTokenExpr + "\\s*=\\s*[a-z0-9_:.\\-]+)" + ")\\s*)*)>", Pattern.DOTALL | Pattern.CASE_INSENSITIVE) // Headers val rH1 = Pattern.compile("^ {0,3}(\\S.*?)( *\\{#(.*?)\\})?\\n=+(?=\\n+|\\Z)", Pattern.MULTILINE) val rH2 = Pattern.compile("^ {0,3}(\\S.*?)( *\\{#(.*?)\\})?\\n-+(?=\\n+|\\Z)", Pattern.MULTILINE) val rHeaders = Pattern.compile("^(#{1,6}) *(\\S.*?)(?: *#*)?( *\\{#(.*?)\\})?$", Pattern.MULTILINE) // Horizontal rulers val rHr = Pattern.compile("^ {0,3}(?:" + "(?:(?:\\* *){3,})|" + "(?:(?:- *){3,})|" + "(?:(?:_ *){3,})" + ") *$", Pattern.MULTILINE) val rHtmlHr = Pattern.compile("^ {0,3}(
", Pattern.DOTALL) // Images val rImage = Pattern.compile("!\\[(.*?)\\]\\((.*?)( \"(.*?)\")?\\)") // Backslash escapes val backslashEscapes = ("\\\\\\\\" -> "\") :: ("\\\\`" -> "`") :: ("\\\\_" -> "_") :: ("\\\\>" -> ">") :: ("\\\\\\*" -> "*") :: ("\\\\\\{" -> "{") :: ("\\\\\\}" -> "}") :: ("\\\\\\[" -> "[") :: ("\\\\\\]" -> "]") :: ("\\\\\\(" -> "(") :: ("\\\\\\)" -> ")") :: ("\\\\#" -> "#") :: ("\\\\\\+" -> "+") :: ("\\\\-" -> "-") :: ("\\\\\\." -> ".") :: ("\\\\!" -> "!") :: Nil // Reference-style links val rRefLinks = Pattern.compile("(\\[(.*?)\\] ?(?:\\n *)?\\[(.*?)\\])") // Inline links val rInlineLinks = Pattern.compile("\\[(.*?)\\]\\( *(.*?)>? *" + "((['\"])(.*?)\\4)?\\)", Pattern.DOTALL) // Autolinks val rAutoLinks = Pattern.compile("<((https?|ftp):[^'\">\\s]+)>") // Autoemails val rAutoEmail = Pattern.compile("<([-.\\w]+\\@[-a-z0-9]+(\\.[-a-z0-9]+)*\\.[a-z]+)>") // Ems and strongs val rStrong = Pattern.compile("(\\*\\*|__)(?=\\S)(.+?[*_]*)(?<=\\S)\\1") val rEm = Pattern.compile("(\\*|_)(?=\\S)(.+?)(?<=\\S)\\1") // Manual linebreaks val rBrs = Pattern.compile(" {2,}\n") // Ampersand wrapping val rAmp = Pattern.compile("&(?!#?[xX]?(?:[0-9a-fA-F]+|\\w+);)") // SmartyPants val smartyPants = (Pattern.compile("(?<=\\s|\\A)(?:\"|")(?=\\S)") -> leftQuote) :: (Pattern.compile("(?<=[\\w)?!.])(?:\"|")(?=[.,;?!*)]|\\s|\\Z)") -> rightQuote) :: (Pattern.compile("--") -> dash) :: (Pattern.compile("\\(r\\)", Pattern.CASE_INSENSITIVE) -> reg) :: (Pattern.compile("\\(c\\)", Pattern.CASE_INSENSITIVE) -> copy) :: (Pattern.compile("\\(tm\\)", Pattern.CASE_INSENSITIVE) -> trademark) :: (Pattern.compile("\\.{3}") -> ellipsis) :: (Pattern.compile("<-|<-") -> leftArrow) :: (Pattern.compile("->|->") -> rightArrow) :: Nil // Markdown inside inline HTML val rInlineMd = Pattern.compile("(.*)", Pattern.DOTALL) // Macro definitions val rMacroDefs = Pattern.compile("") // TOC Macro val rToc = Pattern.compile("""\{\:toc\}""") /** * Convert the `source` from Markdown to HTML. 
*/ def apply(source: String): String = new MarkdownText(source).toHtml var macros: List[MacroDefinition] = Nil } // # Processing Stuff case class MacroDefinition(pattern: String, flags: String, replacement: (Matcher)=>String, literally: Boolean) { val regex: Pattern = { var f = 0; if (flags != null) flags.toList.foreach { case 'i' => f = f | Pattern.CASE_INSENSITIVE case 'd' => f = f | Pattern.UNIX_LINES case 'm' => f = f | Pattern.MULTILINE case 's' => f = f | Pattern.DOTALL case 'u' => f = f | Pattern.UNICODE_CASE case 'x' => f = f | Pattern.COMMENTS case _ => } Pattern.compile(pattern, f) } override def toString = regex.toString } /** * We collect all processing logic within this class. */ class MarkdownText(source: CharSequence) { protected var listLevel = 0 protected var text = new StringEx(source) import Markdown._ /** * Link Definitions */ case class LinkDefinition(val url: String, val title: String) { override def toString = url + " (" + title + ")" } protected var links: Map[String, LinkDefinition] = Map() protected var macros: List[MacroDefinition] = Markdown.macros // Protector for HTML blocks val htmlProtector = new Protector // ## Encoding methods /** * All unsafe chars are encoded to SGML entities. */ protected def encodeUnsafeChars(code: StringEx): StringEx = code .replaceAll("<", "<") .replaceAll(">", ">") .replaceAll("*", "*") .replaceAll("`", "`") .replaceAll("_", "_") .replaceAll("\\", "\") /** * All characters escaped with backslash are encoded to corresponding * SGML entities. */ protected def encodeBackslashEscapes(text: StringEx): StringEx = backslashEscapes.foldLeft(text)((tx, p) => tx.replaceAll(Pattern.compile(p._1), p._2)) /** * All unsafe chars are encoded to SGML entities inside code blocks. */ protected def encodeCode(code: StringEx): StringEx = code .replaceAll(rEscAmp, "&") .replaceAll("<", "<") .replaceAll(">", ">") /** * Ampersands and less-than signes are encoded to `&` and `<` respectively. 
*/ protected def encodeAmpsAndLts(text: StringEx) = text .replaceAll(rEscAmp, "&") .replaceAll(rEscLt, "<") /** * Encodes specially-treated characters inside the HTML tags. */ protected def encodeCharsInsideTags(text: StringEx) = text.replaceAll(rInsideTags, m => "<" + encodeUnsafeChars(new StringEx(m.group(1))) .replaceAll(rEscAmp, "&") .toString + ">") // ## Processing methods /** * Normalization includes following stuff: * * * replace DOS- and Mac-specific line endings with `\n`; * * replace tabs with spaces; * * reduce all blank lines (i.e. lines containing only spaces) to empty strings. */ protected def normalize(text: StringEx) = text .replaceAll(rLineEnds, "\n") .replaceAll(rTabs, " ") .replaceAll(rBlankLines, "") /** * All inline HTML blocks are hashified, so that no harm is done to their internals. */ protected def hashHtmlBlocks(text: StringEx): StringEx = { text.replaceAll(rHtmlHr, m => htmlProtector.addToken(m.group(1)) + "\n") val m = text.matcher(rInlineHtmlStart) if (m.find) { val tagName = m.group(1) // This regex will match either opening or closing tag; // opening tags will be captured by $1 leaving $2 empty, // while closing tags will be captured by $2 leaving $1 empty val mTags = text.matcher(Pattern.compile( "(<" + tagName + "\\b[^/>]*?>)|(" + tagName + "\\s*>)", Pattern.CASE_INSENSITIVE)) // Find end index of matching closing tag var depth = 1 var idx = m.end while (depth > 0 && idx < text.length && mTags.find(idx)) { if (mTags.group(2) == null) depth += 1 else depth -= 1 idx = mTags.end } // Having inline HTML subsequence val endIdx = idx val startIdx = m.start val inlineHtml = new StringEx(text.subSequence(startIdx, endIdx)) // Process markdown inside inlineHtml.replaceAll(rInlineMd, m => new MarkdownText(m.group(1)).toHtml) // Hashify block val key = htmlProtector.addToken(inlineHtml.toString) val sb = new StringBuilder(text.subSequence(0, startIdx)) .append("\n") .append(key) .append("\n") .append(text.subSequence(endIdx, text.length)) // 
Continue recursively until all blocks are processes hashHtmlBlocks(new StringEx(sb)) } else text } /** * All HTML comments are hashified too. */ protected def hashHtmlComments(text: StringEx): StringEx = text.replaceAll(rHtmlComment, m => { val comment = m.group(1) val hash = htmlProtector.addToken(comment) "\n" + hash + "\n" }) /** * Standalone link definitions are added to the dictionary and then * stripped from the document. */ protected def stripLinkDefinitions(text: StringEx) = text.replaceAll(rLinkDefinition, m => { val id = m.group(1).toLowerCase val url = m.group(2) val title = if (m.group(3) == null) "" else m.group(3) links += id -> LinkDefinition(url, title.replaceAll("\"", """)) "" }) /** * Macro definitions are stripped from the document. */ protected def stripMacroDefinitions(text: StringEx) = text.replaceAll(rMacroDefs, m => { val replacement = m.group(3) macros ++= List(MacroDefinition(m.group(1), m.group(2), (x)=> replacement, false)) "" }) /** * Block elements are processed within specified `text`. */ protected def runBlockGamut(text: StringEx): StringEx = { var result = text result = doHeaders(result) result = doHorizontalRulers(result) result = doLists(result) result = doCodeBlocks(result) result = doBlockQuotes(result) result = hashHtmlBlocks(result) // Again, now hashing our generated markup result = formParagraphs(result) return result } /** * Process both types of headers. */ protected def doHeaders(text: StringEx): StringEx = text .replaceAll(rH1, m => { val label = runSpanGamut(new StringEx(m.group(1))) val id = m.group(3) val idAttr = if (id == null) to_id(label.toString) else " id = \"" + id + "\"" ")\\s*?$", Pattern.CASE_INSENSITIVE | Pattern.DOTALL | Pattern.MULTILINE) // Lists val listExpr = "( {0,3}([-+*]|\\d+\\.) 
+(?s:.+?)" + "(?:\\Z|\\n{2,}(?![-+*]|\\s|\\d+\\.)))" val rSubList = Pattern.compile("^" + listExpr, Pattern.MULTILINE) val rList = Pattern.compile("(?<=\\n\\n|\\A\\n?)" + listExpr, Pattern.MULTILINE) val rListItem = Pattern.compile("(\\n)?^( *)(?:[-+*]|\\d+\\.) +" + "((?s:.+?)\\n{1,2})(?=\\n*(?:\\Z|\\2(?:[-+*]|\\d+\\.) +))", Pattern.MULTILINE) // Code blocks val rCodeBlock = Pattern.compile("(?<=\\n\\n|\\A\\n?)" + "(^ {4}(?s:.+?))(?=\\Z|\\n+ {0,3}\\S)", Pattern.MULTILINE) val rCodeLangId = Pattern.compile("^\\s*lang:(.+?)(?:\\n|\\Z)") // Block quotes val rBlockQuote = Pattern.compile("((?:^ *>(?:.+(?:\\n|\\Z))+\\n*)+)", Pattern.MULTILINE) val rBlockQuoteTrims = Pattern.compile("(?:^ *> ?)|(?:^ *$)|(?-m:\\n+$)", Pattern.MULTILINE) // Paragraphs splitter val rParaSplit = Pattern.compile("\\n{2,}") // Code spans val rCodeSpan = Pattern.compile("(?(.*?) " + label + "
" }).replaceAll(rH2, m => { val label = runSpanGamut(new StringEx(m.group(1))) val id = m.group(3) val idAttr = if (id == null) to_id(label.toString) else " id = \"" + id + "\"" "" + label + "
" }).replaceAll(rHeaders, m => { val marker = m.group(1) val label = runSpanGamut(new StringEx(m.group(2))) val id = m.group(4) val idAttr = if (id == null) { if(marker.length<4) to_id(label.toString) else "" } else { " id = \"" + id + "\"" } "" + label + " " }) // TODO: handle the dup id case. def to_id(label:String) = " id = \""+label.replaceAll("""[^a-zA-Z0-9\-:]""", "_") + "\"" /** * Process horizontal rulers. */ protected def doHorizontalRulers(text: StringEx): StringEx = text.replaceAll(rHr, "\n
\n") /** * Process ordered and unordered lists and list items.. * * It is possible to have some nested block elements inside * lists, so the contents is passed to `runBlockGamut` after some * minor transformations. */ protected def doLists(text: StringEx): StringEx = { val pattern = if (listLevel == 0) rList else rSubList text.replaceAll(pattern, m => { val list = new StringEx(m.group(1)) .append("\n") .replaceAll(rParaSplit, "\n\n\n") .replaceAll(rTrailingWS, "\n") val listType = m.group(2) match { case s if s.matches("[*+-]") => "ul" case _ => "ol" } val result = processListItems(list).replaceAll(rTrailingWS, "") "<" + listType + ">\n" + result + "\n" + listType + ">\n\n" }) } protected def processListItems(text: StringEx): StringEx = { listLevel += 1 val sx = text.replaceAll(rListItem, m => { val content = m.group(3) val leadingLine = m.group(1) var item = new StringEx(content).outdent() if (leadingLine != null || content.indexOf("\n\n") != -1) item = runBlockGamut(item) else item = runSpanGamut(doLists(item)) "
" + code + "
\n\n"
})
/**
 * Process blockquotes.
 *
 * Blockquotes may contain nested block elements, so after the quote markers
 * are removed with `rBlockQuoteTrims` the contents are passed to
 * `runBlockGamut` and the result is wrapped in a `<blockquote>` element.
 *
 * @param text the text to scan for blockquotes
 * @return the value returned by `text.replaceAll`, with every `rBlockQuote`
 *         match replaced by its rendered markup
 */
protected def doBlockQuotes(text: StringEx): StringEx =
  text.replaceAll(rBlockQuote, m => {
    val content = new StringEx(m.group(1)).replaceAll(rBlockQuoteTrims, "")
    "<blockquote>\n" + runBlockGamut(content) + "\n</blockquote>\n\n"
  })

/**
 * Form paragraphs.
 *
 * At this point all HTML blocks should be hashified, so the trimmed text is
 * split on `rParaSplit` (two or more consecutive linebreaks) and each chunk
 * is handled as follows:
 *
 *  * a chunk that `htmlProtector` can decode is a protected HTML block and
 *    is emitted as decoded, without a paragraph wrapper;
 *  * any other chunk goes through `runSpanGamut` and is wrapped in `<p>`.
 *
 * @param text the block-level text to split into paragraphs
 * @return a new `StringEx` with the chunks joined by a blank line
 */
protected def formParagraphs(text: StringEx): StringEx = {
  val paragraphs = rParaSplit.split(text.toString.trim).map { para =>
    htmlProtector.decode(para) match {
      case Some(protectedHtml) => protectedHtml
      case _ => "<p>" + runSpanGamut(new StringEx(para)).toString + "</p>"
    }
  }
  new StringEx(paragraphs.mkString("\n\n"))
}

/**
 * Span elements are processed within specified `text`.
 *
 * The stages run strictly in the order listed below. Code spans and HTML
 * tags are swapped for tokens of a per-call `Protector` so that later stages
 * leave them alone; `unprotect` restores them at the very end.
 *
 * Every stage receives the value returned by the previous one. This gives
 * the same result as handing the original `text` to each stage when the
 * stages mutate their argument in place and return it, and it stays correct
 * if a stage returns a fresh `StringEx` instead.
 * NOTE(review): `StringEx` is not defined in this part of the file, so its
 * mutability could not be checked here.
 *
 * @param text the span-level text to process
 * @return the text with all span-level transformations applied
 */
protected def runSpanGamut(text: StringEx): StringEx = {
  val protector = new Protector
  val stages = List[StringEx => StringEx](
    protectCodeSpans(protector, _),
    doCodeSpans(protector, _),
    encodeBackslashEscapes(_),
    doImages(_),
    doRefLinks(_),
    doInlineLinks(_),
    doAutoLinks(_),
    doLineBreaks(_),
    protectHtmlTags(protector, _),
    doSmartyPants(_),
    doAmpSpans(_),
    doEmphasis(_),
    unprotect(protector, _)
  )
  stages.foldLeft(text)((current, stage) => stage(current))
}

/**
 * Replaces every match of `rInsideTags` with a token issued by `protector`,
 * so that the matched text is hidden from subsequent span processing.
 */
protected def protectHtmlTags(protector: Protector, text: StringEx): StringEx =
  text.replaceAll(rInsideTags, m => protector.addToken(m.group(0)))

/**
 * Replaces every match of `rCode` with a token issued by `protector`,
 * so that the matched text is hidden from subsequent span processing.
 */
protected def protectCodeSpans(protector: Protector, text: StringEx): StringEx =
  text.replaceAll(rCode, m => protector.addToken(m.group(0)))

/**
 * Restores protected fragments: every key known to `protector` is replaced
 * with its decoded value, or with an empty string if the key cannot be
 * decoded.
 */
protected def unprotect(protector: Protector, text: StringEx): StringEx =
  protector.keys.foldLeft(text) { (restored, key) =>
    restored.replaceAll(key, protector.decode(key).getOrElse(""))
  }

/**
 * Process code spans.
 *
 * The second group of each `rCodeSpan` match is trimmed, passed through
 * `encodeCode`, wrapped in `<code>` and stored in `protector`; the token
 * returned by `protector.addToken` takes its place in the text.
 */
protected def doCodeSpans(protector: Protector, text: StringEx): StringEx =
  text.replaceAll(rCodeSpan, m =>
    protector.addToken("<code>" + encodeCode(new StringEx(m.group(2).trim)) + "</code>"))
/**
* Process images.
*/
protected def doImages(text: StringEx): StringEx = text.replaceAll(rImage, m => {
val alt = m.group(1)
val src = m.group(2)
val title = m.group(4)
var result = "\n") /** * Process SmartyPants stuff. */ protected def doSmartyPants(text: StringEx): StringEx = smartyPants.foldLeft(text)((t,p) => t.replaceAll(p._1, p._2)) /** * Wrap ampersands with ``. */ protected def doAmpSpans(text: StringEx): StringEx = text.replaceAll(rAmp, "&") /** * Process user-defined macros. */ protected def doMacros(text: StringEx): StringEx = { macros.foldLeft(text)((t, m) => { t.replaceAllFunc(m.regex, m.replacement, m.literally) }) } /** * Process user-defined macros. */ protected def doToc(text: StringEx): StringEx = { text.replaceAllFunc(rToc, _=> (new TOC(text.toString)).toHtml, true); } /** * Transform the Markdown source into HTML. */ def toHtml(): String = { var result = text result = stripMacroDefinitions(result) result = doMacros(result) result = normalize(result) result = encodeCharsInsideTags(result) result = hashHtmlBlocks(result) result = hashHtmlComments(result) result = encodeAmpsAndLts(result) result = stripLinkDefinitions(result) result = runBlockGamut(result) result = doToc(result) return result.toString } } object TOC { val rHeadings = """
- """+"\n")
def endList(l: Int) = sb.append(" " * (l - 1) + "
- """+"\n" + sb.toString + "
© 2015 - 2025 Weber Informatics LLC | Privacy Policy