// org.markdownj.MarkdownProcessor (artifact: markdownj-core)
// Core functionality provided by MarkdownJ.
/*
Copyright (c) 2005, Martian Software
Authors: Pete Bevin, John Mutchek
http://www.martiansoftware.com/markdownj
All rights reserved.
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are
met:
* Redistributions of source code must retain the above copyright notice,
this list of conditions and the following disclaimer.
* Redistributions in binary form must reproduce the above copyright
notice, this list of conditions and the following disclaimer in the
documentation and/or other materials provided with the distribution.
* Neither the name "Markdown" nor the names of its contributors may
be used to endorse or promote products derived from this software
without specific prior written permission.
This software is provided by the copyright holders and contributors "as
is" and any express or implied warranties, including, but not limited
to, the implied warranties of merchantability and fitness for a
particular purpose are disclaimed. In no event shall the copyright owner
or contributors be liable for any direct, indirect, incidental, special,
exemplary, or consequential damages (including, but not limited to,
procurement of substitute goods or services; loss of use, data, or
profits; or business interruption) however caused and on any theory of
liability, whether in contract, strict liability, or tort (including
negligence or otherwise) arising in any way out of the use of this
software, even if advised of the possibility of such damage.
*/
package org.markdownj;
import java.util.Collection;
import java.util.Map;
import java.util.Random;
import java.util.TreeMap;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
/**
 * Convert Markdown text into HTML, as per http://daringfireball.net/projects/markdown/ .
 * Usage:
 * <pre><code>
 *     MarkdownProcessor markdown = new MarkdownProcessor();
 *     String html = markdown.markdown("*italic* **bold**\n_italic_ __bold__");
 * </code></pre>
 */
public class MarkdownProcessor {
/** Random source used by encodeEmail to pick an encoding per character. */
private Random rnd = new Random();
/**
 * Link definitions gathered by stripLinkDefinitions, keyed by lower-cased id.
 * Typed so that lookups yield a LinkDefinition without a cast.
 */
private Map<String, LinkDefinition> linkDefinitions = new TreeMap<String, LinkDefinition>();
/** Protects whole raw-HTML blocks from further Markdown processing. */
private static final CharacterProtector HTML_PROTECTOR = new CharacterProtector();
/** Protects individual special characters (escapes, code, attribute values). */
private static final CharacterProtector CHAR_PROTECTOR = new CharacterProtector();
/** Current list nesting depth; incremented and decremented by processListItems. */
private int listLevel;
/** Tab width in columns; lists and HTML blocks may be indented by up to tabWidth - 1 spaces. */
private int tabWidth = 4;
/**
 * Builds a Markdown processor whose list nesting depth starts at zero.
 */
public MarkdownProcessor() {
    this.listLevel = 0;
}
/**
 * Converts a Markdown document to HTML.
 *
 * @param txt - input in markdown format; {@code null} is treated as the empty string
 * @return HTML block corresponding to txt passed in.
 */
public String markdown(String txt) {
    String source = (txt == null) ? "" : txt;
    TextEditor text = new TextEditor(source);

    // Normalise line endings first so every later pattern only has to deal with "\n".
    text.replaceAll("\\r\\n", "\n"); // DOS to Unix
    text.replaceAll("\\r", "\n"); // Mac to Unix
    text.replaceAll("^[ \\t]+$", "");

    // Guarantee the text ends with a couple of newlines.
    text.append("\n\n");

    text.detabify().deleteAll("^[ ]+$");

    hashHTMLBlocks(text);
    stripLinkDefinitions(text);
    text = runBlockGamut(text);
    unEscapeSpecialChars(text);

    text.append("\n");
    return text.toString();
}
/**
 * Replaces backslash-escaped characters with their CHAR_PROTECTOR tokens so
 * that later passes do not treat them as Markdown syntax.
 *
 * @param text the editor to modify in place
 * @return the same editor that was passed in
 */
private TextEditor encodeBackslashEscapes(TextEditor text) {
    // Two backslashes in a row
    text.replaceAllLiteral("\\\\\\\\", CHAR_PROTECTOR.encode("\\"));

    // These characters need no extra backslash in the regular expression ...
    encodeEscapes(text, "`_>!".toCharArray(), "\\\\");
    // ... whereas these are regex metacharacters and get one.
    encodeEscapes(text, "*{}[]()#+-.".toCharArray(), "\\\\\\");
    return text;
}
/**
 * For each character, replaces the sequence {@code slashes + character}
 * with the CHAR_PROTECTOR token for that character.
 *
 * @param text    the editor to modify in place
 * @param chars   characters whose escaped form should be protected
 * @param slashes the backslash prefix to put in front of each character
 * @return the same editor that was passed in
 */
private TextEditor encodeEscapes(TextEditor text, char[] chars, String slashes) {
    for (int i = 0; i < chars.length; i++) {
        String escaped = slashes + chars[i];
        String plain = String.valueOf(chars[i]);
        text.replaceAllLiteral(escaped, CHAR_PROTECTOR.encode(plain));
    }
    return text;
}
private void stripLinkDefinitions(TextEditor text) {
Pattern p = Pattern.compile("^[ ]{0,3}\\[(.+)\\]:" + // ID = $1
"[ \\t]*\\n?[ \\t]*" + // Space
"(\\S+?)>?" + // URL = $2
"[ \\t]*\\n?[ \\t]*" + // Space
"(?:[\"(](.+?)[\")][ \\t]*)?" + // Optional title = $3
"(?:\\n+|\\Z)",
Pattern.MULTILINE);
text.replaceAll(p, new Replacement() {
public String replacement(Matcher m) {
String id = m.group(1).toLowerCase();
String url = encodeAmpsAndAngles(new TextEditor(m.group(2))).toString();
String title = m.group(3);
if (title == null) {
title = "";
}
title = replaceAll(title, "\"", """);
linkDefinitions.put(id, new LinkDefinition(url, title));
return "";
}
});
}
/**
 * Applies the block-level transformations in a fixed order (headers,
 * horizontal rules, lists, code blocks, block quotes), protects the HTML
 * produced so far, and finally forms paragraphs.
 *
 * @param text the editor holding block-level Markdown
 * @return the editor returned by formParagraphs
 */
public TextEditor runBlockGamut(TextEditor text) {
    doHeaders(text);
    doHorizontalRules(text);
    doLists(text);
    doCodeBlocks(text);
    doBlockQuotes(text);

    // Hash the generated block HTML so formParagraphs does not wrap it.
    hashHTMLBlocks(text);

    TextEditor withParagraphs = formParagraphs(text);
    return withParagraphs;
}
private void doHorizontalRules(TextEditor text) {
String[] hrDelimiters = {"\\*", "-", "_"};
for (String hrDelimiter : hrDelimiters) {
text.replaceAll("^[ ]{0,2}([ ]?" + hrDelimiter + "[ ]?){3,}[ ]*$", "
");
}
}
/**
 * Replaces block-level raw HTML with HTML_PROTECTOR tokens, each surrounded
 * by blank lines, so later passes leave the HTML untouched.
 *
 * @param text the editor to modify in place
 */
private void hashHTMLBlocks(TextEditor text) {
    // Hashify HTML blocks:
    // We only want to do this for block-level HTML tags, such as headers,
    // lists, and tables. That's because we still want to wrap <p>s around
    // "paragraphs" that are wrapped in non-block-level tags, such as anchors,
    // phrase emphasis, and spans. The list of tags we're looking for is
    // hard-coded:
    String[] tagsA = {
        "p", "div", "h1", "h2", "h3", "h4", "h5", "h6", "blockquote", "pre", "table",
        "dl", "ol", "ul", "script", "noscript", "form", "fieldset", "iframe", "math"
    };
    String[] tagsB = {"ins", "del"};
    String alternationA = join("|", tagsA);
    String alternationB = alternationA + "|" + join("|", tagsB);
    int lessThanTab = tabWidth - 1;

    // First, look for nested blocks, e.g.:
    //   <div>
    //       <div>
    //       tags for inner block must be indented.
    //       </div>
    //   </div>
    //
    // The outermost tags must start at the left margin for this to match, and
    // the inner nested divs must be indented.
    // We need to do this before the next, more liberal match, because the next
    // match will start at the first `<div>` and stop at the first `</div>`.
    Pattern p1 = Pattern.compile("(" +
            "^<(" + alternationA + ")" +
            "\\b" +
            "(.*\\n)*?" +
            "</\\2>" +
            "[ ]*" +
            "(?=\\n+|\\Z))", Pattern.MULTILINE | Pattern.CASE_INSENSITIVE);

    Replacement protectHTML = new Replacement() {
        public String replacement(Matcher m) {
            String literal = m.group();
            return "\n\n" + HTML_PROTECTOR.encode(literal) + "\n\n";
        }
    };
    text.replaceAll(p1, protectHTML);

    // Now match more liberally, simply from `\n<tag>` to `</tag>\n`
    Pattern p2 = Pattern.compile("(" +
            "^" +
            "<(" + alternationB + ")" +
            "\\b" +
            "(.*\\n)*?" +
            ".*</\\2>" +
            "[ ]*" +
            "(?=\\n+|\\Z))", Pattern.MULTILINE | Pattern.CASE_INSENSITIVE);
    text.replaceAll(p2, protectHTML);

    // Special case for <hr>
    Pattern p3 = Pattern.compile("(?:" +
            "(?<=\\n\\n)" +
            "|" +
            "\\A\\n?" +
            ")" +
            "(" +
            "[ ]{0," + lessThanTab + "}" +
            "<(hr)" +
            "\\b" +
            "([^<>])*?" +
            "/?>" +
            "[ ]*" +
            "(?=\\n{2,}|\\Z))", Pattern.CASE_INSENSITIVE);
    text.replaceAll(p3, protectHTML);

    // Special case for standalone HTML comments:
    Pattern p4 = Pattern.compile("(?:" +
            "(?<=\\n\\n)" +
            "|" +
            "\\A\\n?" +
            ")" +
            "(" +
            "[ ]{0," + lessThanTab + "}" +
            "(?s:" +
            "<!" +
            "(--.*?--\\s*)+" +
            ">" +
            ")" +
            "[ ]*" +
            "(?=\\n{2,}|\\Z)" +
            ")");
    text.replaceAll(p4, protectHTML);
}
private TextEditor formParagraphs(TextEditor markup) {
markup.deleteAll("\\A\\n+");
markup.deleteAll("\\n+\\z");
String[] paragraphs;
if (markup.isEmpty()) {
paragraphs = new String[0];
} else {
paragraphs = Pattern.compile("\\n{2,}").split(markup.toString());
}
for (int i = 0; i < paragraphs.length; i++) {
String paragraph = paragraphs[i];
String decoded = HTML_PROTECTOR.decode(paragraph);
if (decoded != null) {
paragraphs[i] = decoded;
} else {
paragraph = runSpanGamut(new TextEditor(paragraph)).toString();
paragraphs[i] = "" + paragraph + "
";
}
}
return new TextEditor(join("\n\n", paragraphs));
}
/**
 * Converts automatic links written in angle brackets into anchors:
 * http/https/ftp URLs become plain links, and e-mail addresses become
 * {@code mailto:} links whose characters are entity-encoded by encodeEmail.
 *
 * @param markup the editor to modify in place
 * @return the same editor that was passed in
 */
private TextEditor doAutoLinks(TextEditor markup) {
    markup.replaceAll("<((https?|ftp):[^'\">\\s]+)>", "<a href=\"$1\">$1</a>");
    Pattern email = Pattern.compile("<([-.\\w]+\\@[-a-z0-9]+(\\.[-a-z0-9]+)*\\.[a-z]+)>");
    markup.replaceAll(email, new Replacement() {
        public String replacement(Matcher m) {
            String address = m.group(1);
            TextEditor ed = new TextEditor(address);
            // Restore protected characters (e.g. '_') before encoding the address.
            unEscapeSpecialChars(ed);
            String addr = encodeEmail(ed.toString());
            String url = encodeEmail("mailto:" + ed.toString());
            return "<a href=\"" + url + "\">" + addr + "</a>";
        }
    });
    return markup;
}
/**
 * Swaps every CHAR_PROTECTOR token found in the editor back to the plain
 * text it stands for.
 *
 * @param ed the editor to modify in place
 */
private void unEscapeSpecialChars(TextEditor ed) {
    for (String token : CHAR_PROTECTOR.getAllEncodedTokens()) {
        ed.replaceAllLiteral(token, CHAR_PROTECTOR.decode(token));
    }
}
/**
 * Encodes a string character by character, choosing at random between a
 * decimal character reference (45%), a hexadecimal character reference (45%)
 * and the raw character (10%).
 *
 * @param s the text to encode, e.g. an e-mail address or a mailto: URL
 * @return the encoded text; it differs between calls because of the random choice
 */
private String encodeEmail(String s) {
    StringBuilder sb = new StringBuilder();
    char[] email = s.toCharArray();
    for (char ch : email) {
        double r = rnd.nextDouble();
        if (r < 0.45) { // Decimal
            sb.append("&#");
            sb.append((int) ch);
            sb.append(';');
        } else if (r < 0.9) { // Hex
            sb.append("&#x");
            sb.append(Integer.toString((int) ch, 16));
            sb.append(';');
        } else {
            sb.append(ch);
        }
    }
    return sb.toString();
}
private TextEditor doBlockQuotes(TextEditor markup) {
Pattern p = Pattern.compile("(" +
"(" +
"^[ \t]*>[ \t]?" + // > at the start of a line
".+\\n" + // rest of the first line
"(.+\\n)*" + // subsequent consecutive lines
"\\n*" + // blanks
")+" +
")", Pattern.MULTILINE);
return markup.replaceAll(p, new Replacement() {
public String replacement(Matcher m) {
TextEditor blockQuote = new TextEditor(m.group(1));
blockQuote.deleteAll("^[ \t]*>[ \t]?");
blockQuote.deleteAll("^[ \t]+$");
blockQuote = runBlockGamut(blockQuote);
blockQuote.replaceAll("^", " ");
Pattern p1 = Pattern.compile("(\\s*.*?
)", Pattern.DOTALL);
blockQuote = blockQuote.replaceAll(p1, new Replacement() {
public String replacement(Matcher m1) {
String pre = m1.group(1);
return deleteAll(pre, "^ ");
}
});
return "\n" + blockQuote + "\n
\n\n";
}
});
}
private TextEditor doCodeBlocks(TextEditor markup) {
Pattern p = Pattern.compile("" +
"(?:\\n\\n|\\A)" +
"((?:" +
"(?:[ ]{4})" +
".*\\n+" +
")+" +
")" +
"((?=^[ ]{0,4}\\S)|\\Z)", Pattern.MULTILINE);
return markup.replaceAll(p, new Replacement() {
private static final String LANG_IDENTIFIER = "lang:";
public String replacement(Matcher m) {
String codeBlock = m.group(1);
TextEditor ed = new TextEditor(codeBlock);
ed.outdent();
encodeCode(ed);
ed.detabify().deleteAll("\\A\\n+").deleteAll("\\s+\\z");
String text = ed.toString();
String out;
String firstLine = firstLine(text);
if (isLanguageIdentifier(firstLine)) {
out = languageBlock(firstLine, text);
} else {
out = genericCodeBlock(text);
}
return out;
}
public String firstLine(String text)
{
if (text == null) {
return "";
}
String[] splitted = text.split("\\n");
return splitted[0];
}
public boolean isLanguageIdentifier(String line)
{
if (line == null) {
return false;
}
String lang = "";
if (line.startsWith(LANG_IDENTIFIER)) {
lang = line.replaceFirst(LANG_IDENTIFIER, "").trim();
}
return lang.length() > 0;
}
public String languageBlock(String firstLine, String text)
{
// dont'use %n in format string (markdown aspect every new line char as "\n")
//String codeBlockTemplate = "%n%s%n
"; // http://alexgorbatchev.com/wiki/SyntaxHighlighter
String codeBlockTemplate = "\n\n\n%s\n
\n\n"; // http://shjs.sourceforge.net/doc/documentation.html
String lang = firstLine.replaceFirst(LANG_IDENTIFIER, "").trim();
String block = text.replaceFirst( firstLine+"\n", "");
return String.format(codeBlockTemplate, lang, block);
}
public String genericCodeBlock(String text)
{
// dont'use %n in format string (markdown aspect every new line char as "\n")
String codeBlockTemplate = "\n\n%s\n
\n\n";
return String.format(codeBlockTemplate, text);
}
});
}
/**
 * Makes code text safe to embed in HTML and inert to later Markdown passes:
 * {@code &}, {@code <} and {@code >} become entities, and Markdown's special
 * characters are swapped for CHAR_PROTECTOR tokens.
 *
 * @param ed the editor holding the code, modified in place
 */
private void encodeCode(TextEditor ed) {
    // '&' must go first so the entities produced below are not re-encoded.
    ed.replaceAll("&", "&amp;");
    ed.replaceAll("<", "&lt;");
    ed.replaceAll(">", "&gt;");
    ed.replaceAll("\\*", CHAR_PROTECTOR.encode("*"));
    ed.replaceAll("_", CHAR_PROTECTOR.encode("_"));
    ed.replaceAll("\\{", CHAR_PROTECTOR.encode("{"));
    ed.replaceAll("\\}", CHAR_PROTECTOR.encode("}"));
    ed.replaceAll("\\[", CHAR_PROTECTOR.encode("["));
    ed.replaceAll("\\]", CHAR_PROTECTOR.encode("]"));
    ed.replaceAll("\\\\", CHAR_PROTECTOR.encode("\\"));
}
private TextEditor doLists(TextEditor text) {
int lessThanTab = tabWidth - 1;
String wholeList =
"(" +
"(" +
"[ ]{0," + lessThanTab + "}" +
"((?:[-+*]|\\d+[.]))" + // $3 is first list item marker
"[ ]+" +
")" +
"(?s:.+?)" +
"(" +
"\\z" + // End of input is OK
"|" +
"\\n{2,}" +
"(?=\\S)" + // If not end of input, then a new para
"(?![ ]*" +
"(?:[-+*]|\\d+[.])" +
"[ ]+" +
")" + // negative lookahead for another list marker
")" +
")";
if (listLevel > 0) {
Replacement replacer = new Replacement() {
public String replacement(Matcher m) {
String list = m.group(1);
String listStart = m.group(3);
String listType;
if (listStart.matches("[*+-]")) {
listType = "ul";
} else {
listType = "ol";
}
// Turn double returns into triple returns, so that we can make a
// paragraph for the last item in a list, if necessary:
list = replaceAll(list, "\\n{2,}", "\n\n\n");
String result = processListItems(list);
// Trim any trailing whitespace, to put the closing `` or ``
// up on the preceding line, to get it past the current stupid
// HTML block parser. This is a hack to work around the terrible
// hack that is the HTML block parser.
result = result.replaceAll("\\s+$", "");
String html;
if ("ul".equals(listType)) {
html = "" + result + "
\n";
} else {
html = "" + result + "
\n";
}
return html;
}
};
Pattern matchStartOfLine = Pattern.compile("^" + wholeList, Pattern.MULTILINE);
text.replaceAll(matchStartOfLine, replacer);
} else {
Replacement replacer = new Replacement() {
public String replacement(Matcher m) {
String list = m.group(1);
String listStart = m.group(3);
String listType = "";
if (listStart.matches("[*+-]")) {
listType = "ul";
} else {
listType = "ol";
}
// Turn double returns into triple returns, so that we can make a
// paragraph for the last item in a list, if necessary:
list = replaceAll(list, "\n{2,}", "\n\n\n");
String result = processListItems(list);
String html;
if (listStart.matches("[*+-]")) {
html = "\n" + result + "
\n";
} else {
html = "\n" + result + "
\n";
}
return html;
}
};
Pattern matchStartOfLine = Pattern.compile("(?:(?<=\\n\\n)|\\A\\n?)" + wholeList, Pattern.MULTILINE);
text.replaceAll(matchStartOfLine, replacer);
}
return text;
}
/**
 * Converts the items of one list into {@code <li>} elements. Items that are
 * preceded by a blank line or contain a paragraph break go through the
 * block gamut; all others get sub-list and span processing only.
 *
 * @param list the raw text of a whole list
 * @return the list text with every item wrapped in {@code <li>} tags
 */
private String processListItems(String list) {
    // The listLevel variable keeps track of when we're inside a list.
    // Each time we enter a list, we increment it; when we leave a list,
    // we decrement. If it's zero, we're not in a list anymore.
    //
    // We do this because when we're not inside a list, we want to treat
    // something like this:
    //
    //     I recommend upgrading to version
    //     8. Oops, now this line is treated
    //     as a sub-list.
    //
    // As a single paragraph, despite the fact that the second line starts
    // with a digit-period-space sequence.
    //
    // Whereas when we're inside a list (or sub-list), that line will be
    // treated as the start of a sub-list. What a kludge, huh? This is
    // an aspect of Markdown's syntax that's hard to parse perfectly
    // without resorting to mind-reading. Perhaps the solution is to
    // change the syntax rules such that sub-lists must start with a
    // starting cardinal number; e.g. "1." or "a.".
    listLevel++;

    // Trim trailing blank lines:
    list = replaceAll(list, "\\n{2,}\\z", "\n");

    Pattern p = Pattern.compile("(\\n)?" +
            "^([ \\t]*)([-+*]|\\d+[.])[ ]+" +
            "((?s:.+?)(\\n{1,2}))" +
            "(?=\\n*(\\z|\\2([-+\\*]|\\d+[.])[ \\t]+))",
            Pattern.MULTILINE);
    list = replaceAll(list, p, new Replacement() {
        public String replacement(Matcher m) {
            String text = m.group(4);
            TextEditor item = new TextEditor(text);
            String leadingLine = m.group(1);
            if (!isEmptyString(leadingLine) || hasParagraphBreak(item)) {
                item = runBlockGamut(item.outdent());
            } else {
                // Recurse sub-lists
                item = doLists(item.outdent());
                item = runSpanGamut(item);
            }
            return "<li>" + item.trim().toString() + "</li>\n";
        }
    });

    listLevel--;
    return list;
}
/**
 * Tells whether the item's text contains a blank line, i.e. two consecutive newlines.
 *
 * @param item the list item text
 * @return true when "\n\n" occurs anywhere in the item
 */
private boolean hasParagraphBreak(TextEditor item) {
    String content = item.toString();
    return content.contains("\n\n");
}
/**
 * Tells whether the given string is null or has no characters.
 *
 * @param leadingLine the string to test; may be null
 * @return true for null or ""
 */
private boolean isEmptyString(String leadingLine) {
    if (leadingLine == null) {
        return true;
    }
    return leadingLine.length() == 0;
}
private TextEditor doHeaders(TextEditor markup) {
// setext-style headers
markup.replaceAll("^(.*)\n====+$", "$1
");
markup.replaceAll("^(.*)\n----+$", "$1
");
// atx-style headers - e.g., "#### heading 4 ####"
Pattern p = Pattern.compile("^(#{1,6})\\s*(.*?)\\s*\\1?$", Pattern.MULTILINE);
markup.replaceAll(p, new Replacement() {
public String replacement(Matcher m) {
String marker = m.group(1);
String heading = m.group(2);
int level = marker.length();
String tag = "h" + level;
return "<" + tag + ">" + heading + "" + tag + ">\n";
}
});
return markup;
}
/**
 * Concatenates the strings, placing the separator between neighbours.
 *
 * @param separator text inserted between consecutive elements
 * @param strings   the elements to join; an empty array gives ""
 * @return the joined text
 */
private String join(String separator, String[] strings) {
    StringBuilder joined = new StringBuilder();
    for (int i = 0; i < strings.length; i++) {
        if (i > 0) {
            joined.append(separator);
        }
        joined.append(strings[i]);
    }
    return joined.toString();
}
public TextEditor runSpanGamut(TextEditor text) {
text = escapeSpecialCharsWithinTagAttributes(text);
text = doCodeSpans(text);
text = encodeBackslashEscapes(text);
doImages(text);
doAnchors(text);
doAutoLinks(text);
// Fix for BUG #1357582
// We must call escapeSpecialCharsWithinTagAttributes() a second time to
// escape the contents of any attributes generated by the prior methods.
// - Nathan Winant, [email protected], 8/29/2006
text = escapeSpecialCharsWithinTagAttributes(text);
encodeAmpsAndAngles(text);
doItalicsAndBold(text);
// Manual line breaks
text.replaceAll(" {2,}\n", "
\n");
return text;
}
/**
 * escape special characters
 *
 * Within tags -- meaning between &lt; and &gt; -- encode [\ ` * _] so they
 * don't conflict with their use in Markdown for code, italics and strong.
 * We're replacing each such character with its corresponding random string
 * value; this is likely overkill, but it should prevent us from colliding
 * with the escape values by accident.
 *
 * @param text the editor whose content is tokenized; it is read, not modified
 * @return a new editor holding the text with the special characters inside tags protected
 */
private TextEditor escapeSpecialCharsWithinTagAttributes(TextEditor text) {
    Collection<HTMLToken> tokens = text.tokenizeHTML();
    TextEditor newText = new TextEditor("");
    for (HTMLToken token : tokens) {
        String value = token.getText();
        if (token.isTag()) {
            value = value.replaceAll("\\\\", CHAR_PROTECTOR.encode("\\"));
            value = value.replaceAll("`", CHAR_PROTECTOR.encode("`"));
            value = value.replaceAll("\\*", CHAR_PROTECTOR.encode("*"));
            value = value.replaceAll("_", CHAR_PROTECTOR.encode("_"));
        }
        newText.append(value);
    }
    return newText;
}
/**
 * Converts inline images ({@code ![alt](url "title")}) and reference-style
 * images ({@code ![alt][id]}) into {@code <img>} elements. A reference whose
 * id has no definition is left unchanged.
 *
 * @param text the editor to modify in place
 */
private void doImages(TextEditor text) {
    // Inline image syntax
    text.replaceAll("!\\[(.*)\\]\\((.*) \"(.*)\"\\)", "<img src=\"$2\" alt=\"$1\" title=\"$3\" />");
    text.replaceAll("!\\[(.*)\\]\\((.*)\\)", "<img src=\"$2\" alt=\"$1\" />");

    // Reference-style image syntax
    Pattern imageLink = Pattern.compile("(" +
            "[!]\\[(.*?)\\]" + // alt text = $2
            "[ ]?(?:\\n[ ]*)?" +
            "\\[(.*?)\\]" + // ID = $3
            ")");
    text.replaceAll(imageLink, new Replacement() {
        public String replacement(Matcher m) {
            String replacementText;
            String wholeMatch = m.group(1);
            String altText = m.group(2);
            String id = m.group(3).toLowerCase();
            if ("".equals(id)) { // for shortcut references like ![this][]
                id = altText.toLowerCase();
            }

            // imageDefinition is the same as linkDefinition
            LinkDefinition defn = linkDefinitions.get(id);
            if (defn != null) {
                String url = defn.getUrl();
                // protect emphasis (* and _) within urls
                url = url.replaceAll("\\*", CHAR_PROTECTOR.encode("*"));
                url = url.replaceAll("_", CHAR_PROTECTOR.encode("_"));
                String title = defn.getTitle();
                String titleTag = "";
                if (title != null && !title.equals("")) {
                    title = title.replaceAll("\\*", CHAR_PROTECTOR.encode("*"));
                    title = title.replaceAll("_", CHAR_PROTECTOR.encode("_"));
                    titleTag = " title=\"" + title + "\"";
                }
                // Always emit alt, not only when a title is present, so the
                // reference form matches the inline form above.
                replacementText = "<img src=\"" + url + "\" alt=\"" + altText + "\"" + titleTag + " />";
            } else {
                replacementText = wholeMatch;
            }
            return replacementText;
        }
    });
}
/**
 * Converts the three Markdown link forms into {@code <a>} elements:
 * reference links ({@code [text][id]}), inline links
 * ({@code [text](url "title")}) and shortcut references ({@code [text]}).
 * References whose id has no definition are left unchanged.
 *
 * @param markup the editor to modify in place
 * @return the same editor that was passed in
 */
private TextEditor doAnchors(TextEditor markup) {
    // Internal references: [link text] [id]
    Pattern internalLink = Pattern.compile("(" +
            "\\[(.*?)\\]" + // Link text = $2
            "[ ]?(?:\\n[ ]*)?" +
            "\\[(.*?)\\]" + // ID = $3
            ")");
    markup.replaceAll(internalLink, new Replacement() {
        public String replacement(Matcher m) {
            String replacementText;
            String wholeMatch = m.group(1);
            String linkText = m.group(2);
            String id = m.group(3).toLowerCase();
            if ("".equals(id)) { // for shortcut links like [this][]
                id = linkText.toLowerCase();
            }

            LinkDefinition defn = linkDefinitions.get(id);
            if (defn != null) {
                String url = defn.getUrl();
                // protect emphasis (* and _) within urls
                url = url.replaceAll("\\*", CHAR_PROTECTOR.encode("*"));
                url = url.replaceAll("_", CHAR_PROTECTOR.encode("_"));
                String title = defn.getTitle();
                String titleTag = "";
                if (title != null && !title.equals("")) {
                    // protect emphasis (* and _) within titles
                    title = title.replaceAll("\\*", CHAR_PROTECTOR.encode("*"));
                    title = title.replaceAll("_", CHAR_PROTECTOR.encode("_"));
                    titleTag = " title=\"" + title + "\"";
                }
                replacementText = "<a href=\"" + url + "\"" + titleTag + ">" + linkText + "</a>";
            } else {
                replacementText = wholeMatch;
            }
            return replacementText;
        }
    });

    // Inline-style links: [link text](url "optional title")
    Pattern inlineLink = Pattern.compile("(" + // Whole match = $1
            "\\[(.*?)\\]" + // Link text = $2
            "\\(" +
            "[ \\t]*" +
            "<?(.*?)>?" + // href = $3
            "[ \\t]*" +
            "(" +
            "(['\"])" + // Quote character = $5
            "(.*?)" + // Title = $6
            "\\5" +
            ")?" +
            "\\)" +
            ")", Pattern.DOTALL);
    markup.replaceAll(inlineLink, new Replacement() {
        public String replacement(Matcher m) {
            String linkText = m.group(2);
            String url = m.group(3);
            String title = m.group(6);
            // protect emphasis (* and _) within urls
            url = url.replaceAll("\\*", CHAR_PROTECTOR.encode("*"));
            url = url.replaceAll("_", CHAR_PROTECTOR.encode("_"));
            StringBuilder result = new StringBuilder();
            result.append("<a href=\"").append(url).append("\"");
            if (title != null) {
                // protect emphasis (* and _) within titles
                title = title.replaceAll("\\*", CHAR_PROTECTOR.encode("*"));
                title = title.replaceAll("_", CHAR_PROTECTOR.encode("_"));
                // The title sits inside a double-quoted attribute.
                title = replaceAll(title, "\"", "&quot;");
                result.append(" title=\"");
                result.append(title);
                result.append("\"");
            }
            result.append(">").append(linkText);
            result.append("</a>");
            return result.toString();
        }
    });

    // Last, handle reference-style shortcuts: [link text]
    // These must come last in case you've also got [link test][1]
    // or [link test](/foo)
    Pattern referenceShortcut = Pattern.compile("(" + // wrap whole match in $1
            "\\[" +
            "([^\\[\\]]+)" + // link text = $2; can't contain '[' or ']'
            "\\]" +
            ")", Pattern.DOTALL);
    markup.replaceAll(referenceShortcut, new Replacement() {
        public String replacement(Matcher m) {
            String replacementText;
            String wholeMatch = m.group(1);
            String linkText = m.group(2);
            String id = m.group(2).toLowerCase(); // link id should be lowercase
            id = id.replaceAll("[ ]?\\n", " "); // change embedded newlines into spaces

            LinkDefinition defn = linkDefinitions.get(id.toLowerCase());
            if (defn != null) {
                String url = defn.getUrl();
                // protect emphasis (* and _) within urls
                url = url.replaceAll("\\*", CHAR_PROTECTOR.encode("*"));
                url = url.replaceAll("_", CHAR_PROTECTOR.encode("_"));
                String title = defn.getTitle();
                String titleTag = "";
                if (title != null && !title.equals("")) {
                    // protect emphasis (* and _) within titles
                    title = title.replaceAll("\\*", CHAR_PROTECTOR.encode("*"));
                    title = title.replaceAll("_", CHAR_PROTECTOR.encode("_"));
                    titleTag = " title=\"" + title + "\"";
                }
                replacementText = "<a href=\"" + url + "\"" + titleTag + ">" + linkText + "</a>";
            } else {
                replacementText = wholeMatch;
            }
            return replacementText;
        }
    });
    return markup;
}
/**
 * Converts {@code **text**} / {@code __text__} into {@code <strong>} and
 * {@code *text*} / {@code _text_} into {@code <em>}. Strong is handled
 * first so its double markers are not consumed as two emphasis markers.
 *
 * @param markup the editor to modify in place
 * @return the same editor that was passed in
 */
private TextEditor doItalicsAndBold(TextEditor markup) {
    markup.replaceAll("(\\*\\*|__)(?=\\S)(.+?[*_]*)(?<=\\S)\\1", "<strong>$2</strong>");
    markup.replaceAll("(\\*|_)(?=\\S)(.+?)(?<=\\S)\\1", "<em>$2</em>");
    return markup;
}
/**
 * Encodes ampersands that do not already start an entity as {@code &amp;},
 * and {@code <} characters that cannot start a tag as {@code &lt;}.
 *
 * @param markup the editor to modify in place
 * @return the same editor that was passed in
 */
private TextEditor encodeAmpsAndAngles(TextEditor markup) {
    // Ampersand-encoding based entirely on Nat Irons's Amputator MT plugin:
    // http://bumppo.net/projects/amputator/
    markup.replaceAll("&(?!#?[xX]?(?:[0-9a-fA-F]+|\\w+);)", "&amp;");
    markup.replaceAll("<(?![a-zA-Z/?\\$!])", "&lt;");
    return markup;
}
private TextEditor doCodeSpans(TextEditor markup) {
return markup.replaceAll(Pattern.compile("(?" + subEditor.toString() + "
";
}
});
}
/**
 * Removes every match of the regular expression from the text.
 *
 * @param text  the input text
 * @param regex the pattern whose matches are removed
 * @return the text with all matches replaced by the empty string
 */
private String deleteAll(String text, String regex) {
    TextEditor editor = new TextEditor(text);
    editor.replaceAll(regex, "");
    return editor.toString();
}
/**
 * String-in, string-out convenience wrapper around TextEditor's
 * regex replacement.
 *
 * @param text        the input text
 * @param regex       the pattern to search for
 * @param replacement the replacement handed to the editor
 * @return the edited text
 */
private String replaceAll(String text, String regex, String replacement) {
    final TextEditor editor = new TextEditor(text);
    editor.replaceAll(regex, replacement);
    return editor.toString();
}
/**
 * String-in, string-out convenience wrapper around TextEditor's
 * callback-based replacement.
 *
 * @param markup      the input text
 * @param pattern     the compiled pattern to search for
 * @param replacement callback producing the substitute for each match
 * @return the edited text
 */
private String replaceAll(String markup, Pattern pattern, Replacement replacement) {
    final TextEditor editor = new TextEditor(markup);
    editor.replaceAll(pattern, replacement);
    return editor.toString();
}
/**
 * Returns a fixed description naming this processor and the Markdown
 * version it is compatible with.
 */
@Override
public String toString() {
    final String description = "Markdown Processor for Java 0.4.0 (compatible with Markdown 1.0.2b2)";
    return description;
}
/**
 * Command-line entry point: reads all of standard input as Markdown and
 * prints the HTML to standard output. On a read error, prints a message to
 * standard error and exits with status 1.
 *
 * @param args ignored
 */
public static void main(String[] args) {
    StringBuilder input = new StringBuilder();
    char[] chunk = new char[1024];
    java.io.Reader stdin = new java.io.InputStreamReader(System.in);
    try {
        // read() returns a negative count at end of stream.
        for (int n = stdin.read(chunk); n >= 0; n = stdin.read(chunk)) {
            input.append(chunk, 0, n);
        }
        String html = new MarkdownProcessor().markdown(input.toString());
        System.out.println(html);
    } catch (java.io.IOException e) {
        System.err.println("Error reading input: " + e.getMessage());
        System.exit(1);
    }
}
}