All downloads are free. Search and download functionality uses the official Maven repository.

com.helger.html.markdown.MarkdownProcessor Maven / Gradle / Ivy

/**
 * Copyright (C) 2014-2016 Philip Helger (www.helger.com)
 * philip[at]helger[dot]com
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *         http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package com.helger.html.markdown;

import java.io.IOException;
import java.io.InputStream;
import java.io.Reader;
import java.util.Locale;

import javax.annotation.Nonnull;
import javax.annotation.Nullable;
import javax.annotation.WillClose;

import com.helger.commons.ValueEnforcer;
import com.helger.commons.io.resource.IReadableResource;
import com.helger.commons.io.stream.NonBlockingStringReader;
import com.helger.commons.io.stream.StreamHelper;
import com.helger.commons.string.StringHelper;

/**
 * Markdown processor class.
 * 

* Example usage: *

* *
 * String result = MarkdownProcessor.process("This is ***TXTMARK***");
 * 
 * 
* * @author René Jeschke <[email protected]> */ public class MarkdownProcessor { /** The emitter. */ private final Emitter m_aEmitter; /** The Configuration. */ final MarkdownConfiguration m_aConfig; /** Extension flag. */ private boolean m_bUseExtensions; public MarkdownProcessor () { this (MarkdownConfiguration.DEFAULT); } /** * Constructor. * * @param aConfig * The configuration to use. May not be null. */ public MarkdownProcessor (@Nonnull final MarkdownConfiguration aConfig) { ValueEnforcer.notNull (aConfig, "Config"); m_aConfig = aConfig; m_bUseExtensions = aConfig.isExtendedProfile (); m_aEmitter = new Emitter (m_aConfig); } /** * Reads all lines from our reader. *

* Takes care of markdown link references. *

* * @return A Block containing all lines. * @throws IOException * If an IO error occurred. */ @Nonnull private Block _readLines (@Nonnull final Reader aReader) throws IOException { final Block block = new Block (); final StringBuilder sb = new StringBuilder (80); int c = aReader.read (); LinkRef aLastLinkRef = null; while (c != -1) { sb.setLength (0); int pos = 0; boolean eol = false; while (!eol) { switch (c) { case -1: eol = true; break; case '\n': c = aReader.read (); if (c == '\r') c = aReader.read (); eol = true; break; case '\r': c = aReader.read (); if (c == '\n') c = aReader.read (); eol = true; break; case '\t': { final int np = pos + (4 - (pos & 3)); while (pos < np) { sb.append (' '); pos++; } c = aReader.read (); break; } default: pos++; sb.append ((char) c); c = aReader.read (); break; } } final Line aLine = new Line (); aLine.m_sValue = sb.toString (); aLine.init (); // Check for link definitions boolean bIsLinkRef = false; String sID = null, sLink = null, sComment = null; if (!aLine.m_bIsEmpty && aLine.m_nLeading < 4 && aLine.m_sValue.charAt (aLine.m_nLeading) == '[') { aLine.m_nPos = aLine.m_nLeading + 1; // Read ID up to ']' sID = aLine.readUntil (']'); // Is ID valid and are there any more characters? if (sID != null && aLine.m_nPos + 2 < aLine.m_sValue.length ()) { // Check for ':' ([...]:...) if (aLine.m_sValue.charAt (aLine.m_nPos + 1) == ':') { aLine.m_nPos += 2; aLine.skipSpaces (); // Check for link syntax if (aLine.m_sValue.charAt (aLine.m_nPos) == '<') { aLine.m_nPos++; sLink = aLine.readUntil ('>'); aLine.m_nPos++; } else sLink = aLine.readUntil (' ', '\n'); // Is link valid? if (sLink != null) { // Any non-whitespace characters following? if (aLine.skipSpaces ()) { final char ch = aLine.m_sValue.charAt (aLine.m_nPos); // Read comment if (ch == '\"' || ch == '\'' || ch == '(') { aLine.m_nPos++; sComment = aLine.readUntil (ch == '(' ? 
')' : ch); // Valid linkRef only if comment is valid if (sComment != null) bIsLinkRef = true; } } else bIsLinkRef = true; } } } } // To make compiler happy: add != null checks if (bIsLinkRef && sID != null && sLink != null) { if (sID.toLowerCase (Locale.US).equals ("$profile$")) { m_bUseExtensions = sLink.toLowerCase (Locale.US).equals ("extended"); m_aEmitter.setUseExtensions (m_bUseExtensions); aLastLinkRef = null; } else { // Store linkRef and skip line final LinkRef aLinkRef = new LinkRef (sLink, sComment, sComment != null && (sLink.length () == 1 && sLink.charAt (0) == '*')); m_aEmitter.addLinkRef (sID, aLinkRef); if (sComment == null) aLastLinkRef = aLinkRef; } } else { sComment = null; // Check for multi-line linkRef if (!aLine.m_bIsEmpty && aLastLinkRef != null) { aLine.m_nPos = aLine.m_nLeading; final char ch = aLine.m_sValue.charAt (aLine.m_nPos); if (ch == '\"' || ch == '\'' || ch == '(') { aLine.m_nPos++; sComment = aLine.readUntil (ch == '(' ? ')' : ch); } if (sComment != null) aLastLinkRef.setTitle (sComment); aLastLinkRef = null; } // No multi-line linkRef, store line if (sComment == null) { aLine.m_nPos = 0; block.appendLine (aLine); } } } return block; } /** * Initializes a list block by separating it into list item blocks. * * @param aRoot * The Block to process. */ private void _initListBlock (@Nonnull final Block aRoot) { Line aLine = aRoot.m_aLines; aLine = aLine.m_aNext; while (aLine != null) { final ELineType t = aLine.getLineType (m_bUseExtensions); if (t == ELineType.OLIST || t == ELineType.ULIST || (!aLine.m_bIsEmpty && aLine.m_bPrevEmpty && aLine.m_nLeading == 0 && !(t == ELineType.OLIST || t == ELineType.ULIST))) { aRoot.split (aLine.m_aPrevious).m_eType = EBlockType.LIST_ITEM; } aLine = aLine.m_aNext; } aRoot.split (aRoot.m_aLineTail).m_eType = EBlockType.LIST_ITEM; } /** * Recursively process the given Block. * * @param aRoot * The Block to process. * @param listMode * Flag indicating that we're in a list item block. 
*/ private void _recurse (@Nonnull final Block aRoot, final boolean listMode) { Block aBlock, list; Line aLine = aRoot.m_aLines; if (listMode) { aRoot.removeListIndent (m_bUseExtensions); if (m_bUseExtensions && aRoot.m_aLines != null && aRoot.m_aLines.getLineType (m_bUseExtensions) != ELineType.CODE) { aRoot.m_sId = aRoot.m_aLines.stripID (); } } while (aLine != null && aLine.m_bIsEmpty) aLine = aLine.m_aNext; if (aLine == null) return; while (aLine != null) { final ELineType eType = aLine.getLineType (m_bUseExtensions); switch (eType) { case OTHER: { final boolean bWasEmpty = aLine.m_bPrevEmpty; while (aLine != null && !aLine.m_bIsEmpty) { final ELineType t = aLine.getLineType (m_bUseExtensions); if ((listMode || m_bUseExtensions) && (t == ELineType.OLIST || t == ELineType.ULIST)) break; if (m_bUseExtensions && (t == ELineType.CODE || t == ELineType.FENCED_CODE || t == ELineType.PLUGIN)) break; if (t == ELineType.HEADLINE || t == ELineType.HEADLINE1 || t == ELineType.HEADLINE2 || t == ELineType.HR || t == ELineType.BQUOTE || t == ELineType.XML || t == ELineType.XML_COMMENT) break; aLine = aLine.m_aNext; } final EBlockType bt; if (aLine != null && !aLine.m_bIsEmpty) { bt = (listMode && !bWasEmpty) ? EBlockType.NONE : EBlockType.PARAGRAPH; aRoot.split (aLine.m_aPrevious).m_eType = bt; aRoot.removeLeadingEmptyLines (); } else { bt = (listMode && (aLine == null || !aLine.m_bIsEmpty) && !bWasEmpty) ? EBlockType.NONE : EBlockType.PARAGRAPH; aRoot.split (aLine == null ? aRoot.m_aLineTail : aLine).m_eType = bt; aRoot.removeLeadingEmptyLines (); } aLine = aRoot.m_aLines; break; } case CODE: while (aLine != null && (aLine.m_bIsEmpty || aLine.m_nLeading > 3)) { aLine = aLine.m_aNext; } aBlock = aRoot.split (aLine != null ? aLine.m_aPrevious : aRoot.m_aLineTail); aBlock.m_eType = EBlockType.CODE; aBlock.removeSurroundingEmptyLines (); break; case XML: case XML_COMMENT: if (aLine.m_aPrevious != null) { // FIXME ... 
this looks wrong aRoot.split (aLine.m_aPrevious); } aRoot.split (aLine.m_aXmlEndLine).m_eType = eType == ELineType.XML ? EBlockType.XML : EBlockType.XML_COMMENT; aRoot.removeLeadingEmptyLines (); aLine = aRoot.m_aLines; break; case BQUOTE: while (aLine != null) { if (!aLine.m_bIsEmpty && aLine.m_bPrevEmpty && aLine.m_nLeading == 0 && aLine.getLineType (m_bUseExtensions) != ELineType.BQUOTE) break; aLine = aLine.m_aNext; } aBlock = aRoot.split (aLine != null ? aLine.m_aPrevious : aRoot.m_aLineTail); aBlock.m_eType = EBlockType.BLOCKQUOTE; aBlock.removeSurroundingEmptyLines (); aBlock.removeBlockQuotePrefix (); _recurse (aBlock, false); aLine = aRoot.m_aLines; break; case HR: if (aLine.m_aPrevious != null) { // FIXME ... this looks wrong aRoot.split (aLine.m_aPrevious); } aRoot.split (aLine).m_eType = EBlockType.RULER; aRoot.removeLeadingEmptyLines (); aLine = aRoot.m_aLines; break; case FENCED_CODE: aLine = aLine.m_aNext; while (aLine != null) { if (aLine.getLineType (m_bUseExtensions) == ELineType.FENCED_CODE) break; // TODO ... is this really necessary? Maybe add a special // flag? aLine = aLine.m_aNext; } if (aLine != null) aLine = aLine.m_aNext; aBlock = aRoot.split (aLine != null ? aLine.m_aPrevious : aRoot.m_aLineTail); aBlock.m_eType = EBlockType.FENCED_CODE; aBlock.m_sMeta = MarkdownHelper.getMetaFromFence (aBlock.m_aLines.m_sValue); aBlock.m_aLines.setEmpty (); if (aBlock.m_aLineTail.getLineType (m_bUseExtensions) == ELineType.FENCED_CODE) aBlock.m_aLineTail.setEmpty (); aBlock.removeSurroundingEmptyLines (); break; case PLUGIN: aLine = aLine.m_aNext; while (aLine != null) { if (aLine.getLineType (m_bUseExtensions) == ELineType.PLUGIN) break; // TODO ... is this really necessary? Maybe add a special // flag? aLine = aLine.m_aNext; } if (aLine != null) aLine = aLine.m_aNext; aBlock = aRoot.split (aLine != null ? 
aLine.m_aPrevious : aRoot.m_aLineTail); aBlock.m_eType = EBlockType.PLUGIN; aBlock.m_sMeta = MarkdownHelper.getMetaFromFence (aBlock.m_aLines.m_sValue); aBlock.m_aLines.setEmpty (); if (aBlock.m_aLineTail.getLineType (m_bUseExtensions) == ELineType.PLUGIN) aBlock.m_aLineTail.setEmpty (); aBlock.removeSurroundingEmptyLines (); break; case HEADLINE: case HEADLINE1: case HEADLINE2: if (aLine.m_aPrevious != null) aRoot.split (aLine.m_aPrevious); if (eType != ELineType.HEADLINE) aLine.m_aNext.setEmpty (); aBlock = aRoot.split (aLine); aBlock.m_eType = EBlockType.HEADLINE; if (eType != ELineType.HEADLINE) aBlock.m_nHeadlineDepth = eType == ELineType.HEADLINE1 ? 1 : 2; if (m_bUseExtensions) aBlock.m_sId = aBlock.m_aLines.stripID (); aBlock.transfromHeadline (); aRoot.removeLeadingEmptyLines (); aLine = aRoot.m_aLines; break; case OLIST: case ULIST: while (aLine != null) { final ELineType e = aLine.getLineType (m_bUseExtensions); if (!aLine.m_bIsEmpty && (aLine.m_bPrevEmpty && aLine.m_nLeading == 0 && !(e == ELineType.OLIST || e == ELineType.ULIST))) break; aLine = aLine.m_aNext; } list = aRoot.split (aLine != null ? aLine.m_aPrevious : aRoot.m_aLineTail); list.m_eType = eType == ELineType.OLIST ? 
EBlockType.ORDERED_LIST : EBlockType.UNORDERED_LIST; list.m_aLines.m_bPrevEmpty = false; list.removeSurroundingEmptyLines (); list.m_aLines.m_bPrevEmpty = false; _initListBlock (list); aBlock = list.m_aBlocks; while (aBlock != null) { _recurse (aBlock, true); aBlock = aBlock.m_aNext; } list.expandListParagraphs (); break; default: aLine = aLine.m_aNext; break; } } } @Nonnull public MarkdownProcessingResult process (@Nonnull final IReadableResource aRes) throws IOException { ValueEnforcer.notNull (aRes, "Resource"); return process (aRes.getReader (m_aConfig.getEncoding ())); } @Nonnull public MarkdownProcessingResult process (@Nonnull final InputStream aIS) throws IOException { ValueEnforcer.notNull (aIS, "InputStream"); return process (StreamHelper.getBuffered (StreamHelper.createReader (aIS, m_aConfig.getEncoding ()))); } @Nonnull public MarkdownProcessingResult process (@Nullable final String sText) throws IOException { return process (new NonBlockingStringReader (StringHelper.getNotNull (sText))); } /** * Does all the processing. * * @param aReader * The reader to read from * @return The processing result. * @throws IOException * If an IO error occurred. */ @Nonnull public MarkdownProcessingResult process (@Nonnull @WillClose final Reader aReader) throws IOException { try { final Block aParent = _readLines (aReader); aParent.removeSurroundingEmptyLines (); _recurse (aParent, false); final MarkdownHCStack aOut = new MarkdownHCStack (); Block aBlock = aParent.m_aBlocks; while (aBlock != null) { m_aEmitter.emit (aOut, aBlock); aBlock = aBlock.m_aNext; } return new MarkdownProcessingResult (aOut); } finally { StreamHelper.close (aReader); } } }




© 2015 - 2025 Weber Informatics LLC | Privacy Policy