All downloads are free. Search and download functionality uses the official Maven repository.

net.java.textilej.parser.markup.mediawiki.MediaWikiDialect Maven / Gradle / Ivy

The newest version!
package net.java.textilej.parser.markup.mediawiki;

import java.util.ArrayList;
import java.util.List;

import net.java.textilej.parser.DocumentBuilder.SpanType;
import net.java.textilej.parser.markup.Block;
import net.java.textilej.parser.markup.Dialect;
import net.java.textilej.parser.markup.mediawiki.block.HeadingBlock;
import net.java.textilej.parser.markup.mediawiki.block.ListBlock;
import net.java.textilej.parser.markup.mediawiki.block.ParagraphBlock;
import net.java.textilej.parser.markup.mediawiki.block.PreformattedBlock;
import net.java.textilej.parser.markup.mediawiki.block.TableBlock;
import net.java.textilej.parser.markup.mediawiki.phrase.EscapePhraseModifier;
import net.java.textilej.parser.markup.mediawiki.phrase.SimplePhraseModifier;
import net.java.textilej.parser.markup.mediawiki.token.HyperlinkExternalReplacementToken;
import net.java.textilej.parser.markup.mediawiki.token.HyperlinkInternalReplacementToken;
import net.java.textilej.parser.markup.mediawiki.token.ImageReplacementToken;
import net.java.textilej.parser.markup.mediawiki.token.LineBreakToken;
import net.java.textilej.parser.markup.mediawiki.token.TemplateReplacementToken;
import net.java.textilej.parser.markup.phrase.HtmlCommentPhraseModifier;
import net.java.textilej.parser.markup.phrase.LimitedHtmlEndTagPhraseModifier;
import net.java.textilej.parser.markup.phrase.LimitedHtmlStartTagPhraseModifier;
import net.java.textilej.parser.markup.token.EntityReferenceReplacementToken;
import net.java.textilej.parser.markup.token.ImpliedHyperlinkReplacementToken;
import net.java.textilej.parser.markup.token.PatternLiteralReplacementToken;

/**
 * A dialect for MediaWiki 
 * Wikitext markup, which is the wiki format
 * used by several other major sites.
 * 
 * @author dgreen
 *
 */
public class MediaWikiDialect extends Dialect {
	private List blocks = new ArrayList();
	private List paragraphBreakingBlocks = new ArrayList();

	private static PatternBasedSyntax tokenSyntax = new PatternBasedSyntax();
	private static PatternBasedSyntax phraseModifierSyntax = new PatternBasedSyntax();
	
	private String internalPageHrefPrefix = "/wiki/";
		
	{
		
		// IMPORTANT NOTE: Most items below have order dependencies.  DO NOT REORDER ITEMS BELOW!!
		
		blocks.add(new HeadingBlock());
		blocks.add(new ListBlock());
		blocks.add(new PreformattedBlock());
		blocks.add(new TableBlock());
		final ParagraphBlock paragraphBlock = new ParagraphBlock();
		blocks.add(paragraphBlock); // ORDER DEPENDENCY: this one must be last!!
		
		for (Block block: blocks) {
			if (block == paragraphBlock) {
				continue;
			}
			paragraphBreakingBlocks.add(block);
		}
	}
	static {
		phraseModifierSyntax.beginGroup("(?:(?<=[\\s\\.,\\\"'?!;:\\)\\(\\{\\}\\[\\]])|^)(?:",0);
		phraseModifierSyntax.add(new EscapePhraseModifier());
		phraseModifierSyntax.add(new SimplePhraseModifier("'''''",new SpanType[] { SpanType.BOLD, SpanType.ITALIC },true));
		phraseModifierSyntax.add(new SimplePhraseModifier("'''",SpanType.BOLD,true));
		phraseModifierSyntax.add(new SimplePhraseModifier("''",SpanType.ITALIC,true));
		phraseModifierSyntax.endGroup(")(?=\\W|$)",0);
		
		String[] allowedHtmlTags = new String[] {
			// HANDLED BY LineBreakToken "
", // HANDLED BY LineBreakToken "
", "b", "big", "blockquote", "caption", "center", "cite", "code", "dd", "del", "div", "dl", "dt", "em", "font", "h1", "h2", "h3", "h4", "h5", "h6", "hr", "i", "ins", "li", "ol", "p", "pre", "rb", "rp", "rt", "ruby", "s", "small", "span", "strike", "strong", "sub", "sup", "table", "td", "th", "tr", "tt", "u", "ul", "var" }; phraseModifierSyntax.add(new LimitedHtmlEndTagPhraseModifier(allowedHtmlTags)); phraseModifierSyntax.add(new LimitedHtmlStartTagPhraseModifier(allowedHtmlTags)); phraseModifierSyntax.add(new HtmlCommentPhraseModifier()); tokenSyntax.add(new LineBreakToken()); tokenSyntax.add(new EntityReferenceReplacementToken("(tm)","#8482")); tokenSyntax.add(new EntityReferenceReplacementToken("(TM)","#8482")); tokenSyntax.add(new EntityReferenceReplacementToken("(c)","#169")); tokenSyntax.add(new EntityReferenceReplacementToken("(C)","#169")); tokenSyntax.add(new EntityReferenceReplacementToken("(r)","#174")); tokenSyntax.add(new EntityReferenceReplacementToken("(R)","#174")); tokenSyntax.add(new ImageReplacementToken()); tokenSyntax.add(new HyperlinkInternalReplacementToken()); tokenSyntax.add(new HyperlinkExternalReplacementToken()); tokenSyntax.add(new ImpliedHyperlinkReplacementToken()); tokenSyntax.add(new PatternLiteralReplacementToken("(?:(?<=\\w\\s)(----)(?=\\s\\w))","
")); // horizontal rule tokenSyntax.add(new TemplateReplacementToken()); tokenSyntax.add(new net.java.textilej.parser.markup.mediawiki.token.EntityReferenceReplacementToken()); } @Override protected PatternBasedSyntax getPhraseModifierSyntax() { return phraseModifierSyntax; } @Override protected PatternBasedSyntax getReplacementTokenSyntax() { return tokenSyntax; } @Override public List getBlocks() { return blocks; } public List getParagraphBreakingBlocks() { return paragraphBreakingBlocks; } /** * Convert a page name to an href to the page. * * @param pageName the name of the page to target * * @return the href to access the page * * @see #getInternalPageHrefPrefix() */ public String toInternalHref(String pageName) { String pageId = pageName.replace(' ', '_'); if (pageId.startsWith(":")) { // category pageId = pageId.substring(1); } else if (pageId.startsWith("#")) { // internal anchor return pageId; } return internalPageHrefPrefix + pageId; } /** * Get the href prefix for references to internal pages. The default value is /wiki/. */ public String getInternalPageHrefPrefix() { return internalPageHrefPrefix; } /** * Set the href prefix for references to internal pages. The default value is /wiki/. */ public void setInternalPageHrefPrefix(String internalPageHrefPrefix) { this.internalPageHrefPrefix = internalPageHrefPrefix; } }




© 2015 - 2025 Weber Informatics LLC | Privacy Policy