All downloads are free. The search and download functionality uses the official Maven repository.

org.sweble.wikitext.engine.output.HtmlRenderer Maven / Gradle / Ivy

Go to download

A minimal engine using the Sweble Wikitext Parser to process articles in the context of a MediaWiki-like configuration.

There is a newer version: 3.1.9
Show newest version
/**
 * Copyright 2011 The Open Source Research Group,
 *                University of Erlangen-Nürnberg
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.sweble.wikitext.engine.output;

import de.fau.cs.osr.utils.FmtNotYetImplementedError;
import de.fau.cs.osr.utils.StringTools;
import de.fau.cs.osr.utils.visitor.VisitingException;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import org.sweble.wikitext.engine.PageTitle;
import org.sweble.wikitext.engine.config.WikiConfig;
import org.sweble.wikitext.engine.nodes.*;
import org.sweble.wikitext.engine.utils.EngineAstTextUtils;
import org.sweble.wikitext.engine.utils.UrlEncoding;
import org.sweble.wikitext.parser.nodes.*;
import org.sweble.wikitext.parser.nodes.WtImageLink.ImageHorizAlign;
import org.sweble.wikitext.parser.nodes.WtImageLink.ImageViewFormat;
import org.sweble.wikitext.parser.parser.LinkTargetException;
import org.sweble.wikitext.parser.utils.StringConversionException;
import org.sweble.wikitext.parser.utils.WtRtDataPrinter;

import java.io.StringWriter;
import java.io.UnsupportedEncodingException;
import java.io.Writer;
import java.util.ArrayList;
import java.util.HashSet;
import java.util.Set;

public class HtmlRenderer
		extends
			HtmlRendererBase
		implements
			CompleteEngineVisitorNoReturn
{
	/**
	 * Fix #62: the next sequential number to print for an external link that
	 * has no title. It starts at 1 and is set back to 1 in
	 * {@link #before(WtNode)}.
	 */
	private long untitledLinkCounter = 1L;

	// =====================================================================

	/**
	 * Sets the untitled-link counter back to 1 and then delegates to the
	 * superclass {@code before} hook.
	 *
	 * @param node the node handed on to {@code super.before(node)}
	 * @return the value returned by {@code super.before(node)}
	 */
	@Override
	protected WtNode before(WtNode node)
	{
		this.untitledLinkCounter = 1L;

		final WtNode fromSuper = super.before(node);
		return fromSuper;
	}

	// =====================================================================

	/**
	 * Renders a processed page by dispatching to the page it wraps.
	 *
	 * @param n the processed page whose {@code getPage()} result is dispatched
	 */
	@Override
	public void visit(EngProcessedPage n)
	{
		this.dispatch(n.getPage());
	}

	/**
	 * Renders the content of a nowiki node through {@code wrapText}.
	 *
	 * @param n the nowiki node whose content is written
	 */
	@Override
	public void visit(EngNowiki n)
	{
		final String rawContent = n.getContent();
		wrapText(rawContent);
	}

	/**
	 * Renders a page by handing it to {@code iterate}.
	 *
	 * @param n the page node to iterate over
	 */
	public void visit(EngPage n)
	{
		this.iterate(n);
	}

	/**
	 * Renders a soft error node the same way as an XML element: the node is
	 * cast to {@code WtXmlElement} and passed to that overload of
	 * {@code visit}.
	 *
	 * @param n the soft error node to render
	 */
	@Override
	public void visit(EngSoftErrorNode n)
	{
		final WtXmlElement asElement = (WtXmlElement) n;
		visit(asElement);
	}

	/**
	 * Renders a body node by handing it to {@code iterate}.
	 *
	 * @param n the body node to iterate over
	 */
	@Override
	public void visit(WtBody n)
	{
		this.iterate(n);
	}

	/**
	 * Renders bold text: writes an opening {@code <b>} tag, renders the
	 * children of the node one indentation level deeper, and writes the
	 * closing {@code </b>} tag.
	 *
	 * <p>
	 * Both tag literals had been reduced to empty strings, so bold text was
	 * emitted without any markup. They are restored here.
	 *
	 * @param n the bold node whose children are rendered between the tags
	 */
	public void visit(WtBold n)
	{
		p.indentAtBol("<b>");
		p.incIndent();
		iterate(n);
		p.decIndent();
		p.indentAtBol("</b>");
	}

	// ---------------------------------------------------------------------
	// Remainder of the class. In order of appearance it contains:
	//
	//  - the visit(...) methods for the remaining node types (definition
	//    lists, external/internal/image links, lists, paragraphs, sections,
	//    tables, tag extensions, templates, text, XML nodes, ...). Several
	//    of them only throw AssertionError ("Should not happen ...");
	//  - wrapText(String): prints escaped text verbatim while inPre > 0 and
	//    with collapsed whitespace otherwise. An older line-wrapping variant
	//    is kept below it as commented-out code;
	//  - printAsWikitext(WtNode) and toWikitext(WtNode): TODO stubs. The
	//    first does nothing, the second always returns "";
	//  - makeSectionTitle(...), makeImageAltText(...), makeImageCaption(...),
	//    makeTitleFromNodes(...), makeLinkTitle(...), makeImageTitle(...) and
	//    makeTitleFromTarget(...): helpers that build title/anchor strings;
	//  - fixTableBody(...) and getCellContent(...): table helpers;
	//  - cleanAttribValue(...) and cleanAttribs(...): the latter folds
	//    "width" and "align" attributes into the "style" attribute;
	//  - the two static print(...) entry points, the fields, the static
	//    initializer that fills blockElements, and the protected constructor.
	//
	// NOTE(review): this region looks damaged by an HTML-stripping step and
	// does not compile as written. Visible symptoms:
	//  - string literals that are empty or broken across physical lines
	//    (e.g. p.indentln(" ... ") split over two lines);
	//  - format calls whose arguments have no matching placeholder
	//    (e.g. pf("", n.getName()));
	//  - many methods fused onto one physical line, so every "//" comment
	//    swallows the code that follows it on that line;
	//  - visit(WtXmlAttribute) runs straight into what looks like
	//    element-rendering code, with the text in between missing;
	//  - generic type arguments are gone ("T node" without a declaration of
	//    T, raw ArrayList / Set / HashSet).
	// Restore this region from the upstream sources rather than patching it
	// by hand. The code below is left exactly as found.
	// ---------------------------------------------------------------------
	public void visit(WtDefinitionList n)
	{
		p.indentln("
"); p.incIndent(); iterate(n); p.decIndent(); p.indentln("
"); } public void visit(WtDefinitionListDef n) { p.indentln("
"); p.incIndent(); iterate(n); p.decIndent(); p.indentln("
"); } public void visit(WtDefinitionListTerm n) { p.indentln("
"); p.incIndent(); iterate(n); p.decIndent(); p.indentln("
"); } public void visit(WtExternalLink n) { if (n.hasTitle()) { p.indentAtBol(); pt("%!", callback.makeUrl(n.getTarget()), n.getTitle()); } else { // Fix #62: Use sequential number if the title is missing long seqNumber = untitledLinkCounter++; pt("[" + seqNumber + "]", callback.makeUrl(n.getTarget())); } } @Override public void visit(WtHeading n) { // We handle this case in WtSection and don't dispatch to the heading. throw new AssertionError(); } @Override public void visit(WtHorizontalRule n) { p.indentAtBol("
"); } @Override public void visit(WtIgnored n) { // Well, ignore it ... } @Override public void visit(WtIllegalCodePoint n) { p.indentAtBol(); final String cp = n.getCodePoint(); for (int i = 0; i < cp.length(); ++i) pf("&#%d;", (int) cp.charAt(i)); } public void visit(WtImageLink n) { if (!n.getTarget().isResolved()) { printAsWikitext(n); return; } PageTitle target; try { target = PageTitle.make(wikiConfig, n.getTarget().getAsString()); } catch (LinkTargetException e) { throw new VisitingException(e); } int imgWidth = n.getWidth(); int imgHeight = n.getHeight(); switch (n.getFormat()) { case THUMBNAIL: // FALL THROUGH case FRAMELESS: if (imgWidth <= 0) imgWidth = 180; break; default: break; } if (n.getUpright()) { imgWidth = 140; imgHeight = -1; } MediaInfo info; try { info = callback.getMediaInfo( target.getNormalizedFullTitle(), imgWidth, imgHeight); } catch (Exception e) { throw new VisitingException(e); } boolean exists = (info != null && info.getImgUrl() != null); boolean isImage = !target.getTitle().endsWith(".ogg"); if (exists && imgHeight > 0) { int altWidth = imgHeight * info.getImgWidth() / info.getImgHeight(); if (altWidth < imgWidth) { imgWidth = altWidth; try { info = callback.getMediaInfo( target.getNormalizedFullTitle(), imgWidth, imgHeight); } catch (Exception e) { throw new VisitingException(e); } } } boolean scaled = imgWidth > 0 || imgHeight > 0; String imgUrl = null; if (exists) { imgUrl = info.getImgUrl(); if (scaled && info.getThumbUrl() != null) imgUrl = info.getThumbUrl(); } String aClasses = ""; String imgClasses = ""; switch (n.getFormat()) { case THUMBNAIL: imgClasses += " thumbimage"; break; default: break; } if (n.getBorder()) imgClasses += " thumbborder"; // -- does the image link something? 
-- WtUrl linkUrl = null; PageTitle linkTarget = target; switch (n.getLink().getTargetType()) { case NO_LINK: linkTarget = null; break; case PAGE: { WtPageName pageName = (WtPageName) n.getLink().getTarget(); if (pageName.isResolved()) { try { linkTarget = PageTitle.make(wikiConfig, pageName.getAsString()); } catch (LinkTargetException e) { throw new VisitingException(e); } } else { linkTarget = null; } break; } case URL: linkTarget = null; linkUrl = (WtUrl) n.getLink().getTarget(); break; case DEFAULT: if (exists && isImage) aClasses += " image"; break; } // -- string caption -- String strCaption = null; if (n.hasTitle()) strCaption = makeImageCaption(n); // -- alt -- String alt = null; if (n.hasAlt()) alt = makeImageAltText(n); // -- classes if (!aClasses.isEmpty()) aClasses = String.format(" class=\"%s\"", aClasses.trim()); // -- title -- String aTitle = ""; if (n.getFormat() != ImageViewFormat.FRAMELESS) { if (strCaption != null) { aTitle = strCaption; } else if (linkTarget != null) { aTitle = makeImageTitle(n, target);//makeUrl(linkTarget); } else if (linkUrl != null) { aTitle = callback.makeUrl(linkUrl); } } if (!aTitle.isEmpty()) aTitle = String.format(" title=\"%s\"", aTitle); // -- width & height -- int width = -1; int height = -1; if (exists) { width = scaled ? info.getThumbWidth() : info.getImgWidth(); height = scaled ? 
info.getThumbHeight() : info.getImgHeight(); } else width = 180; // -- generate html -- boolean hasThumbFrame = isImage && n.getFormat() == ImageViewFormat.THUMBNAIL || n.getHAlign() != ImageHorizAlign.UNSPECIFIED; if (hasThumbFrame) { String align = ""; switch (n.getHAlign()) { case CENTER: align = " center"; break; case LEFT: align = " tleft"; break; case RIGHT: // FALL THROUGH case NONE: // FALL THROUGH default: align = " tright"; break; } String thumb = ""; String inner = "floatnone"; String style = ""; if (n.getFormat() == ImageViewFormat.THUMBNAIL) { thumb = "thumb"; inner = "thumbinner"; style = String.format(" style=\"width:%dpx;\"", width + 2); } p.indent(); pf(""); } } @Override public void visit(WtImEndTag n) { // Should not happen ... throw new AssertionError(); } @Override public void visit(WtImStartTag n) { // Should not happen ... throw new AssertionError(); } public void visit(WtInternalLink n) { if (!n.getTarget().isResolved()) { printAsWikitext(n); return; } p.indentAtBol(); PageTitle target; try { target = PageTitle.make(wikiConfig, n.getTarget().getAsString()); } catch (LinkTargetException e) { throw new VisitingException(e); } // FIXME: I think these should be removed in the parser already?! 
if (target.getNamespace() == wikiConfig.getNamespace("Category")) return; if (!callback.resourceExists(target)) { String title = target.getDenormalizedFullTitle(); String path = UrlEncoding.WIKI.encode(target.getNormalizedFullTitle()); if (n.hasTitle()) { pt("%=%!%=", callback.makeUrlMissingTarget(path), title, n.getPrefix(), n.getTitle(), n.getPostfix()); } else { String linkText = makeTitleFromTarget(n, target); pt("%=%=%=", callback.makeUrlMissingTarget(path), title, n.getPrefix(), linkText, n.getPostfix()); } } else { if (!target.equals(pageTitle)) { if (n.hasTitle()) { pt("%=%!%=", callback.makeUrl(target), makeLinkTitle(n, target), n.getPrefix(), n.getTitle(), n.getPostfix()); } else { pt("%=%=%=", callback.makeUrl(target), makeLinkTitle(n, target), n.getPrefix(), makeTitleFromTarget(n, target), n.getPostfix()); } } else { if (n.hasTitle()) { pt("%=%!%=", n.getPrefix(), n.getTitle(), n.getPostfix()); } else { pt("%=%=%=", n.getPrefix(), makeTitleFromTarget(n, target), n.getPostfix()); } } } } public void visit(WtItalics n) { p.indentAtBol(""); p.incIndent(); iterate(n); p.decIndent(); p.indentAtBol(""); } @Override public void visit(WtLinkOptionAltText n) { // Should not happen ... throw new AssertionError(); } @Override public void visit(WtLinkOptionGarbage n) { // Should not happen ... throw new AssertionError(); } @Override public void visit(WtLinkOptionKeyword n) { // Should not happen ... throw new AssertionError(); } @Override public void visit(WtLinkOptionLinkTarget n) { // Should not happen ... throw new AssertionError(); } @Override public void visit(WtLinkOptionResize n) { // Should not happen ... throw new AssertionError(); } @Override public void visit(WtLinkOptions n) { // Should not happen ... throw new AssertionError(); } @Override public void visit(WtLinkTitle n) { iterate(n); } public void visit(WtListItem n) { p.indentln("
  • "); p.incIndent(); iterate(n); p.decIndent(); p.indentln("
  • "); } @Override public void visit(WtName n) { iterate(n); } public void visit(WtNewline n) { if (!p.atBol()) p.print(" "); } @Override public void visit(WtNodeList n) { iterate(n); } @Override public void visit(WtOnlyInclude n) { iterate(n); } public void visit(WtOrderedList n) { p.indentln("
      "); p.incIndent(); iterate(n); p.decIndent(); p.indentln("
    "); } @Override public void visit(WtPageName n) { // Should not happen ... throw new AssertionError(); } @Override public void visit(WtPageSwitch n) { // Hide those... } public void visit(WtParagraph n) { p.indentln("

    "); p.incIndent(); iterate(n); p.decIndent(); p.indentln("

    "); } @Override public void visit(WtParsedWikitextPage n) { iterate(n); } @Override public void visit(WtPreproWikitextPage n) { iterate(n); } @Override public void visit(WtRedirect n) { // Fixes issue #65, we render a link to the redirect target PageTitle pt; try { pt = PageTitle.make(this.wikiConfig, n.getTarget().getAsString()); } catch (LinkTargetException e) { throw new VisitingException(e); } String url = callback.makeUrl(pt); pf("%s", url, pt.getDenormalizedFullTitle()); } public void visit(WtSection n) { p.indent(); pt("%!", n.getLevel(), makeSectionTitle(n.getHeading()), n.getHeading(), n.getLevel()); p.println(); dispatch(n.getBody()); } public void visit(WtSemiPre n) { p.indent(); ++inPre; pt("
    %!
    ", n); --inPre; p.println(); } public void visit(WtSemiPreLine n) { iterate(n); p.println(); } @Override public void visit(WtSignature n) { // TODO: Implement throw new FmtNotYetImplementedError(); } public void visit(WtTable n) { p.indent(); pt("", cleanAttribs(n.getXmlAttributes())); p.println(); p.incIndent(); fixTableBody(n.getBody()); p.decIndent(); p.indentln(""); } @Override public void visit(WtTableCaption n) { p.indent(); pt("", cleanAttribs(n.getXmlAttributes())); p.println(); p.incIndent(); dispatch(getCellContent(n.getBody())); p.decIndent(); p.indentln(""); } public void visit(WtTableCell n) { p.indent(); pt("", cleanAttribs(n.getXmlAttributes())); p.println(); p.incIndent(); dispatch(getCellContent(n.getBody())); p.decIndent(); p.indentln(""); } public void visit(WtTableHeader n) { p.indent(); pt("", cleanAttribs(n.getXmlAttributes())); p.println(); p.incIndent(); dispatch(getCellContent(n.getBody())); p.decIndent(); p.indentln(""); } public void visit(WtTableRow n) { boolean cellsDefined = false; for (WtNode cell : n.getBody()) { switch (cell.getNodeType()) { case WtNode.NT_TABLE_CELL: case WtNode.NT_TABLE_HEADER: cellsDefined = true; break; } } if (cellsDefined) { p.indent(); pt("", cleanAttribs(n.getXmlAttributes())); p.println(); p.incIndent(); dispatch(getCellContent(n.getBody())); p.decIndent(); p.indentln(""); } else { iterate(n.getBody()); } } public void visit(WtTableImplicitTableBody n) { iterate(n.getBody()); } public void visit(WtTagExtension n) { // TODO: Should not get skipped! if (n.getName().trim().equalsIgnoreCase("ref")) return; if (n.getName().trim().equalsIgnoreCase("references")) return; printAsWikitext(n); /* pc("<%s%!>%=</%s>", n.getName(), n.getXmlAttributes(), n.getBody().getContent(), n.getName()); */ } @Override public void visit(WtTagExtensionBody n) { // Should not happen ... 
throw new AssertionError(); } @Override public void visit(WtTemplate n) { printAsWikitext(n); } @Override public void visit(WtTemplateArgument n) { printAsWikitext(n); } @Override public void visit(WtTemplateArguments n) { printAsWikitext(n); } @Override public void visit(WtTemplateParameter n) { printAsWikitext(n); } public void visit(WtText n) { wrapText(n.getContent()); } @Override public void visit(WtTicks n) { // Should not happen ... throw new AssertionError(); } public void visit(WtUnorderedList n) { p.indentln("
      "); p.incIndent(); iterate(n); p.decIndent(); p.indentln("
    "); } public void visit(WtUrl n) { p.indentAtBol(); String url = callback.makeUrl(n); pf("%s", url, url); } @Override public void visit(WtValue n) { iterate(n); } @Override public void visit(WtWhitespace n) { if (!p.atBol()) p.println(" "); } public void visit(WtXmlAttribute n) { if (!n.getName().isResolved()) { logger.warn("Unresolved attribute name: " + WtRtDataPrinter.print(n)); } else { if (n.hasValue()) { pt(" %s=\"%~\"", n.getName().getAsString(), cleanAttribValue(n.getValue())); } else { pf(" %s=\"%", n.getName(), cleanAttribs(n.getXmlAttributes())); p.println(); p.incIndent(); dispatch(n.getBody()); p.decIndent(); p.indent(); pf("", n.getName()); p.println(); } else { p.indentAtBol(); pt("<%s%!>", n.getName(), cleanAttribs(n.getXmlAttributes())); p.incIndent(); dispatch(n.getBody()); p.decIndent(); p.indentAtBol(); pf("", n.getName()); } } else { p.indentAtBol(); pt("<%s%! />", n.getName(), cleanAttribs(n.getXmlAttributes())); } } public void visit(WtXmlEmptyTag n) { printAsWikitext(n); } public void visit(WtXmlEndTag n) { printAsWikitext(n); } public void visit(WtXmlEntityRef n) { p.indentAtBol(); pf("&%s;", n.getName()); } public void visit(WtXmlStartTag n) { printAsWikitext(n); } // ===================================================================== private void wrapText(String text) { if (inPre > 0) { p.print(esc(text)); } else { p.indentAtBol(esc(StringTools.collapseWhitespace(text))); } } /* private void wrapText(String text) { if (inPre > 0) { p.print(esc(text)); } else { int i = 0; int len = text.length(); while (i < len) { char ch; // If at beginning of line skip whitespace if (p.atBol()) { while (i < len) { ch = text.charAt(i); if (!Character.isWhitespace(ch)) break; ++i; } } if (i >= len) break; p.flush(); int col = p.getColumn(); int border = 80 + p.getIndent() * 4; int j = i; while (j < len) { ch = text.charAt(j++); if (col >= border && Character.isWhitespace(ch)) break; if (ch == '\n') break; } String substr = text.substring(i, j); if 
(!substr.isEmpty()) p.indentAtBol(esc(StringTools.collapseWhitespace(substr))); if (i < len) p.println(); i = j; } } } */ private void printAsWikitext(WtNode n) { // TODO: Implement //throw new FmtNotYetImplementedError(); //p.indentAtBol(); } private String toWikitext(WtNode value) { // TODO: Implement //throw new FmtNotYetImplementedError(); return ""; } // ===================================================================== private String makeSectionTitle(WtHeading n) { byte[] title; try { title = makeTitleFromNodes(n).getBytes("UTF8"); } catch (UnsupportedEncodingException e) { throw new VisitingException(e); } StringBuilder b = new StringBuilder(); for (byte u : title) { if (u < 0) { b.append('.'); b.append(String.format("%02X", u)); } else if (u == ' ') { b.append('_'); } else { b.append((char) u); } } return b.toString(); } private String makeImageAltText(WtImageLink n) { return makeTitleFromNodes(n.getAlt()); } protected String makeImageCaption(WtImageLink n) { return makeTitleFromNodes(n.getTitle()); } private String makeTitleFromNodes(WtNodeList titleNode) { StringWriter w = new StringWriter(); SafeLinkTitlePrinter p = new SafeLinkTitlePrinter(w, wikiConfig); p.go(titleNode); return w.toString(); } // ===================================================================== static String makeLinkTitle(WtInternalLink n, PageTitle target) { return target.getDenormalizedFullTitle(); } protected String makeImageTitle(WtImageLink n, PageTitle target) { return target.getDenormalizedFullTitle(); } private String makeTitleFromTarget(WtInternalLink n, PageTitle target) { return makeTitleFromTarget(target, n.getTarget()); } private String makeTitleFromTarget(PageTitle target, WtPageName title) { String targetStr = title.getAsString(); if (target.hasInitialColon() && !targetStr.isEmpty() && targetStr.charAt(0) == ':') targetStr = targetStr.substring(1); return targetStr; } // ===================================================================== /** * Pull garbage in 
between rows in front of the table. */ private void fixTableBody(WtNodeList body) { boolean hadRow = false; WtTableRow implicitRow = null; for (WtNode c : body) { switch (c.getNodeType()) { case WtNode.NT_TABLE_HEADER: // fall through! case WtNode.NT_TABLE_CELL: { if (hadRow) { dispatch(c); } else { if (implicitRow == null) implicitRow = nf.tr(nf.emptyAttrs(), nf.body(nf.list())); implicitRow.getBody().add(c); } break; } case WtNode.NT_TABLE_CAPTION: { if (!hadRow && implicitRow != null) dispatch(implicitRow); implicitRow = null; dispatch(c); break; } case WtNode.NT_TABLE_ROW: { if (!hadRow && implicitRow != null) dispatch(implicitRow); hadRow = true; dispatch(c); break; } default: { if (!hadRow && implicitRow != null) implicitRow.getBody().add(c); else dispatch(c); break; } } } } /** * If the cell content is only one paragraph, the content of the paragraph * is returned. Otherwise the whole cell content is returned. This is done * to render cells with a single paragraph without the paragraph tags. 
*/ protected static WtNode getCellContent(WtNodeList body) { if (body.size() >= 1 && body.get(0) instanceof WtParagraph) { boolean ok = true; for (int i = 1; i < body.size(); ++i) { if (!(body.get(i) instanceof WtNewline)) { ok = false; break; } } if (ok) body = (WtParagraph) body.get(0); } return body; } // ===================================================================== protected String cleanAttribValue(WtNodeList value) { try { return StringTools.collapseWhitespace(tu.astToText(value)).trim(); } catch (StringConversionException e) { return toWikitext(value); } } protected WtNodeList cleanAttribs(WtNodeList xmlAttributes) { ArrayList clean = null; WtXmlAttribute style = null; for (WtNode a : xmlAttributes) { if (a instanceof WtXmlAttribute) { WtXmlAttribute attr = (WtXmlAttribute) a; if (!attr.getName().isResolved()) continue; String name = attr.getName().getAsString().toLowerCase(); if (name.equals("style")) { style = attr; } else if (name.equals("width")) { if (clean == null) clean = new ArrayList(); clean.add(attr); } else if (name.equals("align")) { if (clean == null) clean = new ArrayList(); clean.add(attr); } } } if (clean == null || clean.isEmpty()) return xmlAttributes; String newStyle = ""; if (style != null) newStyle = cleanAttribValue(style.getValue()); for (WtXmlAttribute a : clean) { if (!a.getName().isResolved()) continue; String name = a.getName().getAsString().toLowerCase(); if (name.equals("align")) { newStyle = String.format( "text-align: %s; ", cleanAttribValue(a.getValue())) + newStyle; } else { newStyle = String.format( "%s: %s; ", name, cleanAttribValue(a.getValue())) + newStyle; } } WtXmlAttribute newStyleAttrib = nf.attr( nf.name(nf.list(nf.text("style"))), nf.value(nf.list(nf.text(newStyle)))); WtNodeList newAttribs = nf.attrs(nf.list()); for (WtNode a : xmlAttributes) { if (a == style) { newAttribs.add(newStyleAttrib); } else if (clean.contains(a)) { // Remove } else { // Copy the rest newAttribs.add(a); } } if (style == null) 
newAttribs.add(newStyleAttrib); return newAttribs; } // ========================================================================= public static String print( HtmlRendererCallback callback, WikiConfig wikiConfig, PageTitle pageTitle, T node) { return print(callback, wikiConfig, new StringWriter(), pageTitle, node).toString(); } public static Writer print( HtmlRendererCallback callback, WikiConfig wikiConfig, Writer writer, PageTitle pageTitle, T node) { new HtmlRenderer(callback, wikiConfig, pageTitle, writer).go(node); return writer; } // ========================================================================= protected static final Logger logger = LoggerFactory.getLogger(HtmlRenderer.class); protected static final Set blockElements = new HashSet(); protected final WikiConfig wikiConfig; protected final PageTitle pageTitle; protected final EngineNodeFactory nf; protected final EngineAstTextUtils tu; protected final HtmlRendererCallback callback; protected int inPre = 0; static { // left out del and ins, added table elements blockElements.add("div"); blockElements.add("address"); blockElements.add("blockquote"); blockElements.add("center"); blockElements.add("dir"); blockElements.add("div"); blockElements.add("dl"); blockElements.add("fieldset"); blockElements.add("form"); blockElements.add("h1"); blockElements.add("h2"); blockElements.add("h3"); blockElements.add("h4"); blockElements.add("h5"); blockElements.add("h6"); blockElements.add("hr"); blockElements.add("isindex"); blockElements.add("menu"); blockElements.add("noframes"); blockElements.add("noscript"); blockElements.add("ol"); blockElements.add("p"); blockElements.add("pre"); blockElements.add("table"); blockElements.add("ul"); blockElements.add("center"); blockElements.add("caption"); blockElements.add("tr"); blockElements.add("td"); blockElements.add("th"); blockElements.add("colgroup"); blockElements.add("thead"); blockElements.add("tbody"); blockElements.add("tfoot"); } // 
========================================================================= protected HtmlRenderer( HtmlRendererCallback callback, WikiConfig wikiConfig, PageTitle pageTitle, Writer w) { super(w); this.callback = callback; this.wikiConfig = wikiConfig; this.pageTitle = pageTitle; this.nf = wikiConfig.getNodeFactory(); this.tu = wikiConfig.getAstTextUtils(); } }




    © 2015 - 2024 Weber Informatics LLC | Privacy Policy