All downloads are free. The search and download features use the official Maven repository.

com.threerings.gwt.util.WikiParser Maven / Gradle / Ivy

The newest version!
//
// $Id$
//
// OOO GWT Utils - utilities for creating GWT applications
// Copyright (C) 2009-2010 Three Rings Design, Inc., All Rights Reserved
// http://code.google.com/p/ooo-gwt-utils/
//
// This library is free software; you can redistribute it and/or modify it
// under the terms of the GNU Lesser General Public License as published
// by the Free Software Foundation; either version 2.1 of the License, or
// (at your option) any later version.
//
// This library is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
// Lesser General Public License for more details.
//
// You should have received a copy of the GNU Lesser General Public
// License along with this library; if not, write to the Free Software
// Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA

package com.threerings.gwt.util;

import java.util.HashSet;

import static com.threerings.gwt.util.WikiUtils.*;

/**
 * Renders Creole wiki text into XHTML. Adapted for GWT from Java parser. {@link #render} takes
 * wiki text and returns XHTML.
 *
 * <p>WikiParser's behavior can be customized by overriding appendXxx() methods, which should make
 * integration of this class into any wiki/blog/forum software easy and painless.</p>
 *
 * <p>See http://code.google.com/p/t4-wiki-parser/ and http://www.wikicreole.org/</p>

* * @author Yaroslav Stavnichiy ([email protected]) */ public class WikiParser { /** * Renders the supplied wiki text to XHTML. */ public static String render (String wikiText) { return new WikiParser().doRender(wikiText); } /** * Renders the supplied wiki text snippet to XHTML. This method differs from {@link #render} in * that it expects a single line of text which may contain inline formatting, but contains no * block formatting. */ public static String renderSnippet (String wikiText) { return new WikiParser().doRenderSnippet(wikiText); } protected String doRender (String text) { wikiText=preprocessWikiText(text); wikiLength=wikiText.length(); wikiChars=new char[wikiLength]; wikiText.getChars(0, wikiLength, wikiChars, 0); while (parseBlock()); closeListsAndTables(); while (mediawikiTableLevel-- > 0) sb.append("\n"); completeTOC(); return sb.toString(); } protected String doRenderSnippet (String text) { wikiText=preprocessWikiText(text); wikiLength=wikiText.length(); wikiChars=new char[wikiLength]; wikiText.getChars(0, wikiLength, wikiChars, 0); parseItem(pos, null, ContextType.PARAGRAPH); return sb.toString(); } // protected void appendMacro(String text) { // if ("TOC".equals(text)) { // sb.append("<<>>"); // put TOC placeholder for replacing it later with real TOC // } // else { // sb.append("<<<Macro:"); // appendText(text); // sb.append(">>>"); // } // } protected void appendLink (String text) { String[] link = split(text, '|'); String uri = link[0].trim(); String name = (link.length >= 2 && !isEmpty(link[1].trim())) ? 
link[1] : uri; if (isAbsoluteURI(uri)) { appendExternalLink(uri, name); } else { appendInternalLink(uri, name); } } protected void appendExternalLink (String uri, String text) { sb.append(""); appendText(text); sb.append(""); } protected void appendInternalLink (String uri, String text) { sb.append(""); appendText(text); sb.append(""); } protected void appendImage (String text) { String[] link = split(text, '|'); String uri = link[0].trim(); String name = (link.length >= 2 && !isEmpty(link[1].trim())) ? link[1] : uri; if (isAbsoluteURI(uri)) { appendExternalImage(uri, name); } else { appendInternalImage(uri, name); } } protected void appendExternalImage (String uri, String text) { String alt = escapeHTML(unescapeHTML(text)); sb.append("\""+alt+"\""); } protected void appendInternalImage (String uri, String text) { sb.append("<<<Internal image(?): "); appendText(uri + " " + text); sb.append(">>>"); } protected void appendText (String text) { sb.append(escapeHTML(unescapeHTML(text))); } protected String generateTOCAnchorId (int hLevel, String text) { int i=0; String id=(HEADING_ID_PREFIX!=null ? HEADING_ID_PREFIX : "H"+hLevel+"_")+translit(text.replaceAll("<.+?>", "")).trim().replaceAll( "\\s+", "_").replaceAll("[^a-zA-Z0-9_-]", ""); while (tocAnchorIds.contains(id)) { // avoid duplicates i++; id=text+"_"+i; } tocAnchorIds.add(id); return id; } protected void appendTOCItem (int level, String anchorId, String text) { if (level>tocLevel) { while (level>tocLevel) { toc.append("
  • "); tocLevel++; } } else { while (level
"); tocLevel--; } toc.append("\n
  • "); } toc.append(""+text+""); } protected void completeTOC () { while (0"); tocLevel--; } int idx; String tocDiv="
    "+toc.toString()+"
    "; while ((idx=sb.indexOf("<<>>"))>=0) { sb.replace(idx, idx+9, tocDiv); } } protected void appendNowiki (String text) { sb.append(escapeHTML(replaceString(replaceString(text, "~{{{", "{{{"), "~}}}", "}}}"))); } private void closeListsAndTables () { // close unclosed lists while (listLevel>=0) { sb.append(LIST_CLOSE[LIST_CHARS.indexOf(listLevels[listLevel--])]); } if (inTable) { sb.append("\n"); inTable=false; } } private boolean parseBlock () { pos = skipSpacesToNewline(pos, wikiLength); if (pos>=wikiLength) return false; char c=wikiChars[pos]; if (c=='\n') { // blank line => end of list/table; no other meaning closeListsAndTables(); pos++; return true; } if (c=='|') { // table if (mediawikiTableLevel>0) { int pp=pos+1; if (pp it's mediawiki-table markup if (pp==wikiLength || wikiChars[pp]=='\n') { closeListsAndTables(); // close lists if any sb.append(newRow? "\n": (endTable? "\n":"\n")); if (endTable) mediawikiTableLevel--; pos=pp+1; return pp"); inTable=true; } pos=parseTableRow(pos+1); return true; } else { if (inTable) { sb.append("\n"); inTable=false; } } if (listLevel>=0 || LIST_CHARS.indexOf(c)>=0) { // lists int lc; // count list level for (lc=0; lc<=listLevel && pos+lc retry from the same position blockquoteBR=true; return true; } else { if (pos+lc>=wikiLength) return false; char cc=wikiChars[pos+lc]; int listType=LIST_CHARS.indexOf(cc); if (listType>=0 && pos+lc+1=0) { // list item - same level if (listLevels[listLevel]=='>' || listLevels[listLevel]==':') sb.append('\n'); else if (listLevels[listLevel]=='!') sb.append("
  • \n
    "); else sb.append("\n
  • "); pos=parseListItem(pos+lc); return true; } } } if (c=='=') { // heading int hc; // count heading level for (hc=1; hc<6 && pos+hc=wikiLength) return false; int p = skipSpacesTabs(pos+hc, wikiLength); // skip spaces String tagName="h"+(hc+HEADING_LEVEL_SHIFT); sb.append("<"+tagName+" id=''>"); // real id to be inserted after parsing this item int hStart=sb.length(); pos=parseItem(p, wikiText.substring(pos, pos+hc), ContextType.HEADER); String hText=sb.substring(hStart, sb.length()); sb.append("\n"); String anchorId=generateTOCAnchorId(hc, hText); sb.insert(hStart-2, anchorId); appendTOCItem(hc, anchorId, hText); return true; } else if (c=='<' || c =='>') { // <<< is float left, >>> is float right if (pos+2 < wikiLength && wikiChars[pos+1] == c && wikiChars[pos+2] == c) { pos = skipSpacesTabs(pos+3, wikiLength); // skip whitespace String side = (c == '<') ? "left" : "right"; String margin = (c == '<') ? "right" : "left"; sb.append("
    "); pos = parseItem(pos, null, ContextType.HEADER); sb.append("
    "); return true; } } else if (c=='{') { // nowiki-block? if (pos+2=startNowiki) { // block
                        if (wikiChars[startNowiki]=='\n') startNowiki++; // skip the very first '\n'
                        if (wikiChars[endNowiki-1]=='\n') endNowiki--; // omit the very last '\n'
                        sb.append("
    ");
                        appendNowiki(wikiText.substring(startNowiki, endNowiki));
                        sb.append("
    \n"); pos=endPos; return true; } // else inline - proceed to regular paragraph handling } else if (pos+1"); mediawikiTableLevel++; pos=pp+1; return pp\n"); pos=p; return true; } } else if (c=='~') { // block-level escaping: '*' '-' '#' '>' ':' '!' '|' '=' if (pos+1' || nc==':' || nc=='-' || nc=='|' || nc=='=' || nc=='!') { pos++; // skip '~' and proceed to regular paragraph handling c=nc; } else if (nc=='*' || nc=='#') { // might be inline markup so need to double check char nnc=pos+2"); pos=parseItem(pos, null, ContextType.PARAGRAPH); sb.append("

    \n"); return true; } } private int skipSpacesTabs (int start, int end) { int pos = start; while (pos < end && (wikiChars[pos] == ' ' || wikiChars[pos] == '\t')) pos++; return pos; } private int skipSpacesToNewline (int start, int end) { int pos = start; while (pos < end && wikiChars[pos] <= ' ' && wikiChars[pos] != '\n') pos++; return pos; } /** * Finds first closing '}}}' for nowiki block or span. * Skips escaped sequences: '~}}}'. * * @param startBlock points to first char after '{{{' * @return position of first '}' in closing '}}}' */ private int findEndOfNowiki (int startBlock) { // NOTE: this method could step back one char from startBlock position int endBlock=startBlock-3; do { endBlock=wikiText.indexOf("}}}", endBlock+3); if (endBlock<0) return wikiLength; // no matching '}}}' found while (endBlock+30 && wikiChars[nextBlock-1]=='~'); if (nextBlock<0) nextBlock=wikiLength; int endBlock=wikiText.lastIndexOf("}}}", nextBlock); if (endBlock>=startBlock && wikiChars[endBlock-1]!='~') return endBlock; } while (nextBlock=wikiLength) return wikiLength; sb.append(""); boolean endOfRow=false; do { int colspan=0; while (start+colspan=wikiLength || wikiChars[start]=='\n') { // skip last empty column start++; // eat '\n' break; } sb.append(th? "1) sb.append(" colspan=\""+colspan+"\""); sb.append('>'); try { parseItemThrow(start, null, ContextType.TABLE_CELL); } catch (EndOfSubContextException e) { // end of cell start=e.position; if (start>=wikiLength) endOfRow=true; else if (wikiChars[start]=='\n') { start++; // eat '\n' endOfRow=true; } } catch (EndOfContextException e) { start=e.position; endOfRow=true; } sb.append(th? "":""); } while (!endOfRow/* && start\n"); return start; } /** * Same as parseItem(); blank line adds {@code

    }. * * @param start */ private int parseListItem (int start) { start = skipSpacesToNewline(start, wikiLength); // skip spaces int end=parseItem(start, null, ContextType.LIST_ITEM); if ((listLevels[listLevel]=='>' || listLevels[listLevel]==':') && wikiText.substring(start, end).trim().length()==0) { // empty line within blockquote/div if (!blockquoteBR) { sb.append("

    "); blockquoteBR=true; } } else { blockquoteBR=false; } return end; } /** * @param p points to first slash in suspected URI (scheme://etc) * @param start points to beginning of parsed item * @param end points to end of parsed item * * @return array of two integer offsets [begin_uri, end_uri] if matched, null otherwise */ private int[] checkURI (int p, int start, int end) { if (p>start && wikiChars[p-1]==':') { // "://" found int pb=p-1; while (pb>start && isLatinLetterOrDigit(wikiChars[pb-1])) pb--; int pe=p+2; while (pep+2 && ",.;:?!%)".indexOf(wikiChars[pe-1])>=0) pe--; if (isURI(wikiText.substring(pb, pe))) { uri = wikiText.substring(pb, pe); } else { pe--; // try chopping from the end } } while (uri==null && pe>p+2); if (uri!=null && isAbsoluteURI(uri)) { int offs[]= {pb, pe}; return offs; } } return null; } private int checkApplyURI (StringBuilder tb, int p, int start, int end) { int[] uriOffs=checkURI(p, start, end); if (uriOffs == null) return p; flushToText(tb); // flush text buffer int pb=uriOffs[0], pe=uriOffs[1]; if (pb>start && wikiChars[pb-1]=='~') { sb.delete(sb.length()-(p-pb+1), sb.length()); // roll back URL + ~ sb.append(escapeHTML(wikiText.substring(pb, pe))); } else { sb.delete(sb.length()-(p-pb), sb.length()); // roll back URL appendLink(wikiText.substring(pb, pe)); } return pe; } private int parseItem (int start, String delimiter, ContextType context) { try { return parseItemThrow(start, delimiter, context); } catch (EndOfContextException e) { return e.position; } } private int parseItemThrow (int start, String delimiter, ContextType context) throws EndOfContextException { StringBuilder tb=new StringBuilder(); boolean specialCaseDelimiterHandling="//".equals(delimiter); int p=start; int end=wikiLength; try { nextChar: while(true) { if (p>=end) throw new EndOfContextException(end); //break; if (delimiter!=null && wikiText.startsWith(delimiter, p)) { if (!specialCaseDelimiterHandling || checkURI(p, start, end)==null) { p+=delimiter.length(); 
return p; } } char c=wikiChars[p]; boolean atLineStart=false; // context-defined break test if (c=='\n') { if (context==ContextType.HEADER || context==ContextType.TABLE_CELL) { p++; throw new EndOfContextException(p); } if (p+1=end) throw new EndOfContextException(p); // end of text reached c=wikiChars[p]; atLineStart=true; if (c=='-' && wikiText.startsWith("----", p)) { // check for ----
    int pp = skipSpacesTabs(p+4, end); // skip spaces // yes, it's
    if (pp==end || wikiChars[pp]=='\n') throw new EndOfContextException(p); } if (LIST_CHARS.indexOf(c)>=0) { // start of list item? if (FORMAT_CHARS.indexOf(c)<0) throw new EndOfContextException(p); // here we have a list char, which also happen to be a format char if (p+1=0 && c==listLevels[0]) { // c matches current list's first level, so it must be new list item throw new EndOfContextException(p); } // otherwise it must be just formatting sequence => no break of context } else if (c=='=') { // header throw new EndOfContextException(p); } else if (c=='|') { // table or mediawiki-table throw new EndOfContextException(p); } else if (c=='{') { // mediawiki-table? if (p+1 flushToText(tb); // flush text buffer int startNowiki=p+3; int endNowiki=findEndOfNowiki(startNowiki); p=endNowiki+3; if (wikiText.lastIndexOf('\n', endNowiki)>=startNowiki) { // block
                                    // skip the very first '\n'
                                    if (wikiChars[startNowiki]=='\n') startNowiki++;
                                    // omit the very last '\n'
                                    if (wikiChars[endNowiki-1]=='\n') endNowiki--;
                                    // break the paragraph because XHTML does not allow 
     children
                                    // of 

    if (context==ContextType.PARAGRAPH) sb.append("

    "); sb.append("
    ");
                                    appendNowiki(wikiText.substring(startNowiki, endNowiki));
                                    sb.append("
    \n"); // continue the paragraph if (context==ContextType.PARAGRAPH) sb.append("

    "); // in this context return immediately after nowiki //if (context==ContextType.NOWIKI_BLOCK) return p; } else { // inline appendNowiki(wikiText.substring(startNowiki, endNowiki)); } continue; } else if (p+2=0 && endImg=0 && endLink=0 && endCode"); sb.append(escapeHTML(wikiText.substring(p+1, endCode))); sb.append(""); p=endCode+1; continue; } } else if (c=='\\') { if (p+1 flushToText(tb); // flush text buffer sb.append("
    "); p+=2; continue; } } // else if (c=='<') { // if (p+1>> // int endMacro=wikiText.indexOf(">>>", p+3); // if (endMacro>=0 && endMacro=0) { if (p+1 < end && wikiChars[p+1] == c && // make sure we see a matching close delimiter somewhere ahead wikiText.substring(p+2, end).indexOf(""+c+c) != -1) { // special case for "//" - check if it is part of URL (scheme://etc) int np = checkApplyURI(tb, p, start, end); if (np != p) { p = np; continue; } flushToText(tb); // flush text buffer sb.append(FORMAT_TAG_OPEN[formatType]); try { p=parseItemThrow(p+2, FORMAT_DELIM[formatType], context); } finally { sb.append(FORMAT_TAG_CLOSE[formatType]); } continue; } else if (c=='/') { // special case for "//" - check if it is part of URL (scheme://etc) int np = checkApplyURI(tb, p, start, end); if (np != p) { p = np; continue; } } else if (c=='-') { // ' -- ' => — if (p+2 < end && wikiChars[p+1] == '-' && wikiChars[p+2] == ' ' && p > start && wikiChars[p-1] == ' ') { tb.append("— "); p+=3; continue; } } } else if (c=='~') { // escape // most start line escapes are dealt with in parseBlock() if (atLineStart) { // same as block-level escaping: '*' '-' '#' '>' ':' '|' '=' if (p+1' || nc==':' || nc=='-' || nc=='|' || nc=='=' || nc=='!') { // can't be inline markup tb.append(nc); p+=2; // skip '~' and nc continue nextChar; } else if (nc=='*' || nc=='#') { // might be inline markup so need to double check char nnc=p+2 tocAnchorIds=new HashSet(); private String wikiText; private int pos=0; private int listLevel=-1; private static final int MAX_LIST_LEVELS=100; private char listLevels[]=new char[MAX_LIST_LEVELS+1]; // max number of levels allowed private boolean blockquoteBR=false; private boolean inTable=false; private int mediawikiTableLevel=0; private static enum ContextType {PARAGRAPH, LIST_ITEM, TABLE_CELL, HEADER, NOWIKI_BLOCK}; private static final String[] ESCAPED_INLINE_SEQUENCES= { "{{{", "{{", "}}}", "**", "//", "__", "##", "\\\\", "[[", "<<<", "~", "--", "|"}; private static 
final String LIST_CHARS="*-#>:!"; private static final String[] LIST_OPEN= { "

    • ", "
      • ", "
        1. ", "
          ", "
          ", "
          "}; private static final String[] LIST_CLOSE= { "
      \n", "
    \n", "
  • \n", "\n", "
    \n", "
    \n"}; private static final String FORMAT_CHARS="*/_#-"; private static final String[] FORMAT_DELIM= {"**", "//", "__", "##", "--"}; private static final String[] FORMAT_TAG_OPEN= { "", "", "", "", ""}; private static final String[] FORMAT_TAG_CLOSE= { "", "", "", "", ""}; }



    © 2015 - 2025 Weber Informatics LLC | Privacy Policy