info.bliki.wiki.filter.WikipediaParser Maven / Gradle / Ivy

Show more of this group Show more artifacts with this name
Show all versions of bliki-core Show documentation
This is the core project.
The newest version!
package info.bliki.wiki.filter;

import info.bliki.commons.validator.routines.EmailValidator;
import info.bliki.htmlcleaner.ContentToken;
import info.bliki.htmlcleaner.EndTagToken;
import info.bliki.htmlcleaner.TagNode;
import info.bliki.htmlcleaner.TagToken;
import info.bliki.wiki.model.Configuration;
import info.bliki.wiki.model.DefaultEventListener;
import info.bliki.wiki.model.IEventListener;
import info.bliki.wiki.model.ITableOfContent;
import info.bliki.wiki.model.IWikiModel;
import info.bliki.wiki.tags.HTMLBlockTag;
import info.bliki.wiki.tags.HTMLTag;
import info.bliki.wiki.tags.HrTag;
import info.bliki.wiki.tags.PTag;
import info.bliki.wiki.tags.WPBoldItalicTag;
import info.bliki.wiki.tags.WPPreTag;
import info.bliki.wiki.tags.WPTag;
import info.bliki.wiki.tags.util.Attribute;
import info.bliki.wiki.tags.util.IBodyTag;
import info.bliki.wiki.tags.util.INoBodyParsingTag;
import info.bliki.wiki.tags.util.NodeAttribute;
import info.bliki.wiki.tags.util.TagStack;
import info.bliki.wiki.tags.util.WikiTagNode;

import java.util.List;

/**
 * A Wikipedia syntax parser for the second pass in the parsing of a Wikipedia
 * source text.
 *
 * @see TemplateParser for the first pass
 */
public class WikipediaParser extends AbstractWikipediaParser {
    public static final String[] TOC_IDENTIFIERS = { "TOC", "NOTOC", "FORCETOC" };

    private ITableOfContent fTableOfContentTag;
    private int fHeadCounter;

    private boolean fHtmlCodes = true;
    private boolean fNoToC;
    private boolean fRenderTemplate;
    private boolean fForceToC;

    private IEventListener fEventListener;

    public WikipediaParser(String stringSource, boolean renderTemplate) {
        this(stringSource, renderTemplate, null);
    }

    public WikipediaParser(String stringSource, boolean renderTemplate,
            IEventListener wikiListener) {
        super(stringSource);
        fRenderTemplate = renderTemplate;
        if (wikiListener == null) {
            fEventListener = DefaultEventListener.CONST;
        } else {
            fEventListener = wikiListener;
        }
    }

    /**
     * Check the text for a #REDIRECT [[...]] or
     * #redirect [[...]] link
     *
     * @param rawWikiText the wiki text
     * @param wikiModel the wikimodel to use
     * @return non-null if a redirect was found and further parsing
     *         should be cancelled according to the model.
     */
    public static String parseRedirect(String rawWikiText, IWikiModel wikiModel) {
        int redirectStart = -1;
        int redirectEnd = -1;
        for (int i = 0; i < rawWikiText.length(); i++) {
            if (rawWikiText.charAt(i) == '#') {
                if (startsWith(rawWikiText, i + 1, "redirect", true)) {
                    redirectStart = rawWikiText.indexOf("[[", i + 8);
                    if (redirectStart > i + 8) {
                        redirectStart += 2;
                        redirectEnd = rawWikiText.indexOf("]]", redirectStart);
                    }
                }
                break;
            }
            if (Character.isWhitespace(rawWikiText.charAt(i))) {
                continue;
            }
            break;
        }

        if (redirectEnd >= 0) {
            String redirectedLink = rawWikiText.substring(redirectStart, redirectEnd);
            if (wikiModel.appendRedirectLink(redirectedLink)) {
                return redirectedLink;
            }
        }
        return null;
    }

    /**
     * Copy the read ahead content in the resulting HTML text token.
     *
     * @param diff
     *            subtract diff form the current parser position to
     *            get the HTML text token end position.
     */
    private boolean createPreContentToken(final int diff) {
        if (fWhiteStart) {
            try {
                final int count = fCurrentPosition - diff - fWhiteStartPosition;
                if (count > 0) {
                    String rawWikiText = fStringSource.substring(
                            fWhiteStartPosition, fWhiteStartPosition + count);
                    WikipediaPreTagParser.parseRecursive(rawWikiText,
                            fWikiModel);
                    fWhiteStart = false;
                }
                return true;
            } catch (InvalidPreWikiTag ignored) {
            }
        }
        return false;
    }

    private int getNextToken() // throws InvalidInputException
    {
        fWhiteStart = true;
        fWhiteStartPosition = fCurrentPosition;
        try {
            while (true) {
                fCurrentCharacter = fSource[fCurrentPosition++];

                // ---------Identify the next token-------------
                switch (fCurrentCharacter) {
                case '\n':
                    // check at the end of line, if there is open wiki bold or
                    // italic
                    // markup
                    reduceTokenStackBoldItalic();
                    break;
                case '{':
                    // dummy parsing of wikipedia templates for event listeners
                    if (!parseTemplate()) {
                        // wikipedia table handling
                        if (parseTable()) {
                            continue;
                        }
                    }
                    break;
                case '_': // TOC identifiers __NOTOC__, __FORCETOC__ ...
                    if (parseSpecialIdentifiers()) {
                        continue;
                    }
                    break;
                case '=': // wikipedia header ?
                    if (parseSectionHeaders()) {
                        continue;
                    }
                    break;
                case WPList.DL_DD_CHAR: // start of  list
                case WPList.DL_DT_CHAR: // start of  list
                case WPList.OL_CHAR: // start of  list
                case WPList.UL_CHAR: // start of  list
                    if (parseLists()) {
                        continue;
                    }
                    break;
                // case ':':
                // if (parseSimpleDefinitionLists()) {
                // continue;
                // }
                // break;
                // case ';':
                // if (parseDefinitionLists()) {
                // continue;
                // }
                // break;
                case '-': // parse ---- as 
                    if (parseHorizontalRuler()) {
                        continue;
                    }
                    break;
                case ' ': // pre-formatted text?
                case '\t':
                    if (parsePreformattedWikiBlock()) {
                        continue;
                    }
                    break;
                }

                if (isStartOfLine() && fWikiModel.getRecursionLevel() == 1) {
                    if (isEmptyLine(1)) {
                        if (fWikiModel.stackSize() > 0
                                && (fWikiModel.peekNode() instanceof PTag)) {
                            // close  tag:
                            createContentToken(2);
                            fWikiModel
                                    .reduceTokenStack(Configuration.HTML_PARAGRAPH_OPEN);
                        }
                    } else {
                        if (fWikiModel.stackSize() == 0) {
                            addParagraph();
                            // if (fWikiModel.getRecursionLevel() == 1) {
                            // addParagraph();
                            // } else {
                            // if (fCurrentPosition > 1) {
                            // addParagraph();
                            // }
                            // }
                        } else {
                            TagToken tag = fWikiModel.peekNode();
                            if (tag instanceof WPPreTag) {
                                addPreformattedText();
                                // } else if (tag instanceof PTag) {
                                // createContentToken(fWhiteStart,
                                // fWhiteStartPosition, 2);
                                // reduceTokenStack(Configuration.HTML_PARAGRAPH_OPEN);
                            } else {
                                String allowedParents = Configuration.HTML_PARAGRAPH_OPEN
                                        .getParents();
                                if (allowedParents != null) {
                                    int index;
                                    index = allowedParents.indexOf("|"
                                            + tag.getName() + "|");
                                    if (index >= 0) {
                                        addParagraph();
                                    }
                                }
                            }
                        }
                    }
                }

                // ---------Identify the next token-------------
                switch (fCurrentCharacter) {
                case '[':
                    if (parseWikiLink()) {
                        continue;
                    }
                    break;
                case '\'':
                    if (getNextChar('\'')) {
                        if (getNextChar('\'')) {
                            if (getNextChar('\'')) {
                                if (getNextChar('\'')) {
                                    createContentToken(5);
                                    return TokenBOLDITALIC;
                                }
                                fCurrentPosition -= 1;
                                fWhiteStart = true;
                                createContentToken(3);
                                return TokenBOLD;
                            }
                            createContentToken(3);
                            return TokenBOLD;
                        }
                        createContentToken(2);
                        return TokenITALIC;
                    }
                    break;
                case '<':
                    if (fHtmlCodes) {
                        int htmlStartPosition = fCurrentPosition;
                        // HTML tags are allowed
                        try {
                            switch (fStringSource.charAt(fCurrentPosition)) {
                            case '!': // 
                                if (parseHTMLCommentTags()) {
                                    continue;
                                }
                                break;
                            default:

                                if (fSource[fCurrentPosition] != '/') {
                                    // opening HTML tag
                                    WikiTagNode tagNode = parseTag(fCurrentPosition);
                                    if (tagNode != null) {
                                        String tagName = tagNode.getTagName();
                                        TagToken tag = fWikiModel.getTokenMap()
                                                .get(tagName);
                                        if (tag != null) {
                                            tag = (TagToken) tag.clone();

                                            if (tag instanceof TagNode) {
                                                TagNode node = (TagNode) tag;
                                                List attributes = tagNode
                                                        .getAttributesEx();
                                                Attribute attr;
                                                String temp;
                                                for (int i = 1; i < attributes
                                                        .size(); i++) {
                                                    attr = attributes.get(i);
                                                    temp = attr.getValue();
                                                    if (temp != null) {
                                                        temp = parseNowiki(temp);
                                                    }
                                                    node.addAttribute(
                                                            attr.getName(),
                                                            temp, true);
                                                }
                                            }
                                            if (tag instanceof HTMLTag) {
                                                ((HTMLTag) tag)
                                                        .setTemplate(isTemplate());
                                            }

                                            createContentToken(1);

                                            fCurrentPosition = fScannerPosition;

                                            String allowedParents = tag
                                                    .getParents();
                                            if (allowedParents != null) {
                                                fWikiModel
                                                        .reduceTokenStack(tag);
                                            }
                                            createTag(tag, tagNode,
                                                    tagNode.getEndPosition());
                                            return TokenIgnore;
                                        } else {
                                            fCurrentPosition = tagNode.getEndPosition();
                                            // fWhiteStart = true;
                                            // skipUntilEndOfTag(tagNode,
                                            // tagNode.getEndPosition());
                                            // createContentToken(0);
                                            // return TokenIgnore;
                                        }
                                        // break;
                                    }
                                } else {
                                    // closing HTML tag
                                    WikiTagNode tagNode = parseTag(++fCurrentPosition);
                                    if (tagNode != null) {
                                        String tagName = tagNode.getTagName();
                                        TagToken tag = fWikiModel.getTokenMap()
                                                .get(tagName);
                                        if (tag != null) {
                                            createContentToken(2);
                                            fCurrentPosition = fScannerPosition;

                                            if (fWikiModel.stackSize() > 0) {
                                                TagToken topToken = fWikiModel
                                                        .peekNode();
                                                if (topToken.getName().equals(
                                                        tag.getName())) {
                                                    fWikiModel.popNode();
                                                    return TokenIgnore;
                                                } else {
                                                    if (tag.isReduceTokenStack()) {
                                                        reduceStackUntilToken(tag);
                                                    }
                                                }
                                            }
                                            return TokenIgnore;
                                        }
                                        break;
                                    }
                                }
                            }
                        } catch (IndexOutOfBoundsException e) {
                            // do nothing
                        }
                        fCurrentPosition = htmlStartPosition;
                    }
                    break;
                default:
                    if (Character.isLetter(fCurrentCharacter)) {
                        if (fCurrentPosition < 2
                                || !Character
                                        .isLetterOrDigit(fSource[fCurrentPosition - 2])) {
                            if (fCurrentCharacter == 'i'
                                    || fCurrentCharacter == 'I') {
                                // ISBN ?
                                if (parseISBNLinks()) {
                                    fWhiteStart = true;
                                    fWhiteStartPosition = fCurrentPosition;
                                    continue;
                                }
                            }

                            if (parseURIScheme()) {
                                // a URI scheme registered in the wiki model
                                // (ftp, http,
                                // https,...)
                                fWhiteStart = true;
                                fWhiteStartPosition = fCurrentPosition;
                                continue;
                            }

                            if (fWikiModel.isCamelCaseEnabled()
                                    && Character.isUpperCase(fCurrentCharacter)
                                    && fWikiModel.getRecursionLevel() <= 1) {
                                if (parseCamelCaseLink()) {
                                    fWhiteStart = true;
                                    fWhiteStartPosition = fCurrentPosition;
                                    continue;
                                }
                            }
                        }
                    }
                }

                if (!fWhiteStart) {
                    fWhiteStart = true;
                    fWhiteStartPosition = fCurrentPosition - 1;
                }

            }
            // -----------------end switch while try--------------------
        } catch (IndexOutOfBoundsException e) {
            // end of scanner text
        }
        try {
            createContentToken(1);
        } catch (IndexOutOfBoundsException e) {
            // end of scanner text
        }
        return TokenEOF;
    }

    private String parseNowiki(String input) {
        int indx = input.indexOf("");
        if (indx >= 0) {
            int indx2;
            int lastIndx = 0;
            StringBuilder buf = new StringBuilder(input.length());
            while (indx >= 0) {
                buf.append(input.substring(lastIndx, indx));
                lastIndx = indx + 8; //  length
                indx2 = input.indexOf("", indx + 1);
                if (indx2 >= 0) {
                    buf.append(input.substring(lastIndx, indx2));
                    lastIndx = indx2 + 9;//  length
                } else {
                    break;
                }
                indx = input.indexOf("", indx2 + 1);
            }
            buf.append(input.substring(lastIndx, input.length()));
            return buf.toString();
        }
        return input;
    }

    private void addParagraph() {
        createContentToken(2);
        fWikiModel.reduceTokenStack(Configuration.HTML_PARAGRAPH_OPEN);
        fWikiModel.pushNode(new PTag());
    }

    /**
     * Add the content of the wiki <pre> block. Trim the content at the
     * right side.
     */
    private void addPreformattedText() {
        if (fWhiteStart) {
            int currentPos = fCurrentPosition;
            int whiteEndPosition = fCurrentPosition - 2;
            while (whiteEndPosition > fWhiteStartPosition) {
                if (!Character.isWhitespace(fSource[whiteEndPosition])) {
                    whiteEndPosition++;
                    break;
                }
                whiteEndPosition--;
            }
            try {
                fCurrentPosition = whiteEndPosition;
                createContentToken(0);
            } finally {
                fCurrentPosition = currentPos;
            }
        }
        fWikiModel.reduceTokenStack(Configuration.HTML_PARAGRAPH_OPEN);
        fWikiModel.pushNode(new PTag());
    }

    private boolean parseISBNLinks() {
        final int urlStartPosition = fCurrentPosition;
        boolean foundISBN = false;
        try {
            if ((fCurrentCharacter == 'i' || fCurrentCharacter == 'I')
                    && (fSource[fCurrentPosition] == 's' || fSource[fCurrentPosition] == 'S')
                    && (fSource[++fCurrentPosition] == 'b' || fSource[fCurrentPosition] == 'B')
                    && (fSource[++fCurrentPosition] == 'n' || fSource[fCurrentPosition] == 'N')
                    && fSource[++fCurrentPosition] == ' ') {
                fCurrentPosition++;
                createContentToken(5);
                foundISBN = true;
                char ch;
                ch = fSource[fCurrentPosition++];
                while ((ch >= '0' && ch <= '9') || ch == '-') {
                    ch = fSource[fCurrentPosition++];
                }
            }
        } catch (IndexOutOfBoundsException ignored) {
        }
        if (foundISBN) {
            String urlString = fStringSource.substring(urlStartPosition - 1,
                    fCurrentPosition - 1);
            fCurrentPosition--;
            fWikiModel.appendISBNLink(urlString);
            return true;
        }
        // rollback work :-)
        fCurrentPosition = urlStartPosition;
        return false;
    }

    private boolean parseMailtoLinks() {
        final int urlStartPosition = fCurrentPosition;
        int tempPosition = fCurrentPosition;
        boolean foundUrl = false;
        try {
            if ((fCurrentCharacter == 'm' || fCurrentCharacter == 'M')
                    && (fSource[fCurrentPosition] == 'a' || fSource[fCurrentPosition] == 'A')
                    && (fSource[++fCurrentPosition] == 'i' || fSource[fCurrentPosition] == 'I')
                    && (fSource[++fCurrentPosition] == 'l' || fSource[fCurrentPosition] == 'L')
                    && (fSource[++fCurrentPosition] == 't' || fSource[fCurrentPosition] == 'T')
                    && (fSource[++fCurrentPosition] == 'o' || fSource[fCurrentPosition] == 'O')
                    && fSource[fCurrentPosition + 1] == ':') {
                tempPosition += 6;
                fCurrentCharacter = fSource[tempPosition++];

                foundUrl = true;
                while (!Character.isWhitespace(fSource[tempPosition++])) {
                }
            }
        } catch (IndexOutOfBoundsException ignored) {
        }
        if (foundUrl) {
            String urlString = fStringSource.substring(urlStartPosition - 1,
                    tempPosition - 1);
            String email = urlString.substring(7);
            if (EmailValidator.getInstance().isValid(email)) {
                createContentToken(5);
                fWhiteStart = false;
                fCurrentPosition = tempPosition;
                fCurrentPosition--;
                fWikiModel.appendMailtoLink(urlString, urlString, true);
                return true;
            }

        }
        // rollback work :-)
        fCurrentPosition = urlStartPosition;
        return false;
    }

    /**
     * See URI scheme
     *
     * @return true if a registered URI scheme was found in the
     *         wiki models configuration..
     */
    private boolean parseURIScheme() {
        if (fCurrentCharacter == 'm' || fCurrentCharacter == 'M') {
            // mailto ?
            if (parseMailtoLinks()) {
                return true;
            }
        }
        int urlStartPosition = fCurrentPosition;
        int tempPosition = fCurrentPosition;
        String uriSchemeName = "";
        int index = -1;
        boolean foundUrl = false;
        try {
            index = indexOfUntilNoLetter(':', fCurrentPosition);
            if (index > 0) {
                uriSchemeName = fStringSource.substring(fCurrentPosition - 1,
                        index);
                if (fWikiModel.isValidUriScheme(uriSchemeName)) {
                    // found something like "ftp", "http", "https"
                    tempPosition += uriSchemeName.length() + 1;
                    fCurrentCharacter = fSource[tempPosition++];

                    createContentToken(1);
                    fWhiteStart = false;
                    foundUrl = true;
                    while (Encoder.isUrlIdentifierPart(fSource[tempPosition++])) {
                    }
                }
            }
        } catch (IndexOutOfBoundsException ignored) {
        }

        if (foundUrl) {
            // separators at the end must be removed - maybe more chars?
            final String separators = ".!;?:,";
            while (tempPosition > 1 && tempPosition > urlStartPosition
                    && (separators.indexOf(fSource[tempPosition - 2]) != (-1))) {
                --tempPosition;
            }
            String restString = fStringSource.substring(urlStartPosition - 1,
                    tempPosition - 1);
            String uriSchemeSpecificPart = fStringSource.substring(index + 1,
                    tempPosition - 1);
            if (fWikiModel.isValidUriSchemeSpecificPart(uriSchemeName,
                    uriSchemeSpecificPart)) {
                fWhiteStart = false;
                fCurrentPosition = tempPosition;
                fCurrentPosition--;
                fWikiModel.appendExternalLink(uriSchemeName, restString,
                        restString, true);
                return true;
            }

        }
        // rollback work :-)
        fCurrentPosition = urlStartPosition;
        return false;
    }

    private boolean parseCamelCaseLink() {
        int startLinkPosition = fCurrentPosition - 1;
        int temp = fCurrentPosition;
        boolean isCamelCase = false;
        try {
            char ch = fSource[temp++];
            while (Character.isLetterOrDigit(ch)) {
                if (Character.isUpperCase(ch)) {
                    // at least 2 upper case characters appear in the word
                    isCamelCase = true;
                }
                ch = fSource[temp++];
            }
        } catch (IndexOutOfBoundsException ignored) {
        }

        if (isCamelCase) {
            createContentToken(1);
            fWhiteStart = false;
            fCurrentPosition = temp - 1;

            String name = fStringSource.substring(startLinkPosition,
                    fCurrentPosition);
            fWikiModel.appendInternalLink(name, null, name, null, false);
            return true;
        }

        return false;
    }

    /**
     * Parse a wiki section starting with a '[' character
     *
     * @return true if a correct link was found
     */
    private boolean parseWikiLink() {
        int startLinkPosition = fCurrentPosition;
        if (getNextChar('[')) {
            return parseWikiTag();
        } else if (getNextCharAsWhitespace()) {
            fCurrentPosition--;
            return false;
        } else {
            createContentToken(1);
            fWhiteStart = false;

            if (readUntilCharOrStopAtEOL(']')) {
                String name = fStringSource.substring(startLinkPosition,
                        fCurrentPosition - 1);

                if (handleHTTPLink(name)) {
                    return true;
                }
            }
            fCurrentPosition = startLinkPosition;
        }
        return false;
    }

    /**
     * Parse a wiki section starting with a '[[' sequence
     *
     * @return true if a correct link was found
     */
    private boolean parseWikiTag() {
        int startLinkPosition = fCurrentPosition;
        int endLinkPosition;
        // wikipedia link style
        createContentToken(2);

        int temp = fCurrentPosition;
        if (findWikiLinkEnd()) {
            endLinkPosition = fCurrentPosition - 2;
            final String name = fStringSource.substring(startLinkPosition, endLinkPosition);
            // test for a suffix string behind the Wiki link. Useful for plurals.
            // Example: Dolphins are [[aquatic mammal]]s that are closely related to [[whale]]s and [[porpoise]]s.
            temp = fCurrentPosition;
            String suffix = "";
            try {
                fCurrentCharacter = fSource[fCurrentPosition];
                if (Character.isLowerCase(fCurrentCharacter)) {
                    fCurrentPosition++;
                    StringBuilder suffixBuffer = new StringBuilder(16);
                    suffixBuffer.append(fCurrentCharacter);
                    while (true) {
                        fCurrentCharacter = fSource[fCurrentPosition++];
                        if (!Character.isLowerCase(fCurrentCharacter)) {
                            fCurrentPosition--;
                            break;
                        }
                        suffixBuffer.append(fCurrentCharacter);
                    }
                    suffix = suffixBuffer.toString();
                }
            } catch (IndexOutOfBoundsException e) {
                fCurrentPosition = temp;
            }
            fEventListener.onWikiLink(fSource, startLinkPosition,
                    endLinkPosition, suffix);

            if (!fWikiModel.appendRawWikipediaLink(name, suffix)) {
                fCurrentPosition = temp;
            }
            return true;
        } else {
            fWhiteStart = true;
            fWhiteStartPosition = startLinkPosition - 2;
            fCurrentPosition = temp + 1;
        }
        return false;
    }

    private boolean parsePreformattedWikiBlock() {
        if (isStartOfLine() && !isEmptyLine(1)) {
            if (fWikiModel.stackSize() == 0
                    || !(fWikiModel.peekNode() instanceof HTMLBlockTag)
                    || (fWikiModel.peekNode() instanceof PTag)) {
                createContentToken(2);
                fWikiModel.reduceTokenStack(Configuration.HTML_PRE_OPEN);

                // don't use Configuration.HTML_PRE_OPEN here
                // rendering differs between these tags!
                fWikiModel.pushNode(new WPPreTag());

                char ch = ' ';
                try {
                    while (ch == ' ' || ch == '\t') {
                        // SPACE or TAB => check if it's a pre-formatted text
                        fWhiteStart = true;
                        fWhiteStartPosition = fCurrentPosition;
                        ch = fSource[fCurrentPosition++];
                        while (ch != '\n' && fCurrentPosition < fSource.length) {
                            ch = fSource[fCurrentPosition++];
                        }
                        if (fCurrentPosition == fSource.length) {
                            // scanner reached end of text
                            if (!createPreContentToken(0)) {
                                fCurrentPosition = fWhiteStartPosition;
                                fSource[fWhiteStartPosition - 1] = '\n';
                                return false;
                            }
                        } else {
                            ch = fSource[fCurrentPosition++];
                            if (ch == ' ' || ch == '\t') {
                                if (!createPreContentToken(1)) {
                                    fCurrentPosition = fWhiteStartPosition;
                                    fSource[fWhiteStartPosition - 1] = '\n';
                                    return false;
                                }
                            } else {
                                // skip the newline character at the end of the
                                // pre-formatted
                                // block
                                if (!createPreContentToken(2)) {
                                    fCurrentPosition = fWhiteStartPosition;
                                    fSource[fWhiteStartPosition - 1] = '\n';
                                    return false;
                                } else {
                                    fCurrentPosition--;
                                    return true;
                                }
                            }
                        }

                    }
                } catch (IndexOutOfBoundsException e) {
                    fCurrentPosition--;
                } finally {
                    fWikiModel.popNode();
                }

            }
            return true;
        }
        return false;
    }

    /**
     * Parse ---- as <hr> tag
     */
    private boolean parseHorizontalRuler() {
        if (isStartOfLine()) {
            int tempCurrPosition = fCurrentPosition;
            try {
                if (fSource[tempCurrPosition++] == '-'
                        && fSource[tempCurrPosition++] == '-'
                        && fSource[tempCurrPosition++] == '-') {
                    int pos = isEndOfLine('-', tempCurrPosition);
                    if (pos > 0) {
                        HrTag hr = new HrTag();
                        createContentToken(2);
                        fWikiModel.reduceTokenStack(hr);
                        fCurrentPosition = pos;
                        fWikiModel.append(hr);
                        fWhiteStart = false;
                        return true;
                    }
                }
            } catch (IndexOutOfBoundsException ignored) {
            }
        }
        return false;
    }

    /**
     * Parse a wiki list 

     * 

     * Example:

     *
     * 
     * * first line
     * * second line
     * * third line
     * 
     */
    private boolean parseLists() {
        // set scanner pointer to '\n' character:
        if (isStartOfLine()) {
            setPosition(fCurrentPosition - 2);
            WPList list = wpList();
            if (list != null && !list.isEmpty()) {
                createContentToken(1);
                fWikiModel.reduceTokenStack(list);
                fCurrentPosition = getPosition() - 1;
                fWikiModel.append(list);
                return true;
            }
        }
        return false;
    }

    /**
     * Parses a wiki header line into "h1, h2, h3, h4, h5, h6" HTML
     * tags. 

     * 

     * Example wiki syntax header line: 

     * == Test header 2 ==
     *
     * @return true if a header line could be parsed correctly,
     *         false otherwise.
     */
    private boolean parseSectionHeaders() {
        if (isStartOfLine()) {
            int headerStartPosition = fCurrentPosition - 1;
            int endIndex = fStringSource.indexOf("\n", fCurrentPosition);
            if (endIndex < 0) {
                endIndex = fStringSource.length();
            }
            int headerEndPosition = endIndex;
            char ch;
            while (headerEndPosition > 0) {
                ch = fSource[--headerEndPosition];
                if (!Character.isWhitespace(ch)) {
                    break;
                }
            }
            if (headerEndPosition < 0
                    || headerEndPosition <= headerStartPosition) {
                return false;
            }
            int level = 0;
            int startPosition = headerStartPosition;
            int endPosition = headerEndPosition + 1;
            while (headerStartPosition < headerEndPosition) {
                if (fSource[headerStartPosition] == '='
                        && fSource[headerEndPosition] == '=') {
                    level++;
                    headerStartPosition++;
                    headerEndPosition--;
                } else {
                    headerEndPosition++;
                    break;
                }
            }
            if (level == 0) {
                return false;
            }
            if (level > 6) {
                level = 6;
            }
            createContentToken(1);
            reduceTokenStack();
            String head = "";
            if (headerEndPosition >= headerStartPosition) {
                if (headerEndPosition > headerStartPosition) {
                    head = fStringSource.substring(headerStartPosition,
                            headerEndPosition);
                } else {
                    head = String.valueOf(fStringSource
                            .charAt(headerStartPosition));
                }
            }
            fEventListener.onHeader(fSource, startPosition, endPosition,
                    headerStartPosition, headerEndPosition, level);
            fCurrentPosition = endIndex;

            fTableOfContentTag = fWikiModel.appendHead(head, level, fNoToC,
                    ++fHeadCounter, startPosition, endPosition);
            return true;
        }
        return false;
    }

    private boolean parseTable() {
        if (isStartOfLine()) {
            // wiki table ?
            setPosition(fCurrentPosition - 1);
            WPTable table = wpTable(fTableOfContentTag);
            if (table != null) {
                createContentToken(1);
                fWikiModel.reduceTokenStack(table);
                // set pointer behind: "\n|}"
                fCurrentPosition = getPosition();
                fWikiModel.append(table);
                // table.filter(fSource, fWikiModel);
                return true;
            }
        }
        return false;
    }

    private boolean parseTemplate() {
        try {
            // dummy parsing of Wikipedia templates for event listeners
            // doesn't change fCurrentPosition
            if (fSource[fCurrentPosition] == '{') {
                int templateStartPosition = fCurrentPosition + 1;
                if (fSource[templateStartPosition] != '{') {
                    int templateEndPosition = findNestedTemplateEnd(fSource,
                            templateStartPosition);
                    if (templateEndPosition > 0) {
                        fEventListener.onTemplate(fSource,
                                templateStartPosition, templateEndPosition - 2);
                        return true;
                    }
                }
            }
        } catch (Exception ignored) {
        }
        return false;
    }

    /**
     * Parse special identifiers like __TOC__, __NOTOC__, __FORCETOC__
     */
    private boolean parseSpecialIdentifiers() {
        if (fSource.length > fCurrentPosition
                && fSource[fCurrentPosition] == '_') {
            int oldPosition = fCurrentPosition;
            try {
                fCurrentPosition++;
                int tocEndPosition = fCurrentPosition;
                char ch;
                while (true) {
                    ch = fSource[tocEndPosition++];
                    if (ch >= 'A' && ch <= 'Z') {
                        continue;
                    }
                    break;
                }
                if (ch == '_' && fSource[tocEndPosition] == '_') {
                    String tocIdent = fStringSource.substring(fCurrentPosition,
                            tocEndPosition - 1);
                    if (fWikiModel.parseBehaviorSwitch(tocIdent)) {
                        createContentToken(2);
                        fCurrentPosition = tocEndPosition + 1;
                        return true;
                    }
                    boolean tocRecognized = false;
                    for (int i = 0; i < TOC_IDENTIFIERS.length; i++) {
                        if (TOC_IDENTIFIERS[i].equals(tocIdent)) {
                            createContentToken(2);
                            tocRecognized = true;
                            fCurrentPosition = tocEndPosition + 1;
                            switch (i) {
                            case 0: // TOC
                                fTableOfContentTag = fWikiModel
                                        .createTableOfContent(true);
                                fForceToC = true;
                                break;
                            case 1: // NOTOC
                                setNoToC(true);
                                break;
                            case 2: // FORCETOC
                                fForceToC = true;
                                break;
                            }
                            break;
                        }
                    }
                    if (tocRecognized) {
                        return true;
                    }
                }
            } catch (IndexOutOfBoundsException e) {
                // end of scanner text
            }
            fCurrentPosition = oldPosition;
        }

        return false;
    }

    /**
     * Check if the scanners cursor position is at the beginning of a line.
     *
     * @return true if the scanners cursor points to the beginning
     *         of a line, false otherwise.
     */
    private boolean isStartOfLine() {
        if (fCurrentPosition >= 2) {
            if (fSource[fCurrentPosition - 2] == '\n') {
                return true;
            }
        } else if (fCurrentPosition == 1) {
            return true;
        }
        return false;
    }

    private int isEndOfLine(char testChar, int currentPosition) {
        int tempPosition = currentPosition;
        try {
            char ch;
            while (true) {
                ch = fSource[tempPosition];
                if (ch != testChar) {
                    break;
                }
                tempPosition++;
            }
            while (true) {
                ch = fSource[tempPosition++];
                if (ch == '\n') {
                    return tempPosition;
                } else if (!Character.isWhitespace(ch)) {
                    return -1;
                }
            }
        } catch (IndexOutOfBoundsException ignored) {
        }
        return -1;
    }

    private void createTag(TagToken tag, WikiTagNode tagNode,
            int startMacroPosition) {
        String endTag;
        String macroBodyString;
        int index0;
        String command = tagNode.getTagName();
        if ((tag != null) && (tag instanceof IBodyTag)
                && (!tagNode.isEmptyXmlTag())) {
            endTag = command + '>';
            index0 = Util.indexOfIgnoreCase(fStringSource, "= 0) {
                macroBodyString = fStringSource.substring(startMacroPosition,
                        index0);
                fCurrentPosition = index0 + endTag.length() + 2;
            } else {
                macroBodyString = fStringSource.substring(startMacroPosition,
                        fSource.length);
                fCurrentPosition = fSource.length;
            }
        } else {
            macroBodyString = null;
            fCurrentPosition = startMacroPosition;
        }

        handleTag(tag, tagNode, macroBodyString);
    }

    private boolean handleHTTPLink(String name) {
        String urlString;
        String uriSchemeName = "";
        if (name != null) {
            boolean isEmail = false;

            int index = -1;
            boolean foundUrl = false;
            boolean protocolRelativeURL = false;

            urlString = name.trim();
            if (urlString.length() >= 2 && urlString.charAt(0) == '/'
                    && urlString.charAt(1) == '/') {
                // issue 89
                foundUrl = true;
                protocolRelativeURL = true;
            } else {

                try {
                    index = urlString.indexOf(':', 1);
                    if (index > 0) {
                        uriSchemeName = urlString.substring(0, index);
                        if (uriSchemeName.equalsIgnoreCase("mailto")) {
                            isEmail = true;
                            foundUrl = true;
                        } else {
                            if (fWikiModel.isValidUriScheme(uriSchemeName)) {
                                foundUrl = true;
                            }
                        }
                    }
                } catch (IndexOutOfBoundsException ignored) {
                }
            }

            if (foundUrl) {
                // Wikipedia link style: name separated by invalid URL character?
                // see test: "open square bracket forbidden in URL (named) (bug 4377)"
                int pipeIndex = 0;
                while (pipeIndex < urlString.length() &&
                        (Encoder.isUrlIdentifierPart(urlString.charAt(pipeIndex)) ||
                        urlString.charAt(pipeIndex) == '\'')) {
                    ++pipeIndex;
                }
                String alias;
                if (pipeIndex < urlString.length()) {
                    if (urlString.charAt(pipeIndex) == ' ') {
                        alias = urlString.substring(pipeIndex + 1);
                    } else {
                        alias = urlString.substring(pipeIndex);
                    }
                    urlString = urlString.substring(0, pipeIndex);
                } else {
                    if (protocolRelativeURL) {
                        alias = urlString.substring(2);
                    } else {
                        alias = urlString;
                    }
                }

                if (isEmail) {
                    String email;
                    if (pipeIndex > 7) {
                        email = urlString.substring(7, pipeIndex);
                    } else {
                        email = urlString.substring(7);
                    }
                    if (EmailValidator.getInstance().isValid(email)) {
                        fWikiModel.appendMailtoLink(urlString, alias, false);
                        return true;
                    }
                } else {
                    if (protocolRelativeURL) {
                        fWikiModel.appendExternalLink(uriSchemeName, urlString,
                                alias, false);
                        return true;
                    }
                    parseURIScheme();
                    String uriSchemeSpecificPart = urlString
                            .substring(index + 1);
                    if (fWikiModel.isValidUriSchemeSpecificPart(uriSchemeName,
                            uriSchemeSpecificPart)) {
                        fWikiModel.appendExternalLink(uriSchemeName, urlString,
                                alias, false);
                        return true;
                    }
                }

            }
        }
        return false;
    }

    private void handleTag(TagToken tag, WikiTagNode tagNode, String bodyString) {
        String command = tagNode.getTagName();
        try {
            if (tag instanceof EndTagToken) {
                fWikiModel.append(tag);
            } else {
                fWikiModel.pushNode(tag);
                if (null != bodyString) {
                    if (tag instanceof INoBodyParsingTag) {
                        ((TagNode) tag).addChild(new ContentToken(bodyString));
                    } else {
                        // recursively filter tags within the tags body string
                        WikipediaParser.parseRecursive(bodyString.trim(),
                                fWikiModel, false, true);
                    }
                }
                if (tag instanceof IBodyTag) {
                    fWikiModel.popNode();
                }
            }
        } catch (IllegalArgumentException e) {
            TagNode divTagNode = new TagNode("div");
            divTagNode.addAttribute("class", "error", true);
            divTagNode.addChild(new ContentToken("IllegalArgumentException: "
                    + command + " - " + e.getMessage()));
            fWikiModel.append(divTagNode);
            e.printStackTrace();
        } catch (Throwable e) {
            e.printStackTrace();
            TagNode divTagNode = new TagNode("div");
            divTagNode.addAttribute("class", "error", true);
            divTagNode.addChild(new ContentToken(command + ": "
                    + e.getMessage()));
            fWikiModel.append(divTagNode);
            e.printStackTrace();
        }
    }

    public void runParser() {
        int token;
        while ((token = getNextToken()) != TokenEOF) {
            switch (token) {
            case TokenBOLDITALIC:
                if (fWikiModel.stackSize() > 0
                        && fWikiModel.peekNode().equals(BOLDITALIC)) {
                    fWikiModel.popNode();
                } else if (fWikiModel.stackSize() > 1
                        && fWikiModel.peekNode().equals(BOLD)
                        && fWikiModel.getNode(fWikiModel.stackSize() - 2)
                                .equals(ITALIC)) {
                    fWikiModel.popNode();
                    fWikiModel.popNode();
                } else if (fWikiModel.stackSize() > 1
                        && fWikiModel.peekNode().equals(ITALIC)
                        && fWikiModel.getNode(fWikiModel.stackSize() - 2)
                                .equals(BOLD)) {
                    fWikiModel.popNode();
                    fWikiModel.popNode();
                } else if (fWikiModel.stackSize() > 0
                        && fWikiModel.peekNode().equals(BOLD)) {
                    fWikiModel.popNode();
                    fWikiModel.pushNode(new WPTag("i"));
                } else if (fWikiModel.stackSize() > 0
                        && fWikiModel.peekNode().equals(ITALIC)) {
                    fWikiModel.popNode();
                    fWikiModel.pushNode(new WPTag("b"));
                } else {
                    fWikiModel.pushNode(new WPBoldItalicTag());
                }
                break;
            case TokenBOLD:
                if (fWikiModel.stackSize() > 0
                        && fWikiModel.peekNode().equals(BOLDITALIC)) {
                    fWikiModel.popNode();
                    fWikiModel.pushNode(new WPTag("i"));
                    // fResultBuffer.append("");
                } else if (fWikiModel.stackSize() > 0
                        && fWikiModel.peekNode().equals(BOLD)) {
                    fWikiModel.popNode();
                } else {
                    fWikiModel.pushNode(new WPTag("b"));
                }
                break;
            case TokenITALIC:
                if (fWikiModel.stackSize() > 0
                        && fWikiModel.peekNode().equals(BOLDITALIC)) {
                    fWikiModel.popNode();
                    fWikiModel.pushNode(new WPTag("b"));
                } else if (fWikiModel.stackSize() > 0
                        && fWikiModel.peekNode().equals(ITALIC)) {
                    fWikiModel.popNode();
                } else {
                    fWikiModel.pushNode(new WPTag("i"));
                }
                break;
            }
        }
        reduceTokenStack();

        if (!fNoToC && fTableOfContentTag != null) {
            if (fHeadCounter > 3 || fForceToC) {
                fTableOfContentTag.setShowToC(true);
            }
        }

    }

    @Override
    public void setNoToC(boolean noToC) {
        fNoToC = noToC;
    }

    /**
     * Call the parser on the first recursion level, where the text can contain
     * a table of contents (TOC).
     *
     * 

     * 

     * Note: in this level the wiki model will call the
     * setUp() method before parsing and the
     * tearDown() method after the parser has finished.
     *
     * @param rawWikiText
     *            the raw text of the article
     * @param wikiModel
     *            a suitable wiki model for the given wiki article text
     * @param parseTemplates
     *            parse the template expansion step
     * @param templateParserBuffer
     *            if the templateParserBuffer != null the
     *            templateParserBuffer will be used to append the
     *            result of the template expansion step
     *
     */
    public static void parse(String rawWikiText, IWikiModel wikiModel,
            boolean parseTemplates, Appendable templateParserBuffer) {
        try {
            // initialize the wiki model
            wikiModel.setUp();

            if (parseTemplates) {
                Appendable buf;
                if (templateParserBuffer != null) {
                    buf = templateParserBuffer;
                } else {
                    buf = new StringBuilder(rawWikiText.length()
                            + rawWikiText.length() / 10);
                }
                String templatesParsedText = rawWikiText;
                try {
                    // TemplateParser.parse(templatesParsedText, wikiModel, buf,
                    // wikiModel.isTemplateTopic());
                    TemplateParser.parseRecursive(templatesParsedText,
                            wikiModel, buf, false, wikiModel.isTemplateTopic(),
                            null);
                    templatesParsedText = buf.toString();
                } catch (Exception ioe) {
                    ioe.printStackTrace();
                    templatesParsedText = "TemplateParser exception: "
                            + ioe.getClass().getSimpleName() + "";
                }
                String redirectedLink = parseRedirect(
                        templatesParsedText, wikiModel);
                if (redirectedLink == null) {
                    parseRecursive(templatesParsedText, wikiModel, false, false);
                }
            } else {
                if (parseRedirect(rawWikiText, wikiModel) == null) {
                    parseRecursive(rawWikiText, wikiModel, false, false);
                }
            }
        } finally {
            // clean up wiki model if necessary
            wikiModel.tearDown();
        }
    }

    /**
     * Call the parser on the subsequent recursion levels, where the subtexts
     * (of templates, table cells, list items or image captions) don't contain a
     * table of contents (TOC)
     *
     * Note: the wiki model doesn't call the setUp() or
     * tearDown() methods for the subsequent recursive parser
     * steps.
     *
     * @return HTML tags from the parsing process
     */
    public static TagStack parseRecursive(String rawWikitext,
            IWikiModel wikiModel, boolean createOnlyLocalStack, boolean noTOC) {
        AbstractWikipediaParser parser =  wikiModel.createNewInstance(rawWikitext);
        return parser.parseRecursiveInternal(wikiModel, createOnlyLocalStack,
                noTOC);
    }

    /**
     * Determine if the currently parsed wiki text is a template text.
     *
     * @return true if the currently parsed wiki text is a template
     */
    private boolean isTemplate() {
        return fRenderTemplate;
    }
}