All downloads are free. The search and download functionality uses the official Maven repository.

info.bliki.html.wikipedia.AbstractHTMLToWiki Maven / Gradle / Ivy

The newest version!
package info.bliki.html.wikipedia;

import info.bliki.htmlcleaner.BaseToken;
import info.bliki.htmlcleaner.ContentToken;
import info.bliki.htmlcleaner.EndTagToken;
import info.bliki.htmlcleaner.TagNode;

import java.util.List;
import java.util.Map;

/**
 * Base class for all HTML to wiki text converters.
 *
 * @see info.bliki.html.wikipedia.ToWikipedia
 * @see info.bliki.html.googlecode.ToGoogleCode
 */
public class AbstractHTMLToWiki {
    final Map fHashMap;

    final boolean fNoDiv;

    final boolean fNoFont;

    final boolean fNoMSWordTags;

    public AbstractHTMLToWiki(Map map, boolean noDiv, boolean noFont, boolean noMSWordTags) {
        super();
        fHashMap = map;
        fNoDiv = noDiv;
        fNoFont = noFont;
        fNoMSWordTags = noMSWordTags;
    }

    public AbstractHTMLToWiki(Map map, boolean noDiv, boolean noFont) {
        this(map, noDiv, noFont, false);
    }

    public void nodesToText(List nodes, StringBuilder resultBuffer) {
        if (nodes != null && !nodes.isEmpty()) {
            for (Object item : nodes) {
                if (item != null) {
                    if (item instanceof List) {
                        @SuppressWarnings("unchecked")
                        List list = (List) item;
                        nodesToText(list, resultBuffer);
                    } else if (item instanceof EndTagToken) {
                        EndTagToken node = (EndTagToken) item;
                        if (node.getName().equals("br")) {
                            resultBuffer.append("
"); } else if (node.getName().equals("hr")) { resultBuffer.append("\n----\n"); } } else if (item instanceof BaseToken) { nodeToWiki((BaseToken) item, resultBuffer); } } } } } public void nodeToWiki(BaseToken node, StringBuilder wikiText) { if (node instanceof ContentToken) { ContentToken contentToken = (ContentToken) node; String content = contentToken.getContent(); content = content.replaceAll(" ", " "); // content = StringUtils.replace(content, " ", " "); wikiText.append(content); } else if (node instanceof TagNode) { TagNode tagNode = (TagNode) node; String name = tagNode.getName(); HTMLTag tag = fHashMap.get(name); if (tag != null) { boolean showWithoutTag = false; if (fNoDiv && name.equals("div")) { showWithoutTag = true; } if (fNoFont && name.equals("font")) { showWithoutTag = true; } tag.content(this, tagNode, wikiText, showWithoutTag); } else { if (name.equals("br")) { wikiText.append("
"); } else if (name.equals("hr")) { wikiText.append("\n----\n"); } else { List children = tagNode.getChildren(); if (children.size() != 0) { nodesToText(children, wikiText); } } } } } protected void nodesToPlainText(List nodes, StringBuilder resultBuffer) { if (nodes != null && !nodes.isEmpty()) { for (Object item : nodes) { if (item != null) { if (item instanceof List) { @SuppressWarnings("unchecked") final List list = (List) item; nodesToPlainText(list, resultBuffer); } else if (item instanceof EndTagToken) { EndTagToken node = (EndTagToken) item; if (node.getName().equals("br")) { resultBuffer.append(" "); } else if (node.getName().equals("hr")) { resultBuffer.append(" "); } } else if (item instanceof BaseToken) { nodesToPlainText((BaseToken) item, resultBuffer); } } } } } public void nodesToPlainText(BaseToken node, StringBuilder plainText) { if (node instanceof ContentToken) { ContentToken contentToken = (ContentToken) node; // TODO refactor this: String content = contentToken.getContent(); content = content.replaceAll(" ", " "); content = content.replaceAll("<", "<"); content = content.replaceAll(">", ">"); content = content.replaceAll(""", "\""); content = content.replaceAll("&", "&"); content = content.replaceAll("'", "'"); plainText.append(content); } else if (node instanceof TagNode) { TagNode tagNode = (TagNode) node; List children = tagNode.getChildren(); if (children.size() != 0) { nodesToPlainText(children, plainText); } } } }