All Downloads are FREE. Search and download functionalities are using the official Maven repository.

com.clickntap.tool.html.HTMLParser Maven / Gradle / Ivy

There is a newer version: 1.30
Show newest version
package com.clickntap.tool.html;

import javax.swing.text.html.parser.ParserDelegator;
import java.io.Reader;
import java.io.StringReader;
import java.io.StringWriter;
import java.io.Writer;

/**
 * Static entry points that feed HTML through an {@link HTMLParserDelegator} and
 * return whatever the chosen parser callback wrote to the supplied writer.
 *
 * <p>Every input reader is wrapped in an {@code EntityPreserveReader} before parsing.
 * NOTE(review): the reader and callback classes live outside this file; their exact
 * behaviour (entity handling, filtering rules) should be confirmed there.
 */
public class HTMLParser {
    /** Opening tag used to wrap fragments and to locate the body in parsed output. */
    private static final String BODY_START_TAG = "<body>";
    /** Closing tag matching {@link #BODY_START_TAG}. */
    private static final String BODY_END_TAG = "</body>";
    /** Escaped ampersand that is turned back into a plain {@code &} after filtering. */
    private static final String ESCAPED_AMPERSAND = "&amp;";

    /**
     * Parses {@code html} through the filtering callback and un-escapes ampersands
     * in the result.
     *
     * @param html   markup to parse
     * @param filter filter handed to the {@code HTMLParserCallBack}
     * @return the trimmed callback output with every {@code &amp;} replaced by {@code &}
     * @throws Exception if parsing fails
     */
    public static String parse(String html, HTMLFilter filter) throws Exception {
        String parsed = parse(new StringReader(html), new StringWriter(), filter);
        // The literal here must be the escaped form; replacing "&" with "&" is a no-op.
        return parsed.replace(ESCAPED_AMPERSAND, "&");
    }

    /**
     * Parses {@code html} with the {@code TextParserCallBack}.
     * Presumably this yields the text content without markup — confirm against
     * {@code TextParserCallBack}.
     *
     * @param html markup to parse
     * @return the trimmed callback output
     * @throws Exception if parsing fails
     */
    public static String getText(String html) throws Exception {
        return parse(new StringReader(html), new StringWriter());
    }

    /**
     * Parses {@code html} through the filtering callback and returns only what lies
     * between the body tags of the result.
     *
     * <p>Input that contains no {@code <body>} tag is wrapped in one before parsing.
     * If the parsed output does not contain a start tag followed by an end tag, the
     * whole trimmed output is returned instead.
     *
     * @param html   full document or fragment to parse
     * @param filter filter handed to the {@code HTMLParserCallBack}
     * @return the trimmed body content, or the whole trimmed output when no body
     *         element can be located
     * @throws Exception if parsing fails
     */
    public static String parseBody(String html, HTMLFilter filter) throws Exception {
        String input = html.contains(BODY_START_TAG)
                ? html
                : BODY_START_TAG + html + BODY_END_TAG;
        String parsed = parse(new StringReader(input), new StringWriter(), filter);

        int start = parsed.indexOf(BODY_START_TAG);
        if (start < 0) {
            return parsed.trim();
        }
        int contentStart = start + BODY_START_TAG.length();
        // Search for the end tag after the start tag so an index-0 start tag is
        // accepted and substring() can never receive an inverted range.
        int end = parsed.indexOf(BODY_END_TAG, contentStart);
        if (end < 0) {
            return parsed.trim();
        }
        return parsed.substring(contentStart, end).trim();
    }

    /**
     * Parses the content of {@code r} using an {@code HTMLParserCallBack} built from
     * {@code w} and {@code filter}.
     *
     * @param r      source of the markup; wrapped in an {@code EntityPreserveReader}
     * @param w      writer given to the callback; its {@code toString()} is the result,
     *               so it should be a writer whose {@code toString()} returns the
     *               written content (e.g. {@link StringWriter})
     * @param filter filter handed to the callback
     * @return {@code w.toString()} with leading and trailing whitespace removed
     * @throws Exception if parsing fails
     */
    public static String parse(Reader r, Writer w, HTMLFilter filter) throws Exception {
        ParserDelegator parser = new HTMLParserDelegator();
        // Third argument is ignoreCharSet: any charset declared in the markup is ignored.
        parser.parse(new EntityPreserveReader(r), new HTMLParserCallBack(w, filter), true);
        return w.toString().trim();
    }

    /**
     * Parses the content of {@code r} using a {@code TextParserCallBack} built from
     * {@code w}.
     *
     * @param r source of the markup; wrapped in an {@code EntityPreserveReader}
     * @param w writer given to the callback; its {@code toString()} is the result,
     *          so it should be a writer whose {@code toString()} returns the
     *          written content (e.g. {@link StringWriter})
     * @return {@code w.toString()} with leading and trailing whitespace removed
     * @throws Exception if parsing fails
     */
    public static String parse(Reader r, Writer w) throws Exception {
        ParserDelegator parser = new HTMLParserDelegator();
        // Third argument is ignoreCharSet: any charset declared in the markup is ignored.
        parser.parse(new EntityPreserveReader(r), new TextParserCallBack(w), true);
        return w.toString().trim();
    }
}




© 2015 - 2024 Weber Informatics LLC | Privacy Policy