All downloads are free. Search and download functionality uses the official Maven repository.

com.googlecode.gwt.test.internal.utils.GwtHtmlParser Maven / Gradle / Ivy

There is a newer version: 0.63
Show newest version
package com.googlecode.gwt.test.internal.utils;

import com.google.gwt.dom.client.Node;
import com.google.gwt.dom.client.NodeList;
import com.googlecode.gwt.test.exceptions.GwtTestPatchException;
import com.googlecode.gwt.test.internal.AfterTestCallback;
import com.googlecode.gwt.test.internal.AfterTestCallbackManager;
import com.googlecode.html.filters.DefaultFilter;
import org.apache.xerces.xni.*;
import org.apache.xerces.xni.parser.XMLDocumentFilter;
import org.xml.sax.InputSource;
import org.xml.sax.SAXException;
import org.xml.sax.XMLReader;
import org.xml.sax.helpers.XMLReaderFactory;

import java.io.StringReader;
import java.util.Collections;

/**
 * HTML parser used by gwt-test-utils. It relies on the NekoHTML SAX parser
 * ({@code com.googlecode.html.parsers.SAXParser}). For internal use only.
 *
 * <p>A single shared instance lazily creates one {@link XMLReader}, reuses it for
 * successive {@link #parse(String)} calls and discards it in {@link #afterTest()}.
 * No synchronization is performed.
 *
 * @author Gael Lazzari
 */
public class GwtHtmlParser implements AfterTestCallback {

    /**
     * Filter which keeps {@code &nbsp;} entity references as the literal text
     * {@code &nbsp;} instead of forwarding the characters the parser reports for
     * the reference.
     */
    private static class NbspRemover extends DefaultFilter {

        private static final String NBSP_ENTITY_NAME = "nbsp";

        /** Literal text forwarded downstream in place of each nbsp entity reference. */
        private final XMLString nbspXMLString;

        /** True between the start and end events of an nbsp entity reference. */
        private boolean inNbspEntityRef;

        private NbspRemover() {
            char[] literal = {'&', 'n', 'b', 's', 'p', ';'};
            nbspXMLString = new XMLString();
            nbspXMLString.setValues(literal, 0, literal.length);
        }

        /**
         * Forwards character data, except the data reported while inside an nbsp
         * entity reference (the literal replacement has already been emitted by
         * {@link #startGeneralEntity}).
         */
        @Override
        public void characters(XMLString text, Augmentations augs) throws XNIException {
            if (!inNbspEntityRef) {
                super.characters(text, augs);
            }
        }

        /**
         * Ends the current entity reference. The end of a swallowed nbsp reference is
         * swallowed too; the end of any other entity is forwarded so that it balances
         * the start event forwarded by {@link #startGeneralEntity}.
         */
        @Override
        public void endGeneralEntity(String name, Augmentations augs) throws XNIException {
            boolean endsNbspRef = inNbspEntityRef;
            // The flag is cleared unconditionally, as before.
            inNbspEntityRef = false;
            if (!endsNbspRef) {
                super.endGeneralEntity(name, augs);
            }
        }

        /** Resets the filter state at the beginning of every document. */
        @Override
        public void startDocument(XMLLocator locator, String encoding, Augmentations augs)
                throws XNIException {
            super.startDocument(locator, encoding, augs);
            inNbspEntityRef = false;
        }

        /**
         * For an nbsp reference, emits the literal {@code &nbsp;} text downstream and
         * starts suppressing character data; any other entity start is forwarded
         * unchanged.
         */
        @Override
        public void startGeneralEntity(String name, XMLResourceIdentifier id, String encoding,
                                       Augmentations augs) throws XNIException {
            if (NBSP_ENTITY_NAME.equals(name)) {
                inNbspEntityRef = true;
                super.characters(nbspXMLString, augs);
            } else {
                super.startGeneralEntity(name, id, encoding, augs);
            }
        }
    }

    private static final GwtHtmlParser INSTANCE = new GwtHtmlParser();

    /**
     * Parses an HTML fragment.
     *
     * @param html the HTML to parse; may be {@code null} or blank
     * @return the parsed nodes, or an empty list when {@code html} is {@code null} or
     *         contains only whitespace
     * @throws GwtTestPatchException if anything goes wrong while parsing
     */
    public static NodeList<Node> parse(String html) {
        return INSTANCE.parseInternal(html);
    }

    /** Lazily created reader, reused between parses and cleared by {@link #afterTest()}. */
    private XMLReader reader;

    private GwtHtmlParser() {
        AfterTestCallbackManager.get().registerCallback(this);
    }

    /** Discards the cached reader so that the next parse creates a fresh one. */
    @Override
    public void afterTest() throws Throwable {
        reader = null;
    }

    /**
     * Returns the cached reader, creating and configuring it on first use.
     *
     * @return the fully configured reader
     * @throws SAXException if the reader cannot be created or configured
     */
    private XMLReader getXMLReader() throws SAXException {
        if (reader == null) {
            // Configure a local instance first: if any setting is rejected, the field
            // stays null instead of caching a half-configured reader.
            XMLReader configured =
                    XMLReaderFactory.createXMLReader("com.googlecode.html.parsers.SAXParser");

            // FIXME : this feature does not work with the NekoHTML version included in gwt-dev.jar
            // (1.9.13) that's why we had to copy neko 1.9.15 sources in gwt-test-utils
            configured.setFeature(
                    "http://cyberneko.org/html/features/balance-tags/document-fragment", true);

            // NbspRemover relies on entity start/end events for built-in references.
            configured.setFeature(
                    "http://cyberneko.org/html/features/scanner/notify-builtin-refs", true);

            configured.setProperty(
                    "http://cyberneko.org/html/properties/default-encoding", "UTF-8");

            XMLDocumentFilter[] filters = {new NbspRemover()};
            configured.setProperty("http://cyberneko.org/html/properties/filters", filters);

            reader = configured;
        }
        return reader;
    }

    /**
     * Instance-level implementation of {@link #parse(String)}.
     *
     * @param html the HTML to parse; may be {@code null} or blank
     * @return the nodes collected by a fresh {@code GwtHtmlContentHandler}, or an empty
     *         list for {@code null} / whitespace-only input
     * @throws GwtTestPatchException wrapping any exception raised during parsing
     */
    private NodeList<Node> parseInternal(String html) {
        if (html == null || html.trim().isEmpty()) {
            return JsoUtils.newNodeList(Collections.<Node>emptyList());
        }

        try {
            XMLReader xmlReader = getXMLReader();
            GwtHtmlContentHandler contentHandler = new GwtHtmlContentHandler();
            xmlReader.setContentHandler(contentHandler);
            xmlReader.parse(new InputSource(new StringReader(html)));
            return contentHandler.getParsedNodes();
        } catch (Exception e) {
            // Deliberately broad: every failure is reported with the offending HTML.
            throw new GwtTestPatchException("Error while parsing HTML '" + html + "'", e);
        }
    }

}




© 2015 - 2024 Weber Informatics LLC | Privacy Policy