All Downloads are FREE. Search and download functionalities are using the official Maven repository.

com.googlecode.html.parsers.DOMParser Maven / Gradle / Ivy

There is a newer version: 0.63
Show newest version
/*
 * Copyright 2002-2009 Andy Clark, Marc Guillemot
 * 
 * Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except
 * in compliance with the License. You may obtain a copy of the License at
 * 
 * http://www.apache.org/licenses/LICENSE-2.0
 * 
 * Unless required by applicable law or agreed to in writing, software distributed under the License
 * is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express
 * or implied. See the License for the specific language governing permissions and limitations under
 * the License.
 */

package com.googlecode.html.parsers;

import com.googlecode.html.HTMLConfiguration;
import com.googlecode.html.xercesbridge.XercesBridge;
import org.apache.xerces.xni.Augmentations;
import org.apache.xerces.xni.XNIException;

/**
 * A DOM parser for HTML documents.
 *
 * @author Andy Clark
 * @version $Id: DOMParser.java,v 1.5 2005/02/14 03:56:54 andyc Exp $
 */
public class DOMParser
/***/
        extends org.apache.xerces.parsers.DOMParser {
    /***
     * // NOTE: It would be better to extend from AbstractDOMParser but // most users will find it
     * easier if the API is just like the // Xerces DOM parser. By extending directly from DOMParser,
     * // users can register SAX error handlers, entity resolvers, // and the like. -Ac extends
     * org.apache.xerces.parsers.AbstractDOMParser { /
     ***/

    //
    // Constructors
    //

    /**
     * Returns the parser's sub-version number.
     */
    private static int getParserSubVersion() {
        try {
            String VERSION = XercesBridge.getInstance().getVersion();
            int index1 = VERSION.indexOf('.') + 1;
            int index2 = VERSION.indexOf('.', index1);
            if (index2 == -1) {
                index2 = VERSION.length();
            }
            return Integer.parseInt(VERSION.substring(index1, index2));
        } catch (Exception e) {
            return -1;
        }
    } // getParserSubVersion():int

    //
    // XMLDocumentHandler methods
    //

    /**
     * Default constructor.
     */
    public DOMParser() {
        super(new HTMLConfiguration());
        /*** extending DOMParser ***/
        try {
            setProperty("http://apache.org/xml/properties/dom/document-class-name",
                    "org.apache.html.dom.HTMLDocumentImpl");
        } catch (org.xml.sax.SAXNotRecognizedException e) {
            throw new RuntimeException(
                    "http://apache.org/xml/properties/dom/document-class-name property not recognized");
        } catch (org.xml.sax.SAXNotSupportedException e) {
            throw new RuntimeException(
                    "http://apache.org/xml/properties/dom/document-class-name property not supported");
        }
        /***
         * extending AbstractDOMParser *** fConfiguration.setProperty(
         * "http://apache.org/xml/properties/dom/document-class-name",
         * "org.apache.html.dom.HTMLDocumentImpl"); /
         ***/
    } // ()

    //
    // Private static methods
    //

    /**
     * Doctype declaration.
     */
    public void doctypeDecl(String root, String pubid, String sysid, Augmentations augs)
            throws XNIException {

        // NOTE: Xerces HTML DOM implementation (up to and including
        // 2.5.0) throws a heirarchy request error exception
        // when a doctype node is appended to the tree. So,
        // don't insert this node into the tree for those
        // versions... -Ac

        String VERSION = XercesBridge.getInstance().getVersion();
        boolean okay = true;
        if (VERSION.startsWith("Xerces-J 2.")) {
            okay = getParserSubVersion() > 5;
        }
        // REVISIT: As soon as XML4J is updated with the latest code
        // from Xerces, then this needs to be updated to
        // check XML4J's version. -Ac
        else if (VERSION.startsWith("XML4J")) {
            okay = false;
        }

        // if okay, insert doctype; otherwise, don't risk it
        if (okay) {
            super.doctypeDecl(root, pubid, sysid, augs);
        }

    } // doctypeDecl(String,String,String,Augmentations)

} // class DOMParser




© 2015 - 2024 Weber Informatics LLC | Privacy Policy