// nu.validator.htmlparser.xom.HtmlBuilder (listing obtained from a Maven / Gradle / Ivy artifact page)
/*
 * Copyright (c) 2007 Henri Sivonen
 * Copyright (c) 2007-2008 Mozilla Foundation
 *
 * Permission is hereby granted, free of charge, to any person obtaining a 
 * copy of this software and associated documentation files (the "Software"), 
 * to deal in the Software without restriction, including without limitation 
 * the rights to use, copy, modify, merge, publish, distribute, sublicense, 
 * and/or sell copies of the Software, and to permit persons to whom the 
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in 
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING 
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER 
 * DEALINGS IN THE SOFTWARE.
 */

package nu.validator.htmlparser.xom;

import java.io.File;
import java.io.FileInputStream;
import java.io.IOException;
import java.io.InputStream;
import java.io.Reader;
import java.io.StringReader;
import java.net.MalformedURLException;
import java.net.URL;
import java.util.LinkedList;
import java.util.List;

import nu.validator.htmlparser.common.CharacterHandler;
import nu.validator.htmlparser.common.DoctypeExpectation;
import nu.validator.htmlparser.common.DocumentModeHandler;
import nu.validator.htmlparser.common.Heuristics;
import nu.validator.htmlparser.common.TokenHandler;
import nu.validator.htmlparser.common.TransitionHandler;
import nu.validator.htmlparser.common.XmlViolationPolicy;
import nu.validator.htmlparser.impl.ErrorReportingTokenizer;
import nu.validator.htmlparser.impl.Tokenizer;
import nu.validator.htmlparser.io.Driver;
import nu.xom.Builder;
import nu.xom.Document;
import nu.xom.Nodes;
import nu.xom.ParsingException;
import nu.xom.ValidityException;

import org.xml.sax.EntityResolver;
import org.xml.sax.ErrorHandler;
import org.xml.sax.InputSource;
import org.xml.sax.Locator;
import org.xml.sax.SAXException;
import org.xml.sax.SAXParseException;

/**
 * This class implements an HTML5 parser that exposes data through the XOM 
 * interface. 
 * 
 * <p>By default, when using the constructor without arguments, this parser
 * treats XML 1.0 infoset violations as fatal. This corresponds to
 * <code>FATAL</code> as the general XML violation policy. It is possible to
 * make the parser coerce XML 1.0-incompatible infosets into XML
 * 1.0-compatible infosets instead by setting the general XML violation
 * policy to <code>ALTER_INFOSET</code>.
 * 
 * <p>The doctype is not represented in the tree.
 * 
 * <p>The document mode is represented via the <code>Mode</code> 
 * interface on the <code>Document</code> node if the node implements 
 * that interface (depends on the used node factory).
 * 
 * <p>The form pointer is stored if the node factory supports storing it.
 * 
 * <p>This package has its own node factory class because the official 
 * XOM node factory may return multiple nodes instead of one, confusing 
 * the assumptions of the DOM-oriented HTML5 parsing algorithm.
 * 
 * @version $Id$
 * @author hsivonen
 */
public class HtmlBuilder extends Builder {

    /**
     * Created on demand by <code>lazyInit()</code>. Setters that assign
     * <code>null</code> to this field force it to be rebuilt before the next
     * parse.
     */
    private Driver driver;

    private final XOMTreeBuilder treeBuilder;

    private final SimpleNodeFactory simpleNodeFactory;

    private EntityResolver entityResolver;

    private ErrorHandler errorHandler = null;

    private DocumentModeHandler documentModeHandler = null;

    private DoctypeExpectation doctypeExpectation = DoctypeExpectation.HTML;

    private boolean checkingNormalization = false;

    private boolean scriptingEnabled = false;

    /**
     * Handlers registered so far. They are kept here so that they can be
     * registered again whenever a new driver is created.
     */
    private final List<CharacterHandler> characterHandlers = new LinkedList<CharacterHandler>();
    
    private XmlViolationPolicy contentSpacePolicy = XmlViolationPolicy.FATAL;

    private XmlViolationPolicy contentNonXmlCharPolicy = XmlViolationPolicy.FATAL;

    private XmlViolationPolicy commentPolicy = XmlViolationPolicy.FATAL;

    private XmlViolationPolicy namePolicy = XmlViolationPolicy.FATAL;

    /** Stored and returned by its accessors; not forwarded anywhere within this class. */
    private XmlViolationPolicy streamabilityViolationPolicy = XmlViolationPolicy.ALLOW;
    
    private boolean html4ModeCompatibleWithXhtml1Schemata = false;

    private boolean mappingLangToXmlLang = false;

    private XmlViolationPolicy xmlnsPolicy = XmlViolationPolicy.FATAL;
    
    private boolean reportingDoctype = true;

    private ErrorHandler treeBuilderErrorHandler = null;

    private Heuristics heuristics = Heuristics.NONE;

    private TransitionHandler transitionHandler = null;
    
    /**
     * Creates a builder that uses a new <code>SimpleNodeFactory</code> and
     * <code>XmlViolationPolicy.FATAL</code> as the XML violation policy.
     */
    public HtmlBuilder() {
        this(new SimpleNodeFactory(), XmlViolationPolicy.FATAL);
    }
    
    /**
     * Creates a builder that uses the given node factory and
     * <code>XmlViolationPolicy.FATAL</code> as the XML violation policy.
     * 
     * @param nodeFactory the factory
     */
    public HtmlBuilder(SimpleNodeFactory nodeFactory) {
        this(nodeFactory, XmlViolationPolicy.FATAL);
    }

    /**
     * Creates a builder that uses a new <code>SimpleNodeFactory</code> and
     * the given XML violation policy.
     * 
     * @param xmlPolicy the policy
     */
    public HtmlBuilder(XmlViolationPolicy xmlPolicy) {
        this(new SimpleNodeFactory(), xmlPolicy);
    }
    
    /**
     * Creates a builder that uses the given node factory and the given XML
     * violation policy. The policy is applied through
     * {@link #setXmlPolicy(XmlViolationPolicy)}.
     * 
     * @param nodeFactory the factory
     * @param xmlPolicy the policy
     */
    public HtmlBuilder(SimpleNodeFactory nodeFactory, XmlViolationPolicy xmlPolicy) {
        // The superclass constructor is invoked implicitly and the driver
        // field starts out null, so neither needs an explicit statement.
        simpleNodeFactory = nodeFactory;
        treeBuilder = new XOMTreeBuilder(nodeFactory);
        setXmlPolicy(xmlPolicy);
    }

    /**
     * Creates the tokenizer for a new driver. An
     * <code>ErrorReportingTokenizer</code> is used when an error handler or a
     * transition handler is set, or when the content non-XML character
     * policy is anything other than <code>ALLOW</code>; otherwise a plain
     * <code>Tokenizer</code> is used.
     * 
     * @param handler the token handler passed to the tokenizer
     * @param newAttributesEachTime passed through to the tokenizer constructor
     * @return the new tokenizer
     */
    private Tokenizer newTokenizer(TokenHandler handler, boolean newAttributesEachTime) {
        boolean needsErrorReporting = errorHandler != null
                || transitionHandler != null
                || contentNonXmlCharPolicy != XmlViolationPolicy.ALLOW;
        if (needsErrorReporting) {
            return new ErrorReportingTokenizer(handler, newAttributesEachTime);
        }
        return new Tokenizer(handler, newAttributesEachTime);
    }
    
    /**
     * Creates and configures the driver if there is none. The tokenizer
     * implementation depends on configuration, so the driver is built late
     * and this method hides that from the user of the class. All stored
     * settings are pushed to the new driver and to the tree builder.
     */
    private void lazyInit() {
        if (driver != null) {
            // Already set up; setters keep an existing driver up to date.
            return;
        }
        driver = new Driver(newTokenizer(treeBuilder, false));
        driver.setErrorHandler(errorHandler);
        driver.setTransitionHandler(transitionHandler);
        treeBuilder.setErrorHandler(treeBuilderErrorHandler);
        driver.setCheckingNormalization(checkingNormalization);
        driver.setCommentPolicy(commentPolicy);
        driver.setContentNonXmlCharPolicy(contentNonXmlCharPolicy);
        driver.setContentSpacePolicy(contentSpacePolicy);
        driver.setHtml4ModeCompatibleWithXhtml1Schemata(html4ModeCompatibleWithXhtml1Schemata);
        driver.setMappingLangToXmlLang(mappingLangToXmlLang);
        driver.setXmlnsPolicy(xmlnsPolicy);
        driver.setHeuristics(heuristics);
        for (CharacterHandler characterHandler : characterHandlers) {
            driver.addCharacterHandler(characterHandler);
        }
        treeBuilder.setDoctypeExpectation(doctypeExpectation);
        treeBuilder.setDocumentModeHandler(documentModeHandler);
        treeBuilder.setScriptingEnabled(scriptingEnabled);
        treeBuilder.setReportingDoctype(reportingDoctype);
        // NOTE(review): the name policy is applied only to the tree builder
        // here, whereas setNamePolicy() also passes it to the driver --
        // confirm whether a freshly created driver should receive it too.
        treeBuilder.setNamePolicy(namePolicy);
    }

    
    /**
     * Runs the driver over the given input. If the input source carries
     * neither a byte stream nor a character stream, its system id is
     * resolved: first through the entity resolver, if one is set, and
     * otherwise (or if the resolver supplies no stream) by opening the
     * system id as a URL.
     * 
     * @param is the input source; must not be <code>null</code>
     * @throws ParsingException if the driver reports a SAX exception
     * @throws IOException if an I/O error occurs
     * @throws MalformedURLException if the system id is not a valid URL
     * @throws IllegalArgumentException if <code>is</code> is
     *             <code>null</code> or has no stream and no system id
     */
    private void tokenize(InputSource is) throws ParsingException, IOException,
            MalformedURLException {
        try {
            if (is == null) {
                throw new IllegalArgumentException("Null input.");
            }
            if (is.getByteStream() == null && is.getCharacterStream() == null) {
                String systemId = is.getSystemId();
                if (systemId == null) {
                    throw new IllegalArgumentException(
                            "No byte stream, no character stream nor URI.");
                }
                // The resolver may decline by returning null.
                InputSource resolved = null;
                if (entityResolver != null) {
                    resolved = entityResolver.resolveEntity(is.getPublicId(),
                            systemId);
                }
                if (resolved != null
                        && (resolved.getByteStream() != null
                                || resolved.getCharacterStream() != null)) {
                    // One stream is enough to parse from; do not discard it.
                    is = resolved;
                } else {
                    // Nothing usable from the resolver: fetch the system id.
                    is = new InputSource();
                    is.setSystemId(systemId);
                    is.setByteStream(new URL(systemId).openStream());
                }
            }
            driver.tokenize(is);
        } catch (SAXParseException e) {
            // Keep the location information when it is available.
            throw new ParsingException(e.getMessage(), e.getSystemId(), e.getLineNumber(),
                    e.getColumnNumber(), e);
        } catch (SAXException e) {
            throw new ParsingException(e.getMessage(), e);
        }
    }

    /**
     * Parses a document from a SAX <code>InputSource</code>.
     * 
     * @param is the InputSource
     * @return the document
     * @throws ParsingException in case of an XML violation
     * @throws IOException if an I/O error occurs
     */
    public Document build(InputSource is) throws ParsingException, IOException {
        lazyInit();
        // Clear any fragment context left over from an earlier fragment parse.
        treeBuilder.setFragmentContext(null);
        tokenize(is);
        final Document document = treeBuilder.getDocument();
        return document;
    }

    /**
     * Parses a fragment from a SAX <code>InputSource</code> assuming an HTML
     * context.
     * 
     * @param is the InputSource
     * @param context the name of the context element (HTML namespace assumed)
     * @return the fragment
     * @throws ParsingException in case of an XML violation
     * @throws IOException if an I/O error occurs
     */
    public Nodes buildFragment(InputSource is, String context)
            throws IOException, ParsingException {
        lazyInit();
        final String internedContext = context.intern();
        treeBuilder.setFragmentContext(internedContext);
        tokenize(is);
        return treeBuilder.getDocumentFragment();
    }

    /**
     * Parses a fragment from a SAX <code>InputSource</code> in the context of
     * an element given by local name and namespace.
     * 
     * @param is the InputSource
     * @param contextLocal the local name of the context element
     * @param contextNamespace the namespace of the context element
     * @return the fragment
     * @throws ParsingException in case of an XML violation
     * @throws IOException if an I/O error occurs
     */
    public Nodes buildFragment(InputSource is, String contextLocal, String contextNamespace)
            throws IOException, ParsingException {
        lazyInit();
        final String local = contextLocal.intern();
        final String namespace = contextNamespace.intern();
        treeBuilder.setFragmentContext(local, namespace, null, false);
        tokenize(is);
        return treeBuilder.getDocumentFragment();
    }
    
    /**
     * Parses a document from a file. The file's URI is used as the base URI.
     * The stream opened on the file is closed before this method returns,
     * whether or not parsing succeeds.
     * 
     * @param file the file
     * @return the document
     * @throws ParsingException in case of an XML violation
     * @throws IOException if an I/O error occurs
     * @see nu.xom.Builder#build(java.io.File)
     */
    @Override
    public Document build(File file) throws ParsingException,
            ValidityException, IOException {
        InputStream stream = new FileInputStream(file);
        try {
            return build(stream, file.toURI().toASCIIString());
        } finally {
            // This method opened the stream, so it is responsible for
            // closing it; closing an already closed stream is harmless.
            stream.close();
        }
    }

    /**
     * Parses a document from a byte stream with a base URI.
     * 
     * @param stream the stream
     * @param uri the base URI
     * @return the document
     * @throws ParsingException in case of an XML violation
     * @throws IOException if an I/O error occurs
     * @see nu.xom.Builder#build(java.io.InputStream, java.lang.String)
     */
    @Override
    public Document build(InputStream stream, String uri)
            throws ParsingException, ValidityException, IOException {
        final InputSource source = new InputSource(stream);
        source.setSystemId(uri);
        return build(source);
    }

    /**
     * Parses a document from a byte stream without a base URI.
     * 
     * @param stream the stream
     * @return the document
     * @throws ParsingException in case of an XML violation
     * @throws IOException if an I/O error occurs
     * @see nu.xom.Builder#build(java.io.InputStream)
     */
    @Override
    public Document build(InputStream stream) throws ParsingException,
            ValidityException, IOException {
        final InputSource source = new InputSource(stream);
        return build(source);
    }

    /**
     * Parses a document from a character stream with a base URI.
     * 
     * @param stream the reader
     * @param uri the base URI
     * @return the document
     * @throws ParsingException in case of an XML violation
     * @throws IOException if an I/O error occurs
     * @see nu.xom.Builder#build(java.io.Reader, java.lang.String)
     */
    @Override
    public Document build(Reader stream, String uri) throws ParsingException,
            ValidityException, IOException {
        final InputSource source = new InputSource(stream);
        source.setSystemId(uri);
        return build(source);
    }

    /**
     * Parses a document from a character stream without a base URI.
     * 
     * @param stream the reader
     * @return the document
     * @throws ParsingException in case of an XML violation
     * @throws IOException if an I/O error occurs
     * @see nu.xom.Builder#build(java.io.Reader)
     */
    @Override
    public Document build(Reader stream) throws ParsingException,
            ValidityException, IOException {
        final InputSource source = new InputSource(stream);
        return build(source);
    }

    /**
     * Parses a document from HTML source held in a string.
     * 
     * @param content the HTML source as string
     * @param uri the base URI
     * @return the document
     * @throws ParsingException in case of an XML violation
     * @throws IOException if an I/O error occurs
     * @see nu.xom.Builder#build(java.lang.String, java.lang.String)
     */
    @Override
    public Document build(String content, String uri) throws ParsingException,
            ValidityException, IOException {
        final Reader reader = new StringReader(content);
        return build(reader, uri);
    }

    /**
     * Parses the document found at the given URI.
     * 
     * @param uri the URI of the document
     * @return the document
     * @throws ParsingException in case of an XML violation
     * @throws IOException if an I/O error occurs
     * @see nu.xom.Builder#build(java.lang.String)
     */
    @Override
    public Document build(String uri) throws ParsingException,
            ValidityException, IOException {
        // The source carries only a system id; the streams are obtained later.
        final InputSource source = new InputSource(uri);
        return build(source);
    }

    /**
     * Returns the node factory this builder was constructed with.
     * 
     * @return the node factory
     */
    public SimpleNodeFactory getSimpleNodeFactory() {
        return this.simpleNodeFactory;
    }

    /**
     * Sets the resolver consulted when an input source has a system id but
     * no stream.
     * 
     * @param resolver the resolver, or <code>null</code> for none
     * @see org.xml.sax.XMLReader#setEntityResolver(org.xml.sax.EntityResolver)
     */
    public void setEntityResolver(EntityResolver resolver) {
        this.entityResolver = resolver;
    }

    /**
     * Sets the error handler used for both the driver and the tree builder.
     * The current driver is discarded so that a new one is created before
     * the next parse.
     * 
     * @param handler the handler, or <code>null</code> for none
     * @see org.xml.sax.XMLReader#setErrorHandler(org.xml.sax.ErrorHandler)
     */
    public void setErrorHandler(ErrorHandler handler) {
        this.errorHandler = handler;
        this.treeBuilderErrorHandler = handler;
        // The choice of tokenizer depends on whether a handler is present.
        this.driver = null;
    }
    
    /**
     * Sets the transition handler. The current driver is discarded so that a
     * new one is created before the next parse. (The spelling of the method
     * name is part of the existing public API.)
     * 
     * @param handler the handler, or <code>null</code> for none
     */
    public void setTransitionHander(TransitionHandler handler) {
        this.transitionHandler = handler;
        // The choice of tokenizer depends on whether a handler is present.
        this.driver = null;
    }

    /**
     * Indicates whether NFC normalization of source is being checked.
     * 
     * @return true if NFC normalization of source is being checked.
     * @see nu.validator.htmlparser.impl.Tokenizer#isCheckingNormalization()
     */
    public boolean isCheckingNormalization() {
        return this.checkingNormalization;
    }

    /**
     * Toggles the checking of the NFC normalization of source. The setting
     * is stored and also passed to the driver if one exists.
     * 
     * @param enable true to check normalization
     * @see nu.validator.htmlparser.impl.Tokenizer#setCheckingNormalization(boolean)
     */
    public void setCheckingNormalization(boolean enable) {
        checkingNormalization = enable;
        if (driver == null) {
            return;
        }
        driver.setCheckingNormalization(enable);
    }

    /**
     * Sets the policy for consecutive hyphens in comments. The policy is
     * stored and also passed to the driver if one exists.
     * 
     * @param commentPolicy the policy
     * @see nu.validator.htmlparser.impl.Tokenizer#setCommentPolicy(nu.validator.htmlparser.common.XmlViolationPolicy)
     */
    public void setCommentPolicy(XmlViolationPolicy commentPolicy) {
        this.commentPolicy = commentPolicy;
        if (driver == null) {
            return;
        }
        driver.setCommentPolicy(commentPolicy);
    }

    /**
     * Sets the policy for non-XML characters except white space. The current
     * driver is discarded so that a new one is created before the next
     * parse.
     * 
     * @param contentNonXmlCharPolicy the policy
     * @see nu.validator.htmlparser.impl.Tokenizer#setContentNonXmlCharPolicy(nu.validator.htmlparser.common.XmlViolationPolicy)
     */
    public void setContentNonXmlCharPolicy(
            XmlViolationPolicy contentNonXmlCharPolicy) {
        this.contentNonXmlCharPolicy = contentNonXmlCharPolicy;
        // The choice of tokenizer depends on this policy.
        this.driver = null;
    }

    /**
     * Sets the policy for non-XML white space. The policy is stored and also
     * passed to the driver if one exists.
     * 
     * @param contentSpacePolicy the policy
     * @see nu.validator.htmlparser.impl.Tokenizer#setContentSpacePolicy(nu.validator.htmlparser.common.XmlViolationPolicy)
     */
    public void setContentSpacePolicy(XmlViolationPolicy contentSpacePolicy) {
        this.contentSpacePolicy = contentSpacePolicy;
        if (driver == null) {
            return;
        }
        driver.setContentSpacePolicy(contentSpacePolicy);
    }

    /**
     * Whether the parser considers scripting to be enabled for noscript
     * treatment.
     * 
     * @return true if enabled
     * @see nu.validator.htmlparser.impl.TreeBuilder#isScriptingEnabled()
     */
    public boolean isScriptingEnabled() {
        return this.scriptingEnabled;
    }

    /**
     * Sets whether the parser considers scripting to be enabled for noscript
     * treatment. The setting is stored and passed to the tree builder.
     * 
     * @param scriptingEnabled true to enable
     * @see nu.validator.htmlparser.impl.TreeBuilder#setScriptingEnabled(boolean)
     */
    public void setScriptingEnabled(boolean scriptingEnabled) {
        this.scriptingEnabled = scriptingEnabled;
        // The tree builder is a final field assigned in the constructor, so
        // it is never null here.
        treeBuilder.setScriptingEnabled(scriptingEnabled);
    }

    /**
     * Returns the doctype expectation currently configured.
     * 
     * @return the doctypeExpectation
     */
    public DoctypeExpectation getDoctypeExpectation() {
        return this.doctypeExpectation;
    }

    /**
     * Sets the doctype expectation. The value is stored and passed to the
     * tree builder.
     * 
     * @param doctypeExpectation
     *            the doctypeExpectation to set
     * @see nu.validator.htmlparser.impl.TreeBuilder#setDoctypeExpectation(nu.validator.htmlparser.common.DoctypeExpectation)
     */
    public void setDoctypeExpectation(DoctypeExpectation doctypeExpectation) {
        this.doctypeExpectation = doctypeExpectation;
        // The tree builder is a final field assigned in the constructor, so
        // it is never null here.
        treeBuilder.setDoctypeExpectation(doctypeExpectation);
    }

    /**
     * Returns the document mode handler currently configured.
     * 
     * @return the documentModeHandler, possibly <code>null</code>
     */
    public DocumentModeHandler getDocumentModeHandler() {
        return this.documentModeHandler;
    }

    /**
     * Sets the document mode handler. The handler is stored and passed to
     * the tree builder right away, so that a change made after the driver
     * has been created still takes effect on the next parse.
     * 
     * @param documentModeHandler
     *            the documentModeHandler to set
     * @see nu.validator.htmlparser.impl.TreeBuilder#setDocumentModeHandler(nu.validator.htmlparser.common.DocumentModeHandler)
     */
    public void setDocumentModeHandler(DocumentModeHandler documentModeHandler) {
        this.documentModeHandler = documentModeHandler;
        // lazyInit() only forwards the handler when it creates a new driver,
        // so forward it here as the other tree builder setters do.
        treeBuilder.setDocumentModeHandler(documentModeHandler);
    }

    /**
     * Returns the streamability violation policy currently stored.
     * 
     * @return the streamabilityViolationPolicy
     */
    public XmlViolationPolicy getStreamabilityViolationPolicy() {
        return this.streamabilityViolationPolicy;
    }

    /**
     * Sets the streamability violation policy. The current driver is
     * discarded so that a new one is created before the next parse.
     * 
     * @param streamabilityViolationPolicy
     *            the streamabilityViolationPolicy to set
     */
    public void setStreamabilityViolationPolicy(
            XmlViolationPolicy streamabilityViolationPolicy) {
        this.streamabilityViolationPolicy = streamabilityViolationPolicy;
        this.driver = null;
    }

    /**
     * Sets whether the HTML 4 mode reports boolean attributes in a way that
     * repeats the name in the value. The setting is stored and also passed
     * to the driver if one exists.
     * 
     * @param html4ModeCompatibleWithXhtml1Schemata true to enable
     */
    public void setHtml4ModeCompatibleWithXhtml1Schemata(
            boolean html4ModeCompatibleWithXhtml1Schemata) {
        this.html4ModeCompatibleWithXhtml1Schemata = html4ModeCompatibleWithXhtml1Schemata;
        if (driver == null) {
            return;
        }
        driver.setHtml4ModeCompatibleWithXhtml1Schemata(html4ModeCompatibleWithXhtml1Schemata);
    }

    /**
     * Returns the Locator during parse.
     * 
     * @return the Locator of the current driver, or <code>null</code> if no
     *         driver exists (before the first parse, or after a setter has
     *         discarded the driver)
     */
    public Locator getDocumentLocator() {
        // The driver is created lazily and may have been reset to null.
        if (driver == null) {
            return null;
        }
        return driver.getDocumentLocator();
    }

    /**
     * Whether the HTML 4 mode reports boolean attributes in a way that
     * repeats the name in the value.
     * 
     * @return the html4ModeCompatibleWithXhtml1Schemata
     */
    public boolean isHtml4ModeCompatibleWithXhtml1Schemata() {
        return this.html4ModeCompatibleWithXhtml1Schemata;
    }

    /**
     * Sets whether lang is mapped to xml:lang. The setting is stored and
     * also passed to the driver if one exists.
     * 
     * @param mappingLangToXmlLang true to map
     * @see nu.validator.htmlparser.impl.Tokenizer#setMappingLangToXmlLang(boolean)
     */
    public void setMappingLangToXmlLang(boolean mappingLangToXmlLang) {
        this.mappingLangToXmlLang = mappingLangToXmlLang;
        if (driver == null) {
            return;
        }
        driver.setMappingLangToXmlLang(mappingLangToXmlLang);
    }

    /**
     * Whether lang is mapped to xml:lang.
     * 
     * @return the mappingLangToXmlLang
     */
    public boolean isMappingLangToXmlLang() {
        return this.mappingLangToXmlLang;
    }

    /**
     * Sets whether the xmlns attribute on the root element is passed
     * through. (FATAL not allowed.) The policy is stored and also passed to
     * the driver if one exists.
     * 
     * @param xmlnsPolicy the policy; must not be <code>FATAL</code>
     * @throws IllegalArgumentException if the policy is <code>FATAL</code>
     * @see nu.validator.htmlparser.impl.Tokenizer#setXmlnsPolicy(nu.validator.htmlparser.common.XmlViolationPolicy)
     */
    public void setXmlnsPolicy(XmlViolationPolicy xmlnsPolicy) {
        if (XmlViolationPolicy.FATAL == xmlnsPolicy) {
            throw new IllegalArgumentException("Can't use FATAL here.");
        }
        this.xmlnsPolicy = xmlnsPolicy;
        if (driver == null) {
            return;
        }
        driver.setXmlnsPolicy(xmlnsPolicy);
    }

    /**
     * Returns the xmlns policy currently configured.
     * 
     * @return the xmlnsPolicy
     */
    public XmlViolationPolicy getXmlnsPolicy() {
        return this.xmlnsPolicy;
    }

    /**
     * Returns the comment policy currently configured.
     * 
     * @return the commentPolicy
     */
    public XmlViolationPolicy getCommentPolicy() {
        return this.commentPolicy;
    }

    /**
     * Returns the policy for non-XML characters currently configured.
     * 
     * @return the contentNonXmlCharPolicy
     */
    public XmlViolationPolicy getContentNonXmlCharPolicy() {
        return this.contentNonXmlCharPolicy;
    }

    /**
     * Returns the policy for non-XML white space currently configured.
     * 
     * @return the contentSpacePolicy
     */
    public XmlViolationPolicy getContentSpacePolicy() {
        return this.contentSpacePolicy;
    }

    /**
     * Sets the doctype reporting flag. The value is stored and passed to the
     * tree builder.
     * 
     * @param reportingDoctype the flag value
     * @see nu.validator.htmlparser.impl.TreeBuilder#setReportingDoctype(boolean)
     */
    public void setReportingDoctype(boolean reportingDoctype) {
        this.reportingDoctype = reportingDoctype;
        // The tree builder is a final field assigned in the constructor, so
        // it is never null here.
        treeBuilder.setReportingDoctype(reportingDoctype);
    }

    /**
     * Returns the doctype reporting flag currently configured.
     * 
     * @return the reportingDoctype
     */
    public boolean isReportingDoctype() {
        return this.reportingDoctype;
    }

    /**
     * Sets the policy for non-NCName element and attribute names. The policy
     * is stored; if a driver exists, it is also passed to the driver and to
     * the tree builder.
     * 
     * @param namePolicy the policy
     * @see nu.validator.htmlparser.impl.Tokenizer#setNamePolicy(nu.validator.htmlparser.common.XmlViolationPolicy)
     */
    public void setNamePolicy(XmlViolationPolicy namePolicy) {
        this.namePolicy = namePolicy;
        if (driver == null) {
            return;
        }
        driver.setNamePolicy(namePolicy);
        treeBuilder.setNamePolicy(namePolicy);
    }
    
    /**
     * Sets the encoding sniffing heuristics. The value is stored and also
     * passed to the driver if one exists.
     * 
     * @param heuristics the heuristics to set
     * @see nu.validator.htmlparser.impl.Tokenizer#setHeuristics(nu.validator.htmlparser.common.Heuristics)
     */
    public void setHeuristics(Heuristics heuristics) {
        this.heuristics = heuristics;
        if (driver == null) {
            return;
        }
        driver.setHeuristics(heuristics);
    }
    
    /**
     * Returns the encoding sniffing heuristics currently configured.
     * 
     * @return the heuristics
     */
    public Heuristics getHeuristics() {
        return heuristics;
    }

    /**
     * This is a catch-all convenience method for setting name, xmlns,
     * content space, content non-XML char and comment policies in one go.
     * Because the xmlns policy does not accept <code>FATAL</code>,
     * <code>ALTER_INFOSET</code> is used for it in that case. This does not
     * affect the streamability policy or doctype reporting.
     * 
     * @param xmlPolicy the policy to apply
     */
    public void setXmlPolicy(XmlViolationPolicy xmlPolicy) {
        final XmlViolationPolicy effectiveXmlnsPolicy;
        if (xmlPolicy == XmlViolationPolicy.FATAL) {
            effectiveXmlnsPolicy = XmlViolationPolicy.ALTER_INFOSET;
        } else {
            effectiveXmlnsPolicy = xmlPolicy;
        }
        setNamePolicy(xmlPolicy);
        setXmlnsPolicy(effectiveXmlnsPolicy);
        setContentSpacePolicy(xmlPolicy);
        setContentNonXmlCharPolicy(xmlPolicy);
        setCommentPolicy(xmlPolicy);
    }

    /**
     * Returns the policy for non-NCName element and attribute names.
     * 
     * @return the namePolicy
     */
    public XmlViolationPolicy getNamePolicy() {
        return this.namePolicy;
    }

    /**
     * Does nothing.
     * 
     * @param bogusXmlnsPolicy ignored
     * @deprecated this setting no longer has any effect
     */
    @Deprecated
    public void setBogusXmlnsPolicy(
            XmlViolationPolicy bogusXmlnsPolicy) {
    }

    /**
     * Returns XmlViolationPolicy.ALTER_INFOSET.
     * 
     * @deprecated the returned value is a constant
     * @return XmlViolationPolicy.ALTER_INFOSET
     */
    @Deprecated
    public XmlViolationPolicy getBogusXmlnsPolicy() {
        return XmlViolationPolicy.ALTER_INFOSET;
    }
    
    /**
     * Registers a character handler. The handler is remembered so that it
     * can be registered with drivers created later, and it is also passed to
     * the current driver if one exists.
     * 
     * @param characterHandler the handler to add
     */
    public void addCharacterHandler(CharacterHandler characterHandler) {
        characterHandlers.add(characterHandler);
        if (driver == null) {
            return;
        }
        driver.addCharacterHandler(characterHandler);
    }

    
    /**
     * Sets whether comment nodes appear in the tree. The setting is passed
     * directly to the tree builder.
     * 
     * @param ignoreComments true to ignore comments
     * @see nu.validator.htmlparser.impl.TreeBuilder#setIgnoringComments(boolean)
     */
    public void setIgnoringComments(boolean ignoreComments) {
        this.treeBuilder.setIgnoringComments(ignoreComments);
    }

}